{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9993222089532967, "eval_steps": 100, "global_step": 2904, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.7182130584192438e-09, "logits/chosen": -2.8902626037597656, "logits/rejected": -2.9005308151245117, "logps/chosen": -283.5440673828125, "logps/rejected": -243.0171356201172, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 1.718213058419244e-08, "logits/chosen": -2.9894590377807617, "logits/rejected": -2.9360640048980713, "logps/chosen": -280.8406982421875, "logps/rejected": -240.75547790527344, "loss": 0.6935, "rewards/accuracies": 0.4097222089767456, "rewards/chosen": 0.004376528784632683, "rewards/margins": 0.0008075085352174938, "rewards/rejected": 0.003569019725546241, "step": 10 }, { "epoch": 0.02, "learning_rate": 3.436426116838488e-08, "logits/chosen": -2.940880537033081, "logits/rejected": -2.8801236152648926, "logps/chosen": -262.68853759765625, "logps/rejected": -225.02001953125, "loss": 0.6929, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.002663062419742346, "rewards/margins": 0.007061631418764591, "rewards/rejected": -0.004398568533360958, "step": 20 }, { "epoch": 0.03, "learning_rate": 5.154639175257731e-08, "logits/chosen": -2.9715359210968018, "logits/rejected": -2.9144601821899414, "logps/chosen": -246.590087890625, "logps/rejected": -219.89151000976562, "loss": 0.6931, "rewards/accuracies": 0.515625, "rewards/chosen": 0.003204852808266878, "rewards/margins": -2.4610617401776835e-05, "rewards/rejected": 0.003229462308809161, "step": 30 }, { "epoch": 0.04, "learning_rate": 6.872852233676976e-08, "logits/chosen": -2.992692708969116, "logits/rejected": -2.912419080734253, "logps/chosen": -270.08477783203125, "logps/rejected": -225.15029907226562, "loss": 0.6936, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.0006564104114659131, "rewards/margins": 0.0004107533022761345, "rewards/rejected": -0.001067164121195674, "step": 40 }, { "epoch": 0.05, "learning_rate": 8.59106529209622e-08, "logits/chosen": -3.000673294067383, "logits/rejected": -2.919649362564087, "logps/chosen": -282.7102355957031, "logps/rejected": -224.27810668945312, "loss": 0.6932, "rewards/accuracies": 0.4781250059604645, "rewards/chosen": -0.00029579317197203636, "rewards/margins": 0.001034152926877141, "rewards/rejected": -0.0013299459824338555, "step": 50 }, { "epoch": 0.06, "learning_rate": 1.0309278350515462e-07, "logits/chosen": -2.968362331390381, "logits/rejected": -2.8915956020355225, "logps/chosen": -264.82904052734375, "logps/rejected": -226.23526000976562, "loss": 0.6929, "rewards/accuracies": 0.484375, "rewards/chosen": 0.0005982272559776902, "rewards/margins": 0.0011857760837301612, "rewards/rejected": -0.0005875487113371491, "step": 60 }, { "epoch": 0.07, "learning_rate": 1.202749140893471e-07, "logits/chosen": -3.0189285278320312, "logits/rejected": -2.9377713203430176, "logps/chosen": -285.20263671875, "logps/rejected": -243.16024780273438, "loss": 0.6906, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": 0.002918859478086233, "rewards/margins": 0.005133490078151226, "rewards/rejected": -0.0022146315313875675, "step": 70 }, { "epoch": 0.08, "learning_rate": 1.3745704467353952e-07, "logits/chosen": -2.99265193939209, "logits/rejected": -2.90586256980896, "logps/chosen": -258.3550720214844, "logps/rejected": -221.766357421875, "loss": 0.6934, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": 0.0006495538400486112, "rewards/margins": -0.0003244529361836612, "rewards/rejected": 0.0009740066016092896, "step": 80 }, { "epoch": 0.09, "learning_rate": 1.5463917525773197e-07, "logits/chosen": -3.004547595977783, "logits/rejected": -2.924870252609253, "logps/chosen": -265.66558837890625, "logps/rejected": -231.5462646484375, "loss": 0.6913, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": 0.00795322097837925, "rewards/margins": 0.007618487812578678, "rewards/rejected": 0.0003347333404235542, "step": 90 }, { "epoch": 0.1, "learning_rate": 1.718213058419244e-07, "logits/chosen": -2.999065399169922, "logits/rejected": -2.9278481006622314, "logps/chosen": -276.21539306640625, "logps/rejected": -240.11709594726562, "loss": 0.6914, "rewards/accuracies": 0.518750011920929, "rewards/chosen": 0.0034270803444087505, "rewards/margins": 0.005303448066115379, "rewards/rejected": -0.0018763678381219506, "step": 100 }, { "epoch": 0.11, "learning_rate": 1.8900343642611682e-07, "logits/chosen": -3.003563642501831, "logits/rejected": -2.940854787826538, "logps/chosen": -269.36297607421875, "logps/rejected": -223.43551635742188, "loss": 0.6928, "rewards/accuracies": 0.518750011920929, "rewards/chosen": 0.0029028733260929585, "rewards/margins": 0.0021674195304512978, "rewards/rejected": 0.0007354536792263389, "step": 110 }, { "epoch": 0.12, "learning_rate": 2.0618556701030925e-07, "logits/chosen": -2.9860854148864746, "logits/rejected": -2.8890106678009033, "logps/chosen": -268.8426208496094, "logps/rejected": -209.8544921875, "loss": 0.6911, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": 0.003004752565175295, "rewards/margins": 0.006681190337985754, "rewards/rejected": -0.0036764375399798155, "step": 120 }, { "epoch": 0.13, "learning_rate": 2.2336769759450173e-07, "logits/chosen": -2.960130453109741, "logits/rejected": -2.908878803253174, "logps/chosen": -260.89202880859375, "logps/rejected": -243.7844696044922, "loss": 0.6892, "rewards/accuracies": 0.515625, "rewards/chosen": 0.00208044215105474, "rewards/margins": 0.007954845204949379, "rewards/rejected": -0.00587440375238657, "step": 130 }, { "epoch": 0.14, "learning_rate": 2.405498281786942e-07, "logits/chosen": -2.9910671710968018, "logits/rejected": -2.916494131088257, "logps/chosen": -266.9673767089844, "logps/rejected": -217.215087890625, "loss": 0.6901, "rewards/accuracies": 0.546875, "rewards/chosen": 0.005343564320355654, "rewards/margins": 0.006032806821167469, "rewards/rejected": -0.0006892428500577807, "step": 140 }, { "epoch": 0.15, "learning_rate": 2.5773195876288655e-07, "logits/chosen": -3.019535541534424, "logits/rejected": -2.9581732749938965, "logps/chosen": -277.1641845703125, "logps/rejected": -234.4961395263672, "loss": 0.6887, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.009159883484244347, "rewards/margins": 0.015980485826730728, "rewards/rejected": -0.006820603273808956, "step": 150 }, { "epoch": 0.17, "learning_rate": 2.7491408934707903e-07, "logits/chosen": -3.046652317047119, "logits/rejected": -2.9714391231536865, "logps/chosen": -263.0184326171875, "logps/rejected": -208.0572967529297, "loss": 0.6877, "rewards/accuracies": 0.5625, "rewards/chosen": 0.008923342451453209, "rewards/margins": 0.011679740622639656, "rewards/rejected": -0.002756398171186447, "step": 160 }, { "epoch": 0.18, "learning_rate": 2.9209621993127146e-07, "logits/chosen": -3.0278427600860596, "logits/rejected": -2.948286533355713, "logps/chosen": -289.84954833984375, "logps/rejected": -237.5100860595703, "loss": 0.6849, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": 0.0031468630768358707, "rewards/margins": 0.013391213491559029, "rewards/rejected": -0.01024434994906187, "step": 170 }, { "epoch": 0.19, "learning_rate": 3.0927835051546394e-07, "logits/chosen": -2.96464204788208, "logits/rejected": -2.890578269958496, "logps/chosen": -262.88250732421875, "logps/rejected": -226.43685913085938, "loss": 0.6826, "rewards/accuracies": 0.6656249761581421, "rewards/chosen": 0.012367731891572475, "rewards/margins": 0.025449495762586594, "rewards/rejected": -0.013081766664981842, "step": 180 }, { "epoch": 0.2, "learning_rate": 3.2646048109965636e-07, "logits/chosen": -2.9525418281555176, "logits/rejected": -2.902632713317871, "logps/chosen": -267.6039733886719, "logps/rejected": -232.4341278076172, "loss": 0.6835, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.012887142598628998, "rewards/margins": 0.026651531457901, "rewards/rejected": -0.013764390721917152, "step": 190 }, { "epoch": 0.21, "learning_rate": 3.436426116838488e-07, "logits/chosen": -3.0084328651428223, "logits/rejected": -2.940451145172119, "logps/chosen": -272.44256591796875, "logps/rejected": -218.37460327148438, "loss": 0.6811, "rewards/accuracies": 0.659375011920929, "rewards/chosen": 0.012132599949836731, "rewards/margins": 0.027265682816505432, "rewards/rejected": -0.01513308472931385, "step": 200 }, { "epoch": 0.22, "learning_rate": 3.608247422680412e-07, "logits/chosen": -2.9951331615448, "logits/rejected": -2.9081153869628906, "logps/chosen": -266.1490478515625, "logps/rejected": -217.39547729492188, "loss": 0.6783, "rewards/accuracies": 0.653124988079071, "rewards/chosen": 0.01829676702618599, "rewards/margins": 0.03563526272773743, "rewards/rejected": -0.017338499426841736, "step": 210 }, { "epoch": 0.23, "learning_rate": 3.7800687285223364e-07, "logits/chosen": -2.9270124435424805, "logits/rejected": -2.8599934577941895, "logps/chosen": -235.32919311523438, "logps/rejected": -214.2061004638672, "loss": 0.6773, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.015379729680716991, "rewards/margins": 0.03227097541093826, "rewards/rejected": -0.016891242936253548, "step": 220 }, { "epoch": 0.24, "learning_rate": 3.9518900343642607e-07, "logits/chosen": -3.013188362121582, "logits/rejected": -2.939645290374756, "logps/chosen": -269.3336486816406, "logps/rejected": -238.3136444091797, "loss": 0.6748, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": 0.013882170431315899, "rewards/margins": 0.043641261756420135, "rewards/rejected": -0.029759090393781662, "step": 230 }, { "epoch": 0.25, "learning_rate": 4.123711340206185e-07, "logits/chosen": -2.968400478363037, "logits/rejected": -2.9253265857696533, "logps/chosen": -271.2169494628906, "logps/rejected": -235.88027954101562, "loss": 0.6712, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.020952600985765457, "rewards/margins": 0.045413825660943985, "rewards/rejected": -0.024461226537823677, "step": 240 }, { "epoch": 0.26, "learning_rate": 4.2955326460481097e-07, "logits/chosen": -2.9857118129730225, "logits/rejected": -2.927201271057129, "logps/chosen": -260.50408935546875, "logps/rejected": -219.82589721679688, "loss": 0.6709, "rewards/accuracies": 0.671875, "rewards/chosen": 0.024162665009498596, "rewards/margins": 0.05260956287384033, "rewards/rejected": -0.028446903452277184, "step": 250 }, { "epoch": 0.27, "learning_rate": 4.4673539518900345e-07, "logits/chosen": -2.983931064605713, "logits/rejected": -2.9271445274353027, "logps/chosen": -282.38555908203125, "logps/rejected": -239.4889678955078, "loss": 0.6631, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.027914946898818016, "rewards/margins": 0.06547725945711136, "rewards/rejected": -0.037562306970357895, "step": 260 }, { "epoch": 0.28, "learning_rate": 4.639175257731959e-07, "logits/chosen": -3.040987730026245, "logits/rejected": -2.9617156982421875, "logps/chosen": -282.022216796875, "logps/rejected": -222.32656860351562, "loss": 0.6608, "rewards/accuracies": 0.65625, "rewards/chosen": 0.024256506934762, "rewards/margins": 0.06711156666278839, "rewards/rejected": -0.04285505786538124, "step": 270 }, { "epoch": 0.29, "learning_rate": 4.810996563573884e-07, "logits/chosen": -2.967451810836792, "logits/rejected": -2.89064621925354, "logps/chosen": -249.1062469482422, "logps/rejected": -230.0796661376953, "loss": 0.6646, "rewards/accuracies": 0.628125011920929, "rewards/chosen": 0.021498382091522217, "rewards/margins": 0.0675952136516571, "rewards/rejected": -0.04609683156013489, "step": 280 }, { "epoch": 0.3, "learning_rate": 4.982817869415807e-07, "logits/chosen": -2.9444422721862793, "logits/rejected": -2.906001567840576, "logps/chosen": -257.80963134765625, "logps/rejected": -226.1893310546875, "loss": 0.6563, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.0328928679227829, "rewards/margins": 0.07963062822818756, "rewards/rejected": -0.046737752854824066, "step": 290 }, { "epoch": 0.31, "learning_rate": 4.982778415614236e-07, "logits/chosen": -3.0241198539733887, "logits/rejected": -2.9573776721954346, "logps/chosen": -261.10980224609375, "logps/rejected": -228.2661590576172, "loss": 0.6576, "rewards/accuracies": 0.659375011920929, "rewards/chosen": 0.03156961873173714, "rewards/margins": 0.07706739008426666, "rewards/rejected": -0.04549776762723923, "step": 300 }, { "epoch": 0.32, "learning_rate": 4.963643321852277e-07, "logits/chosen": -2.939030170440674, "logits/rejected": -2.8984603881835938, "logps/chosen": -275.2110290527344, "logps/rejected": -228.59469604492188, "loss": 0.6478, "rewards/accuracies": 0.6656249761581421, "rewards/chosen": 0.03570122271776199, "rewards/margins": 0.10030387341976166, "rewards/rejected": -0.06460265815258026, "step": 310 }, { "epoch": 0.33, "learning_rate": 4.944508228090318e-07, "logits/chosen": -3.009154796600342, "logits/rejected": -2.921469211578369, "logps/chosen": -263.9809265136719, "logps/rejected": -226.9568328857422, "loss": 0.6442, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.040056101977825165, "rewards/margins": 0.12968766689300537, "rewards/rejected": -0.08963155746459961, "step": 320 }, { "epoch": 0.34, "learning_rate": 4.925373134328357e-07, "logits/chosen": -2.979597568511963, "logits/rejected": -2.906559705734253, "logps/chosen": -253.5226287841797, "logps/rejected": -202.83169555664062, "loss": 0.6395, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.01942339912056923, "rewards/margins": 0.12088962644338608, "rewards/rejected": -0.10146622359752655, "step": 330 }, { "epoch": 0.35, "learning_rate": 4.906238040566398e-07, "logits/chosen": -2.9941489696502686, "logits/rejected": -2.9178366661071777, "logps/chosen": -270.72491455078125, "logps/rejected": -224.19961547851562, "loss": 0.6372, "rewards/accuracies": 0.6468750238418579, "rewards/chosen": 0.039756499230861664, "rewards/margins": 0.12468083202838898, "rewards/rejected": -0.08492432534694672, "step": 340 }, { "epoch": 0.36, "learning_rate": 4.887102946804438e-07, "logits/chosen": -2.983179807662964, "logits/rejected": -2.8937084674835205, "logps/chosen": -258.9554748535156, "logps/rejected": -216.77877807617188, "loss": 0.6402, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.037654388695955276, "rewards/margins": 0.16522696614265442, "rewards/rejected": -0.12757258117198944, "step": 350 }, { "epoch": 0.37, "learning_rate": 4.867967853042479e-07, "logits/chosen": -2.972989082336426, "logits/rejected": -2.8665084838867188, "logps/chosen": -275.40155029296875, "logps/rejected": -227.54544067382812, "loss": 0.6318, "rewards/accuracies": 0.703125, "rewards/chosen": 0.04534328356385231, "rewards/margins": 0.18759654462337494, "rewards/rejected": -0.14225324988365173, "step": 360 }, { "epoch": 0.38, "learning_rate": 4.84883275928052e-07, "logits/chosen": -2.940910816192627, "logits/rejected": -2.914931058883667, "logps/chosen": -258.170166015625, "logps/rejected": -222.706787109375, "loss": 0.6396, "rewards/accuracies": 0.653124988079071, "rewards/chosen": 0.02604937180876732, "rewards/margins": 0.145067036151886, "rewards/rejected": -0.11901766061782837, "step": 370 }, { "epoch": 0.39, "learning_rate": 4.82969766551856e-07, "logits/chosen": -3.0028481483459473, "logits/rejected": -2.9033100605010986, "logps/chosen": -273.29193115234375, "logps/rejected": -220.68887329101562, "loss": 0.6114, "rewards/accuracies": 0.703125, "rewards/chosen": 0.033887721598148346, "rewards/margins": 0.18225380778312683, "rewards/rejected": -0.14836609363555908, "step": 380 }, { "epoch": 0.4, "learning_rate": 4.810562571756601e-07, "logits/chosen": -2.9734809398651123, "logits/rejected": -2.909604072570801, "logps/chosen": -266.8335876464844, "logps/rejected": -222.3031005859375, "loss": 0.6216, "rewards/accuracies": 0.671875, "rewards/chosen": 0.025868883356451988, "rewards/margins": 0.16047567129135132, "rewards/rejected": -0.13460679352283478, "step": 390 }, { "epoch": 0.41, "learning_rate": 4.791427477994642e-07, "logits/chosen": -3.011569023132324, "logits/rejected": -2.9673287868499756, "logps/chosen": -266.60662841796875, "logps/rejected": -248.6490936279297, "loss": 0.6257, "rewards/accuracies": 0.6656249761581421, "rewards/chosen": 0.027015071362257004, "rewards/margins": 0.1724485158920288, "rewards/rejected": -0.1454334259033203, "step": 400 }, { "epoch": 0.42, "learning_rate": 4.772292384232682e-07, "logits/chosen": -2.9473037719726562, "logits/rejected": -2.8615517616271973, "logps/chosen": -259.61822509765625, "logps/rejected": -226.01809692382812, "loss": 0.6263, "rewards/accuracies": 0.690625011920929, "rewards/chosen": 0.03032611683011055, "rewards/margins": 0.17090603709220886, "rewards/rejected": -0.14057990908622742, "step": 410 }, { "epoch": 0.43, "learning_rate": 4.753157290470723e-07, "logits/chosen": -3.0207021236419678, "logits/rejected": -2.95526385307312, "logps/chosen": -264.6485290527344, "logps/rejected": -240.77792358398438, "loss": 0.6177, "rewards/accuracies": 0.6875, "rewards/chosen": 0.03897939249873161, "rewards/margins": 0.216640442609787, "rewards/rejected": -0.17766106128692627, "step": 420 }, { "epoch": 0.44, "learning_rate": 4.7340221967087635e-07, "logits/chosen": -2.9245405197143555, "logits/rejected": -2.8471407890319824, "logps/chosen": -278.7442321777344, "logps/rejected": -211.8600616455078, "loss": 0.6231, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.025491178035736084, "rewards/margins": 0.20820407569408417, "rewards/rejected": -0.1827128827571869, "step": 430 }, { "epoch": 0.45, "learning_rate": 4.714887102946804e-07, "logits/chosen": -3.010326862335205, "logits/rejected": -2.901296854019165, "logps/chosen": -276.81219482421875, "logps/rejected": -230.5873260498047, "loss": 0.5937, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.0355800986289978, "rewards/margins": 0.2455623596906662, "rewards/rejected": -0.2099822461605072, "step": 440 }, { "epoch": 0.46, "learning_rate": 4.6957520091848447e-07, "logits/chosen": -2.9676156044006348, "logits/rejected": -2.8937532901763916, "logps/chosen": -269.66339111328125, "logps/rejected": -230.52163696289062, "loss": 0.6011, "rewards/accuracies": 0.6781250238418579, "rewards/chosen": 0.009764080867171288, "rewards/margins": 0.23638252913951874, "rewards/rejected": -0.2266184538602829, "step": 450 }, { "epoch": 0.48, "learning_rate": 4.6766169154228853e-07, "logits/chosen": -2.9388675689697266, "logits/rejected": -2.9115350246429443, "logps/chosen": -252.7357940673828, "logps/rejected": -224.2154541015625, "loss": 0.6024, "rewards/accuracies": 0.690625011920929, "rewards/chosen": -0.004697978962212801, "rewards/margins": 0.24117231369018555, "rewards/rejected": -0.24587027728557587, "step": 460 }, { "epoch": 0.49, "learning_rate": 4.657481821660926e-07, "logits/chosen": -3.0146288871765137, "logits/rejected": -2.930093765258789, "logps/chosen": -284.5770568847656, "logps/rejected": -223.614013671875, "loss": 0.6091, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": 0.010038135573267937, "rewards/margins": 0.29235151410102844, "rewards/rejected": -0.28231337666511536, "step": 470 }, { "epoch": 0.5, "learning_rate": 4.6383467278989666e-07, "logits/chosen": -3.0139148235321045, "logits/rejected": -2.920546054840088, "logps/chosen": -290.7146301269531, "logps/rejected": -231.1171875, "loss": 0.581, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.04135869815945625, "rewards/margins": 0.30950188636779785, "rewards/rejected": -0.2681432068347931, "step": 480 }, { "epoch": 0.51, "learning_rate": 4.6192116341370067e-07, "logits/chosen": -2.9442543983459473, "logits/rejected": -2.8480868339538574, "logps/chosen": -294.6175231933594, "logps/rejected": -226.11611938476562, "loss": 0.604, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": 0.0046967691741883755, "rewards/margins": 0.31620293855667114, "rewards/rejected": -0.3115061819553375, "step": 490 }, { "epoch": 0.52, "learning_rate": 4.6000765403750473e-07, "logits/chosen": -2.95143985748291, "logits/rejected": -2.893009901046753, "logps/chosen": -259.2351989746094, "logps/rejected": -232.2927703857422, "loss": 0.6079, "rewards/accuracies": 0.659375011920929, "rewards/chosen": -0.0063209859654307365, "rewards/margins": 0.22812893986701965, "rewards/rejected": -0.23444993793964386, "step": 500 }, { "epoch": 0.53, "learning_rate": 4.580941446613088e-07, "logits/chosen": -2.9778671264648438, "logits/rejected": -2.8938779830932617, "logps/chosen": -266.2318420410156, "logps/rejected": -221.6147918701172, "loss": 0.6004, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.027762463316321373, "rewards/margins": 0.2772659957408905, "rewards/rejected": -0.24950353801250458, "step": 510 }, { "epoch": 0.54, "learning_rate": 4.5618063528511285e-07, "logits/chosen": -2.983058214187622, "logits/rejected": -2.8804774284362793, "logps/chosen": -244.7335968017578, "logps/rejected": -200.14389038085938, "loss": 0.5876, "rewards/accuracies": 0.71875, "rewards/chosen": 0.0016447363886982203, "rewards/margins": 0.3048996925354004, "rewards/rejected": -0.3032549321651459, "step": 520 }, { "epoch": 0.55, "learning_rate": 4.542671259089169e-07, "logits/chosen": -2.9777004718780518, "logits/rejected": -2.899975299835205, "logps/chosen": -273.9354553222656, "logps/rejected": -233.248291015625, "loss": 0.5786, "rewards/accuracies": 0.715624988079071, "rewards/chosen": 0.013322785496711731, "rewards/margins": 0.34341758489608765, "rewards/rejected": -0.33009475469589233, "step": 530 }, { "epoch": 0.56, "learning_rate": 4.52353616532721e-07, "logits/chosen": -2.9999477863311768, "logits/rejected": -2.917571544647217, "logps/chosen": -284.49407958984375, "logps/rejected": -237.95364379882812, "loss": 0.5974, "rewards/accuracies": 0.659375011920929, "rewards/chosen": -0.010401284322142601, "rewards/margins": 0.29033318161964417, "rewards/rejected": -0.30073440074920654, "step": 540 }, { "epoch": 0.57, "learning_rate": 4.5044010715652504e-07, "logits/chosen": -2.9923791885375977, "logits/rejected": -2.921231508255005, "logps/chosen": -277.56494140625, "logps/rejected": -223.3925323486328, "loss": 0.5867, "rewards/accuracies": 0.684374988079071, "rewards/chosen": 0.008537148125469685, "rewards/margins": 0.3291955590248108, "rewards/rejected": -0.3206583857536316, "step": 550 }, { "epoch": 0.58, "learning_rate": 4.485265977803291e-07, "logits/chosen": -3.024245023727417, "logits/rejected": -2.9194655418395996, "logps/chosen": -281.55181884765625, "logps/rejected": -224.04550170898438, "loss": 0.5884, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.013154732994735241, "rewards/margins": 0.34570780396461487, "rewards/rejected": -0.3588625490665436, "step": 560 }, { "epoch": 0.59, "learning_rate": 4.4661308840413316e-07, "logits/chosen": -2.9340929985046387, "logits/rejected": -2.8398380279541016, "logps/chosen": -256.93963623046875, "logps/rejected": -219.94357299804688, "loss": 0.6023, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08448503166437149, "rewards/margins": 0.28318461775779724, "rewards/rejected": -0.36766964197158813, "step": 570 }, { "epoch": 0.6, "learning_rate": 4.446995790279372e-07, "logits/chosen": -2.9707589149475098, "logits/rejected": -2.8493690490722656, "logps/chosen": -278.016357421875, "logps/rejected": -216.64389038085938, "loss": 0.5756, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -0.03356485813856125, "rewards/margins": 0.40374183654785156, "rewards/rejected": -0.4373067319393158, "step": 580 }, { "epoch": 0.61, "learning_rate": 4.4278606965174123e-07, "logits/chosen": -2.9651870727539062, "logits/rejected": -2.8763153553009033, "logps/chosen": -252.7129364013672, "logps/rejected": -223.58358764648438, "loss": 0.5804, "rewards/accuracies": 0.703125, "rewards/chosen": -0.04651349410414696, "rewards/margins": 0.331315815448761, "rewards/rejected": -0.37782931327819824, "step": 590 }, { "epoch": 0.62, "learning_rate": 4.408725602755453e-07, "logits/chosen": -2.986072540283203, "logits/rejected": -2.885190963745117, "logps/chosen": -255.285400390625, "logps/rejected": -229.14370727539062, "loss": 0.5742, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.04862093925476074, "rewards/margins": 0.377112478017807, "rewards/rejected": -0.42573338747024536, "step": 600 }, { "epoch": 0.63, "learning_rate": 4.3895905089934936e-07, "logits/chosen": -2.9893829822540283, "logits/rejected": -2.9061150550842285, "logps/chosen": -281.68194580078125, "logps/rejected": -224.9415283203125, "loss": 0.5634, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.05259629338979721, "rewards/margins": 0.35992366075515747, "rewards/rejected": -0.4125199317932129, "step": 610 }, { "epoch": 0.64, "learning_rate": 4.370455415231534e-07, "logits/chosen": -2.9851701259613037, "logits/rejected": -2.8783726692199707, "logps/chosen": -253.15786743164062, "logps/rejected": -215.0591278076172, "loss": 0.5753, "rewards/accuracies": 0.690625011920929, "rewards/chosen": -0.05028218775987625, "rewards/margins": 0.35829657316207886, "rewards/rejected": -0.4085787236690521, "step": 620 }, { "epoch": 0.65, "learning_rate": 4.351320321469575e-07, "logits/chosen": -2.9831159114837646, "logits/rejected": -2.872509241104126, "logps/chosen": -258.7300720214844, "logps/rejected": -213.2132110595703, "loss": 0.5883, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.02935725823044777, "rewards/margins": 0.39590340852737427, "rewards/rejected": -0.42526063323020935, "step": 630 }, { "epoch": 0.66, "learning_rate": 4.3321852277076154e-07, "logits/chosen": -2.931411027908325, "logits/rejected": -2.8679914474487305, "logps/chosen": -256.75213623046875, "logps/rejected": -235.01809692382812, "loss": 0.5761, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.03508998453617096, "rewards/margins": 0.3371378779411316, "rewards/rejected": -0.37222781777381897, "step": 640 }, { "epoch": 0.67, "learning_rate": 4.313050133945656e-07, "logits/chosen": -2.9251325130462646, "logits/rejected": -2.87497878074646, "logps/chosen": -254.54531860351562, "logps/rejected": -231.86703491210938, "loss": 0.5986, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05003873258829117, "rewards/margins": 0.36049655079841614, "rewards/rejected": -0.4105352759361267, "step": 650 }, { "epoch": 0.68, "learning_rate": 4.2939150401836967e-07, "logits/chosen": -2.9279026985168457, "logits/rejected": -2.8727946281433105, "logps/chosen": -253.64981079101562, "logps/rejected": -219.0675811767578, "loss": 0.5918, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -0.06951335817575455, "rewards/margins": 0.3124743103981018, "rewards/rejected": -0.381987601518631, "step": 660 }, { "epoch": 0.69, "learning_rate": 4.2747799464217373e-07, "logits/chosen": -2.999785900115967, "logits/rejected": -2.882141351699829, "logps/chosen": -273.99639892578125, "logps/rejected": -223.59323120117188, "loss": 0.5787, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.03457346186041832, "rewards/margins": 0.43067875504493713, "rewards/rejected": -0.465252161026001, "step": 670 }, { "epoch": 0.7, "learning_rate": 4.255644852659778e-07, "logits/chosen": -2.9860451221466064, "logits/rejected": -2.910327911376953, "logps/chosen": -277.1084289550781, "logps/rejected": -240.5979766845703, "loss": 0.5693, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.013871532864868641, "rewards/margins": 0.403283029794693, "rewards/rejected": -0.41715455055236816, "step": 680 }, { "epoch": 0.71, "learning_rate": 4.236509758897818e-07, "logits/chosen": -2.996521472930908, "logits/rejected": -2.904085636138916, "logps/chosen": -272.6430969238281, "logps/rejected": -223.96176147460938, "loss": 0.5795, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.033228904008865356, "rewards/margins": 0.39110440015792847, "rewards/rejected": -0.42433327436447144, "step": 690 }, { "epoch": 0.72, "learning_rate": 4.2173746651358586e-07, "logits/chosen": -2.9849753379821777, "logits/rejected": -2.9086861610412598, "logps/chosen": -277.5323181152344, "logps/rejected": -240.6964874267578, "loss": 0.5653, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.015461507253348827, "rewards/margins": 0.4256417155265808, "rewards/rejected": -0.4411032795906067, "step": 700 }, { "epoch": 0.73, "learning_rate": 4.198239571373899e-07, "logits/chosen": -2.970578670501709, "logits/rejected": -2.8901703357696533, "logps/chosen": -262.0364685058594, "logps/rejected": -211.28634643554688, "loss": 0.566, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -0.0026952396146953106, "rewards/margins": 0.43883174657821655, "rewards/rejected": -0.44152694940567017, "step": 710 }, { "epoch": 0.74, "learning_rate": 4.17910447761194e-07, "logits/chosen": -2.9339656829833984, "logits/rejected": -2.853869676589966, "logps/chosen": -249.8131561279297, "logps/rejected": -214.4302215576172, "loss": 0.5625, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.041089337319135666, "rewards/margins": 0.43385154008865356, "rewards/rejected": -0.47494086623191833, "step": 720 }, { "epoch": 0.75, "learning_rate": 4.1599693838499805e-07, "logits/chosen": -2.930128812789917, "logits/rejected": -2.8671534061431885, "logps/chosen": -269.2236328125, "logps/rejected": -226.4650115966797, "loss": 0.5849, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.05486045032739639, "rewards/margins": 0.3979194462299347, "rewards/rejected": -0.4527798593044281, "step": 730 }, { "epoch": 0.76, "learning_rate": 4.140834290088021e-07, "logits/chosen": -2.91292405128479, "logits/rejected": -2.8605432510375977, "logps/chosen": -269.60748291015625, "logps/rejected": -226.88851928710938, "loss": 0.5671, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.04892410710453987, "rewards/margins": 0.44430097937583923, "rewards/rejected": -0.49322509765625, "step": 740 }, { "epoch": 0.77, "learning_rate": 4.121699196326062e-07, "logits/chosen": -2.9797894954681396, "logits/rejected": -2.9220128059387207, "logps/chosen": -261.55120849609375, "logps/rejected": -226.9205322265625, "loss": 0.5738, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.0773581936955452, "rewards/margins": 0.39150264859199524, "rewards/rejected": -0.46886080503463745, "step": 750 }, { "epoch": 0.78, "learning_rate": 4.1025641025641024e-07, "logits/chosen": -2.9344232082366943, "logits/rejected": -2.873014450073242, "logps/chosen": -248.0850830078125, "logps/rejected": -215.9403533935547, "loss": 0.587, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08575326204299927, "rewards/margins": 0.4175490736961365, "rewards/rejected": -0.5033023357391357, "step": 760 }, { "epoch": 0.8, "learning_rate": 4.083429008802143e-07, "logits/chosen": -2.9882631301879883, "logits/rejected": -2.9340240955352783, "logps/chosen": -264.8534851074219, "logps/rejected": -234.9072265625, "loss": 0.5874, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1017172560095787, "rewards/margins": 0.38931411504745483, "rewards/rejected": -0.49103134870529175, "step": 770 }, { "epoch": 0.81, "learning_rate": 4.0642939150401836e-07, "logits/chosen": -2.984961748123169, "logits/rejected": -2.895946502685547, "logps/chosen": -274.66680908203125, "logps/rejected": -228.8633270263672, "loss": 0.5503, "rewards/accuracies": 0.7593749761581421, "rewards/chosen": -0.054658979177474976, "rewards/margins": 0.484149694442749, "rewards/rejected": -0.5388087034225464, "step": 780 }, { "epoch": 0.82, "learning_rate": 4.0451588212782237e-07, "logits/chosen": -2.99807071685791, "logits/rejected": -2.9068822860717773, "logps/chosen": -244.6894073486328, "logps/rejected": -222.9803466796875, "loss": 0.5727, "rewards/accuracies": 0.703125, "rewards/chosen": -0.09993685781955719, "rewards/margins": 0.4389115273952484, "rewards/rejected": -0.5388484597206116, "step": 790 }, { "epoch": 0.83, "learning_rate": 4.0260237275162643e-07, "logits/chosen": -2.981156826019287, "logits/rejected": -2.929170846939087, "logps/chosen": -252.1260986328125, "logps/rejected": -232.20901489257812, "loss": 0.5484, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.08297708630561829, "rewards/margins": 0.4665769040584564, "rewards/rejected": -0.5495539903640747, "step": 800 }, { "epoch": 0.84, "learning_rate": 4.006888633754305e-07, "logits/chosen": -2.9784133434295654, "logits/rejected": -2.8958353996276855, "logps/chosen": -253.4025115966797, "logps/rejected": -217.09378051757812, "loss": 0.5681, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.13211281597614288, "rewards/margins": 0.4216250777244568, "rewards/rejected": -0.5537378787994385, "step": 810 }, { "epoch": 0.85, "learning_rate": 3.9877535399923456e-07, "logits/chosen": -2.9170148372650146, "logits/rejected": -2.9081389904022217, "logps/chosen": -233.7285614013672, "logps/rejected": -218.60305786132812, "loss": 0.5732, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -0.1225995197892189, "rewards/margins": 0.374833345413208, "rewards/rejected": -0.4974328577518463, "step": 820 }, { "epoch": 0.86, "learning_rate": 3.968618446230386e-07, "logits/chosen": -2.970407724380493, "logits/rejected": -2.8898773193359375, "logps/chosen": -252.6938934326172, "logps/rejected": -204.7732391357422, "loss": 0.5579, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12692540884017944, "rewards/margins": 0.43592119216918945, "rewards/rejected": -0.5628465414047241, "step": 830 }, { "epoch": 0.87, "learning_rate": 3.949483352468427e-07, "logits/chosen": -2.962777614593506, "logits/rejected": -2.8853983879089355, "logps/chosen": -273.10247802734375, "logps/rejected": -236.5925750732422, "loss": 0.5642, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07755005359649658, "rewards/margins": 0.4871961176395416, "rewards/rejected": -0.5647461414337158, "step": 840 }, { "epoch": 0.88, "learning_rate": 3.9303482587064674e-07, "logits/chosen": -2.9445035457611084, "logits/rejected": -2.8726143836975098, "logps/chosen": -264.9803161621094, "logps/rejected": -227.33837890625, "loss": 0.5791, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -0.15796451270580292, "rewards/margins": 0.3849995732307434, "rewards/rejected": -0.5429641008377075, "step": 850 }, { "epoch": 0.89, "learning_rate": 3.911213164944508e-07, "logits/chosen": -2.994809627532959, "logits/rejected": -2.8971474170684814, "logps/chosen": -269.63507080078125, "logps/rejected": -234.4397430419922, "loss": 0.55, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.10952029377222061, "rewards/margins": 0.5051003694534302, "rewards/rejected": -0.614620566368103, "step": 860 }, { "epoch": 0.9, "learning_rate": 3.8920780711825487e-07, "logits/chosen": -2.9615581035614014, "logits/rejected": -2.8944363594055176, "logps/chosen": -268.522216796875, "logps/rejected": -236.46194458007812, "loss": 0.5847, "rewards/accuracies": 0.71875, "rewards/chosen": -0.09445861726999283, "rewards/margins": 0.4419677257537842, "rewards/rejected": -0.5364263653755188, "step": 870 }, { "epoch": 0.91, "learning_rate": 3.8729429774205893e-07, "logits/chosen": -2.983121156692505, "logits/rejected": -2.8919730186462402, "logps/chosen": -284.83514404296875, "logps/rejected": -234.43490600585938, "loss": 0.5418, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08047501742839813, "rewards/margins": 0.5345014333724976, "rewards/rejected": -0.6149765253067017, "step": 880 }, { "epoch": 0.92, "learning_rate": 3.8538078836586294e-07, "logits/chosen": -2.982217311859131, "logits/rejected": -2.8919973373413086, "logps/chosen": -276.71490478515625, "logps/rejected": -237.01953125, "loss": 0.5842, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10841889679431915, "rewards/margins": 0.4410565495491028, "rewards/rejected": -0.549475371837616, "step": 890 }, { "epoch": 0.93, "learning_rate": 3.83467278989667e-07, "logits/chosen": -2.942038059234619, "logits/rejected": -2.9040706157684326, "logps/chosen": -247.3633575439453, "logps/rejected": -227.09243774414062, "loss": 0.5776, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08092047274112701, "rewards/margins": 0.4484461843967438, "rewards/rejected": -0.529366672039032, "step": 900 }, { "epoch": 0.94, "learning_rate": 3.8155376961347106e-07, "logits/chosen": -2.9575412273406982, "logits/rejected": -2.8804690837860107, "logps/chosen": -286.373291015625, "logps/rejected": -226.7725830078125, "loss": 0.5613, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16980710625648499, "rewards/margins": 0.44477352499961853, "rewards/rejected": -0.6145806312561035, "step": 910 }, { "epoch": 0.95, "learning_rate": 3.796402602372751e-07, "logits/chosen": -2.968951940536499, "logits/rejected": -2.891641616821289, "logps/chosen": -249.3066864013672, "logps/rejected": -229.39794921875, "loss": 0.5576, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.12510491907596588, "rewards/margins": 0.48747557401657104, "rewards/rejected": -0.6125804781913757, "step": 920 }, { "epoch": 0.96, "learning_rate": 3.777267508610792e-07, "logits/chosen": -2.9730169773101807, "logits/rejected": -2.896005630493164, "logps/chosen": -295.7337646484375, "logps/rejected": -240.42074584960938, "loss": 0.5401, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.0670388713479042, "rewards/margins": 0.5632071495056152, "rewards/rejected": -0.6302460432052612, "step": 930 }, { "epoch": 0.97, "learning_rate": 3.7581324148488325e-07, "logits/chosen": -2.9994075298309326, "logits/rejected": -2.9081084728240967, "logps/chosen": -288.58734130859375, "logps/rejected": -228.8270263671875, "loss": 0.5409, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.10843507945537567, "rewards/margins": 0.5393946766853333, "rewards/rejected": -0.6478297710418701, "step": 940 }, { "epoch": 0.98, "learning_rate": 3.738997321086873e-07, "logits/chosen": -3.008603572845459, "logits/rejected": -2.9379982948303223, "logps/chosen": -280.4133605957031, "logps/rejected": -233.2333221435547, "loss": 0.5689, "rewards/accuracies": 0.690625011920929, "rewards/chosen": -0.07783280313014984, "rewards/margins": 0.5174218416213989, "rewards/rejected": -0.5952546000480652, "step": 950 }, { "epoch": 0.99, "learning_rate": 3.7198622273249137e-07, "logits/chosen": -3.0044474601745605, "logits/rejected": -2.900397539138794, "logps/chosen": -267.80499267578125, "logps/rejected": -220.6837158203125, "loss": 0.545, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.0621054545044899, "rewards/margins": 0.5770050287246704, "rewards/rejected": -0.6391104459762573, "step": 960 }, { "epoch": 1.0, "eval_logits/chosen": -2.982574701309204, "eval_logits/rejected": -2.89551043510437, "eval_logps/chosen": -266.4708557128906, "eval_logps/rejected": -227.06637573242188, "eval_loss": 0.5543332099914551, "eval_rewards/accuracies": 0.7269999980926514, "eval_rewards/chosen": -0.09513603150844574, "eval_rewards/margins": 0.5752254128456116, "eval_rewards/rejected": -0.6703614592552185, "eval_runtime": 527.5737, "eval_samples_per_second": 3.791, "eval_steps_per_second": 1.895, "step": 968 }, { "epoch": 1.0, "learning_rate": 3.7007271335629544e-07, "logits/chosen": -2.896667242050171, "logits/rejected": -2.8467178344726562, "logps/chosen": -286.75604248046875, "logps/rejected": -260.253173828125, "loss": 0.5489, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09386046975851059, "rewards/margins": 0.5270522236824036, "rewards/rejected": -0.6209126710891724, "step": 970 }, { "epoch": 1.01, "learning_rate": 3.681592039800995e-07, "logits/chosen": -2.9582056999206543, "logits/rejected": -2.9024696350097656, "logps/chosen": -291.1214599609375, "logps/rejected": -250.9956512451172, "loss": 0.5391, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -0.03953595831990242, "rewards/margins": 0.6313408613204956, "rewards/rejected": -0.6708768010139465, "step": 980 }, { "epoch": 1.02, "learning_rate": 3.662456946039035e-07, "logits/chosen": -2.923724412918091, "logits/rejected": -2.8657290935516357, "logps/chosen": -259.3362731933594, "logps/rejected": -234.0655975341797, "loss": 0.542, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.17304709553718567, "rewards/margins": 0.4752405285835266, "rewards/rejected": -0.6482875943183899, "step": 990 }, { "epoch": 1.03, "learning_rate": 3.6433218522770757e-07, "logits/chosen": -2.951791286468506, "logits/rejected": -2.883042812347412, "logps/chosen": -249.5883331298828, "logps/rejected": -220.2185821533203, "loss": 0.5673, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1280551552772522, "rewards/margins": 0.5117616653442383, "rewards/rejected": -0.6398168206214905, "step": 1000 }, { "epoch": 1.04, "learning_rate": 3.6241867585151163e-07, "logits/chosen": -2.9808642864227295, "logits/rejected": -2.8994953632354736, "logps/chosen": -266.9244689941406, "logps/rejected": -231.6138153076172, "loss": 0.5525, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.11259253323078156, "rewards/margins": 0.4912075996398926, "rewards/rejected": -0.6038001179695129, "step": 1010 }, { "epoch": 1.05, "learning_rate": 3.605051664753157e-07, "logits/chosen": -2.979203939437866, "logits/rejected": -2.897819757461548, "logps/chosen": -279.91864013671875, "logps/rejected": -225.23867797851562, "loss": 0.5608, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.058581650257110596, "rewards/margins": 0.6009987592697144, "rewards/rejected": -0.659580409526825, "step": 1020 }, { "epoch": 1.06, "learning_rate": 3.5859165709911975e-07, "logits/chosen": -2.950101137161255, "logits/rejected": -2.882319927215576, "logps/chosen": -262.75213623046875, "logps/rejected": -234.2764892578125, "loss": 0.5646, "rewards/accuracies": 0.6781250238418579, "rewards/chosen": -0.10398142039775848, "rewards/margins": 0.4619951844215393, "rewards/rejected": -0.5659765601158142, "step": 1030 }, { "epoch": 1.07, "learning_rate": 3.566781477229238e-07, "logits/chosen": -3.001460552215576, "logits/rejected": -2.9112706184387207, "logps/chosen": -295.07061767578125, "logps/rejected": -255.2864990234375, "loss": 0.5652, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.059899281710386276, "rewards/margins": 0.6083524823188782, "rewards/rejected": -0.6682518720626831, "step": 1040 }, { "epoch": 1.08, "learning_rate": 3.547646383467279e-07, "logits/chosen": -2.9998221397399902, "logits/rejected": -2.904151439666748, "logps/chosen": -261.1492004394531, "logps/rejected": -224.39663696289062, "loss": 0.5636, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.09337648749351501, "rewards/margins": 0.5255954265594482, "rewards/rejected": -0.6189719438552856, "step": 1050 }, { "epoch": 1.09, "learning_rate": 3.5285112897053194e-07, "logits/chosen": -2.96533203125, "logits/rejected": -2.895564556121826, "logps/chosen": -270.18951416015625, "logps/rejected": -243.80697631835938, "loss": 0.5744, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.11165542900562286, "rewards/margins": 0.5082225203514099, "rewards/rejected": -0.6198779344558716, "step": 1060 }, { "epoch": 1.11, "learning_rate": 3.50937619594336e-07, "logits/chosen": -2.9657583236694336, "logits/rejected": -2.8945467472076416, "logps/chosen": -264.00701904296875, "logps/rejected": -235.73129272460938, "loss": 0.5699, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.13867442309856415, "rewards/margins": 0.4999178349971771, "rewards/rejected": -0.6385921835899353, "step": 1070 }, { "epoch": 1.12, "learning_rate": 3.4902411021814007e-07, "logits/chosen": -2.9830238819122314, "logits/rejected": -2.91878080368042, "logps/chosen": -279.8549499511719, "logps/rejected": -233.4591827392578, "loss": 0.5518, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09302230924367905, "rewards/margins": 0.5605674982070923, "rewards/rejected": -0.6535898447036743, "step": 1080 }, { "epoch": 1.13, "learning_rate": 3.4711060084194413e-07, "logits/chosen": -2.9644315242767334, "logits/rejected": -2.8773105144500732, "logps/chosen": -257.06573486328125, "logps/rejected": -222.2319793701172, "loss": 0.5421, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.12000062316656113, "rewards/margins": 0.594086766242981, "rewards/rejected": -0.7140873670578003, "step": 1090 }, { "epoch": 1.14, "learning_rate": 3.4519709146574814e-07, "logits/chosen": -2.9385759830474854, "logits/rejected": -2.886782169342041, "logps/chosen": -259.7723388671875, "logps/rejected": -243.08413696289062, "loss": 0.5698, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.1346462368965149, "rewards/margins": 0.5263178944587708, "rewards/rejected": -0.6609640717506409, "step": 1100 }, { "epoch": 1.15, "learning_rate": 3.432835820895522e-07, "logits/chosen": -2.973198652267456, "logits/rejected": -2.914881944656372, "logps/chosen": -275.39862060546875, "logps/rejected": -231.8275146484375, "loss": 0.5569, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.12493139505386353, "rewards/margins": 0.5488640069961548, "rewards/rejected": -0.6737955212593079, "step": 1110 }, { "epoch": 1.16, "learning_rate": 3.4137007271335626e-07, "logits/chosen": -3.024646282196045, "logits/rejected": -2.9316773414611816, "logps/chosen": -285.3172302246094, "logps/rejected": -232.48098754882812, "loss": 0.5611, "rewards/accuracies": 0.75, "rewards/chosen": -0.10210631787776947, "rewards/margins": 0.5720211267471313, "rewards/rejected": -0.6741273999214172, "step": 1120 }, { "epoch": 1.17, "learning_rate": 3.394565633371603e-07, "logits/chosen": -3.0145974159240723, "logits/rejected": -2.9370007514953613, "logps/chosen": -254.81143188476562, "logps/rejected": -213.2404022216797, "loss": 0.5265, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.12412172555923462, "rewards/margins": 0.5721691846847534, "rewards/rejected": -0.696290910243988, "step": 1130 }, { "epoch": 1.18, "learning_rate": 3.375430539609644e-07, "logits/chosen": -3.003575563430786, "logits/rejected": -2.926270008087158, "logps/chosen": -302.1836242675781, "logps/rejected": -250.45614624023438, "loss": 0.5668, "rewards/accuracies": 0.734375, "rewards/chosen": -0.14901027083396912, "rewards/margins": 0.601496696472168, "rewards/rejected": -0.7505069375038147, "step": 1140 }, { "epoch": 1.19, "learning_rate": 3.3562954458476845e-07, "logits/chosen": -2.939309597015381, "logits/rejected": -2.871006488800049, "logps/chosen": -255.2751922607422, "logps/rejected": -236.4067840576172, "loss": 0.5503, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.17432285845279694, "rewards/margins": 0.5559533834457397, "rewards/rejected": -0.7302762269973755, "step": 1150 }, { "epoch": 1.2, "learning_rate": 3.337160352085725e-07, "logits/chosen": -2.914022922515869, "logits/rejected": -2.8773839473724365, "logps/chosen": -270.58770751953125, "logps/rejected": -240.72500610351562, "loss": 0.5437, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.13600067794322968, "rewards/margins": 0.5414363145828247, "rewards/rejected": -0.6774370074272156, "step": 1160 }, { "epoch": 1.21, "learning_rate": 3.3180252583237657e-07, "logits/chosen": -3.0049827098846436, "logits/rejected": -2.922834873199463, "logps/chosen": -286.50994873046875, "logps/rejected": -224.88308715820312, "loss": 0.5478, "rewards/accuracies": 0.734375, "rewards/chosen": -0.11266575753688812, "rewards/margins": 0.6266575455665588, "rewards/rejected": -0.7393232583999634, "step": 1170 }, { "epoch": 1.22, "learning_rate": 3.2988901645618063e-07, "logits/chosen": -2.953585147857666, "logits/rejected": -2.884155750274658, "logps/chosen": -245.29183959960938, "logps/rejected": -217.1591796875, "loss": 0.5548, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -0.15721073746681213, "rewards/margins": 0.542457103729248, "rewards/rejected": -0.6996678113937378, "step": 1180 }, { "epoch": 1.23, "learning_rate": 3.279755070799847e-07, "logits/chosen": -2.919790267944336, "logits/rejected": -2.8451826572418213, "logps/chosen": -245.4008331298828, "logps/rejected": -223.47427368164062, "loss": 0.5437, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.12426744401454926, "rewards/margins": 0.5887547731399536, "rewards/rejected": -0.7130222320556641, "step": 1190 }, { "epoch": 1.24, "learning_rate": 3.260619977037887e-07, "logits/chosen": -2.9761576652526855, "logits/rejected": -2.904743194580078, "logps/chosen": -280.2117919921875, "logps/rejected": -250.3348388671875, "loss": 0.5585, "rewards/accuracies": 0.746874988079071, "rewards/chosen": -0.13924503326416016, "rewards/margins": 0.6378177404403687, "rewards/rejected": -0.777062714099884, "step": 1200 }, { "epoch": 1.25, "learning_rate": 3.2414848832759277e-07, "logits/chosen": -2.9535040855407715, "logits/rejected": -2.9109127521514893, "logps/chosen": -259.5050048828125, "logps/rejected": -241.0478973388672, "loss": 0.557, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.11907804012298584, "rewards/margins": 0.5822851061820984, "rewards/rejected": -0.701363205909729, "step": 1210 }, { "epoch": 1.26, "learning_rate": 3.2223497895139683e-07, "logits/chosen": -2.961881160736084, "logits/rejected": -2.913360595703125, "logps/chosen": -267.1917419433594, "logps/rejected": -225.3775177001953, "loss": 0.5478, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.11649106442928314, "rewards/margins": 0.6062580347061157, "rewards/rejected": -0.7227491140365601, "step": 1220 }, { "epoch": 1.27, "learning_rate": 3.203214695752009e-07, "logits/chosen": -2.9768214225769043, "logits/rejected": -2.902678966522217, "logps/chosen": -283.7835998535156, "logps/rejected": -240.600341796875, "loss": 0.5286, "rewards/accuracies": 0.746874988079071, "rewards/chosen": -0.029305633157491684, "rewards/margins": 0.6526093482971191, "rewards/rejected": -0.6819149851799011, "step": 1230 }, { "epoch": 1.28, "learning_rate": 3.1840796019900495e-07, "logits/chosen": -2.9970457553863525, "logits/rejected": -2.9251694679260254, "logps/chosen": -281.6250915527344, "logps/rejected": -235.99392700195312, "loss": 0.5522, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15060603618621826, "rewards/margins": 0.5845556259155273, "rewards/rejected": -0.7351616621017456, "step": 1240 }, { "epoch": 1.29, "learning_rate": 3.16494450822809e-07, "logits/chosen": -2.9404520988464355, "logits/rejected": -2.877206325531006, "logps/chosen": -255.9801483154297, "logps/rejected": -236.92898559570312, "loss": 0.5581, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.12293366342782974, "rewards/margins": 0.5340650677680969, "rewards/rejected": -0.6569987535476685, "step": 1250 }, { "epoch": 1.3, "learning_rate": 3.145809414466131e-07, "logits/chosen": -2.9380831718444824, "logits/rejected": -2.897122621536255, "logps/chosen": -263.7361755371094, "logps/rejected": -242.348876953125, "loss": 0.5239, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08699088543653488, "rewards/margins": 0.6291144490242004, "rewards/rejected": -0.7161053419113159, "step": 1260 }, { "epoch": 1.31, "learning_rate": 3.1266743207041714e-07, "logits/chosen": -2.983721971511841, "logits/rejected": -2.9178361892700195, "logps/chosen": -261.14544677734375, "logps/rejected": -226.11856079101562, "loss": 0.564, "rewards/accuracies": 0.734375, "rewards/chosen": -0.10707207024097443, "rewards/margins": 0.5684032440185547, "rewards/rejected": -0.6754752993583679, "step": 1270 }, { "epoch": 1.32, "learning_rate": 3.107539226942212e-07, "logits/chosen": -2.9287922382354736, "logits/rejected": -2.883363962173462, "logps/chosen": -266.85675048828125, "logps/rejected": -227.99252319335938, "loss": 0.5493, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.08746787160634995, "rewards/margins": 0.5371690988540649, "rewards/rejected": -0.6246370077133179, "step": 1280 }, { "epoch": 1.33, "learning_rate": 3.0884041331802526e-07, "logits/chosen": -2.9867660999298096, "logits/rejected": -2.900967836380005, "logps/chosen": -264.8102111816406, "logps/rejected": -233.68057250976562, "loss": 0.5393, "rewards/accuracies": 0.71875, "rewards/chosen": -0.10977361351251602, "rewards/margins": 0.6534188985824585, "rewards/rejected": -0.7631924152374268, "step": 1290 }, { "epoch": 1.34, "learning_rate": 3.0692690394182927e-07, "logits/chosen": -2.9456191062927246, "logits/rejected": -2.879067897796631, "logps/chosen": -256.67578125, "logps/rejected": -216.3768310546875, "loss": 0.54, "rewards/accuracies": 0.7593749761581421, "rewards/chosen": -0.1832173466682434, "rewards/margins": 0.5967921018600464, "rewards/rejected": -0.7800094485282898, "step": 1300 }, { "epoch": 1.35, "learning_rate": 3.0501339456563334e-07, "logits/chosen": -2.9797959327697754, "logits/rejected": -2.890228033065796, "logps/chosen": -276.6708679199219, "logps/rejected": -228.39321899414062, "loss": 0.5396, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.11618109047412872, "rewards/margins": 0.6006571054458618, "rewards/rejected": -0.7168381810188293, "step": 1310 }, { "epoch": 1.36, "learning_rate": 3.030998851894374e-07, "logits/chosen": -2.9552395343780518, "logits/rejected": -2.8540492057800293, "logps/chosen": -265.5633850097656, "logps/rejected": -216.38916015625, "loss": 0.5569, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.09466046094894409, "rewards/margins": 0.7107687592506409, "rewards/rejected": -0.8054292798042297, "step": 1320 }, { "epoch": 1.37, "learning_rate": 3.0118637581324146e-07, "logits/chosen": -2.939174175262451, "logits/rejected": -2.8474631309509277, "logps/chosen": -260.9861145019531, "logps/rejected": -238.0146484375, "loss": 0.5629, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.09246645867824554, "rewards/margins": 0.7061804533004761, "rewards/rejected": -0.798646867275238, "step": 1330 }, { "epoch": 1.38, "learning_rate": 2.992728664370455e-07, "logits/chosen": -2.9471426010131836, "logits/rejected": -2.9049131870269775, "logps/chosen": -270.2190856933594, "logps/rejected": -229.753662109375, "loss": 0.5459, "rewards/accuracies": 0.703125, "rewards/chosen": -0.14815892279148102, "rewards/margins": 0.6272038221359253, "rewards/rejected": -0.7753626704216003, "step": 1340 }, { "epoch": 1.39, "learning_rate": 2.973593570608496e-07, "logits/chosen": -2.965989589691162, "logits/rejected": -2.882284641265869, "logps/chosen": -279.1839904785156, "logps/rejected": -228.767333984375, "loss": 0.5068, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.079820916056633, "rewards/margins": 0.6599880456924438, "rewards/rejected": -0.7398089170455933, "step": 1350 }, { "epoch": 1.4, "learning_rate": 2.9544584768465365e-07, "logits/chosen": -2.9589555263519287, "logits/rejected": -2.902416706085205, "logps/chosen": -265.2132873535156, "logps/rejected": -228.28359985351562, "loss": 0.5416, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.11527415364980698, "rewards/margins": 0.5870606303215027, "rewards/rejected": -0.7023347616195679, "step": 1360 }, { "epoch": 1.41, "learning_rate": 2.935323383084577e-07, "logits/chosen": -2.965261936187744, "logits/rejected": -2.9154257774353027, "logps/chosen": -263.22576904296875, "logps/rejected": -254.12026977539062, "loss": 0.5473, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.15193209052085876, "rewards/margins": 0.5833896398544312, "rewards/rejected": -0.7353217005729675, "step": 1370 }, { "epoch": 1.43, "learning_rate": 2.9161882893226177e-07, "logits/chosen": -2.935213565826416, "logits/rejected": -2.8553645610809326, "logps/chosen": -267.3035888671875, "logps/rejected": -238.77392578125, "loss": 0.5546, "rewards/accuracies": 0.703125, "rewards/chosen": -0.14637014269828796, "rewards/margins": 0.5497695803642273, "rewards/rejected": -0.6961396932601929, "step": 1380 }, { "epoch": 1.44, "learning_rate": 2.8970531955606583e-07, "logits/chosen": -2.977102756500244, "logits/rejected": -2.903223752975464, "logps/chosen": -265.2420654296875, "logps/rejected": -235.6272430419922, "loss": 0.5643, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.0878833681344986, "rewards/margins": 0.662682294845581, "rewards/rejected": -0.7505656480789185, "step": 1390 }, { "epoch": 1.45, "learning_rate": 2.8779181017986984e-07, "logits/chosen": -2.9243035316467285, "logits/rejected": -2.844212770462036, "logps/chosen": -282.53741455078125, "logps/rejected": -223.8843994140625, "loss": 0.5563, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -0.0903361365199089, "rewards/margins": 0.6495278477668762, "rewards/rejected": -0.7398639917373657, "step": 1400 }, { "epoch": 1.46, "learning_rate": 2.858783008036739e-07, "logits/chosen": -3.007323741912842, "logits/rejected": -2.8923661708831787, "logps/chosen": -281.55364990234375, "logps/rejected": -231.0473175048828, "loss": 0.5027, "rewards/accuracies": 0.7718750238418579, "rewards/chosen": -0.10108653455972672, "rewards/margins": 0.6869888305664062, "rewards/rejected": -0.78807532787323, "step": 1410 }, { "epoch": 1.47, "learning_rate": 2.8396479142747797e-07, "logits/chosen": -2.9083967208862305, "logits/rejected": -2.8486404418945312, "logps/chosen": -265.4259033203125, "logps/rejected": -237.01345825195312, "loss": 0.5513, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.1434890180826187, "rewards/margins": 0.6202769875526428, "rewards/rejected": -0.7637659311294556, "step": 1420 }, { "epoch": 1.48, "learning_rate": 2.8205128205128203e-07, "logits/chosen": -2.9454987049102783, "logits/rejected": -2.9005463123321533, "logps/chosen": -255.769287109375, "logps/rejected": -233.74087524414062, "loss": 0.5309, "rewards/accuracies": 0.690625011920929, "rewards/chosen": -0.14371921122074127, "rewards/margins": 0.6379820108413696, "rewards/rejected": -0.7817011475563049, "step": 1430 }, { "epoch": 1.49, "learning_rate": 2.801377726750861e-07, "logits/chosen": -2.9815480709075928, "logits/rejected": -2.9178972244262695, "logps/chosen": -284.90240478515625, "logps/rejected": -222.5718536376953, "loss": 0.5368, "rewards/accuracies": 0.75, "rewards/chosen": -0.1121741309762001, "rewards/margins": 0.7379211187362671, "rewards/rejected": -0.8500951528549194, "step": 1440 }, { "epoch": 1.5, "learning_rate": 2.7822426329889015e-07, "logits/chosen": -2.9842119216918945, "logits/rejected": -2.8780369758605957, "logps/chosen": -300.3102722167969, "logps/rejected": -241.9176788330078, "loss": 0.4944, "rewards/accuracies": 0.75, "rewards/chosen": -0.026770751923322678, "rewards/margins": 0.8221763372421265, "rewards/rejected": -0.8489471673965454, "step": 1450 }, { "epoch": 1.51, "learning_rate": 2.763107539226942e-07, "logits/chosen": -2.944258451461792, "logits/rejected": -2.846299886703491, "logps/chosen": -288.7834167480469, "logps/rejected": -233.4124298095703, "loss": 0.5492, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -0.1777847707271576, "rewards/margins": 0.6485006213188171, "rewards/rejected": -0.8262853622436523, "step": 1460 }, { "epoch": 1.52, "learning_rate": 2.743972445464983e-07, "logits/chosen": -2.9281058311462402, "logits/rejected": -2.862865924835205, "logps/chosen": -261.91729736328125, "logps/rejected": -238.90914916992188, "loss": 0.5495, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08874638378620148, "rewards/margins": 0.6322054266929626, "rewards/rejected": -0.7209517955780029, "step": 1470 }, { "epoch": 1.53, "learning_rate": 2.7248373517030234e-07, "logits/chosen": -2.9305763244628906, "logits/rejected": -2.8471546173095703, "logps/chosen": -254.6492156982422, "logps/rejected": -207.7649383544922, "loss": 0.5494, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.10472023487091064, "rewards/margins": 0.6028520464897156, "rewards/rejected": -0.7075722813606262, "step": 1480 }, { "epoch": 1.54, "learning_rate": 2.705702257941064e-07, "logits/chosen": -2.973991870880127, "logits/rejected": -2.8729214668273926, "logps/chosen": -250.13442993164062, "logps/rejected": -213.06619262695312, "loss": 0.5246, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.10800573974847794, "rewards/margins": 0.6721674203872681, "rewards/rejected": -0.7801731824874878, "step": 1490 }, { "epoch": 1.55, "learning_rate": 2.686567164179104e-07, "logits/chosen": -2.9645466804504395, "logits/rejected": -2.8868494033813477, "logps/chosen": -276.547119140625, "logps/rejected": -252.17776489257812, "loss": 0.508, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.09248501062393188, "rewards/margins": 0.7201012969017029, "rewards/rejected": -0.8125863075256348, "step": 1500 }, { "epoch": 1.56, "learning_rate": 2.6674320704171447e-07, "logits/chosen": -2.967329502105713, "logits/rejected": -2.8831722736358643, "logps/chosen": -287.11468505859375, "logps/rejected": -229.39462280273438, "loss": 0.5537, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.15516069531440735, "rewards/margins": 0.6309596300125122, "rewards/rejected": -0.7861202359199524, "step": 1510 }, { "epoch": 1.57, "learning_rate": 2.6482969766551853e-07, "logits/chosen": -2.991366386413574, "logits/rejected": -2.911867618560791, "logps/chosen": -278.31060791015625, "logps/rejected": -228.8617706298828, "loss": 0.5327, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.08231403678655624, "rewards/margins": 0.7235355973243713, "rewards/rejected": -0.8058496713638306, "step": 1520 }, { "epoch": 1.58, "learning_rate": 2.629161882893226e-07, "logits/chosen": -2.9901280403137207, "logits/rejected": -2.889127016067505, "logps/chosen": -279.82684326171875, "logps/rejected": -227.4166717529297, "loss": 0.5248, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16880467534065247, "rewards/margins": 0.7013665437698364, "rewards/rejected": -0.8701711893081665, "step": 1530 }, { "epoch": 1.59, "learning_rate": 2.6100267891312666e-07, "logits/chosen": -2.9225573539733887, "logits/rejected": -2.8105075359344482, "logps/chosen": -266.20452880859375, "logps/rejected": -226.7688751220703, "loss": 0.5493, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.20703697204589844, "rewards/margins": 0.61225426197052, "rewards/rejected": -0.8192911148071289, "step": 1540 }, { "epoch": 1.6, "learning_rate": 2.590891695369307e-07, "logits/chosen": -2.9432945251464844, "logits/rejected": -2.8401968479156494, "logps/chosen": -273.5209045410156, "logps/rejected": -227.43270874023438, "loss": 0.522, "rewards/accuracies": 0.75, "rewards/chosen": -0.18506412208080292, "rewards/margins": 0.7361394166946411, "rewards/rejected": -0.92120361328125, "step": 1550 }, { "epoch": 1.61, "learning_rate": 2.571756601607348e-07, "logits/chosen": -2.9464735984802246, "logits/rejected": -2.8577117919921875, "logps/chosen": -256.5389099121094, "logps/rejected": -225.31405639648438, "loss": 0.5455, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.19385041296482086, "rewards/margins": 0.617928147315979, "rewards/rejected": -0.8117786645889282, "step": 1560 }, { "epoch": 1.62, "learning_rate": 2.5526215078453884e-07, "logits/chosen": -2.9612929821014404, "logits/rejected": -2.8524017333984375, "logps/chosen": -256.41888427734375, "logps/rejected": -229.5538330078125, "loss": 0.5099, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.21356025338172913, "rewards/margins": 0.686540961265564, "rewards/rejected": -0.9001011848449707, "step": 1570 }, { "epoch": 1.63, "learning_rate": 2.533486414083429e-07, "logits/chosen": -2.951014518737793, "logits/rejected": -2.8533120155334473, "logps/chosen": -275.21649169921875, "logps/rejected": -229.5667724609375, "loss": 0.5329, "rewards/accuracies": 0.703125, "rewards/chosen": -0.18795448541641235, "rewards/margins": 0.6178085207939148, "rewards/rejected": -0.8057630658149719, "step": 1580 }, { "epoch": 1.64, "learning_rate": 2.5143513203214697e-07, "logits/chosen": -2.975377321243286, "logits/rejected": -2.8713319301605225, "logps/chosen": -262.2503662109375, "logps/rejected": -215.01913452148438, "loss": 0.5238, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15069663524627686, "rewards/margins": 0.6844397783279419, "rewards/rejected": -0.8351364135742188, "step": 1590 }, { "epoch": 1.65, "learning_rate": 2.49521622655951e-07, "logits/chosen": -2.982719659805298, "logits/rejected": -2.864827871322632, "logps/chosen": -265.05926513671875, "logps/rejected": -228.1261749267578, "loss": 0.5564, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.1332695037126541, "rewards/margins": 0.6900584101676941, "rewards/rejected": -0.8233280181884766, "step": 1600 }, { "epoch": 1.66, "learning_rate": 2.4760811327975504e-07, "logits/chosen": -2.8991243839263916, "logits/rejected": -2.8543431758880615, "logps/chosen": -240.2902069091797, "logps/rejected": -228.7177734375, "loss": 0.5573, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.18491050601005554, "rewards/margins": 0.5866525173187256, "rewards/rejected": -0.7715630531311035, "step": 1610 }, { "epoch": 1.67, "learning_rate": 2.456946039035591e-07, "logits/chosen": -2.8905270099639893, "logits/rejected": -2.855071783065796, "logps/chosen": -269.6056823730469, "logps/rejected": -242.7838134765625, "loss": 0.544, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13025090098381042, "rewards/margins": 0.6636508107185364, "rewards/rejected": -0.7939016222953796, "step": 1620 }, { "epoch": 1.68, "learning_rate": 2.4378109452736316e-07, "logits/chosen": -2.928283214569092, "logits/rejected": -2.839672803878784, "logps/chosen": -250.72573852539062, "logps/rejected": -220.005859375, "loss": 0.5356, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.21521203219890594, "rewards/margins": 0.6489077806472778, "rewards/rejected": -0.864119827747345, "step": 1630 }, { "epoch": 1.69, "learning_rate": 2.418675851511672e-07, "logits/chosen": -3.0035758018493652, "logits/rejected": -2.890878200531006, "logps/chosen": -280.58770751953125, "logps/rejected": -231.17691040039062, "loss": 0.5461, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.11185511201620102, "rewards/margins": 0.6507663130760193, "rewards/rejected": -0.7626214623451233, "step": 1640 }, { "epoch": 1.7, "learning_rate": 2.399540757749713e-07, "logits/chosen": -2.932917356491089, "logits/rejected": -2.866323947906494, "logps/chosen": -274.95587158203125, "logps/rejected": -242.65017700195312, "loss": 0.5281, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.10276564210653305, "rewards/margins": 0.6475640535354614, "rewards/rejected": -0.7503296732902527, "step": 1650 }, { "epoch": 1.71, "learning_rate": 2.3804056639877535e-07, "logits/chosen": -2.9991672039031982, "logits/rejected": -2.8937621116638184, "logps/chosen": -277.6604309082031, "logps/rejected": -228.29898071289062, "loss": 0.5602, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.1638353019952774, "rewards/margins": 0.6358079314231873, "rewards/rejected": -0.7996432781219482, "step": 1660 }, { "epoch": 1.72, "learning_rate": 2.361270570225794e-07, "logits/chosen": -2.9443559646606445, "logits/rejected": -2.8872387409210205, "logps/chosen": -269.5966796875, "logps/rejected": -243.4744110107422, "loss": 0.5317, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.12270579487085342, "rewards/margins": 0.6638936996459961, "rewards/rejected": -0.7865995168685913, "step": 1670 }, { "epoch": 1.74, "learning_rate": 2.3421354764638345e-07, "logits/chosen": -2.9553515911102295, "logits/rejected": -2.8536245822906494, "logps/chosen": -265.5298767089844, "logps/rejected": -210.93533325195312, "loss": 0.5179, "rewards/accuracies": 0.765625, "rewards/chosen": -0.06088445335626602, "rewards/margins": 0.7889713048934937, "rewards/rejected": -0.8498557806015015, "step": 1680 }, { "epoch": 1.75, "learning_rate": 2.323000382701875e-07, "logits/chosen": -2.924121141433716, "logits/rejected": -2.839080333709717, "logps/chosen": -255.9208984375, "logps/rejected": -226.5245819091797, "loss": 0.527, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.11523976176977158, "rewards/margins": 0.6995357275009155, "rewards/rejected": -0.8147755861282349, "step": 1690 }, { "epoch": 1.76, "learning_rate": 2.3038652889399157e-07, "logits/chosen": -2.9121832847595215, "logits/rejected": -2.8598732948303223, "logps/chosen": -266.0028076171875, "logps/rejected": -227.8167724609375, "loss": 0.555, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.16439494490623474, "rewards/margins": 0.6462361216545105, "rewards/rejected": -0.8106310963630676, "step": 1700 }, { "epoch": 1.77, "learning_rate": 2.2847301951779563e-07, "logits/chosen": -2.8927314281463623, "logits/rejected": -2.841042995452881, "logps/chosen": -265.4156799316406, "logps/rejected": -227.14453125, "loss": 0.5243, "rewards/accuracies": 0.778124988079071, "rewards/chosen": -0.1308198869228363, "rewards/margins": 0.6922730207443237, "rewards/rejected": -0.8230929374694824, "step": 1710 }, { "epoch": 1.78, "learning_rate": 2.265595101415997e-07, "logits/chosen": -2.9488277435302734, "logits/rejected": -2.903395414352417, "logps/chosen": -272.43255615234375, "logps/rejected": -235.41677856445312, "loss": 0.5627, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18448616564273834, "rewards/margins": 0.609156608581543, "rewards/rejected": -0.7936427593231201, "step": 1720 }, { "epoch": 1.79, "learning_rate": 2.2464600076540373e-07, "logits/chosen": -2.9433505535125732, "logits/rejected": -2.86824893951416, "logps/chosen": -245.5707244873047, "logps/rejected": -220.27157592773438, "loss": 0.5733, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.19082064926624298, "rewards/margins": 0.6157172322273254, "rewards/rejected": -0.806537926197052, "step": 1730 }, { "epoch": 1.8, "learning_rate": 2.227324913892078e-07, "logits/chosen": -2.982534408569336, "logits/rejected": -2.9001739025115967, "logps/chosen": -264.0472412109375, "logps/rejected": -230.54013061523438, "loss": 0.5505, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1886674463748932, "rewards/margins": 0.6295715570449829, "rewards/rejected": -0.8182390332221985, "step": 1740 }, { "epoch": 1.81, "learning_rate": 2.2081898201301186e-07, "logits/chosen": -2.939004421234131, "logits/rejected": -2.867532253265381, "logps/chosen": -265.92877197265625, "logps/rejected": -226.4512176513672, "loss": 0.5164, "rewards/accuracies": 0.75, "rewards/chosen": -0.12146709114313126, "rewards/margins": 0.6792842149734497, "rewards/rejected": -0.8007512092590332, "step": 1750 }, { "epoch": 1.82, "learning_rate": 2.1890547263681592e-07, "logits/chosen": -2.9981484413146973, "logits/rejected": -2.92271089553833, "logps/chosen": -252.6018524169922, "logps/rejected": -234.91748046875, "loss": 0.5223, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.1921396702528, "rewards/margins": 0.6892775297164917, "rewards/rejected": -0.8814172744750977, "step": 1760 }, { "epoch": 1.83, "learning_rate": 2.1699196326061998e-07, "logits/chosen": -2.9659371376037598, "logits/rejected": -2.899864673614502, "logps/chosen": -255.4652862548828, "logps/rejected": -236.17184448242188, "loss": 0.5292, "rewards/accuracies": 0.703125, "rewards/chosen": -0.20055997371673584, "rewards/margins": 0.6416491866111755, "rewards/rejected": -0.8422091603279114, "step": 1770 }, { "epoch": 1.84, "learning_rate": 2.1507845388442402e-07, "logits/chosen": -2.97383189201355, "logits/rejected": -2.885164737701416, "logps/chosen": -250.85250854492188, "logps/rejected": -216.2353057861328, "loss": 0.5352, "rewards/accuracies": 0.734375, "rewards/chosen": -0.1943538933992386, "rewards/margins": 0.6577661037445068, "rewards/rejected": -0.852120041847229, "step": 1780 }, { "epoch": 1.85, "learning_rate": 2.1316494450822808e-07, "logits/chosen": -2.8917043209075928, "logits/rejected": -2.886598825454712, "logps/chosen": -231.791748046875, "logps/rejected": -216.33157348632812, "loss": 0.5272, "rewards/accuracies": 0.71875, "rewards/chosen": -0.21788883209228516, "rewards/margins": 0.5858948826789856, "rewards/rejected": -0.8037837147712708, "step": 1790 }, { "epoch": 1.86, "learning_rate": 2.1125143513203214e-07, "logits/chosen": -2.9232053756713867, "logits/rejected": -2.853830575942993, "logps/chosen": -258.58123779296875, "logps/rejected": -212.29196166992188, "loss": 0.5372, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.1821872442960739, "rewards/margins": 0.6443108320236206, "rewards/rejected": -0.8264980316162109, "step": 1800 }, { "epoch": 1.87, "learning_rate": 2.093379257558362e-07, "logits/chosen": -2.9652037620544434, "logits/rejected": -2.872067928314209, "logps/chosen": -273.66534423828125, "logps/rejected": -244.85171508789062, "loss": 0.5375, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -0.1432824283838272, "rewards/margins": 0.6951689720153809, "rewards/rejected": -0.8384513854980469, "step": 1810 }, { "epoch": 1.88, "learning_rate": 2.0742441637964026e-07, "logits/chosen": -2.9519007205963135, "logits/rejected": -2.878570079803467, "logps/chosen": -268.40972900390625, "logps/rejected": -227.53268432617188, "loss": 0.5574, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.2549641728401184, "rewards/margins": 0.5877460241317749, "rewards/rejected": -0.8427101373672485, "step": 1820 }, { "epoch": 1.89, "learning_rate": 2.055109070034443e-07, "logits/chosen": -2.964397430419922, "logits/rejected": -2.8631138801574707, "logps/chosen": -270.572509765625, "logps/rejected": -237.1774139404297, "loss": 0.5297, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.18485566973686218, "rewards/margins": 0.6789379715919495, "rewards/rejected": -0.863793671131134, "step": 1830 }, { "epoch": 1.9, "learning_rate": 2.0359739762724836e-07, "logits/chosen": -2.9794182777404785, "logits/rejected": -2.9060301780700684, "logps/chosen": -264.9988098144531, "logps/rejected": -230.5545196533203, "loss": 0.5527, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.13177114725112915, "rewards/margins": 0.7085077166557312, "rewards/rejected": -0.8402788043022156, "step": 1840 }, { "epoch": 1.91, "learning_rate": 2.0168388825105242e-07, "logits/chosen": -2.945614814758301, "logits/rejected": -2.863433837890625, "logps/chosen": -293.86541748046875, "logps/rejected": -248.41659545898438, "loss": 0.5287, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.15375889837741852, "rewards/margins": 0.6686180830001831, "rewards/rejected": -0.8223770260810852, "step": 1850 }, { "epoch": 1.92, "learning_rate": 1.997703788748565e-07, "logits/chosen": -2.9513707160949707, "logits/rejected": -2.8661911487579346, "logps/chosen": -271.61114501953125, "logps/rejected": -236.2384033203125, "loss": 0.5564, "rewards/accuracies": 0.703125, "rewards/chosen": -0.1368633508682251, "rewards/margins": 0.6556352972984314, "rewards/rejected": -0.7924985885620117, "step": 1860 }, { "epoch": 1.93, "learning_rate": 1.9785686949866055e-07, "logits/chosen": -2.929868698120117, "logits/rejected": -2.884504556655884, "logps/chosen": -247.5407257080078, "logps/rejected": -225.3792266845703, "loss": 0.5396, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.19565653800964355, "rewards/margins": 0.6189417243003845, "rewards/rejected": -0.8145983815193176, "step": 1870 }, { "epoch": 1.94, "learning_rate": 1.9594336012246458e-07, "logits/chosen": -2.9682650566101074, "logits/rejected": -2.867809295654297, "logps/chosen": -289.90716552734375, "logps/rejected": -234.0137939453125, "loss": 0.5642, "rewards/accuracies": 0.6875, "rewards/chosen": -0.23123899102210999, "rewards/margins": 0.6123656034469604, "rewards/rejected": -0.843604564666748, "step": 1880 }, { "epoch": 1.95, "learning_rate": 1.9402985074626865e-07, "logits/chosen": -2.949544906616211, "logits/rejected": -2.896604537963867, "logps/chosen": -256.0013732910156, "logps/rejected": -234.3660888671875, "loss": 0.5228, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.19953693449497223, "rewards/margins": 0.6817682981491089, "rewards/rejected": -0.8813052177429199, "step": 1890 }, { "epoch": 1.96, "learning_rate": 1.921163413700727e-07, "logits/chosen": -2.9528584480285645, "logits/rejected": -2.8806896209716797, "logps/chosen": -303.26580810546875, "logps/rejected": -242.4392547607422, "loss": 0.5174, "rewards/accuracies": 0.734375, "rewards/chosen": -0.10731848329305649, "rewards/margins": 0.7268760204315186, "rewards/rejected": -0.8341944813728333, "step": 1900 }, { "epoch": 1.97, "learning_rate": 1.9020283199387677e-07, "logits/chosen": -2.989190101623535, "logits/rejected": -2.883378267288208, "logps/chosen": -278.9278869628906, "logps/rejected": -235.36630249023438, "loss": 0.5096, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.15197142958641052, "rewards/margins": 0.7376525402069092, "rewards/rejected": -0.8896239399909973, "step": 1910 }, { "epoch": 1.98, "learning_rate": 1.8828932261768083e-07, "logits/chosen": -3.000488758087158, "logits/rejected": -2.928088665008545, "logps/chosen": -272.78717041015625, "logps/rejected": -229.3020782470703, "loss": 0.5455, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.13984522223472595, "rewards/margins": 0.672209620475769, "rewards/rejected": -0.8120548129081726, "step": 1920 }, { "epoch": 1.99, "learning_rate": 1.8637581324148487e-07, "logits/chosen": -2.9804940223693848, "logits/rejected": -2.8817341327667236, "logps/chosen": -277.64569091796875, "logps/rejected": -233.2301788330078, "loss": 0.5144, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.10212600231170654, "rewards/margins": 0.7697436213493347, "rewards/rejected": -0.8718697428703308, "step": 1930 }, { "epoch": 2.0, "eval_logits/chosen": -2.970071315765381, "eval_logits/rejected": -2.88126540184021, "eval_logps/chosen": -266.85809326171875, "eval_logps/rejected": -229.24581909179688, "eval_loss": 0.5309731364250183, "eval_rewards/accuracies": 0.7360000014305115, "eval_rewards/chosen": -0.13386200368404388, "eval_rewards/margins": 0.7544430494308472, "eval_rewards/rejected": -0.888305127620697, "eval_runtime": 534.6521, "eval_samples_per_second": 3.741, "eval_steps_per_second": 1.87, "step": 1936 }, { "epoch": 2.0, "learning_rate": 1.8446230386528893e-07, "logits/chosen": -2.8724846839904785, "logits/rejected": -2.832706928253174, "logps/chosen": -281.8897399902344, "logps/rejected": -258.7982482910156, "loss": 0.528, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.12249691784381866, "rewards/margins": 0.6953455209732056, "rewards/rejected": -0.8178424835205078, "step": 1940 }, { "epoch": 2.01, "learning_rate": 1.82548794489093e-07, "logits/chosen": -2.961986541748047, "logits/rejected": -2.8855788707733154, "logps/chosen": -290.3434143066406, "logps/rejected": -245.5084686279297, "loss": 0.5176, "rewards/accuracies": 0.765625, "rewards/chosen": -0.0884619876742363, "rewards/margins": 0.8273890614509583, "rewards/rejected": -0.9158509969711304, "step": 1950 }, { "epoch": 2.02, "learning_rate": 1.8063528511289706e-07, "logits/chosen": -2.9163997173309326, "logits/rejected": -2.8581910133361816, "logps/chosen": -257.60211181640625, "logps/rejected": -231.5768280029297, "loss": 0.5126, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.20751002430915833, "rewards/margins": 0.6879206299781799, "rewards/rejected": -0.8954305648803711, "step": 1960 }, { "epoch": 2.03, "learning_rate": 1.7872177573670112e-07, "logits/chosen": -2.939995288848877, "logits/rejected": -2.8781728744506836, "logps/chosen": -246.7406005859375, "logps/rejected": -225.15170288085938, "loss": 0.5448, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.17099957168102264, "rewards/margins": 0.6288141012191772, "rewards/rejected": -0.7998136878013611, "step": 1970 }, { "epoch": 2.04, "learning_rate": 1.7680826636050515e-07, "logits/chosen": -2.9754204750061035, "logits/rejected": -2.8840906620025635, "logps/chosen": -275.0287170410156, "logps/rejected": -237.91348266601562, "loss": 0.5424, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.1663726419210434, "rewards/margins": 0.6445485353469849, "rewards/rejected": -0.8109210729598999, "step": 1980 }, { "epoch": 2.06, "learning_rate": 1.7489475698430921e-07, "logits/chosen": -2.969970464706421, "logits/rejected": -2.8896713256835938, "logps/chosen": -279.9050598144531, "logps/rejected": -220.54638671875, "loss": 0.5357, "rewards/accuracies": 0.734375, "rewards/chosen": -0.09298896789550781, "rewards/margins": 0.739943265914917, "rewards/rejected": -0.83293217420578, "step": 1990 }, { "epoch": 2.07, "learning_rate": 1.7298124760811328e-07, "logits/chosen": -2.936462879180908, "logits/rejected": -2.8681843280792236, "logps/chosen": -261.8529968261719, "logps/rejected": -246.9523468017578, "loss": 0.5444, "rewards/accuracies": 0.671875, "rewards/chosen": -0.14188183844089508, "rewards/margins": 0.5491374135017395, "rewards/rejected": -0.6910191774368286, "step": 2000 }, { "epoch": 2.08, "learning_rate": 1.7106773823191734e-07, "logits/chosen": -2.974335193634033, "logits/rejected": -2.8787505626678467, "logps/chosen": -292.1455383300781, "logps/rejected": -250.3961639404297, "loss": 0.5585, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07010605931282043, "rewards/margins": 0.8234880566596985, "rewards/rejected": -0.893593966960907, "step": 2010 }, { "epoch": 2.09, "learning_rate": 1.691542288557214e-07, "logits/chosen": -2.9863057136535645, "logits/rejected": -2.886086940765381, "logps/chosen": -259.38970947265625, "logps/rejected": -225.5360870361328, "loss": 0.5288, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.1258455216884613, "rewards/margins": 0.6627411842346191, "rewards/rejected": -0.7885867357254028, "step": 2020 }, { "epoch": 2.1, "learning_rate": 1.6724071947952544e-07, "logits/chosen": -2.9508132934570312, "logits/rejected": -2.8835933208465576, "logps/chosen": -289.08050537109375, "logps/rejected": -251.431640625, "loss": 0.5599, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.1728096455335617, "rewards/margins": 0.6674363017082214, "rewards/rejected": -0.8402459025382996, "step": 2030 }, { "epoch": 2.11, "learning_rate": 1.653272101033295e-07, "logits/chosen": -2.9639153480529785, "logits/rejected": -2.8938446044921875, "logps/chosen": -249.5262451171875, "logps/rejected": -229.63967895507812, "loss": 0.5623, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.17938874661922455, "rewards/margins": 0.5850684642791748, "rewards/rejected": -0.7644572257995605, "step": 2040 }, { "epoch": 2.12, "learning_rate": 1.6341370072713356e-07, "logits/chosen": -2.977783679962158, "logits/rejected": -2.904069423675537, "logps/chosen": -284.6417236328125, "logps/rejected": -234.9993896484375, "loss": 0.5208, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.13995984196662903, "rewards/margins": 0.7568376660346985, "rewards/rejected": -0.8967974781990051, "step": 2050 }, { "epoch": 2.13, "learning_rate": 1.6150019135093762e-07, "logits/chosen": -2.9578676223754883, "logits/rejected": -2.877027750015259, "logps/chosen": -256.90582275390625, "logps/rejected": -231.245849609375, "loss": 0.5308, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.15393325686454773, "rewards/margins": 0.705695629119873, "rewards/rejected": -0.8596287965774536, "step": 2060 }, { "epoch": 2.14, "learning_rate": 1.5958668197474169e-07, "logits/chosen": -2.9002552032470703, "logits/rejected": -2.8463079929351807, "logps/chosen": -257.6904296875, "logps/rejected": -237.1425323486328, "loss": 0.5517, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.15610209107398987, "rewards/margins": 0.691413402557373, "rewards/rejected": -0.8475154638290405, "step": 2070 }, { "epoch": 2.15, "learning_rate": 1.5767317259854572e-07, "logits/chosen": -2.9789271354675293, "logits/rejected": -2.9140219688415527, "logps/chosen": -281.47113037109375, "logps/rejected": -232.6070098876953, "loss": 0.5355, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.1560235619544983, "rewards/margins": 0.6931401491165161, "rewards/rejected": -0.849163830280304, "step": 2080 }, { "epoch": 2.16, "learning_rate": 1.5575966322234978e-07, "logits/chosen": -3.026348829269409, "logits/rejected": -2.9293127059936523, "logps/chosen": -289.7508850097656, "logps/rejected": -240.6488494873047, "loss": 0.5348, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.10115332901477814, "rewards/margins": 0.763477623462677, "rewards/rejected": -0.8646309971809387, "step": 2090 }, { "epoch": 2.17, "learning_rate": 1.5384615384615385e-07, "logits/chosen": -2.981022357940674, "logits/rejected": -2.9064507484436035, "logps/chosen": -250.6385040283203, "logps/rejected": -212.89682006835938, "loss": 0.5211, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.19691713154315948, "rewards/margins": 0.670680046081543, "rewards/rejected": -0.8675972819328308, "step": 2100 }, { "epoch": 2.18, "learning_rate": 1.519326444699579e-07, "logits/chosen": -3.002115249633789, "logits/rejected": -2.9286153316497803, "logps/chosen": -297.5616760253906, "logps/rejected": -259.6432800292969, "loss": 0.55, "rewards/accuracies": 0.734375, "rewards/chosen": -0.17621631920337677, "rewards/margins": 0.7271560430526733, "rewards/rejected": -0.9033724069595337, "step": 2110 }, { "epoch": 2.19, "learning_rate": 1.5001913509376197e-07, "logits/chosen": -2.9069252014160156, "logits/rejected": -2.8448452949523926, "logps/chosen": -258.4548034667969, "logps/rejected": -225.63473510742188, "loss": 0.5184, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.21923212707042694, "rewards/margins": 0.6920791268348694, "rewards/rejected": -0.9113112688064575, "step": 2120 }, { "epoch": 2.2, "learning_rate": 1.4810562571756603e-07, "logits/chosen": -2.9312896728515625, "logits/rejected": -2.8941497802734375, "logps/chosen": -269.02197265625, "logps/rejected": -255.03750610351562, "loss": 0.5414, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.1833897829055786, "rewards/margins": 0.6754493713378906, "rewards/rejected": -0.8588391542434692, "step": 2130 }, { "epoch": 2.21, "learning_rate": 1.4619211634137007e-07, "logits/chosen": -2.9885783195495605, "logits/rejected": -2.890796184539795, "logps/chosen": -291.0990905761719, "logps/rejected": -219.19259643554688, "loss": 0.5223, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11602451652288437, "rewards/margins": 0.7560437321662903, "rewards/rejected": -0.8720682263374329, "step": 2140 }, { "epoch": 2.22, "learning_rate": 1.4427860696517413e-07, "logits/chosen": -2.92712140083313, "logits/rejected": -2.856602907180786, "logps/chosen": -227.8787078857422, "logps/rejected": -212.1920166015625, "loss": 0.5395, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.20537757873535156, "rewards/margins": 0.7048485279083252, "rewards/rejected": -0.9102262258529663, "step": 2150 }, { "epoch": 2.23, "learning_rate": 1.423650975889782e-07, "logits/chosen": -2.943385362625122, "logits/rejected": -2.8664638996124268, "logps/chosen": -253.6864776611328, "logps/rejected": -239.3394775390625, "loss": 0.5271, "rewards/accuracies": 0.734375, "rewards/chosen": -0.16308438777923584, "rewards/margins": 0.7045339345932007, "rewards/rejected": -0.8676183819770813, "step": 2160 }, { "epoch": 2.24, "learning_rate": 1.4045158821278225e-07, "logits/chosen": -2.92087459564209, "logits/rejected": -2.8633506298065186, "logps/chosen": -283.1237487792969, "logps/rejected": -248.60183715820312, "loss": 0.5294, "rewards/accuracies": 0.75, "rewards/chosen": -0.15694789588451385, "rewards/margins": 0.7922587394714355, "rewards/rejected": -0.9492067098617554, "step": 2170 }, { "epoch": 2.25, "learning_rate": 1.3853807883658632e-07, "logits/chosen": -2.9774370193481445, "logits/rejected": -2.9321320056915283, "logps/chosen": -261.2349853515625, "logps/rejected": -233.4268035888672, "loss": 0.556, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.11548130214214325, "rewards/margins": 0.6996491551399231, "rewards/rejected": -0.8151304125785828, "step": 2180 }, { "epoch": 2.26, "learning_rate": 1.3662456946039035e-07, "logits/chosen": -2.9686853885650635, "logits/rejected": -2.9019927978515625, "logps/chosen": -268.39898681640625, "logps/rejected": -237.2878875732422, "loss": 0.5273, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.12266399711370468, "rewards/margins": 0.7371145486831665, "rewards/rejected": -0.8597785830497742, "step": 2190 }, { "epoch": 2.27, "learning_rate": 1.3471106008419441e-07, "logits/chosen": -2.9438366889953613, "logits/rejected": -2.8742666244506836, "logps/chosen": -281.9891052246094, "logps/rejected": -231.9353790283203, "loss": 0.5076, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.055067289620637894, "rewards/margins": 0.793497622013092, "rewards/rejected": -0.8485648036003113, "step": 2200 }, { "epoch": 2.28, "learning_rate": 1.3279755070799848e-07, "logits/chosen": -2.9759907722473145, "logits/rejected": -2.9127907752990723, "logps/chosen": -271.8773193359375, "logps/rejected": -236.8974151611328, "loss": 0.5401, "rewards/accuracies": 0.75, "rewards/chosen": -0.15906117856502533, "rewards/margins": 0.6746315956115723, "rewards/rejected": -0.833692729473114, "step": 2210 }, { "epoch": 2.29, "learning_rate": 1.3088404133180254e-07, "logits/chosen": -2.95007586479187, "logits/rejected": -2.8902575969696045, "logps/chosen": -270.50225830078125, "logps/rejected": -242.41696166992188, "loss": 0.5442, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.1311570107936859, "rewards/margins": 0.6697767972946167, "rewards/rejected": -0.800933837890625, "step": 2220 }, { "epoch": 2.3, "learning_rate": 1.289705319556066e-07, "logits/chosen": -2.9404823780059814, "logits/rejected": -2.8883235454559326, "logps/chosen": -261.9150085449219, "logps/rejected": -246.35415649414062, "loss": 0.5146, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.14432227611541748, "rewards/margins": 0.7156716585159302, "rewards/rejected": -0.8599939346313477, "step": 2230 }, { "epoch": 2.31, "learning_rate": 1.2705702257941064e-07, "logits/chosen": -2.9673657417297363, "logits/rejected": -2.8869872093200684, "logps/chosen": -265.51739501953125, "logps/rejected": -219.9501953125, "loss": 0.5356, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.11545280367136002, "rewards/margins": 0.6775437593460083, "rewards/rejected": -0.7929965257644653, "step": 2240 }, { "epoch": 2.32, "learning_rate": 1.251435132032147e-07, "logits/chosen": -2.9037842750549316, "logits/rejected": -2.8676371574401855, "logps/chosen": -258.1905822753906, "logps/rejected": -230.92578125, "loss": 0.5343, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.12128762900829315, "rewards/margins": 0.6737414002418518, "rewards/rejected": -0.7950290441513062, "step": 2250 }, { "epoch": 2.33, "learning_rate": 1.2323000382701873e-07, "logits/chosen": -2.9720873832702637, "logits/rejected": -2.88736891746521, "logps/chosen": -259.537841796875, "logps/rejected": -232.0224151611328, "loss": 0.5267, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.1201951876282692, "rewards/margins": 0.7632410526275635, "rewards/rejected": -0.8834362030029297, "step": 2260 }, { "epoch": 2.34, "learning_rate": 1.213164944508228e-07, "logits/chosen": -2.9495997428894043, "logits/rejected": -2.8767104148864746, "logps/chosen": -278.3399963378906, "logps/rejected": -229.6492462158203, "loss": 0.5242, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.14368261396884918, "rewards/margins": 0.7504141330718994, "rewards/rejected": -0.8940967321395874, "step": 2270 }, { "epoch": 2.35, "learning_rate": 1.1940298507462686e-07, "logits/chosen": -2.962231397628784, "logits/rejected": -2.868900775909424, "logps/chosen": -256.718017578125, "logps/rejected": -216.90011596679688, "loss": 0.5339, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.16063788533210754, "rewards/margins": 0.724357545375824, "rewards/rejected": -0.8849954605102539, "step": 2280 }, { "epoch": 2.37, "learning_rate": 1.1748947569843092e-07, "logits/chosen": -2.955109119415283, "logits/rejected": -2.850020170211792, "logps/chosen": -275.18505859375, "logps/rejected": -227.7105712890625, "loss": 0.5382, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.10567112267017365, "rewards/margins": 0.796221911907196, "rewards/rejected": -0.9018930196762085, "step": 2290 }, { "epoch": 2.38, "learning_rate": 1.1557596632223497e-07, "logits/chosen": -2.934636354446411, "logits/rejected": -2.8569560050964355, "logps/chosen": -253.3321990966797, "logps/rejected": -231.0680694580078, "loss": 0.5532, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.11163314431905746, "rewards/margins": 0.8173502087593079, "rewards/rejected": -0.9289833307266235, "step": 2300 }, { "epoch": 2.39, "learning_rate": 1.1366245694603903e-07, "logits/chosen": -2.9444034099578857, "logits/rejected": -2.884538173675537, "logps/chosen": -277.43206787109375, "logps/rejected": -233.67788696289062, "loss": 0.5262, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.16489477455615997, "rewards/margins": 0.730253279209137, "rewards/rejected": -0.8951480984687805, "step": 2310 }, { "epoch": 2.4, "learning_rate": 1.1174894756984308e-07, "logits/chosen": -2.9340274333953857, "logits/rejected": -2.8671398162841797, "logps/chosen": -268.8190002441406, "logps/rejected": -229.046142578125, "loss": 0.4942, "rewards/accuracies": 0.75, "rewards/chosen": -0.09764869511127472, "rewards/margins": 0.7610483765602112, "rewards/rejected": -0.8586970567703247, "step": 2320 }, { "epoch": 2.41, "learning_rate": 1.0983543819364714e-07, "logits/chosen": -2.9890122413635254, "logits/rejected": -2.9282517433166504, "logps/chosen": -272.053466796875, "logps/rejected": -236.0382080078125, "loss": 0.5416, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.16581197082996368, "rewards/margins": 0.6450649499893188, "rewards/rejected": -0.8108768463134766, "step": 2330 }, { "epoch": 2.42, "learning_rate": 1.079219288174512e-07, "logits/chosen": -2.939635753631592, "logits/rejected": -2.8863718509674072, "logps/chosen": -261.1109619140625, "logps/rejected": -251.5067596435547, "loss": 0.5412, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1695874184370041, "rewards/margins": 0.6655629873275757, "rewards/rejected": -0.835150420665741, "step": 2340 }, { "epoch": 2.43, "learning_rate": 1.0600841944125525e-07, "logits/chosen": -2.9380459785461426, "logits/rejected": -2.8458609580993652, "logps/chosen": -268.99334716796875, "logps/rejected": -239.75765991210938, "loss": 0.5268, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.08941121399402618, "rewards/margins": 0.7053884267807007, "rewards/rejected": -0.7947996854782104, "step": 2350 }, { "epoch": 2.44, "learning_rate": 1.0409491006505931e-07, "logits/chosen": -2.929069995880127, "logits/rejected": -2.8647570610046387, "logps/chosen": -265.7629089355469, "logps/rejected": -231.3975067138672, "loss": 0.5552, "rewards/accuracies": 0.71875, "rewards/chosen": -0.1444084644317627, "rewards/margins": 0.7084876894950867, "rewards/rejected": -0.8528962135314941, "step": 2360 }, { "epoch": 2.45, "learning_rate": 1.0218140068886336e-07, "logits/chosen": -2.942918300628662, "logits/rejected": -2.8606324195861816, "logps/chosen": -280.6001281738281, "logps/rejected": -239.6103515625, "loss": 0.5436, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08323343843221664, "rewards/margins": 0.7913548350334167, "rewards/rejected": -0.8745881915092468, "step": 2370 }, { "epoch": 2.46, "learning_rate": 1.0026789131266743e-07, "logits/chosen": -2.9849934577941895, "logits/rejected": -2.8983778953552246, "logps/chosen": -271.6095886230469, "logps/rejected": -224.18411254882812, "loss": 0.5058, "rewards/accuracies": 0.746874988079071, "rewards/chosen": -0.13086125254631042, "rewards/margins": 0.707243025302887, "rewards/rejected": -0.8381043672561646, "step": 2380 }, { "epoch": 2.47, "learning_rate": 9.835438193647149e-08, "logits/chosen": -2.9231324195861816, "logits/rejected": -2.838575839996338, "logps/chosen": -280.83282470703125, "logps/rejected": -237.5975799560547, "loss": 0.533, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.15010377764701843, "rewards/margins": 0.7357429265975952, "rewards/rejected": -0.885846734046936, "step": 2390 }, { "epoch": 2.48, "learning_rate": 9.644087256027554e-08, "logits/chosen": -2.934373617172241, "logits/rejected": -2.881347179412842, "logps/chosen": -260.05584716796875, "logps/rejected": -235.1454315185547, "loss": 0.5208, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.12409849464893341, "rewards/margins": 0.765944242477417, "rewards/rejected": -0.890042781829834, "step": 2400 }, { "epoch": 2.49, "learning_rate": 9.45273631840796e-08, "logits/chosen": -2.9793200492858887, "logits/rejected": -2.9084548950195312, "logps/chosen": -281.88775634765625, "logps/rejected": -223.9385986328125, "loss": 0.5024, "rewards/accuracies": 0.7593749761581421, "rewards/chosen": -0.10361263900995255, "rewards/margins": 0.8771883845329285, "rewards/rejected": -0.980800986289978, "step": 2410 }, { "epoch": 2.5, "learning_rate": 9.261385380788366e-08, "logits/chosen": -2.976109504699707, "logits/rejected": -2.869077444076538, "logps/chosen": -305.3400573730469, "logps/rejected": -238.59024047851562, "loss": 0.4966, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.06264562904834747, "rewards/margins": 0.8607932329177856, "rewards/rejected": -0.9234389066696167, "step": 2420 }, { "epoch": 2.51, "learning_rate": 9.070034443168771e-08, "logits/chosen": -2.938960552215576, "logits/rejected": -2.8416309356689453, "logps/chosen": -286.1895751953125, "logps/rejected": -242.035400390625, "loss": 0.5433, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -0.19678226113319397, "rewards/margins": 0.6983063817024231, "rewards/rejected": -0.8950886726379395, "step": 2430 }, { "epoch": 2.52, "learning_rate": 8.878683505549177e-08, "logits/chosen": -2.933335542678833, "logits/rejected": -2.8738999366760254, "logps/chosen": -260.69140625, "logps/rejected": -235.0953369140625, "loss": 0.5432, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.09208143502473831, "rewards/margins": 0.7227004766464233, "rewards/rejected": -0.8147819638252258, "step": 2440 }, { "epoch": 2.53, "learning_rate": 8.687332567929582e-08, "logits/chosen": -2.924715518951416, "logits/rejected": -2.836571216583252, "logps/chosen": -255.515625, "logps/rejected": -203.3109588623047, "loss": 0.5284, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.10294497013092041, "rewards/margins": 0.7001079320907593, "rewards/rejected": -0.8030529022216797, "step": 2450 }, { "epoch": 2.54, "learning_rate": 8.495981630309988e-08, "logits/chosen": -2.9428200721740723, "logits/rejected": -2.848986864089966, "logps/chosen": -249.107177734375, "logps/rejected": -229.2167205810547, "loss": 0.5117, "rewards/accuracies": 0.7593749761581421, "rewards/chosen": -0.11484906822443008, "rewards/margins": 0.7457095980644226, "rewards/rejected": -0.8605585098266602, "step": 2460 }, { "epoch": 2.55, "learning_rate": 8.304630692690395e-08, "logits/chosen": -2.988035202026367, "logits/rejected": -2.8862037658691406, "logps/chosen": -278.4859924316406, "logps/rejected": -240.38327026367188, "loss": 0.4976, "rewards/accuracies": 0.746874988079071, "rewards/chosen": -0.11573171615600586, "rewards/margins": 0.826618492603302, "rewards/rejected": -0.9423500895500183, "step": 2470 }, { "epoch": 2.56, "learning_rate": 8.1132797550708e-08, "logits/chosen": -2.963351011276245, "logits/rejected": -2.8760385513305664, "logps/chosen": -292.64678955078125, "logps/rejected": -233.5381622314453, "loss": 0.5371, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.10241115093231201, "rewards/margins": 0.7552922368049622, "rewards/rejected": -0.8577033877372742, "step": 2480 }, { "epoch": 2.57, "learning_rate": 7.921928817451206e-08, "logits/chosen": -2.9652249813079834, "logits/rejected": -2.894345998764038, "logps/chosen": -277.359619140625, "logps/rejected": -226.2375030517578, "loss": 0.5303, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.11767537891864777, "rewards/margins": 0.742813766002655, "rewards/rejected": -0.8604891896247864, "step": 2490 }, { "epoch": 2.58, "learning_rate": 7.73057787983161e-08, "logits/chosen": -2.955096483230591, "logits/rejected": -2.854684829711914, "logps/chosen": -263.79534912109375, "logps/rejected": -233.5551300048828, "loss": 0.5205, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.15160967409610748, "rewards/margins": 0.7750263214111328, "rewards/rejected": -0.9266360402107239, "step": 2500 }, { "epoch": 2.59, "learning_rate": 7.539226942212017e-08, "logits/chosen": -2.958127498626709, "logits/rejected": -2.842663049697876, "logps/chosen": -279.0047912597656, "logps/rejected": -221.5785369873047, "loss": 0.5271, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.22116145491600037, "rewards/margins": 0.7069432735443115, "rewards/rejected": -0.9281047582626343, "step": 2510 }, { "epoch": 2.6, "learning_rate": 7.347876004592423e-08, "logits/chosen": -2.9273300170898438, "logits/rejected": -2.8234877586364746, "logps/chosen": -265.7668151855469, "logps/rejected": -231.17236328125, "loss": 0.5272, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.19352500140666962, "rewards/margins": 0.7516986131668091, "rewards/rejected": -0.9452236294746399, "step": 2520 }, { "epoch": 2.61, "learning_rate": 7.156525066972828e-08, "logits/chosen": -2.9410266876220703, "logits/rejected": -2.850593090057373, "logps/chosen": -255.95846557617188, "logps/rejected": -221.533203125, "loss": 0.5232, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -0.19206023216247559, "rewards/margins": 0.7426298260688782, "rewards/rejected": -0.9346901178359985, "step": 2530 }, { "epoch": 2.62, "learning_rate": 6.965174129353234e-08, "logits/chosen": -2.980755090713501, "logits/rejected": -2.8749125003814697, "logps/chosen": -265.9952087402344, "logps/rejected": -238.63906860351562, "loss": 0.4901, "rewards/accuracies": 0.78125, "rewards/chosen": -0.21229712665081024, "rewards/margins": 0.739254891872406, "rewards/rejected": -0.9515520334243774, "step": 2540 }, { "epoch": 2.63, "learning_rate": 6.773823191733639e-08, "logits/chosen": -2.9288721084594727, "logits/rejected": -2.8336708545684814, "logps/chosen": -274.4996032714844, "logps/rejected": -233.5682830810547, "loss": 0.5381, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.20412831008434296, "rewards/margins": 0.6856569051742554, "rewards/rejected": -0.8897852897644043, "step": 2550 }, { "epoch": 2.64, "learning_rate": 6.582472254114045e-08, "logits/chosen": -2.9642467498779297, "logits/rejected": -2.8336422443389893, "logps/chosen": -256.45233154296875, "logps/rejected": -203.57568359375, "loss": 0.5228, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.13208219408988953, "rewards/margins": 0.7883843779563904, "rewards/rejected": -0.9204666018486023, "step": 2560 }, { "epoch": 2.65, "learning_rate": 6.391121316494451e-08, "logits/chosen": -2.955697536468506, "logits/rejected": -2.857544422149658, "logps/chosen": -260.65631103515625, "logps/rejected": -230.7210235595703, "loss": 0.5441, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.14992466568946838, "rewards/margins": 0.7190058827400208, "rewards/rejected": -0.8689305186271667, "step": 2570 }, { "epoch": 2.66, "learning_rate": 6.199770378874856e-08, "logits/chosen": -2.9087677001953125, "logits/rejected": -2.8678908348083496, "logps/chosen": -243.47494506835938, "logps/rejected": -231.0225830078125, "loss": 0.5496, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.16769416630268097, "rewards/margins": 0.6585701704025269, "rewards/rejected": -0.8262642621994019, "step": 2580 }, { "epoch": 2.68, "learning_rate": 6.008419441255262e-08, "logits/chosen": -2.8695666790008545, "logits/rejected": -2.8333706855773926, "logps/chosen": -262.6031188964844, "logps/rejected": -240.7636260986328, "loss": 0.5332, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.13960261642932892, "rewards/margins": 0.7401096820831299, "rewards/rejected": -0.8797122836112976, "step": 2590 }, { "epoch": 2.69, "learning_rate": 5.817068503635668e-08, "logits/chosen": -2.9540772438049316, "logits/rejected": -2.851736068725586, "logps/chosen": -265.93572998046875, "logps/rejected": -223.3280487060547, "loss": 0.5262, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.2117738276720047, "rewards/margins": 0.6925970911979675, "rewards/rejected": -0.904370903968811, "step": 2600 }, { "epoch": 2.7, "learning_rate": 5.6257175660160735e-08, "logits/chosen": -2.9979212284088135, "logits/rejected": -2.892470121383667, "logps/chosen": -272.4714660644531, "logps/rejected": -232.4675750732422, "loss": 0.5245, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.10861802101135254, "rewards/margins": 0.7476091384887695, "rewards/rejected": -0.8562272191047668, "step": 2610 }, { "epoch": 2.71, "learning_rate": 5.4343666283964784e-08, "logits/chosen": -2.9274086952209473, "logits/rejected": -2.845494031906128, "logps/chosen": -277.07208251953125, "logps/rejected": -246.3750457763672, "loss": 0.5335, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.1164095550775528, "rewards/margins": 0.6775155067443848, "rewards/rejected": -0.7939250469207764, "step": 2620 }, { "epoch": 2.72, "learning_rate": 5.243015690776884e-08, "logits/chosen": -2.984679698944092, "logits/rejected": -2.8856422901153564, "logps/chosen": -280.80181884765625, "logps/rejected": -229.8297119140625, "loss": 0.5404, "rewards/accuracies": 0.734375, "rewards/chosen": -0.15697725117206573, "rewards/margins": 0.6752403974533081, "rewards/rejected": -0.8322175741195679, "step": 2630 }, { "epoch": 2.73, "learning_rate": 5.05166475315729e-08, "logits/chosen": -2.944037675857544, "logits/rejected": -2.868621349334717, "logps/chosen": -261.1282653808594, "logps/rejected": -230.9731903076172, "loss": 0.5323, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.1285724937915802, "rewards/margins": 0.7452605962753296, "rewards/rejected": -0.8738330602645874, "step": 2640 }, { "epoch": 2.74, "learning_rate": 4.860313815537696e-08, "logits/chosen": -2.9450154304504395, "logits/rejected": -2.869443655014038, "logps/chosen": -268.24652099609375, "logps/rejected": -222.32467651367188, "loss": 0.5108, "rewards/accuracies": 0.7593749761581421, "rewards/chosen": -0.08030074089765549, "rewards/margins": 0.8575320243835449, "rewards/rejected": -0.9378327131271362, "step": 2650 }, { "epoch": 2.75, "learning_rate": 4.668962877918101e-08, "logits/chosen": -2.9015612602233887, "logits/rejected": -2.8147647380828857, "logps/chosen": -257.51983642578125, "logps/rejected": -224.19351196289062, "loss": 0.5193, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08951184153556824, "rewards/margins": 0.7409173250198364, "rewards/rejected": -0.830429196357727, "step": 2660 }, { "epoch": 2.76, "learning_rate": 4.477611940298507e-08, "logits/chosen": -2.9334254264831543, "logits/rejected": -2.8779664039611816, "logps/chosen": -265.6736755371094, "logps/rejected": -227.59774780273438, "loss": 0.5563, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.22317901253700256, "rewards/margins": 0.6145828366279602, "rewards/rejected": -0.8377618789672852, "step": 2670 }, { "epoch": 2.77, "learning_rate": 4.2862610026789124e-08, "logits/chosen": -2.878678560256958, "logits/rejected": -2.8222813606262207, "logps/chosen": -266.82073974609375, "logps/rejected": -228.44277954101562, "loss": 0.5107, "rewards/accuracies": 0.778124988079071, "rewards/chosen": -0.09435267746448517, "rewards/margins": 0.7934114336967468, "rewards/rejected": -0.8877641558647156, "step": 2680 }, { "epoch": 2.78, "learning_rate": 4.0949100650593186e-08, "logits/chosen": -2.9601547718048096, "logits/rejected": -2.9238483905792236, "logps/chosen": -268.99212646484375, "logps/rejected": -232.2578887939453, "loss": 0.5635, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.18497256934642792, "rewards/margins": 0.6496366858482361, "rewards/rejected": -0.83460932970047, "step": 2690 }, { "epoch": 2.79, "learning_rate": 3.903559127439724e-08, "logits/chosen": -2.937476396560669, "logits/rejected": -2.8699138164520264, "logps/chosen": -248.3408203125, "logps/rejected": -229.4420623779297, "loss": 0.5542, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.1882086992263794, "rewards/margins": 0.6778735518455505, "rewards/rejected": -0.8660823106765747, "step": 2700 }, { "epoch": 2.8, "learning_rate": 3.71220818982013e-08, "logits/chosen": -3.002821683883667, "logits/rejected": -2.896698474884033, "logps/chosen": -274.83868408203125, "logps/rejected": -233.81494140625, "loss": 0.5313, "rewards/accuracies": 0.71875, "rewards/chosen": -0.1691424548625946, "rewards/margins": 0.6944646239280701, "rewards/rejected": -0.8636070489883423, "step": 2710 }, { "epoch": 2.81, "learning_rate": 3.520857252200535e-08, "logits/chosen": -2.915297746658325, "logits/rejected": -2.8548359870910645, "logps/chosen": -249.73062133789062, "logps/rejected": -222.02737426757812, "loss": 0.5164, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.1390371024608612, "rewards/margins": 0.7162588834762573, "rewards/rejected": -0.8552959561347961, "step": 2720 }, { "epoch": 2.82, "learning_rate": 3.3295063145809414e-08, "logits/chosen": -2.9932022094726562, "logits/rejected": -2.916374921798706, "logps/chosen": -254.5859375, "logps/rejected": -236.6666259765625, "loss": 0.5182, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.18689362704753876, "rewards/margins": 0.7170951962471008, "rewards/rejected": -0.9039888381958008, "step": 2730 }, { "epoch": 2.83, "learning_rate": 3.138155376961347e-08, "logits/chosen": -2.9603562355041504, "logits/rejected": -2.8859400749206543, "logps/chosen": -262.5657958984375, "logps/rejected": -230.6494140625, "loss": 0.5269, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21314844489097595, "rewards/margins": 0.687771737575531, "rewards/rejected": -0.9009200930595398, "step": 2740 }, { "epoch": 2.84, "learning_rate": 2.9468044393417525e-08, "logits/chosen": -2.9425268173217773, "logits/rejected": -2.8892085552215576, "logps/chosen": -245.33486938476562, "logps/rejected": -219.4418182373047, "loss": 0.5324, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.19100527465343475, "rewards/margins": 0.6618567705154419, "rewards/rejected": -0.8528621792793274, "step": 2750 }, { "epoch": 2.85, "learning_rate": 2.755453501722158e-08, "logits/chosen": -2.909280776977539, "logits/rejected": -2.8858840465545654, "logps/chosen": -233.84860229492188, "logps/rejected": -226.67428588867188, "loss": 0.5113, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.19184017181396484, "rewards/margins": 0.6737228035926819, "rewards/rejected": -0.865562915802002, "step": 2760 }, { "epoch": 2.86, "learning_rate": 2.564102564102564e-08, "logits/chosen": -2.903656482696533, "logits/rejected": -2.82205867767334, "logps/chosen": -258.58551025390625, "logps/rejected": -206.7924041748047, "loss": 0.5331, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.16824088990688324, "rewards/margins": 0.7211980819702148, "rewards/rejected": -0.8894389271736145, "step": 2770 }, { "epoch": 2.87, "learning_rate": 2.3727516264829695e-08, "logits/chosen": -2.992114543914795, "logits/rejected": -2.8973028659820557, "logps/chosen": -270.86248779296875, "logps/rejected": -243.87826538085938, "loss": 0.53, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.13685011863708496, "rewards/margins": 0.7292468547821045, "rewards/rejected": -0.8660969734191895, "step": 2780 }, { "epoch": 2.88, "learning_rate": 2.1814006888633754e-08, "logits/chosen": -2.9266510009765625, "logits/rejected": -2.8487558364868164, "logps/chosen": -276.3330078125, "logps/rejected": -229.3681182861328, "loss": 0.5501, "rewards/accuracies": 0.703125, "rewards/chosen": -0.24254301190376282, "rewards/margins": 0.6456094980239868, "rewards/rejected": -0.8881524801254272, "step": 2790 }, { "epoch": 2.89, "learning_rate": 1.990049751243781e-08, "logits/chosen": -2.964275598526001, "logits/rejected": -2.8702287673950195, "logps/chosen": -260.6219177246094, "logps/rejected": -235.5171661376953, "loss": 0.5428, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.20831432938575745, "rewards/margins": 0.6643073558807373, "rewards/rejected": -0.8726216554641724, "step": 2800 }, { "epoch": 2.9, "learning_rate": 1.7986988136241865e-08, "logits/chosen": -2.9846198558807373, "logits/rejected": -2.905627727508545, "logps/chosen": -270.52923583984375, "logps/rejected": -231.1419677734375, "loss": 0.5301, "rewards/accuracies": 0.75, "rewards/chosen": -0.13217493891716003, "rewards/margins": 0.8096184730529785, "rewards/rejected": -0.9417934417724609, "step": 2810 }, { "epoch": 2.91, "learning_rate": 1.6073478760045924e-08, "logits/chosen": -2.9574029445648193, "logits/rejected": -2.872276782989502, "logps/chosen": -297.28192138671875, "logps/rejected": -245.7416534423828, "loss": 0.5405, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.14524348080158234, "rewards/margins": 0.6443182229995728, "rewards/rejected": -0.7895617485046387, "step": 2820 }, { "epoch": 2.92, "learning_rate": 1.4159969383849981e-08, "logits/chosen": -2.93812894821167, "logits/rejected": -2.873464584350586, "logps/chosen": -258.6783142089844, "logps/rejected": -237.9629669189453, "loss": 0.5411, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.15294918417930603, "rewards/margins": 0.6953471302986145, "rewards/rejected": -0.8482963442802429, "step": 2830 }, { "epoch": 2.93, "learning_rate": 1.2246460007654037e-08, "logits/chosen": -2.9222159385681152, "logits/rejected": -2.85912823677063, "logps/chosen": -254.00830078125, "logps/rejected": -225.37673950195312, "loss": 0.5528, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.21158838272094727, "rewards/margins": 0.633982241153717, "rewards/rejected": -0.8455705642700195, "step": 2840 }, { "epoch": 2.94, "learning_rate": 1.0332950631458094e-08, "logits/chosen": -2.9641215801239014, "logits/rejected": -2.85703182220459, "logps/chosen": -288.60205078125, "logps/rejected": -226.69711303710938, "loss": 0.5268, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.20843060314655304, "rewards/margins": 0.699650764465332, "rewards/rejected": -0.9080813527107239, "step": 2850 }, { "epoch": 2.95, "learning_rate": 8.419441255262151e-09, "logits/chosen": -2.9417290687561035, "logits/rejected": -2.8945915699005127, "logps/chosen": -264.1558532714844, "logps/rejected": -241.14175415039062, "loss": 0.5279, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19092315435409546, "rewards/margins": 0.7110624313354492, "rewards/rejected": -0.9019856452941895, "step": 2860 }, { "epoch": 2.96, "learning_rate": 6.505931879066207e-09, "logits/chosen": -2.964904546737671, "logits/rejected": -2.8833911418914795, "logps/chosen": -304.9076232910156, "logps/rejected": -246.4352569580078, "loss": 0.5096, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.10954318940639496, "rewards/margins": 0.7617713809013367, "rewards/rejected": -0.8713146448135376, "step": 2870 }, { "epoch": 2.97, "learning_rate": 4.592422502870264e-09, "logits/chosen": -2.994156837463379, "logits/rejected": -2.8914027214050293, "logps/chosen": -280.44158935546875, "logps/rejected": -231.46670532226562, "loss": 0.5116, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16673938930034637, "rewards/margins": 0.7518764138221741, "rewards/rejected": -0.9186156988143921, "step": 2880 }, { "epoch": 2.98, "learning_rate": 2.6789131266743202e-09, "logits/chosen": -2.9862735271453857, "logits/rejected": -2.914008617401123, "logps/chosen": -266.1647033691406, "logps/rejected": -229.02548217773438, "loss": 0.5345, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.13051697611808777, "rewards/margins": 0.6995090246200562, "rewards/rejected": -0.8300260305404663, "step": 2890 }, { "epoch": 3.0, "learning_rate": 7.654037504783773e-10, "logits/chosen": -2.9640724658966064, "logits/rejected": -2.8667378425598145, "logps/chosen": -284.4615478515625, "logps/rejected": -240.8423614501953, "loss": 0.518, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07154907286167145, "rewards/margins": 0.8137946128845215, "rewards/rejected": -0.8853437304496765, "step": 2900 }, { "epoch": 3.0, "eval_logits/chosen": -2.9668068885803223, "eval_logits/rejected": -2.877352237701416, "eval_logps/chosen": -266.8033752441406, "eval_logps/rejected": -229.52769470214844, "eval_loss": 0.5254282355308533, "eval_rewards/accuracies": 0.7400000095367432, "eval_rewards/chosen": -0.12839017808437347, "eval_rewards/margins": 0.7881025671958923, "eval_rewards/rejected": -0.9164927005767822, "eval_runtime": 536.7366, "eval_samples_per_second": 3.726, "eval_steps_per_second": 1.863, "step": 2904 }, { "epoch": 3.0, "step": 2904, "total_flos": 0.0, "train_loss": 0.5633651232629111, "train_runtime": 75701.2005, "train_samples_per_second": 2.456, "train_steps_per_second": 0.038 } ], "logging_steps": 10, "max_steps": 2904, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }