{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 7000, "global_step": 16770, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.981514609421586e-09, "logits/chosen": -3.0218682289123535, "logits/rejected": -2.936706066131592, "logps/chosen": -67.43382263183594, "logps/rejected": -38.91685485839844, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.9815146094215865e-08, "logits/chosen": -2.962446451187134, "logits/rejected": -2.947007894515991, "logps/chosen": -56.465572357177734, "logps/rejected": -37.82605743408203, "loss": 0.6931, "rewards/accuracies": 0.4722222089767456, "rewards/chosen": 5.504820728674531e-05, "rewards/margins": 0.00028492926503531635, "rewards/rejected": -0.00022988107230048627, "step": 10 }, { "epoch": 0.0, "learning_rate": 5.963029218843173e-08, "logits/chosen": -2.960874080657959, "logits/rejected": -2.9500772953033447, "logps/chosen": -68.88651275634766, "logps/rejected": -38.4207649230957, "loss": 0.6931, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.00011163230374222621, "rewards/margins": 0.0002123880694853142, "rewards/rejected": -0.00032402039505541325, "step": 20 }, { "epoch": 0.0, "learning_rate": 8.94454382826476e-08, "logits/chosen": -2.941279172897339, "logits/rejected": -2.943176746368408, "logps/chosen": -63.398460388183594, "logps/rejected": -37.388614654541016, "loss": 0.6932, "rewards/accuracies": 0.5, "rewards/chosen": -0.00021476647816598415, "rewards/margins": -6.91671302774921e-05, "rewards/rejected": -0.0001455993769923225, "step": 30 }, { "epoch": 0.0, "learning_rate": 1.1926058437686346e-07, "logits/chosen": -2.9860801696777344, "logits/rejected": -2.9472920894622803, "logps/chosen": -62.9001579284668, "logps/rejected": -36.533016204833984, "loss": 0.6928, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0003909949737135321, "rewards/margins": 0.0002806334232445806, "rewards/rejected": 0.0001103615722968243, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.490757304710793e-07, "logits/chosen": -2.9337716102600098, "logits/rejected": -2.9104549884796143, "logps/chosen": -61.770286560058594, "logps/rejected": -38.02623748779297, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": -0.00018526267376728356, "rewards/margins": -5.912015330977738e-05, "rewards/rejected": -0.00012614250590559095, "step": 50 }, { "epoch": 0.0, "learning_rate": 1.788908765652952e-07, "logits/chosen": -3.006845474243164, "logits/rejected": -2.986870288848877, "logps/chosen": -63.095970153808594, "logps/rejected": -38.01917266845703, "loss": 0.6927, "rewards/accuracies": 0.625, "rewards/chosen": 9.181881614495069e-05, "rewards/margins": 0.0005735540762543678, "rewards/rejected": -0.00048173521645367146, "step": 60 }, { "epoch": 0.0, "learning_rate": 2.0870602265951104e-07, "logits/chosen": -2.9873225688934326, "logits/rejected": -2.944399118423462, "logps/chosen": -66.11896514892578, "logps/rejected": -39.65594482421875, "loss": 0.6923, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.0008778914925642312, "rewards/margins": 0.0018451397772878408, "rewards/rejected": -0.0009672483429312706, "step": 70 }, { "epoch": 0.0, "learning_rate": 2.385211687537269e-07, "logits/chosen": -2.9733798503875732, "logits/rejected": -2.934694766998291, "logps/chosen": -65.12422180175781, "logps/rejected": -37.80875778198242, "loss": 0.6918, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.001645894953981042, "rewards/margins": 0.0027065686881542206, "rewards/rejected": -0.0010606737341731787, "step": 80 }, { "epoch": 0.01, "learning_rate": 2.6833631484794277e-07, "logits/chosen": -2.9713034629821777, "logits/rejected": -2.970456600189209, "logps/chosen": -67.38812255859375, "logps/rejected": -37.401695251464844, "loss": 0.6912, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.0014037579530850053, "rewards/margins": 0.003809246001765132, "rewards/rejected": -0.002405487699434161, "step": 90 }, { "epoch": 0.01, "learning_rate": 2.981514609421586e-07, "logits/chosen": -2.990466833114624, "logits/rejected": -3.0021064281463623, "logps/chosen": -64.66390228271484, "logps/rejected": -38.19758224487305, "loss": 0.6902, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.00282643036916852, "rewards/margins": 0.006060936953872442, "rewards/rejected": -0.0032345070503652096, "step": 100 }, { "epoch": 0.01, "learning_rate": 3.2796660703637447e-07, "logits/chosen": -2.964850664138794, "logits/rejected": -2.9280309677124023, "logps/chosen": -62.46397018432617, "logps/rejected": -37.72308349609375, "loss": 0.6893, "rewards/accuracies": 1.0, "rewards/chosen": 0.0031955738086253405, "rewards/margins": 0.008131476119160652, "rewards/rejected": -0.004935902543365955, "step": 110 }, { "epoch": 0.01, "learning_rate": 3.577817531305904e-07, "logits/chosen": -2.976289987564087, "logits/rejected": -2.958486557006836, "logps/chosen": -60.07708740234375, "logps/rejected": -39.44427490234375, "loss": 0.6881, "rewards/accuracies": 1.0, "rewards/chosen": 0.0050882394425570965, "rewards/margins": 0.010633114725351334, "rewards/rejected": -0.005544875282794237, "step": 120 }, { "epoch": 0.01, "learning_rate": 3.8759689922480623e-07, "logits/chosen": -2.996931552886963, "logits/rejected": -2.9738945960998535, "logps/chosen": -56.91551971435547, "logps/rejected": -37.899986267089844, "loss": 0.6861, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.005769859068095684, "rewards/margins": 0.013562721200287342, "rewards/rejected": -0.007792862597852945, "step": 130 }, { "epoch": 0.01, "learning_rate": 4.174120453190221e-07, "logits/chosen": -2.968843936920166, "logits/rejected": -2.9377543926239014, "logps/chosen": -63.5054931640625, "logps/rejected": -37.78097152709961, "loss": 0.6839, "rewards/accuracies": 1.0, "rewards/chosen": 0.00786468107253313, "rewards/margins": 0.017556851729750633, "rewards/rejected": -0.009692170657217503, "step": 140 }, { "epoch": 0.01, "learning_rate": 4.47227191413238e-07, "logits/chosen": -2.9909555912017822, "logits/rejected": -2.949923276901245, "logps/chosen": -66.87939453125, "logps/rejected": -39.131874084472656, "loss": 0.6812, "rewards/accuracies": 1.0, "rewards/chosen": 0.010932034812867641, "rewards/margins": 0.025025665760040283, "rewards/rejected": -0.014093630015850067, "step": 150 }, { "epoch": 0.01, "learning_rate": 4.770423375074538e-07, "logits/chosen": -2.9987645149230957, "logits/rejected": -2.9707846641540527, "logps/chosen": -61.40474319458008, "logps/rejected": -39.31603240966797, "loss": 0.6782, "rewards/accuracies": 1.0, "rewards/chosen": 0.011595867574214935, "rewards/margins": 0.030306387692689896, "rewards/rejected": -0.01871052011847496, "step": 160 }, { "epoch": 0.01, "learning_rate": 5.068574836016696e-07, "logits/chosen": -2.9568097591400146, "logits/rejected": -2.9365909099578857, "logps/chosen": -67.08219909667969, "logps/rejected": -39.7484245300293, "loss": 0.675, "rewards/accuracies": 1.0, "rewards/chosen": 0.013100977055728436, "rewards/margins": 0.03490080684423447, "rewards/rejected": -0.021799826994538307, "step": 170 }, { "epoch": 0.01, "learning_rate": 5.366726296958855e-07, "logits/chosen": -2.9665210247039795, "logits/rejected": -2.9450478553771973, "logps/chosen": -64.0983657836914, "logps/rejected": -40.714378356933594, "loss": 0.6708, "rewards/accuracies": 1.0, "rewards/chosen": 0.01625046506524086, "rewards/margins": 0.045740626752376556, "rewards/rejected": -0.029490163549780846, "step": 180 }, { "epoch": 0.01, "learning_rate": 5.664877757901014e-07, "logits/chosen": -3.0251972675323486, "logits/rejected": -2.9676074981689453, "logps/chosen": -68.52401733398438, "logps/rejected": -42.10633850097656, "loss": 0.6667, "rewards/accuracies": 1.0, "rewards/chosen": 0.02029956690967083, "rewards/margins": 0.056243497878313065, "rewards/rejected": -0.035943929105997086, "step": 190 }, { "epoch": 0.01, "learning_rate": 5.963029218843172e-07, "logits/chosen": -2.941378355026245, "logits/rejected": -2.916774272918701, "logps/chosen": -62.6572151184082, "logps/rejected": -41.44105911254883, "loss": 0.6627, "rewards/accuracies": 1.0, "rewards/chosen": 0.020069191232323647, "rewards/margins": 0.06305578351020813, "rewards/rejected": -0.04298659414052963, "step": 200 }, { "epoch": 0.01, "learning_rate": 6.26118067978533e-07, "logits/chosen": -2.9863531589508057, "logits/rejected": -2.966203212738037, "logps/chosen": -58.01508331298828, "logps/rejected": -42.450294494628906, "loss": 0.6562, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.020371051505208015, "rewards/margins": 0.07060989737510681, "rewards/rejected": -0.050238847732543945, "step": 210 }, { "epoch": 0.01, "learning_rate": 6.559332140727489e-07, "logits/chosen": -2.9891295433044434, "logits/rejected": -2.965472936630249, "logps/chosen": -68.88212585449219, "logps/rejected": -44.070655822753906, "loss": 0.6493, "rewards/accuracies": 1.0, "rewards/chosen": 0.026916515082120895, "rewards/margins": 0.09226812422275543, "rewards/rejected": -0.06535160541534424, "step": 220 }, { "epoch": 0.01, "learning_rate": 6.857483601669648e-07, "logits/chosen": -2.952012062072754, "logits/rejected": -2.924546480178833, "logps/chosen": -65.06964111328125, "logps/rejected": -44.74851608276367, "loss": 0.6429, "rewards/accuracies": 1.0, "rewards/chosen": 0.02850278653204441, "rewards/margins": 0.10565153509378433, "rewards/rejected": -0.07714874297380447, "step": 230 }, { "epoch": 0.01, "learning_rate": 7.155635062611808e-07, "logits/chosen": -2.9891390800476074, "logits/rejected": -2.9870388507843018, "logps/chosen": -61.817710876464844, "logps/rejected": -45.596343994140625, "loss": 0.6352, "rewards/accuracies": 1.0, "rewards/chosen": 0.026659509167075157, "rewards/margins": 0.11715468019247055, "rewards/rejected": -0.09049518406391144, "step": 240 }, { "epoch": 0.01, "learning_rate": 7.453786523553966e-07, "logits/chosen": -2.9766712188720703, "logits/rejected": -2.966275691986084, "logps/chosen": -62.50507736206055, "logps/rejected": -48.683876037597656, "loss": 0.6284, "rewards/accuracies": 1.0, "rewards/chosen": 0.02841700240969658, "rewards/margins": 0.1292591094970703, "rewards/rejected": -0.10084209591150284, "step": 250 }, { "epoch": 0.02, "learning_rate": 7.751937984496125e-07, "logits/chosen": -2.999868392944336, "logits/rejected": -2.9732205867767334, "logps/chosen": -64.61723327636719, "logps/rejected": -51.173179626464844, "loss": 0.6192, "rewards/accuracies": 1.0, "rewards/chosen": 0.030589986592531204, "rewards/margins": 0.1554131805896759, "rewards/rejected": -0.124823197722435, "step": 260 }, { "epoch": 0.02, "learning_rate": 8.050089445438284e-07, "logits/chosen": -2.975881576538086, "logits/rejected": -2.9688634872436523, "logps/chosen": -62.05030059814453, "logps/rejected": -50.632564544677734, "loss": 0.6071, "rewards/accuracies": 1.0, "rewards/chosen": 0.03683885559439659, "rewards/margins": 0.17412006855010986, "rewards/rejected": -0.13728120923042297, "step": 270 }, { "epoch": 0.02, "learning_rate": 8.348240906380442e-07, "logits/chosen": -2.972630500793457, "logits/rejected": -2.9668948650360107, "logps/chosen": -66.72364807128906, "logps/rejected": -52.2687873840332, "loss": 0.6017, "rewards/accuracies": 1.0, "rewards/chosen": 0.037119414657354355, "rewards/margins": 0.19582489132881165, "rewards/rejected": -0.15870548784732819, "step": 280 }, { "epoch": 0.02, "learning_rate": 8.646392367322601e-07, "logits/chosen": -2.98591685295105, "logits/rejected": -2.944474220275879, "logps/chosen": -52.836631774902344, "logps/rejected": -55.96368408203125, "loss": 0.5888, "rewards/accuracies": 1.0, "rewards/chosen": 0.030178675428032875, "rewards/margins": 0.21391499042510986, "rewards/rejected": -0.18373630940914154, "step": 290 }, { "epoch": 0.02, "learning_rate": 8.94454382826476e-07, "logits/chosen": -2.982726812362671, "logits/rejected": -2.9632513523101807, "logps/chosen": -63.959014892578125, "logps/rejected": -58.19898223876953, "loss": 0.5802, "rewards/accuracies": 1.0, "rewards/chosen": 0.04776749014854431, "rewards/margins": 0.2504541873931885, "rewards/rejected": -0.20268671214580536, "step": 300 }, { "epoch": 0.02, "learning_rate": 9.242695289206919e-07, "logits/chosen": -2.9884026050567627, "logits/rejected": -2.974195957183838, "logps/chosen": -59.83971405029297, "logps/rejected": -61.806671142578125, "loss": 0.5576, "rewards/accuracies": 1.0, "rewards/chosen": 0.04895300790667534, "rewards/margins": 0.2941287159919739, "rewards/rejected": -0.24517571926116943, "step": 310 }, { "epoch": 0.02, "learning_rate": 9.540846750149077e-07, "logits/chosen": -2.979891538619995, "logits/rejected": -2.948497772216797, "logps/chosen": -54.18998336791992, "logps/rejected": -63.59241485595703, "loss": 0.5446, "rewards/accuracies": 1.0, "rewards/chosen": 0.04513512924313545, "rewards/margins": 0.31227290630340576, "rewards/rejected": -0.2671377956867218, "step": 320 }, { "epoch": 0.02, "learning_rate": 9.838998211091236e-07, "logits/chosen": -3.01401424407959, "logits/rejected": -2.986703634262085, "logps/chosen": -58.62263870239258, "logps/rejected": -70.4931411743164, "loss": 0.5228, "rewards/accuracies": 1.0, "rewards/chosen": 0.056499116122722626, "rewards/margins": 0.3785272240638733, "rewards/rejected": -0.3220280706882477, "step": 330 }, { "epoch": 0.02, "learning_rate": 1.0137149672033393e-06, "logits/chosen": -2.9774539470672607, "logits/rejected": -2.9727795124053955, "logps/chosen": -59.099464416503906, "logps/rejected": -73.94728088378906, "loss": 0.5025, "rewards/accuracies": 1.0, "rewards/chosen": 0.06891719996929169, "rewards/margins": 0.432667076587677, "rewards/rejected": -0.3637498617172241, "step": 340 }, { "epoch": 0.02, "learning_rate": 1.0435301132975552e-06, "logits/chosen": -2.958286762237549, "logits/rejected": -2.9447312355041504, "logps/chosen": -59.09956741333008, "logps/rejected": -79.24665832519531, "loss": 0.4868, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.05371891334652901, "rewards/margins": 0.4693606495857239, "rewards/rejected": -0.41564178466796875, "step": 350 }, { "epoch": 0.02, "learning_rate": 1.073345259391771e-06, "logits/chosen": -2.9668009281158447, "logits/rejected": -2.950000762939453, "logps/chosen": -57.104408264160156, "logps/rejected": -84.49302673339844, "loss": 0.4588, "rewards/accuracies": 1.0, "rewards/chosen": 0.07701251655817032, "rewards/margins": 0.5421547293663025, "rewards/rejected": -0.46514225006103516, "step": 360 }, { "epoch": 0.02, "learning_rate": 1.103160405485987e-06, "logits/chosen": -2.987603187561035, "logits/rejected": -2.9524295330047607, "logps/chosen": -55.36252975463867, "logps/rejected": -90.12848663330078, "loss": 0.443, "rewards/accuracies": 1.0, "rewards/chosen": 0.07807400822639465, "rewards/margins": 0.5937131643295288, "rewards/rejected": -0.5156391859054565, "step": 370 }, { "epoch": 0.02, "learning_rate": 1.1329755515802029e-06, "logits/chosen": -2.9757156372070312, "logits/rejected": -2.9683690071105957, "logps/chosen": -62.766815185546875, "logps/rejected": -94.85152435302734, "loss": 0.4171, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.09451368451118469, "rewards/margins": 0.6583266258239746, "rewards/rejected": -0.5638128519058228, "step": 380 }, { "epoch": 0.02, "learning_rate": 1.1627906976744188e-06, "logits/chosen": -2.962899684906006, "logits/rejected": -2.9512689113616943, "logps/chosen": -48.61424255371094, "logps/rejected": -101.32770538330078, "loss": 0.3887, "rewards/accuracies": 1.0, "rewards/chosen": 0.1129467710852623, "rewards/margins": 0.7443889379501343, "rewards/rejected": -0.6314421892166138, "step": 390 }, { "epoch": 0.02, "learning_rate": 1.1926058437686345e-06, "logits/chosen": -2.956197738647461, "logits/rejected": -2.9548983573913574, "logps/chosen": -51.880767822265625, "logps/rejected": -105.0481948852539, "loss": 0.3735, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.12102735042572021, "rewards/margins": 0.7910811305046082, "rewards/rejected": -0.6700536608695984, "step": 400 }, { "epoch": 0.02, "learning_rate": 1.2224209898628504e-06, "logits/chosen": -2.951767921447754, "logits/rejected": -2.9641811847686768, "logps/chosen": -48.880271911621094, "logps/rejected": -112.1520767211914, "loss": 0.3459, "rewards/accuracies": 1.0, "rewards/chosen": 0.1516953706741333, "rewards/margins": 0.891093373298645, "rewards/rejected": -0.7393979430198669, "step": 410 }, { "epoch": 0.03, "learning_rate": 1.252236135957066e-06, "logits/chosen": -2.9767134189605713, "logits/rejected": -2.9484751224517822, "logps/chosen": -46.60969924926758, "logps/rejected": -115.45204162597656, "loss": 0.3282, "rewards/accuracies": 1.0, "rewards/chosen": 0.18312522768974304, "rewards/margins": 0.9682098627090454, "rewards/rejected": -0.7850846648216248, "step": 420 }, { "epoch": 0.03, "learning_rate": 1.282051282051282e-06, "logits/chosen": -2.9981486797332764, "logits/rejected": -2.9711692333221436, "logps/chosen": -41.53202438354492, "logps/rejected": -118.77608489990234, "loss": 0.3161, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.16396602988243103, "rewards/margins": 0.9690536260604858, "rewards/rejected": -0.805087685585022, "step": 430 }, { "epoch": 0.03, "learning_rate": 1.3118664281454979e-06, "logits/chosen": -2.9755756855010986, "logits/rejected": -2.9677977561950684, "logps/chosen": -43.42798614501953, "logps/rejected": -124.58668518066406, "loss": 0.2946, "rewards/accuracies": 1.0, "rewards/chosen": 0.2056199610233307, "rewards/margins": 1.0799310207366943, "rewards/rejected": -0.8743109703063965, "step": 440 }, { "epoch": 0.03, "learning_rate": 1.3416815742397138e-06, "logits/chosen": -2.9456050395965576, "logits/rejected": -2.938547134399414, "logps/chosen": -39.62173843383789, "logps/rejected": -124.97493743896484, "loss": 0.2928, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.18644092977046967, "rewards/margins": 1.0570735931396484, "rewards/rejected": -0.8706328272819519, "step": 450 }, { "epoch": 0.03, "learning_rate": 1.3714967203339297e-06, "logits/chosen": -3.002803087234497, "logits/rejected": -2.9854350090026855, "logps/chosen": -38.61174392700195, "logps/rejected": -134.84027099609375, "loss": 0.2646, "rewards/accuracies": 1.0, "rewards/chosen": 0.21981830894947052, "rewards/margins": 1.1931068897247314, "rewards/rejected": -0.9732885360717773, "step": 460 }, { "epoch": 0.03, "learning_rate": 1.4013118664281456e-06, "logits/chosen": -2.9459478855133057, "logits/rejected": -2.9124436378479004, "logps/chosen": -38.08759307861328, "logps/rejected": -133.2669219970703, "loss": 0.2634, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.20384104549884796, "rewards/margins": 1.1654164791107178, "rewards/rejected": -0.9615755081176758, "step": 470 }, { "epoch": 0.03, "learning_rate": 1.4311270125223615e-06, "logits/chosen": -2.9741978645324707, "logits/rejected": -2.9587838649749756, "logps/chosen": -38.21046829223633, "logps/rejected": -142.6593780517578, "loss": 0.2448, "rewards/accuracies": 1.0, "rewards/chosen": 0.21974727511405945, "rewards/margins": 1.2785038948059082, "rewards/rejected": -1.0587565898895264, "step": 480 }, { "epoch": 0.03, "learning_rate": 1.4609421586165772e-06, "logits/chosen": -2.9854302406311035, "logits/rejected": -2.929260015487671, "logps/chosen": -38.77074432373047, "logps/rejected": -148.45794677734375, "loss": 0.2296, "rewards/accuracies": 1.0, "rewards/chosen": 0.2418777048587799, "rewards/margins": 1.3558166027069092, "rewards/rejected": -1.1139390468597412, "step": 490 }, { "epoch": 0.03, "learning_rate": 1.490757304710793e-06, "logits/chosen": -2.9749088287353516, "logits/rejected": -2.9633567333221436, "logps/chosen": -47.048980712890625, "logps/rejected": -151.99789428710938, "loss": 0.2189, "rewards/accuracies": 1.0, "rewards/chosen": 0.25067856907844543, "rewards/margins": 1.4060004949569702, "rewards/rejected": -1.1553219556808472, "step": 500 }, { "epoch": 0.03, "learning_rate": 1.520572450805009e-06, "logits/chosen": -2.975252628326416, "logits/rejected": -2.9446215629577637, "logps/chosen": -38.66350555419922, "logps/rejected": -159.1326446533203, "loss": 0.2105, "rewards/accuracies": 1.0, "rewards/chosen": 0.25232720375061035, "rewards/margins": 1.4574434757232666, "rewards/rejected": -1.2051162719726562, "step": 510 }, { "epoch": 0.03, "learning_rate": 1.550387596899225e-06, "logits/chosen": -2.9406862258911133, "logits/rejected": -2.9339542388916016, "logps/chosen": -40.225276947021484, "logps/rejected": -158.60324096679688, "loss": 0.2164, "rewards/accuracies": 1.0, "rewards/chosen": 0.24609224498271942, "rewards/margins": 1.4612188339233398, "rewards/rejected": -1.215126633644104, "step": 520 }, { "epoch": 0.03, "learning_rate": 1.5802027429934408e-06, "logits/chosen": -2.9712047576904297, "logits/rejected": -2.964679002761841, "logps/chosen": -39.63045120239258, "logps/rejected": -161.8488006591797, "loss": 0.1998, "rewards/accuracies": 1.0, "rewards/chosen": 0.24778684973716736, "rewards/margins": 1.4898221492767334, "rewards/rejected": -1.2420352697372437, "step": 530 }, { "epoch": 0.03, "learning_rate": 1.6100178890876567e-06, "logits/chosen": -2.9773550033569336, "logits/rejected": -2.9555976390838623, "logps/chosen": -42.95372772216797, "logps/rejected": -167.4945831298828, "loss": 0.1955, "rewards/accuracies": 1.0, "rewards/chosen": 0.245849609375, "rewards/margins": 1.5561649799346924, "rewards/rejected": -1.310315489768982, "step": 540 }, { "epoch": 0.03, "learning_rate": 1.6398330351818726e-06, "logits/chosen": -2.961595058441162, "logits/rejected": -2.928737163543701, "logps/chosen": -40.49113845825195, "logps/rejected": -170.0043182373047, "loss": 0.1931, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.22345349192619324, "rewards/margins": 1.553979754447937, "rewards/rejected": -1.3305261135101318, "step": 550 }, { "epoch": 0.03, "learning_rate": 1.6696481812760883e-06, "logits/chosen": -2.965538740158081, "logits/rejected": -2.9750213623046875, "logps/chosen": -35.041778564453125, "logps/rejected": -176.42152404785156, "loss": 0.1805, "rewards/accuracies": 1.0, "rewards/chosen": 0.2564579248428345, "rewards/margins": 1.6430635452270508, "rewards/rejected": -1.3866057395935059, "step": 560 }, { "epoch": 0.03, "learning_rate": 1.6994633273703042e-06, "logits/chosen": -2.9975168704986572, "logits/rejected": -2.9697446823120117, "logps/chosen": -42.11443328857422, "logps/rejected": -179.57394409179688, "loss": 0.1696, "rewards/accuracies": 1.0, "rewards/chosen": 0.2718260884284973, "rewards/margins": 1.694568395614624, "rewards/rejected": -1.422742486000061, "step": 570 }, { "epoch": 0.03, "learning_rate": 1.7292784734645201e-06, "logits/chosen": -2.9578099250793457, "logits/rejected": -2.9211487770080566, "logps/chosen": -33.25984573364258, "logps/rejected": -184.76364135742188, "loss": 0.1631, "rewards/accuracies": 1.0, "rewards/chosen": 0.2600173354148865, "rewards/margins": 1.735528588294983, "rewards/rejected": -1.4755113124847412, "step": 580 }, { "epoch": 0.04, "learning_rate": 1.759093619558736e-06, "logits/chosen": -2.9391167163848877, "logits/rejected": -2.905545949935913, "logps/chosen": -36.13165283203125, "logps/rejected": -191.04977416992188, "loss": 0.1658, "rewards/accuracies": 1.0, "rewards/chosen": 0.2645508646965027, "rewards/margins": 1.7953293323516846, "rewards/rejected": -1.5307782888412476, "step": 590 }, { "epoch": 0.04, "learning_rate": 1.788908765652952e-06, "logits/chosen": -2.933382034301758, "logits/rejected": -2.9165964126586914, "logps/chosen": -39.63783264160156, "logps/rejected": -189.66615295410156, "loss": 0.1608, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.23412856459617615, "rewards/margins": 1.7271678447723389, "rewards/rejected": -1.4930393695831299, "step": 600 }, { "epoch": 0.04, "learning_rate": 1.8187239117471678e-06, "logits/chosen": -2.9637372493743896, "logits/rejected": -2.924687147140503, "logps/chosen": -35.364013671875, "logps/rejected": -201.4823760986328, "loss": 0.1385, "rewards/accuracies": 1.0, "rewards/chosen": 0.2652861475944519, "rewards/margins": 1.9042457342147827, "rewards/rejected": -1.638959527015686, "step": 610 }, { "epoch": 0.04, "learning_rate": 1.8485390578413837e-06, "logits/chosen": -2.9786150455474854, "logits/rejected": -2.9007158279418945, "logps/chosen": -30.669368743896484, "logps/rejected": -205.71688842773438, "loss": 0.1331, "rewards/accuracies": 1.0, "rewards/chosen": 0.27900952100753784, "rewards/margins": 1.9454971551895142, "rewards/rejected": -1.6664879322052002, "step": 620 }, { "epoch": 0.04, "learning_rate": 1.8783542039355994e-06, "logits/chosen": -2.9280943870544434, "logits/rejected": -2.876917600631714, "logps/chosen": -37.091346740722656, "logps/rejected": -216.9874725341797, "loss": 0.1315, "rewards/accuracies": 1.0, "rewards/chosen": 0.28317663073539734, "rewards/margins": 2.0717968940734863, "rewards/rejected": -1.7886205911636353, "step": 630 }, { "epoch": 0.04, "learning_rate": 1.9081693500298153e-06, "logits/chosen": -2.9675099849700928, "logits/rejected": -2.931936502456665, "logps/chosen": -31.261856079101562, "logps/rejected": -221.5929412841797, "loss": 0.1169, "rewards/accuracies": 1.0, "rewards/chosen": 0.26551198959350586, "rewards/margins": 2.100052833557129, "rewards/rejected": -1.8345407247543335, "step": 640 }, { "epoch": 0.04, "learning_rate": 1.9379844961240315e-06, "logits/chosen": -2.953190565109253, "logits/rejected": -2.9310998916625977, "logps/chosen": -36.9582405090332, "logps/rejected": -224.72500610351562, "loss": 0.1107, "rewards/accuracies": 1.0, "rewards/chosen": 0.2583983540534973, "rewards/margins": 2.1199750900268555, "rewards/rejected": -1.8615766763687134, "step": 650 }, { "epoch": 0.04, "learning_rate": 1.967799642218247e-06, "logits/chosen": -2.9613311290740967, "logits/rejected": -2.910433292388916, "logps/chosen": -34.4904899597168, "logps/rejected": -235.203369140625, "loss": 0.1095, "rewards/accuracies": 1.0, "rewards/chosen": 0.26224562525749207, "rewards/margins": 2.2326736450195312, "rewards/rejected": -1.9704278707504272, "step": 660 }, { "epoch": 0.04, "learning_rate": 1.997614788312463e-06, "logits/chosen": -2.945631265640259, "logits/rejected": -2.909071445465088, "logps/chosen": -36.619651794433594, "logps/rejected": -242.97445678710938, "loss": 0.1077, "rewards/accuracies": 1.0, "rewards/chosen": 0.26473408937454224, "rewards/margins": 2.304016590118408, "rewards/rejected": -2.0392825603485107, "step": 670 }, { "epoch": 0.04, "learning_rate": 2.0274299344066785e-06, "logits/chosen": -2.9870681762695312, "logits/rejected": -2.9122252464294434, "logps/chosen": -40.851409912109375, "logps/rejected": -249.24685668945312, "loss": 0.1106, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.22577068209648132, "rewards/margins": 2.3387513160705566, "rewards/rejected": -2.112980842590332, "step": 680 }, { "epoch": 0.04, "learning_rate": 2.0572450805008946e-06, "logits/chosen": -2.9736366271972656, "logits/rejected": -2.911349058151245, "logps/chosen": -36.63948440551758, "logps/rejected": -268.5782165527344, "loss": 0.0731, "rewards/accuracies": 1.0, "rewards/chosen": 0.285511314868927, "rewards/margins": 2.592862606048584, "rewards/rejected": -2.307351589202881, "step": 690 }, { "epoch": 0.04, "learning_rate": 2.0870602265951103e-06, "logits/chosen": -2.9388504028320312, "logits/rejected": -2.8983030319213867, "logps/chosen": -40.48635482788086, "logps/rejected": -277.5279846191406, "loss": 0.0664, "rewards/accuracies": 1.0, "rewards/chosen": 0.2753918468952179, "rewards/margins": 2.6735894680023193, "rewards/rejected": -2.398197650909424, "step": 700 }, { "epoch": 0.04, "learning_rate": 2.1168753726893265e-06, "logits/chosen": -2.9426770210266113, "logits/rejected": -2.8750863075256348, "logps/chosen": -38.95457458496094, "logps/rejected": -312.7801513671875, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": 0.2553301751613617, "rewards/margins": 3.0066561698913574, "rewards/rejected": -2.751326084136963, "step": 710 }, { "epoch": 0.04, "learning_rate": 2.146690518783542e-06, "logits/chosen": -2.928013563156128, "logits/rejected": -2.894011974334717, "logps/chosen": -40.7637825012207, "logps/rejected": -375.2185974121094, "loss": 0.0418, "rewards/accuracies": 1.0, "rewards/chosen": 0.20871691405773163, "rewards/margins": 3.572239398956299, "rewards/rejected": -3.3635220527648926, "step": 720 }, { "epoch": 0.04, "learning_rate": 2.176505664877758e-06, "logits/chosen": -2.927023410797119, "logits/rejected": -2.844879388809204, "logps/chosen": -50.63892364501953, "logps/rejected": -510.1908264160156, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": 0.14439234137535095, "rewards/margins": 4.870084762573242, "rewards/rejected": -4.725692272186279, "step": 730 }, { "epoch": 0.04, "learning_rate": 2.206320810971974e-06, "logits/chosen": -2.935652256011963, "logits/rejected": -2.8129940032958984, "logps/chosen": -56.20857620239258, "logps/rejected": -566.111328125, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": 0.07859300076961517, "rewards/margins": 5.366889476776123, "rewards/rejected": -5.288297176361084, "step": 740 }, { "epoch": 0.04, "learning_rate": 2.2361359570661897e-06, "logits/chosen": -2.9132425785064697, "logits/rejected": -2.8033363819122314, "logps/chosen": -55.53290557861328, "logps/rejected": -622.1617431640625, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": 0.05675225704908371, "rewards/margins": 5.898639678955078, "rewards/rejected": -5.8418869972229, "step": 750 }, { "epoch": 0.05, "learning_rate": 2.2659511031604058e-06, "logits/chosen": -2.927593231201172, "logits/rejected": -2.8140652179718018, "logps/chosen": -73.15713500976562, "logps/rejected": -649.6702880859375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.10560838878154755, "rewards/margins": 6.02633810043335, "rewards/rejected": -6.131946563720703, "step": 760 }, { "epoch": 0.05, "learning_rate": 2.2957662492546215e-06, "logits/chosen": -2.9094057083129883, "logits/rejected": -2.782464027404785, "logps/chosen": -62.5818977355957, "logps/rejected": -668.749267578125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 0.03897203505039215, "rewards/margins": 6.352088928222656, "rewards/rejected": -6.313117027282715, "step": 770 }, { "epoch": 0.05, "learning_rate": 2.3255813953488376e-06, "logits/chosen": -2.9289677143096924, "logits/rejected": -2.8123393058776855, "logps/chosen": -49.84427261352539, "logps/rejected": -685.9219970703125, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 0.1265488564968109, "rewards/margins": 6.60431432723999, "rewards/rejected": -6.4777655601501465, "step": 780 }, { "epoch": 0.05, "learning_rate": 2.3553965414430533e-06, "logits/chosen": -2.933450222015381, "logits/rejected": -2.776223659515381, "logps/chosen": -61.3447265625, "logps/rejected": -716.4561767578125, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": 0.0013283550506457686, "rewards/margins": 6.800276279449463, "rewards/rejected": -6.798947811126709, "step": 790 }, { "epoch": 0.05, "learning_rate": 2.385211687537269e-06, "logits/chosen": -2.94747257232666, "logits/rejected": -2.7992312908172607, "logps/chosen": -61.9112663269043, "logps/rejected": -739.0569458007812, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 0.056593157351017, "rewards/margins": 7.056301116943359, "rewards/rejected": -6.999708652496338, "step": 800 }, { "epoch": 0.05, "learning_rate": 2.415026833631485e-06, "logits/chosen": -2.9657230377197266, "logits/rejected": -2.8273873329162598, "logps/chosen": -58.36040496826172, "logps/rejected": -759.1055908203125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.0673176646232605, "rewards/margins": 7.262142181396484, "rewards/rejected": -7.19482421875, "step": 810 }, { "epoch": 0.05, "learning_rate": 2.4448419797257008e-06, "logits/chosen": -2.975888967514038, "logits/rejected": -2.8092477321624756, "logps/chosen": -66.42881774902344, "logps/rejected": -743.7403564453125, "loss": 0.0797, "rewards/accuracies": 1.0, "rewards/chosen": 0.007080644369125366, "rewards/margins": 7.057661533355713, "rewards/rejected": -7.0505805015563965, "step": 820 }, { "epoch": 0.05, "learning_rate": 2.474657125819917e-06, "logits/chosen": -2.944556713104248, "logits/rejected": -2.807175636291504, "logps/chosen": -53.7796745300293, "logps/rejected": -738.9183349609375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.10738842189311981, "rewards/margins": 7.136092185974121, "rewards/rejected": -7.0287041664123535, "step": 830 }, { "epoch": 0.05, "learning_rate": 2.504472271914132e-06, "logits/chosen": -2.9551358222961426, "logits/rejected": -2.7876429557800293, "logps/chosen": -55.13721466064453, "logps/rejected": -743.2687377929688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.08093860000371933, "rewards/margins": 7.139859676361084, "rewards/rejected": -7.058920860290527, "step": 840 }, { "epoch": 0.05, "learning_rate": 2.5342874180083483e-06, "logits/chosen": -2.9683070182800293, "logits/rejected": -2.846489191055298, "logps/chosen": -60.13019943237305, "logps/rejected": -711.4315795898438, "loss": 0.0334, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.06761562824249268, "rewards/margins": 6.8154802322387695, "rewards/rejected": -6.747864723205566, "step": 850 }, { "epoch": 0.05, "learning_rate": 2.564102564102564e-06, "logits/chosen": -2.9205780029296875, "logits/rejected": -2.824169874191284, "logps/chosen": -66.83427429199219, "logps/rejected": -720.9044799804688, "loss": 0.0808, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.032160453498363495, "rewards/margins": 6.80129337310791, "rewards/rejected": -6.833454132080078, "step": 860 }, { "epoch": 0.05, "learning_rate": 2.59391771019678e-06, "logits/chosen": -2.9584438800811768, "logits/rejected": -2.8315420150756836, "logps/chosen": -49.3359375, "logps/rejected": -733.1146240234375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.09850139170885086, "rewards/margins": 7.058738708496094, "rewards/rejected": -6.9602370262146, "step": 870 }, { "epoch": 0.05, "learning_rate": 2.6237328562909958e-06, "logits/chosen": -2.9851458072662354, "logits/rejected": -2.8313965797424316, "logps/chosen": -70.78266906738281, "logps/rejected": -761.7115478515625, "loss": 0.0246, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.01931942254304886, "rewards/margins": 7.240239143371582, "rewards/rejected": -7.220919132232666, "step": 880 }, { "epoch": 0.05, "learning_rate": 2.653548002385212e-06, "logits/chosen": -2.9779679775238037, "logits/rejected": -2.828104019165039, "logps/chosen": -58.75334930419922, "logps/rejected": -754.3145751953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.06594493985176086, "rewards/margins": 7.227665901184082, "rewards/rejected": -7.16171932220459, "step": 890 }, { "epoch": 0.05, "learning_rate": 2.6833631484794276e-06, "logits/chosen": -2.9391167163848877, "logits/rejected": -2.8240389823913574, "logps/chosen": -50.938236236572266, "logps/rejected": -756.2517700195312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.11424784362316132, "rewards/margins": 7.3018903732299805, "rewards/rejected": -7.187643527984619, "step": 900 }, { "epoch": 0.05, "learning_rate": 2.7131782945736433e-06, "logits/chosen": -2.9505858421325684, "logits/rejected": -2.835918664932251, "logps/chosen": -85.41227722167969, "logps/rejected": -746.486572265625, "loss": 0.1465, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.19494356215000153, "rewards/margins": 6.8970770835876465, "rewards/rejected": -7.0920209884643555, "step": 910 }, { "epoch": 0.05, "learning_rate": 2.7429934406678594e-06, "logits/chosen": -2.9539926052093506, "logits/rejected": -2.864917039871216, "logps/chosen": -51.41550827026367, "logps/rejected": -734.2505493164062, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.16175466775894165, "rewards/margins": 7.1333489418029785, "rewards/rejected": -6.971594333648682, "step": 920 }, { "epoch": 0.06, "learning_rate": 2.772808586762075e-06, "logits/chosen": -2.9284985065460205, "logits/rejected": -2.8185455799102783, "logps/chosen": -61.472023010253906, "logps/rejected": -723.4031982421875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 0.07130110263824463, "rewards/margins": 6.917788505554199, "rewards/rejected": -6.846487522125244, "step": 930 }, { "epoch": 0.06, "learning_rate": 2.802623732856291e-06, "logits/chosen": -2.934460163116455, "logits/rejected": -2.83193039894104, "logps/chosen": -53.94060516357422, "logps/rejected": -763.4229125976562, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.11008824408054352, "rewards/margins": 7.3813300132751465, "rewards/rejected": -7.271241664886475, "step": 940 }, { "epoch": 0.06, "learning_rate": 2.832438878950507e-06, "logits/chosen": -2.9612298011779785, "logits/rejected": -2.8407912254333496, "logps/chosen": -53.47686767578125, "logps/rejected": -687.0736083984375, "loss": 0.016, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.03034086897969246, "rewards/margins": 6.525498390197754, "rewards/rejected": -6.495157718658447, "step": 950 }, { "epoch": 0.06, "learning_rate": 2.862254025044723e-06, "logits/chosen": -2.9520936012268066, "logits/rejected": -2.8232078552246094, "logps/chosen": -46.88372039794922, "logps/rejected": -751.0515747070312, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": 0.17546603083610535, "rewards/margins": 7.306546211242676, "rewards/rejected": -7.13107967376709, "step": 960 }, { "epoch": 0.06, "learning_rate": 2.8920691711389387e-06, "logits/chosen": -2.9509940147399902, "logits/rejected": -2.795253276824951, "logps/chosen": -68.1034927368164, "logps/rejected": -737.3883056640625, "loss": 0.0838, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.0684007853269577, "rewards/margins": 6.925948143005371, "rewards/rejected": -6.994348049163818, "step": 970 }, { "epoch": 0.06, "learning_rate": 2.9218843172331544e-06, "logits/chosen": -2.931248188018799, "logits/rejected": -2.8768303394317627, "logps/chosen": -66.80424499511719, "logps/rejected": -780.681640625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.003529875772073865, "rewards/margins": 7.414741516113281, "rewards/rejected": -7.418271541595459, "step": 980 }, { "epoch": 0.06, "learning_rate": 2.9516994633273705e-06, "logits/chosen": -2.9377660751342773, "logits/rejected": -2.8372390270233154, "logps/chosen": -61.295677185058594, "logps/rejected": -804.2783203125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.024168942123651505, "rewards/margins": 7.687748908996582, "rewards/rejected": -7.663578987121582, "step": 990 }, { "epoch": 0.06, "learning_rate": 2.981514609421586e-06, "logits/chosen": -2.9764480590820312, "logits/rejected": -2.8632521629333496, "logps/chosen": -62.231231689453125, "logps/rejected": -785.5969848632812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.024116892367601395, "rewards/margins": 7.487069129943848, "rewards/rejected": -7.462952613830566, "step": 1000 }, { "epoch": 0.06, "learning_rate": 3.0113297555158023e-06, "logits/chosen": -2.9528627395629883, "logits/rejected": -2.850304126739502, "logps/chosen": -66.63923645019531, "logps/rejected": -787.0718994140625, "loss": 0.0824, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.038939811289310455, "rewards/margins": 7.46230936050415, "rewards/rejected": -7.501250267028809, "step": 1010 }, { "epoch": 0.06, "learning_rate": 3.041144901610018e-06, "logits/chosen": -2.9471917152404785, "logits/rejected": -2.8134803771972656, "logps/chosen": -37.87491226196289, "logps/rejected": -732.5760498046875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 0.25654834508895874, "rewards/margins": 7.206686496734619, "rewards/rejected": -6.950139045715332, "step": 1020 }, { "epoch": 0.06, "learning_rate": 3.070960047704234e-06, "logits/chosen": -2.9677505493164062, "logits/rejected": -2.8343663215637207, "logps/chosen": -39.002647399902344, "logps/rejected": -771.248779296875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.21354231238365173, "rewards/margins": 7.532044410705566, "rewards/rejected": -7.3185014724731445, "step": 1030 }, { "epoch": 0.06, "learning_rate": 3.10077519379845e-06, "logits/chosen": -2.9292187690734863, "logits/rejected": -2.799205780029297, "logps/chosen": -50.51420211791992, "logps/rejected": -799.5140991210938, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.1468392312526703, "rewards/margins": 7.769438743591309, "rewards/rejected": -7.622599124908447, "step": 1040 }, { "epoch": 0.06, "learning_rate": 3.1305903398926655e-06, "logits/chosen": -2.9708163738250732, "logits/rejected": -2.827094316482544, "logps/chosen": -52.063026428222656, "logps/rejected": -855.7509765625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.13825547695159912, "rewards/margins": 8.320818901062012, "rewards/rejected": -8.182562828063965, "step": 1050 }, { "epoch": 0.06, "learning_rate": 3.1604054859868816e-06, "logits/chosen": -2.9458494186401367, "logits/rejected": -2.8050601482391357, "logps/chosen": -53.6517333984375, "logps/rejected": -815.8658447265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.09550432860851288, "rewards/margins": 7.879818916320801, "rewards/rejected": -7.784315586090088, "step": 1060 }, { "epoch": 0.06, "learning_rate": 3.1902206320810973e-06, "logits/chosen": -2.9375643730163574, "logits/rejected": -2.791468858718872, "logps/chosen": -52.582542419433594, "logps/rejected": -823.3201293945312, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.10163005441427231, "rewards/margins": 7.966217041015625, "rewards/rejected": -7.864585876464844, "step": 1070 }, { "epoch": 0.06, "learning_rate": 3.2200357781753134e-06, "logits/chosen": -2.9350087642669678, "logits/rejected": -2.794058322906494, "logps/chosen": -66.25208282470703, "logps/rejected": -843.6322021484375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.023152854293584824, "rewards/margins": 8.082487106323242, "rewards/rejected": -8.059333801269531, "step": 1080 }, { "epoch": 0.06, "learning_rate": 3.249850924269529e-06, "logits/chosen": -2.921220302581787, "logits/rejected": -2.8159546852111816, "logps/chosen": -87.72096252441406, "logps/rejected": -767.0853271484375, "loss": 0.2151, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.20939891040325165, "rewards/margins": 7.0865478515625, "rewards/rejected": -7.295947074890137, "step": 1090 }, { "epoch": 0.07, "learning_rate": 3.2796660703637452e-06, "logits/chosen": -2.9682905673980713, "logits/rejected": -2.856982469558716, "logps/chosen": -44.80827331542969, "logps/rejected": -780.9285888671875, "loss": 0.0702, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.16055050492286682, "rewards/margins": 7.601635932922363, "rewards/rejected": -7.441084861755371, "step": 1100 }, { "epoch": 0.07, "learning_rate": 3.309481216457961e-06, "logits/chosen": -2.9696075916290283, "logits/rejected": -2.846613645553589, "logps/chosen": -39.627410888671875, "logps/rejected": -706.2952880859375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.249398872256279, "rewards/margins": 6.945621490478516, "rewards/rejected": -6.69622278213501, "step": 1110 }, { "epoch": 0.07, "learning_rate": 3.3392963625521766e-06, "logits/chosen": -2.8932414054870605, "logits/rejected": -2.810833692550659, "logps/chosen": -62.70648193359375, "logps/rejected": -752.75537109375, "loss": 0.0685, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.00022234841890167445, "rewards/margins": 7.146376609802246, "rewards/rejected": -7.146154880523682, "step": 1120 }, { "epoch": 0.07, "learning_rate": 3.3691115086463927e-06, "logits/chosen": -2.9645304679870605, "logits/rejected": -2.8411436080932617, "logps/chosen": -67.86104583740234, "logps/rejected": -772.1302490234375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.06475340574979782, "rewards/margins": 7.275450706481934, "rewards/rejected": -7.340205192565918, "step": 1130 }, { "epoch": 0.07, "learning_rate": 3.3989266547406084e-06, "logits/chosen": -2.9525272846221924, "logits/rejected": -2.8478264808654785, "logps/chosen": -56.380645751953125, "logps/rejected": -797.6961059570312, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.032509945333004, "rewards/margins": 7.616178035736084, "rewards/rejected": -7.5836687088012695, "step": 1140 }, { "epoch": 0.07, "learning_rate": 3.4287418008348246e-06, "logits/chosen": -2.9776408672332764, "logits/rejected": -2.83225679397583, "logps/chosen": -63.97186279296875, "logps/rejected": -811.70458984375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.02378031238913536, "rewards/margins": 7.755499362945557, "rewards/rejected": -7.731719017028809, "step": 1150 }, { "epoch": 0.07, "learning_rate": 3.4585569469290402e-06, "logits/chosen": -2.9427390098571777, "logits/rejected": -2.8289268016815186, "logps/chosen": -53.437538146972656, "logps/rejected": -779.2871704101562, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.10934819281101227, "rewards/margins": 7.5239458084106445, "rewards/rejected": -7.414597511291504, "step": 1160 }, { "epoch": 0.07, "learning_rate": 3.4883720930232564e-06, "logits/chosen": -2.917423963546753, "logits/rejected": -2.8024415969848633, "logps/chosen": -64.57073211669922, "logps/rejected": -812.3079223632812, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 0.023101195693016052, "rewards/margins": 7.761740684509277, "rewards/rejected": -7.738638877868652, "step": 1170 }, { "epoch": 0.07, "learning_rate": 3.518187239117472e-06, "logits/chosen": -2.911154270172119, "logits/rejected": -2.8212318420410156, "logps/chosen": -79.54310607910156, "logps/rejected": -739.0757446289062, "loss": 0.0659, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.11583219468593597, "rewards/margins": 6.904937744140625, "rewards/rejected": -7.0207695960998535, "step": 1180 }, { "epoch": 0.07, "learning_rate": 3.5480023852116878e-06, "logits/chosen": -2.962933301925659, "logits/rejected": -2.8397510051727295, "logps/chosen": -61.581825256347656, "logps/rejected": -855.8854370117188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.037254128605127335, "rewards/margins": 8.212289810180664, "rewards/rejected": -8.17503547668457, "step": 1190 }, { "epoch": 0.07, "learning_rate": 3.577817531305904e-06, "logits/chosen": -2.9256014823913574, "logits/rejected": -2.837109327316284, "logps/chosen": -60.897491455078125, "logps/rejected": -838.4263916015625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.02157021127641201, "rewards/margins": 8.039704322814941, "rewards/rejected": -8.018134117126465, "step": 1200 }, { "epoch": 0.07, "learning_rate": 3.6076326774001196e-06, "logits/chosen": -2.978276252746582, "logits/rejected": -2.852952480316162, "logps/chosen": -71.32048034667969, "logps/rejected": -851.6654052734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.019057447090744972, "rewards/margins": 8.128162384033203, "rewards/rejected": -8.147220611572266, "step": 1210 }, { "epoch": 0.07, "learning_rate": 3.6374478234943357e-06, "logits/chosen": -2.94616961479187, "logits/rejected": -2.859433174133301, "logps/chosen": -56.56980514526367, "logps/rejected": -802.4037475585938, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.10236085951328278, "rewards/margins": 7.760002136230469, "rewards/rejected": -7.657642364501953, "step": 1220 }, { "epoch": 0.07, "learning_rate": 3.6672629695885514e-06, "logits/chosen": -2.9344871044158936, "logits/rejected": -2.8236775398254395, "logps/chosen": -49.70731735229492, "logps/rejected": -792.5345458984375, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": 0.1404164880514145, "rewards/margins": 7.691163539886475, "rewards/rejected": -7.550746917724609, "step": 1230 }, { "epoch": 0.07, "learning_rate": 3.6970781156827675e-06, "logits/chosen": -2.9523799419403076, "logits/rejected": -2.852698564529419, "logps/chosen": -54.118797302246094, "logps/rejected": -832.2688598632812, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.12421531975269318, "rewards/margins": 8.072728157043457, "rewards/rejected": -7.948511600494385, "step": 1240 }, { "epoch": 0.07, "learning_rate": 3.726893261776983e-06, "logits/chosen": -2.9282047748565674, "logits/rejected": -2.831162929534912, "logps/chosen": -48.48308563232422, "logps/rejected": -839.7501220703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.10753337293863297, "rewards/margins": 8.119595527648926, "rewards/rejected": -8.012062072753906, "step": 1250 }, { "epoch": 0.08, "learning_rate": 3.756708407871199e-06, "logits/chosen": -2.991194725036621, "logits/rejected": -2.8635239601135254, "logps/chosen": -55.58338165283203, "logps/rejected": -831.9315185546875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.10668520629405975, "rewards/margins": 8.040712356567383, "rewards/rejected": -7.934027194976807, "step": 1260 }, { "epoch": 0.08, "learning_rate": 3.786523553965415e-06, "logits/chosen": -2.970008134841919, "logits/rejected": -2.807595729827881, "logps/chosen": -61.25394821166992, "logps/rejected": -870.4525146484375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 0.07233773916959763, "rewards/margins": 8.415926933288574, "rewards/rejected": -8.343588829040527, "step": 1270 }, { "epoch": 0.08, "learning_rate": 3.816338700059631e-06, "logits/chosen": -2.945420980453491, "logits/rejected": -2.798638105392456, "logps/chosen": -85.82127380371094, "logps/rejected": -839.9280395507812, "loss": 0.0662, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.17583775520324707, "rewards/margins": 7.850653171539307, "rewards/rejected": -8.026491165161133, "step": 1280 }, { "epoch": 0.08, "learning_rate": 3.846153846153847e-06, "logits/chosen": -2.9410746097564697, "logits/rejected": -2.8313770294189453, "logps/chosen": -69.71158599853516, "logps/rejected": -873.66455078125, "loss": 0.0158, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.04627273976802826, "rewards/margins": 8.307173728942871, "rewards/rejected": -8.353446960449219, "step": 1290 }, { "epoch": 0.08, "learning_rate": 3.875968992248063e-06, "logits/chosen": -2.937119722366333, "logits/rejected": -2.8577029705047607, "logps/chosen": -60.37322998046875, "logps/rejected": -832.6936645507812, "loss": 0.0932, "rewards/accuracies": 1.0, "rewards/chosen": 0.005199164152145386, "rewards/margins": 7.9486799240112305, "rewards/rejected": -7.943480014801025, "step": 1300 }, { "epoch": 0.08, "learning_rate": 3.905784138342278e-06, "logits/chosen": -2.978745937347412, "logits/rejected": -2.8602681159973145, "logps/chosen": -57.73615646362305, "logps/rejected": -836.34375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.12005873024463654, "rewards/margins": 8.1077299118042, "rewards/rejected": -7.987672328948975, "step": 1310 }, { "epoch": 0.08, "learning_rate": 3.935599284436494e-06, "logits/chosen": -2.917057514190674, "logits/rejected": -2.8130440711975098, "logps/chosen": -55.53342819213867, "logps/rejected": -830.1809692382812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.07480813562870026, "rewards/margins": 7.993597507476807, "rewards/rejected": -7.918789863586426, "step": 1320 }, { "epoch": 0.08, "learning_rate": 3.96541443053071e-06, "logits/chosen": -2.9308481216430664, "logits/rejected": -2.8554842472076416, "logps/chosen": -74.8079833984375, "logps/rejected": -783.2691040039062, "loss": 0.0593, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.06850177049636841, "rewards/margins": 7.384908199310303, "rewards/rejected": -7.453409671783447, "step": 1330 }, { "epoch": 0.08, "learning_rate": 3.995229576624926e-06, "logits/chosen": -2.952489137649536, "logits/rejected": -2.8710484504699707, "logps/chosen": -49.32084274291992, "logps/rejected": -835.0719604492188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.10292410850524902, "rewards/margins": 8.077082633972168, "rewards/rejected": -7.974157810211182, "step": 1340 }, { "epoch": 0.08, "learning_rate": 4.025044722719142e-06, "logits/chosen": -2.9528369903564453, "logits/rejected": -2.854156017303467, "logps/chosen": -76.77101135253906, "logps/rejected": -846.5745849609375, "loss": 0.0587, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.1340237557888031, "rewards/margins": 7.954353332519531, "rewards/rejected": -8.088376998901367, "step": 1350 }, { "epoch": 0.08, "learning_rate": 4.054859868813357e-06, "logits/chosen": -2.9570131301879883, "logits/rejected": -2.8411474227905273, "logps/chosen": -82.59919738769531, "logps/rejected": -863.3250122070312, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.12739042937755585, "rewards/margins": 8.12880802154541, "rewards/rejected": -8.256197929382324, "step": 1360 }, { "epoch": 0.08, "learning_rate": 4.084675014907573e-06, "logits/chosen": -2.9535927772521973, "logits/rejected": -2.850450277328491, "logps/chosen": -68.17179107666016, "logps/rejected": -832.2708740234375, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": -0.10263939201831818, "rewards/margins": 7.841641426086426, "rewards/rejected": -7.944281101226807, "step": 1370 }, { "epoch": 0.08, "learning_rate": 4.114490161001789e-06, "logits/chosen": -2.9321084022521973, "logits/rejected": -2.8804666996002197, "logps/chosen": -68.42265319824219, "logps/rejected": -800.1928100585938, "loss": 0.099, "rewards/accuracies": 1.0, "rewards/chosen": -0.06528677046298981, "rewards/margins": 7.555539608001709, "rewards/rejected": -7.62082576751709, "step": 1380 }, { "epoch": 0.08, "learning_rate": 4.1443053070960046e-06, "logits/chosen": -2.9421005249023438, "logits/rejected": -2.8554928302764893, "logps/chosen": -69.97665405273438, "logps/rejected": -797.6248168945312, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.06231091544032097, "rewards/margins": 7.533974647521973, "rewards/rejected": -7.596285820007324, "step": 1390 }, { "epoch": 0.08, "learning_rate": 4.174120453190221e-06, "logits/chosen": -2.9417827129364014, "logits/rejected": -2.87119460105896, "logps/chosen": -50.77277374267578, "logps/rejected": -833.1838989257812, "loss": 0.0522, "rewards/accuracies": 1.0, "rewards/chosen": 0.11081106960773468, "rewards/margins": 8.063919067382812, "rewards/rejected": -7.9531097412109375, "step": 1400 }, { "epoch": 0.08, "learning_rate": 4.203935599284437e-06, "logits/chosen": -2.983537435531616, "logits/rejected": -2.882728099822998, "logps/chosen": -38.677284240722656, "logps/rejected": -778.9420776367188, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.20782116055488586, "rewards/margins": 7.609622001647949, "rewards/rejected": -7.40179967880249, "step": 1410 }, { "epoch": 0.08, "learning_rate": 4.233750745378653e-06, "logits/chosen": -2.967069149017334, "logits/rejected": -2.867262840270996, "logps/chosen": -44.10684585571289, "logps/rejected": -792.5245361328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.21291284263134003, "rewards/margins": 7.764891624450684, "rewards/rejected": -7.551979064941406, "step": 1420 }, { "epoch": 0.09, "learning_rate": 4.263565891472868e-06, "logits/chosen": -2.950510263442993, "logits/rejected": -2.845777988433838, "logps/chosen": -57.32572555541992, "logps/rejected": -859.3717041015625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.03477778285741806, "rewards/margins": 8.243889808654785, "rewards/rejected": -8.209112167358398, "step": 1430 }, { "epoch": 0.09, "learning_rate": 4.293381037567084e-06, "logits/chosen": -2.963660955429077, "logits/rejected": -2.8721413612365723, "logps/chosen": -66.78321838378906, "logps/rejected": -844.2529296875, "loss": 0.0115, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.036757443100214005, "rewards/margins": 8.096086502075195, "rewards/rejected": -8.059328079223633, "step": 1440 }, { "epoch": 0.09, "learning_rate": 4.3231961836613e-06, "logits/chosen": -2.9418163299560547, "logits/rejected": -2.8510735034942627, "logps/chosen": -81.5918197631836, "logps/rejected": -816.4410400390625, "loss": 0.0742, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.12166912853717804, "rewards/margins": 7.657325744628906, "rewards/rejected": -7.778996467590332, "step": 1450 }, { "epoch": 0.09, "learning_rate": 4.353011329755516e-06, "logits/chosen": -2.9710350036621094, "logits/rejected": -2.8575940132141113, "logps/chosen": -56.44457244873047, "logps/rejected": -844.6815185546875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.06740139424800873, "rewards/margins": 8.153116226196289, "rewards/rejected": -8.085716247558594, "step": 1460 }, { "epoch": 0.09, "learning_rate": 4.382826475849732e-06, "logits/chosen": -2.9913582801818848, "logits/rejected": -2.8557257652282715, "logps/chosen": -61.04204177856445, "logps/rejected": -832.654296875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.06690309196710587, "rewards/margins": 8.019301414489746, "rewards/rejected": -7.952397346496582, "step": 1470 }, { "epoch": 0.09, "learning_rate": 4.412641621943948e-06, "logits/chosen": -2.946587324142456, "logits/rejected": -2.8608665466308594, "logps/chosen": -78.7093505859375, "logps/rejected": -859.4927978515625, "loss": 0.0388, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.17765815556049347, "rewards/margins": 8.048316955566406, "rewards/rejected": -8.225974082946777, "step": 1480 }, { "epoch": 0.09, "learning_rate": 4.442456768038164e-06, "logits/chosen": -2.9439327716827393, "logits/rejected": -2.843292713165283, "logps/chosen": -92.28190612792969, "logps/rejected": -844.7705078125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.272264301776886, "rewards/margins": 7.79150915145874, "rewards/rejected": -8.063772201538086, "step": 1490 }, { "epoch": 0.09, "learning_rate": 4.472271914132379e-06, "logits/chosen": -2.971674680709839, "logits/rejected": -2.8799490928649902, "logps/chosen": -79.70256042480469, "logps/rejected": -851.8480224609375, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.12765036523342133, "rewards/margins": 8.005836486816406, "rewards/rejected": -8.1334867477417, "step": 1500 }, { "epoch": 0.09, "learning_rate": 4.502087060226595e-06, "logits/chosen": -2.9640636444091797, "logits/rejected": -2.8365471363067627, "logps/chosen": -49.924705505371094, "logps/rejected": -864.9312744140625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.15793825685977936, "rewards/margins": 8.429708480834961, "rewards/rejected": -8.271771430969238, "step": 1510 }, { "epoch": 0.09, "learning_rate": 4.5319022063208115e-06, "logits/chosen": -2.936018943786621, "logits/rejected": -2.819920063018799, "logps/chosen": -55.14573287963867, "logps/rejected": -865.2366333007812, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.08863909542560577, "rewards/margins": 8.376001358032227, "rewards/rejected": -8.287362098693848, "step": 1520 }, { "epoch": 0.09, "learning_rate": 4.561717352415027e-06, "logits/chosen": -2.9792511463165283, "logits/rejected": -2.8655331134796143, "logps/chosen": -45.40349578857422, "logps/rejected": -860.7001953125, "loss": 0.06, "rewards/accuracies": 1.0, "rewards/chosen": 0.21952152252197266, "rewards/margins": 8.451563835144043, "rewards/rejected": -8.23204231262207, "step": 1530 }, { "epoch": 0.09, "learning_rate": 4.591532498509243e-06, "logits/chosen": -2.9692814350128174, "logits/rejected": -2.869812488555908, "logps/chosen": -62.5212516784668, "logps/rejected": -885.185546875, "loss": 0.0583, "rewards/accuracies": 1.0, "rewards/chosen": 0.025884171947836876, "rewards/margins": 8.504364013671875, "rewards/rejected": -8.478480339050293, "step": 1540 }, { "epoch": 0.09, "learning_rate": 4.621347644603459e-06, "logits/chosen": -2.9681038856506348, "logits/rejected": -2.867732524871826, "logps/chosen": -52.20361328125, "logps/rejected": -878.3974609375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.05458943918347359, "rewards/margins": 8.469630241394043, "rewards/rejected": -8.415040969848633, "step": 1550 }, { "epoch": 0.09, "learning_rate": 4.651162790697675e-06, "logits/chosen": -2.92891263961792, "logits/rejected": -2.8252596855163574, "logps/chosen": -61.71787643432617, "logps/rejected": -912.6612548828125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.02772531844675541, "rewards/margins": 8.727641105651855, "rewards/rejected": -8.755365371704102, "step": 1560 }, { "epoch": 0.09, "learning_rate": 4.68097793679189e-06, "logits/chosen": -2.960237979888916, "logits/rejected": -2.8692328929901123, "logps/chosen": -52.9825553894043, "logps/rejected": -867.1370849609375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 0.13431011140346527, "rewards/margins": 8.429038047790527, "rewards/rejected": -8.29472827911377, "step": 1570 }, { "epoch": 0.09, "learning_rate": 4.7107930828861065e-06, "logits/chosen": -2.9521450996398926, "logits/rejected": -2.868682384490967, "logps/chosen": -46.400596618652344, "logps/rejected": -862.8753662109375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.1698424071073532, "rewards/margins": 8.42706298828125, "rewards/rejected": -8.257222175598145, "step": 1580 }, { "epoch": 0.09, "learning_rate": 4.740608228980323e-06, "logits/chosen": -2.959049701690674, "logits/rejected": -2.8271801471710205, "logps/chosen": -52.452789306640625, "logps/rejected": -901.5426025390625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.1390310674905777, "rewards/margins": 8.774405479431152, "rewards/rejected": -8.635374069213867, "step": 1590 }, { "epoch": 0.1, "learning_rate": 4.770423375074538e-06, "logits/chosen": -2.9610068798065186, "logits/rejected": -2.8333799839019775, "logps/chosen": -52.75615692138672, "logps/rejected": -905.1751708984375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.09056927263736725, "rewards/margins": 8.769072532653809, "rewards/rejected": -8.678503036499023, "step": 1600 }, { "epoch": 0.1, "learning_rate": 4.800238521168754e-06, "logits/chosen": -2.9743571281433105, "logits/rejected": -2.8583149909973145, "logps/chosen": -57.59144973754883, "logps/rejected": -889.9105224609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.1298973262310028, "rewards/margins": 8.651684761047363, "rewards/rejected": -8.521787643432617, "step": 1610 }, { "epoch": 0.1, "learning_rate": 4.83005366726297e-06, "logits/chosen": -2.981600284576416, "logits/rejected": -2.8229737281799316, "logps/chosen": -56.11140823364258, "logps/rejected": -903.0538330078125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.06562893837690353, "rewards/margins": 8.72365665435791, "rewards/rejected": -8.658028602600098, "step": 1620 }, { "epoch": 0.1, "learning_rate": 4.859868813357186e-06, "logits/chosen": -2.941699504852295, "logits/rejected": -2.812774896621704, "logps/chosen": -68.4901123046875, "logps/rejected": -919.5745849609375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.06523467600345612, "rewards/margins": 8.757169723510742, "rewards/rejected": -8.822403907775879, "step": 1630 }, { "epoch": 0.1, "learning_rate": 4.8896839594514015e-06, "logits/chosen": -2.979139804840088, "logits/rejected": -2.8709495067596436, "logps/chosen": -68.3390884399414, "logps/rejected": -943.32275390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.05745609477162361, "rewards/margins": 9.006002426147461, "rewards/rejected": -9.063459396362305, "step": 1640 }, { "epoch": 0.1, "learning_rate": 4.919499105545618e-06, "logits/chosen": -2.9662771224975586, "logits/rejected": -2.8282623291015625, "logps/chosen": -73.66764068603516, "logps/rejected": -941.9275512695312, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.10303034633398056, "rewards/margins": 8.942924499511719, "rewards/rejected": -9.045953750610352, "step": 1650 }, { "epoch": 0.1, "learning_rate": 4.949314251639834e-06, "logits/chosen": -2.9652342796325684, "logits/rejected": -2.83856201171875, "logps/chosen": -64.9266357421875, "logps/rejected": -954.0256958007812, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.004475998692214489, "rewards/margins": 9.17994499206543, "rewards/rejected": -9.175470352172852, "step": 1660 }, { "epoch": 0.1, "learning_rate": 4.979129397734049e-06, "logits/chosen": -2.932119607925415, "logits/rejected": -2.8284549713134766, "logps/chosen": -56.88899612426758, "logps/rejected": -951.2662353515625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.02996777929365635, "rewards/margins": 9.160717964172363, "rewards/rejected": -9.130751609802246, "step": 1670 }, { "epoch": 0.1, "learning_rate": 4.99999951258251e-06, "logits/chosen": -2.959083080291748, "logits/rejected": -2.8922276496887207, "logps/chosen": -63.5770263671875, "logps/rejected": -1007.3986206054688, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.011779346503317356, "rewards/margins": 9.709405899047852, "rewards/rejected": -9.697626113891602, "step": 1680 }, { "epoch": 0.1, "learning_rate": 4.9999908473879605e-06, "logits/chosen": -2.9543569087982178, "logits/rejected": -2.850839138031006, "logps/chosen": -58.661895751953125, "logps/rejected": -941.4034423828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.04295549541711807, "rewards/margins": 9.087894439697266, "rewards/rejected": -9.044939041137695, "step": 1690 }, { "epoch": 0.1, "learning_rate": 4.999971350736829e-06, "logits/chosen": -2.934324026107788, "logits/rejected": -2.8332202434539795, "logps/chosen": -52.15330123901367, "logps/rejected": -932.9797973632812, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.10418331623077393, "rewards/margins": 9.058653831481934, "rewards/rejected": -8.95447063446045, "step": 1700 }, { "epoch": 0.1, "learning_rate": 4.999941022713586e-06, "logits/chosen": -2.9422943592071533, "logits/rejected": -2.8219618797302246, "logps/chosen": -47.537879943847656, "logps/rejected": -952.1214599609375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.11056973785161972, "rewards/margins": 9.25611400604248, "rewards/rejected": -9.145544052124023, "step": 1710 }, { "epoch": 0.1, "learning_rate": 4.999899863449631e-06, "logits/chosen": -2.959444522857666, "logits/rejected": -2.8423378467559814, "logps/chosen": -55.27605438232422, "logps/rejected": -945.9014892578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.09202218055725098, "rewards/margins": 9.17585277557373, "rewards/rejected": -9.083830833435059, "step": 1720 }, { "epoch": 0.1, "learning_rate": 4.999847873123291e-06, "logits/chosen": -2.9738545417785645, "logits/rejected": -2.8512320518493652, "logps/chosen": -55.554840087890625, "logps/rejected": -945.4483642578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.08008383214473724, "rewards/margins": 9.16378402709961, "rewards/rejected": -9.083700180053711, "step": 1730 }, { "epoch": 0.1, "learning_rate": 4.999785051959819e-06, "logits/chosen": -2.9619839191436768, "logits/rejected": -2.8432111740112305, "logps/chosen": -58.36278533935547, "logps/rejected": -943.3648681640625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.09746307879686356, "rewards/margins": 9.166163444519043, "rewards/rejected": -9.068700790405273, "step": 1740 }, { "epoch": 0.1, "learning_rate": 4.999711400231393e-06, "logits/chosen": -2.9860432147979736, "logits/rejected": -2.851942300796509, "logps/chosen": -51.372314453125, "logps/rejected": -995.8817138671875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.1180131584405899, "rewards/margins": 9.710302352905273, "rewards/rejected": -9.59228801727295, "step": 1750 }, { "epoch": 0.1, "learning_rate": 4.999626918257117e-06, "logits/chosen": -2.950094699859619, "logits/rejected": -2.845525026321411, "logps/chosen": -51.88335037231445, "logps/rejected": -1012.0709838867188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.0803295224905014, "rewards/margins": 9.829864501953125, "rewards/rejected": -9.74953556060791, "step": 1760 }, { "epoch": 0.11, "learning_rate": 4.999531606403018e-06, "logits/chosen": -2.9301562309265137, "logits/rejected": -2.8342392444610596, "logps/chosen": -46.81805419921875, "logps/rejected": -932.3685302734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.1403868943452835, "rewards/margins": 9.092107772827148, "rewards/rejected": -8.951720237731934, "step": 1770 }, { "epoch": 0.11, "learning_rate": 4.999425465082043e-06, "logits/chosen": -2.9261631965637207, "logits/rejected": -2.7935142517089844, "logps/chosen": -59.28017044067383, "logps/rejected": -989.1407470703125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.10629899799823761, "rewards/margins": 9.619695663452148, "rewards/rejected": -9.513397216796875, "step": 1780 }, { "epoch": 0.11, "learning_rate": 4.99930849475406e-06, "logits/chosen": -2.957244873046875, "logits/rejected": -2.8449857234954834, "logps/chosen": -49.23945236206055, "logps/rejected": -1004.6325073242188, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": 0.1350083351135254, "rewards/margins": 9.812288284301758, "rewards/rejected": -9.677278518676758, "step": 1790 }, { "epoch": 0.11, "learning_rate": 4.999180695925856e-06, "logits/chosen": -2.980672597885132, "logits/rejected": -2.8906588554382324, "logps/chosen": -43.63019561767578, "logps/rejected": -945.4990234375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 0.17754368484020233, "rewards/margins": 9.24999713897705, "rewards/rejected": -9.072452545166016, "step": 1800 }, { "epoch": 0.11, "learning_rate": 4.999042069151129e-06, "logits/chosen": -2.9444472789764404, "logits/rejected": -2.867940902709961, "logps/chosen": -40.078163146972656, "logps/rejected": -949.5886840820312, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.18242432177066803, "rewards/margins": 9.297201156616211, "rewards/rejected": -9.114776611328125, "step": 1810 }, { "epoch": 0.11, "learning_rate": 4.998892615030496e-06, "logits/chosen": -2.963578701019287, "logits/rejected": -2.866978645324707, "logps/chosen": -44.85164260864258, "logps/rejected": -945.8570556640625, "loss": 0.0796, "rewards/accuracies": 1.0, "rewards/chosen": 0.17667728662490845, "rewards/margins": 9.263506889343262, "rewards/rejected": -9.086830139160156, "step": 1820 }, { "epoch": 0.11, "learning_rate": 4.99873233421148e-06, "logits/chosen": -2.941805124282837, "logits/rejected": -2.836016893386841, "logps/chosen": -51.32543182373047, "logps/rejected": -920.9342651367188, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.1497163623571396, "rewards/margins": 8.975264549255371, "rewards/rejected": -8.82554817199707, "step": 1830 }, { "epoch": 0.11, "learning_rate": 4.9985612273885145e-06, "logits/chosen": -2.9724180698394775, "logits/rejected": -2.8858962059020996, "logps/chosen": -64.11556243896484, "logps/rejected": -843.2903442382812, "loss": 0.052, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.016977693885564804, "rewards/margins": 8.073419570922852, "rewards/rejected": -8.056442260742188, "step": 1840 }, { "epoch": 0.11, "learning_rate": 4.998379295302936e-06, "logits/chosen": -2.9345078468322754, "logits/rejected": -2.845078945159912, "logps/chosen": -77.18492126464844, "logps/rejected": -906.4959716796875, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.16813500225543976, "rewards/margins": 8.52023696899414, "rewards/rejected": -8.688371658325195, "step": 1850 }, { "epoch": 0.11, "learning_rate": 4.9981865387429825e-06, "logits/chosen": -2.974233865737915, "logits/rejected": -2.850526809692383, "logps/chosen": -124.40218353271484, "logps/rejected": -939.4627685546875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.6152923703193665, "rewards/margins": 8.387138366699219, "rewards/rejected": -9.00243091583252, "step": 1860 }, { "epoch": 0.11, "learning_rate": 4.997982958543792e-06, "logits/chosen": -2.949894428253174, "logits/rejected": -2.818716526031494, "logps/chosen": -118.48817443847656, "logps/rejected": -1024.0811767578125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.5254218578338623, "rewards/margins": 9.345009803771973, "rewards/rejected": -9.870431900024414, "step": 1870 }, { "epoch": 0.11, "learning_rate": 4.9977685555873955e-06, "logits/chosen": -2.9752907752990723, "logits/rejected": -2.8415160179138184, "logps/chosen": -121.38582611083984, "logps/rejected": -944.46435546875, "loss": 0.0851, "rewards/accuracies": 1.0, "rewards/chosen": -0.5538108348846436, "rewards/margins": 8.514638900756836, "rewards/rejected": -9.068449020385742, "step": 1880 }, { "epoch": 0.11, "learning_rate": 4.997543330802716e-06, "logits/chosen": -2.974660634994507, "logits/rejected": -2.8538661003112793, "logps/chosen": -54.957481384277344, "logps/rejected": -999.7957153320312, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 0.09890345484018326, "rewards/margins": 9.717883110046387, "rewards/rejected": -9.618979454040527, "step": 1890 }, { "epoch": 0.11, "learning_rate": 4.997307285165559e-06, "logits/chosen": -2.9241747856140137, "logits/rejected": -2.8261189460754395, "logps/chosen": -44.20441818237305, "logps/rejected": -889.74951171875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.20693016052246094, "rewards/margins": 8.732515335083008, "rewards/rejected": -8.525586128234863, "step": 1900 }, { "epoch": 0.11, "learning_rate": 4.997060419698618e-06, "logits/chosen": -2.9329235553741455, "logits/rejected": -2.8412880897521973, "logps/chosen": -61.673828125, "logps/rejected": -878.3505859375, "loss": 0.0619, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.0142064793035388, "rewards/margins": 8.422164916992188, "rewards/rejected": -8.407957077026367, "step": 1910 }, { "epoch": 0.11, "learning_rate": 4.996802735471461e-06, "logits/chosen": -2.9551236629486084, "logits/rejected": -2.863818645477295, "logps/chosen": -45.395286560058594, "logps/rejected": -891.0927734375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 0.1725916862487793, "rewards/margins": 8.708904266357422, "rewards/rejected": -8.536312103271484, "step": 1920 }, { "epoch": 0.12, "learning_rate": 4.996534233600531e-06, "logits/chosen": -2.9402060508728027, "logits/rejected": -2.821347951889038, "logps/chosen": -45.679473876953125, "logps/rejected": -947.8079833984375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.20218610763549805, "rewards/margins": 9.289873123168945, "rewards/rejected": -9.087687492370605, "step": 1930 }, { "epoch": 0.12, "learning_rate": 4.996254915249138e-06, "logits/chosen": -2.977008581161499, "logits/rejected": -2.8665661811828613, "logps/chosen": -46.59867477416992, "logps/rejected": -879.0603637695312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.1566549688577652, "rewards/margins": 8.560900688171387, "rewards/rejected": -8.404245376586914, "step": 1940 }, { "epoch": 0.12, "learning_rate": 4.995964781627457e-06, "logits/chosen": -2.9712634086608887, "logits/rejected": -2.8670601844787598, "logps/chosen": -106.1603775024414, "logps/rejected": -983.8519287109375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.4712972640991211, "rewards/margins": 9.001543045043945, "rewards/rejected": -9.472841262817383, "step": 1950 }, { "epoch": 0.12, "learning_rate": 4.99566383399252e-06, "logits/chosen": -2.9497740268707275, "logits/rejected": -2.8398914337158203, "logps/chosen": -162.8874053955078, "logps/rejected": -974.7918701171875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.9869812726974487, "rewards/margins": 8.378569602966309, "rewards/rejected": -9.36555290222168, "step": 1960 }, { "epoch": 0.12, "learning_rate": 4.995352073648213e-06, "logits/chosen": -2.950453519821167, "logits/rejected": -2.851583480834961, "logps/chosen": -69.71537780761719, "logps/rejected": -1008.3463134765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.028142184019088745, "rewards/margins": 9.685117721557617, "rewards/rejected": -9.713260650634766, "step": 1970 }, { "epoch": 0.12, "learning_rate": 4.9950295019452665e-06, "logits/chosen": -2.9719862937927246, "logits/rejected": -2.8701815605163574, "logps/chosen": -63.449066162109375, "logps/rejected": -1038.838134765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.023493584245443344, "rewards/margins": 9.998686790466309, "rewards/rejected": -10.022181510925293, "step": 1980 }, { "epoch": 0.12, "learning_rate": 4.9946961202812566e-06, "logits/chosen": -2.9477620124816895, "logits/rejected": -2.8670742511749268, "logps/chosen": -72.92020416259766, "logps/rejected": -927.1637573242188, "loss": 0.1405, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.030682409182190895, "rewards/margins": 8.850085258483887, "rewards/rejected": -8.880766868591309, "step": 1990 }, { "epoch": 0.12, "learning_rate": 4.99435193010059e-06, "logits/chosen": -2.9413959980010986, "logits/rejected": -2.858614921569824, "logps/chosen": -41.556976318359375, "logps/rejected": -777.1593017578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.20922109484672546, "rewards/margins": 7.6098198890686035, "rewards/rejected": -7.400598049163818, "step": 2000 }, { "epoch": 0.12, "learning_rate": 4.993996932894507e-06, "logits/chosen": -2.9597530364990234, "logits/rejected": -2.845543384552002, "logps/chosen": -64.46644592285156, "logps/rejected": -849.7469482421875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.014239134266972542, "rewards/margins": 8.131264686584473, "rewards/rejected": -8.117025375366211, "step": 2010 }, { "epoch": 0.12, "learning_rate": 4.993631130201066e-06, "logits/chosen": -2.954618453979492, "logits/rejected": -2.8723948001861572, "logps/chosen": -70.39875793457031, "logps/rejected": -915.5260009765625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.06234943866729736, "rewards/margins": 8.707820892333984, "rewards/rejected": -8.770170211791992, "step": 2020 }, { "epoch": 0.12, "learning_rate": 4.993254523605144e-06, "logits/chosen": -2.9476194381713867, "logits/rejected": -2.8462414741516113, "logps/chosen": -79.46333312988281, "logps/rejected": -940.7806396484375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.1955333948135376, "rewards/margins": 8.847306251525879, "rewards/rejected": -9.042840003967285, "step": 2030 }, { "epoch": 0.12, "learning_rate": 4.9928671147384255e-06, "logits/chosen": -2.9254276752471924, "logits/rejected": -2.8385252952575684, "logps/chosen": -62.8824348449707, "logps/rejected": -948.6345825195312, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.014571787789463997, "rewards/margins": 9.09857177734375, "rewards/rejected": -9.113143920898438, "step": 2040 }, { "epoch": 0.12, "learning_rate": 4.992468905279398e-06, "logits/chosen": -2.906681537628174, "logits/rejected": -2.839059829711914, "logps/chosen": -157.54974365234375, "logps/rejected": -989.6416015625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9313791394233704, "rewards/margins": 8.594076156616211, "rewards/rejected": -9.525456428527832, "step": 2050 }, { "epoch": 0.12, "learning_rate": 4.992059896953343e-06, "logits/chosen": -2.9540200233459473, "logits/rejected": -2.854724645614624, "logps/chosen": -261.4559020996094, "logps/rejected": -1037.230712890625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -2.0125346183776855, "rewards/margins": 7.979687690734863, "rewards/rejected": -9.992222785949707, "step": 2060 }, { "epoch": 0.12, "learning_rate": 4.99164009153233e-06, "logits/chosen": -2.945979356765747, "logits/rejected": -2.8699681758880615, "logps/chosen": -69.53071594238281, "logps/rejected": -936.02734375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.0601426437497139, "rewards/margins": 8.923473358154297, "rewards/rejected": -8.98361587524414, "step": 2070 }, { "epoch": 0.12, "learning_rate": 4.991209490835207e-06, "logits/chosen": -2.9740195274353027, "logits/rejected": -2.888162612915039, "logps/chosen": -35.27337646484375, "logps/rejected": -896.4847412109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.269118070602417, "rewards/margins": 8.847146034240723, "rewards/rejected": -8.578025817871094, "step": 2080 }, { "epoch": 0.12, "learning_rate": 4.990768096727594e-06, "logits/chosen": -2.963611125946045, "logits/rejected": -2.8710274696350098, "logps/chosen": -37.78728485107422, "logps/rejected": -965.2390747070312, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2858414053916931, "rewards/margins": 9.55634880065918, "rewards/rejected": -9.2705078125, "step": 2090 }, { "epoch": 0.13, "learning_rate": 4.990315911121874e-06, "logits/chosen": -2.960092306137085, "logits/rejected": -2.872699737548828, "logps/chosen": -38.32144546508789, "logps/rejected": -1007.7561645507812, "loss": 0.0813, "rewards/accuracies": 1.0, "rewards/chosen": 0.21387045085430145, "rewards/margins": 9.897279739379883, "rewards/rejected": -9.683408737182617, "step": 2100 }, { "epoch": 0.13, "learning_rate": 4.989852935977187e-06, "logits/chosen": -2.9342198371887207, "logits/rejected": -2.8510549068450928, "logps/chosen": -60.72428512573242, "logps/rejected": -903.5880737304688, "loss": 0.1872, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.01291152834892273, "rewards/margins": 8.678878784179688, "rewards/rejected": -8.665966033935547, "step": 2110 }, { "epoch": 0.13, "learning_rate": 4.989379173299416e-06, "logits/chosen": -2.9772562980651855, "logits/rejected": -2.9047322273254395, "logps/chosen": -31.685562133789062, "logps/rejected": -742.3798828125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.27260759472846985, "rewards/margins": 7.313261985778809, "rewards/rejected": -7.040654182434082, "step": 2120 }, { "epoch": 0.13, "learning_rate": 4.988894625141186e-06, "logits/chosen": -2.9627575874328613, "logits/rejected": -2.8957152366638184, "logps/chosen": -52.527435302734375, "logps/rejected": -858.0653076171875, "loss": 0.0631, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.1165364608168602, "rewards/margins": 8.330681800842285, "rewards/rejected": -8.214144706726074, "step": 2130 }, { "epoch": 0.13, "learning_rate": 4.98839929360185e-06, "logits/chosen": -2.9487318992614746, "logits/rejected": -2.861151695251465, "logps/chosen": -56.81452560424805, "logps/rejected": -866.6612548828125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": 0.10767602920532227, "rewards/margins": 8.390599250793457, "rewards/rejected": -8.28292179107666, "step": 2140 }, { "epoch": 0.13, "learning_rate": 4.9878931808274796e-06, "logits/chosen": -2.9535036087036133, "logits/rejected": -2.8629953861236572, "logps/chosen": -127.41032409667969, "logps/rejected": -869.6463623046875, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": -0.7162045836448669, "rewards/margins": 7.602516174316406, "rewards/rejected": -8.318720817565918, "step": 2150 }, { "epoch": 0.13, "learning_rate": 4.9873762890108596e-06, "logits/chosen": -2.9757449626922607, "logits/rejected": -2.8921334743499756, "logps/chosen": -80.45555114746094, "logps/rejected": -903.0352783203125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.1490497887134552, "rewards/margins": 8.50477409362793, "rewards/rejected": -8.653824806213379, "step": 2160 }, { "epoch": 0.13, "learning_rate": 4.986848620391473e-06, "logits/chosen": -2.9436023235321045, "logits/rejected": -2.8823113441467285, "logps/chosen": -52.28654861450195, "logps/rejected": -917.3688354492188, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.10300836712121964, "rewards/margins": 8.905111312866211, "rewards/rejected": -8.802103042602539, "step": 2170 }, { "epoch": 0.13, "learning_rate": 4.986310177255498e-06, "logits/chosen": -2.9419848918914795, "logits/rejected": -2.8666701316833496, "logps/chosen": -54.8491325378418, "logps/rejected": -837.8796997070312, "loss": 0.0423, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.09057823568582535, "rewards/margins": 8.07280158996582, "rewards/rejected": -7.982222557067871, "step": 2180 }, { "epoch": 0.13, "learning_rate": 4.985760961935791e-06, "logits/chosen": -2.971418619155884, "logits/rejected": -2.901982545852661, "logps/chosen": -53.1193962097168, "logps/rejected": -946.23974609375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.12189672887325287, "rewards/margins": 9.220931053161621, "rewards/rejected": -9.099035263061523, "step": 2190 }, { "epoch": 0.13, "learning_rate": 4.985200976811882e-06, "logits/chosen": -2.962893009185791, "logits/rejected": -2.885829210281372, "logps/chosen": -44.871917724609375, "logps/rejected": -903.8126220703125, "loss": 0.0918, "rewards/accuracies": 1.0, "rewards/chosen": 0.1707591712474823, "rewards/margins": 8.83647346496582, "rewards/rejected": -8.6657133102417, "step": 2200 }, { "epoch": 0.13, "learning_rate": 4.9846302243099624e-06, "logits/chosen": -2.9510750770568848, "logits/rejected": -2.8943705558776855, "logps/chosen": -52.7039909362793, "logps/rejected": -795.1301879882812, "loss": 0.0384, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.10009205341339111, "rewards/margins": 7.669191837310791, "rewards/rejected": -7.569098472595215, "step": 2210 }, { "epoch": 0.13, "learning_rate": 4.984048706902872e-06, "logits/chosen": -2.986412763595581, "logits/rejected": -2.8778998851776123, "logps/chosen": -44.76283645629883, "logps/rejected": -848.52685546875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.15771527588367462, "rewards/margins": 8.263212203979492, "rewards/rejected": -8.105498313903809, "step": 2220 }, { "epoch": 0.13, "learning_rate": 4.9834564271100925e-06, "logits/chosen": -2.979132890701294, "logits/rejected": -2.896970272064209, "logps/chosen": -62.106285095214844, "logps/rejected": -899.6920776367188, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.017178431153297424, "rewards/margins": 8.599143981933594, "rewards/rejected": -8.616321563720703, "step": 2230 }, { "epoch": 0.13, "learning_rate": 4.982853387497737e-06, "logits/chosen": -2.9530258178710938, "logits/rejected": -2.8785574436187744, "logps/chosen": -70.67894744873047, "logps/rejected": -904.8424072265625, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/chosen": -0.07714924216270447, "rewards/margins": 8.605292320251465, "rewards/rejected": -8.682441711425781, "step": 2240 }, { "epoch": 0.13, "learning_rate": 4.98223959067853e-06, "logits/chosen": -2.9438140392303467, "logits/rejected": -2.8551700115203857, "logps/chosen": -76.45069885253906, "logps/rejected": -971.1153564453125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.14506937563419342, "rewards/margins": 9.182818412780762, "rewards/rejected": -9.327889442443848, "step": 2250 }, { "epoch": 0.13, "learning_rate": 4.9816150393118105e-06, "logits/chosen": -2.964266300201416, "logits/rejected": -2.8914237022399902, "logps/chosen": -71.19879150390625, "logps/rejected": -921.7913818359375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.09960738569498062, "rewards/margins": 8.742380142211914, "rewards/rejected": -8.841987609863281, "step": 2260 }, { "epoch": 0.14, "learning_rate": 4.980979736103506e-06, "logits/chosen": -2.958503007888794, "logits/rejected": -2.87711763381958, "logps/chosen": -47.76176071166992, "logps/rejected": -925.2347412109375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.17690588533878326, "rewards/margins": 9.056108474731445, "rewards/rejected": -8.879201889038086, "step": 2270 }, { "epoch": 0.14, "learning_rate": 4.980333683806132e-06, "logits/chosen": -2.9801459312438965, "logits/rejected": -2.8632309436798096, "logps/chosen": -41.220577239990234, "logps/rejected": -955.9514770507812, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2588732838630676, "rewards/margins": 9.436012268066406, "rewards/rejected": -9.177138328552246, "step": 2280 }, { "epoch": 0.14, "learning_rate": 4.979676885218772e-06, "logits/chosen": -2.927335262298584, "logits/rejected": -2.843510150909424, "logps/chosen": -44.9033317565918, "logps/rejected": -986.2160034179688, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.22740483283996582, "rewards/margins": 9.713313102722168, "rewards/rejected": -9.485909461975098, "step": 2290 }, { "epoch": 0.14, "learning_rate": 4.979009343187073e-06, "logits/chosen": -2.9468741416931152, "logits/rejected": -2.865966320037842, "logps/chosen": -47.56918716430664, "logps/rejected": -970.8502807617188, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 0.16878795623779297, "rewards/margins": 9.498997688293457, "rewards/rejected": -9.330209732055664, "step": 2300 }, { "epoch": 0.14, "learning_rate": 4.9783310606032245e-06, "logits/chosen": -2.9712982177734375, "logits/rejected": -2.8787612915039062, "logps/chosen": -55.614219665527344, "logps/rejected": -1000.5245971679688, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.05771643668413162, "rewards/margins": 9.68165397644043, "rewards/rejected": -9.623937606811523, "step": 2310 }, { "epoch": 0.14, "learning_rate": 4.977642040405954e-06, "logits/chosen": -2.929609775543213, "logits/rejected": -2.855452060699463, "logps/chosen": -72.2454833984375, "logps/rejected": -988.2828979492188, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.10407350957393646, "rewards/margins": 9.397204399108887, "rewards/rejected": -9.501277923583984, "step": 2320 }, { "epoch": 0.14, "learning_rate": 4.976942285580507e-06, "logits/chosen": -2.949213743209839, "logits/rejected": -2.860365390777588, "logps/chosen": -134.41497802734375, "logps/rejected": -1028.3236083984375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7220079302787781, "rewards/margins": 9.183069229125977, "rewards/rejected": -9.905077934265137, "step": 2330 }, { "epoch": 0.14, "learning_rate": 4.976231799158643e-06, "logits/chosen": -2.9890880584716797, "logits/rejected": -2.847445011138916, "logps/chosen": -101.89849853515625, "logps/rejected": -1035.537353515625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.4025927186012268, "rewards/margins": 9.575505256652832, "rewards/rejected": -9.978097915649414, "step": 2340 }, { "epoch": 0.14, "learning_rate": 4.975510584218614e-06, "logits/chosen": -2.9594712257385254, "logits/rejected": -2.8624861240386963, "logps/chosen": -66.60189056396484, "logps/rejected": -1064.6917724609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.05322108790278435, "rewards/margins": 10.22591495513916, "rewards/rejected": -10.279135704040527, "step": 2350 }, { "epoch": 0.14, "learning_rate": 4.974778643885153e-06, "logits/chosen": -2.9398505687713623, "logits/rejected": -2.860945224761963, "logps/chosen": -68.85578918457031, "logps/rejected": -1014.9910278320312, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.054184626787900925, "rewards/margins": 9.713037490844727, "rewards/rejected": -9.767221450805664, "step": 2360 }, { "epoch": 0.14, "learning_rate": 4.974035981329465e-06, "logits/chosen": -2.9818103313446045, "logits/rejected": -2.8641133308410645, "logps/chosen": -81.35318756103516, "logps/rejected": -1049.381103515625, "loss": 0.0483, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.14686064422130585, "rewards/margins": 9.974977493286133, "rewards/rejected": -10.121838569641113, "step": 2370 }, { "epoch": 0.14, "learning_rate": 4.973282599769207e-06, "logits/chosen": -2.9469661712646484, "logits/rejected": -2.8466956615448, "logps/chosen": -57.83955764770508, "logps/rejected": -1023.7216796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.025726955384016037, "rewards/margins": 9.87195873260498, "rewards/rejected": -9.846231460571289, "step": 2380 }, { "epoch": 0.14, "learning_rate": 4.972518502468482e-06, "logits/chosen": -2.941382884979248, "logits/rejected": -2.871415138244629, "logps/chosen": -57.07490158081055, "logps/rejected": -965.48779296875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.03393467515707016, "rewards/margins": 9.31026840209961, "rewards/rejected": -9.276331901550293, "step": 2390 }, { "epoch": 0.14, "learning_rate": 4.971743692737814e-06, "logits/chosen": -2.9544639587402344, "logits/rejected": -2.8425090312957764, "logps/chosen": -72.48136901855469, "logps/rejected": -980.38232421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.10478965938091278, "rewards/margins": 9.319761276245117, "rewards/rejected": -9.424551010131836, "step": 2400 }, { "epoch": 0.14, "learning_rate": 4.970958173934144e-06, "logits/chosen": -2.9861598014831543, "logits/rejected": -2.8865461349487305, "logps/chosen": -70.74130249023438, "logps/rejected": -1030.4306640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.026911329478025436, "rewards/margins": 9.901135444641113, "rewards/rejected": -9.928047180175781, "step": 2410 }, { "epoch": 0.14, "learning_rate": 4.970161949460808e-06, "logits/chosen": -2.956362247467041, "logits/rejected": -2.862729549407959, "logps/chosen": -60.67615509033203, "logps/rejected": -1001.1976318359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.06034597009420395, "rewards/margins": 9.681482315063477, "rewards/rejected": -9.621134757995605, "step": 2420 }, { "epoch": 0.14, "learning_rate": 4.969355022767529e-06, "logits/chosen": -2.9671881198883057, "logits/rejected": -2.8608107566833496, "logps/chosen": -72.86878967285156, "logps/rejected": -1049.2708740234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.054186590015888214, "rewards/margins": 10.053744316101074, "rewards/rejected": -10.107931137084961, "step": 2430 }, { "epoch": 0.15, "learning_rate": 4.968537397350395e-06, "logits/chosen": -2.9818739891052246, "logits/rejected": -2.8581364154815674, "logps/chosen": -64.81021118164062, "logps/rejected": -1043.052978515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.005666804499924183, "rewards/margins": 10.039942741394043, "rewards/rejected": -10.045608520507812, "step": 2440 }, { "epoch": 0.15, "learning_rate": 4.967709076751848e-06, "logits/chosen": -2.951430320739746, "logits/rejected": -2.8436644077301025, "logps/chosen": -63.454322814941406, "logps/rejected": -1078.3201904296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.019255178049206734, "rewards/margins": 10.42496109008789, "rewards/rejected": -10.405704498291016, "step": 2450 }, { "epoch": 0.15, "learning_rate": 4.96687006456067e-06, "logits/chosen": -2.9567294120788574, "logits/rejected": -2.859009265899658, "logps/chosen": -74.52302551269531, "logps/rejected": -994.3258056640625, "loss": 0.0089, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.08216142654418945, "rewards/margins": 9.476049423217773, "rewards/rejected": -9.558210372924805, "step": 2460 }, { "epoch": 0.15, "learning_rate": 4.966020364411964e-06, "logits/chosen": -2.9782068729400635, "logits/rejected": -2.8819756507873535, "logps/chosen": -54.56938552856445, "logps/rejected": -968.2350463867188, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.1396438181400299, "rewards/margins": 9.442314147949219, "rewards/rejected": -9.3026704788208, "step": 2470 }, { "epoch": 0.15, "learning_rate": 4.965159979987139e-06, "logits/chosen": -2.9574875831604004, "logits/rejected": -2.8682565689086914, "logps/chosen": -53.4520263671875, "logps/rejected": -951.3831176757812, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.150371253490448, "rewards/margins": 9.27844524383545, "rewards/rejected": -9.128074645996094, "step": 2480 }, { "epoch": 0.15, "learning_rate": 4.964288915013895e-06, "logits/chosen": -3.0018887519836426, "logits/rejected": -2.8697009086608887, "logps/chosen": -53.776695251464844, "logps/rejected": -1035.00146484375, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": 0.09582284837961197, "rewards/margins": 10.071747779846191, "rewards/rejected": -9.975923538208008, "step": 2490 }, { "epoch": 0.15, "learning_rate": 4.963407173266208e-06, "logits/chosen": -2.959106683731079, "logits/rejected": -2.8568146228790283, "logps/chosen": -69.120849609375, "logps/rejected": -992.5695190429688, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.07120773941278458, "rewards/margins": 9.475765228271484, "rewards/rejected": -9.546972274780273, "step": 2500 }, { "epoch": 0.15, "learning_rate": 4.962514758564309e-06, "logits/chosen": -2.9296813011169434, "logits/rejected": -2.8415138721466064, "logps/chosen": -55.42619705200195, "logps/rejected": -1074.56884765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.043193213641643524, "rewards/margins": 10.415844917297363, "rewards/rejected": -10.372652053833008, "step": 2510 }, { "epoch": 0.15, "learning_rate": 4.961611674774674e-06, "logits/chosen": -2.9726715087890625, "logits/rejected": -2.887467861175537, "logps/chosen": -52.93353271484375, "logps/rejected": -952.38330078125, "loss": 0.0114, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.07102523744106293, "rewards/margins": 9.212408065795898, "rewards/rejected": -9.141383171081543, "step": 2520 }, { "epoch": 0.15, "learning_rate": 4.960697925810003e-06, "logits/chosen": -2.9569027423858643, "logits/rejected": -2.91054630279541, "logps/chosen": -39.932350158691406, "logps/rejected": -970.1618041992188, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 0.24427469074726105, "rewards/margins": 9.551437377929688, "rewards/rejected": -9.30716323852539, "step": 2530 }, { "epoch": 0.15, "learning_rate": 4.9597735156292024e-06, "logits/chosen": -2.9646506309509277, "logits/rejected": -2.8531908988952637, "logps/chosen": -57.5560417175293, "logps/rejected": -986.1834106445312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.1097189337015152, "rewards/margins": 9.602791786193848, "rewards/rejected": -9.493074417114258, "step": 2540 }, { "epoch": 0.15, "learning_rate": 4.9588384482373695e-06, "logits/chosen": -2.9355504512786865, "logits/rejected": -2.867257595062256, "logps/chosen": -72.02931213378906, "logps/rejected": -1032.745361328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.08119822293519974, "rewards/margins": 9.880294799804688, "rewards/rejected": -9.961492538452148, "step": 2550 }, { "epoch": 0.15, "learning_rate": 4.957892727685778e-06, "logits/chosen": -2.993722438812256, "logits/rejected": -2.8871734142303467, "logps/chosen": -65.03272247314453, "logps/rejected": -1042.750244140625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.015622774139046669, "rewards/margins": 10.069446563720703, "rewards/rejected": -10.053825378417969, "step": 2560 }, { "epoch": 0.15, "learning_rate": 4.956936358071853e-06, "logits/chosen": -2.975273847579956, "logits/rejected": -2.8740267753601074, "logps/chosen": -54.11704635620117, "logps/rejected": -1040.681884765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.17591051757335663, "rewards/margins": 10.202128410339355, "rewards/rejected": -10.026217460632324, "step": 2570 }, { "epoch": 0.15, "learning_rate": 4.955969343539162e-06, "logits/chosen": -2.9521117210388184, "logits/rejected": -2.8139100074768066, "logps/chosen": -52.07745361328125, "logps/rejected": -1060.5126953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.10936890542507172, "rewards/margins": 10.329370498657227, "rewards/rejected": -10.220003128051758, "step": 2580 }, { "epoch": 0.15, "learning_rate": 4.954991688277391e-06, "logits/chosen": -2.915135622024536, "logits/rejected": -2.8330397605895996, "logps/chosen": -54.18512725830078, "logps/rejected": -1024.5478515625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.06751909106969833, "rewards/margins": 9.937823295593262, "rewards/rejected": -9.870304107666016, "step": 2590 }, { "epoch": 0.16, "learning_rate": 4.954003396522325e-06, "logits/chosen": -2.983473777770996, "logits/rejected": -2.89306640625, "logps/chosen": -43.304664611816406, "logps/rejected": -1096.1494140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.18473705649375916, "rewards/margins": 10.77103328704834, "rewards/rejected": -10.586296081542969, "step": 2600 }, { "epoch": 0.16, "learning_rate": 4.953004472555838e-06, "logits/chosen": -2.9548442363739014, "logits/rejected": -2.816035270690918, "logps/chosen": -61.39183807373047, "logps/rejected": -900.6253662109375, "loss": 0.0746, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.030104851350188255, "rewards/margins": 8.65517520904541, "rewards/rejected": -8.625070571899414, "step": 2610 }, { "epoch": 0.16, "learning_rate": 4.951994920705865e-06, "logits/chosen": -2.9941742420196533, "logits/rejected": -2.932429313659668, "logps/chosen": -59.56300735473633, "logps/rejected": -1016.6419067382812, "loss": 0.0645, "rewards/accuracies": 1.0, "rewards/chosen": 0.03961055725812912, "rewards/margins": 9.830606460571289, "rewards/rejected": -9.790994644165039, "step": 2620 }, { "epoch": 0.16, "learning_rate": 4.95097474534639e-06, "logits/chosen": -2.941455364227295, "logits/rejected": -2.8995704650878906, "logps/chosen": -63.13153076171875, "logps/rejected": -942.1927490234375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 0.05063943937420845, "rewards/margins": 9.10390567779541, "rewards/rejected": -9.053265571594238, "step": 2630 }, { "epoch": 0.16, "learning_rate": 4.949943950897422e-06, "logits/chosen": -2.9724724292755127, "logits/rejected": -2.8806252479553223, "logps/chosen": -45.168785095214844, "logps/rejected": -936.5042114257812, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 0.18214330077171326, "rewards/margins": 9.170300483703613, "rewards/rejected": -8.988157272338867, "step": 2640 }, { "epoch": 0.16, "learning_rate": 4.94890254182498e-06, "logits/chosen": -2.9569613933563232, "logits/rejected": -2.882477045059204, "logps/chosen": -49.79032516479492, "logps/rejected": -1054.190185546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.15952204167842865, "rewards/margins": 10.316502571105957, "rewards/rejected": -10.156980514526367, "step": 2650 }, { "epoch": 0.16, "learning_rate": 4.947850522641072e-06, "logits/chosen": -2.9504122734069824, "logits/rejected": -2.8628299236297607, "logps/chosen": -56.89536666870117, "logps/rejected": -1015.61376953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.0404062382876873, "rewards/margins": 9.817435264587402, "rewards/rejected": -9.777029037475586, "step": 2660 }, { "epoch": 0.16, "learning_rate": 4.946787897903674e-06, "logits/chosen": -2.9724783897399902, "logits/rejected": -2.853990077972412, "logps/chosen": -42.43660354614258, "logps/rejected": -1069.816650390625, "loss": 0.0852, "rewards/accuracies": 1.0, "rewards/chosen": 0.22516293823719025, "rewards/margins": 10.555418014526367, "rewards/rejected": -10.330256462097168, "step": 2670 }, { "epoch": 0.16, "learning_rate": 4.945714672216713e-06, "logits/chosen": -2.9569344520568848, "logits/rejected": -2.8978726863861084, "logps/chosen": -47.40793228149414, "logps/rejected": -1003.4295043945312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.16983157396316528, "rewards/margins": 9.83198356628418, "rewards/rejected": -9.662150382995605, "step": 2680 }, { "epoch": 0.16, "learning_rate": 4.944630850230045e-06, "logits/chosen": -2.9499363899230957, "logits/rejected": -2.8723857402801514, "logps/chosen": -54.71223831176758, "logps/rejected": -946.0211791992188, "loss": 0.035, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.1321926862001419, "rewards/margins": 9.219447135925293, "rewards/rejected": -9.087254524230957, "step": 2690 }, { "epoch": 0.16, "learning_rate": 4.9435364366394334e-06, "logits/chosen": -2.981926918029785, "logits/rejected": -2.888505458831787, "logps/chosen": -36.991336822509766, "logps/rejected": -938.9461059570312, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.20426340401172638, "rewards/margins": 9.211601257324219, "rewards/rejected": -9.007338523864746, "step": 2700 }, { "epoch": 0.16, "learning_rate": 4.942431436186536e-06, "logits/chosen": -2.942267417907715, "logits/rejected": -2.8647377490997314, "logps/chosen": -48.552650451660156, "logps/rejected": -947.3763427734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.12513083219528198, "rewards/margins": 9.22277545928955, "rewards/rejected": -9.097644805908203, "step": 2710 }, { "epoch": 0.16, "learning_rate": 4.941315853658873e-06, "logits/chosen": -2.9587295055389404, "logits/rejected": -2.857226610183716, "logps/chosen": -50.41681671142578, "logps/rejected": -907.0501098632812, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.19839799404144287, "rewards/margins": 8.883308410644531, "rewards/rejected": -8.684911727905273, "step": 2720 }, { "epoch": 0.16, "learning_rate": 4.940189693889819e-06, "logits/chosen": -2.9573190212249756, "logits/rejected": -2.8321292400360107, "logps/chosen": -51.81952667236328, "logps/rejected": -978.16845703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.10128283500671387, "rewards/margins": 9.5099458694458, "rewards/rejected": -9.408662796020508, "step": 2730 }, { "epoch": 0.16, "learning_rate": 4.939052961758569e-06, "logits/chosen": -2.9771602153778076, "logits/rejected": -2.8976359367370605, "logps/chosen": -54.787872314453125, "logps/rejected": -999.02099609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.0750129371881485, "rewards/margins": 9.693206787109375, "rewards/rejected": -9.618194580078125, "step": 2740 }, { "epoch": 0.16, "learning_rate": 4.937905662190129e-06, "logits/chosen": -2.9760470390319824, "logits/rejected": -2.8650577068328857, "logps/chosen": -57.52571487426758, "logps/rejected": -1023.3736572265625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.02280553989112377, "rewards/margins": 9.884014129638672, "rewards/rejected": -9.86120891571045, "step": 2750 }, { "epoch": 0.16, "learning_rate": 4.936747800155285e-06, "logits/chosen": -2.9456734657287598, "logits/rejected": -2.8886818885803223, "logps/chosen": -48.00023651123047, "logps/rejected": -967.7344970703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.13082967698574066, "rewards/margins": 9.424239158630371, "rewards/rejected": -9.293411254882812, "step": 2760 }, { "epoch": 0.17, "learning_rate": 4.935579380670592e-06, "logits/chosen": -2.98140287399292, "logits/rejected": -2.8898417949676514, "logps/chosen": -49.300086975097656, "logps/rejected": -1012.50390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.1719980686903, "rewards/margins": 9.917055130004883, "rewards/rejected": -9.745058059692383, "step": 2770 }, { "epoch": 0.17, "learning_rate": 4.934400408798339e-06, "logits/chosen": -2.936044216156006, "logits/rejected": -2.8301520347595215, "logps/chosen": -47.889556884765625, "logps/rejected": -972.4114990234375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.16290335357189178, "rewards/margins": 9.514323234558105, "rewards/rejected": -9.351420402526855, "step": 2780 }, { "epoch": 0.17, "learning_rate": 4.93321088964654e-06, "logits/chosen": -2.9664034843444824, "logits/rejected": -2.8527307510375977, "logps/chosen": -46.75312805175781, "logps/rejected": -978.7747802734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.16828954219818115, "rewards/margins": 9.591493606567383, "rewards/rejected": -9.42320442199707, "step": 2790 }, { "epoch": 0.17, "learning_rate": 4.932010828368903e-06, "logits/chosen": -2.974907398223877, "logits/rejected": -2.8927981853485107, "logps/chosen": -49.723060607910156, "logps/rejected": -996.3927612304688, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.1359521895647049, "rewards/margins": 9.715728759765625, "rewards/rejected": -9.579774856567383, "step": 2800 }, { "epoch": 0.17, "learning_rate": 4.930800230164812e-06, "logits/chosen": -2.965446710586548, "logits/rejected": -2.845327854156494, "logps/chosen": -51.6041374206543, "logps/rejected": -1088.370361328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.11454744637012482, "rewards/margins": 10.625516891479492, "rewards/rejected": -10.510969161987305, "step": 2810 }, { "epoch": 0.17, "learning_rate": 4.929579100279302e-06, "logits/chosen": -2.9859225749969482, "logits/rejected": -2.883265733718872, "logps/chosen": -43.5790901184082, "logps/rejected": -1050.3604736328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.19967174530029297, "rewards/margins": 10.326684951782227, "rewards/rejected": -10.12701416015625, "step": 2820 }, { "epoch": 0.17, "learning_rate": 4.92834744400304e-06, "logits/chosen": -2.955544948577881, "logits/rejected": -2.864315986633301, "logps/chosen": -67.30000305175781, "logps/rejected": -1084.368408203125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.03332411125302315, "rewards/margins": 10.428949356079102, "rewards/rejected": -10.462273597717285, "step": 2830 }, { "epoch": 0.17, "learning_rate": 4.927105266672296e-06, "logits/chosen": -2.9702792167663574, "logits/rejected": -2.8655378818511963, "logps/chosen": -104.3841323852539, "logps/rejected": -1104.6790771484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.3541564345359802, "rewards/margins": 10.302824020385742, "rewards/rejected": -10.65697956085205, "step": 2840 }, { "epoch": 0.17, "learning_rate": 4.925852573668928e-06, "logits/chosen": -2.9897191524505615, "logits/rejected": -2.9109058380126953, "logps/chosen": -123.2896499633789, "logps/rejected": -1167.528564453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5692538022994995, "rewards/margins": 10.728341102600098, "rewards/rejected": -11.29759407043457, "step": 2850 }, { "epoch": 0.17, "learning_rate": 4.924589370420351e-06, "logits/chosen": -2.9982030391693115, "logits/rejected": -2.902489185333252, "logps/chosen": -73.14472961425781, "logps/rejected": -1020.1849365234375, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.09072460234165192, "rewards/margins": 9.741647720336914, "rewards/rejected": -9.832369804382324, "step": 2860 }, { "epoch": 0.17, "learning_rate": 4.923315662399517e-06, "logits/chosen": -2.9266700744628906, "logits/rejected": -2.8844971656799316, "logps/chosen": -51.11089324951172, "logps/rejected": -955.8761596679688, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 0.172594353556633, "rewards/margins": 9.352328300476074, "rewards/rejected": -9.179734230041504, "step": 2870 }, { "epoch": 0.17, "learning_rate": 4.9220314551248915e-06, "logits/chosen": -2.9461703300476074, "logits/rejected": -2.798128843307495, "logps/chosen": -43.66674041748047, "logps/rejected": -958.7591552734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.19586893916130066, "rewards/margins": 9.403657913208008, "rewards/rejected": -9.207788467407227, "step": 2880 }, { "epoch": 0.17, "learning_rate": 4.920736754160429e-06, "logits/chosen": -2.971539258956909, "logits/rejected": -2.908888339996338, "logps/chosen": -61.88190841674805, "logps/rejected": -1043.7586669921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.02804974839091301, "rewards/margins": 10.098593711853027, "rewards/rejected": -10.070545196533203, "step": 2890 }, { "epoch": 0.17, "learning_rate": 4.91943156511555e-06, "logits/chosen": -2.9774200916290283, "logits/rejected": -2.8639121055603027, "logps/chosen": -60.19709396362305, "logps/rejected": -1109.225830078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.011752814054489136, "rewards/margins": 10.707283973693848, "rewards/rejected": -10.719037055969238, "step": 2900 }, { "epoch": 0.17, "learning_rate": 4.918115893645113e-06, "logits/chosen": -2.977572202682495, "logits/rejected": -2.8727574348449707, "logps/chosen": -84.39601135253906, "logps/rejected": -1131.7991943359375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.18071883916854858, "rewards/margins": 10.752632141113281, "rewards/rejected": -10.933351516723633, "step": 2910 }, { "epoch": 0.17, "learning_rate": 4.916789745449396e-06, "logits/chosen": -2.9503846168518066, "logits/rejected": -2.8656723499298096, "logps/chosen": -56.78056716918945, "logps/rejected": -1042.875732421875, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": 0.05359282344579697, "rewards/margins": 10.111259460449219, "rewards/rejected": -10.057666778564453, "step": 2920 }, { "epoch": 0.17, "learning_rate": 4.915453126274065e-06, "logits/chosen": -2.9641928672790527, "logits/rejected": -2.854383945465088, "logps/chosen": -40.709571838378906, "logps/rejected": -1007.8673095703125, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 0.24723069369792938, "rewards/margins": 9.96162223815918, "rewards/rejected": -9.714391708374023, "step": 2930 }, { "epoch": 0.18, "learning_rate": 4.914106041910155e-06, "logits/chosen": -2.9491474628448486, "logits/rejected": -2.8283474445343018, "logps/chosen": -78.45081329345703, "logps/rejected": -1065.942138671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.08843568712472916, "rewards/margins": 10.197487831115723, "rewards/rejected": -10.285922050476074, "step": 2940 }, { "epoch": 0.18, "learning_rate": 4.9127484981940425e-06, "logits/chosen": -2.956881046295166, "logits/rejected": -2.866255283355713, "logps/chosen": -69.83671569824219, "logps/rejected": -1127.74560546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.05478878691792488, "rewards/margins": 10.836261749267578, "rewards/rejected": -10.891050338745117, "step": 2950 }, { "epoch": 0.18, "learning_rate": 4.911380501007417e-06, "logits/chosen": -2.9872665405273438, "logits/rejected": -2.8426127433776855, "logps/chosen": -71.17304992675781, "logps/rejected": -1114.8095703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.05085368826985359, "rewards/margins": 10.722138404846191, "rewards/rejected": -10.772991180419922, "step": 2960 }, { "epoch": 0.18, "learning_rate": 4.910002056277263e-06, "logits/chosen": -2.9557697772979736, "logits/rejected": -2.833559989929199, "logps/chosen": -63.65752029418945, "logps/rejected": -1092.1741943359375, "loss": 0.061, "rewards/accuracies": 1.0, "rewards/chosen": 0.04358328506350517, "rewards/margins": 10.57384204864502, "rewards/rejected": -10.530259132385254, "step": 2970 }, { "epoch": 0.18, "learning_rate": 4.908613169975828e-06, "logits/chosen": -2.9492058753967285, "logits/rejected": -2.8577487468719482, "logps/chosen": -44.603843688964844, "logps/rejected": -1056.6339111328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.18536703288555145, "rewards/margins": 10.364958763122559, "rewards/rejected": -10.17959213256836, "step": 2980 }, { "epoch": 0.18, "learning_rate": 4.9072138481205985e-06, "logits/chosen": -2.9577853679656982, "logits/rejected": -2.874290943145752, "logps/chosen": -37.99690628051758, "logps/rejected": -1006.8896484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2249661237001419, "rewards/margins": 9.917187690734863, "rewards/rejected": -9.692220687866211, "step": 2990 }, { "epoch": 0.18, "learning_rate": 4.905804096774274e-06, "logits/chosen": -2.9554154872894287, "logits/rejected": -2.8696045875549316, "logps/chosen": -48.18318557739258, "logps/rejected": -1026.7672119140625, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": 0.12706971168518066, "rewards/margins": 10.022737503051758, "rewards/rejected": -9.895668029785156, "step": 3000 }, { "epoch": 0.18, "learning_rate": 4.90438392204474e-06, "logits/chosen": -2.978259801864624, "logits/rejected": -2.886735677719116, "logps/chosen": -87.91888427734375, "logps/rejected": -1054.121337890625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.19893428683280945, "rewards/margins": 9.959081649780273, "rewards/rejected": -10.158018112182617, "step": 3010 }, { "epoch": 0.18, "learning_rate": 4.902953330085045e-06, "logits/chosen": -2.973253011703491, "logits/rejected": -2.8797106742858887, "logps/chosen": -94.67001342773438, "logps/rejected": -1053.0162353515625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.3396374583244324, "rewards/margins": 9.828425407409668, "rewards/rejected": -10.168063163757324, "step": 3020 }, { "epoch": 0.18, "learning_rate": 4.901512327093369e-06, "logits/chosen": -2.9804909229278564, "logits/rejected": -2.871004581451416, "logps/chosen": -77.38380432128906, "logps/rejected": -1108.5672607421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.12006773799657822, "rewards/margins": 10.58984088897705, "rewards/rejected": -10.709907531738281, "step": 3030 }, { "epoch": 0.18, "learning_rate": 4.900060919313001e-06, "logits/chosen": -2.9830996990203857, "logits/rejected": -2.88403058052063, "logps/chosen": -82.14553833007812, "logps/rejected": -1072.0496826171875, "loss": 0.0529, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.17751523852348328, "rewards/margins": 10.165783882141113, "rewards/rejected": -10.343297958374023, "step": 3040 }, { "epoch": 0.18, "learning_rate": 4.8985991130323055e-06, "logits/chosen": -2.9373552799224854, "logits/rejected": -2.879936695098877, "logps/chosen": -94.3067626953125, "logps/rejected": -980.8263549804688, "loss": 0.0305, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3377383351325989, "rewards/margins": 9.085062980651855, "rewards/rejected": -9.42280101776123, "step": 3050 }, { "epoch": 0.18, "learning_rate": 4.8971269145847036e-06, "logits/chosen": -2.9581799507141113, "logits/rejected": -2.8713488578796387, "logps/chosen": -129.9235076904297, "logps/rejected": -960.3141479492188, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.7009344100952148, "rewards/margins": 8.520880699157715, "rewards/rejected": -9.221816062927246, "step": 3060 }, { "epoch": 0.18, "learning_rate": 4.895644330348639e-06, "logits/chosen": -2.9953055381774902, "logits/rejected": -2.8852944374084473, "logps/chosen": -45.889549255371094, "logps/rejected": -973.03369140625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.10925817489624023, "rewards/margins": 9.458818435668945, "rewards/rejected": -9.349560737609863, "step": 3070 }, { "epoch": 0.18, "learning_rate": 4.8941513667475545e-06, "logits/chosen": -3.0029428005218506, "logits/rejected": -2.917351245880127, "logps/chosen": -37.85194778442383, "logps/rejected": -1027.5169677734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.18980060517787933, "rewards/margins": 10.095051765441895, "rewards/rejected": -9.905251502990723, "step": 3080 }, { "epoch": 0.18, "learning_rate": 4.892648030249863e-06, "logits/chosen": -2.9786131381988525, "logits/rejected": -2.9129385948181152, "logps/chosen": -48.78046417236328, "logps/rejected": -992.8571166992188, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.21341562271118164, "rewards/margins": 9.761307716369629, "rewards/rejected": -9.547889709472656, "step": 3090 }, { "epoch": 0.18, "learning_rate": 4.891134327368919e-06, "logits/chosen": -2.979504346847534, "logits/rejected": -2.918461799621582, "logps/chosen": -45.552085876464844, "logps/rejected": -985.6292114257812, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.21483096480369568, "rewards/margins": 9.68559741973877, "rewards/rejected": -9.4707670211792, "step": 3100 }, { "epoch": 0.19, "learning_rate": 4.889610264662984e-06, "logits/chosen": -2.994410991668701, "logits/rejected": -2.8839643001556396, "logps/chosen": -40.928382873535156, "logps/rejected": -1035.4713134765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2053946554660797, "rewards/margins": 10.185602188110352, "rewards/rejected": -9.980207443237305, "step": 3110 }, { "epoch": 0.19, "learning_rate": 4.888075848735216e-06, "logits/chosen": -2.980168342590332, "logits/rejected": -2.909775733947754, "logps/chosen": -43.190696716308594, "logps/rejected": -1028.197021484375, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": 0.23698899149894714, "rewards/margins": 10.128162384033203, "rewards/rejected": -9.891172409057617, "step": 3120 }, { "epoch": 0.19, "learning_rate": 4.8865310862336185e-06, "logits/chosen": -2.972912549972534, "logits/rejected": -2.900979518890381, "logps/chosen": -41.773956298828125, "logps/rejected": -1014.2683715820312, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.2605302929878235, "rewards/margins": 10.03182315826416, "rewards/rejected": -9.771292686462402, "step": 3130 }, { "epoch": 0.19, "learning_rate": 4.88497598385103e-06, "logits/chosen": -2.923527479171753, "logits/rejected": -2.840421438217163, "logps/chosen": -37.317543029785156, "logps/rejected": -1032.218017578125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.21564380824565887, "rewards/margins": 10.156819343566895, "rewards/rejected": -9.94117546081543, "step": 3140 }, { "epoch": 0.19, "learning_rate": 4.883410548325083e-06, "logits/chosen": -2.9571986198425293, "logits/rejected": -2.8680596351623535, "logps/chosen": -49.276580810546875, "logps/rejected": -1116.514404296875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.18114157021045685, "rewards/margins": 10.969841003417969, "rewards/rejected": -10.788700103759766, "step": 3150 }, { "epoch": 0.19, "learning_rate": 4.881834786438183e-06, "logits/chosen": -2.943228244781494, "logits/rejected": -2.8872551918029785, "logps/chosen": -42.54434585571289, "logps/rejected": -1053.8485107421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23213811218738556, "rewards/margins": 10.400126457214355, "rewards/rejected": -10.167988777160645, "step": 3160 }, { "epoch": 0.19, "learning_rate": 4.880248705017472e-06, "logits/chosen": -2.9851269721984863, "logits/rejected": -2.892990827560425, "logps/chosen": -35.10634994506836, "logps/rejected": -999.5642700195312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.22910138964653015, "rewards/margins": 9.858392715454102, "rewards/rejected": -9.629291534423828, "step": 3170 }, { "epoch": 0.19, "learning_rate": 4.878652310934804e-06, "logits/chosen": -2.9725711345672607, "logits/rejected": -2.906649589538574, "logps/chosen": -39.38788986206055, "logps/rejected": -1055.1795654296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.22858448326587677, "rewards/margins": 10.398777961730957, "rewards/rejected": -10.17019271850586, "step": 3180 }, { "epoch": 0.19, "learning_rate": 4.877045611106715e-06, "logits/chosen": -2.9919393062591553, "logits/rejected": -2.9301717281341553, "logps/chosen": -40.116004943847656, "logps/rejected": -1054.2490234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25794392824172974, "rewards/margins": 10.42171573638916, "rewards/rejected": -10.163771629333496, "step": 3190 }, { "epoch": 0.19, "learning_rate": 4.8754286124943885e-06, "logits/chosen": -2.961378335952759, "logits/rejected": -2.897733449935913, "logps/chosen": -46.8245735168457, "logps/rejected": -1016.8805541992188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2582324743270874, "rewards/margins": 10.052190780639648, "rewards/rejected": -9.79395866394043, "step": 3200 }, { "epoch": 0.19, "learning_rate": 4.873801322103632e-06, "logits/chosen": -2.9616150856018066, "logits/rejected": -2.86604380607605, "logps/chosen": -39.605770111083984, "logps/rejected": -1114.091064453125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 0.22610947489738464, "rewards/margins": 10.984445571899414, "rewards/rejected": -10.758336067199707, "step": 3210 }, { "epoch": 0.19, "learning_rate": 4.872163746984839e-06, "logits/chosen": -2.928082227706909, "logits/rejected": -2.8627514839172363, "logps/chosen": -45.92241668701172, "logps/rejected": -1087.9366455078125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.12850940227508545, "rewards/margins": 10.623930931091309, "rewards/rejected": -10.495420455932617, "step": 3220 }, { "epoch": 0.19, "learning_rate": 4.8705158942329676e-06, "logits/chosen": -2.95467209815979, "logits/rejected": -2.8933050632476807, "logps/chosen": -42.874210357666016, "logps/rejected": -1032.445068359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.1890021413564682, "rewards/margins": 10.133962631225586, "rewards/rejected": -9.94495964050293, "step": 3230 }, { "epoch": 0.19, "learning_rate": 4.8688577709875015e-06, "logits/chosen": -2.9485697746276855, "logits/rejected": -2.8615517616271973, "logps/chosen": -44.63134002685547, "logps/rejected": -1038.889404296875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.19510862231254578, "rewards/margins": 10.207784652709961, "rewards/rejected": -10.012675285339355, "step": 3240 }, { "epoch": 0.19, "learning_rate": 4.8671893844324215e-06, "logits/chosen": -2.944218873977661, "logits/rejected": -2.848785877227783, "logps/chosen": -43.50326156616211, "logps/rejected": -1092.7396240234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2134886234998703, "rewards/margins": 10.765542984008789, "rewards/rejected": -10.552055358886719, "step": 3250 }, { "epoch": 0.19, "learning_rate": 4.865510741796178e-06, "logits/chosen": -2.956845760345459, "logits/rejected": -2.8670265674591064, "logps/chosen": -43.12239456176758, "logps/rejected": -1097.885986328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2098098248243332, "rewards/margins": 10.80146598815918, "rewards/rejected": -10.591656684875488, "step": 3260 }, { "epoch": 0.19, "learning_rate": 4.863821850351655e-06, "logits/chosen": -2.9129586219787598, "logits/rejected": -2.845798969268799, "logps/chosen": -42.166114807128906, "logps/rejected": -1066.141845703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.19420184195041656, "rewards/margins": 10.471370697021484, "rewards/rejected": -10.277170181274414, "step": 3270 }, { "epoch": 0.2, "learning_rate": 4.862122717416142e-06, "logits/chosen": -2.9540505409240723, "logits/rejected": -2.8405508995056152, "logps/chosen": -44.972652435302734, "logps/rejected": -1051.3975830078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23038356006145477, "rewards/margins": 10.372662544250488, "rewards/rejected": -10.142279624938965, "step": 3280 }, { "epoch": 0.2, "learning_rate": 4.860413350351299e-06, "logits/chosen": -2.9800453186035156, "logits/rejected": -2.8804965019226074, "logps/chosen": -38.05177688598633, "logps/rejected": -1083.3980712890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2185901403427124, "rewards/margins": 10.680554389953613, "rewards/rejected": -10.461962699890137, "step": 3290 }, { "epoch": 0.2, "learning_rate": 4.8586937565631265e-06, "logits/chosen": -2.9386141300201416, "logits/rejected": -2.840808153152466, "logps/chosen": -46.96597671508789, "logps/rejected": -1080.457275390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.20134034752845764, "rewards/margins": 10.621806144714355, "rewards/rejected": -10.420466423034668, "step": 3300 }, { "epoch": 0.2, "learning_rate": 4.856963943501935e-06, "logits/chosen": -2.94827938079834, "logits/rejected": -2.850008964538574, "logps/chosen": -44.79694366455078, "logps/rejected": -1090.703369140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.19133782386779785, "rewards/margins": 10.723566055297852, "rewards/rejected": -10.532228469848633, "step": 3310 }, { "epoch": 0.2, "learning_rate": 4.85522391866231e-06, "logits/chosen": -2.972383975982666, "logits/rejected": -2.901149272918701, "logps/chosen": -42.33626174926758, "logps/rejected": -1165.077880859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.18628986179828644, "rewards/margins": 11.459733963012695, "rewards/rejected": -11.273444175720215, "step": 3320 }, { "epoch": 0.2, "learning_rate": 4.85347368958308e-06, "logits/chosen": -2.9746334552764893, "logits/rejected": -2.8701202869415283, "logps/chosen": -46.675865173339844, "logps/rejected": -1103.2406005859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.18363074958324432, "rewards/margins": 10.844404220581055, "rewards/rejected": -10.660772323608398, "step": 3330 }, { "epoch": 0.2, "learning_rate": 4.8517132638472845e-06, "logits/chosen": -2.9576921463012695, "logits/rejected": -2.8849258422851562, "logps/chosen": -46.532554626464844, "logps/rejected": -1057.11865234375, "loss": 0.0867, "rewards/accuracies": 1.0, "rewards/chosen": 0.1483963578939438, "rewards/margins": 10.35015869140625, "rewards/rejected": -10.201762199401855, "step": 3340 }, { "epoch": 0.2, "learning_rate": 4.849942649082143e-06, "logits/chosen": -2.9304111003875732, "logits/rejected": -2.832392454147339, "logps/chosen": -41.16474151611328, "logps/rejected": -1109.0081787109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.21406090259552002, "rewards/margins": 10.91746711730957, "rewards/rejected": -10.703404426574707, "step": 3350 }, { "epoch": 0.2, "learning_rate": 4.848161852959016e-06, "logits/chosen": -2.9727137088775635, "logits/rejected": -2.893472194671631, "logps/chosen": -41.14421844482422, "logps/rejected": -1073.107421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.20253677666187286, "rewards/margins": 10.562378883361816, "rewards/rejected": -10.359843254089355, "step": 3360 }, { "epoch": 0.2, "learning_rate": 4.84637088319338e-06, "logits/chosen": -2.979112148284912, "logits/rejected": -2.8810553550720215, "logps/chosen": -45.763343811035156, "logps/rejected": -1149.87060546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.19753466546535492, "rewards/margins": 11.31944751739502, "rewards/rejected": -11.121912002563477, "step": 3370 }, { "epoch": 0.2, "learning_rate": 4.844569747544788e-06, "logits/chosen": -2.970979690551758, "logits/rejected": -2.88137149810791, "logps/chosen": -43.17435836791992, "logps/rejected": -1145.624267578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.18971891701221466, "rewards/margins": 11.265644073486328, "rewards/rejected": -11.075925827026367, "step": 3380 }, { "epoch": 0.2, "learning_rate": 4.842758453816836e-06, "logits/chosen": -2.977569103240967, "logits/rejected": -2.888596534729004, "logps/chosen": -42.410221099853516, "logps/rejected": -1139.0423583984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.1997387707233429, "rewards/margins": 11.209894180297852, "rewards/rejected": -11.010154724121094, "step": 3390 }, { "epoch": 0.2, "learning_rate": 4.840937009857134e-06, "logits/chosen": -2.9522457122802734, "logits/rejected": -2.831906795501709, "logps/chosen": -48.42243194580078, "logps/rejected": -1086.2392578125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.1356564313173294, "rewards/margins": 10.632362365722656, "rewards/rejected": -10.496706008911133, "step": 3400 }, { "epoch": 0.2, "learning_rate": 4.839105423557266e-06, "logits/chosen": -2.9524800777435303, "logits/rejected": -2.868647575378418, "logps/chosen": -86.33299255371094, "logps/rejected": -1110.974609375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.2529589533805847, "rewards/margins": 10.473810195922852, "rewards/rejected": -10.726768493652344, "step": 3410 }, { "epoch": 0.2, "learning_rate": 4.8372637028527615e-06, "logits/chosen": -2.9537911415100098, "logits/rejected": -2.846672773361206, "logps/chosen": -75.83551025390625, "logps/rejected": -1070.7047119140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.08390065282583237, "rewards/margins": 10.241632461547852, "rewards/rejected": -10.325532913208008, "step": 3420 }, { "epoch": 0.2, "learning_rate": 4.835411855723056e-06, "logits/chosen": -2.9632837772369385, "logits/rejected": -2.8593602180480957, "logps/chosen": -50.737545013427734, "logps/rejected": -1066.8052978515625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": 0.09185446798801422, "rewards/margins": 10.389303207397461, "rewards/rejected": -10.297449111938477, "step": 3430 }, { "epoch": 0.21, "learning_rate": 4.83354989019146e-06, "logits/chosen": -2.959899425506592, "logits/rejected": -2.8537254333496094, "logps/chosen": -39.653297424316406, "logps/rejected": -1092.880126953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25444620847702026, "rewards/margins": 10.810781478881836, "rewards/rejected": -10.55633544921875, "step": 3440 }, { "epoch": 0.21, "learning_rate": 4.831677814325122e-06, "logits/chosen": -2.9868760108947754, "logits/rejected": -2.8948543071746826, "logps/chosen": -57.68395233154297, "logps/rejected": -1008.3638916015625, "loss": 0.2333, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.054385095834732056, "rewards/margins": 9.768671035766602, "rewards/rejected": -9.714284896850586, "step": 3450 }, { "epoch": 0.21, "learning_rate": 4.8297956362349955e-06, "logits/chosen": -2.967568874359131, "logits/rejected": -2.869018793106079, "logps/chosen": -39.31049346923828, "logps/rejected": -785.8841552734375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 0.23930588364601135, "rewards/margins": 7.72772216796875, "rewards/rejected": -7.488417148590088, "step": 3460 }, { "epoch": 0.21, "learning_rate": 4.8279033640758026e-06, "logits/chosen": -2.958725929260254, "logits/rejected": -2.8774220943450928, "logps/chosen": -38.74791717529297, "logps/rejected": -899.1671142578125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.2517092823982239, "rewards/margins": 8.862866401672363, "rewards/rejected": -8.61115837097168, "step": 3470 }, { "epoch": 0.21, "learning_rate": 4.826001006045997e-06, "logits/chosen": -2.975959062576294, "logits/rejected": -2.8970422744750977, "logps/chosen": -36.612876892089844, "logps/rejected": -970.6847534179688, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": 0.27700570225715637, "rewards/margins": 9.607995986938477, "rewards/rejected": -9.330989837646484, "step": 3480 }, { "epoch": 0.21, "learning_rate": 4.824088570387735e-06, "logits/chosen": -2.9542181491851807, "logits/rejected": -2.8645007610321045, "logps/chosen": -64.59330749511719, "logps/rejected": -983.4475708007812, "loss": 0.052, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.017155051231384277, "rewards/margins": 9.440872192382812, "rewards/rejected": -9.458028793334961, "step": 3490 }, { "epoch": 0.21, "learning_rate": 4.822166065386832e-06, "logits/chosen": -2.9731013774871826, "logits/rejected": -2.8947932720184326, "logps/chosen": -51.13991928100586, "logps/rejected": -943.62646484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.11826042085886002, "rewards/margins": 9.186973571777344, "rewards/rejected": -9.068713188171387, "step": 3500 }, { "epoch": 0.21, "learning_rate": 4.820233499372728e-06, "logits/chosen": -2.9383385181427, "logits/rejected": -2.838041067123413, "logps/chosen": -51.230018615722656, "logps/rejected": -1045.8521728515625, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": 0.15142546594142914, "rewards/margins": 10.228178977966309, "rewards/rejected": -10.076753616333008, "step": 3510 }, { "epoch": 0.21, "learning_rate": 4.8182908807184585e-06, "logits/chosen": -2.9561715126037598, "logits/rejected": -2.8703436851501465, "logps/chosen": -39.30424118041992, "logps/rejected": -1150.572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2876901924610138, "rewards/margins": 11.414693832397461, "rewards/rejected": -11.127004623413086, "step": 3520 }, { "epoch": 0.21, "learning_rate": 4.816338217840607e-06, "logits/chosen": -2.9600601196289062, "logits/rejected": -2.9189743995666504, "logps/chosen": -33.856178283691406, "logps/rejected": -1027.8829345703125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.25155049562454224, "rewards/margins": 10.15180778503418, "rewards/rejected": -9.900257110595703, "step": 3530 }, { "epoch": 0.21, "learning_rate": 4.814375519199281e-06, "logits/chosen": -2.9670374393463135, "logits/rejected": -2.838848829269409, "logps/chosen": -31.702667236328125, "logps/rejected": -1112.681884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2775587737560272, "rewards/margins": 11.033100128173828, "rewards/rejected": -10.75554370880127, "step": 3540 }, { "epoch": 0.21, "learning_rate": 4.812402793298063e-06, "logits/chosen": -2.9505691528320312, "logits/rejected": -2.870941638946533, "logps/chosen": -51.85960006713867, "logps/rejected": -1037.045166015625, "loss": 0.0439, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.11638001352548599, "rewards/margins": 10.115426063537598, "rewards/rejected": -9.999045372009277, "step": 3550 }, { "epoch": 0.21, "learning_rate": 4.810420048683985e-06, "logits/chosen": -2.9718728065490723, "logits/rejected": -2.897033214569092, "logps/chosen": -35.49930953979492, "logps/rejected": -1053.573486328125, "loss": 0.0526, "rewards/accuracies": 1.0, "rewards/chosen": 0.25687235593795776, "rewards/margins": 10.409124374389648, "rewards/rejected": -10.152252197265625, "step": 3560 }, { "epoch": 0.21, "learning_rate": 4.808427293947481e-06, "logits/chosen": -2.955242395401001, "logits/rejected": -2.89896821975708, "logps/chosen": -34.58183288574219, "logps/rejected": -993.6935424804688, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2700592577457428, "rewards/margins": 9.83727741241455, "rewards/rejected": -9.567218780517578, "step": 3570 }, { "epoch": 0.21, "learning_rate": 4.806424537722359e-06, "logits/chosen": -3.0045690536499023, "logits/rejected": -2.852506637573242, "logps/chosen": -29.951335906982422, "logps/rejected": -1059.0360107421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27667292952537537, "rewards/margins": 10.48770809173584, "rewards/rejected": -10.211034774780273, "step": 3580 }, { "epoch": 0.21, "learning_rate": 4.804411788685755e-06, "logits/chosen": -2.9736783504486084, "logits/rejected": -2.89290714263916, "logps/chosen": -45.498260498046875, "logps/rejected": -1002.7349853515625, "loss": 0.0308, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.1521184742450714, "rewards/margins": 9.809548377990723, "rewards/rejected": -9.657429695129395, "step": 3590 }, { "epoch": 0.21, "learning_rate": 4.802389055558105e-06, "logits/chosen": -2.946091413497925, "logits/rejected": -2.8411927223205566, "logps/chosen": -36.8270149230957, "logps/rejected": -995.2396240234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2619747817516327, "rewards/margins": 9.832331657409668, "rewards/rejected": -9.570358276367188, "step": 3600 }, { "epoch": 0.22, "learning_rate": 4.8003563471030974e-06, "logits/chosen": -2.9350483417510986, "logits/rejected": -2.871772289276123, "logps/chosen": -40.312034606933594, "logps/rejected": -1014.216796875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": 0.23532173037528992, "rewards/margins": 10.001082420349121, "rewards/rejected": -9.76576042175293, "step": 3610 }, { "epoch": 0.22, "learning_rate": 4.7983136721276435e-06, "logits/chosen": -2.9762444496154785, "logits/rejected": -2.875495433807373, "logps/chosen": -40.46122360229492, "logps/rejected": -1060.4749755859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2646368741989136, "rewards/margins": 10.47488784790039, "rewards/rejected": -10.210250854492188, "step": 3620 }, { "epoch": 0.22, "learning_rate": 4.796261039481833e-06, "logits/chosen": -2.96022367477417, "logits/rejected": -2.8595688343048096, "logps/chosen": -44.987159729003906, "logps/rejected": -1021.1622924804688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2135053128004074, "rewards/margins": 10.041603088378906, "rewards/rejected": -9.82809829711914, "step": 3630 }, { "epoch": 0.22, "learning_rate": 4.7941984580589e-06, "logits/chosen": -2.965026378631592, "logits/rejected": -2.8817501068115234, "logps/chosen": -36.495567321777344, "logps/rejected": -961.7275390625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 0.23980645835399628, "rewards/margins": 9.472783088684082, "rewards/rejected": -9.232975959777832, "step": 3640 }, { "epoch": 0.22, "learning_rate": 4.7921259367951804e-06, "logits/chosen": -2.938784599304199, "logits/rejected": -2.8968136310577393, "logps/chosen": -38.07603454589844, "logps/rejected": -1014.5634765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27043598890304565, "rewards/margins": 10.031245231628418, "rewards/rejected": -9.760808944702148, "step": 3650 }, { "epoch": 0.22, "learning_rate": 4.790043484670077e-06, "logits/chosen": -2.9595084190368652, "logits/rejected": -2.8744256496429443, "logps/chosen": -32.422576904296875, "logps/rejected": -1037.545654296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27105608582496643, "rewards/margins": 10.253263473510742, "rewards/rejected": -9.982208251953125, "step": 3660 }, { "epoch": 0.22, "learning_rate": 4.787951110706019e-06, "logits/chosen": -2.955275774002075, "logits/rejected": -2.8704380989074707, "logps/chosen": -36.3739013671875, "logps/rejected": -1074.092529296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.29931291937828064, "rewards/margins": 10.670496940612793, "rewards/rejected": -10.371185302734375, "step": 3670 }, { "epoch": 0.22, "learning_rate": 4.785848823968424e-06, "logits/chosen": -2.989478826522827, "logits/rejected": -2.8917901515960693, "logps/chosen": -38.866737365722656, "logps/rejected": -1054.180419921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2902584373950958, "rewards/margins": 10.45865249633789, "rewards/rejected": -10.168394088745117, "step": 3680 }, { "epoch": 0.22, "learning_rate": 4.783736633565654e-06, "logits/chosen": -2.965167999267578, "logits/rejected": -2.8799972534179688, "logps/chosen": -46.40712356567383, "logps/rejected": -1032.6148681640625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.1800728142261505, "rewards/margins": 10.125263214111328, "rewards/rejected": -9.9451904296875, "step": 3690 }, { "epoch": 0.22, "learning_rate": 4.781614548648983e-06, "logits/chosen": -2.907405376434326, "logits/rejected": -2.821118116378784, "logps/chosen": -36.286277770996094, "logps/rejected": -959.2039184570312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.26970070600509644, "rewards/margins": 9.484086990356445, "rewards/rejected": -9.214385032653809, "step": 3700 }, { "epoch": 0.22, "learning_rate": 4.779482578412553e-06, "logits/chosen": -2.9488508701324463, "logits/rejected": -2.87520170211792, "logps/chosen": -33.38228988647461, "logps/rejected": -1064.163818359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2919490337371826, "rewards/margins": 10.551262855529785, "rewards/rejected": -10.259313583374023, "step": 3710 }, { "epoch": 0.22, "learning_rate": 4.7773407320933345e-06, "logits/chosen": -2.9636118412017822, "logits/rejected": -2.853388786315918, "logps/chosen": -32.39838409423828, "logps/rejected": -1050.6060791015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.27809011936187744, "rewards/margins": 10.411128997802734, "rewards/rejected": -10.133039474487305, "step": 3720 }, { "epoch": 0.22, "learning_rate": 4.775189018971088e-06, "logits/chosen": -2.973031759262085, "logits/rejected": -2.8997578620910645, "logps/chosen": -35.17776107788086, "logps/rejected": -1055.5634765625, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": 0.26477423310279846, "rewards/margins": 10.447488784790039, "rewards/rejected": -10.182714462280273, "step": 3730 }, { "epoch": 0.22, "learning_rate": 4.773027448368323e-06, "logits/chosen": -2.93021559715271, "logits/rejected": -2.865452527999878, "logps/chosen": -33.70945358276367, "logps/rejected": -1005.0369873046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2888611853122711, "rewards/margins": 9.961088180541992, "rewards/rejected": -9.672225952148438, "step": 3740 }, { "epoch": 0.22, "learning_rate": 4.770856029650257e-06, "logits/chosen": -2.9366297721862793, "logits/rejected": -2.89388370513916, "logps/chosen": -37.689064025878906, "logps/rejected": -974.5921630859375, "loss": 0.039, "rewards/accuracies": 1.0, "rewards/chosen": 0.22417159378528595, "rewards/margins": 9.607596397399902, "rewards/rejected": -9.383424758911133, "step": 3750 }, { "epoch": 0.22, "learning_rate": 4.768674772224775e-06, "logits/chosen": -2.9551548957824707, "logits/rejected": -2.841517210006714, "logps/chosen": -47.234580993652344, "logps/rejected": -1068.5751953125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.14152735471725464, "rewards/margins": 10.431649208068848, "rewards/rejected": -10.290121078491211, "step": 3760 }, { "epoch": 0.22, "learning_rate": 4.766483685542389e-06, "logits/chosen": -2.963460922241211, "logits/rejected": -2.8733997344970703, "logps/chosen": -117.19401550292969, "logps/rejected": -1079.19140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.5714648365974426, "rewards/margins": 9.844542503356934, "rewards/rejected": -10.416007041931152, "step": 3770 }, { "epoch": 0.23, "learning_rate": 4.764282779096199e-06, "logits/chosen": -2.948030948638916, "logits/rejected": -2.850903272628784, "logps/chosen": -90.09801483154297, "logps/rejected": -1068.052001953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.262870728969574, "rewards/margins": 10.032754898071289, "rewards/rejected": -10.295624732971191, "step": 3780 }, { "epoch": 0.23, "learning_rate": 4.762072062421849e-06, "logits/chosen": -2.95293927192688, "logits/rejected": -2.8400561809539795, "logps/chosen": -72.85230255126953, "logps/rejected": -1071.443115234375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.13441982865333557, "rewards/margins": 10.199308395385742, "rewards/rejected": -10.333727836608887, "step": 3790 }, { "epoch": 0.23, "learning_rate": 4.759851545097486e-06, "logits/chosen": -2.9831318855285645, "logits/rejected": -2.851274251937866, "logps/chosen": -41.39330291748047, "logps/rejected": -1108.9027099609375, "loss": 0.048, "rewards/accuracies": 1.0, "rewards/chosen": 0.23995518684387207, "rewards/margins": 10.94389533996582, "rewards/rejected": -10.703940391540527, "step": 3800 }, { "epoch": 0.23, "learning_rate": 4.75762123674372e-06, "logits/chosen": -2.96423602104187, "logits/rejected": -2.874102830886841, "logps/chosen": -33.912906646728516, "logps/rejected": -1035.610595703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2762759327888489, "rewards/margins": 10.26301097869873, "rewards/rejected": -9.986734390258789, "step": 3810 }, { "epoch": 0.23, "learning_rate": 4.755381147023582e-06, "logits/chosen": -2.9354476928710938, "logits/rejected": -2.8540759086608887, "logps/chosen": -31.629180908203125, "logps/rejected": -974.8140869140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2813805639743805, "rewards/margins": 9.657938957214355, "rewards/rejected": -9.376558303833008, "step": 3820 }, { "epoch": 0.23, "learning_rate": 4.7531312856424814e-06, "logits/chosen": -2.97872257232666, "logits/rejected": -2.912909507751465, "logps/chosen": -34.85889434814453, "logps/rejected": -995.8204345703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2892376780509949, "rewards/margins": 9.864042282104492, "rewards/rejected": -9.57480525970459, "step": 3830 }, { "epoch": 0.23, "learning_rate": 4.750871662348164e-06, "logits/chosen": -2.9521827697753906, "logits/rejected": -2.8969693183898926, "logps/chosen": -37.385711669921875, "logps/rejected": -984.4793090820312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2821754813194275, "rewards/margins": 9.753137588500977, "rewards/rejected": -9.470961570739746, "step": 3840 }, { "epoch": 0.23, "learning_rate": 4.748602286930671e-06, "logits/chosen": -2.938340187072754, "logits/rejected": -2.8652706146240234, "logps/chosen": -31.711421966552734, "logps/rejected": -991.2652587890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.29910561442375183, "rewards/margins": 9.830814361572266, "rewards/rejected": -9.531709671020508, "step": 3850 }, { "epoch": 0.23, "learning_rate": 4.746323169222295e-06, "logits/chosen": -2.938422203063965, "logits/rejected": -2.8597915172576904, "logps/chosen": -41.92852020263672, "logps/rejected": -1074.6275634765625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.3008686304092407, "rewards/margins": 10.67236328125, "rewards/rejected": -10.37149429321289, "step": 3860 }, { "epoch": 0.23, "learning_rate": 4.744034319097536e-06, "logits/chosen": -2.9498867988586426, "logits/rejected": -2.891054630279541, "logps/chosen": -37.561256408691406, "logps/rejected": -1090.5543212890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.288105845451355, "rewards/margins": 10.799072265625, "rewards/rejected": -10.510966300964355, "step": 3870 }, { "epoch": 0.23, "learning_rate": 4.741735746473063e-06, "logits/chosen": -2.9693338871002197, "logits/rejected": -2.858861207962036, "logps/chosen": -31.345439910888672, "logps/rejected": -1083.5074462890625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.26425501704216003, "rewards/margins": 10.72079849243164, "rewards/rejected": -10.45654296875, "step": 3880 }, { "epoch": 0.23, "learning_rate": 4.739427461307671e-06, "logits/chosen": -2.949789047241211, "logits/rejected": -2.878394365310669, "logps/chosen": -38.78950881958008, "logps/rejected": -1111.31396484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2755851745605469, "rewards/margins": 11.016317367553711, "rewards/rejected": -10.74073314666748, "step": 3890 }, { "epoch": 0.23, "learning_rate": 4.73710947360223e-06, "logits/chosen": -2.9387149810791016, "logits/rejected": -2.8408782482147217, "logps/chosen": -39.071163177490234, "logps/rejected": -1122.125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2834100127220154, "rewards/margins": 11.11713981628418, "rewards/rejected": -10.833728790283203, "step": 3900 }, { "epoch": 0.23, "learning_rate": 4.734781793399651e-06, "logits/chosen": -2.9724655151367188, "logits/rejected": -2.8559584617614746, "logps/chosen": -38.800636291503906, "logps/rejected": -1145.936279296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.27654245495796204, "rewards/margins": 11.360654830932617, "rewards/rejected": -11.084113121032715, "step": 3910 }, { "epoch": 0.23, "learning_rate": 4.732444430784838e-06, "logits/chosen": -2.9546453952789307, "logits/rejected": -2.854300022125244, "logps/chosen": -41.025394439697266, "logps/rejected": -1112.866455078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.28154778480529785, "rewards/margins": 11.025461196899414, "rewards/rejected": -10.743912696838379, "step": 3920 }, { "epoch": 0.23, "learning_rate": 4.730097395884645e-06, "logits/chosen": -2.9869484901428223, "logits/rejected": -2.8660876750946045, "logps/chosen": -37.1798095703125, "logps/rejected": -1112.751708984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25559476017951965, "rewards/margins": 11.016273498535156, "rewards/rejected": -10.760679244995117, "step": 3930 }, { "epoch": 0.23, "learning_rate": 4.727740698867831e-06, "logits/chosen": -2.9429688453674316, "logits/rejected": -2.889047145843506, "logps/chosen": -41.60962677001953, "logps/rejected": -1049.450927734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.22706946730613708, "rewards/margins": 10.340311050415039, "rewards/rejected": -10.113241195678711, "step": 3940 }, { "epoch": 0.24, "learning_rate": 4.725374349945019e-06, "logits/chosen": -2.9626359939575195, "logits/rejected": -2.8875746726989746, "logps/chosen": -35.556129455566406, "logps/rejected": -1118.1834716796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2860024571418762, "rewards/margins": 11.077192306518555, "rewards/rejected": -10.791190147399902, "step": 3950 }, { "epoch": 0.24, "learning_rate": 4.7229983593686465e-06, "logits/chosen": -2.9474542140960693, "logits/rejected": -2.867326021194458, "logps/chosen": -36.07481002807617, "logps/rejected": -1094.6268310546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27770116925239563, "rewards/margins": 10.840095520019531, "rewards/rejected": -10.562395095825195, "step": 3960 }, { "epoch": 0.24, "learning_rate": 4.72061273743293e-06, "logits/chosen": -2.95428466796875, "logits/rejected": -2.8671469688415527, "logps/chosen": -36.105628967285156, "logps/rejected": -1121.514404296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2629804015159607, "rewards/margins": 11.11111068725586, "rewards/rejected": -10.848130226135254, "step": 3970 }, { "epoch": 0.24, "learning_rate": 4.718217494473809e-06, "logits/chosen": -2.946845531463623, "logits/rejected": -2.8523857593536377, "logps/chosen": -35.911869049072266, "logps/rejected": -1068.6629638671875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2442527711391449, "rewards/margins": 10.552589416503906, "rewards/rejected": -10.30833625793457, "step": 3980 }, { "epoch": 0.24, "learning_rate": 4.715812640868911e-06, "logits/chosen": -2.983731269836426, "logits/rejected": -2.8690402507781982, "logps/chosen": -124.13836669921875, "logps/rejected": -1161.9986572265625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.49340683221817017, "rewards/margins": 10.755437850952148, "rewards/rejected": -11.248845100402832, "step": 3990 }, { "epoch": 0.24, "learning_rate": 4.7133981870375e-06, "logits/chosen": -2.9694743156433105, "logits/rejected": -2.860291004180908, "logps/chosen": -127.95903015136719, "logps/rejected": -1143.655029296875, "loss": 0.0305, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5909176468849182, "rewards/margins": 10.466508865356445, "rewards/rejected": -11.057424545288086, "step": 4000 }, { "epoch": 0.24, "learning_rate": 4.710974143440435e-06, "logits/chosen": -2.9585962295532227, "logits/rejected": -2.8790175914764404, "logps/chosen": -146.52188110351562, "logps/rejected": -1167.8035888671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8671528697013855, "rewards/margins": 10.42747688293457, "rewards/rejected": -11.294631004333496, "step": 4010 }, { "epoch": 0.24, "learning_rate": 4.708540520580125e-06, "logits/chosen": -2.9754343032836914, "logits/rejected": -2.881608247756958, "logps/chosen": -101.80975341796875, "logps/rejected": -1100.275390625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.3548346161842346, "rewards/margins": 10.281804084777832, "rewards/rejected": -10.636638641357422, "step": 4020 }, { "epoch": 0.24, "learning_rate": 4.70609732900048e-06, "logits/chosen": -2.9519450664520264, "logits/rejected": -2.867504596710205, "logps/chosen": -75.32319641113281, "logps/rejected": -1121.07373046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.07846306264400482, "rewards/margins": 10.7451753616333, "rewards/rejected": -10.823637962341309, "step": 4030 }, { "epoch": 0.24, "learning_rate": 4.703644579286867e-06, "logits/chosen": -2.9364542961120605, "logits/rejected": -2.856759548187256, "logps/chosen": -53.9243278503418, "logps/rejected": -1139.924072265625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": 0.1171170026063919, "rewards/margins": 11.154694557189941, "rewards/rejected": -11.037576675415039, "step": 4040 }, { "epoch": 0.24, "learning_rate": 4.701182282066068e-06, "logits/chosen": -2.9631266593933105, "logits/rejected": -2.864654064178467, "logps/chosen": -42.69145584106445, "logps/rejected": -1131.5335693359375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25359922647476196, "rewards/margins": 11.181697845458984, "rewards/rejected": -10.928098678588867, "step": 4050 }, { "epoch": 0.24, "learning_rate": 4.698710448006226e-06, "logits/chosen": -2.9610912799835205, "logits/rejected": -2.8938281536102295, "logps/chosen": -32.4743766784668, "logps/rejected": -1108.657470703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.22133894264698029, "rewards/margins": 10.931636810302734, "rewards/rejected": -10.710298538208008, "step": 4060 }, { "epoch": 0.24, "learning_rate": 4.696229087816808e-06, "logits/chosen": -2.9607338905334473, "logits/rejected": -2.888510227203369, "logps/chosen": -39.95230484008789, "logps/rejected": -1150.90087890625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23148581385612488, "rewards/margins": 11.358827590942383, "rewards/rejected": -11.127341270446777, "step": 4070 }, { "epoch": 0.24, "learning_rate": 4.693738212248549e-06, "logits/chosen": -2.9576456546783447, "logits/rejected": -2.8683547973632812, "logps/chosen": -43.529136657714844, "logps/rejected": -1137.80712890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20046332478523254, "rewards/margins": 11.209012031555176, "rewards/rejected": -11.008546829223633, "step": 4080 }, { "epoch": 0.24, "learning_rate": 4.6912378320934134e-06, "logits/chosen": -2.924999713897705, "logits/rejected": -2.8575098514556885, "logps/chosen": -44.17028045654297, "logps/rejected": -1098.173095703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.2604959011077881, "rewards/margins": 10.864347457885742, "rewards/rejected": -10.603853225708008, "step": 4090 }, { "epoch": 0.24, "learning_rate": 4.688727958184545e-06, "logits/chosen": -2.967804193496704, "logits/rejected": -2.9006435871124268, "logps/chosen": -43.992679595947266, "logps/rejected": -1101.570556640625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.2365589439868927, "rewards/margins": 10.867574691772461, "rewards/rejected": -10.631016731262207, "step": 4100 }, { "epoch": 0.25, "learning_rate": 4.68620860139622e-06, "logits/chosen": -2.935903787612915, "logits/rejected": -2.850356340408325, "logps/chosen": -41.6516227722168, "logps/rejected": -1098.2264404296875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.2254423201084137, "rewards/margins": 10.833172798156738, "rewards/rejected": -10.607730865478516, "step": 4110 }, { "epoch": 0.25, "learning_rate": 4.683679772643799e-06, "logits/chosen": -2.951389789581299, "logits/rejected": -2.868295431137085, "logps/chosen": -47.11709976196289, "logps/rejected": -1174.3531494140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.12531395256519318, "rewards/margins": 11.492620468139648, "rewards/rejected": -11.36730670928955, "step": 4120 }, { "epoch": 0.25, "learning_rate": 4.681141482883682e-06, "logits/chosen": -2.9335196018218994, "logits/rejected": -2.8770909309387207, "logps/chosen": -46.68126678466797, "logps/rejected": -1172.6685791015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.1905217170715332, "rewards/margins": 11.534502983093262, "rewards/rejected": -11.343982696533203, "step": 4130 }, { "epoch": 0.25, "learning_rate": 4.6785937431132596e-06, "logits/chosen": -2.9407949447631836, "logits/rejected": -2.8388571739196777, "logps/chosen": -52.60106658935547, "logps/rejected": -1186.5836181640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.17114000022411346, "rewards/margins": 11.648022651672363, "rewards/rejected": -11.476883888244629, "step": 4140 }, { "epoch": 0.25, "learning_rate": 4.676036564370865e-06, "logits/chosen": -2.9735066890716553, "logits/rejected": -2.8550028800964355, "logps/chosen": -45.26028823852539, "logps/rejected": -1116.932861328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.16026142239570618, "rewards/margins": 10.9557523727417, "rewards/rejected": -10.795492172241211, "step": 4150 }, { "epoch": 0.25, "learning_rate": 4.6734699577357265e-06, "logits/chosen": -2.997424364089966, "logits/rejected": -2.89945650100708, "logps/chosen": -48.02872848510742, "logps/rejected": -1134.5687255859375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.15413841605186462, "rewards/margins": 11.12299633026123, "rewards/rejected": -10.96885871887207, "step": 4160 }, { "epoch": 0.25, "learning_rate": 4.670893934327921e-06, "logits/chosen": -2.9881510734558105, "logits/rejected": -2.8788020610809326, "logps/chosen": -48.52680206298828, "logps/rejected": -1174.68359375, "loss": 0.0541, "rewards/accuracies": 1.0, "rewards/chosen": 0.1252235621213913, "rewards/margins": 11.497689247131348, "rewards/rejected": -11.372465133666992, "step": 4170 }, { "epoch": 0.25, "learning_rate": 4.668308505308323e-06, "logits/chosen": -2.9511711597442627, "logits/rejected": -2.8834383487701416, "logps/chosen": -35.375144958496094, "logps/rejected": -1126.400390625, "loss": 0.0895, "rewards/accuracies": 1.0, "rewards/chosen": 0.27935975790023804, "rewards/margins": 11.1679105758667, "rewards/rejected": -10.888551712036133, "step": 4180 }, { "epoch": 0.25, "learning_rate": 4.6657136818785596e-06, "logits/chosen": -2.9620156288146973, "logits/rejected": -2.9350714683532715, "logps/chosen": -32.36371612548828, "logps/rejected": -1032.7333984375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 0.2840259075164795, "rewards/margins": 10.240843772888184, "rewards/rejected": -9.956819534301758, "step": 4190 }, { "epoch": 0.25, "learning_rate": 4.663109475280958e-06, "logits/chosen": -2.959348678588867, "logits/rejected": -2.9151225090026855, "logps/chosen": -35.24489212036133, "logps/rejected": -1094.9820556640625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.2819388508796692, "rewards/margins": 10.847404479980469, "rewards/rejected": -10.56546688079834, "step": 4200 }, { "epoch": 0.25, "learning_rate": 4.660495896798499e-06, "logits/chosen": -3.000102996826172, "logits/rejected": -2.877501964569092, "logps/chosen": -41.67681121826172, "logps/rejected": -1095.2076416015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2586834728717804, "rewards/margins": 10.83288288116455, "rewards/rejected": -10.574198722839355, "step": 4210 }, { "epoch": 0.25, "learning_rate": 4.65787295775477e-06, "logits/chosen": -2.9690194129943848, "logits/rejected": -2.861571788787842, "logps/chosen": -37.837646484375, "logps/rejected": -1153.4100341796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.263165146112442, "rewards/margins": 11.420072555541992, "rewards/rejected": -11.156908988952637, "step": 4220 }, { "epoch": 0.25, "learning_rate": 4.655240669513913e-06, "logits/chosen": -2.9687952995300293, "logits/rejected": -2.865004301071167, "logps/chosen": -36.85852813720703, "logps/rejected": -1146.2891845703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25392016768455505, "rewards/margins": 11.341791152954102, "rewards/rejected": -11.087869644165039, "step": 4230 }, { "epoch": 0.25, "learning_rate": 4.652599043480574e-06, "logits/chosen": -2.9662528038024902, "logits/rejected": -2.9026379585266113, "logps/chosen": -36.09339141845703, "logps/rejected": -1154.521728515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25786009430885315, "rewards/margins": 11.410934448242188, "rewards/rejected": -11.153074264526367, "step": 4240 }, { "epoch": 0.25, "learning_rate": 4.64994809109986e-06, "logits/chosen": -2.954988479614258, "logits/rejected": -2.869450807571411, "logps/chosen": -41.52710723876953, "logps/rejected": -1087.855224609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25283336639404297, "rewards/margins": 10.760127067565918, "rewards/rejected": -10.507293701171875, "step": 4250 }, { "epoch": 0.25, "learning_rate": 4.647287823857283e-06, "logits/chosen": -2.947375535964966, "logits/rejected": -2.85819673538208, "logps/chosen": -37.95494842529297, "logps/rejected": -1136.021240234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.233219712972641, "rewards/margins": 11.22145938873291, "rewards/rejected": -10.988240242004395, "step": 4260 }, { "epoch": 0.25, "learning_rate": 4.644618253278712e-06, "logits/chosen": -2.973496675491333, "logits/rejected": -2.8844223022460938, "logps/chosen": -36.876747131347656, "logps/rejected": -1117.244384765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.24744394421577454, "rewards/margins": 11.049226760864258, "rewards/rejected": -10.801782608032227, "step": 4270 }, { "epoch": 0.26, "learning_rate": 4.6419393909303254e-06, "logits/chosen": -2.959179401397705, "logits/rejected": -2.8804988861083984, "logps/chosen": -42.891841888427734, "logps/rejected": -1079.6258544921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2470661699771881, "rewards/margins": 10.657031059265137, "rewards/rejected": -10.409965515136719, "step": 4280 }, { "epoch": 0.26, "learning_rate": 4.639251248418558e-06, "logits/chosen": -2.9720730781555176, "logits/rejected": -2.8604254722595215, "logps/chosen": -38.82190704345703, "logps/rejected": -1119.742919921875, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 0.2453114092350006, "rewards/margins": 11.065515518188477, "rewards/rejected": -10.820204734802246, "step": 4290 }, { "epoch": 0.26, "learning_rate": 4.636553837390051e-06, "logits/chosen": -2.9662017822265625, "logits/rejected": -2.8681468963623047, "logps/chosen": -37.75049591064453, "logps/rejected": -1174.5018310546875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.25653085112571716, "rewards/margins": 11.63978099822998, "rewards/rejected": -11.38325023651123, "step": 4300 }, { "epoch": 0.26, "learning_rate": 4.6338471695316046e-06, "logits/chosen": -2.955739974975586, "logits/rejected": -2.871833562850952, "logps/chosen": -38.968196868896484, "logps/rejected": -1168.5921630859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.22725239396095276, "rewards/margins": 11.541814804077148, "rewards/rejected": -11.314563751220703, "step": 4310 }, { "epoch": 0.26, "learning_rate": 4.631131256570124e-06, "logits/chosen": -2.9677960872650146, "logits/rejected": -2.870626449584961, "logps/chosen": -39.67884063720703, "logps/rejected": -1128.8651123046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2500230073928833, "rewards/margins": 11.131875991821289, "rewards/rejected": -10.881853103637695, "step": 4320 }, { "epoch": 0.26, "learning_rate": 4.628406110272568e-06, "logits/chosen": -3.000276565551758, "logits/rejected": -2.877251625061035, "logps/chosen": -37.848880767822266, "logps/rejected": -1149.478515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24664461612701416, "rewards/margins": 11.35969066619873, "rewards/rejected": -11.113046646118164, "step": 4330 }, { "epoch": 0.26, "learning_rate": 4.625671742445903e-06, "logits/chosen": -2.9321296215057373, "logits/rejected": -2.8446240425109863, "logps/chosen": -36.498374938964844, "logps/rejected": -1154.8353271484375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.24557630717754364, "rewards/margins": 11.41264820098877, "rewards/rejected": -11.167071342468262, "step": 4340 }, { "epoch": 0.26, "learning_rate": 4.622928164937046e-06, "logits/chosen": -2.9716415405273438, "logits/rejected": -2.8585569858551025, "logps/chosen": -46.74121856689453, "logps/rejected": -1191.2548828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.16064290702342987, "rewards/margins": 11.697857856750488, "rewards/rejected": -11.537214279174805, "step": 4350 }, { "epoch": 0.26, "learning_rate": 4.620175389632817e-06, "logits/chosen": -2.950866937637329, "logits/rejected": -2.8497202396392822, "logps/chosen": -108.0619125366211, "logps/rejected": -1198.1654052734375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.45785006880760193, "rewards/margins": 11.160713195800781, "rewards/rejected": -11.618562698364258, "step": 4360 }, { "epoch": 0.26, "learning_rate": 4.617413428459887e-06, "logits/chosen": -2.9367730617523193, "logits/rejected": -2.8352813720703125, "logps/chosen": -173.125244140625, "logps/rejected": -1142.4083251953125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.1167656183242798, "rewards/margins": 9.93166732788086, "rewards/rejected": -11.048433303833008, "step": 4370 }, { "epoch": 0.26, "learning_rate": 4.614642293384724e-06, "logits/chosen": -2.939786434173584, "logits/rejected": -2.8428735733032227, "logps/chosen": -64.32971954345703, "logps/rejected": -1206.7099609375, "loss": 0.0971, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.03509117290377617, "rewards/margins": 11.657116889953613, "rewards/rejected": -11.692207336425781, "step": 4380 }, { "epoch": 0.26, "learning_rate": 4.611861996413542e-06, "logits/chosen": -2.9561524391174316, "logits/rejected": -2.8756608963012695, "logps/chosen": -35.6544075012207, "logps/rejected": -1081.575927734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2997229993343353, "rewards/margins": 10.745903015136719, "rewards/rejected": -10.446179389953613, "step": 4390 }, { "epoch": 0.26, "learning_rate": 4.609072549592255e-06, "logits/chosen": -2.9307684898376465, "logits/rejected": -2.856536388397217, "logps/chosen": -40.32312774658203, "logps/rejected": -1111.79248046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.29164814949035645, "rewards/margins": 11.018877029418945, "rewards/rejected": -10.727228164672852, "step": 4400 }, { "epoch": 0.26, "learning_rate": 4.6062739650064135e-06, "logits/chosen": -2.939666986465454, "logits/rejected": -2.881887912750244, "logps/chosen": -35.42809295654297, "logps/rejected": -1088.260498046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2915201783180237, "rewards/margins": 10.782896041870117, "rewards/rejected": -10.491374969482422, "step": 4410 }, { "epoch": 0.26, "learning_rate": 4.603466254781162e-06, "logits/chosen": -2.9672279357910156, "logits/rejected": -2.8784804344177246, "logps/chosen": -34.050270080566406, "logps/rejected": -1131.605712890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25293344259262085, "rewards/margins": 11.182655334472656, "rewards/rejected": -10.929722785949707, "step": 4420 }, { "epoch": 0.26, "learning_rate": 4.600649431081181e-06, "logits/chosen": -2.953089952468872, "logits/rejected": -2.9113247394561768, "logps/chosen": -38.85418701171875, "logps/rejected": -988.3271484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.27741631865501404, "rewards/margins": 9.782652854919434, "rewards/rejected": -9.505236625671387, "step": 4430 }, { "epoch": 0.26, "learning_rate": 4.597823506110637e-06, "logits/chosen": -2.9229180812835693, "logits/rejected": -2.8847856521606445, "logps/chosen": -34.154388427734375, "logps/rejected": -1098.6739501953125, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": 0.2795941233634949, "rewards/margins": 10.902578353881836, "rewards/rejected": -10.62298583984375, "step": 4440 }, { "epoch": 0.27, "learning_rate": 4.594988492113128e-06, "logits/chosen": -2.9665422439575195, "logits/rejected": -2.8809871673583984, "logps/chosen": -34.010929107666016, "logps/rejected": -1006.3865356445312, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": 0.2771574854850769, "rewards/margins": 9.98104476928711, "rewards/rejected": -9.703887939453125, "step": 4450 }, { "epoch": 0.27, "learning_rate": 4.592144401371632e-06, "logits/chosen": -2.9626877307891846, "logits/rejected": -2.928879976272583, "logps/chosen": -40.81189727783203, "logps/rejected": -958.4293823242188, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.28771209716796875, "rewards/margins": 9.493204116821289, "rewards/rejected": -9.20549201965332, "step": 4460 }, { "epoch": 0.27, "learning_rate": 4.5892912462084515e-06, "logits/chosen": -2.9822604656219482, "logits/rejected": -2.8773388862609863, "logps/chosen": -37.05847930908203, "logps/rejected": -958.4656982421875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.2835359275341034, "rewards/margins": 9.485928535461426, "rewards/rejected": -9.202393531799316, "step": 4470 }, { "epoch": 0.27, "learning_rate": 4.586429038985163e-06, "logits/chosen": -2.9525272846221924, "logits/rejected": -2.860757350921631, "logps/chosen": -32.40262222290039, "logps/rejected": -1101.0291748046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.30284255743026733, "rewards/margins": 10.940454483032227, "rewards/rejected": -10.637612342834473, "step": 4480 }, { "epoch": 0.27, "learning_rate": 4.583557792102559e-06, "logits/chosen": -2.9756393432617188, "logits/rejected": -2.9125142097473145, "logps/chosen": -35.616939544677734, "logps/rejected": -1068.53662109375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.27384287118911743, "rewards/margins": 10.59113883972168, "rewards/rejected": -10.317296981811523, "step": 4490 }, { "epoch": 0.27, "learning_rate": 4.580677518000604e-06, "logits/chosen": -2.9633402824401855, "logits/rejected": -2.8803019523620605, "logps/chosen": -34.52521514892578, "logps/rejected": -1152.6893310546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.3016580045223236, "rewards/margins": 11.450372695922852, "rewards/rejected": -11.148714065551758, "step": 4500 }, { "epoch": 0.27, "learning_rate": 4.577788229158364e-06, "logits/chosen": -2.964059352874756, "logits/rejected": -2.85782790184021, "logps/chosen": -35.361366271972656, "logps/rejected": -1125.31298828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2784844934940338, "rewards/margins": 11.152929306030273, "rewards/rejected": -10.874444961547852, "step": 4510 }, { "epoch": 0.27, "learning_rate": 4.574889938093971e-06, "logits/chosen": -2.949462890625, "logits/rejected": -2.879258632659912, "logps/chosen": -39.89530563354492, "logps/rejected": -1166.216064453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2654039263725281, "rewards/margins": 11.553186416625977, "rewards/rejected": -11.28778076171875, "step": 4520 }, { "epoch": 0.27, "learning_rate": 4.571982657364555e-06, "logits/chosen": -2.9241931438446045, "logits/rejected": -2.8650612831115723, "logps/chosen": -36.93219757080078, "logps/rejected": -1191.379150390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2879732847213745, "rewards/margins": 11.826117515563965, "rewards/rejected": -11.538142204284668, "step": 4530 }, { "epoch": 0.27, "learning_rate": 4.569066399566196e-06, "logits/chosen": -2.971449613571167, "logits/rejected": -2.8849806785583496, "logps/chosen": -31.455364227294922, "logps/rejected": -1179.7509765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2906108498573303, "rewards/margins": 11.71440315246582, "rewards/rejected": -11.42379093170166, "step": 4540 }, { "epoch": 0.27, "learning_rate": 4.566141177333871e-06, "logits/chosen": -2.9716110229492188, "logits/rejected": -2.8765616416931152, "logps/chosen": -32.106834411621094, "logps/rejected": -1170.793212890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2796618640422821, "rewards/margins": 11.61168098449707, "rewards/rejected": -11.332018852233887, "step": 4550 }, { "epoch": 0.27, "learning_rate": 4.563207003341389e-06, "logits/chosen": -2.9551029205322266, "logits/rejected": -2.895502805709839, "logps/chosen": -40.75751876831055, "logps/rejected": -1115.5899658203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2924448847770691, "rewards/margins": 11.062292098999023, "rewards/rejected": -10.769847869873047, "step": 4560 }, { "epoch": 0.27, "learning_rate": 4.56026389030135e-06, "logits/chosen": -2.923676013946533, "logits/rejected": -2.865589141845703, "logps/chosen": -35.92970657348633, "logps/rejected": -1098.6800537109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.28916287422180176, "rewards/margins": 10.909271240234375, "rewards/rejected": -10.620107650756836, "step": 4570 }, { "epoch": 0.27, "learning_rate": 4.557311850965081e-06, "logits/chosen": -2.9543888568878174, "logits/rejected": -2.8663742542266846, "logps/chosen": -32.92295455932617, "logps/rejected": -1225.07470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2918144166469574, "rewards/margins": 12.166589736938477, "rewards/rejected": -11.874774932861328, "step": 4580 }, { "epoch": 0.27, "learning_rate": 4.554350898122585e-06, "logits/chosen": -2.9443953037261963, "logits/rejected": -2.8717644214630127, "logps/chosen": -36.601905822753906, "logps/rejected": -1129.571533203125, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": 0.24913087487220764, "rewards/margins": 11.160833358764648, "rewards/rejected": -10.911702156066895, "step": 4590 }, { "epoch": 0.27, "learning_rate": 4.551381044602478e-06, "logits/chosen": -3.0040812492370605, "logits/rejected": -2.89408540725708, "logps/chosen": -31.736988067626953, "logps/rejected": -1201.6827392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.28789740800857544, "rewards/margins": 11.927529335021973, "rewards/rejected": -11.639631271362305, "step": 4600 }, { "epoch": 0.27, "learning_rate": 4.548402303271946e-06, "logits/chosen": -2.935138702392578, "logits/rejected": -2.877499580383301, "logps/chosen": -30.9594669342041, "logps/rejected": -1112.0, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2960861027240753, "rewards/margins": 11.045784950256348, "rewards/rejected": -10.749698638916016, "step": 4610 }, { "epoch": 0.28, "learning_rate": 4.5454146870366775e-06, "logits/chosen": -2.9478774070739746, "logits/rejected": -2.899352550506592, "logps/chosen": -37.93483352661133, "logps/rejected": -1103.2679443359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2903367877006531, "rewards/margins": 10.949884414672852, "rewards/rejected": -10.659547805786133, "step": 4620 }, { "epoch": 0.28, "learning_rate": 4.542418208840816e-06, "logits/chosen": -2.955773115158081, "logits/rejected": -2.895573854446411, "logps/chosen": -33.74528884887695, "logps/rejected": -1097.5692138671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27939096093177795, "rewards/margins": 10.875417709350586, "rewards/rejected": -10.596026420593262, "step": 4630 }, { "epoch": 0.28, "learning_rate": 4.539412881666896e-06, "logits/chosen": -2.967395544052124, "logits/rejected": -2.892394542694092, "logps/chosen": -35.97848892211914, "logps/rejected": -1080.373779296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.28454941511154175, "rewards/margins": 10.719263076782227, "rewards/rejected": -10.434714317321777, "step": 4640 }, { "epoch": 0.28, "learning_rate": 4.536398718535795e-06, "logits/chosen": -2.961688280105591, "logits/rejected": -2.897355318069458, "logps/chosen": -35.565956115722656, "logps/rejected": -1159.0927734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.30134448409080505, "rewards/margins": 11.525108337402344, "rewards/rejected": -11.223763465881348, "step": 4650 }, { "epoch": 0.28, "learning_rate": 4.5333757325066715e-06, "logits/chosen": -2.9208579063415527, "logits/rejected": -2.840675115585327, "logps/chosen": -38.74554443359375, "logps/rejected": -1179.308837890625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.26944947242736816, "rewards/margins": 11.669000625610352, "rewards/rejected": -11.39954948425293, "step": 4660 }, { "epoch": 0.28, "learning_rate": 4.5303439366769095e-06, "logits/chosen": -2.955282211303711, "logits/rejected": -2.854884147644043, "logps/chosen": -32.659934997558594, "logps/rejected": -1105.266357421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2741527855396271, "rewards/margins": 10.95141315460205, "rewards/rejected": -10.67725944519043, "step": 4670 }, { "epoch": 0.28, "learning_rate": 4.527303344182065e-06, "logits/chosen": -2.9167912006378174, "logits/rejected": -2.838804244995117, "logps/chosen": -37.79563522338867, "logps/rejected": -1129.3125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27460163831710815, "rewards/margins": 11.195679664611816, "rewards/rejected": -10.921077728271484, "step": 4680 }, { "epoch": 0.28, "learning_rate": 4.524253968195802e-06, "logits/chosen": -2.9606659412384033, "logits/rejected": -2.8887457847595215, "logps/chosen": -32.979469299316406, "logps/rejected": -1218.0670166015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2833353877067566, "rewards/margins": 12.093156814575195, "rewards/rejected": -11.809821128845215, "step": 4690 }, { "epoch": 0.28, "learning_rate": 4.521195821929843e-06, "logits/chosen": -2.9708919525146484, "logits/rejected": -2.89040207862854, "logps/chosen": -41.004661560058594, "logps/rejected": -1105.510009765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2453722208738327, "rewards/margins": 10.912529945373535, "rewards/rejected": -10.667156219482422, "step": 4700 }, { "epoch": 0.28, "learning_rate": 4.5181289186339085e-06, "logits/chosen": -2.936830520629883, "logits/rejected": -2.829691171646118, "logps/chosen": -36.83415985107422, "logps/rejected": -1161.697998046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.30055153369903564, "rewards/margins": 11.534215927124023, "rewards/rejected": -11.233665466308594, "step": 4710 }, { "epoch": 0.28, "learning_rate": 4.51505327159566e-06, "logits/chosen": -2.978635549545288, "logits/rejected": -2.8599579334259033, "logps/chosen": -34.85923385620117, "logps/rejected": -1195.510986328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27231794595718384, "rewards/margins": 11.84208869934082, "rewards/rejected": -11.569770812988281, "step": 4720 }, { "epoch": 0.28, "learning_rate": 4.511968894140639e-06, "logits/chosen": -2.9731945991516113, "logits/rejected": -2.8797898292541504, "logps/chosen": -41.42657470703125, "logps/rejected": -1128.136474609375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.28306087851524353, "rewards/margins": 11.188992500305176, "rewards/rejected": -10.90593147277832, "step": 4730 }, { "epoch": 0.28, "learning_rate": 4.508875799632215e-06, "logits/chosen": -2.9664483070373535, "logits/rejected": -2.8990588188171387, "logps/chosen": -39.13322830200195, "logps/rejected": -1194.906005859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2737151086330414, "rewards/margins": 11.836987495422363, "rewards/rejected": -11.563272476196289, "step": 4740 }, { "epoch": 0.28, "learning_rate": 4.505774001471527e-06, "logits/chosen": -2.9530136585235596, "logits/rejected": -2.8527896404266357, "logps/chosen": -32.53175735473633, "logps/rejected": -1170.964599609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2878793478012085, "rewards/margins": 11.625240325927734, "rewards/rejected": -11.337360382080078, "step": 4750 }, { "epoch": 0.28, "learning_rate": 4.502663513097419e-06, "logits/chosen": -2.963670253753662, "logits/rejected": -2.854801654815674, "logps/chosen": -49.826263427734375, "logps/rejected": -1101.52783203125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.16523872315883636, "rewards/margins": 10.799046516418457, "rewards/rejected": -10.633807182312012, "step": 4760 }, { "epoch": 0.28, "learning_rate": 4.499544347986388e-06, "logits/chosen": -2.9236323833465576, "logits/rejected": -2.8353984355926514, "logps/chosen": -34.49101257324219, "logps/rejected": -1135.210693359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2467772215604782, "rewards/margins": 11.220293045043945, "rewards/rejected": -10.973515510559082, "step": 4770 }, { "epoch": 0.29, "learning_rate": 4.4964165196525255e-06, "logits/chosen": -2.9889039993286133, "logits/rejected": -2.891756296157837, "logps/chosen": -37.684226989746094, "logps/rejected": -1197.7994384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2833424508571625, "rewards/margins": 11.896390914916992, "rewards/rejected": -11.613048553466797, "step": 4780 }, { "epoch": 0.29, "learning_rate": 4.493280041647454e-06, "logits/chosen": -2.9791948795318604, "logits/rejected": -2.869785785675049, "logps/chosen": -53.26668167114258, "logps/rejected": -1137.0694580078125, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": 0.1318216472864151, "rewards/margins": 11.126330375671387, "rewards/rejected": -10.994510650634766, "step": 4790 }, { "epoch": 0.29, "learning_rate": 4.490134927560276e-06, "logits/chosen": -2.9673333168029785, "logits/rejected": -2.868361711502075, "logps/chosen": -38.05815887451172, "logps/rejected": -1159.852783203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20312781631946564, "rewards/margins": 11.422969818115234, "rewards/rejected": -11.219841957092285, "step": 4800 }, { "epoch": 0.29, "learning_rate": 4.486981191017505e-06, "logits/chosen": -2.9754676818847656, "logits/rejected": -2.843756914138794, "logps/chosen": -42.0322151184082, "logps/rejected": -1166.747314453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20274639129638672, "rewards/margins": 11.484840393066406, "rewards/rejected": -11.28209400177002, "step": 4810 }, { "epoch": 0.29, "learning_rate": 4.4838188456830175e-06, "logits/chosen": -2.943711042404175, "logits/rejected": -2.8709869384765625, "logps/chosen": -53.13884353637695, "logps/rejected": -1131.937744140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.11141364276409149, "rewards/margins": 11.058423042297363, "rewards/rejected": -10.947009086608887, "step": 4820 }, { "epoch": 0.29, "learning_rate": 4.480647905257985e-06, "logits/chosen": -2.9719583988189697, "logits/rejected": -2.8802073001861572, "logps/chosen": -58.5169677734375, "logps/rejected": -1145.5029296875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.03854336217045784, "rewards/margins": 11.108865737915039, "rewards/rejected": -11.070322036743164, "step": 4830 }, { "epoch": 0.29, "learning_rate": 4.47746838348082e-06, "logits/chosen": -2.9779839515686035, "logits/rejected": -2.8634166717529297, "logps/chosen": -116.1864242553711, "logps/rejected": -1156.2957763671875, "loss": 0.012, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5293279886245728, "rewards/margins": 10.671977996826172, "rewards/rejected": -11.201306343078613, "step": 4840 }, { "epoch": 0.29, "learning_rate": 4.474280294127112e-06, "logits/chosen": -2.966014862060547, "logits/rejected": -2.8927388191223145, "logps/chosen": -66.63335418701172, "logps/rejected": -1195.50244140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.08105408400297165, "rewards/margins": 11.503679275512695, "rewards/rejected": -11.584732055664062, "step": 4850 }, { "epoch": 0.29, "learning_rate": 4.471083651009574e-06, "logits/chosen": -2.9738662242889404, "logits/rejected": -2.8865697383880615, "logps/chosen": -70.5742416381836, "logps/rejected": -1168.4891357421875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.10542134195566177, "rewards/margins": 11.20809268951416, "rewards/rejected": -11.31351375579834, "step": 4860 }, { "epoch": 0.29, "learning_rate": 4.4678784679779766e-06, "logits/chosen": -2.961843490600586, "logits/rejected": -2.858717679977417, "logps/chosen": -52.013946533203125, "logps/rejected": -1189.382080078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.17035481333732605, "rewards/margins": 11.695201873779297, "rewards/rejected": -11.524847030639648, "step": 4870 }, { "epoch": 0.29, "learning_rate": 4.464664758919092e-06, "logits/chosen": -2.9721741676330566, "logits/rejected": -2.8882405757904053, "logps/chosen": -53.94782257080078, "logps/rejected": -1135.9566650390625, "loss": 0.0402, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.12222828716039658, "rewards/margins": 11.10088062286377, "rewards/rejected": -10.97865104675293, "step": 4880 }, { "epoch": 0.29, "learning_rate": 4.461442537756629e-06, "logits/chosen": -2.912480115890503, "logits/rejected": -2.81573748588562, "logps/chosen": -40.86740493774414, "logps/rejected": -1153.3560791015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2526918351650238, "rewards/margins": 11.403447151184082, "rewards/rejected": -11.150754928588867, "step": 4890 }, { "epoch": 0.29, "learning_rate": 4.458211818451179e-06, "logits/chosen": -3.0065367221832275, "logits/rejected": -2.888075590133667, "logps/chosen": -33.706932067871094, "logps/rejected": -1116.4793701171875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.252431720495224, "rewards/margins": 11.04487419128418, "rewards/rejected": -10.792441368103027, "step": 4900 }, { "epoch": 0.29, "learning_rate": 4.454972615000153e-06, "logits/chosen": -2.919451951980591, "logits/rejected": -2.8475170135498047, "logps/chosen": -38.50215148925781, "logps/rejected": -1133.876220703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25476130843162537, "rewards/margins": 11.21600341796875, "rewards/rejected": -10.96124267578125, "step": 4910 }, { "epoch": 0.29, "learning_rate": 4.451724941437718e-06, "logits/chosen": -2.947608709335327, "logits/rejected": -2.8511850833892822, "logps/chosen": -43.176753997802734, "logps/rejected": -1126.4942626953125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.2858401834964752, "rewards/margins": 11.17546272277832, "rewards/rejected": -10.889622688293457, "step": 4920 }, { "epoch": 0.29, "learning_rate": 4.448468811834739e-06, "logits/chosen": -2.9690027236938477, "logits/rejected": -2.893310070037842, "logps/chosen": -33.42264175415039, "logps/rejected": -1159.760986328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26599255204200745, "rewards/margins": 11.483040809631348, "rewards/rejected": -11.217049598693848, "step": 4930 }, { "epoch": 0.29, "learning_rate": 4.445204240298718e-06, "logits/chosen": -2.9614386558532715, "logits/rejected": -2.8593909740448, "logps/chosen": -34.920108795166016, "logps/rejected": -1193.9361572265625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.2743667960166931, "rewards/margins": 11.814088821411133, "rewards/rejected": -11.53972339630127, "step": 4940 }, { "epoch": 0.3, "learning_rate": 4.441931240973735e-06, "logits/chosen": -2.986588954925537, "logits/rejected": -2.8608298301696777, "logps/chosen": -32.940330505371094, "logps/rejected": -1191.7967529296875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.2661217153072357, "rewards/margins": 11.808540344238281, "rewards/rejected": -11.542418479919434, "step": 4950 }, { "epoch": 0.3, "learning_rate": 4.43864982804038e-06, "logits/chosen": -2.983107805252075, "logits/rejected": -2.871973991394043, "logps/chosen": -37.880245208740234, "logps/rejected": -1158.892333984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.284981906414032, "rewards/margins": 11.496149063110352, "rewards/rejected": -11.21116828918457, "step": 4960 }, { "epoch": 0.3, "learning_rate": 4.435360015715697e-06, "logits/chosen": -2.9720005989074707, "logits/rejected": -2.829814910888672, "logps/chosen": -37.38566970825195, "logps/rejected": -1183.0797119140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2873549461364746, "rewards/margins": 11.740813255310059, "rewards/rejected": -11.453457832336426, "step": 4970 }, { "epoch": 0.3, "learning_rate": 4.4320618182531244e-06, "logits/chosen": -2.9221608638763428, "logits/rejected": -2.836338520050049, "logps/chosen": -38.683250427246094, "logps/rejected": -1150.73095703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.28101760149002075, "rewards/margins": 11.412875175476074, "rewards/rejected": -11.131857872009277, "step": 4980 }, { "epoch": 0.3, "learning_rate": 4.428755249942425e-06, "logits/chosen": -2.991145610809326, "logits/rejected": -2.88478422164917, "logps/chosen": -36.87883377075195, "logps/rejected": -1166.271728515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.29670625925064087, "rewards/margins": 11.585086822509766, "rewards/rejected": -11.28838062286377, "step": 4990 }, { "epoch": 0.3, "learning_rate": 4.4254403251096345e-06, "logits/chosen": -2.970564365386963, "logits/rejected": -2.8858301639556885, "logps/chosen": -36.866947174072266, "logps/rejected": -1153.4267578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2476746290922165, "rewards/margins": 11.394681930541992, "rewards/rejected": -11.147006034851074, "step": 5000 }, { "epoch": 0.3, "learning_rate": 4.422117058116989e-06, "logits/chosen": -2.9512341022491455, "logits/rejected": -2.862576723098755, "logps/chosen": -36.78702926635742, "logps/rejected": -1198.058837890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.29165118932724, "rewards/margins": 11.89011287689209, "rewards/rejected": -11.59846305847168, "step": 5010 }, { "epoch": 0.3, "learning_rate": 4.418785463362871e-06, "logits/chosen": -2.9656381607055664, "logits/rejected": -2.861790657043457, "logps/chosen": -40.500091552734375, "logps/rejected": -1143.8209228515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.250846803188324, "rewards/margins": 11.297709465026855, "rewards/rejected": -11.046862602233887, "step": 5020 }, { "epoch": 0.3, "learning_rate": 4.415445555281742e-06, "logits/chosen": -2.957887649536133, "logits/rejected": -2.8752729892730713, "logps/chosen": -34.895957946777344, "logps/rejected": -1208.907958984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2870170474052429, "rewards/margins": 12.008065223693848, "rewards/rejected": -11.721049308776855, "step": 5030 }, { "epoch": 0.3, "learning_rate": 4.412097348344084e-06, "logits/chosen": -2.946477174758911, "logits/rejected": -2.8658294677734375, "logps/chosen": -41.21668243408203, "logps/rejected": -1232.41796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26809272170066833, "rewards/margins": 12.213543891906738, "rewards/rejected": -11.945451736450195, "step": 5040 }, { "epoch": 0.3, "learning_rate": 4.408740857056332e-06, "logits/chosen": -2.9765141010284424, "logits/rejected": -2.8645873069763184, "logps/chosen": -33.927345275878906, "logps/rejected": -1223.337158203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2796933650970459, "rewards/margins": 12.134493827819824, "rewards/rejected": -11.854801177978516, "step": 5050 }, { "epoch": 0.3, "learning_rate": 4.405376095960816e-06, "logits/chosen": -2.9647459983825684, "logits/rejected": -2.858790874481201, "logps/chosen": -35.121639251708984, "logps/rejected": -1165.2913818359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2740638852119446, "rewards/margins": 11.545048713684082, "rewards/rejected": -11.27098560333252, "step": 5060 }, { "epoch": 0.3, "learning_rate": 4.402003079635695e-06, "logits/chosen": -2.9724628925323486, "logits/rejected": -2.8544468879699707, "logps/chosen": -36.832645416259766, "logps/rejected": -1125.03857421875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.22401531040668488, "rewards/margins": 11.103219985961914, "rewards/rejected": -10.879202842712402, "step": 5070 }, { "epoch": 0.3, "learning_rate": 4.398621822694894e-06, "logits/chosen": -2.9562618732452393, "logits/rejected": -2.8933660984039307, "logps/chosen": -42.61988067626953, "logps/rejected": -1160.116943359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2328265905380249, "rewards/margins": 11.448716163635254, "rewards/rejected": -11.215889930725098, "step": 5080 }, { "epoch": 0.3, "learning_rate": 4.3952323397880426e-06, "logits/chosen": -2.9467008113861084, "logits/rejected": -2.834378480911255, "logps/chosen": -53.190704345703125, "logps/rejected": -1137.9227294921875, "loss": 0.0642, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.1017858013510704, "rewards/margins": 11.086835861206055, "rewards/rejected": -10.985048294067383, "step": 5090 }, { "epoch": 0.3, "learning_rate": 4.391834645600408e-06, "logits/chosen": -2.9915993213653564, "logits/rejected": -2.903843879699707, "logps/chosen": -44.03375244140625, "logps/rejected": -1097.896240234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.18741045892238617, "rewards/margins": 10.781913757324219, "rewards/rejected": -10.594502449035645, "step": 5100 }, { "epoch": 0.3, "learning_rate": 4.388428754852835e-06, "logits/chosen": -2.931053876876831, "logits/rejected": -2.8393309116363525, "logps/chosen": -35.673683166503906, "logps/rejected": -1152.415771484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24306723475456238, "rewards/margins": 11.385045051574707, "rewards/rejected": -11.141977310180664, "step": 5110 }, { "epoch": 0.31, "learning_rate": 4.385014682301682e-06, "logits/chosen": -2.9859938621520996, "logits/rejected": -2.8806214332580566, "logps/chosen": -36.37189483642578, "logps/rejected": -1134.5433349609375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.2594851553440094, "rewards/margins": 11.228067398071289, "rewards/rejected": -10.968582153320312, "step": 5120 }, { "epoch": 0.31, "learning_rate": 4.381592442738753e-06, "logits/chosen": -2.93361234664917, "logits/rejected": -2.8411355018615723, "logps/chosen": -33.507320404052734, "logps/rejected": -1222.56640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2846871614456177, "rewards/margins": 12.131587982177734, "rewards/rejected": -11.84689998626709, "step": 5130 }, { "epoch": 0.31, "learning_rate": 4.3781620509912395e-06, "logits/chosen": -2.9380526542663574, "logits/rejected": -2.8500075340270996, "logps/chosen": -57.57692337036133, "logps/rejected": -1155.871826171875, "loss": 0.0305, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.10134019702672958, "rewards/margins": 11.308897972106934, "rewards/rejected": -11.207558631896973, "step": 5140 }, { "epoch": 0.31, "learning_rate": 4.374723521921651e-06, "logits/chosen": -2.9446682929992676, "logits/rejected": -2.8281874656677246, "logps/chosen": -56.26776123046875, "logps/rejected": -1175.0733642578125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.12381017208099365, "rewards/margins": 11.502933502197266, "rewards/rejected": -11.37912368774414, "step": 5150 }, { "epoch": 0.31, "learning_rate": 4.3712768704277535e-06, "logits/chosen": -2.9708282947540283, "logits/rejected": -2.8469979763031006, "logps/chosen": -46.24055099487305, "logps/rejected": -1142.1422119140625, "loss": 0.0203, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.1871252954006195, "rewards/margins": 11.232915878295898, "rewards/rejected": -11.04578971862793, "step": 5160 }, { "epoch": 0.31, "learning_rate": 4.367822111442504e-06, "logits/chosen": -2.9855217933654785, "logits/rejected": -2.9178860187530518, "logps/chosen": -38.71630859375, "logps/rejected": -1148.5791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.29054322838783264, "rewards/margins": 11.407349586486816, "rewards/rejected": -11.116806030273438, "step": 5170 }, { "epoch": 0.31, "learning_rate": 4.364359259933985e-06, "logits/chosen": -2.9610812664031982, "logits/rejected": -2.8789889812469482, "logps/chosen": -41.821815490722656, "logps/rejected": -1119.8319091796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2610735297203064, "rewards/margins": 11.078737258911133, "rewards/rejected": -10.81766414642334, "step": 5180 }, { "epoch": 0.31, "learning_rate": 4.3608883309053425e-06, "logits/chosen": -2.97413969039917, "logits/rejected": -2.906275510787964, "logps/chosen": -49.7827033996582, "logps/rejected": -1059.408447265625, "loss": 0.0411, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.16653789579868317, "rewards/margins": 10.383173942565918, "rewards/rejected": -10.216636657714844, "step": 5190 }, { "epoch": 0.31, "learning_rate": 4.35740933939472e-06, "logits/chosen": -2.9232616424560547, "logits/rejected": -2.867077350616455, "logps/chosen": -41.84632110595703, "logps/rejected": -1131.424072265625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.274752676486969, "rewards/margins": 11.207147598266602, "rewards/rejected": -10.932394981384277, "step": 5200 }, { "epoch": 0.31, "learning_rate": 4.353922300475189e-06, "logits/chosen": -2.9587953090667725, "logits/rejected": -2.843602418899536, "logps/chosen": -38.611968994140625, "logps/rejected": -1198.242431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.23766660690307617, "rewards/margins": 11.83879566192627, "rewards/rejected": -11.601129531860352, "step": 5210 }, { "epoch": 0.31, "learning_rate": 4.350427229254689e-06, "logits/chosen": -2.9383931159973145, "logits/rejected": -2.8407034873962402, "logps/chosen": -39.410728454589844, "logps/rejected": -1151.1204833984375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.23785650730133057, "rewards/margins": 11.368574142456055, "rewards/rejected": -11.130717277526855, "step": 5220 }, { "epoch": 0.31, "learning_rate": 4.346924140875961e-06, "logits/chosen": -2.929699420928955, "logits/rejected": -2.824612617492676, "logps/chosen": -39.45151901245117, "logps/rejected": -1132.2574462890625, "loss": 0.0523, "rewards/accuracies": 1.0, "rewards/chosen": 0.2538520395755768, "rewards/margins": 11.193817138671875, "rewards/rejected": -10.939966201782227, "step": 5230 }, { "epoch": 0.31, "learning_rate": 4.34341305051648e-06, "logits/chosen": -2.8993256092071533, "logits/rejected": -2.7992308139801025, "logps/chosen": -37.06409454345703, "logps/rejected": -1072.103759765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2529296576976776, "rewards/margins": 10.593194961547852, "rewards/rejected": -10.340266227722168, "step": 5240 }, { "epoch": 0.31, "learning_rate": 4.339893973388392e-06, "logits/chosen": -2.956387996673584, "logits/rejected": -2.8628673553466797, "logps/chosen": -44.876808166503906, "logps/rejected": -1079.9322509765625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.14010050892829895, "rewards/margins": 10.557741165161133, "rewards/rejected": -10.417640686035156, "step": 5250 }, { "epoch": 0.31, "learning_rate": 4.3363669247384446e-06, "logits/chosen": -2.9878122806549072, "logits/rejected": -2.8917317390441895, "logps/chosen": -36.089500427246094, "logps/rejected": -1146.854248046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2735409438610077, "rewards/margins": 11.372001647949219, "rewards/rejected": -11.098461151123047, "step": 5260 }, { "epoch": 0.31, "learning_rate": 4.332831919847922e-06, "logits/chosen": -2.9661240577697754, "logits/rejected": -2.8793680667877197, "logps/chosen": -33.842098236083984, "logps/rejected": -1102.639404296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2861076295375824, "rewards/margins": 10.930473327636719, "rewards/rejected": -10.644365310668945, "step": 5270 }, { "epoch": 0.31, "learning_rate": 4.329288974032583e-06, "logits/chosen": -2.956880569458008, "logits/rejected": -2.837952136993408, "logps/chosen": -42.800994873046875, "logps/rejected": -1164.749755859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2906186878681183, "rewards/margins": 11.551324844360352, "rewards/rejected": -11.260706901550293, "step": 5280 }, { "epoch": 0.32, "learning_rate": 4.325738102642589e-06, "logits/chosen": -2.9676382541656494, "logits/rejected": -2.8495635986328125, "logps/chosen": -37.437904357910156, "logps/rejected": -1161.7109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2785903513431549, "rewards/margins": 11.516439437866211, "rewards/rejected": -11.237848281860352, "step": 5290 }, { "epoch": 0.32, "learning_rate": 4.322179321062439e-06, "logits/chosen": -2.9554975032806396, "logits/rejected": -2.843256711959839, "logps/chosen": -34.51882553100586, "logps/rejected": -1153.517822265625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.26034483313560486, "rewards/margins": 11.412541389465332, "rewards/rejected": -11.152196884155273, "step": 5300 }, { "epoch": 0.32, "learning_rate": 4.318612644710906e-06, "logits/chosen": -2.94887375831604, "logits/rejected": -2.8576037883758545, "logps/chosen": -36.666587829589844, "logps/rejected": -1199.220458984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25914937257766724, "rewards/margins": 11.88100814819336, "rewards/rejected": -11.621859550476074, "step": 5310 }, { "epoch": 0.32, "learning_rate": 4.315038089040965e-06, "logits/chosen": -2.9605777263641357, "logits/rejected": -2.8260130882263184, "logps/chosen": -31.751333236694336, "logps/rejected": -1156.5784912109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2755129337310791, "rewards/margins": 11.472201347351074, "rewards/rejected": -11.196687698364258, "step": 5320 }, { "epoch": 0.32, "learning_rate": 4.311455669539732e-06, "logits/chosen": -2.9603285789489746, "logits/rejected": -2.8517260551452637, "logps/chosen": -36.05811309814453, "logps/rejected": -1181.9122314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2820877432823181, "rewards/margins": 11.727853775024414, "rewards/rejected": -11.445765495300293, "step": 5330 }, { "epoch": 0.32, "learning_rate": 4.307865401728392e-06, "logits/chosen": -2.976041316986084, "logits/rejected": -2.8848304748535156, "logps/chosen": -50.970008850097656, "logps/rejected": -1181.744873046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.16035479307174683, "rewards/margins": 11.609807968139648, "rewards/rejected": -11.449453353881836, "step": 5340 }, { "epoch": 0.32, "learning_rate": 4.3042673011621334e-06, "logits/chosen": -2.923210620880127, "logits/rejected": -2.7968709468841553, "logps/chosen": -35.08024215698242, "logps/rejected": -1190.5296630859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2665466368198395, "rewards/margins": 11.801897048950195, "rewards/rejected": -11.535350799560547, "step": 5350 }, { "epoch": 0.32, "learning_rate": 4.300661383430081e-06, "logits/chosen": -2.9694600105285645, "logits/rejected": -2.8642418384552, "logps/chosen": -34.06355667114258, "logps/rejected": -1170.6053466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2508760392665863, "rewards/margins": 11.570866584777832, "rewards/rejected": -11.319990158081055, "step": 5360 }, { "epoch": 0.32, "learning_rate": 4.2970476641552304e-06, "logits/chosen": -2.962054967880249, "logits/rejected": -2.8461766242980957, "logps/chosen": -37.82260513305664, "logps/rejected": -1177.677490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2862320840358734, "rewards/margins": 11.686994552612305, "rewards/rejected": -11.400762557983398, "step": 5370 }, { "epoch": 0.32, "learning_rate": 4.293426158994375e-06, "logits/chosen": -2.978912591934204, "logits/rejected": -2.8361425399780273, "logps/chosen": -30.82650375366211, "logps/rejected": -1139.0159912109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2654479146003723, "rewards/margins": 11.285482406616211, "rewards/rejected": -11.020034790039062, "step": 5380 }, { "epoch": 0.32, "learning_rate": 4.289796883638042e-06, "logits/chosen": -2.963656187057495, "logits/rejected": -2.9037413597106934, "logps/chosen": -39.819053649902344, "logps/rejected": -1131.25390625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.27545469999313354, "rewards/margins": 11.205072402954102, "rewards/rejected": -10.929617881774902, "step": 5390 }, { "epoch": 0.32, "learning_rate": 4.2861598538104255e-06, "logits/chosen": -2.968256711959839, "logits/rejected": -2.8649821281433105, "logps/chosen": -54.61588668823242, "logps/rejected": -1173.4908447265625, "loss": 0.1161, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.06482221186161041, "rewards/margins": 11.40770435333252, "rewards/rejected": -11.342880249023438, "step": 5400 }, { "epoch": 0.32, "learning_rate": 4.282515085269315e-06, "logits/chosen": -2.9855988025665283, "logits/rejected": -2.85105562210083, "logps/chosen": -40.157936096191406, "logps/rejected": -1106.1693115234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2705349326133728, "rewards/margins": 10.94929313659668, "rewards/rejected": -10.678759574890137, "step": 5410 }, { "epoch": 0.32, "learning_rate": 4.278862593806029e-06, "logits/chosen": -2.9688711166381836, "logits/rejected": -2.8602845668792725, "logps/chosen": -34.183372497558594, "logps/rejected": -1145.6961669921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2778831124305725, "rewards/margins": 11.349536895751953, "rewards/rejected": -11.071653366088867, "step": 5420 }, { "epoch": 0.32, "learning_rate": 4.275202395245346e-06, "logits/chosen": -2.9579787254333496, "logits/rejected": -2.866832971572876, "logps/chosen": -39.16919708251953, "logps/rejected": -1097.012939453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2753898501396179, "rewards/margins": 10.880233764648438, "rewards/rejected": -10.604843139648438, "step": 5430 }, { "epoch": 0.32, "learning_rate": 4.271534505445438e-06, "logits/chosen": -2.980177879333496, "logits/rejected": -2.840693473815918, "logps/chosen": -40.25445556640625, "logps/rejected": -1154.109619140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2779989242553711, "rewards/margins": 11.42538833618164, "rewards/rejected": -11.147390365600586, "step": 5440 }, { "epoch": 0.32, "learning_rate": 4.267858940297799e-06, "logits/chosen": -2.9316563606262207, "logits/rejected": -2.8428473472595215, "logps/chosen": -34.153106689453125, "logps/rejected": -1182.520751953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26168566942214966, "rewards/margins": 11.703983306884766, "rewards/rejected": -11.442296981811523, "step": 5450 }, { "epoch": 0.33, "learning_rate": 4.264175715727176e-06, "logits/chosen": -2.962459087371826, "logits/rejected": -2.8201887607574463, "logps/chosen": -37.47086715698242, "logps/rejected": -1164.06494140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2609640657901764, "rewards/margins": 11.514555931091309, "rewards/rejected": -11.253591537475586, "step": 5460 }, { "epoch": 0.33, "learning_rate": 4.2604848476915015e-06, "logits/chosen": -2.9381210803985596, "logits/rejected": -2.851914882659912, "logps/chosen": -38.12614059448242, "logps/rejected": -1199.5472412109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.22663073241710663, "rewards/margins": 11.855208396911621, "rewards/rejected": -11.628576278686523, "step": 5470 }, { "epoch": 0.33, "learning_rate": 4.256786352181827e-06, "logits/chosen": -2.948462963104248, "logits/rejected": -2.8870158195495605, "logps/chosen": -43.824623107910156, "logps/rejected": -1166.6280517578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23041662573814392, "rewards/margins": 11.506271362304688, "rewards/rejected": -11.275854110717773, "step": 5480 }, { "epoch": 0.33, "learning_rate": 4.253080245222246e-06, "logits/chosen": -2.9832558631896973, "logits/rejected": -2.90729022026062, "logps/chosen": -40.94194030761719, "logps/rejected": -1197.1387939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2814161777496338, "rewards/margins": 11.876954078674316, "rewards/rejected": -11.595538139343262, "step": 5490 }, { "epoch": 0.33, "learning_rate": 4.249366542869835e-06, "logits/chosen": -2.973797559738159, "logits/rejected": -2.8705148696899414, "logps/chosen": -40.14179229736328, "logps/rejected": -1153.10986328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26586565375328064, "rewards/margins": 11.414573669433594, "rewards/rejected": -11.148707389831543, "step": 5500 }, { "epoch": 0.33, "learning_rate": 4.245645261214572e-06, "logits/chosen": -2.9446394443511963, "logits/rejected": -2.851593494415283, "logps/chosen": -47.64521789550781, "logps/rejected": -1127.107666015625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20615415275096893, "rewards/margins": 11.100519180297852, "rewards/rejected": -10.894364356994629, "step": 5510 }, { "epoch": 0.33, "learning_rate": 4.24191641637928e-06, "logits/chosen": -2.970038414001465, "logits/rejected": -2.8868021965026855, "logps/chosen": -38.56375503540039, "logps/rejected": -1170.857421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2535915672779083, "rewards/margins": 11.570221900939941, "rewards/rejected": -11.316629409790039, "step": 5520 }, { "epoch": 0.33, "learning_rate": 4.238180024519543e-06, "logits/chosen": -2.961519956588745, "logits/rejected": -2.8856730461120605, "logps/chosen": -39.27671432495117, "logps/rejected": -1134.1868896484375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.23865613341331482, "rewards/margins": 11.200682640075684, "rewards/rejected": -10.962026596069336, "step": 5530 }, { "epoch": 0.33, "learning_rate": 4.234436101823648e-06, "logits/chosen": -2.969996929168701, "logits/rejected": -2.8816919326782227, "logps/chosen": -35.30864334106445, "logps/rejected": -1209.2469482421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2543494999408722, "rewards/margins": 11.959943771362305, "rewards/rejected": -11.705595016479492, "step": 5540 }, { "epoch": 0.33, "learning_rate": 4.230684664512509e-06, "logits/chosen": -2.97570538520813, "logits/rejected": -2.855786085128784, "logps/chosen": -50.81586456298828, "logps/rejected": -1217.2237548828125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.14057140052318573, "rewards/margins": 11.946771621704102, "rewards/rejected": -11.80620002746582, "step": 5550 }, { "epoch": 0.33, "learning_rate": 4.226925728839598e-06, "logits/chosen": -2.9823174476623535, "logits/rejected": -2.8549113273620605, "logps/chosen": -42.55535125732422, "logps/rejected": -1184.54833984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2795482575893402, "rewards/margins": 11.740431785583496, "rewards/rejected": -11.460882186889648, "step": 5560 }, { "epoch": 0.33, "learning_rate": 4.223159311090874e-06, "logits/chosen": -2.941434144973755, "logits/rejected": -2.818748950958252, "logps/chosen": -40.2122802734375, "logps/rejected": -1205.829833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2600758671760559, "rewards/margins": 11.938928604125977, "rewards/rejected": -11.678852081298828, "step": 5570 }, { "epoch": 0.33, "learning_rate": 4.2193854275847115e-06, "logits/chosen": -2.993622064590454, "logits/rejected": -2.857090473175049, "logps/chosen": -33.56757354736328, "logps/rejected": -1189.461181640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2613045275211334, "rewards/margins": 11.778921127319336, "rewards/rejected": -11.517618179321289, "step": 5580 }, { "epoch": 0.33, "learning_rate": 4.215604094671835e-06, "logits/chosen": -2.977385997772217, "logits/rejected": -2.865907669067383, "logps/chosen": -37.69004821777344, "logps/rejected": -1222.4765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2676447927951813, "rewards/margins": 12.109650611877441, "rewards/rejected": -11.842005729675293, "step": 5590 }, { "epoch": 0.33, "learning_rate": 4.211815328735239e-06, "logits/chosen": -2.945812940597534, "logits/rejected": -2.834010601043701, "logps/chosen": -42.764583587646484, "logps/rejected": -1199.1871337890625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.2208024561405182, "rewards/margins": 11.825658798217773, "rewards/rejected": -11.604856491088867, "step": 5600 }, { "epoch": 0.33, "learning_rate": 4.208019146190127e-06, "logits/chosen": -2.9310336112976074, "logits/rejected": -2.8395543098449707, "logps/chosen": -42.1476936340332, "logps/rejected": -1266.361083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.21116404235363007, "rewards/margins": 12.506176948547363, "rewards/rejected": -12.295011520385742, "step": 5610 }, { "epoch": 0.34, "learning_rate": 4.204215563483833e-06, "logits/chosen": -2.956695079803467, "logits/rejected": -2.8684439659118652, "logps/chosen": -57.94805145263672, "logps/rejected": -1150.477783203125, "loss": 0.0352, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.0009952023392543197, "rewards/margins": 11.123412132263184, "rewards/rejected": -11.124406814575195, "step": 5620 }, { "epoch": 0.34, "learning_rate": 4.200404597095754e-06, "logits/chosen": -2.9468894004821777, "logits/rejected": -2.824767589569092, "logps/chosen": -81.07737731933594, "logps/rejected": -1196.653564453125, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -0.13358251750469208, "rewards/margins": 11.459383964538574, "rewards/rejected": -11.592967987060547, "step": 5630 }, { "epoch": 0.34, "learning_rate": 4.196586263537277e-06, "logits/chosen": -2.9309632778167725, "logits/rejected": -2.829756259918213, "logps/chosen": -43.06009292602539, "logps/rejected": -1152.0164794921875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2564636766910553, "rewards/margins": 11.392030715942383, "rewards/rejected": -11.135568618774414, "step": 5640 }, { "epoch": 0.34, "learning_rate": 4.192760579351708e-06, "logits/chosen": -2.965984344482422, "logits/rejected": -2.8314125537872314, "logps/chosen": -36.55514144897461, "logps/rejected": -1098.070556640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2599940896034241, "rewards/margins": 10.855396270751953, "rewards/rejected": -10.595402717590332, "step": 5650 }, { "epoch": 0.34, "learning_rate": 4.188927561114201e-06, "logits/chosen": -2.9534497261047363, "logits/rejected": -2.831744432449341, "logps/chosen": -36.046146392822266, "logps/rejected": -1129.1776123046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2772894501686096, "rewards/margins": 11.19037914276123, "rewards/rejected": -10.913087844848633, "step": 5660 }, { "epoch": 0.34, "learning_rate": 4.185087225431686e-06, "logits/chosen": -2.9626553058624268, "logits/rejected": -2.8671679496765137, "logps/chosen": -33.96709060668945, "logps/rejected": -1149.2548828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27136075496673584, "rewards/margins": 11.384600639343262, "rewards/rejected": -11.113241195678711, "step": 5670 }, { "epoch": 0.34, "learning_rate": 4.181239588942793e-06, "logits/chosen": -2.982642412185669, "logits/rejected": -2.846287488937378, "logps/chosen": -45.982566833496094, "logps/rejected": -1157.0694580078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2531251311302185, "rewards/margins": 11.449549674987793, "rewards/rejected": -11.196425437927246, "step": 5680 }, { "epoch": 0.34, "learning_rate": 4.177384668317788e-06, "logits/chosen": -2.971146821975708, "logits/rejected": -2.8680100440979004, "logps/chosen": -34.74654769897461, "logps/rejected": -1125.4078369140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.264121949672699, "rewards/margins": 11.151817321777344, "rewards/rejected": -10.8876953125, "step": 5690 }, { "epoch": 0.34, "learning_rate": 4.173522480258494e-06, "logits/chosen": -2.9120564460754395, "logits/rejected": -2.8375182151794434, "logps/chosen": -37.7298698425293, "logps/rejected": -1116.786865234375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.26953521370887756, "rewards/margins": 11.064477920532227, "rewards/rejected": -10.794941902160645, "step": 5700 }, { "epoch": 0.34, "learning_rate": 4.1696530414982225e-06, "logits/chosen": -2.9632601737976074, "logits/rejected": -2.8655548095703125, "logps/chosen": -40.98187255859375, "logps/rejected": -1154.717041015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27522343397140503, "rewards/margins": 11.437749862670898, "rewards/rejected": -11.162527084350586, "step": 5710 }, { "epoch": 0.34, "learning_rate": 4.165776368801695e-06, "logits/chosen": -2.9524037837982178, "logits/rejected": -2.8494913578033447, "logps/chosen": -32.93456268310547, "logps/rejected": -1164.995849609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.26844412088394165, "rewards/margins": 11.542767524719238, "rewards/rejected": -11.274324417114258, "step": 5720 }, { "epoch": 0.34, "learning_rate": 4.16189247896498e-06, "logits/chosen": -2.952301263809204, "logits/rejected": -2.852144718170166, "logps/chosen": -37.571495056152344, "logps/rejected": -1146.27880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2774450182914734, "rewards/margins": 11.363956451416016, "rewards/rejected": -11.08651065826416, "step": 5730 }, { "epoch": 0.34, "learning_rate": 4.1580013888154126e-06, "logits/chosen": -2.941039562225342, "logits/rejected": -2.814276933670044, "logps/chosen": -35.799598693847656, "logps/rejected": -1142.436279296875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.26437145471572876, "rewards/margins": 11.318870544433594, "rewards/rejected": -11.0545015335083, "step": 5740 }, { "epoch": 0.34, "learning_rate": 4.154103115211523e-06, "logits/chosen": -2.9374048709869385, "logits/rejected": -2.829129934310913, "logps/chosen": -33.37335968017578, "logps/rejected": -1121.00634765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26429784297943115, "rewards/margins": 11.10743522644043, "rewards/rejected": -10.843137741088867, "step": 5750 }, { "epoch": 0.34, "learning_rate": 4.150197675042966e-06, "logits/chosen": -2.966052293777466, "logits/rejected": -2.893841028213501, "logps/chosen": -33.51409149169922, "logps/rejected": -1174.79638671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.27863049507141113, "rewards/margins": 11.634002685546875, "rewards/rejected": -11.355371475219727, "step": 5760 }, { "epoch": 0.34, "learning_rate": 4.146285085230447e-06, "logits/chosen": -2.931187868118286, "logits/rejected": -2.8616998195648193, "logps/chosen": -38.56104278564453, "logps/rejected": -1137.2467041015625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 0.28891366720199585, "rewards/margins": 11.29463005065918, "rewards/rejected": -11.005716323852539, "step": 5770 }, { "epoch": 0.34, "learning_rate": 4.1423653627256445e-06, "logits/chosen": -2.971792459487915, "logits/rejected": -2.8718762397766113, "logps/chosen": -34.804161071777344, "logps/rejected": -1158.776611328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24224014580249786, "rewards/margins": 11.449408531188965, "rewards/rejected": -11.207168579101562, "step": 5780 }, { "epoch": 0.35, "learning_rate": 4.138438524511145e-06, "logits/chosen": -2.960482597351074, "logits/rejected": -2.854236125946045, "logps/chosen": -36.27681350708008, "logps/rejected": -1199.9127197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2576017379760742, "rewards/margins": 11.875042915344238, "rewards/rejected": -11.617439270019531, "step": 5790 }, { "epoch": 0.35, "learning_rate": 4.134504587600359e-06, "logits/chosen": -2.931502103805542, "logits/rejected": -2.8224070072174072, "logps/chosen": -38.95409393310547, "logps/rejected": -1146.715087890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25337329506874084, "rewards/margins": 11.332015037536621, "rewards/rejected": -11.078641891479492, "step": 5800 }, { "epoch": 0.35, "learning_rate": 4.130563569037458e-06, "logits/chosen": -2.9682419300079346, "logits/rejected": -2.861370325088501, "logps/chosen": -52.2618293762207, "logps/rejected": -1166.738525390625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.173188716173172, "rewards/margins": 11.461832046508789, "rewards/rejected": -11.288643836975098, "step": 5810 }, { "epoch": 0.35, "learning_rate": 4.126615485897292e-06, "logits/chosen": -2.9382388591766357, "logits/rejected": -2.855658769607544, "logps/chosen": -51.48486328125, "logps/rejected": -1268.4949951171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.09962350130081177, "rewards/margins": 12.404073715209961, "rewards/rejected": -12.304449081420898, "step": 5820 }, { "epoch": 0.35, "learning_rate": 4.12266035528532e-06, "logits/chosen": -2.9454970359802246, "logits/rejected": -2.835984706878662, "logps/chosen": -88.15637969970703, "logps/rejected": -1233.4547119140625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.22888760268688202, "rewards/margins": 11.723175048828125, "rewards/rejected": -11.952062606811523, "step": 5830 }, { "epoch": 0.35, "learning_rate": 4.118698194337536e-06, "logits/chosen": -2.9588065147399902, "logits/rejected": -2.8312747478485107, "logps/chosen": -141.01443481445312, "logps/rejected": -1292.294189453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7853646278381348, "rewards/margins": 11.754876136779785, "rewards/rejected": -12.540239334106445, "step": 5840 }, { "epoch": 0.35, "learning_rate": 4.114729020220392e-06, "logits/chosen": -2.961221218109131, "logits/rejected": -2.819941282272339, "logps/chosen": -85.80769348144531, "logps/rejected": -1177.177490234375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.2032221257686615, "rewards/margins": 11.196673393249512, "rewards/rejected": -11.399895668029785, "step": 5850 }, { "epoch": 0.35, "learning_rate": 4.110752850130724e-06, "logits/chosen": -2.9745564460754395, "logits/rejected": -2.8104653358459473, "logps/chosen": -71.0392837524414, "logps/rejected": -1307.9666748046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.00993295293301344, "rewards/margins": 12.701044082641602, "rewards/rejected": -12.710977554321289, "step": 5860 }, { "epoch": 0.35, "learning_rate": 4.106769701295683e-06, "logits/chosen": -2.9861247539520264, "logits/rejected": -2.8488729000091553, "logps/chosen": -53.94163131713867, "logps/rejected": -1282.3289794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.07812146097421646, "rewards/margins": 12.520390510559082, "rewards/rejected": -12.442266464233398, "step": 5870 }, { "epoch": 0.35, "learning_rate": 4.102779590972652e-06, "logits/chosen": -2.9439337253570557, "logits/rejected": -2.8084330558776855, "logps/chosen": -52.471031188964844, "logps/rejected": -1257.429931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.16071149706840515, "rewards/margins": 12.356766700744629, "rewards/rejected": -12.19605541229248, "step": 5880 }, { "epoch": 0.35, "learning_rate": 4.098782536449179e-06, "logits/chosen": -2.947552442550659, "logits/rejected": -2.8340811729431152, "logps/chosen": -55.031700134277344, "logps/rejected": -1236.2286376953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.0742129236459732, "rewards/margins": 12.049055099487305, "rewards/rejected": -11.97484302520752, "step": 5890 }, { "epoch": 0.35, "learning_rate": 4.094778555042893e-06, "logits/chosen": -2.9471631050109863, "logits/rejected": -2.8495709896087646, "logps/chosen": -53.44951248168945, "logps/rejected": -1179.4134521484375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.11755702644586563, "rewards/margins": 11.541104316711426, "rewards/rejected": -11.42354679107666, "step": 5900 }, { "epoch": 0.35, "learning_rate": 4.090767664101442e-06, "logits/chosen": -2.9276411533355713, "logits/rejected": -2.8484439849853516, "logps/chosen": -40.390480041503906, "logps/rejected": -1182.739013671875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.24742570519447327, "rewards/margins": 11.69420337677002, "rewards/rejected": -11.44677734375, "step": 5910 }, { "epoch": 0.35, "learning_rate": 4.086749881002403e-06, "logits/chosen": -2.979123592376709, "logits/rejected": -2.843557834625244, "logps/chosen": -48.88060760498047, "logps/rejected": -1256.8289794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.1362086832523346, "rewards/margins": 12.33609390258789, "rewards/rejected": -12.1998872756958, "step": 5920 }, { "epoch": 0.35, "learning_rate": 4.0827252231532185e-06, "logits/chosen": -3.0027053356170654, "logits/rejected": -2.880157947540283, "logps/chosen": -37.320594787597656, "logps/rejected": -1218.24609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25991564989089966, "rewards/margins": 12.075892448425293, "rewards/rejected": -11.8159761428833, "step": 5930 }, { "epoch": 0.35, "learning_rate": 4.078693707991115e-06, "logits/chosen": -2.9616711139678955, "logits/rejected": -2.8583643436431885, "logps/chosen": -38.60324478149414, "logps/rejected": -1243.7193603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2471843957901001, "rewards/margins": 12.304712295532227, "rewards/rejected": -12.057527542114258, "step": 5940 }, { "epoch": 0.35, "learning_rate": 4.0746553529830274e-06, "logits/chosen": -2.999906301498413, "logits/rejected": -2.872551441192627, "logps/chosen": -43.17071533203125, "logps/rejected": -1177.928466796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.238895982503891, "rewards/margins": 11.638501167297363, "rewards/rejected": -11.399603843688965, "step": 5950 }, { "epoch": 0.36, "learning_rate": 4.070610175625528e-06, "logits/chosen": -3.0004849433898926, "logits/rejected": -2.8950695991516113, "logps/chosen": -39.673946380615234, "logps/rejected": -1296.5655517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2627701759338379, "rewards/margins": 12.85399055480957, "rewards/rejected": -12.591218948364258, "step": 5960 }, { "epoch": 0.36, "learning_rate": 4.066558193444746e-06, "logits/chosen": -2.955176830291748, "logits/rejected": -2.8709235191345215, "logps/chosen": -35.90766143798828, "logps/rejected": -1250.343994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.22466786205768585, "rewards/margins": 12.359811782836914, "rewards/rejected": -12.135144233703613, "step": 5970 }, { "epoch": 0.36, "learning_rate": 4.0624994239962935e-06, "logits/chosen": -2.946345329284668, "logits/rejected": -2.8499655723571777, "logps/chosen": -52.00640869140625, "logps/rejected": -1190.816650390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2263215035200119, "rewards/margins": 11.759523391723633, "rewards/rejected": -11.533201217651367, "step": 5980 }, { "epoch": 0.36, "learning_rate": 4.058433884865188e-06, "logits/chosen": -2.9856996536254883, "logits/rejected": -2.8987536430358887, "logps/chosen": -37.55769348144531, "logps/rejected": -1285.5849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24370077252388, "rewards/margins": 12.718389511108398, "rewards/rejected": -12.474689483642578, "step": 5990 }, { "epoch": 0.36, "learning_rate": 4.0543615936657785e-06, "logits/chosen": -2.936974048614502, "logits/rejected": -2.792173147201538, "logps/chosen": -48.796142578125, "logps/rejected": -1237.42431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24671487510204315, "rewards/margins": 12.251456260681152, "rewards/rejected": -12.004741668701172, "step": 6000 }, { "epoch": 0.36, "learning_rate": 4.050282568041668e-06, "logits/chosen": -2.9726555347442627, "logits/rejected": -2.8348541259765625, "logps/chosen": -40.503257751464844, "logps/rejected": -1157.927001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2502528131008148, "rewards/margins": 11.45900821685791, "rewards/rejected": -11.208754539489746, "step": 6010 }, { "epoch": 0.36, "learning_rate": 4.046196825665638e-06, "logits/chosen": -2.951873779296875, "logits/rejected": -2.8690340518951416, "logps/chosen": -34.05118179321289, "logps/rejected": -1247.45703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2351098358631134, "rewards/margins": 12.333869934082031, "rewards/rejected": -12.098759651184082, "step": 6020 }, { "epoch": 0.36, "learning_rate": 4.042104384239568e-06, "logits/chosen": -2.9804561138153076, "logits/rejected": -2.8583693504333496, "logps/chosen": -41.965599060058594, "logps/rejected": -1268.069091796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.21741525828838348, "rewards/margins": 12.522712707519531, "rewards/rejected": -12.305298805236816, "step": 6030 }, { "epoch": 0.36, "learning_rate": 4.038005261494364e-06, "logits/chosen": -2.996720790863037, "logits/rejected": -2.8846709728240967, "logps/chosen": -39.36211013793945, "logps/rejected": -1218.1517333984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24033427238464355, "rewards/margins": 12.042052268981934, "rewards/rejected": -11.801718711853027, "step": 6040 }, { "epoch": 0.36, "learning_rate": 4.033899475189877e-06, "logits/chosen": -2.971318006515503, "logits/rejected": -2.8614773750305176, "logps/chosen": -37.536712646484375, "logps/rejected": -1228.6805419921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24298326671123505, "rewards/margins": 12.150762557983398, "rewards/rejected": -11.907779693603516, "step": 6050 }, { "epoch": 0.36, "learning_rate": 4.029787043114835e-06, "logits/chosen": -2.970419406890869, "logits/rejected": -2.8537583351135254, "logps/chosen": -42.82847213745117, "logps/rejected": -1216.8851318359375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2252771556377411, "rewards/margins": 12.022607803344727, "rewards/rejected": -11.797331809997559, "step": 6060 }, { "epoch": 0.36, "learning_rate": 4.025667983086753e-06, "logits/chosen": -2.975149631500244, "logits/rejected": -2.873218536376953, "logps/chosen": -51.729827880859375, "logps/rejected": -1260.8369140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.12390215694904327, "rewards/margins": 12.348201751708984, "rewards/rejected": -12.224298477172852, "step": 6070 }, { "epoch": 0.36, "learning_rate": 4.021542312951862e-06, "logits/chosen": -2.995450735092163, "logits/rejected": -2.8956902027130127, "logps/chosen": -49.945247650146484, "logps/rejected": -1226.35595703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2067936658859253, "rewards/margins": 12.108040809631348, "rewards/rejected": -11.901247024536133, "step": 6080 }, { "epoch": 0.36, "learning_rate": 4.017410050585038e-06, "logits/chosen": -2.961235523223877, "logits/rejected": -2.869438648223877, "logps/chosen": -46.453426361083984, "logps/rejected": -1217.749267578125, "loss": 0.041, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.14411117136478424, "rewards/margins": 11.954954147338867, "rewards/rejected": -11.810842514038086, "step": 6090 }, { "epoch": 0.36, "learning_rate": 4.013271213889712e-06, "logits/chosen": -2.9681637287139893, "logits/rejected": -2.866706609725952, "logps/chosen": -42.829315185546875, "logps/rejected": -1188.4849853515625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.24817070364952087, "rewards/margins": 11.764603614807129, "rewards/rejected": -11.516432762145996, "step": 6100 }, { "epoch": 0.36, "learning_rate": 4.009125820797802e-06, "logits/chosen": -2.9840054512023926, "logits/rejected": -2.9019675254821777, "logps/chosen": -37.75501251220703, "logps/rejected": -1162.066162109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2783030867576599, "rewards/margins": 11.508886337280273, "rewards/rejected": -11.230584144592285, "step": 6110 }, { "epoch": 0.36, "learning_rate": 4.0049738892696345e-06, "logits/chosen": -2.9408745765686035, "logits/rejected": -2.8598902225494385, "logps/chosen": -37.966434478759766, "logps/rejected": -1176.622314453125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.23577305674552917, "rewards/margins": 11.631083488464355, "rewards/rejected": -11.39531135559082, "step": 6120 }, { "epoch": 0.37, "learning_rate": 4.000815437293858e-06, "logits/chosen": -2.9253220558166504, "logits/rejected": -2.8436031341552734, "logps/chosen": -41.48207092285156, "logps/rejected": -1167.5062255859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2620571255683899, "rewards/margins": 11.563077926635742, "rewards/rejected": -11.30102252960205, "step": 6130 }, { "epoch": 0.37, "learning_rate": 3.996650482887377e-06, "logits/chosen": -3.0145339965820312, "logits/rejected": -2.9311721324920654, "logps/chosen": -39.73225402832031, "logps/rejected": -1150.7486572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2725737392902374, "rewards/margins": 11.41012191772461, "rewards/rejected": -11.137547492980957, "step": 6140 }, { "epoch": 0.37, "learning_rate": 3.992479044095267e-06, "logits/chosen": -2.976961612701416, "logits/rejected": -2.8790996074676514, "logps/chosen": -49.763492584228516, "logps/rejected": -1101.8179931640625, "loss": 0.0481, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.12242066860198975, "rewards/margins": 10.768552780151367, "rewards/rejected": -10.64613151550293, "step": 6150 }, { "epoch": 0.37, "learning_rate": 3.988301138990697e-06, "logits/chosen": -2.9676034450531006, "logits/rejected": -2.9143168926239014, "logps/chosen": -31.514598846435547, "logps/rejected": -1090.146240234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.251849502325058, "rewards/margins": 10.773566246032715, "rewards/rejected": -10.521716117858887, "step": 6160 }, { "epoch": 0.37, "learning_rate": 3.984116785674852e-06, "logits/chosen": -2.976090431213379, "logits/rejected": -2.8775782585144043, "logps/chosen": -33.26485824584961, "logps/rejected": -1099.267578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.27277716994285583, "rewards/margins": 10.884699821472168, "rewards/rejected": -10.611923217773438, "step": 6170 }, { "epoch": 0.37, "learning_rate": 3.979926002276856e-06, "logits/chosen": -2.9747276306152344, "logits/rejected": -2.9039433002471924, "logps/chosen": -39.975135803222656, "logps/rejected": -1156.0147705078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27796146273612976, "rewards/margins": 11.4613618850708, "rewards/rejected": -11.183399200439453, "step": 6180 }, { "epoch": 0.37, "learning_rate": 3.97572880695369e-06, "logits/chosen": -2.9873762130737305, "logits/rejected": -2.9010043144226074, "logps/chosen": -34.952484130859375, "logps/rejected": -1111.000732421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2731941342353821, "rewards/margins": 11.014963150024414, "rewards/rejected": -10.741769790649414, "step": 6190 }, { "epoch": 0.37, "learning_rate": 3.971525217890117e-06, "logits/chosen": -2.94822359085083, "logits/rejected": -2.87857723236084, "logps/chosen": -38.43194580078125, "logps/rejected": -1065.7220458984375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2654396891593933, "rewards/margins": 10.53149700164795, "rewards/rejected": -10.266057968139648, "step": 6200 }, { "epoch": 0.37, "learning_rate": 3.967315253298599e-06, "logits/chosen": -2.959761381149292, "logits/rejected": -2.9023754596710205, "logps/chosen": -35.243568420410156, "logps/rejected": -1200.04052734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2790403962135315, "rewards/margins": 11.906234741210938, "rewards/rejected": -11.627195358276367, "step": 6210 }, { "epoch": 0.37, "learning_rate": 3.963098931419223e-06, "logits/chosen": -3.002694606781006, "logits/rejected": -2.8812077045440674, "logps/chosen": -43.97034454345703, "logps/rejected": -1125.1392822265625, "loss": 0.0263, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.18536365032196045, "rewards/margins": 11.06453800201416, "rewards/rejected": -10.879175186157227, "step": 6220 }, { "epoch": 0.37, "learning_rate": 3.958876270519619e-06, "logits/chosen": -2.936555862426758, "logits/rejected": -2.8906123638153076, "logps/chosen": -37.1700439453125, "logps/rejected": -1150.360107421875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.2765842378139496, "rewards/margins": 11.411191940307617, "rewards/rejected": -11.134607315063477, "step": 6230 }, { "epoch": 0.37, "learning_rate": 3.9546472888948825e-06, "logits/chosen": -3.005706310272217, "logits/rejected": -2.925377368927002, "logps/chosen": -37.501914978027344, "logps/rejected": -1169.1650390625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": 0.25691691040992737, "rewards/margins": 11.57032585144043, "rewards/rejected": -11.313407897949219, "step": 6240 }, { "epoch": 0.37, "learning_rate": 3.950412004867491e-06, "logits/chosen": -2.9609224796295166, "logits/rejected": -2.834057331085205, "logps/chosen": -48.59664535522461, "logps/rejected": -1240.451416015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2148861140012741, "rewards/margins": 12.248612403869629, "rewards/rejected": -12.033724784851074, "step": 6250 }, { "epoch": 0.37, "learning_rate": 3.94617043678723e-06, "logits/chosen": -2.940373659133911, "logits/rejected": -2.8487985134124756, "logps/chosen": -51.54941940307617, "logps/rejected": -1182.2481689453125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.1099018007516861, "rewards/margins": 11.561564445495605, "rewards/rejected": -11.45166301727295, "step": 6260 }, { "epoch": 0.37, "learning_rate": 3.941922603031113e-06, "logits/chosen": -2.9776337146759033, "logits/rejected": -2.8563811779022217, "logps/chosen": -32.940940856933594, "logps/rejected": -1269.7301025390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.25012141466140747, "rewards/margins": 12.565393447875977, "rewards/rejected": -12.315271377563477, "step": 6270 }, { "epoch": 0.37, "learning_rate": 3.937668522003295e-06, "logits/chosen": -2.9969348907470703, "logits/rejected": -2.9038565158843994, "logps/chosen": -48.3404426574707, "logps/rejected": -1162.013427734375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 0.1645338237285614, "rewards/margins": 11.410345077514648, "rewards/rejected": -11.245810508728027, "step": 6280 }, { "epoch": 0.38, "learning_rate": 3.933408212135003e-06, "logits/chosen": -2.959956645965576, "logits/rejected": -2.8994650840759277, "logps/chosen": -31.950374603271484, "logps/rejected": -1216.8792724609375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.28773874044418335, "rewards/margins": 12.085836410522461, "rewards/rejected": -11.798097610473633, "step": 6290 }, { "epoch": 0.38, "learning_rate": 3.929141691884448e-06, "logits/chosen": -2.995929479598999, "logits/rejected": -2.886451005935669, "logps/chosen": -35.68267822265625, "logps/rejected": -1279.7657470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2622206211090088, "rewards/margins": 12.684645652770996, "rewards/rejected": -12.42242431640625, "step": 6300 }, { "epoch": 0.38, "learning_rate": 3.9248689797367515e-06, "logits/chosen": -2.9518942832946777, "logits/rejected": -2.897639036178589, "logps/chosen": -37.871131896972656, "logps/rejected": -1194.183349609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2649412751197815, "rewards/margins": 11.825479507446289, "rewards/rejected": -11.560537338256836, "step": 6310 }, { "epoch": 0.38, "learning_rate": 3.920590094203856e-06, "logits/chosen": -2.9817211627960205, "logits/rejected": -2.903563976287842, "logps/chosen": -38.10309982299805, "logps/rejected": -1206.3885498046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.29698652029037476, "rewards/margins": 11.9892578125, "rewards/rejected": -11.692273139953613, "step": 6320 }, { "epoch": 0.38, "learning_rate": 3.916305053824458e-06, "logits/chosen": -2.9885334968566895, "logits/rejected": -2.881443500518799, "logps/chosen": -36.26619338989258, "logps/rejected": -1235.4827880859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2886439561843872, "rewards/margins": 12.257651329040527, "rewards/rejected": -11.96900749206543, "step": 6330 }, { "epoch": 0.38, "learning_rate": 3.912013877163916e-06, "logits/chosen": -2.9744632244110107, "logits/rejected": -2.858530282974243, "logps/chosen": -35.26032257080078, "logps/rejected": -1220.6048583984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2847423255443573, "rewards/margins": 12.11082649230957, "rewards/rejected": -11.82608413696289, "step": 6340 }, { "epoch": 0.38, "learning_rate": 3.907716582814175e-06, "logits/chosen": -2.9679882526397705, "logits/rejected": -2.890401840209961, "logps/chosen": -34.63629150390625, "logps/rejected": -1197.85498046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2728061378002167, "rewards/margins": 11.88093376159668, "rewards/rejected": -11.60812759399414, "step": 6350 }, { "epoch": 0.38, "learning_rate": 3.903413189393687e-06, "logits/chosen": -2.961139678955078, "logits/rejected": -2.901623249053955, "logps/chosen": -34.88634490966797, "logps/rejected": -1259.202392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2700972557067871, "rewards/margins": 12.482666015625, "rewards/rejected": -12.212568283081055, "step": 6360 }, { "epoch": 0.38, "learning_rate": 3.899103715547325e-06, "logits/chosen": -2.98533296585083, "logits/rejected": -2.8932411670684814, "logps/chosen": -34.99639129638672, "logps/rejected": -1237.0377197265625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.29349982738494873, "rewards/margins": 12.288232803344727, "rewards/rejected": -11.994732856750488, "step": 6370 }, { "epoch": 0.38, "learning_rate": 3.894788179946313e-06, "logits/chosen": -2.967682361602783, "logits/rejected": -2.8516623973846436, "logps/chosen": -46.9188117980957, "logps/rejected": -1194.538330078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20385102927684784, "rewards/margins": 11.775546073913574, "rewards/rejected": -11.571695327758789, "step": 6380 }, { "epoch": 0.38, "learning_rate": 3.890466601288131e-06, "logits/chosen": -2.9750328063964844, "logits/rejected": -2.874901294708252, "logps/chosen": -31.782339096069336, "logps/rejected": -1217.439208984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26016831398010254, "rewards/margins": 12.04824447631836, "rewards/rejected": -11.78807544708252, "step": 6390 }, { "epoch": 0.38, "learning_rate": 3.886138998296446e-06, "logits/chosen": -2.9724738597869873, "logits/rejected": -2.871464967727661, "logps/chosen": -33.0458869934082, "logps/rejected": -1213.8704833984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2736976146697998, "rewards/margins": 12.041886329650879, "rewards/rejected": -11.768189430236816, "step": 6400 }, { "epoch": 0.38, "learning_rate": 3.881805389721021e-06, "logits/chosen": -2.9624316692352295, "logits/rejected": -2.8803231716156006, "logps/chosen": -37.68369674682617, "logps/rejected": -1234.304443359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27510881423950195, "rewards/margins": 12.234437942504883, "rewards/rejected": -11.959330558776855, "step": 6410 }, { "epoch": 0.38, "learning_rate": 3.877465794337648e-06, "logits/chosen": -2.976207733154297, "logits/rejected": -2.908445358276367, "logps/chosen": -35.881195068359375, "logps/rejected": -1195.8072509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.28505417704582214, "rewards/margins": 11.869986534118652, "rewards/rejected": -11.584933280944824, "step": 6420 }, { "epoch": 0.38, "learning_rate": 3.873120230948045e-06, "logits/chosen": -2.9825499057769775, "logits/rejected": -2.8971526622772217, "logps/chosen": -32.22164535522461, "logps/rejected": -1239.249755859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2743077278137207, "rewards/margins": 12.279618263244629, "rewards/rejected": -12.005309104919434, "step": 6430 }, { "epoch": 0.38, "learning_rate": 3.868768718379798e-06, "logits/chosen": -2.9811923503875732, "logits/rejected": -2.8814303874969482, "logps/chosen": -35.63703155517578, "logps/rejected": -1231.9984130859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2515020966529846, "rewards/margins": 12.18869400024414, "rewards/rejected": -11.937192916870117, "step": 6440 }, { "epoch": 0.38, "learning_rate": 3.8644112754862614e-06, "logits/chosen": -3.014219045639038, "logits/rejected": -2.894878625869751, "logps/chosen": -38.526817321777344, "logps/rejected": -1227.487548828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.26937729120254517, "rewards/margins": 12.150225639343262, "rewards/rejected": -11.88084888458252, "step": 6450 }, { "epoch": 0.39, "learning_rate": 3.860047921146487e-06, "logits/chosen": -2.9796042442321777, "logits/rejected": -2.868691921234131, "logps/chosen": -48.74736785888672, "logps/rejected": -1196.425537109375, "loss": 0.0213, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.1612693965435028, "rewards/margins": 11.764142990112305, "rewards/rejected": -11.602873802185059, "step": 6460 }, { "epoch": 0.39, "learning_rate": 3.855678674265136e-06, "logits/chosen": -2.965853214263916, "logits/rejected": -2.874544620513916, "logps/chosen": -35.79082489013672, "logps/rejected": -1167.725341796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2831656038761139, "rewards/margins": 11.58508014678955, "rewards/rejected": -11.301915168762207, "step": 6470 }, { "epoch": 0.39, "learning_rate": 3.851303553772402e-06, "logits/chosen": -2.9961695671081543, "logits/rejected": -2.8981900215148926, "logps/chosen": -32.54561996459961, "logps/rejected": -1183.7755126953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27932971715927124, "rewards/margins": 11.7438325881958, "rewards/rejected": -11.464502334594727, "step": 6480 }, { "epoch": 0.39, "learning_rate": 3.846922578623924e-06, "logits/chosen": -2.9879133701324463, "logits/rejected": -2.8889293670654297, "logps/chosen": -38.4987678527832, "logps/rejected": -1235.765380859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.29408618807792664, "rewards/margins": 12.276421546936035, "rewards/rejected": -11.982336044311523, "step": 6490 }, { "epoch": 0.39, "learning_rate": 3.84253576780071e-06, "logits/chosen": -2.980134963989258, "logits/rejected": -2.9010727405548096, "logps/chosen": -53.19743728637695, "logps/rejected": -1172.748291015625, "loss": 0.0716, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.10201660543680191, "rewards/margins": 11.460982322692871, "rewards/rejected": -11.358965873718262, "step": 6500 }, { "epoch": 0.39, "learning_rate": 3.83814314030905e-06, "logits/chosen": -2.9797983169555664, "logits/rejected": -2.8730082511901855, "logps/chosen": -34.15122604370117, "logps/rejected": -1195.209228515625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.27806130051612854, "rewards/margins": 11.846278190612793, "rewards/rejected": -11.568216323852539, "step": 6510 }, { "epoch": 0.39, "learning_rate": 3.833744715180433e-06, "logits/chosen": -2.9937429428100586, "logits/rejected": -2.9062461853027344, "logps/chosen": -49.65286636352539, "logps/rejected": -1144.61865234375, "loss": 0.0917, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.12974250316619873, "rewards/margins": 11.21839427947998, "rewards/rejected": -11.088653564453125, "step": 6520 }, { "epoch": 0.39, "learning_rate": 3.829340511471471e-06, "logits/chosen": -2.9602580070495605, "logits/rejected": -2.9142675399780273, "logps/chosen": -38.744564056396484, "logps/rejected": -1097.558837890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2749900817871094, "rewards/margins": 10.884711265563965, "rewards/rejected": -10.609720230102539, "step": 6530 }, { "epoch": 0.39, "learning_rate": 3.824930548263811e-06, "logits/chosen": -2.9947903156280518, "logits/rejected": -2.903127431869507, "logps/chosen": -33.646820068359375, "logps/rejected": -1154.123291015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.3004276752471924, "rewards/margins": 11.464394569396973, "rewards/rejected": -11.16396713256836, "step": 6540 }, { "epoch": 0.39, "learning_rate": 3.82051484466405e-06, "logits/chosen": -2.9982190132141113, "logits/rejected": -2.887406826019287, "logps/chosen": -38.3655891418457, "logps/rejected": -1100.351318359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2587660253047943, "rewards/margins": 10.873112678527832, "rewards/rejected": -10.61434555053711, "step": 6550 }, { "epoch": 0.39, "learning_rate": 3.816093419803663e-06, "logits/chosen": -2.972909927368164, "logits/rejected": -2.8952138423919678, "logps/chosen": -37.18119430541992, "logps/rejected": -1094.8421630859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2926306426525116, "rewards/margins": 10.856956481933594, "rewards/rejected": -10.564325332641602, "step": 6560 }, { "epoch": 0.39, "learning_rate": 3.811666292838905e-06, "logits/chosen": -2.9745960235595703, "logits/rejected": -2.899721384048462, "logps/chosen": -40.044185638427734, "logps/rejected": -1104.3577880859375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2740420699119568, "rewards/margins": 10.945684432983398, "rewards/rejected": -10.67164134979248, "step": 6570 }, { "epoch": 0.39, "learning_rate": 3.8072334829507414e-06, "logits/chosen": -2.969742774963379, "logits/rejected": -2.9063210487365723, "logps/chosen": -44.7501220703125, "logps/rejected": -1087.185791015625, "loss": 0.0352, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.16024665534496307, "rewards/margins": 10.660860061645508, "rewards/rejected": -10.500612258911133, "step": 6580 }, { "epoch": 0.39, "learning_rate": 3.802795009344757e-06, "logits/chosen": -2.961656093597412, "logits/rejected": -2.8982510566711426, "logps/chosen": -35.548500061035156, "logps/rejected": -1080.971435546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2830962538719177, "rewards/margins": 10.715140342712402, "rewards/rejected": -10.432044982910156, "step": 6590 }, { "epoch": 0.39, "learning_rate": 3.798350891251076e-06, "logits/chosen": -2.960141897201538, "logits/rejected": -2.886611223220825, "logps/chosen": -32.94182205200195, "logps/rejected": -1146.6732177734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.27426815032958984, "rewards/margins": 11.37125015258789, "rewards/rejected": -11.0969820022583, "step": 6600 }, { "epoch": 0.39, "learning_rate": 3.7939011479242784e-06, "logits/chosen": -2.983370542526245, "logits/rejected": -2.900102138519287, "logps/chosen": -34.16075897216797, "logps/rejected": -1075.8597412109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.28570055961608887, "rewards/margins": 10.672664642333984, "rewards/rejected": -10.386964797973633, "step": 6610 }, { "epoch": 0.39, "learning_rate": 3.7894457986433143e-06, "logits/chosen": -2.9408962726593018, "logits/rejected": -2.847820997238159, "logps/chosen": -35.116336822509766, "logps/rejected": -1057.8084716796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2652117609977722, "rewards/margins": 10.449121475219727, "rewards/rejected": -10.18390941619873, "step": 6620 }, { "epoch": 0.4, "learning_rate": 3.7849848627114248e-06, "logits/chosen": -2.972806215286255, "logits/rejected": -2.8824875354766846, "logps/chosen": -34.461856842041016, "logps/rejected": -1130.7637939453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.28763484954833984, "rewards/margins": 11.22070598602295, "rewards/rejected": -10.933072090148926, "step": 6630 }, { "epoch": 0.4, "learning_rate": 3.7805183594560525e-06, "logits/chosen": -2.9932210445404053, "logits/rejected": -2.9221127033233643, "logps/chosen": -35.80140686035156, "logps/rejected": -1081.6563720703125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.27717748284339905, "rewards/margins": 10.721138000488281, "rewards/rejected": -10.443960189819336, "step": 6640 }, { "epoch": 0.4, "learning_rate": 3.7760463082287647e-06, "logits/chosen": -2.9646878242492676, "logits/rejected": -2.8751158714294434, "logps/chosen": -33.74504089355469, "logps/rejected": -1142.1353759765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2817414402961731, "rewards/margins": 11.32699203491211, "rewards/rejected": -11.045249938964844, "step": 6650 }, { "epoch": 0.4, "learning_rate": 3.7715687284051618e-06, "logits/chosen": -2.9667468070983887, "logits/rejected": -2.8913238048553467, "logps/chosen": -35.123435974121094, "logps/rejected": -1138.9931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.28982651233673096, "rewards/margins": 11.300909042358398, "rewards/rejected": -11.011083602905273, "step": 6660 }, { "epoch": 0.4, "learning_rate": 3.7670856393848e-06, "logits/chosen": -2.974761962890625, "logits/rejected": -2.8755383491516113, "logps/chosen": -36.31507873535156, "logps/rejected": -1101.4864501953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2783536911010742, "rewards/margins": 10.912192344665527, "rewards/rejected": -10.633838653564453, "step": 6670 }, { "epoch": 0.4, "learning_rate": 3.7625970605911038e-06, "logits/chosen": -2.952380418777466, "logits/rejected": -2.8652398586273193, "logps/chosen": -34.799278259277344, "logps/rejected": -1088.0108642578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2711164951324463, "rewards/margins": 10.782071113586426, "rewards/rejected": -10.510954856872559, "step": 6680 }, { "epoch": 0.4, "learning_rate": 3.7581030114712837e-06, "logits/chosen": -2.977484941482544, "logits/rejected": -2.89845609664917, "logps/chosen": -39.17837905883789, "logps/rejected": -1096.477783203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2607581615447998, "rewards/margins": 10.842626571655273, "rewards/rejected": -10.581869125366211, "step": 6690 }, { "epoch": 0.4, "learning_rate": 3.75360351149625e-06, "logits/chosen": -2.9993343353271484, "logits/rejected": -2.9079127311706543, "logps/chosen": -34.76203155517578, "logps/rejected": -1137.75, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.28455179929733276, "rewards/margins": 11.292159080505371, "rewards/rejected": -11.007607460021973, "step": 6700 }, { "epoch": 0.4, "learning_rate": 3.7490985801605303e-06, "logits/chosen": -2.9794936180114746, "logits/rejected": -2.885404586791992, "logps/chosen": -37.283721923828125, "logps/rejected": -1149.0013427734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.27431944012641907, "rewards/margins": 11.403681755065918, "rewards/rejected": -11.129362106323242, "step": 6710 }, { "epoch": 0.4, "learning_rate": 3.744588236982181e-06, "logits/chosen": -2.980041742324829, "logits/rejected": -2.9042110443115234, "logps/chosen": -34.27482604980469, "logps/rejected": -1146.787353515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.28742271661758423, "rewards/margins": 11.37596321105957, "rewards/rejected": -11.08853816986084, "step": 6720 }, { "epoch": 0.4, "learning_rate": 3.7400725015027107e-06, "logits/chosen": -2.963160991668701, "logits/rejected": -2.9137561321258545, "logps/chosen": -36.699764251708984, "logps/rejected": -1157.8375244140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2906799912452698, "rewards/margins": 11.47517204284668, "rewards/rejected": -11.184491157531738, "step": 6730 }, { "epoch": 0.4, "learning_rate": 3.7355513932869862e-06, "logits/chosen": -3.001856565475464, "logits/rejected": -2.9224562644958496, "logps/chosen": -38.12511444091797, "logps/rejected": -1127.7803955078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2869042754173279, "rewards/margins": 11.189265251159668, "rewards/rejected": -10.902360916137695, "step": 6740 }, { "epoch": 0.4, "learning_rate": 3.7310249319231552e-06, "logits/chosen": -2.974370241165161, "logits/rejected": -2.8968796730041504, "logps/chosen": -37.50678253173828, "logps/rejected": -1151.9293212890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2720983028411865, "rewards/margins": 11.417986869812012, "rewards/rejected": -11.145889282226562, "step": 6750 }, { "epoch": 0.4, "learning_rate": 3.726493137022557e-06, "logits/chosen": -2.957367420196533, "logits/rejected": -2.9000725746154785, "logps/chosen": -38.16182327270508, "logps/rejected": -1126.245361328125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.29080480337142944, "rewards/margins": 11.177884101867676, "rewards/rejected": -10.887079238891602, "step": 6760 }, { "epoch": 0.4, "learning_rate": 3.7219560282196397e-06, "logits/chosen": -2.9838271141052246, "logits/rejected": -2.8711767196655273, "logps/chosen": -39.154457092285156, "logps/rejected": -1091.0650634765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26341989636421204, "rewards/margins": 10.790274620056152, "rewards/rejected": -10.526854515075684, "step": 6770 }, { "epoch": 0.4, "learning_rate": 3.7174136251718735e-06, "logits/chosen": -2.9563753604888916, "logits/rejected": -2.889951467514038, "logps/chosen": -37.14928436279297, "logps/rejected": -1136.7244873046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.28702259063720703, "rewards/margins": 11.270711898803711, "rewards/rejected": -10.983689308166504, "step": 6780 }, { "epoch": 0.4, "learning_rate": 3.712865947559667e-06, "logits/chosen": -2.968411922454834, "logits/rejected": -2.864734649658203, "logps/chosen": -39.658843994140625, "logps/rejected": -1204.1754150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.3024138808250427, "rewards/margins": 11.95676040649414, "rewards/rejected": -11.65434741973877, "step": 6790 }, { "epoch": 0.41, "learning_rate": 3.7083130150862835e-06, "logits/chosen": -3.0089824199676514, "logits/rejected": -2.9198708534240723, "logps/chosen": -34.938316345214844, "logps/rejected": -1202.4771728515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.3057253062725067, "rewards/margins": 11.961941719055176, "rewards/rejected": -11.656216621398926, "step": 6800 }, { "epoch": 0.41, "learning_rate": 3.7037548474777484e-06, "logits/chosen": -3.0088279247283936, "logits/rejected": -2.921771287918091, "logps/chosen": -39.95829772949219, "logps/rejected": -1153.1646728515625, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": 0.2906303405761719, "rewards/margins": 11.436050415039062, "rewards/rejected": -11.145419120788574, "step": 6810 }, { "epoch": 0.41, "learning_rate": 3.6991914644827732e-06, "logits/chosen": -2.988388776779175, "logits/rejected": -2.891087532043457, "logps/chosen": -33.894691467285156, "logps/rejected": -1099.203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.28703877329826355, "rewards/margins": 10.902120590209961, "rewards/rejected": -10.615079879760742, "step": 6820 }, { "epoch": 0.41, "learning_rate": 3.6946228858726642e-06, "logits/chosen": -2.956768035888672, "logits/rejected": -2.901477575302124, "logps/chosen": -34.089622497558594, "logps/rejected": -1121.078857421875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.28013619780540466, "rewards/margins": 11.090435981750488, "rewards/rejected": -10.810300827026367, "step": 6830 }, { "epoch": 0.41, "learning_rate": 3.690049131441238e-06, "logits/chosen": -2.9673073291778564, "logits/rejected": -2.918999671936035, "logps/chosen": -34.78194808959961, "logps/rejected": -1175.3424072265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2774762213230133, "rewards/margins": 11.65666675567627, "rewards/rejected": -11.379192352294922, "step": 6840 }, { "epoch": 0.41, "learning_rate": 3.6854702210047353e-06, "logits/chosen": -2.967132091522217, "logits/rejected": -2.8915319442749023, "logps/chosen": -37.98741912841797, "logps/rejected": -1127.545654296875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.25527629256248474, "rewards/margins": 11.154945373535156, "rewards/rejected": -10.89966869354248, "step": 6850 }, { "epoch": 0.41, "learning_rate": 3.6808861744017386e-06, "logits/chosen": -2.9637064933776855, "logits/rejected": -2.8718793392181396, "logps/chosen": -35.42226791381836, "logps/rejected": -1227.718017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.28548818826675415, "rewards/margins": 12.188883781433105, "rewards/rejected": -11.90339469909668, "step": 6860 }, { "epoch": 0.41, "learning_rate": 3.6762970114930796e-06, "logits/chosen": -2.9644083976745605, "logits/rejected": -2.9113001823425293, "logps/chosen": -36.74219512939453, "logps/rejected": -1189.732421875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 0.2567044198513031, "rewards/margins": 11.776748657226562, "rewards/rejected": -11.52004337310791, "step": 6870 }, { "epoch": 0.41, "learning_rate": 3.6717027521617593e-06, "logits/chosen": -2.997196674346924, "logits/rejected": -2.866471767425537, "logps/chosen": -40.88426208496094, "logps/rejected": -1242.6767578125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 0.2849300503730774, "rewards/margins": 12.330923080444336, "rewards/rejected": -12.045992851257324, "step": 6880 }, { "epoch": 0.41, "learning_rate": 3.6671034163128594e-06, "logits/chosen": -2.9941563606262207, "logits/rejected": -2.8519928455352783, "logps/chosen": -56.3566780090332, "logps/rejected": -1236.5130615234375, "loss": 0.1185, "rewards/accuracies": 1.0, "rewards/chosen": 0.05815911293029785, "rewards/margins": 12.036954879760742, "rewards/rejected": -11.978796005249023, "step": 6890 }, { "epoch": 0.41, "learning_rate": 3.662499023873454e-06, "logits/chosen": -2.9506423473358154, "logits/rejected": -2.850269317626953, "logps/chosen": -67.83695983886719, "logps/rejected": -1132.992431640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.021381264552474022, "rewards/margins": 10.929180145263672, "rewards/rejected": -10.950563430786133, "step": 6900 }, { "epoch": 0.41, "learning_rate": 3.657889594792528e-06, "logits/chosen": -3.009965419769287, "logits/rejected": -2.858022928237915, "logps/chosen": -64.09721374511719, "logps/rejected": -1131.816650390625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.031236395239830017, "rewards/margins": 10.960638046264648, "rewards/rejected": -10.929403305053711, "step": 6910 }, { "epoch": 0.41, "learning_rate": 3.653275149040887e-06, "logits/chosen": -2.9643521308898926, "logits/rejected": -2.8706250190734863, "logps/chosen": -56.82476043701172, "logps/rejected": -1098.34765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.11411658674478531, "rewards/margins": 10.724761962890625, "rewards/rejected": -10.610645294189453, "step": 6920 }, { "epoch": 0.41, "learning_rate": 3.6486557066110694e-06, "logits/chosen": -2.9958789348602295, "logits/rejected": -2.889495372772217, "logps/chosen": -49.41600799560547, "logps/rejected": -1138.31103515625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.14257310330867767, "rewards/margins": 11.142919540405273, "rewards/rejected": -11.000348091125488, "step": 6930 }, { "epoch": 0.41, "learning_rate": 3.644031287517267e-06, "logits/chosen": -2.9711594581604004, "logits/rejected": -2.8422303199768066, "logps/chosen": -45.40135955810547, "logps/rejected": -1158.193603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.17631639540195465, "rewards/margins": 11.374528884887695, "rewards/rejected": -11.198213577270508, "step": 6940 }, { "epoch": 0.41, "learning_rate": 3.639401911795232e-06, "logits/chosen": -2.970655918121338, "logits/rejected": -2.8614859580993652, "logps/chosen": -42.27909469604492, "logps/rejected": -1190.83056640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2349916398525238, "rewards/margins": 11.76417064666748, "rewards/rejected": -11.529178619384766, "step": 6950 }, { "epoch": 0.42, "learning_rate": 3.6347675995021874e-06, "logits/chosen": -2.980012893676758, "logits/rejected": -2.860460042953491, "logps/chosen": -52.53969192504883, "logps/rejected": -1112.449462890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2013874351978302, "rewards/margins": 10.943784713745117, "rewards/rejected": -10.742396354675293, "step": 6960 }, { "epoch": 0.42, "learning_rate": 3.6301283707167495e-06, "logits/chosen": -2.983964443206787, "logits/rejected": -2.863053560256958, "logps/chosen": -55.17057418823242, "logps/rejected": -1179.18505859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.17477022111415863, "rewards/margins": 11.588520050048828, "rewards/rejected": -11.413749694824219, "step": 6970 }, { "epoch": 0.42, "learning_rate": 3.6254842455388347e-06, "logits/chosen": -2.9535224437713623, "logits/rejected": -2.863802433013916, "logps/chosen": -39.318885803222656, "logps/rejected": -1196.2105712890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.204986572265625, "rewards/margins": 11.7885103225708, "rewards/rejected": -11.58352279663086, "step": 6980 }, { "epoch": 0.42, "learning_rate": 3.6208352440895704e-06, "logits/chosen": -2.965789318084717, "logits/rejected": -2.85290265083313, "logps/chosen": -54.328460693359375, "logps/rejected": -1133.638427734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.14126615226268768, "rewards/margins": 11.106844902038574, "rewards/rejected": -10.965578079223633, "step": 6990 }, { "epoch": 0.42, "learning_rate": 3.6161813865112155e-06, "logits/chosen": -2.962584972381592, "logits/rejected": -2.875392436981201, "logps/chosen": -48.909889221191406, "logps/rejected": -1118.4710693359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.19673006236553192, "rewards/margins": 11.004569053649902, "rewards/rejected": -10.807838439941406, "step": 7000 }, { "epoch": 0.42, "eval_logits/chosen": -2.9349584579467773, "eval_logits/rejected": -2.907924175262451, "eval_logps/chosen": -47.62433624267578, "eval_logps/rejected": -1167.173583984375, "eval_loss": 2.2104897652752697e-05, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.11744074523448944, "eval_rewards/margins": 11.406791687011719, "eval_rewards/rejected": -11.289350509643555, "eval_runtime": 4.2427, "eval_samples_per_second": 1.178, "eval_steps_per_second": 0.236, "step": 7000 }, { "epoch": 0.42, "learning_rate": 3.611522692967065e-06, "logits/chosen": -2.9643123149871826, "logits/rejected": -2.851972818374634, "logps/chosen": -44.091670989990234, "logps/rejected": -1108.5048828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.21062318980693817, "rewards/margins": 10.927730560302734, "rewards/rejected": -10.717107772827148, "step": 7010 }, { "epoch": 0.42, "learning_rate": 3.6068591836413687e-06, "logits/chosen": -2.981865644454956, "logits/rejected": -2.8751792907714844, "logps/chosen": -51.3531494140625, "logps/rejected": -1152.352783203125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.1015225425362587, "rewards/margins": 11.25377368927002, "rewards/rejected": -11.152252197265625, "step": 7020 }, { "epoch": 0.42, "learning_rate": 3.602190878739239e-06, "logits/chosen": -2.9626619815826416, "logits/rejected": -2.856879711151123, "logps/chosen": -56.2475471496582, "logps/rejected": -1226.1304931640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.06048056483268738, "rewards/margins": 11.932774543762207, "rewards/rejected": -11.872292518615723, "step": 7030 }, { "epoch": 0.42, "learning_rate": 3.5975177984865673e-06, "logits/chosen": -2.949892520904541, "logits/rejected": -2.83105731010437, "logps/chosen": -60.375885009765625, "logps/rejected": -1261.0311279296875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.012570028193295002, "rewards/margins": 12.24005126953125, "rewards/rejected": -12.227479934692383, "step": 7040 }, { "epoch": 0.42, "learning_rate": 3.592839963129934e-06, "logits/chosen": -2.9456706047058105, "logits/rejected": -2.848299026489258, "logps/chosen": -57.69365310668945, "logps/rejected": -1186.158203125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.04163042828440666, "rewards/margins": 11.549665451049805, "rewards/rejected": -11.508035659790039, "step": 7050 }, { "epoch": 0.42, "learning_rate": 3.588157392936521e-06, "logits/chosen": -2.9468867778778076, "logits/rejected": -2.8309714794158936, "logps/chosen": -51.45591354370117, "logps/rejected": -1230.8641357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.11274968087673187, "rewards/margins": 12.050989151000977, "rewards/rejected": -11.938237190246582, "step": 7060 }, { "epoch": 0.42, "learning_rate": 3.583470108194026e-06, "logits/chosen": -2.9572207927703857, "logits/rejected": -2.8621106147766113, "logps/chosen": -39.27837371826172, "logps/rejected": -1227.78271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.21241426467895508, "rewards/margins": 12.105578422546387, "rewards/rejected": -11.893163681030273, "step": 7070 }, { "epoch": 0.42, "learning_rate": 3.5787781292105704e-06, "logits/chosen": -2.9747512340545654, "logits/rejected": -2.8858442306518555, "logps/chosen": -42.78657531738281, "logps/rejected": -1151.605712890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.1817934811115265, "rewards/margins": 11.31758975982666, "rewards/rejected": -11.135795593261719, "step": 7080 }, { "epoch": 0.42, "learning_rate": 3.5740814763146164e-06, "logits/chosen": -2.937091112136841, "logits/rejected": -2.835345506668091, "logps/chosen": -40.43604278564453, "logps/rejected": -1221.916259765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.19722725450992584, "rewards/margins": 12.041267395019531, "rewards/rejected": -11.844039916992188, "step": 7090 }, { "epoch": 0.42, "learning_rate": 3.569380169854875e-06, "logits/chosen": -3.009007215499878, "logits/rejected": -2.925165891647339, "logps/chosen": -39.536224365234375, "logps/rejected": -1229.6920166015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.19350628554821014, "rewards/margins": 12.113792419433594, "rewards/rejected": -11.920286178588867, "step": 7100 }, { "epoch": 0.42, "learning_rate": 3.5646742302002185e-06, "logits/chosen": -2.9600539207458496, "logits/rejected": -2.8631184101104736, "logps/chosen": -41.846656799316406, "logps/rejected": -1282.306884765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2128446102142334, "rewards/margins": 12.658052444458008, "rewards/rejected": -12.445207595825195, "step": 7110 }, { "epoch": 0.42, "learning_rate": 3.5599636777395954e-06, "logits/chosen": -2.954801559448242, "logits/rejected": -2.859997272491455, "logps/chosen": -47.76865768432617, "logps/rejected": -1171.5987548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.20391757786273956, "rewards/margins": 11.55122184753418, "rewards/rejected": -11.34730339050293, "step": 7120 }, { "epoch": 0.43, "learning_rate": 3.555248532881938e-06, "logits/chosen": -2.9332220554351807, "logits/rejected": -2.8502609729766846, "logps/chosen": -52.22391891479492, "logps/rejected": -1227.760009765625, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": 0.1016826182603836, "rewards/margins": 11.990949630737305, "rewards/rejected": -11.889266967773438, "step": 7130 }, { "epoch": 0.43, "learning_rate": 3.5505288160560745e-06, "logits/chosen": -2.9862828254699707, "logits/rejected": -2.8888049125671387, "logps/chosen": -45.394569396972656, "logps/rejected": -1133.2509765625, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": 0.19393852353096008, "rewards/margins": 11.150472640991211, "rewards/rejected": -10.956534385681152, "step": 7140 }, { "epoch": 0.43, "learning_rate": 3.545804547710645e-06, "logits/chosen": -2.990804672241211, "logits/rejected": -2.862340211868286, "logps/chosen": -40.6457633972168, "logps/rejected": -1235.1636962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.17717739939689636, "rewards/margins": 12.149925231933594, "rewards/rejected": -11.972745895385742, "step": 7150 }, { "epoch": 0.43, "learning_rate": 3.5410757483140057e-06, "logits/chosen": -2.9902901649475098, "logits/rejected": -2.8771910667419434, "logps/chosen": -38.85820770263672, "logps/rejected": -1165.609130859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2004707306623459, "rewards/margins": 11.478397369384766, "rewards/rejected": -11.277926445007324, "step": 7160 }, { "epoch": 0.43, "learning_rate": 3.5363424383541465e-06, "logits/chosen": -2.9633541107177734, "logits/rejected": -2.8619625568389893, "logps/chosen": -40.24680709838867, "logps/rejected": -1191.791259765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.205248162150383, "rewards/margins": 11.745960235595703, "rewards/rejected": -11.540712356567383, "step": 7170 }, { "epoch": 0.43, "learning_rate": 3.5316046383385983e-06, "logits/chosen": -2.9633145332336426, "logits/rejected": -2.8491523265838623, "logps/chosen": -46.746299743652344, "logps/rejected": -1217.077392578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.17320765554904938, "rewards/margins": 11.955747604370117, "rewards/rejected": -11.782539367675781, "step": 7180 }, { "epoch": 0.43, "learning_rate": 3.526862368794347e-06, "logits/chosen": -2.9640049934387207, "logits/rejected": -2.835196018218994, "logps/chosen": -40.16733169555664, "logps/rejected": -1158.6522216796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.19455082714557648, "rewards/margins": 11.406923294067383, "rewards/rejected": -11.212371826171875, "step": 7190 }, { "epoch": 0.43, "learning_rate": 3.522115650267743e-06, "logits/chosen": -2.977959156036377, "logits/rejected": -2.8593060970306396, "logps/chosen": -50.781288146972656, "logps/rejected": -1163.6214599609375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.16347452998161316, "rewards/margins": 11.420965194702148, "rewards/rejected": -11.257489204406738, "step": 7200 }, { "epoch": 0.43, "learning_rate": 3.5173645033244103e-06, "logits/chosen": -2.9389450550079346, "logits/rejected": -2.8479621410369873, "logps/chosen": -35.22345733642578, "logps/rejected": -1144.3004150390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2799467444419861, "rewards/margins": 11.340679168701172, "rewards/rejected": -11.0607328414917, "step": 7210 }, { "epoch": 0.43, "learning_rate": 3.5126089485491627e-06, "logits/chosen": -2.9734604358673096, "logits/rejected": -2.8724148273468018, "logps/chosen": -32.071128845214844, "logps/rejected": -1190.3253173828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27814292907714844, "rewards/margins": 11.811691284179688, "rewards/rejected": -11.533548355102539, "step": 7220 }, { "epoch": 0.43, "learning_rate": 3.5078490065459083e-06, "logits/chosen": -2.9399383068084717, "logits/rejected": -2.8609042167663574, "logps/chosen": -36.89484405517578, "logps/rejected": -1162.475830078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.25963860750198364, "rewards/margins": 11.497381210327148, "rewards/rejected": -11.23774242401123, "step": 7230 }, { "epoch": 0.43, "learning_rate": 3.503084697937565e-06, "logits/chosen": -2.9760046005249023, "logits/rejected": -2.8943045139312744, "logps/chosen": -44.48744583129883, "logps/rejected": -1160.7484130859375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.20421108603477478, "rewards/margins": 11.434778213500977, "rewards/rejected": -11.23056697845459, "step": 7240 }, { "epoch": 0.43, "learning_rate": 3.4983160433659702e-06, "logits/chosen": -2.980762004852295, "logits/rejected": -2.8702902793884277, "logps/chosen": -44.79534149169922, "logps/rejected": -1198.37646484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.192635178565979, "rewards/margins": 11.81123161315918, "rewards/rejected": -11.618596076965332, "step": 7250 }, { "epoch": 0.43, "learning_rate": 3.493543063491788e-06, "logits/chosen": -2.975956678390503, "logits/rejected": -2.895249128341675, "logps/chosen": -52.418678283691406, "logps/rejected": -1210.0272216796875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.17572656273841858, "rewards/margins": 11.903901100158691, "rewards/rejected": -11.728174209594727, "step": 7260 }, { "epoch": 0.43, "learning_rate": 3.4887657789944236e-06, "logits/chosen": -2.9481759071350098, "logits/rejected": -2.856750011444092, "logps/chosen": -42.64104080200195, "logps/rejected": -1128.502685546875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.2733258008956909, "rewards/margins": 11.182697296142578, "rewards/rejected": -10.909370422363281, "step": 7270 }, { "epoch": 0.43, "learning_rate": 3.4839842105719346e-06, "logits/chosen": -2.9193408489227295, "logits/rejected": -2.847799301147461, "logps/chosen": -33.57160568237305, "logps/rejected": -1214.4326171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25742679834365845, "rewards/margins": 12.011921882629395, "rewards/rejected": -11.754495620727539, "step": 7280 }, { "epoch": 0.43, "learning_rate": 3.4791983789409358e-06, "logits/chosen": -2.987452983856201, "logits/rejected": -2.843458652496338, "logps/chosen": -37.44170379638672, "logps/rejected": -1183.241943359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25825580954551697, "rewards/margins": 11.725860595703125, "rewards/rejected": -11.467606544494629, "step": 7290 }, { "epoch": 0.44, "learning_rate": 3.474408304836514e-06, "logits/chosen": -3.004626750946045, "logits/rejected": -2.893467664718628, "logps/chosen": -32.09941864013672, "logps/rejected": -1249.3489990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2430475503206253, "rewards/margins": 12.352835655212402, "rewards/rejected": -12.10978889465332, "step": 7300 }, { "epoch": 0.44, "learning_rate": 3.4696140090121377e-06, "logits/chosen": -2.9062118530273438, "logits/rejected": -2.8599767684936523, "logps/chosen": -39.637901306152344, "logps/rejected": -1190.582763671875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.2256324738264084, "rewards/margins": 11.762540817260742, "rewards/rejected": -11.536909103393555, "step": 7310 }, { "epoch": 0.44, "learning_rate": 3.4648155122395653e-06, "logits/chosen": -2.9693233966827393, "logits/rejected": -2.8904950618743896, "logps/chosen": -34.595054626464844, "logps/rejected": -1177.8004150390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26757651567459106, "rewards/margins": 11.669134140014648, "rewards/rejected": -11.401556015014648, "step": 7320 }, { "epoch": 0.44, "learning_rate": 3.460012835308757e-06, "logits/chosen": -2.976123571395874, "logits/rejected": -2.894958972930908, "logps/chosen": -39.35161209106445, "logps/rejected": -1229.5931396484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27109187841415405, "rewards/margins": 12.19562816619873, "rewards/rejected": -11.92453670501709, "step": 7330 }, { "epoch": 0.44, "learning_rate": 3.455205999027783e-06, "logits/chosen": -2.9473328590393066, "logits/rejected": -2.827439785003662, "logps/chosen": -74.1365737915039, "logps/rejected": -1168.0263671875, "loss": 0.0468, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.057220209389925, "rewards/margins": 11.235620498657227, "rewards/rejected": -11.292840957641602, "step": 7340 }, { "epoch": 0.44, "learning_rate": 3.4503950242227356e-06, "logits/chosen": -2.9852287769317627, "logits/rejected": -2.8706459999084473, "logps/chosen": -36.88417434692383, "logps/rejected": -1221.8701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.250962495803833, "rewards/margins": 12.086122512817383, "rewards/rejected": -11.835161209106445, "step": 7350 }, { "epoch": 0.44, "learning_rate": 3.445579931737637e-06, "logits/chosen": -2.937481164932251, "logits/rejected": -2.831019878387451, "logps/chosen": -48.148170471191406, "logps/rejected": -1179.4093017578125, "loss": 0.0499, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1022704467177391, "rewards/margins": 11.510068893432617, "rewards/rejected": -11.40779972076416, "step": 7360 }, { "epoch": 0.44, "learning_rate": 3.44076074243435e-06, "logits/chosen": -2.975801944732666, "logits/rejected": -2.894432544708252, "logps/chosen": -41.70307922363281, "logps/rejected": -1140.346435546875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2622518837451935, "rewards/margins": 11.284624099731445, "rewards/rejected": -11.022372245788574, "step": 7370 }, { "epoch": 0.44, "learning_rate": 3.435937477192486e-06, "logits/chosen": -2.9499287605285645, "logits/rejected": -2.8628056049346924, "logps/chosen": -36.762107849121094, "logps/rejected": -1090.152099609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23527634143829346, "rewards/margins": 10.755160331726074, "rewards/rejected": -10.51988410949707, "step": 7380 }, { "epoch": 0.44, "learning_rate": 3.431110156909316e-06, "logits/chosen": -2.9659290313720703, "logits/rejected": -2.8426666259765625, "logps/chosen": -36.0003547668457, "logps/rejected": -1121.7955322265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26085036993026733, "rewards/margins": 11.09553337097168, "rewards/rejected": -10.834683418273926, "step": 7390 }, { "epoch": 0.44, "learning_rate": 3.4262788024996835e-06, "logits/chosen": -2.980234384536743, "logits/rejected": -2.8717684745788574, "logps/chosen": -38.34262466430664, "logps/rejected": -1177.4927978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2758311331272125, "rewards/margins": 11.676765441894531, "rewards/rejected": -11.400934219360352, "step": 7400 }, { "epoch": 0.44, "learning_rate": 3.421443434895905e-06, "logits/chosen": -2.956507921218872, "logits/rejected": -2.8881211280822754, "logps/chosen": -42.74211883544922, "logps/rejected": -1107.0245361328125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.2443157136440277, "rewards/margins": 10.93587589263916, "rewards/rejected": -10.691560745239258, "step": 7410 }, { "epoch": 0.44, "learning_rate": 3.4166040750476868e-06, "logits/chosen": -2.9392025470733643, "logits/rejected": -2.8382201194763184, "logps/chosen": -41.778663635253906, "logps/rejected": -1128.5548095703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.21661654114723206, "rewards/margins": 11.12719440460205, "rewards/rejected": -10.910577774047852, "step": 7420 }, { "epoch": 0.44, "learning_rate": 3.4117607439220336e-06, "logits/chosen": -2.9556846618652344, "logits/rejected": -2.8817944526672363, "logps/chosen": -37.60063934326172, "logps/rejected": -1135.350830078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24934406578540802, "rewards/margins": 11.226262092590332, "rewards/rejected": -10.976917266845703, "step": 7430 }, { "epoch": 0.44, "learning_rate": 3.406913462503153e-06, "logits/chosen": -2.976123809814453, "logits/rejected": -2.8673958778381348, "logps/chosen": -38.57599639892578, "logps/rejected": -1178.6722412109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2794368267059326, "rewards/margins": 11.677671432495117, "rewards/rejected": -11.398233413696289, "step": 7440 }, { "epoch": 0.44, "learning_rate": 3.40206225179237e-06, "logits/chosen": -2.9606659412384033, "logits/rejected": -2.833573818206787, "logps/chosen": -36.50050354003906, "logps/rejected": -1122.6612548828125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.24185983836650848, "rewards/margins": 11.085638046264648, "rewards/rejected": -10.84377670288086, "step": 7450 }, { "epoch": 0.44, "learning_rate": 3.397207132808033e-06, "logits/chosen": -2.9441747665405273, "logits/rejected": -2.849066734313965, "logps/chosen": -38.0032844543457, "logps/rejected": -1151.6109619140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27161356806755066, "rewards/margins": 11.41721248626709, "rewards/rejected": -11.145600318908691, "step": 7460 }, { "epoch": 0.45, "learning_rate": 3.3923481265854226e-06, "logits/chosen": -2.980806350708008, "logits/rejected": -2.8704183101654053, "logps/chosen": -38.962791442871094, "logps/rejected": -1195.2640380859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.29237666726112366, "rewards/margins": 11.875585556030273, "rewards/rejected": -11.583208084106445, "step": 7470 }, { "epoch": 0.45, "learning_rate": 3.387485254176663e-06, "logits/chosen": -2.9690229892730713, "logits/rejected": -2.847907304763794, "logps/chosen": -37.396602630615234, "logps/rejected": -1181.9183349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.27301889657974243, "rewards/margins": 11.708510398864746, "rewards/rejected": -11.435491561889648, "step": 7480 }, { "epoch": 0.45, "learning_rate": 3.382618536650626e-06, "logits/chosen": -2.9625370502471924, "logits/rejected": -2.8834712505340576, "logps/chosen": -39.2883415222168, "logps/rejected": -1183.6888427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.27168965339660645, "rewards/margins": 11.732646942138672, "rewards/rejected": -11.460958480834961, "step": 7490 }, { "epoch": 0.45, "learning_rate": 3.377747995092846e-06, "logits/chosen": -2.9641880989074707, "logits/rejected": -2.8239762783050537, "logps/chosen": -33.22515869140625, "logps/rejected": -1167.5155029296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27279412746429443, "rewards/margins": 11.579374313354492, "rewards/rejected": -11.306577682495117, "step": 7500 }, { "epoch": 0.45, "learning_rate": 3.3728736506054234e-06, "logits/chosen": -2.9259192943573, "logits/rejected": -2.8507466316223145, "logps/chosen": -38.25919723510742, "logps/rejected": -1179.6278076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2743486762046814, "rewards/margins": 11.688879013061523, "rewards/rejected": -11.414529800415039, "step": 7510 }, { "epoch": 0.45, "learning_rate": 3.3679955243069364e-06, "logits/chosen": -2.974205732345581, "logits/rejected": -2.864873170852661, "logps/chosen": -32.143760681152344, "logps/rejected": -1145.9415283203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27610263228416443, "rewards/margins": 11.36326789855957, "rewards/rejected": -11.087164878845215, "step": 7520 }, { "epoch": 0.45, "learning_rate": 3.3631136373323468e-06, "logits/chosen": -2.9570415019989014, "logits/rejected": -2.869084596633911, "logps/chosen": -41.94949722290039, "logps/rejected": -1088.835205078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2478419542312622, "rewards/margins": 10.750309944152832, "rewards/rejected": -10.502467155456543, "step": 7530 }, { "epoch": 0.45, "learning_rate": 3.3582280108329125e-06, "logits/chosen": -2.979475736618042, "logits/rejected": -2.8624682426452637, "logps/chosen": -38.818511962890625, "logps/rejected": -1152.4996337890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2704901099205017, "rewards/margins": 11.426569938659668, "rewards/rejected": -11.156081199645996, "step": 7540 }, { "epoch": 0.45, "learning_rate": 3.353338665976089e-06, "logits/chosen": -2.9606709480285645, "logits/rejected": -2.8573989868164062, "logps/chosen": -39.881919860839844, "logps/rejected": -1167.0576171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2707950472831726, "rewards/margins": 11.559568405151367, "rewards/rejected": -11.288774490356445, "step": 7550 }, { "epoch": 0.45, "learning_rate": 3.3484456239454467e-06, "logits/chosen": -2.951982021331787, "logits/rejected": -2.883371353149414, "logps/chosen": -57.836669921875, "logps/rejected": -1143.220947265625, "loss": 0.0269, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.031516194343566895, "rewards/margins": 11.076032638549805, "rewards/rejected": -11.044516563415527, "step": 7560 }, { "epoch": 0.45, "learning_rate": 3.3435489059405713e-06, "logits/chosen": -2.958313465118408, "logits/rejected": -2.874022960662842, "logps/chosen": -52.27512741088867, "logps/rejected": -1118.242919921875, "loss": 0.0188, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.11773236095905304, "rewards/margins": 10.938504219055176, "rewards/rejected": -10.820772171020508, "step": 7570 }, { "epoch": 0.45, "learning_rate": 3.3386485331769747e-06, "logits/chosen": -2.94983172416687, "logits/rejected": -2.8346304893493652, "logps/chosen": -40.81236267089844, "logps/rejected": -1246.726806640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2518131136894226, "rewards/margins": 12.3305082321167, "rewards/rejected": -12.078695297241211, "step": 7580 }, { "epoch": 0.45, "learning_rate": 3.3337445268860065e-06, "logits/chosen": -3.001126766204834, "logits/rejected": -2.8774607181549072, "logps/chosen": -45.408409118652344, "logps/rejected": -1220.4532470703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20859935879707336, "rewards/margins": 12.029722213745117, "rewards/rejected": -11.821123123168945, "step": 7590 }, { "epoch": 0.45, "learning_rate": 3.328836908314755e-06, "logits/chosen": -2.974332332611084, "logits/rejected": -2.834503650665283, "logps/chosen": -38.66448211669922, "logps/rejected": -1185.2452392578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.22896306216716766, "rewards/margins": 11.685976028442383, "rewards/rejected": -11.457013130187988, "step": 7600 }, { "epoch": 0.45, "learning_rate": 3.3239256987259635e-06, "logits/chosen": -2.9514386653900146, "logits/rejected": -2.8771402835845947, "logps/chosen": -44.13435745239258, "logps/rejected": -1159.5294189453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.17842493951320648, "rewards/margins": 11.40188217163086, "rewards/rejected": -11.223455429077148, "step": 7610 }, { "epoch": 0.45, "learning_rate": 3.319010919397929e-06, "logits/chosen": -2.9728589057922363, "logits/rejected": -2.8729562759399414, "logps/chosen": -55.771690368652344, "logps/rejected": -1162.4407958984375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.10273414850234985, "rewards/margins": 11.34472942352295, "rewards/rejected": -11.241994857788086, "step": 7620 }, { "epoch": 0.45, "learning_rate": 3.3140925916244184e-06, "logits/chosen": -2.9900150299072266, "logits/rejected": -2.857741594314575, "logps/chosen": -42.52189636230469, "logps/rejected": -1189.978759765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23979556560516357, "rewards/margins": 11.76148796081543, "rewards/rejected": -11.52169132232666, "step": 7630 }, { "epoch": 0.46, "learning_rate": 3.3091707367145707e-06, "logits/chosen": -2.9370009899139404, "logits/rejected": -2.853353500366211, "logps/chosen": -35.38311004638672, "logps/rejected": -1226.843505859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24305443465709686, "rewards/margins": 12.134391784667969, "rewards/rejected": -11.891336441040039, "step": 7640 }, { "epoch": 0.46, "learning_rate": 3.304245375992807e-06, "logits/chosen": -2.9609158039093018, "logits/rejected": -2.8662521839141846, "logps/chosen": -38.448211669921875, "logps/rejected": -1183.75390625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.17152813076972961, "rewards/margins": 11.622697830200195, "rewards/rejected": -11.451169967651367, "step": 7650 }, { "epoch": 0.46, "learning_rate": 3.299316530798738e-06, "logits/chosen": -2.9470582008361816, "logits/rejected": -2.862051010131836, "logps/chosen": -42.501564025878906, "logps/rejected": -1189.782958984375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.24495358765125275, "rewards/margins": 11.762331008911133, "rewards/rejected": -11.517377853393555, "step": 7660 }, { "epoch": 0.46, "learning_rate": 3.2943842224870705e-06, "logits/chosen": -2.949016571044922, "logits/rejected": -2.830435276031494, "logps/chosen": -45.46600341796875, "logps/rejected": -1170.058349609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23286637663841248, "rewards/margins": 11.55592155456543, "rewards/rejected": -11.3230562210083, "step": 7670 }, { "epoch": 0.46, "learning_rate": 3.2894484724275156e-06, "logits/chosen": -2.9677631855010986, "logits/rejected": -2.849534511566162, "logps/chosen": -39.45310592651367, "logps/rejected": -1180.109619140625, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": 0.22187092900276184, "rewards/margins": 11.63815975189209, "rewards/rejected": -11.416289329528809, "step": 7680 }, { "epoch": 0.46, "learning_rate": 3.284509302004699e-06, "logits/chosen": -2.937098503112793, "logits/rejected": -2.84637713432312, "logps/chosen": -59.74436569213867, "logps/rejected": -1224.7445068359375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.07761208713054657, "rewards/margins": 11.942061424255371, "rewards/rejected": -11.864450454711914, "step": 7690 }, { "epoch": 0.46, "learning_rate": 3.2795667326180604e-06, "logits/chosen": -2.9326419830322266, "logits/rejected": -2.8620619773864746, "logps/chosen": -56.3132438659668, "logps/rejected": -1243.968017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.05987129360437393, "rewards/margins": 12.126077651977539, "rewards/rejected": -12.066205978393555, "step": 7700 }, { "epoch": 0.46, "learning_rate": 3.2746207856817695e-06, "logits/chosen": -2.9536242485046387, "logits/rejected": -2.865689754486084, "logps/chosen": -48.68503952026367, "logps/rejected": -1234.4141845703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.15584543347358704, "rewards/margins": 12.123297691345215, "rewards/rejected": -11.967453002929688, "step": 7710 }, { "epoch": 0.46, "learning_rate": 3.2696714826246295e-06, "logits/chosen": -2.948604106903076, "logits/rejected": -2.8611338138580322, "logps/chosen": -44.647281646728516, "logps/rejected": -1221.5972900390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2125421017408371, "rewards/margins": 12.04237174987793, "rewards/rejected": -11.829830169677734, "step": 7720 }, { "epoch": 0.46, "learning_rate": 3.2647188448899813e-06, "logits/chosen": -2.987624406814575, "logits/rejected": -2.8641891479492188, "logps/chosen": -51.538116455078125, "logps/rejected": -1178.285400390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.1723364293575287, "rewards/margins": 11.58304214477539, "rewards/rejected": -11.410706520080566, "step": 7730 }, { "epoch": 0.46, "learning_rate": 3.2597628939356174e-06, "logits/chosen": -2.9434356689453125, "logits/rejected": -2.8573789596557617, "logps/chosen": -38.865760803222656, "logps/rejected": -1188.11376953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.21496149897575378, "rewards/margins": 11.701139450073242, "rewards/rejected": -11.486177444458008, "step": 7740 }, { "epoch": 0.46, "learning_rate": 3.254803651233683e-06, "logits/chosen": -2.938588857650757, "logits/rejected": -2.853731393814087, "logps/chosen": -42.14262008666992, "logps/rejected": -1141.761474609375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 0.23735132813453674, "rewards/margins": 11.265127182006836, "rewards/rejected": -11.027776718139648, "step": 7750 }, { "epoch": 0.46, "learning_rate": 3.249841138270585e-06, "logits/chosen": -2.9678702354431152, "logits/rejected": -2.8526384830474854, "logps/chosen": -36.893775939941406, "logps/rejected": -1070.2921142578125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2506834864616394, "rewards/margins": 10.57944393157959, "rewards/rejected": -10.328760147094727, "step": 7760 }, { "epoch": 0.46, "learning_rate": 3.2448753765469e-06, "logits/chosen": -2.941859483718872, "logits/rejected": -2.8793516159057617, "logps/chosen": -37.32954788208008, "logps/rejected": -1172.6298828125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 0.24981184303760529, "rewards/margins": 11.59046745300293, "rewards/rejected": -11.340658187866211, "step": 7770 }, { "epoch": 0.46, "learning_rate": 3.23990638757728e-06, "logits/chosen": -2.9937186241149902, "logits/rejected": -2.874683141708374, "logps/chosen": -39.20782470703125, "logps/rejected": -1231.411376953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.26566022634506226, "rewards/margins": 12.192790031433105, "rewards/rejected": -11.927129745483398, "step": 7780 }, { "epoch": 0.46, "learning_rate": 3.2349341928903588e-06, "logits/chosen": -2.9650704860687256, "logits/rejected": -2.838141918182373, "logps/chosen": -35.476261138916016, "logps/rejected": -1112.859619140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.26768606901168823, "rewards/margins": 11.033960342407227, "rewards/rejected": -10.766273498535156, "step": 7790 }, { "epoch": 0.47, "learning_rate": 3.2299588140286597e-06, "logits/chosen": -2.9399678707122803, "logits/rejected": -2.8384976387023926, "logps/chosen": -37.37017059326172, "logps/rejected": -1164.9776611328125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.25882187485694885, "rewards/margins": 11.523100852966309, "rewards/rejected": -11.264280319213867, "step": 7800 }, { "epoch": 0.47, "learning_rate": 3.2249802725485026e-06, "logits/chosen": -2.94730806350708, "logits/rejected": -2.845520496368408, "logps/chosen": -35.6010856628418, "logps/rejected": -1226.797119140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26936227083206177, "rewards/margins": 12.152708053588867, "rewards/rejected": -11.883344650268555, "step": 7810 }, { "epoch": 0.47, "learning_rate": 3.2199985900199064e-06, "logits/chosen": -2.9743552207946777, "logits/rejected": -2.834465265274048, "logps/chosen": -36.08306121826172, "logps/rejected": -1220.112548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26845332980155945, "rewards/margins": 12.09821891784668, "rewards/rejected": -11.829767227172852, "step": 7820 }, { "epoch": 0.47, "learning_rate": 3.215013788026504e-06, "logits/chosen": -2.993812322616577, "logits/rejected": -2.8490610122680664, "logps/chosen": -48.684295654296875, "logps/rejected": -1188.1248779296875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.17870351672172546, "rewards/margins": 11.685510635375977, "rewards/rejected": -11.506807327270508, "step": 7830 }, { "epoch": 0.47, "learning_rate": 3.2100258881654387e-06, "logits/chosen": -2.9364311695098877, "logits/rejected": -2.829209804534912, "logps/chosen": -45.97379684448242, "logps/rejected": -1182.7911376953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20607921481132507, "rewards/margins": 11.662303924560547, "rewards/rejected": -11.45622444152832, "step": 7840 }, { "epoch": 0.47, "learning_rate": 3.20503491204728e-06, "logits/chosen": -2.9499802589416504, "logits/rejected": -2.820557117462158, "logps/chosen": -42.87006759643555, "logps/rejected": -1172.5428466796875, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": 0.2612249255180359, "rewards/margins": 11.616161346435547, "rewards/rejected": -11.354936599731445, "step": 7850 }, { "epoch": 0.47, "learning_rate": 3.200040881295922e-06, "logits/chosen": -2.964287281036377, "logits/rejected": -2.834635019302368, "logps/chosen": -33.699310302734375, "logps/rejected": -1209.7777099609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2530985474586487, "rewards/margins": 11.976705551147461, "rewards/rejected": -11.723607063293457, "step": 7860 }, { "epoch": 0.47, "learning_rate": 3.1950438175484965e-06, "logits/chosen": -2.9568777084350586, "logits/rejected": -2.837040424346924, "logps/chosen": -38.72541046142578, "logps/rejected": -1241.737060546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2710217833518982, "rewards/margins": 12.3190336227417, "rewards/rejected": -12.048009872436523, "step": 7870 }, { "epoch": 0.47, "learning_rate": 3.1900437424552726e-06, "logits/chosen": -2.979931354522705, "logits/rejected": -2.8689162731170654, "logps/chosen": -35.48863220214844, "logps/rejected": -1224.1981201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2521322965621948, "rewards/margins": 12.12166690826416, "rewards/rejected": -11.869534492492676, "step": 7880 }, { "epoch": 0.47, "learning_rate": 3.1850406776795682e-06, "logits/chosen": -2.971280813217163, "logits/rejected": -2.841848611831665, "logps/chosen": -35.90252685546875, "logps/rejected": -1239.264404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26714977622032166, "rewards/margins": 12.28566837310791, "rewards/rejected": -12.018518447875977, "step": 7890 }, { "epoch": 0.47, "learning_rate": 3.1800346448976567e-06, "logits/chosen": -2.9650070667266846, "logits/rejected": -2.8733553886413574, "logps/chosen": -48.53470993041992, "logps/rejected": -1227.710205078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.15961462259292603, "rewards/margins": 12.05864429473877, "rewards/rejected": -11.899028778076172, "step": 7900 }, { "epoch": 0.47, "learning_rate": 3.1750256657986643e-06, "logits/chosen": -2.968278646469116, "logits/rejected": -2.867421865463257, "logps/chosen": -50.008018493652344, "logps/rejected": -1233.573974609375, "loss": 0.0244, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.18787893652915955, "rewards/margins": 12.152277946472168, "rewards/rejected": -11.964399337768555, "step": 7910 }, { "epoch": 0.47, "learning_rate": 3.1700137620844897e-06, "logits/chosen": -2.9510269165039062, "logits/rejected": -2.8780198097229004, "logps/chosen": -36.898704528808594, "logps/rejected": -1178.3173828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2290199249982834, "rewards/margins": 11.624314308166504, "rewards/rejected": -11.395295143127441, "step": 7920 }, { "epoch": 0.47, "learning_rate": 3.164998955469697e-06, "logits/chosen": -2.982407569885254, "logits/rejected": -2.889998197555542, "logps/chosen": -42.87178039550781, "logps/rejected": -1117.108154296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.213764026761055, "rewards/margins": 11.000961303710938, "rewards/rejected": -10.787198066711426, "step": 7930 }, { "epoch": 0.47, "learning_rate": 3.1599812676814314e-06, "logits/chosen": -2.9538769721984863, "logits/rejected": -2.8981399536132812, "logps/chosen": -37.92377471923828, "logps/rejected": -1156.33203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.19612343609333038, "rewards/margins": 11.394877433776855, "rewards/rejected": -11.198755264282227, "step": 7940 }, { "epoch": 0.47, "learning_rate": 3.1549607204593185e-06, "logits/chosen": -2.950965404510498, "logits/rejected": -2.8172192573547363, "logps/chosen": -37.53428268432617, "logps/rejected": -1148.1180419921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.21557533740997314, "rewards/margins": 11.316210746765137, "rewards/rejected": -11.100634574890137, "step": 7950 }, { "epoch": 0.47, "learning_rate": 3.1499373355553746e-06, "logits/chosen": -2.9494681358337402, "logits/rejected": -2.880131959915161, "logps/chosen": -36.957420349121094, "logps/rejected": -1158.2139892578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.22617194056510925, "rewards/margins": 11.442097663879395, "rewards/rejected": -11.215925216674805, "step": 7960 }, { "epoch": 0.48, "learning_rate": 3.1449111347339084e-06, "logits/chosen": -2.96299409866333, "logits/rejected": -2.885401725769043, "logps/chosen": -39.93885040283203, "logps/rejected": -1141.097900390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2360679805278778, "rewards/margins": 11.268847465515137, "rewards/rejected": -11.032780647277832, "step": 7970 }, { "epoch": 0.48, "learning_rate": 3.139882139771431e-06, "logits/chosen": -2.979128837585449, "logits/rejected": -2.8688220977783203, "logps/chosen": -45.068931579589844, "logps/rejected": -1095.7208251953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.21925148367881775, "rewards/margins": 10.81289291381836, "rewards/rejected": -10.59364128112793, "step": 7980 }, { "epoch": 0.48, "learning_rate": 3.134850372456558e-06, "logits/chosen": -2.947359561920166, "logits/rejected": -2.8866496086120605, "logps/chosen": -46.00815200805664, "logps/rejected": -1140.1456298828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.21194282174110413, "rewards/margins": 11.228066444396973, "rewards/rejected": -11.016124725341797, "step": 7990 }, { "epoch": 0.48, "learning_rate": 3.1298158545899167e-06, "logits/chosen": -2.95280385017395, "logits/rejected": -2.9039194583892822, "logps/chosen": -41.31835174560547, "logps/rejected": -1161.6370849609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.21608667075634003, "rewards/margins": 11.461472511291504, "rewards/rejected": -11.24538516998291, "step": 8000 }, { "epoch": 0.48, "learning_rate": 3.1247786079840513e-06, "logits/chosen": -2.9570536613464355, "logits/rejected": -2.898334503173828, "logps/chosen": -38.98703384399414, "logps/rejected": -1093.0426025390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23625075817108154, "rewards/margins": 10.787592887878418, "rewards/rejected": -10.551342010498047, "step": 8010 }, { "epoch": 0.48, "learning_rate": 3.11973865446333e-06, "logits/chosen": -2.972517967224121, "logits/rejected": -2.884218692779541, "logps/chosen": -35.510589599609375, "logps/rejected": -1125.9576416015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2345292568206787, "rewards/margins": 11.118875503540039, "rewards/rejected": -10.884345054626465, "step": 8020 }, { "epoch": 0.48, "learning_rate": 3.1146960158638475e-06, "logits/chosen": -2.927402973175049, "logits/rejected": -2.818859815597534, "logps/chosen": -33.16678237915039, "logps/rejected": -1193.2440185546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24450048804283142, "rewards/margins": 11.793451309204102, "rewards/rejected": -11.548949241638184, "step": 8030 }, { "epoch": 0.48, "learning_rate": 3.109650714033331e-06, "logits/chosen": -2.994617462158203, "logits/rejected": -2.8889412879943848, "logps/chosen": -39.289058685302734, "logps/rejected": -1177.198486328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.22440233826637268, "rewards/margins": 11.620431900024414, "rewards/rejected": -11.396028518676758, "step": 8040 }, { "epoch": 0.48, "learning_rate": 3.10460277083105e-06, "logits/chosen": -2.973637342453003, "logits/rejected": -2.8828444480895996, "logps/chosen": -40.842254638671875, "logps/rejected": -1184.3079833984375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.1971646100282669, "rewards/margins": 11.656611442565918, "rewards/rejected": -11.459446907043457, "step": 8050 }, { "epoch": 0.48, "learning_rate": 3.099552208127713e-06, "logits/chosen": -2.9754014015197754, "logits/rejected": -2.885457754135132, "logps/chosen": -39.940284729003906, "logps/rejected": -1154.883056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2171953171491623, "rewards/margins": 11.396567344665527, "rewards/rejected": -11.179372787475586, "step": 8060 }, { "epoch": 0.48, "learning_rate": 3.0944990478053816e-06, "logits/chosen": -2.950632095336914, "logits/rejected": -2.868307590484619, "logps/chosen": -45.841514587402344, "logps/rejected": -1196.620849609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20966795086860657, "rewards/margins": 11.774394989013672, "rewards/rejected": -11.564725875854492, "step": 8070 }, { "epoch": 0.48, "learning_rate": 3.089443311757371e-06, "logits/chosen": -2.965151309967041, "logits/rejected": -2.882850408554077, "logps/chosen": -41.176029205322266, "logps/rejected": -1185.954833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.19254614412784576, "rewards/margins": 11.669876098632812, "rewards/rejected": -11.477330207824707, "step": 8080 }, { "epoch": 0.48, "learning_rate": 3.0843850218881545e-06, "logits/chosen": -2.9698076248168945, "logits/rejected": -2.8814609050750732, "logps/chosen": -48.3082160949707, "logps/rejected": -1227.242919921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.202851340174675, "rewards/margins": 12.09326171875, "rewards/rejected": -11.890409469604492, "step": 8090 }, { "epoch": 0.48, "learning_rate": 3.0793242001132725e-06, "logits/chosen": -2.9870705604553223, "logits/rejected": -2.8667547702789307, "logps/chosen": -38.688758850097656, "logps/rejected": -1147.412841796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.21577727794647217, "rewards/margins": 11.309242248535156, "rewards/rejected": -11.093465805053711, "step": 8100 }, { "epoch": 0.48, "learning_rate": 3.074260868359233e-06, "logits/chosen": -2.970268487930298, "logits/rejected": -2.8638625144958496, "logps/chosen": -38.709800720214844, "logps/rejected": -1196.795166015625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.22909533977508545, "rewards/margins": 11.824028015136719, "rewards/rejected": -11.59493350982666, "step": 8110 }, { "epoch": 0.48, "learning_rate": 3.0691950485634192e-06, "logits/chosen": -2.984344005584717, "logits/rejected": -2.8710198402404785, "logps/chosen": -37.35481643676758, "logps/rejected": -1270.3497314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2270474135875702, "rewards/margins": 12.55960750579834, "rewards/rejected": -12.332560539245605, "step": 8120 }, { "epoch": 0.48, "learning_rate": 3.0641267626739946e-06, "logits/chosen": -2.994691848754883, "logits/rejected": -2.9219043254852295, "logps/chosen": -38.01766586303711, "logps/rejected": -1180.606201171875, "loss": 0.0292, "rewards/accuracies": 1.0, "rewards/chosen": 0.21459552645683289, "rewards/margins": 11.645423889160156, "rewards/rejected": -11.430829048156738, "step": 8130 }, { "epoch": 0.49, "learning_rate": 3.059056032649808e-06, "logits/chosen": -2.9309115409851074, "logits/rejected": -2.877362012863159, "logps/chosen": -34.1313591003418, "logps/rejected": -1192.5589599609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.203037828207016, "rewards/margins": 11.75660514831543, "rewards/rejected": -11.553567886352539, "step": 8140 }, { "epoch": 0.49, "learning_rate": 3.0539828804602955e-06, "logits/chosen": -2.9961459636688232, "logits/rejected": -2.8799901008605957, "logps/chosen": -47.805076599121094, "logps/rejected": -1267.4478759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2217770367860794, "rewards/margins": 12.510674476623535, "rewards/rejected": -12.288896560668945, "step": 8150 }, { "epoch": 0.49, "learning_rate": 3.0489073280853886e-06, "logits/chosen": -2.9726040363311768, "logits/rejected": -2.906320571899414, "logps/chosen": -46.75722122192383, "logps/rejected": -1203.666748046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2115451544523239, "rewards/margins": 11.876327514648438, "rewards/rejected": -11.664783477783203, "step": 8160 }, { "epoch": 0.49, "learning_rate": 3.043829397515419e-06, "logits/chosen": -2.9444355964660645, "logits/rejected": -2.8544487953186035, "logps/chosen": -45.7466926574707, "logps/rejected": -1233.299560546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.21180494129657745, "rewards/margins": 12.172625541687012, "rewards/rejected": -11.960821151733398, "step": 8170 }, { "epoch": 0.49, "learning_rate": 3.03874911075102e-06, "logits/chosen": -2.971848487854004, "logits/rejected": -2.8628897666931152, "logps/chosen": -42.13836669921875, "logps/rejected": -1249.8304443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.21523675322532654, "rewards/margins": 12.329458236694336, "rewards/rejected": -12.114221572875977, "step": 8180 }, { "epoch": 0.49, "learning_rate": 3.0336664898030344e-06, "logits/chosen": -2.9682748317718506, "logits/rejected": -2.883039712905884, "logps/chosen": -44.283348083496094, "logps/rejected": -1197.6610107421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2157357633113861, "rewards/margins": 11.798887252807617, "rewards/rejected": -11.583150863647461, "step": 8190 }, { "epoch": 0.49, "learning_rate": 3.0285815566924186e-06, "logits/chosen": -2.978893756866455, "logits/rejected": -2.8729586601257324, "logps/chosen": -38.992454528808594, "logps/rejected": -1213.5335693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.23318319022655487, "rewards/margins": 11.976998329162598, "rewards/rejected": -11.743814468383789, "step": 8200 }, { "epoch": 0.49, "learning_rate": 3.023494333450146e-06, "logits/chosen": -2.9367027282714844, "logits/rejected": -2.84428071975708, "logps/chosen": -37.762413024902344, "logps/rejected": -1229.7569580078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.22450153529644012, "rewards/margins": 12.141807556152344, "rewards/rejected": -11.917306900024414, "step": 8210 }, { "epoch": 0.49, "learning_rate": 3.018404842117112e-06, "logits/chosen": -2.961998224258423, "logits/rejected": -2.870405673980713, "logps/chosen": -42.35347366333008, "logps/rejected": -1243.6419677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.22562667727470398, "rewards/margins": 12.272897720336914, "rewards/rejected": -12.047271728515625, "step": 8220 }, { "epoch": 0.49, "learning_rate": 3.01331310474404e-06, "logits/chosen": -2.9898171424865723, "logits/rejected": -2.8841209411621094, "logps/chosen": -41.70316696166992, "logps/rejected": -1240.6300048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2089872807264328, "rewards/margins": 12.25507926940918, "rewards/rejected": -12.04609203338623, "step": 8230 }, { "epoch": 0.49, "learning_rate": 3.0082191433913825e-06, "logits/chosen": -3.0011541843414307, "logits/rejected": -2.8708033561706543, "logps/chosen": -37.508609771728516, "logps/rejected": -1249.951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2402503937482834, "rewards/margins": 12.361468315124512, "rewards/rejected": -12.121217727661133, "step": 8240 }, { "epoch": 0.49, "learning_rate": 3.0031229801292293e-06, "logits/chosen": -3.0036120414733887, "logits/rejected": -2.874596118927002, "logps/chosen": -36.04401397705078, "logps/rejected": -1290.0743408203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24675187468528748, "rewards/margins": 12.780680656433105, "rewards/rejected": -12.533927917480469, "step": 8250 }, { "epoch": 0.49, "learning_rate": 2.99802463703721e-06, "logits/chosen": -2.9743664264678955, "logits/rejected": -2.8879714012145996, "logps/chosen": -42.53742218017578, "logps/rejected": -1275.2073974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2420441210269928, "rewards/margins": 12.604070663452148, "rewards/rejected": -12.36202621459961, "step": 8260 }, { "epoch": 0.49, "learning_rate": 2.9929241362043976e-06, "logits/chosen": -2.959660291671753, "logits/rejected": -2.852196455001831, "logps/chosen": -34.64464569091797, "logps/rejected": -1225.455322265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2356066256761551, "rewards/margins": 12.113018989562988, "rewards/rejected": -11.877411842346191, "step": 8270 }, { "epoch": 0.49, "learning_rate": 2.9878214997292155e-06, "logits/chosen": -2.9865050315856934, "logits/rejected": -2.863224506378174, "logps/chosen": -44.00574493408203, "logps/rejected": -1186.180908203125, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.22519409656524658, "rewards/margins": 11.716894149780273, "rewards/rejected": -11.491701126098633, "step": 8280 }, { "epoch": 0.49, "learning_rate": 2.9827167497193367e-06, "logits/chosen": -2.986934185028076, "logits/rejected": -2.9344654083251953, "logps/chosen": -43.524436950683594, "logps/rejected": -1173.4625244140625, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": 0.23471041023731232, "rewards/margins": 11.611597061157227, "rewards/rejected": -11.376885414123535, "step": 8290 }, { "epoch": 0.49, "learning_rate": 2.9776099082915954e-06, "logits/chosen": -2.9654834270477295, "logits/rejected": -2.8878111839294434, "logps/chosen": -40.95265579223633, "logps/rejected": -1174.11474609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2335340976715088, "rewards/margins": 11.6024751663208, "rewards/rejected": -11.368942260742188, "step": 8300 }, { "epoch": 0.5, "learning_rate": 2.9725009975718845e-06, "logits/chosen": -2.9464104175567627, "logits/rejected": -2.8547966480255127, "logps/chosen": -43.89667510986328, "logps/rejected": -1095.61962890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.19203241169452667, "rewards/margins": 10.769052505493164, "rewards/rejected": -10.577020645141602, "step": 8310 }, { "epoch": 0.5, "learning_rate": 2.9673900396950622e-06, "logits/chosen": -2.9888980388641357, "logits/rejected": -2.9544930458068848, "logps/chosen": -46.41544723510742, "logps/rejected": -1154.2598876953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.193424254655838, "rewards/margins": 11.35975456237793, "rewards/rejected": -11.166332244873047, "step": 8320 }, { "epoch": 0.5, "learning_rate": 2.9622770568048577e-06, "logits/chosen": -2.9622507095336914, "logits/rejected": -2.8494722843170166, "logps/chosen": -42.986114501953125, "logps/rejected": -1135.3475341796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.21809124946594238, "rewards/margins": 11.1937255859375, "rewards/rejected": -10.975634574890137, "step": 8330 }, { "epoch": 0.5, "learning_rate": 2.9571620710537726e-06, "logits/chosen": -2.9603095054626465, "logits/rejected": -2.9047508239746094, "logps/chosen": -48.1090202331543, "logps/rejected": -1108.709716796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.21128936111927032, "rewards/margins": 10.930219650268555, "rewards/rejected": -10.718931198120117, "step": 8340 }, { "epoch": 0.5, "learning_rate": 2.9520451046029862e-06, "logits/chosen": -2.9438250064849854, "logits/rejected": -2.8823959827423096, "logps/chosen": -42.684730529785156, "logps/rejected": -1149.1904296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20005354285240173, "rewards/margins": 11.314567565917969, "rewards/rejected": -11.114513397216797, "step": 8350 }, { "epoch": 0.5, "learning_rate": 2.9469261796222608e-06, "logits/chosen": -2.941500663757324, "logits/rejected": -2.8579535484313965, "logps/chosen": -38.15983963012695, "logps/rejected": -1129.218505859375, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": 0.1956539750099182, "rewards/margins": 11.121232986450195, "rewards/rejected": -10.925580978393555, "step": 8360 }, { "epoch": 0.5, "learning_rate": 2.9418053182898428e-06, "logits/chosen": -2.9640469551086426, "logits/rejected": -2.867971420288086, "logps/chosen": -39.757713317871094, "logps/rejected": -1152.5611572265625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.23202142119407654, "rewards/margins": 11.381186485290527, "rewards/rejected": -11.14916706085205, "step": 8370 }, { "epoch": 0.5, "learning_rate": 2.936682542792367e-06, "logits/chosen": -2.9818005561828613, "logits/rejected": -2.871063709259033, "logps/chosen": -36.97361755371094, "logps/rejected": -1224.499267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2497704029083252, "rewards/margins": 12.132501602172852, "rewards/rejected": -11.882732391357422, "step": 8380 }, { "epoch": 0.5, "learning_rate": 2.9315578753247632e-06, "logits/chosen": -2.984191656112671, "logits/rejected": -2.8738763332366943, "logps/chosen": -46.861114501953125, "logps/rejected": -1179.7171630859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.21018680930137634, "rewards/margins": 11.626368522644043, "rewards/rejected": -11.416182518005371, "step": 8390 }, { "epoch": 0.5, "learning_rate": 2.9264313380901586e-06, "logits/chosen": -2.9873549938201904, "logits/rejected": -2.871145248413086, "logps/chosen": -42.856224060058594, "logps/rejected": -1216.3046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2678599953651428, "rewards/margins": 12.063026428222656, "rewards/rejected": -11.795166015625, "step": 8400 }, { "epoch": 0.5, "learning_rate": 2.921302953299781e-06, "logits/chosen": -2.9540820121765137, "logits/rejected": -2.8523054122924805, "logps/chosen": -35.8204460144043, "logps/rejected": -1201.0185546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24314948916435242, "rewards/margins": 11.879920959472656, "rewards/rejected": -11.636771202087402, "step": 8410 }, { "epoch": 0.5, "learning_rate": 2.916172743172861e-06, "logits/chosen": -2.946516990661621, "logits/rejected": -2.8705668449401855, "logps/chosen": -48.04414749145508, "logps/rejected": -1177.322021484375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 0.21064500510692596, "rewards/margins": 11.613953590393066, "rewards/rejected": -11.403307914733887, "step": 8420 }, { "epoch": 0.5, "learning_rate": 2.911040729936542e-06, "logits/chosen": -2.982999324798584, "logits/rejected": -2.888136386871338, "logps/chosen": -37.22028350830078, "logps/rejected": -1155.195556640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.24210409820079803, "rewards/margins": 11.42443561553955, "rewards/rejected": -11.182331085205078, "step": 8430 }, { "epoch": 0.5, "learning_rate": 2.905906935825774e-06, "logits/chosen": -2.9691436290740967, "logits/rejected": -2.8382582664489746, "logps/chosen": -42.78316116333008, "logps/rejected": -1232.88671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24289271235466003, "rewards/margins": 12.193568229675293, "rewards/rejected": -11.950676918029785, "step": 8440 }, { "epoch": 0.5, "learning_rate": 2.900771383083227e-06, "logits/chosen": -2.9802980422973633, "logits/rejected": -2.8515467643737793, "logps/chosen": -41.669029235839844, "logps/rejected": -1291.3974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2626536786556244, "rewards/margins": 12.800603866577148, "rewards/rejected": -12.537949562072754, "step": 8450 }, { "epoch": 0.5, "learning_rate": 2.895634093959189e-06, "logits/chosen": -2.9572300910949707, "logits/rejected": -2.83836030960083, "logps/chosen": -41.77558135986328, "logps/rejected": -1279.21875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.22848618030548096, "rewards/margins": 12.636858940124512, "rewards/rejected": -12.40837287902832, "step": 8460 }, { "epoch": 0.51, "learning_rate": 2.8904950907114715e-06, "logits/chosen": -3.0008997917175293, "logits/rejected": -2.875725269317627, "logps/chosen": -39.023921966552734, "logps/rejected": -1189.060791015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2434626817703247, "rewards/margins": 11.750813484191895, "rewards/rejected": -11.507351875305176, "step": 8470 }, { "epoch": 0.51, "learning_rate": 2.885354395605311e-06, "logits/chosen": -2.9919943809509277, "logits/rejected": -2.883138656616211, "logps/chosen": -40.960838317871094, "logps/rejected": -1276.8411865234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23085685074329376, "rewards/margins": 12.608600616455078, "rewards/rejected": -12.377744674682617, "step": 8480 }, { "epoch": 0.51, "learning_rate": 2.880212030913276e-06, "logits/chosen": -2.971890926361084, "logits/rejected": -2.8759477138519287, "logps/chosen": -40.53205490112305, "logps/rejected": -1227.43310546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2127629518508911, "rewards/margins": 12.112916946411133, "rewards/rejected": -11.900151252746582, "step": 8490 }, { "epoch": 0.51, "learning_rate": 2.875068018915169e-06, "logits/chosen": -2.955104351043701, "logits/rejected": -2.854205846786499, "logps/chosen": -41.025306701660156, "logps/rejected": -1256.681396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2254287749528885, "rewards/margins": 12.414690017700195, "rewards/rejected": -12.189260482788086, "step": 8500 }, { "epoch": 0.51, "learning_rate": 2.8699223818979274e-06, "logits/chosen": -2.9566197395324707, "logits/rejected": -2.8445637226104736, "logps/chosen": -41.012351989746094, "logps/rejected": -1254.6580810546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26428937911987305, "rewards/margins": 12.430181503295898, "rewards/rejected": -12.165892601013184, "step": 8510 }, { "epoch": 0.51, "learning_rate": 2.8647751421555313e-06, "logits/chosen": -2.975059986114502, "logits/rejected": -2.874263048171997, "logps/chosen": -49.39105987548828, "logps/rejected": -1220.208251953125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.1853337287902832, "rewards/margins": 12.006001472473145, "rewards/rejected": -11.820666313171387, "step": 8520 }, { "epoch": 0.51, "learning_rate": 2.859626321988903e-06, "logits/chosen": -2.9831767082214355, "logits/rejected": -2.877561092376709, "logps/chosen": -40.31803512573242, "logps/rejected": -1252.0775146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.20548415184020996, "rewards/margins": 12.342676162719727, "rewards/rejected": -12.13719367980957, "step": 8530 }, { "epoch": 0.51, "learning_rate": 2.8544759437058135e-06, "logits/chosen": -2.987328052520752, "logits/rejected": -2.864075183868408, "logps/chosen": -57.44240188598633, "logps/rejected": -1230.9613037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.06458355486392975, "rewards/margins": 12.016793251037598, "rewards/rejected": -11.95220947265625, "step": 8540 }, { "epoch": 0.51, "learning_rate": 2.8493240296207835e-06, "logits/chosen": -2.937746047973633, "logits/rejected": -2.8454833030700684, "logps/chosen": -44.925880432128906, "logps/rejected": -1234.482666015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.17738083004951477, "rewards/margins": 12.150038719177246, "rewards/rejected": -11.972658157348633, "step": 8550 }, { "epoch": 0.51, "learning_rate": 2.844170602054989e-06, "logits/chosen": -2.9714770317077637, "logits/rejected": -2.8370883464813232, "logps/chosen": -49.54924774169922, "logps/rejected": -1249.984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.16000409424304962, "rewards/margins": 12.282330513000488, "rewards/rejected": -12.122326850891113, "step": 8560 }, { "epoch": 0.51, "learning_rate": 2.8390156833361616e-06, "logits/chosen": -2.974047899246216, "logits/rejected": -2.87410569190979, "logps/chosen": -56.825843811035156, "logps/rejected": -1261.9178466796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.14603962004184723, "rewards/margins": 12.389521598815918, "rewards/rejected": -12.24348258972168, "step": 8570 }, { "epoch": 0.51, "learning_rate": 2.833859295798495e-06, "logits/chosen": -2.9456558227539062, "logits/rejected": -2.835428476333618, "logps/chosen": -54.60789108276367, "logps/rejected": -1299.87890625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.1303602010011673, "rewards/margins": 12.741490364074707, "rewards/rejected": -12.61113166809082, "step": 8580 }, { "epoch": 0.51, "learning_rate": 2.828701461782546e-06, "logits/chosen": -2.950392246246338, "logits/rejected": -2.8744330406188965, "logps/chosen": -42.5540885925293, "logps/rejected": -1239.76708984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.19210441410541534, "rewards/margins": 12.211462020874023, "rewards/rejected": -12.019357681274414, "step": 8590 }, { "epoch": 0.51, "learning_rate": 2.8235422036351384e-06, "logits/chosen": -2.950082302093506, "logits/rejected": -2.8700544834136963, "logps/chosen": -50.750267028808594, "logps/rejected": -1289.634765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.08250294625759125, "rewards/margins": 12.605796813964844, "rewards/rejected": -12.523294448852539, "step": 8600 }, { "epoch": 0.51, "learning_rate": 2.818381543709267e-06, "logits/chosen": -2.9815609455108643, "logits/rejected": -2.8708770275115967, "logps/chosen": -49.278099060058594, "logps/rejected": -1222.1695556640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.17292986810207367, "rewards/margins": 12.028228759765625, "rewards/rejected": -11.855299949645996, "step": 8610 }, { "epoch": 0.51, "learning_rate": 2.813219504363998e-06, "logits/chosen": -2.992460250854492, "logits/rejected": -2.878612518310547, "logps/chosen": -54.06462860107422, "logps/rejected": -1268.912841796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.14218321442604065, "rewards/margins": 12.462061882019043, "rewards/rejected": -12.319879531860352, "step": 8620 }, { "epoch": 0.51, "learning_rate": 2.8080561079643758e-06, "logits/chosen": -2.987910032272339, "logits/rejected": -2.880995512008667, "logps/chosen": -50.10963821411133, "logps/rejected": -1310.1510009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.1067296713590622, "rewards/margins": 12.831001281738281, "rewards/rejected": -12.724270820617676, "step": 8630 }, { "epoch": 0.52, "learning_rate": 2.802891376881325e-06, "logits/chosen": -2.963162660598755, "logits/rejected": -2.838036060333252, "logps/chosen": -47.818084716796875, "logps/rejected": -1224.3487548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.16880476474761963, "rewards/margins": 12.040132522583008, "rewards/rejected": -11.871329307556152, "step": 8640 }, { "epoch": 0.52, "learning_rate": 2.7977253334915495e-06, "logits/chosen": -2.9554998874664307, "logits/rejected": -2.852532386779785, "logps/chosen": -48.949378967285156, "logps/rejected": -1274.3826904296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.177504763007164, "rewards/margins": 12.544794082641602, "rewards/rejected": -12.367288589477539, "step": 8650 }, { "epoch": 0.52, "learning_rate": 2.7925580001774422e-06, "logits/chosen": -2.946216344833374, "logits/rejected": -2.8464770317077637, "logps/chosen": -47.58275604248047, "logps/rejected": -1236.18359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.22008883953094482, "rewards/margins": 12.212506294250488, "rewards/rejected": -11.99241828918457, "step": 8660 }, { "epoch": 0.52, "learning_rate": 2.787389399326984e-06, "logits/chosen": -2.9712047576904297, "logits/rejected": -2.865224838256836, "logps/chosen": -43.718780517578125, "logps/rejected": -1257.56201171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.197851300239563, "rewards/margins": 12.39890193939209, "rewards/rejected": -12.201051712036133, "step": 8670 }, { "epoch": 0.52, "learning_rate": 2.7822195533336466e-06, "logits/chosen": -2.9707939624786377, "logits/rejected": -2.8826301097869873, "logps/chosen": -48.75074005126953, "logps/rejected": -1233.8076171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.15061214566230774, "rewards/margins": 12.116867065429688, "rewards/rejected": -11.966255187988281, "step": 8680 }, { "epoch": 0.52, "learning_rate": 2.7770484845962976e-06, "logits/chosen": -3.0149874687194824, "logits/rejected": -2.8935227394104004, "logps/chosen": -49.97515106201172, "logps/rejected": -1247.17041015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.1281612366437912, "rewards/margins": 12.219125747680664, "rewards/rejected": -12.09096622467041, "step": 8690 }, { "epoch": 0.52, "learning_rate": 2.7718762155191015e-06, "logits/chosen": -2.9922499656677246, "logits/rejected": -2.8592000007629395, "logps/chosen": -58.54777908325195, "logps/rejected": -1198.770263671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.14283354580402374, "rewards/margins": 11.755352973937988, "rewards/rejected": -11.612518310546875, "step": 8700 }, { "epoch": 0.52, "learning_rate": 2.766702768511423e-06, "logits/chosen": -2.955118179321289, "logits/rejected": -2.858304977416992, "logps/chosen": -44.114566802978516, "logps/rejected": -1269.830322265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.15682296454906464, "rewards/margins": 12.481486320495605, "rewards/rejected": -12.324663162231445, "step": 8710 }, { "epoch": 0.52, "learning_rate": 2.7615281659877304e-06, "logits/chosen": -2.9917354583740234, "logits/rejected": -2.8864848613739014, "logps/chosen": -47.01155090332031, "logps/rejected": -1192.4898681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.18086951971054077, "rewards/margins": 11.73695182800293, "rewards/rejected": -11.556082725524902, "step": 8720 }, { "epoch": 0.52, "learning_rate": 2.7563524303675005e-06, "logits/chosen": -2.92706298828125, "logits/rejected": -2.8163540363311768, "logps/chosen": -55.44435501098633, "logps/rejected": -1267.1630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.12611976265907288, "rewards/margins": 12.422361373901367, "rewards/rejected": -12.296239852905273, "step": 8730 }, { "epoch": 0.52, "learning_rate": 2.7511755840751165e-06, "logits/chosen": -2.9666545391082764, "logits/rejected": -2.871886730194092, "logps/chosen": -48.88249206542969, "logps/rejected": -1323.2427978515625, "loss": 0.0456, "rewards/accuracies": 1.0, "rewards/chosen": 0.1312357485294342, "rewards/margins": 12.9941987991333, "rewards/rejected": -12.862963676452637, "step": 8740 }, { "epoch": 0.52, "learning_rate": 2.7459976495397738e-06, "logits/chosen": -2.9647655487060547, "logits/rejected": -2.838284969329834, "logps/chosen": -74.62385559082031, "logps/rejected": -1283.798095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.056770432740449905, "rewards/margins": 12.406367301940918, "rewards/rejected": -12.463136672973633, "step": 8750 }, { "epoch": 0.52, "learning_rate": 2.7408186491953862e-06, "logits/chosen": -2.9809114933013916, "logits/rejected": -2.856923818588257, "logps/chosen": -62.075721740722656, "logps/rejected": -1264.59033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.012090039439499378, "rewards/margins": 12.260701179504395, "rewards/rejected": -12.272790908813477, "step": 8760 }, { "epoch": 0.52, "learning_rate": 2.735638605480482e-06, "logits/chosen": -2.977473735809326, "logits/rejected": -2.841017961502075, "logps/chosen": -61.064674377441406, "logps/rejected": -1232.320556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.03991900011897087, "rewards/margins": 11.983195304870605, "rewards/rejected": -11.943277359008789, "step": 8770 }, { "epoch": 0.52, "learning_rate": 2.730457540838109e-06, "logits/chosen": -2.941861867904663, "logits/rejected": -2.8452060222625732, "logps/chosen": -52.70491409301758, "logps/rejected": -1263.21923828125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.08596739917993546, "rewards/margins": 12.341135025024414, "rewards/rejected": -12.255167961120605, "step": 8780 }, { "epoch": 0.52, "learning_rate": 2.725275477715743e-06, "logits/chosen": -2.981248378753662, "logits/rejected": -2.8314578533172607, "logps/chosen": -75.89530181884766, "logps/rejected": -1329.743896484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.1595805138349533, "rewards/margins": 12.751399993896484, "rewards/rejected": -12.910982131958008, "step": 8790 }, { "epoch": 0.52, "learning_rate": 2.7200924385651805e-06, "logits/chosen": -2.949754476547241, "logits/rejected": -2.8321094512939453, "logps/chosen": -56.72241973876953, "logps/rejected": -1225.424072265625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.02431623637676239, "rewards/margins": 11.90939998626709, "rewards/rejected": -11.88508415222168, "step": 8800 }, { "epoch": 0.53, "learning_rate": 2.7149084458424497e-06, "logits/chosen": -2.959397315979004, "logits/rejected": -2.8715085983276367, "logps/chosen": -45.30323791503906, "logps/rejected": -1311.68212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.1581621617078781, "rewards/margins": 12.89634895324707, "rewards/rejected": -12.738187789916992, "step": 8810 }, { "epoch": 0.53, "learning_rate": 2.70972352200771e-06, "logits/chosen": -2.9788358211517334, "logits/rejected": -2.884868621826172, "logps/chosen": -49.41818618774414, "logps/rejected": -1325.9334716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.16685834527015686, "rewards/margins": 13.038371086120605, "rewards/rejected": -12.871513366699219, "step": 8820 }, { "epoch": 0.53, "learning_rate": 2.7045376895251544e-06, "logits/chosen": -2.9577267169952393, "logits/rejected": -2.8634543418884277, "logps/chosen": -34.049888610839844, "logps/rejected": -1245.668212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2170393019914627, "rewards/margins": 12.298728942871094, "rewards/rejected": -12.08168888092041, "step": 8830 }, { "epoch": 0.53, "learning_rate": 2.6993509708629133e-06, "logits/chosen": -2.994814395904541, "logits/rejected": -2.9016194343566895, "logps/chosen": -46.942420959472656, "logps/rejected": -1347.003662109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.1835002452135086, "rewards/margins": 13.271388053894043, "rewards/rejected": -13.08788776397705, "step": 8840 }, { "epoch": 0.53, "learning_rate": 2.694163388492957e-06, "logits/chosen": -2.9597342014312744, "logits/rejected": -2.8775787353515625, "logps/chosen": -49.50385284423828, "logps/rejected": -1246.2098388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.15555530786514282, "rewards/margins": 12.232217788696289, "rewards/rejected": -12.076662063598633, "step": 8850 }, { "epoch": 0.53, "learning_rate": 2.6889749648909946e-06, "logits/chosen": -2.9768807888031006, "logits/rejected": -2.866602659225464, "logps/chosen": -45.21308135986328, "logps/rejected": -1306.904052734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.19287309050559998, "rewards/margins": 12.870916366577148, "rewards/rejected": -12.678044319152832, "step": 8860 }, { "epoch": 0.53, "learning_rate": 2.6837857225363837e-06, "logits/chosen": -2.967308521270752, "logits/rejected": -2.866363048553467, "logps/chosen": -43.703025817871094, "logps/rejected": -1243.571533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.13728201389312744, "rewards/margins": 12.196931838989258, "rewards/rejected": -12.059650421142578, "step": 8870 }, { "epoch": 0.53, "learning_rate": 2.6785956839120294e-06, "logits/chosen": -2.9711251258850098, "logits/rejected": -2.8878767490386963, "logps/chosen": -43.91981506347656, "logps/rejected": -1243.4139404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.15857043862342834, "rewards/margins": 12.213834762573242, "rewards/rejected": -12.055264472961426, "step": 8880 }, { "epoch": 0.53, "learning_rate": 2.6734048715042824e-06, "logits/chosen": -2.9600207805633545, "logits/rejected": -2.848414897918701, "logps/chosen": -47.99745178222656, "logps/rejected": -1240.1851806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.1445685178041458, "rewards/margins": 12.185558319091797, "rewards/rejected": -12.040989875793457, "step": 8890 }, { "epoch": 0.53, "learning_rate": 2.668213307802851e-06, "logits/chosen": -2.953331470489502, "logits/rejected": -2.8871254920959473, "logps/chosen": -45.36017990112305, "logps/rejected": -1291.743408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.23142893612384796, "rewards/margins": 12.766741752624512, "rewards/rejected": -12.535313606262207, "step": 8900 }, { "epoch": 0.53, "learning_rate": 2.663021015300695e-06, "logits/chosen": -2.952596664428711, "logits/rejected": -2.8581175804138184, "logps/chosen": -46.52622985839844, "logps/rejected": -1262.698974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.18250781297683716, "rewards/margins": 12.426782608032227, "rewards/rejected": -12.244274139404297, "step": 8910 }, { "epoch": 0.53, "learning_rate": 2.657828016493933e-06, "logits/chosen": -2.9571893215179443, "logits/rejected": -2.867591381072998, "logps/chosen": -52.16246795654297, "logps/rejected": -1283.0552978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.12554006278514862, "rewards/margins": 12.573314666748047, "rewards/rejected": -12.447773933410645, "step": 8920 }, { "epoch": 0.53, "learning_rate": 2.6526343338817445e-06, "logits/chosen": -2.971665382385254, "logits/rejected": -2.869389057159424, "logps/chosen": -50.15416717529297, "logps/rejected": -1220.4505615234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.15726175904273987, "rewards/margins": 11.990255355834961, "rewards/rejected": -11.83299446105957, "step": 8930 }, { "epoch": 0.53, "learning_rate": 2.647439989966272e-06, "logits/chosen": -2.9658076763153076, "logits/rejected": -2.850288152694702, "logps/chosen": -40.789466857910156, "logps/rejected": -1270.8355712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.1988821029663086, "rewards/margins": 12.523916244506836, "rewards/rejected": -12.325034141540527, "step": 8940 }, { "epoch": 0.53, "learning_rate": 2.6422450072525198e-06, "logits/chosen": -2.9682209491729736, "logits/rejected": -2.8659145832061768, "logps/chosen": -41.479148864746094, "logps/rejected": -1149.8201904296875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.18285584449768066, "rewards/margins": 11.307470321655273, "rewards/rejected": -11.124614715576172, "step": 8950 }, { "epoch": 0.53, "learning_rate": 2.6370494082482632e-06, "logits/chosen": -2.9515223503112793, "logits/rejected": -2.8611979484558105, "logps/chosen": -49.92157745361328, "logps/rejected": -1207.210205078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.166936993598938, "rewards/margins": 11.849946975708008, "rewards/rejected": -11.68301010131836, "step": 8960 }, { "epoch": 0.53, "learning_rate": 2.6318532154639474e-06, "logits/chosen": -2.93013334274292, "logits/rejected": -2.8332791328430176, "logps/chosen": -50.42721176147461, "logps/rejected": -1230.2890625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.16743223369121552, "rewards/margins": 12.099576950073242, "rewards/rejected": -11.932145118713379, "step": 8970 }, { "epoch": 0.54, "learning_rate": 2.626656451412588e-06, "logits/chosen": -2.9798433780670166, "logits/rejected": -2.8885045051574707, "logps/chosen": -45.05487823486328, "logps/rejected": -1276.306884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2561742663383484, "rewards/margins": 12.631049156188965, "rewards/rejected": -12.37487506866455, "step": 8980 }, { "epoch": 0.54, "learning_rate": 2.6214591386096782e-06, "logits/chosen": -2.9241082668304443, "logits/rejected": -2.8290045261383057, "logps/chosen": -40.32552719116211, "logps/rejected": -1340.1346435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.22797603905200958, "rewards/margins": 13.255635261535645, "rewards/rejected": -13.02765941619873, "step": 8990 }, { "epoch": 0.54, "learning_rate": 2.6162612995730874e-06, "logits/chosen": -2.999342679977417, "logits/rejected": -2.877455472946167, "logps/chosen": -48.30815887451172, "logps/rejected": -1260.2291259765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2286851704120636, "rewards/margins": 12.461039543151855, "rewards/rejected": -12.232354164123535, "step": 9000 }, { "epoch": 0.54, "learning_rate": 2.6110629568229647e-06, "logits/chosen": -2.965094804763794, "logits/rejected": -2.8664510250091553, "logps/chosen": -40.789344787597656, "logps/rejected": -1239.308837890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24673199653625488, "rewards/margins": 12.273656845092773, "rewards/rejected": -12.026923179626465, "step": 9010 }, { "epoch": 0.54, "learning_rate": 2.6058641328816425e-06, "logits/chosen": -2.9797792434692383, "logits/rejected": -2.8848366737365723, "logps/chosen": -44.01903533935547, "logps/rejected": -1263.5869140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.19013036787509918, "rewards/margins": 12.446131706237793, "rewards/rejected": -12.256001472473145, "step": 9020 }, { "epoch": 0.54, "learning_rate": 2.6006648502735384e-06, "logits/chosen": -2.966172456741333, "logits/rejected": -2.8894152641296387, "logps/chosen": -39.97404479980469, "logps/rejected": -1235.9765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.24868126213550568, "rewards/margins": 12.242337226867676, "rewards/rejected": -11.993654251098633, "step": 9030 }, { "epoch": 0.54, "learning_rate": 2.5954651315250543e-06, "logits/chosen": -2.9834389686584473, "logits/rejected": -2.872826099395752, "logps/chosen": -40.74134826660156, "logps/rejected": -1257.401123046875, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 0.20899811387062073, "rewards/margins": 12.416910171508789, "rewards/rejected": -12.207910537719727, "step": 9040 }, { "epoch": 0.54, "learning_rate": 2.5902649991644855e-06, "logits/chosen": -2.990124225616455, "logits/rejected": -2.9175057411193848, "logps/chosen": -40.97984313964844, "logps/rejected": -1266.8646240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2466599941253662, "rewards/margins": 12.532567977905273, "rewards/rejected": -12.285909652709961, "step": 9050 }, { "epoch": 0.54, "learning_rate": 2.5850644757219177e-06, "logits/chosen": -2.983463764190674, "logits/rejected": -2.9224655628204346, "logps/chosen": -41.55308151245117, "logps/rejected": -1276.3968505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2486501932144165, "rewards/margins": 12.647331237792969, "rewards/rejected": -12.398681640625, "step": 9060 }, { "epoch": 0.54, "learning_rate": 2.5798635837291304e-06, "logits/chosen": -2.9603021144866943, "logits/rejected": -2.887413501739502, "logps/chosen": -41.849456787109375, "logps/rejected": -1235.6866455078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20148447155952454, "rewards/margins": 12.187830924987793, "rewards/rejected": -11.986348152160645, "step": 9070 }, { "epoch": 0.54, "learning_rate": 2.5746623457194996e-06, "logits/chosen": -2.962017059326172, "logits/rejected": -2.8917458057403564, "logps/chosen": -40.132904052734375, "logps/rejected": -1292.154052734375, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": 0.21738657355308533, "rewards/margins": 12.769416809082031, "rewards/rejected": -12.552031517028809, "step": 9080 }, { "epoch": 0.54, "learning_rate": 2.569460784227903e-06, "logits/chosen": -2.9728434085845947, "logits/rejected": -2.8888721466064453, "logps/chosen": -42.2921142578125, "logps/rejected": -1260.2838134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.20211729407310486, "rewards/margins": 12.427568435668945, "rewards/rejected": -12.22545051574707, "step": 9090 }, { "epoch": 0.54, "learning_rate": 2.5642589217906164e-06, "logits/chosen": -2.9713072776794434, "logits/rejected": -2.8750357627868652, "logps/chosen": -43.75558853149414, "logps/rejected": -1256.7825927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.1991276741027832, "rewards/margins": 12.390151977539062, "rewards/rejected": -12.191025733947754, "step": 9100 }, { "epoch": 0.54, "learning_rate": 2.559056780945223e-06, "logits/chosen": -2.986717462539673, "logits/rejected": -2.894167423248291, "logps/chosen": -52.99860763549805, "logps/rejected": -1224.610107421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.13073019683361053, "rewards/margins": 12.005308151245117, "rewards/rejected": -11.874578475952148, "step": 9110 }, { "epoch": 0.54, "learning_rate": 2.5538543842305085e-06, "logits/chosen": -2.9762673377990723, "logits/rejected": -2.8544113636016846, "logps/chosen": -45.780303955078125, "logps/rejected": -1308.70947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.17599241435527802, "rewards/margins": 12.883338928222656, "rewards/rejected": -12.707345962524414, "step": 9120 }, { "epoch": 0.54, "learning_rate": 2.5486517541863696e-06, "logits/chosen": -2.989480495452881, "logits/rejected": -2.904838800430298, "logps/chosen": -48.786006927490234, "logps/rejected": -1215.019775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.21205444633960724, "rewards/margins": 11.999664306640625, "rewards/rejected": -11.78761100769043, "step": 9130 }, { "epoch": 0.55, "learning_rate": 2.5434489133537154e-06, "logits/chosen": -2.9888834953308105, "logits/rejected": -2.8975064754486084, "logps/chosen": -48.088924407958984, "logps/rejected": -1201.7445068359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.1954115927219391, "rewards/margins": 11.833611488342285, "rewards/rejected": -11.638200759887695, "step": 9140 }, { "epoch": 0.55, "learning_rate": 2.5382458842743634e-06, "logits/chosen": -2.938774824142456, "logits/rejected": -2.8624167442321777, "logps/chosen": -44.606319427490234, "logps/rejected": -1231.3193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.14642134308815002, "rewards/margins": 12.076452255249023, "rewards/rejected": -11.930031776428223, "step": 9150 }, { "epoch": 0.55, "learning_rate": 2.53304268949095e-06, "logits/chosen": -2.9361376762390137, "logits/rejected": -2.877662181854248, "logps/chosen": -44.15776443481445, "logps/rejected": -1249.456298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.1906069815158844, "rewards/margins": 12.28660774230957, "rewards/rejected": -12.096000671386719, "step": 9160 }, { "epoch": 0.55, "learning_rate": 2.5278393515468312e-06, "logits/chosen": -2.9742002487182617, "logits/rejected": -2.865609884262085, "logps/chosen": -54.28617477416992, "logps/rejected": -1250.508056640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.11291607469320297, "rewards/margins": 12.237199783325195, "rewards/rejected": -12.124281883239746, "step": 9170 }, { "epoch": 0.55, "learning_rate": 2.5226358929859793e-06, "logits/chosen": -3.0043816566467285, "logits/rejected": -2.884965419769287, "logps/chosen": -48.6788330078125, "logps/rejected": -1179.1820068359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.13627132773399353, "rewards/margins": 11.552314758300781, "rewards/rejected": -11.416043281555176, "step": 9180 }, { "epoch": 0.55, "learning_rate": 2.517432336352891e-06, "logits/chosen": -2.9576492309570312, "logits/rejected": -2.8653149604797363, "logps/chosen": -55.18242645263672, "logps/rejected": -1305.3255615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.11836574971675873, "rewards/margins": 12.7894926071167, "rewards/rejected": -12.671127319335938, "step": 9190 }, { "epoch": 0.55, "learning_rate": 2.5122287041924897e-06, "logits/chosen": -2.980027675628662, "logits/rejected": -2.896211862564087, "logps/chosen": -52.74467849731445, "logps/rejected": -1279.013916015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.12098772823810577, "rewards/margins": 12.535577774047852, "rewards/rejected": -12.414590835571289, "step": 9200 }, { "epoch": 0.55, "learning_rate": 2.507025019050022e-06, "logits/chosen": -2.9801268577575684, "logits/rejected": -2.899020195007324, "logps/chosen": -49.745635986328125, "logps/rejected": -1226.7257080078125, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": 0.12398876249790192, "rewards/margins": 12.029330253601074, "rewards/rejected": -11.905342102050781, "step": 9210 }, { "epoch": 0.55, "learning_rate": 2.5018213034709683e-06, "logits/chosen": -2.9556212425231934, "logits/rejected": -2.8501930236816406, "logps/chosen": -50.41413497924805, "logps/rejected": -1216.897216796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.15183250606060028, "rewards/margins": 11.942179679870605, "rewards/rejected": -11.790346145629883, "step": 9220 }, { "epoch": 0.55, "learning_rate": 2.496617580000937e-06, "logits/chosen": -2.9717164039611816, "logits/rejected": -2.8582329750061035, "logps/chosen": -43.24429702758789, "logps/rejected": -1289.617919921875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.1564403921365738, "rewards/margins": 12.663263320922852, "rewards/rejected": -12.50682544708252, "step": 9230 }, { "epoch": 0.55, "learning_rate": 2.491413871185574e-06, "logits/chosen": -2.963707447052002, "logits/rejected": -2.903217315673828, "logps/chosen": -42.025020599365234, "logps/rejected": -1183.453369140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2044510394334793, "rewards/margins": 11.665146827697754, "rewards/rejected": -11.460695266723633, "step": 9240 }, { "epoch": 0.55, "learning_rate": 2.486210199570459e-06, "logits/chosen": -2.9756312370300293, "logits/rejected": -2.88602876663208, "logps/chosen": -56.78643035888672, "logps/rejected": -1260.242431640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.0809493213891983, "rewards/margins": 12.31794261932373, "rewards/rejected": -12.236992835998535, "step": 9250 }, { "epoch": 0.55, "learning_rate": 2.4810065877010137e-06, "logits/chosen": -2.976147413253784, "logits/rejected": -2.8907580375671387, "logps/chosen": -55.22056198120117, "logps/rejected": -1239.6168212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.14412766695022583, "rewards/margins": 12.166702270507812, "rewards/rejected": -12.022576332092285, "step": 9260 }, { "epoch": 0.55, "learning_rate": 2.475803058122397e-06, "logits/chosen": -3.0137171745300293, "logits/rejected": -2.887436866760254, "logps/chosen": -36.76655197143555, "logps/rejected": -1262.840576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.22873082756996155, "rewards/margins": 12.492914199829102, "rewards/rejected": -12.264183044433594, "step": 9270 }, { "epoch": 0.55, "learning_rate": 2.470599633379415e-06, "logits/chosen": -2.9577810764312744, "logits/rejected": -2.8587794303894043, "logps/chosen": -44.51557922363281, "logps/rejected": -1259.281982421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20409934222698212, "rewards/margins": 12.41288948059082, "rewards/rejected": -12.20879077911377, "step": 9280 }, { "epoch": 0.55, "learning_rate": 2.465396336016417e-06, "logits/chosen": -2.996368885040283, "logits/rejected": -2.880734443664551, "logps/chosen": -54.73809814453125, "logps/rejected": -1308.103515625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.11750304698944092, "rewards/margins": 12.81140422821045, "rewards/rejected": -12.693901062011719, "step": 9290 }, { "epoch": 0.55, "learning_rate": 2.460193188577201e-06, "logits/chosen": -2.951235294342041, "logits/rejected": -2.8351738452911377, "logps/chosen": -42.72834014892578, "logps/rejected": -1239.0233154296875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.21830396354198456, "rewards/margins": 12.229582786560059, "rewards/rejected": -12.01128101348877, "step": 9300 }, { "epoch": 0.56, "learning_rate": 2.454990213604917e-06, "logits/chosen": -2.989422559738159, "logits/rejected": -2.867706537246704, "logps/chosen": -40.17791748046875, "logps/rejected": -1310.503173828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.21283376216888428, "rewards/margins": 12.938833236694336, "rewards/rejected": -12.72599983215332, "step": 9310 }, { "epoch": 0.56, "learning_rate": 2.449787433641965e-06, "logits/chosen": -2.9797723293304443, "logits/rejected": -2.874389171600342, "logps/chosen": -38.178955078125, "logps/rejected": -1276.9794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.23491671681404114, "rewards/margins": 12.610333442687988, "rewards/rejected": -12.375417709350586, "step": 9320 }, { "epoch": 0.56, "learning_rate": 2.4445848712299027e-06, "logits/chosen": -2.9647350311279297, "logits/rejected": -2.875432014465332, "logps/chosen": -40.44805908203125, "logps/rejected": -1242.7060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.21663685142993927, "rewards/margins": 12.271024703979492, "rewards/rejected": -12.054386138916016, "step": 9330 }, { "epoch": 0.56, "learning_rate": 2.4393825489093438e-06, "logits/chosen": -2.989455461502075, "logits/rejected": -2.8662023544311523, "logps/chosen": -40.48072052001953, "logps/rejected": -1235.3988037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.22040827572345734, "rewards/margins": 12.17780876159668, "rewards/rejected": -11.957399368286133, "step": 9340 }, { "epoch": 0.56, "learning_rate": 2.434180489219863e-06, "logits/chosen": -2.9959051609039307, "logits/rejected": -2.880833148956299, "logps/chosen": -35.732810974121094, "logps/rejected": -1218.936279296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.23007817566394806, "rewards/margins": 12.054116249084473, "rewards/rejected": -11.824037551879883, "step": 9350 }, { "epoch": 0.56, "learning_rate": 2.428978714699894e-06, "logits/chosen": -2.96873140335083, "logits/rejected": -2.8943722248077393, "logps/chosen": -42.40863800048828, "logps/rejected": -1273.91650390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23969228565692902, "rewards/margins": 12.612908363342285, "rewards/rejected": -12.37321662902832, "step": 9360 }, { "epoch": 0.56, "learning_rate": 2.4237772478866403e-06, "logits/chosen": -2.990269660949707, "logits/rejected": -2.858637809753418, "logps/chosen": -43.64851379394531, "logps/rejected": -1214.3477783203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.24452492594718933, "rewards/margins": 12.012109756469727, "rewards/rejected": -11.767583847045898, "step": 9370 }, { "epoch": 0.56, "learning_rate": 2.4185761113159677e-06, "logits/chosen": -2.9418365955352783, "logits/rejected": -2.8713150024414062, "logps/chosen": -35.403411865234375, "logps/rejected": -1222.1220703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.22957739233970642, "rewards/margins": 12.055707931518555, "rewards/rejected": -11.826130867004395, "step": 9380 }, { "epoch": 0.56, "learning_rate": 2.4133753275223114e-06, "logits/chosen": -2.960195302963257, "logits/rejected": -2.8634166717529297, "logps/chosen": -41.960514068603516, "logps/rejected": -1262.65234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2213628739118576, "rewards/margins": 12.463578224182129, "rewards/rejected": -12.242216110229492, "step": 9390 }, { "epoch": 0.56, "learning_rate": 2.4081749190385818e-06, "logits/chosen": -2.9701061248779297, "logits/rejected": -2.9194276332855225, "logps/chosen": -36.380672454833984, "logps/rejected": -1214.156982421875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.24058249592781067, "rewards/margins": 12.004617691040039, "rewards/rejected": -11.764036178588867, "step": 9400 }, { "epoch": 0.56, "learning_rate": 2.402974908396059e-06, "logits/chosen": -2.962836742401123, "logits/rejected": -2.845795154571533, "logps/chosen": -45.22550964355469, "logps/rejected": -1189.0948486328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2281181514263153, "rewards/margins": 11.739258766174316, "rewards/rejected": -11.511140823364258, "step": 9410 }, { "epoch": 0.56, "learning_rate": 2.397775318124302e-06, "logits/chosen": -2.9512839317321777, "logits/rejected": -2.888848066329956, "logps/chosen": -37.60903549194336, "logps/rejected": -1208.861572265625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.21124553680419922, "rewards/margins": 11.922050476074219, "rewards/rejected": -11.71080493927002, "step": 9420 }, { "epoch": 0.56, "learning_rate": 2.3925761707510484e-06, "logits/chosen": -3.001338481903076, "logits/rejected": -2.9347152709960938, "logps/chosen": -40.7947998046875, "logps/rejected": -1223.028564453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23186656832695007, "rewards/margins": 12.075756072998047, "rewards/rejected": -11.843889236450195, "step": 9430 }, { "epoch": 0.56, "learning_rate": 2.387377488802116e-06, "logits/chosen": -2.9383914470672607, "logits/rejected": -2.8530519008636475, "logps/chosen": -46.814170837402344, "logps/rejected": -1285.8856201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2181713581085205, "rewards/margins": 12.698246955871582, "rewards/rejected": -12.48007583618164, "step": 9440 }, { "epoch": 0.56, "learning_rate": 2.382179294801305e-06, "logits/chosen": -2.980072498321533, "logits/rejected": -2.880871295928955, "logps/chosen": -38.453033447265625, "logps/rejected": -1291.5772705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2407650202512741, "rewards/margins": 12.773893356323242, "rewards/rejected": -12.53312873840332, "step": 9450 }, { "epoch": 0.56, "learning_rate": 2.376981611270305e-06, "logits/chosen": -2.9513192176818848, "logits/rejected": -2.881021022796631, "logps/chosen": -42.526371002197266, "logps/rejected": -1270.74609375, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": 0.22270245850086212, "rewards/margins": 12.548460006713867, "rewards/rejected": -12.325757026672363, "step": 9460 }, { "epoch": 0.56, "learning_rate": 2.3717844607285905e-06, "logits/chosen": -2.9926583766937256, "logits/rejected": -2.899226665496826, "logps/chosen": -35.52269744873047, "logps/rejected": -1268.6619873046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.251014769077301, "rewards/margins": 12.556009292602539, "rewards/rejected": -12.304996490478516, "step": 9470 }, { "epoch": 0.57, "learning_rate": 2.3665878656933285e-06, "logits/chosen": -2.981405258178711, "logits/rejected": -2.9108242988586426, "logps/chosen": -35.53313446044922, "logps/rejected": -1274.2786865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2580853998661041, "rewards/margins": 12.616819381713867, "rewards/rejected": -12.358733177185059, "step": 9480 }, { "epoch": 0.57, "learning_rate": 2.3613918486792777e-06, "logits/chosen": -2.993091106414795, "logits/rejected": -2.88267183303833, "logps/chosen": -42.620445251464844, "logps/rejected": -1317.543701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.21344904601573944, "rewards/margins": 12.990190505981445, "rewards/rejected": -12.776740074157715, "step": 9490 }, { "epoch": 0.57, "learning_rate": 2.3561964321986963e-06, "logits/chosen": -2.9992728233337402, "logits/rejected": -2.8799238204956055, "logps/chosen": -41.222930908203125, "logps/rejected": -1304.063720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2711508274078369, "rewards/margins": 12.930099487304688, "rewards/rejected": -12.658947944641113, "step": 9500 }, { "epoch": 0.57, "learning_rate": 2.351001638761236e-06, "logits/chosen": -3.009870767593384, "logits/rejected": -2.9049689769744873, "logps/chosen": -40.434295654296875, "logps/rejected": -1319.9505615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.22117583453655243, "rewards/margins": 13.051579475402832, "rewards/rejected": -12.830401420593262, "step": 9510 }, { "epoch": 0.57, "learning_rate": 2.34580749087385e-06, "logits/chosen": -2.9871726036071777, "logits/rejected": -2.907174825668335, "logps/chosen": -40.509910583496094, "logps/rejected": -1226.049560546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24265722930431366, "rewards/margins": 12.125676155090332, "rewards/rejected": -11.883018493652344, "step": 9520 }, { "epoch": 0.57, "learning_rate": 2.3406140110406984e-06, "logits/chosen": -2.989603281021118, "logits/rejected": -2.8962368965148926, "logps/chosen": -44.63233184814453, "logps/rejected": -1305.19091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.21196778118610382, "rewards/margins": 12.897290229797363, "rewards/rejected": -12.685319900512695, "step": 9530 }, { "epoch": 0.57, "learning_rate": 2.3354212217630428e-06, "logits/chosen": -3.0290143489837646, "logits/rejected": -2.922792673110962, "logps/chosen": -38.12199401855469, "logps/rejected": -1324.93212890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2168455421924591, "rewards/margins": 13.082479476928711, "rewards/rejected": -12.865633964538574, "step": 9540 }, { "epoch": 0.57, "learning_rate": 2.3302291455391525e-06, "logits/chosen": -2.978048801422119, "logits/rejected": -2.8746883869171143, "logps/chosen": -60.62965774536133, "logps/rejected": -1231.0616455078125, "loss": 0.0508, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.07666100561618805, "rewards/margins": 12.011474609375, "rewards/rejected": -11.934813499450684, "step": 9550 }, { "epoch": 0.57, "learning_rate": 2.3250378048642117e-06, "logits/chosen": -2.990445852279663, "logits/rejected": -2.9036803245544434, "logps/chosen": -39.65859603881836, "logps/rejected": -1270.784423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2315705269575119, "rewards/margins": 12.564870834350586, "rewards/rejected": -12.333301544189453, "step": 9560 }, { "epoch": 0.57, "learning_rate": 2.3198472222302144e-06, "logits/chosen": -2.9781603813171387, "logits/rejected": -2.870568037033081, "logps/chosen": -41.394535064697266, "logps/rejected": -1242.1280517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24270308017730713, "rewards/margins": 12.290084838867188, "rewards/rejected": -12.047381401062012, "step": 9570 }, { "epoch": 0.57, "learning_rate": 2.3146574201258697e-06, "logits/chosen": -2.9689974784851074, "logits/rejected": -2.8347415924072266, "logps/chosen": -40.71967315673828, "logps/rejected": -1203.2076416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2685660719871521, "rewards/margins": 11.922643661499023, "rewards/rejected": -11.654077529907227, "step": 9580 }, { "epoch": 0.57, "learning_rate": 2.309468421036509e-06, "logits/chosen": -2.9729204177856445, "logits/rejected": -2.8938140869140625, "logps/chosen": -40.575721740722656, "logps/rejected": -1239.2877197265625, "loss": 0.0102, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.19172948598861694, "rewards/margins": 12.212854385375977, "rewards/rejected": -12.021124839782715, "step": 9590 }, { "epoch": 0.57, "learning_rate": 2.3042802474439805e-06, "logits/chosen": -2.972482442855835, "logits/rejected": -2.8853089809417725, "logps/chosen": -39.83426284790039, "logps/rejected": -1248.469970703125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.24646826088428497, "rewards/margins": 12.367602348327637, "rewards/rejected": -12.121134757995605, "step": 9600 }, { "epoch": 0.57, "learning_rate": 2.299092921826556e-06, "logits/chosen": -2.9871268272399902, "logits/rejected": -2.9079020023345947, "logps/chosen": -37.69023895263672, "logps/rejected": -1256.027099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.25097811222076416, "rewards/margins": 12.435636520385742, "rewards/rejected": -12.184660911560059, "step": 9610 }, { "epoch": 0.57, "learning_rate": 2.293906466658837e-06, "logits/chosen": -2.9797866344451904, "logits/rejected": -2.921504497528076, "logps/chosen": -40.65472412109375, "logps/rejected": -1221.1839599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.25753307342529297, "rewards/margins": 12.088210105895996, "rewards/rejected": -11.830676078796387, "step": 9620 }, { "epoch": 0.57, "learning_rate": 2.288720904411651e-06, "logits/chosen": -2.9724273681640625, "logits/rejected": -2.907377004623413, "logps/chosen": -36.00993728637695, "logps/rejected": -1266.8876953125, "loss": 0.0646, "rewards/accuracies": 1.0, "rewards/chosen": 0.2480040341615677, "rewards/margins": 12.529034614562988, "rewards/rejected": -12.281028747558594, "step": 9630 }, { "epoch": 0.57, "learning_rate": 2.283536257551955e-06, "logits/chosen": -2.9574809074401855, "logits/rejected": -2.87984037399292, "logps/chosen": -37.035335540771484, "logps/rejected": -1184.8258056640625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 0.25751715898513794, "rewards/margins": 11.724468231201172, "rewards/rejected": -11.466951370239258, "step": 9640 }, { "epoch": 0.58, "learning_rate": 2.278352548542744e-06, "logits/chosen": -2.9399967193603516, "logits/rejected": -2.882890224456787, "logps/chosen": -40.815879821777344, "logps/rejected": -1169.46142578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24682684242725372, "rewards/margins": 11.553632736206055, "rewards/rejected": -11.306805610656738, "step": 9650 }, { "epoch": 0.58, "learning_rate": 2.2731697998429485e-06, "logits/chosen": -2.985818862915039, "logits/rejected": -2.9154739379882812, "logps/chosen": -42.50580978393555, "logps/rejected": -1101.7093505859375, "loss": 0.0148, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.19362972676753998, "rewards/margins": 10.833837509155273, "rewards/rejected": -10.64020824432373, "step": 9660 }, { "epoch": 0.58, "learning_rate": 2.267988033907335e-06, "logits/chosen": -2.988497257232666, "logits/rejected": -2.904738426208496, "logps/chosen": -54.941619873046875, "logps/rejected": -1182.1480712890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.14398293197155, "rewards/margins": 11.586263656616211, "rewards/rejected": -11.442280769348145, "step": 9670 }, { "epoch": 0.58, "learning_rate": 2.2628072731864186e-06, "logits/chosen": -2.9598867893218994, "logits/rejected": -2.8779075145721436, "logps/chosen": -63.598609924316406, "logps/rejected": -1269.5775146484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.006036737002432346, "rewards/margins": 12.296332359313965, "rewards/rejected": -12.302370071411133, "step": 9680 }, { "epoch": 0.58, "learning_rate": 2.257627540126353e-06, "logits/chosen": -2.987274646759033, "logits/rejected": -2.89931583404541, "logps/chosen": -64.37332916259766, "logps/rejected": -1221.43896484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.014147159643471241, "rewards/margins": 11.846152305603027, "rewards/rejected": -11.832006454467773, "step": 9690 }, { "epoch": 0.58, "learning_rate": 2.2524488571688407e-06, "logits/chosen": -3.0034637451171875, "logits/rejected": -2.882441759109497, "logps/chosen": -62.52915573120117, "logps/rejected": -1263.821533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.04359915107488632, "rewards/margins": 12.305795669555664, "rewards/rejected": -12.262197494506836, "step": 9700 }, { "epoch": 0.58, "learning_rate": 2.247271246751039e-06, "logits/chosen": -2.9640774726867676, "logits/rejected": -2.8395307064056396, "logps/chosen": -57.16559982299805, "logps/rejected": -1220.156005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.07854527235031128, "rewards/margins": 11.907176971435547, "rewards/rejected": -11.828633308410645, "step": 9710 }, { "epoch": 0.58, "learning_rate": 2.242094731305452e-06, "logits/chosen": -2.993281126022339, "logits/rejected": -2.878988742828369, "logps/chosen": -63.22277069091797, "logps/rejected": -1230.7012939453125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.04849022626876831, "rewards/margins": 11.898063659667969, "rewards/rejected": -11.946553230285645, "step": 9720 }, { "epoch": 0.58, "learning_rate": 2.236919333259844e-06, "logits/chosen": -2.979945659637451, "logits/rejected": -2.883234977722168, "logps/chosen": -80.49848937988281, "logps/rejected": -1251.7884521484375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.14198894798755646, "rewards/margins": 12.004901885986328, "rewards/rejected": -12.146891593933105, "step": 9730 }, { "epoch": 0.58, "learning_rate": 2.231745075037137e-06, "logits/chosen": -2.9895477294921875, "logits/rejected": -2.90083646774292, "logps/chosen": -79.69084167480469, "logps/rejected": -1230.3558349609375, "loss": 0.0479, "rewards/accuracies": 1.0, "rewards/chosen": -0.15557768940925598, "rewards/margins": 11.762667655944824, "rewards/rejected": -11.918244361877441, "step": 9740 }, { "epoch": 0.58, "learning_rate": 2.2265719790553147e-06, "logits/chosen": -2.989647388458252, "logits/rejected": -2.897660493850708, "logps/chosen": -86.86996459960938, "logps/rejected": -1186.6939697265625, "loss": 0.0178, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.26261237263679504, "rewards/margins": 11.227874755859375, "rewards/rejected": -11.490486145019531, "step": 9750 }, { "epoch": 0.58, "learning_rate": 2.221400067727323e-06, "logits/chosen": -3.007261276245117, "logits/rejected": -2.904583692550659, "logps/chosen": -88.12381744384766, "logps/rejected": -1212.2861328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.21627256274223328, "rewards/margins": 11.522557258605957, "rewards/rejected": -11.738829612731934, "step": 9760 }, { "epoch": 0.58, "learning_rate": 2.21622936346098e-06, "logits/chosen": -3.006793975830078, "logits/rejected": -2.9098784923553467, "logps/chosen": -80.12518310546875, "logps/rejected": -1269.3741455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.17039601504802704, "rewards/margins": 12.144688606262207, "rewards/rejected": -12.315084457397461, "step": 9770 }, { "epoch": 0.58, "learning_rate": 2.2110598886588693e-06, "logits/chosen": -2.9720585346221924, "logits/rejected": -2.8704726696014404, "logps/chosen": -81.26084899902344, "logps/rejected": -1172.9483642578125, "loss": 0.093, "rewards/accuracies": 1.0, "rewards/chosen": -0.14412884414196014, "rewards/margins": 11.215883255004883, "rewards/rejected": -11.360013961791992, "step": 9780 }, { "epoch": 0.58, "learning_rate": 2.2058916657182493e-06, "logits/chosen": -2.9960734844207764, "logits/rejected": -2.8965671062469482, "logps/chosen": -98.97016143798828, "logps/rejected": -1285.3533935546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4145797789096832, "rewards/margins": 12.070545196533203, "rewards/rejected": -12.485124588012695, "step": 9790 }, { "epoch": 0.58, "learning_rate": 2.2007247170309567e-06, "logits/chosen": -2.967376708984375, "logits/rejected": -2.880034923553467, "logps/chosen": -83.8415756225586, "logps/rejected": -1289.577880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.20105037093162537, "rewards/margins": 12.320840835571289, "rewards/rejected": -12.521890640258789, "step": 9800 }, { "epoch": 0.58, "learning_rate": 2.195559064983304e-06, "logits/chosen": -3.0369668006896973, "logits/rejected": -2.8679347038269043, "logps/chosen": -83.0972900390625, "logps/rejected": -1240.521240234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.23345330357551575, "rewards/margins": 11.784460067749023, "rewards/rejected": -12.017913818359375, "step": 9810 }, { "epoch": 0.59, "learning_rate": 2.1903947319559884e-06, "logits/chosen": -2.959038257598877, "logits/rejected": -2.8575453758239746, "logps/chosen": -101.90208435058594, "logps/rejected": -1261.6361083984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.3307816982269287, "rewards/margins": 11.897555351257324, "rewards/rejected": -12.228338241577148, "step": 9820 }, { "epoch": 0.59, "learning_rate": 2.1852317403239907e-06, "logits/chosen": -3.005913257598877, "logits/rejected": -2.880601406097412, "logps/chosen": -75.00233459472656, "logps/rejected": -1197.5980224609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.12252752482891083, "rewards/margins": 11.480939865112305, "rewards/rejected": -11.60346794128418, "step": 9830 }, { "epoch": 0.59, "learning_rate": 2.180070112456482e-06, "logits/chosen": -2.9647719860076904, "logits/rejected": -2.8713574409484863, "logps/chosen": -85.9264144897461, "logps/rejected": -1217.588623046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.19437281787395477, "rewards/margins": 11.61212158203125, "rewards/rejected": -11.806495666503906, "step": 9840 }, { "epoch": 0.59, "learning_rate": 2.174909870716721e-06, "logits/chosen": -2.975794553756714, "logits/rejected": -2.8570408821105957, "logps/chosen": -91.0063705444336, "logps/rejected": -1233.118896484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.2875121533870697, "rewards/margins": 11.674736022949219, "rewards/rejected": -11.962246894836426, "step": 9850 }, { "epoch": 0.59, "learning_rate": 2.169751037461966e-06, "logits/chosen": -3.0044291019439697, "logits/rejected": -2.8917171955108643, "logps/chosen": -81.74671936035156, "logps/rejected": -1268.9600830078125, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.12717114388942719, "rewards/margins": 12.188758850097656, "rewards/rejected": -12.315929412841797, "step": 9860 }, { "epoch": 0.59, "learning_rate": 2.1645936350433692e-06, "logits/chosen": -2.954209089279175, "logits/rejected": -2.8926329612731934, "logps/chosen": -64.61448669433594, "logps/rejected": -1278.6722412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.008383015170693398, "rewards/margins": 12.396199226379395, "rewards/rejected": -12.404581069946289, "step": 9870 }, { "epoch": 0.59, "learning_rate": 2.159437685805883e-06, "logits/chosen": -2.989283561706543, "logits/rejected": -2.888155698776245, "logps/chosen": -74.52119445800781, "logps/rejected": -1224.699951171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.0890112817287445, "rewards/margins": 11.794181823730469, "rewards/rejected": -11.883193016052246, "step": 9880 }, { "epoch": 0.59, "learning_rate": 2.154283212088168e-06, "logits/chosen": -2.9932754039764404, "logits/rejected": -2.8881843090057373, "logps/chosen": -63.68756103515625, "logps/rejected": -1287.634521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.0059070587158203125, "rewards/margins": 12.493738174438477, "rewards/rejected": -12.499645233154297, "step": 9890 }, { "epoch": 0.59, "learning_rate": 2.149130236222487e-06, "logits/chosen": -2.9777047634124756, "logits/rejected": -2.897897720336914, "logps/chosen": -64.17404174804688, "logps/rejected": -1207.408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.02817385457456112, "rewards/margins": 11.711917877197266, "rewards/rejected": -11.68374252319336, "step": 9900 }, { "epoch": 0.59, "learning_rate": 2.143978780534616e-06, "logits/chosen": -2.971426486968994, "logits/rejected": -2.9091827869415283, "logps/chosen": -68.61616516113281, "logps/rejected": -1234.3909912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.03058856725692749, "rewards/margins": 11.94639778137207, "rewards/rejected": -11.9769868850708, "step": 9910 }, { "epoch": 0.59, "learning_rate": 2.138828867343746e-06, "logits/chosen": -2.9721484184265137, "logits/rejected": -2.8680427074432373, "logps/chosen": -67.0702133178711, "logps/rejected": -1240.4036865234375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.018487973138689995, "rewards/margins": 12.036812782287598, "rewards/rejected": -12.01832389831543, "step": 9920 }, { "epoch": 0.59, "learning_rate": 2.1336805189623813e-06, "logits/chosen": -2.9876708984375, "logits/rejected": -2.886920928955078, "logps/chosen": -61.87831497192383, "logps/rejected": -1243.1683349609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.0662279948592186, "rewards/margins": 12.112349510192871, "rewards/rejected": -12.046120643615723, "step": 9930 }, { "epoch": 0.59, "learning_rate": 2.128533757696248e-06, "logits/chosen": -2.9893276691436768, "logits/rejected": -2.877152919769287, "logps/chosen": -48.3928337097168, "logps/rejected": -1218.15673828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.15706050395965576, "rewards/margins": 11.95707893371582, "rewards/rejected": -11.800019264221191, "step": 9940 }, { "epoch": 0.59, "learning_rate": 2.123388605844198e-06, "logits/chosen": -3.0115621089935303, "logits/rejected": -2.9057838916778564, "logps/chosen": -47.50642776489258, "logps/rejected": -1251.9359130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.1576121300458908, "rewards/margins": 12.306461334228516, "rewards/rejected": -12.148848533630371, "step": 9950 }, { "epoch": 0.59, "learning_rate": 2.1182450856981066e-06, "logits/chosen": -2.96591854095459, "logits/rejected": -2.90376353263855, "logps/chosen": -53.99578094482422, "logps/rejected": -1214.895263671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.11360809952020645, "rewards/margins": 11.88642692565918, "rewards/rejected": -11.772819519042969, "step": 9960 }, { "epoch": 0.59, "learning_rate": 2.113103219542782e-06, "logits/chosen": -2.988293170928955, "logits/rejected": -2.910637378692627, "logps/chosen": -51.88544464111328, "logps/rejected": -1198.21875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.16181662678718567, "rewards/margins": 11.768038749694824, "rewards/rejected": -11.606222152709961, "step": 9970 }, { "epoch": 0.6, "learning_rate": 2.107963029655867e-06, "logits/chosen": -2.9404966831207275, "logits/rejected": -2.8478968143463135, "logps/chosen": -52.40068817138672, "logps/rejected": -1217.57373046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.1329415887594223, "rewards/margins": 11.940637588500977, "rewards/rejected": -11.807695388793945, "step": 9980 }, { "epoch": 0.6, "learning_rate": 2.1028245383077392e-06, "logits/chosen": -2.9553277492523193, "logits/rejected": -2.880298137664795, "logps/chosen": -48.15936279296875, "logps/rejected": -1172.3740234375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.13994662463665009, "rewards/margins": 11.48833179473877, "rewards/rejected": -11.348384857177734, "step": 9990 }, { "epoch": 0.6, "learning_rate": 2.0976877677614183e-06, "logits/chosen": -3.0120344161987305, "logits/rejected": -2.9063429832458496, "logps/chosen": -54.44757080078125, "logps/rejected": -1272.0706787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.12709757685661316, "rewards/margins": 12.444068908691406, "rewards/rejected": -12.316971778869629, "step": 10000 }, { "epoch": 0.6, "learning_rate": 2.09255274027247e-06, "logits/chosen": -2.9989466667175293, "logits/rejected": -2.906202554702759, "logps/chosen": -59.957191467285156, "logps/rejected": -1238.20166015625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.06595077365636826, "rewards/margins": 12.07018756866455, "rewards/rejected": -12.004236221313477, "step": 10010 }, { "epoch": 0.6, "learning_rate": 2.087419478088906e-06, "logits/chosen": -2.9656615257263184, "logits/rejected": -2.8944828510284424, "logps/chosen": -43.84659957885742, "logps/rejected": -1256.838134765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.14336314797401428, "rewards/margins": 12.340673446655273, "rewards/rejected": -12.197309494018555, "step": 10020 }, { "epoch": 0.6, "learning_rate": 2.0822880034510897e-06, "logits/chosen": -2.9585108757019043, "logits/rejected": -2.9052176475524902, "logps/chosen": -56.184104919433594, "logps/rejected": -1185.11865234375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.08653213083744049, "rewards/margins": 11.563650131225586, "rewards/rejected": -11.477117538452148, "step": 10030 }, { "epoch": 0.6, "learning_rate": 2.077158338591641e-06, "logits/chosen": -3.0109517574310303, "logits/rejected": -2.9136478900909424, "logps/chosen": -47.826316833496094, "logps/rejected": -1255.184326171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.14824263751506805, "rewards/margins": 12.31456470489502, "rewards/rejected": -12.166321754455566, "step": 10040 }, { "epoch": 0.6, "learning_rate": 2.0720305057353384e-06, "logits/chosen": -2.9836106300354004, "logits/rejected": -2.929137706756592, "logps/chosen": -54.755348205566406, "logps/rejected": -1263.6102294921875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.14798133075237274, "rewards/margins": 12.390007972717285, "rewards/rejected": -12.242025375366211, "step": 10050 }, { "epoch": 0.6, "learning_rate": 2.0669045270990216e-06, "logits/chosen": -2.973325490951538, "logits/rejected": -2.878922462463379, "logps/chosen": -45.087677001953125, "logps/rejected": -1202.84814453125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.14023935794830322, "rewards/margins": 11.794641494750977, "rewards/rejected": -11.654401779174805, "step": 10060 }, { "epoch": 0.6, "learning_rate": 2.0617804248914992e-06, "logits/chosen": -3.008338451385498, "logits/rejected": -2.9396920204162598, "logps/chosen": -47.00397491455078, "logps/rejected": -1232.800537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.14542421698570251, "rewards/margins": 12.103399276733398, "rewards/rejected": -11.957974433898926, "step": 10070 }, { "epoch": 0.6, "learning_rate": 2.056658221313449e-06, "logits/chosen": -2.9608349800109863, "logits/rejected": -2.881699323654175, "logps/chosen": -47.428138732910156, "logps/rejected": -1261.240478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.17581234872341156, "rewards/margins": 12.412851333618164, "rewards/rejected": -12.237039566040039, "step": 10080 }, { "epoch": 0.6, "learning_rate": 2.0515379385573205e-06, "logits/chosen": -2.9810233116149902, "logits/rejected": -2.9071760177612305, "logps/chosen": -48.9428825378418, "logps/rejected": -1190.912109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20392461121082306, "rewards/margins": 11.73646354675293, "rewards/rejected": -11.532540321350098, "step": 10090 }, { "epoch": 0.6, "learning_rate": 2.0464195988072454e-06, "logits/chosen": -2.9627740383148193, "logits/rejected": -2.904270648956299, "logps/chosen": -43.60102081298828, "logps/rejected": -1257.66064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.17605920135974884, "rewards/margins": 12.376079559326172, "rewards/rejected": -12.200021743774414, "step": 10100 }, { "epoch": 0.6, "learning_rate": 2.041303224238934e-06, "logits/chosen": -2.983078956604004, "logits/rejected": -2.8782269954681396, "logps/chosen": -44.80977249145508, "logps/rejected": -1233.2423095703125, "loss": 0.0919, "rewards/accuracies": 1.0, "rewards/chosen": 0.19593128561973572, "rewards/margins": 12.156793594360352, "rewards/rejected": -11.960861206054688, "step": 10110 }, { "epoch": 0.6, "learning_rate": 2.036188837019582e-06, "logits/chosen": -2.9880175590515137, "logits/rejected": -2.8979029655456543, "logps/chosen": -44.110103607177734, "logps/rejected": -1264.1719970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.1913793981075287, "rewards/margins": 12.448756217956543, "rewards/rejected": -12.257376670837402, "step": 10120 }, { "epoch": 0.6, "learning_rate": 2.031076459307777e-06, "logits/chosen": -2.985647678375244, "logits/rejected": -2.877490282058716, "logps/chosen": -46.05023956298828, "logps/rejected": -1214.4090576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.161293163895607, "rewards/margins": 11.934606552124023, "rewards/rejected": -11.773313522338867, "step": 10130 }, { "epoch": 0.6, "learning_rate": 2.0259661132533983e-06, "logits/chosen": -3.002603054046631, "logits/rejected": -2.93235445022583, "logps/chosen": -45.18977737426758, "logps/rejected": -1168.8697509765625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.19292041659355164, "rewards/margins": 11.49923038482666, "rewards/rejected": -11.306310653686523, "step": 10140 }, { "epoch": 0.61, "learning_rate": 2.020857820997524e-06, "logits/chosen": -3.0286450386047363, "logits/rejected": -2.899977445602417, "logps/chosen": -40.91245651245117, "logps/rejected": -1203.5699462890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2202138453722, "rewards/margins": 11.87900447845459, "rewards/rejected": -11.658790588378906, "step": 10150 }, { "epoch": 0.61, "learning_rate": 2.015751604672333e-06, "logits/chosen": -2.9583382606506348, "logits/rejected": -2.88571834564209, "logps/chosen": -43.04931640625, "logps/rejected": -1185.896240234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20838050544261932, "rewards/margins": 11.677148818969727, "rewards/rejected": -11.468768119812012, "step": 10160 }, { "epoch": 0.61, "learning_rate": 2.010647486401011e-06, "logits/chosen": -3.030371904373169, "logits/rejected": -2.907797336578369, "logps/chosen": -59.92121124267578, "logps/rejected": -1253.1114501953125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.09574386477470398, "rewards/margins": 12.263181686401367, "rewards/rejected": -12.167436599731445, "step": 10170 }, { "epoch": 0.61, "learning_rate": 2.005545488297652e-06, "logits/chosen": -2.964763641357422, "logits/rejected": -2.86401629447937, "logps/chosen": -50.27057647705078, "logps/rejected": -1240.2330322265625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.12242677062749863, "rewards/margins": 12.149578094482422, "rewards/rejected": -12.027151107788086, "step": 10180 }, { "epoch": 0.61, "learning_rate": 2.0004456324671673e-06, "logits/chosen": -2.9726755619049072, "logits/rejected": -2.851649522781372, "logps/chosen": -49.672420501708984, "logps/rejected": -1190.982177734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.15064500272274017, "rewards/margins": 11.678483963012695, "rewards/rejected": -11.527837753295898, "step": 10190 }, { "epoch": 0.61, "learning_rate": 1.9953479410051833e-06, "logits/chosen": -3.005970001220703, "logits/rejected": -2.9001383781433105, "logps/chosen": -48.133888244628906, "logps/rejected": -1250.644775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.19392266869544983, "rewards/margins": 12.320107460021973, "rewards/rejected": -12.12618350982666, "step": 10200 }, { "epoch": 0.61, "learning_rate": 1.9902524359979494e-06, "logits/chosen": -3.011765241622925, "logits/rejected": -2.892868757247925, "logps/chosen": -41.93086242675781, "logps/rejected": -1254.873291015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.17535096406936646, "rewards/margins": 12.333257675170898, "rewards/rejected": -12.157905578613281, "step": 10210 }, { "epoch": 0.61, "learning_rate": 1.985159139522245e-06, "logits/chosen": -2.97802734375, "logits/rejected": -2.9133260250091553, "logps/chosen": -51.163734436035156, "logps/rejected": -1263.924072265625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.14304152131080627, "rewards/margins": 12.41795825958252, "rewards/rejected": -12.274917602539062, "step": 10220 }, { "epoch": 0.61, "learning_rate": 1.9800680736452773e-06, "logits/chosen": -3.014920711517334, "logits/rejected": -2.908125400543213, "logps/chosen": -51.96516036987305, "logps/rejected": -1243.1982421875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 0.11639292538166046, "rewards/margins": 12.178349494934082, "rewards/rejected": -12.061956405639648, "step": 10230 }, { "epoch": 0.61, "learning_rate": 1.974979260424591e-06, "logits/chosen": -3.0080807209014893, "logits/rejected": -2.9025843143463135, "logps/chosen": -55.50785446166992, "logps/rejected": -1221.5638427734375, "loss": 0.0633, "rewards/accuracies": 1.0, "rewards/chosen": 0.1395467221736908, "rewards/margins": 11.973094940185547, "rewards/rejected": -11.833548545837402, "step": 10240 }, { "epoch": 0.61, "learning_rate": 1.969892721907971e-06, "logits/chosen": -2.9984562397003174, "logits/rejected": -2.9183292388916016, "logps/chosen": -46.04682922363281, "logps/rejected": -1327.11328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.17787842452526093, "rewards/margins": 13.05920124053955, "rewards/rejected": -12.881322860717773, "step": 10250 }, { "epoch": 0.61, "learning_rate": 1.9648084801333468e-06, "logits/chosen": -2.9780983924865723, "logits/rejected": -2.9047141075134277, "logps/chosen": -46.120384216308594, "logps/rejected": -1270.521240234375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.1433325707912445, "rewards/margins": 12.467695236206055, "rewards/rejected": -12.324361801147461, "step": 10260 }, { "epoch": 0.61, "learning_rate": 1.9597265571286945e-06, "logits/chosen": -2.983590602874756, "logits/rejected": -2.9063339233398438, "logps/chosen": -44.916343688964844, "logps/rejected": -1258.3865966796875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.12653997540473938, "rewards/margins": 12.326775550842285, "rewards/rejected": -12.200235366821289, "step": 10270 }, { "epoch": 0.61, "learning_rate": 1.9546469749119485e-06, "logits/chosen": -3.014946460723877, "logits/rejected": -2.943242073059082, "logps/chosen": -43.85417938232422, "logps/rejected": -1210.2137451171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.22454604506492615, "rewards/margins": 11.946128845214844, "rewards/rejected": -11.72158145904541, "step": 10280 }, { "epoch": 0.61, "learning_rate": 1.9495697554908984e-06, "logits/chosen": -2.984243154525757, "logits/rejected": -2.8968911170959473, "logps/chosen": -36.43543243408203, "logps/rejected": -1228.230224609375, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": 0.24347683787345886, "rewards/margins": 12.149175643920898, "rewards/rejected": -11.905699729919434, "step": 10290 }, { "epoch": 0.61, "learning_rate": 1.944494920863096e-06, "logits/chosen": -2.987532138824463, "logits/rejected": -2.9008584022521973, "logps/chosen": -40.02655792236328, "logps/rejected": -1210.4114990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24683311581611633, "rewards/margins": 11.979547500610352, "rewards/rejected": -11.732714653015137, "step": 10300 }, { "epoch": 0.61, "learning_rate": 1.939422493015764e-06, "logits/chosen": -2.978304624557495, "logits/rejected": -2.8942551612854004, "logps/chosen": -37.48662567138672, "logps/rejected": -1275.249267578125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.2692325711250305, "rewards/margins": 12.646955490112305, "rewards/rejected": -12.377723693847656, "step": 10310 }, { "epoch": 0.62, "learning_rate": 1.934352493925695e-06, "logits/chosen": -3.022442579269409, "logits/rejected": -2.9380009174346924, "logps/chosen": -41.84482955932617, "logps/rejected": -1198.031005859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.1912289261817932, "rewards/margins": 11.792135238647461, "rewards/rejected": -11.600906372070312, "step": 10320 }, { "epoch": 0.62, "learning_rate": 1.929284945559159e-06, "logits/chosen": -3.0098960399627686, "logits/rejected": -2.904622793197632, "logps/chosen": -49.16112518310547, "logps/rejected": -1222.9183349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.22528496384620667, "rewards/margins": 12.080643653869629, "rewards/rejected": -11.855359077453613, "step": 10330 }, { "epoch": 0.62, "learning_rate": 1.9242198698718096e-06, "logits/chosen": -2.996006488800049, "logits/rejected": -2.89890718460083, "logps/chosen": -40.45452117919922, "logps/rejected": -1260.2952880859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.23945312201976776, "rewards/margins": 12.47046947479248, "rewards/rejected": -12.2310152053833, "step": 10340 }, { "epoch": 0.62, "learning_rate": 1.919157288808585e-06, "logits/chosen": -2.9776573181152344, "logits/rejected": -2.8813130855560303, "logps/chosen": -39.498985290527344, "logps/rejected": -1255.38037109375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.24492335319519043, "rewards/margins": 12.420185089111328, "rewards/rejected": -12.175261497497559, "step": 10350 }, { "epoch": 0.62, "learning_rate": 1.914097224303616e-06, "logits/chosen": -2.962517499923706, "logits/rejected": -2.916799545288086, "logps/chosen": -43.54413986206055, "logps/rejected": -1248.92236328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.20718789100646973, "rewards/margins": 12.322469711303711, "rewards/rejected": -12.11528205871582, "step": 10360 }, { "epoch": 0.62, "learning_rate": 1.9090396982801317e-06, "logits/chosen": -2.9847970008850098, "logits/rejected": -2.8919882774353027, "logps/chosen": -42.88835144042969, "logps/rejected": -1247.9215087890625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.16615010797977448, "rewards/margins": 12.266698837280273, "rewards/rejected": -12.10054874420166, "step": 10370 }, { "epoch": 0.62, "learning_rate": 1.9039847326503608e-06, "logits/chosen": -3.0027976036071777, "logits/rejected": -2.924764633178711, "logps/chosen": -42.485939025878906, "logps/rejected": -1196.369873046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.19839748740196228, "rewards/margins": 11.787178039550781, "rewards/rejected": -11.588780403137207, "step": 10380 }, { "epoch": 0.62, "learning_rate": 1.8989323493154402e-06, "logits/chosen": -2.9948151111602783, "logits/rejected": -2.895102024078369, "logps/chosen": -38.49906921386719, "logps/rejected": -1231.546142578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25486189126968384, "rewards/margins": 12.188624382019043, "rewards/rejected": -11.933761596679688, "step": 10390 }, { "epoch": 0.62, "learning_rate": 1.893882570165318e-06, "logits/chosen": -2.9822678565979004, "logits/rejected": -2.9130947589874268, "logps/chosen": -36.1973991394043, "logps/rejected": -1290.816162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.25253063440322876, "rewards/margins": 12.779218673706055, "rewards/rejected": -12.52668571472168, "step": 10400 }, { "epoch": 0.62, "learning_rate": 1.8888354170786604e-06, "logits/chosen": -2.9890918731689453, "logits/rejected": -2.881345748901367, "logps/chosen": -40.198734283447266, "logps/rejected": -1283.2003173828125, "loss": 0.0416, "rewards/accuracies": 1.0, "rewards/chosen": 0.20753152668476105, "rewards/margins": 12.652364730834961, "rewards/rejected": -12.444833755493164, "step": 10410 }, { "epoch": 0.62, "learning_rate": 1.8837909119227541e-06, "logits/chosen": -2.960191249847412, "logits/rejected": -2.919783592224121, "logps/chosen": -63.0287971496582, "logps/rejected": -1224.600341796875, "loss": 0.0704, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.015386441722512245, "rewards/margins": 11.851868629455566, "rewards/rejected": -11.867255210876465, "step": 10420 }, { "epoch": 0.62, "learning_rate": 1.878749076553416e-06, "logits/chosen": -2.965596914291382, "logits/rejected": -2.880345106124878, "logps/chosen": -40.927635192871094, "logps/rejected": -1263.786865234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.24113352596759796, "rewards/margins": 12.49066162109375, "rewards/rejected": -12.249528884887695, "step": 10430 }, { "epoch": 0.62, "learning_rate": 1.873709932814894e-06, "logits/chosen": -2.9687466621398926, "logits/rejected": -2.892817735671997, "logps/chosen": -44.194637298583984, "logps/rejected": -1265.63671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24140766263008118, "rewards/margins": 12.510370254516602, "rewards/rejected": -12.268962860107422, "step": 10440 }, { "epoch": 0.62, "learning_rate": 1.8686735025397728e-06, "logits/chosen": -2.9717555046081543, "logits/rejected": -2.893064260482788, "logps/chosen": -48.17349624633789, "logps/rejected": -1246.6009521484375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 0.21784944832324982, "rewards/margins": 12.311479568481445, "rewards/rejected": -12.093629837036133, "step": 10450 }, { "epoch": 0.62, "learning_rate": 1.8636398075488857e-06, "logits/chosen": -2.968006134033203, "logits/rejected": -2.870689868927002, "logps/chosen": -43.60340881347656, "logps/rejected": -1150.2166748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.20973841845989227, "rewards/margins": 11.327409744262695, "rewards/rejected": -11.11767292022705, "step": 10460 }, { "epoch": 0.62, "learning_rate": 1.8586088696512101e-06, "logits/chosen": -2.9684784412384033, "logits/rejected": -2.8862736225128174, "logps/chosen": -49.20990753173828, "logps/rejected": -1110.7032470703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.14282085001468658, "rewards/margins": 10.879621505737305, "rewards/rejected": -10.73680305480957, "step": 10470 }, { "epoch": 0.62, "learning_rate": 1.85358071064378e-06, "logits/chosen": -3.0041065216064453, "logits/rejected": -2.9401345252990723, "logps/chosen": -40.246856689453125, "logps/rejected": -1184.995849609375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.23264548182487488, "rewards/margins": 11.70594596862793, "rewards/rejected": -11.473299026489258, "step": 10480 }, { "epoch": 0.63, "learning_rate": 1.8485553523115902e-06, "logits/chosen": -2.969520092010498, "logits/rejected": -2.897005796432495, "logps/chosen": -39.06232452392578, "logps/rejected": -1284.166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24158374965190887, "rewards/margins": 12.701441764831543, "rewards/rejected": -12.459859848022461, "step": 10490 }, { "epoch": 0.63, "learning_rate": 1.8435328164275007e-06, "logits/chosen": -2.952425003051758, "logits/rejected": -2.8789334297180176, "logps/chosen": -42.24785232543945, "logps/rejected": -1258.302490234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2573634684085846, "rewards/margins": 12.460243225097656, "rewards/rejected": -12.202878952026367, "step": 10500 }, { "epoch": 0.63, "learning_rate": 1.838513124752142e-06, "logits/chosen": -2.9966320991516113, "logits/rejected": -2.9002420902252197, "logps/chosen": -32.056304931640625, "logps/rejected": -1236.835693359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.21774622797966003, "rewards/margins": 12.207391738891602, "rewards/rejected": -11.989645004272461, "step": 10510 }, { "epoch": 0.63, "learning_rate": 1.833496299033824e-06, "logits/chosen": -2.969428062438965, "logits/rejected": -2.894129753112793, "logps/chosen": -42.31773376464844, "logps/rejected": -1213.6947021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.23123876750469208, "rewards/margins": 11.988715171813965, "rewards/rejected": -11.757476806640625, "step": 10520 }, { "epoch": 0.63, "learning_rate": 1.8284823610084375e-06, "logits/chosen": -2.9854602813720703, "logits/rejected": -2.892146110534668, "logps/chosen": -53.74165725708008, "logps/rejected": -1202.258544921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.16240635514259338, "rewards/margins": 11.813117980957031, "rewards/rejected": -11.650711059570312, "step": 10530 }, { "epoch": 0.63, "learning_rate": 1.8234713323993622e-06, "logits/chosen": -2.992870569229126, "logits/rejected": -2.8981447219848633, "logps/chosen": -35.920894622802734, "logps/rejected": -1193.6834716796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2300628423690796, "rewards/margins": 11.790849685668945, "rewards/rejected": -11.560785293579102, "step": 10540 }, { "epoch": 0.63, "learning_rate": 1.8184632349173747e-06, "logits/chosen": -2.9665567874908447, "logits/rejected": -2.8933279514312744, "logps/chosen": -35.467891693115234, "logps/rejected": -1233.094482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2485441267490387, "rewards/margins": 12.196107864379883, "rewards/rejected": -11.947563171386719, "step": 10550 }, { "epoch": 0.63, "learning_rate": 1.8134580902605491e-06, "logits/chosen": -3.00695538520813, "logits/rejected": -2.9101743698120117, "logps/chosen": -45.51826858520508, "logps/rejected": -1212.165283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.22238750755786896, "rewards/margins": 11.963212013244629, "rewards/rejected": -11.740824699401855, "step": 10560 }, { "epoch": 0.63, "learning_rate": 1.8084559201141677e-06, "logits/chosen": -2.9600138664245605, "logits/rejected": -2.904247760772705, "logps/chosen": -55.27196502685547, "logps/rejected": -1227.4288330078125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.09724445641040802, "rewards/margins": 11.988153457641602, "rewards/rejected": -11.890909194946289, "step": 10570 }, { "epoch": 0.63, "learning_rate": 1.803456746150627e-06, "logits/chosen": -2.955606460571289, "logits/rejected": -2.8885598182678223, "logps/chosen": -35.94068145751953, "logps/rejected": -1263.5439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.21760194003582, "rewards/margins": 12.480493545532227, "rewards/rejected": -12.262890815734863, "step": 10580 }, { "epoch": 0.63, "learning_rate": 1.7984605900293395e-06, "logits/chosen": -2.9637694358825684, "logits/rejected": -2.8530335426330566, "logps/chosen": -35.740631103515625, "logps/rejected": -1194.198486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.21210196614265442, "rewards/margins": 11.781591415405273, "rewards/rejected": -11.569490432739258, "step": 10590 }, { "epoch": 0.63, "learning_rate": 1.7934674733966426e-06, "logits/chosen": -2.9831855297088623, "logits/rejected": -2.897275447845459, "logps/chosen": -39.44312286376953, "logps/rejected": -1252.8935546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.21949870884418488, "rewards/margins": 12.360796928405762, "rewards/rejected": -12.141298294067383, "step": 10600 }, { "epoch": 0.63, "learning_rate": 1.7884774178857079e-06, "logits/chosen": -2.9638466835021973, "logits/rejected": -2.870460271835327, "logps/chosen": -40.34562301635742, "logps/rejected": -1247.535400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.19980283081531525, "rewards/margins": 12.295950889587402, "rewards/rejected": -12.096147537231445, "step": 10610 }, { "epoch": 0.63, "learning_rate": 1.7834904451164417e-06, "logits/chosen": -2.968020439147949, "logits/rejected": -2.8671422004699707, "logps/chosen": -40.09918975830078, "logps/rejected": -1208.0081787109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.18617036938667297, "rewards/margins": 11.890741348266602, "rewards/rejected": -11.704570770263672, "step": 10620 }, { "epoch": 0.63, "learning_rate": 1.7785065766953932e-06, "logits/chosen": -2.9588077068328857, "logits/rejected": -2.8666579723358154, "logps/chosen": -36.06768035888672, "logps/rejected": -1241.0003662109375, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": 0.22429952025413513, "rewards/margins": 12.2379732131958, "rewards/rejected": -12.013673782348633, "step": 10630 }, { "epoch": 0.63, "learning_rate": 1.7735258342156653e-06, "logits/chosen": -2.946554660797119, "logits/rejected": -2.8612866401672363, "logps/chosen": -54.3779411315918, "logps/rejected": -1169.535400390625, "loss": 0.0107, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.09811432659626007, "rewards/margins": 11.412057876586914, "rewards/rejected": -11.313943862915039, "step": 10640 }, { "epoch": 0.64, "learning_rate": 1.768548239256815e-06, "logits/chosen": -2.987761974334717, "logits/rejected": -2.9252943992614746, "logps/chosen": -42.2876091003418, "logps/rejected": -1215.947509765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23709623515605927, "rewards/margins": 12.00109577178955, "rewards/rejected": -11.763999938964844, "step": 10650 }, { "epoch": 0.64, "learning_rate": 1.7635738133847608e-06, "logits/chosen": -3.0049545764923096, "logits/rejected": -2.91749906539917, "logps/chosen": -63.31340789794922, "logps/rejected": -1233.8861083984375, "loss": 0.1337, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.024061882868409157, "rewards/margins": 11.983494758605957, "rewards/rejected": -11.959432601928711, "step": 10660 }, { "epoch": 0.64, "learning_rate": 1.7586025781516958e-06, "logits/chosen": -2.9574806690216064, "logits/rejected": -2.88138747215271, "logps/chosen": -38.609981536865234, "logps/rejected": -1175.1163330078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2311614751815796, "rewards/margins": 11.589154243469238, "rewards/rejected": -11.357992172241211, "step": 10670 }, { "epoch": 0.64, "learning_rate": 1.7536345550959844e-06, "logits/chosen": -3.0012075901031494, "logits/rejected": -2.873645305633545, "logps/chosen": -42.169471740722656, "logps/rejected": -1257.2357177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.17448493838310242, "rewards/margins": 12.370179176330566, "rewards/rejected": -12.195694923400879, "step": 10680 }, { "epoch": 0.64, "learning_rate": 1.7486697657420752e-06, "logits/chosen": -2.9922616481781006, "logits/rejected": -2.9302570819854736, "logps/chosen": -42.762840270996094, "logps/rejected": -1193.6761474609375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.16765224933624268, "rewards/margins": 11.733104705810547, "rewards/rejected": -11.565452575683594, "step": 10690 }, { "epoch": 0.64, "learning_rate": 1.743708231600409e-06, "logits/chosen": -2.9927773475646973, "logits/rejected": -2.910099506378174, "logps/chosen": -47.803627014160156, "logps/rejected": -1242.369384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.16502931714057922, "rewards/margins": 12.203119277954102, "rewards/rejected": -12.03808879852295, "step": 10700 }, { "epoch": 0.64, "learning_rate": 1.7387499741673197e-06, "logits/chosen": -2.9607009887695312, "logits/rejected": -2.8712410926818848, "logps/chosen": -68.94219207763672, "logps/rejected": -1143.188232421875, "loss": 0.0831, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.06837329268455505, "rewards/margins": 10.986330032348633, "rewards/rejected": -11.054704666137695, "step": 10710 }, { "epoch": 0.64, "learning_rate": 1.7337950149249466e-06, "logits/chosen": -2.98933482170105, "logits/rejected": -2.882178544998169, "logps/chosen": -48.95232391357422, "logps/rejected": -1202.4798583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.18664607405662537, "rewards/margins": 11.836923599243164, "rewards/rejected": -11.650278091430664, "step": 10720 }, { "epoch": 0.64, "learning_rate": 1.7288433753411383e-06, "logits/chosen": -2.9955081939697266, "logits/rejected": -2.895533323287964, "logps/chosen": -44.5189208984375, "logps/rejected": -1229.751220703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.16499601304531097, "rewards/margins": 12.082512855529785, "rewards/rejected": -11.91751766204834, "step": 10730 }, { "epoch": 0.64, "learning_rate": 1.7238950768693619e-06, "logits/chosen": -2.992671251296997, "logits/rejected": -2.876626491546631, "logps/chosen": -56.114768981933594, "logps/rejected": -1280.001708984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.11620695888996124, "rewards/margins": 12.538996696472168, "rewards/rejected": -12.422791481018066, "step": 10740 }, { "epoch": 0.64, "learning_rate": 1.7189501409486061e-06, "logits/chosen": -2.9676215648651123, "logits/rejected": -2.853475332260132, "logps/chosen": -45.09934616088867, "logps/rejected": -1255.3992919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2015409767627716, "rewards/margins": 12.383906364440918, "rewards/rejected": -12.182364463806152, "step": 10750 }, { "epoch": 0.64, "learning_rate": 1.7140085890032951e-06, "logits/chosen": -2.981097936630249, "logits/rejected": -2.8725085258483887, "logps/chosen": -60.303062438964844, "logps/rejected": -1266.611083984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.12294516712427139, "rewards/margins": 12.418302536010742, "rewards/rejected": -12.295357704162598, "step": 10760 }, { "epoch": 0.64, "learning_rate": 1.7090704424431882e-06, "logits/chosen": -2.975358486175537, "logits/rejected": -2.8951308727264404, "logps/chosen": -52.57039260864258, "logps/rejected": -1186.1082763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.17622964084148407, "rewards/margins": 11.657492637634277, "rewards/rejected": -11.48126220703125, "step": 10770 }, { "epoch": 0.64, "learning_rate": 1.704135722663291e-06, "logits/chosen": -2.984504222869873, "logits/rejected": -2.880404472351074, "logps/chosen": -54.8341178894043, "logps/rejected": -1235.0748291015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.11250364780426025, "rewards/margins": 12.082189559936523, "rewards/rejected": -11.969687461853027, "step": 10780 }, { "epoch": 0.64, "learning_rate": 1.6992044510437644e-06, "logits/chosen": -2.9928760528564453, "logits/rejected": -2.8898205757141113, "logps/chosen": -52.57133102416992, "logps/rejected": -1238.4532470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.133560910820961, "rewards/margins": 12.145816802978516, "rewards/rejected": -12.01225471496582, "step": 10790 }, { "epoch": 0.64, "learning_rate": 1.6942766489498278e-06, "logits/chosen": -3.0073390007019043, "logits/rejected": -2.924746513366699, "logps/chosen": -53.13264083862305, "logps/rejected": -1132.726318359375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.14473368227481842, "rewards/margins": 11.103751182556152, "rewards/rejected": -10.959016799926758, "step": 10800 }, { "epoch": 0.64, "learning_rate": 1.689352337731669e-06, "logits/chosen": -3.029569149017334, "logits/rejected": -2.9222521781921387, "logps/chosen": -51.05519485473633, "logps/rejected": -1276.4715576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.15148842334747314, "rewards/margins": 12.537653923034668, "rewards/rejected": -12.3861665725708, "step": 10810 }, { "epoch": 0.65, "learning_rate": 1.6844315387243514e-06, "logits/chosen": -3.0081655979156494, "logits/rejected": -2.8776793479919434, "logps/chosen": -49.08383560180664, "logps/rejected": -1200.604248046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.12913735210895538, "rewards/margins": 11.765018463134766, "rewards/rejected": -11.635881423950195, "step": 10820 }, { "epoch": 0.65, "learning_rate": 1.6795142732477222e-06, "logits/chosen": -2.938312292098999, "logits/rejected": -2.8853821754455566, "logps/chosen": -48.729339599609375, "logps/rejected": -1131.48876953125, "loss": 0.0631, "rewards/accuracies": 1.0, "rewards/chosen": 0.13820254802703857, "rewards/margins": 11.063444137573242, "rewards/rejected": -10.925241470336914, "step": 10830 }, { "epoch": 0.65, "learning_rate": 1.6746005626063163e-06, "logits/chosen": -2.9760992527008057, "logits/rejected": -2.8829421997070312, "logps/chosen": -46.726173400878906, "logps/rejected": -1203.9490966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.1900329887866974, "rewards/margins": 11.85496997833252, "rewards/rejected": -11.664937019348145, "step": 10840 }, { "epoch": 0.65, "learning_rate": 1.6696904280892716e-06, "logits/chosen": -2.9518003463745117, "logits/rejected": -2.8510873317718506, "logps/chosen": -43.0867805480957, "logps/rejected": -1217.8863525390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.19286546111106873, "rewards/margins": 11.98359489440918, "rewards/rejected": -11.790731430053711, "step": 10850 }, { "epoch": 0.65, "learning_rate": 1.6647838909702287e-06, "logits/chosen": -2.9696528911590576, "logits/rejected": -2.8723528385162354, "logps/chosen": -43.95452117919922, "logps/rejected": -1259.061279296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.16097040474414825, "rewards/margins": 12.372607231140137, "rewards/rejected": -12.211637496948242, "step": 10860 }, { "epoch": 0.65, "learning_rate": 1.6598809725072412e-06, "logits/chosen": -3.0172154903411865, "logits/rejected": -2.909411668777466, "logps/chosen": -44.04686737060547, "logps/rejected": -1249.3194580078125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.13693220913410187, "rewards/margins": 12.252671241760254, "rewards/rejected": -12.115739822387695, "step": 10870 }, { "epoch": 0.65, "learning_rate": 1.6549816939426888e-06, "logits/chosen": -2.9759883880615234, "logits/rejected": -2.9251396656036377, "logps/chosen": -36.532493591308594, "logps/rejected": -1140.4698486328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20786194503307343, "rewards/margins": 11.229242324829102, "rewards/rejected": -11.021379470825195, "step": 10880 }, { "epoch": 0.65, "learning_rate": 1.6500860765031767e-06, "logits/chosen": -2.9683289527893066, "logits/rejected": -2.8750457763671875, "logps/chosen": -48.7242317199707, "logps/rejected": -1178.96435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.18797823786735535, "rewards/margins": 11.608308792114258, "rewards/rejected": -11.420331954956055, "step": 10890 }, { "epoch": 0.65, "learning_rate": 1.64519414139945e-06, "logits/chosen": -2.984302043914795, "logits/rejected": -2.901674270629883, "logps/chosen": -41.495235443115234, "logps/rejected": -1187.12548828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.1725185215473175, "rewards/margins": 11.668099403381348, "rewards/rejected": -11.495580673217773, "step": 10900 }, { "epoch": 0.65, "learning_rate": 1.6403059098263003e-06, "logits/chosen": -3.0055034160614014, "logits/rejected": -2.9049746990203857, "logps/chosen": -47.704063415527344, "logps/rejected": -1193.620849609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.22259357571601868, "rewards/margins": 11.773444175720215, "rewards/rejected": -11.550848960876465, "step": 10910 }, { "epoch": 0.65, "learning_rate": 1.6354214029624719e-06, "logits/chosen": -2.990929365158081, "logits/rejected": -2.908055305480957, "logps/chosen": -45.698204040527344, "logps/rejected": -1262.812744140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.19821026921272278, "rewards/margins": 12.45020866394043, "rewards/rejected": -12.251996994018555, "step": 10920 }, { "epoch": 0.65, "learning_rate": 1.6305406419705704e-06, "logits/chosen": -2.9791440963745117, "logits/rejected": -2.8905744552612305, "logps/chosen": -47.75502395629883, "logps/rejected": -1298.935791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.16419032216072083, "rewards/margins": 12.775630950927734, "rewards/rejected": -12.611440658569336, "step": 10930 }, { "epoch": 0.65, "learning_rate": 1.6256636479969757e-06, "logits/chosen": -2.996849536895752, "logits/rejected": -2.916567802429199, "logps/chosen": -47.66053009033203, "logps/rejected": -1202.910888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.1249987855553627, "rewards/margins": 11.780752182006836, "rewards/rejected": -11.655754089355469, "step": 10940 }, { "epoch": 0.65, "learning_rate": 1.6207904421717438e-06, "logits/chosen": -2.978367328643799, "logits/rejected": -2.88767671585083, "logps/chosen": -43.603233337402344, "logps/rejected": -1194.6976318359375, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": 0.19448694586753845, "rewards/margins": 11.768349647521973, "rewards/rejected": -11.57386302947998, "step": 10950 }, { "epoch": 0.65, "learning_rate": 1.6159210456085179e-06, "logits/chosen": -2.979886054992676, "logits/rejected": -2.8568344116210938, "logps/chosen": -47.201011657714844, "logps/rejected": -1197.8004150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.19414173066616058, "rewards/margins": 11.808497428894043, "rewards/rejected": -11.614356994628906, "step": 10960 }, { "epoch": 0.65, "learning_rate": 1.6110554794044397e-06, "logits/chosen": -2.9552578926086426, "logits/rejected": -2.9047274589538574, "logps/chosen": -68.34508514404297, "logps/rejected": -1111.14892578125, "loss": 0.0196, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.014781939797103405, "rewards/margins": 10.742494583129883, "rewards/rejected": -10.72771167755127, "step": 10970 }, { "epoch": 0.65, "learning_rate": 1.6061937646400526e-06, "logits/chosen": -2.9881348609924316, "logits/rejected": -2.905012369155884, "logps/chosen": -48.559661865234375, "logps/rejected": -1173.442138671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.19721554219722748, "rewards/margins": 11.559896469116211, "rewards/rejected": -11.362682342529297, "step": 10980 }, { "epoch": 0.66, "learning_rate": 1.6013359223792155e-06, "logits/chosen": -3.0039331912994385, "logits/rejected": -2.918799877166748, "logps/chosen": -55.20995330810547, "logps/rejected": -1143.3565673828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.09131300449371338, "rewards/margins": 11.159490585327148, "rewards/rejected": -11.068178176879883, "step": 10990 }, { "epoch": 0.66, "learning_rate": 1.596481973669009e-06, "logits/chosen": -2.958709955215454, "logits/rejected": -2.8917505741119385, "logps/chosen": -44.061500549316406, "logps/rejected": -1192.954833984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.16884025931358337, "rewards/margins": 11.714811325073242, "rewards/rejected": -11.545970916748047, "step": 11000 }, { "epoch": 0.66, "learning_rate": 1.591631939539644e-06, "logits/chosen": -2.9705731868743896, "logits/rejected": -2.8927385807037354, "logps/chosen": -54.38584518432617, "logps/rejected": -1241.524169921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.11722073704004288, "rewards/margins": 12.148224830627441, "rewards/rejected": -12.031003952026367, "step": 11010 }, { "epoch": 0.66, "learning_rate": 1.5867858410043688e-06, "logits/chosen": -2.9729554653167725, "logits/rejected": -2.915727138519287, "logps/chosen": -55.79547119140625, "logps/rejected": -1212.1881103515625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.0914226546883583, "rewards/margins": 11.833329200744629, "rewards/rejected": -11.74190616607666, "step": 11020 }, { "epoch": 0.66, "learning_rate": 1.5819436990593855e-06, "logits/chosen": -3.0036733150482178, "logits/rejected": -2.8880362510681152, "logps/chosen": -55.474464416503906, "logps/rejected": -1240.963134765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.12092127650976181, "rewards/margins": 12.154128074645996, "rewards/rejected": -12.03320598602295, "step": 11030 }, { "epoch": 0.66, "learning_rate": 1.5771055346837498e-06, "logits/chosen": -2.9713246822357178, "logits/rejected": -2.89347505569458, "logps/chosen": -49.90826416015625, "logps/rejected": -1241.78466796875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2040265053510666, "rewards/margins": 12.224294662475586, "rewards/rejected": -12.020268440246582, "step": 11040 }, { "epoch": 0.66, "learning_rate": 1.5722713688392844e-06, "logits/chosen": -2.97822642326355, "logits/rejected": -2.889800548553467, "logps/chosen": -47.24172592163086, "logps/rejected": -1257.7080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.19293369352817535, "rewards/margins": 12.392191886901855, "rewards/rejected": -12.199258804321289, "step": 11050 }, { "epoch": 0.66, "learning_rate": 1.5674412224704902e-06, "logits/chosen": -2.9878478050231934, "logits/rejected": -2.9121429920196533, "logps/chosen": -47.36176300048828, "logps/rejected": -1167.8480224609375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.18234723806381226, "rewards/margins": 11.488906860351562, "rewards/rejected": -11.306559562683105, "step": 11060 }, { "epoch": 0.66, "learning_rate": 1.5626151165044522e-06, "logits/chosen": -2.9972054958343506, "logits/rejected": -2.8912196159362793, "logps/chosen": -43.50969696044922, "logps/rejected": -1200.972412109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.22030779719352722, "rewards/margins": 11.847841262817383, "rewards/rejected": -11.627533912658691, "step": 11070 }, { "epoch": 0.66, "learning_rate": 1.557793071850749e-06, "logits/chosen": -2.9773120880126953, "logits/rejected": -2.851593255996704, "logps/chosen": -48.466739654541016, "logps/rejected": -1153.3035888671875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.1664770096540451, "rewards/margins": 11.314115524291992, "rewards/rejected": -11.147637367248535, "step": 11080 }, { "epoch": 0.66, "learning_rate": 1.552975109401365e-06, "logits/chosen": -3.008653163909912, "logits/rejected": -2.8983054161071777, "logps/chosen": -46.71669006347656, "logps/rejected": -1149.79443359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.21464033424854279, "rewards/margins": 11.329327583312988, "rewards/rejected": -11.114686965942383, "step": 11090 }, { "epoch": 0.66, "learning_rate": 1.5481612500305964e-06, "logits/chosen": -2.9790821075439453, "logits/rejected": -2.897669553756714, "logps/chosen": -42.14753341674805, "logps/rejected": -1209.4359130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.20746560394763947, "rewards/margins": 11.939809799194336, "rewards/rejected": -11.732345581054688, "step": 11100 }, { "epoch": 0.66, "learning_rate": 1.5433515145949636e-06, "logits/chosen": -2.990865707397461, "logits/rejected": -2.9042115211486816, "logps/chosen": -50.05828094482422, "logps/rejected": -1238.6444091796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.18781769275665283, "rewards/margins": 12.202497482299805, "rewards/rejected": -12.014678955078125, "step": 11110 }, { "epoch": 0.66, "learning_rate": 1.5385459239331173e-06, "logits/chosen": -2.9759609699249268, "logits/rejected": -2.9085612297058105, "logps/chosen": -45.44398880004883, "logps/rejected": -1211.8238525390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.18409463763237, "rewards/margins": 11.925585746765137, "rewards/rejected": -11.741491317749023, "step": 11120 }, { "epoch": 0.66, "learning_rate": 1.5337444988657546e-06, "logits/chosen": -3.022376537322998, "logits/rejected": -2.9095864295959473, "logps/chosen": -50.225852966308594, "logps/rejected": -1230.343505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.1681569367647171, "rewards/margins": 12.091958045959473, "rewards/rejected": -11.923800468444824, "step": 11130 }, { "epoch": 0.66, "learning_rate": 1.5289472601955219e-06, "logits/chosen": -2.9922235012054443, "logits/rejected": -2.9123711585998535, "logps/chosen": -51.19879913330078, "logps/rejected": -1194.74267578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.14014706015586853, "rewards/margins": 11.707077980041504, "rewards/rejected": -11.566929817199707, "step": 11140 }, { "epoch": 0.66, "learning_rate": 1.5241542287069273e-06, "logits/chosen": -2.991950750350952, "logits/rejected": -2.895827293395996, "logps/chosen": -39.705528259277344, "logps/rejected": -1208.2125244140625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.21603624522686005, "rewards/margins": 11.921350479125977, "rewards/rejected": -11.705312728881836, "step": 11150 }, { "epoch": 0.67, "learning_rate": 1.5193654251662531e-06, "logits/chosen": -2.9689841270446777, "logits/rejected": -2.862626552581787, "logps/chosen": -44.686912536621094, "logps/rejected": -1191.9239501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2166585624217987, "rewards/margins": 11.753263473510742, "rewards/rejected": -11.536603927612305, "step": 11160 }, { "epoch": 0.67, "learning_rate": 1.514580870321462e-06, "logits/chosen": -2.9977307319641113, "logits/rejected": -2.9046216011047363, "logps/chosen": -52.60981369018555, "logps/rejected": -1245.4212646484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.13499298691749573, "rewards/margins": 12.218323707580566, "rewards/rejected": -12.083331108093262, "step": 11170 }, { "epoch": 0.67, "learning_rate": 1.509800584902108e-06, "logits/chosen": -2.989666223526001, "logits/rejected": -2.89737606048584, "logps/chosen": -46.604698181152344, "logps/rejected": -1157.076904296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20810088515281677, "rewards/margins": 11.415570259094238, "rewards/rejected": -11.20746898651123, "step": 11180 }, { "epoch": 0.67, "learning_rate": 1.5050245896192503e-06, "logits/chosen": -2.9589438438415527, "logits/rejected": -2.8732669353485107, "logps/chosen": -80.24202728271484, "logps/rejected": -1180.392333984375, "loss": 0.1458, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.13702335953712463, "rewards/margins": 11.294260025024414, "rewards/rejected": -11.431282997131348, "step": 11190 }, { "epoch": 0.67, "learning_rate": 1.5002529051653576e-06, "logits/chosen": -2.947169065475464, "logits/rejected": -2.871530532836914, "logps/chosen": -50.33572006225586, "logps/rejected": -1228.176025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.1710774451494217, "rewards/margins": 12.081416130065918, "rewards/rejected": -11.91033935546875, "step": 11200 }, { "epoch": 0.67, "learning_rate": 1.4954855522142225e-06, "logits/chosen": -2.9729020595550537, "logits/rejected": -2.856149673461914, "logps/chosen": -49.24439239501953, "logps/rejected": -1175.499755859375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.08051478117704391, "rewards/margins": 11.46330738067627, "rewards/rejected": -11.382792472839355, "step": 11210 }, { "epoch": 0.67, "learning_rate": 1.4907225514208724e-06, "logits/chosen": -2.9980578422546387, "logits/rejected": -2.861168622970581, "logps/chosen": -47.23252868652344, "logps/rejected": -1119.0400390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.18464988470077515, "rewards/margins": 10.99216365814209, "rewards/rejected": -10.807513236999512, "step": 11220 }, { "epoch": 0.67, "learning_rate": 1.4859639234214774e-06, "logits/chosen": -3.0173678398132324, "logits/rejected": -2.9166085720062256, "logps/chosen": -53.04567337036133, "logps/rejected": -1214.9571533203125, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": 0.11937393993139267, "rewards/margins": 11.89087963104248, "rewards/rejected": -11.771506309509277, "step": 11230 }, { "epoch": 0.67, "learning_rate": 1.48120968883326e-06, "logits/chosen": -2.9637343883514404, "logits/rejected": -2.85284686088562, "logps/chosen": -45.88273620605469, "logps/rejected": -1130.7349853515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.22956819832324982, "rewards/margins": 11.16730785369873, "rewards/rejected": -10.937738418579102, "step": 11240 }, { "epoch": 0.67, "learning_rate": 1.4764598682544124e-06, "logits/chosen": -2.973855495452881, "logits/rejected": -2.8733229637145996, "logps/chosen": -34.693199157714844, "logps/rejected": -1128.670166015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2224206030368805, "rewards/margins": 11.132538795471191, "rewards/rejected": -10.910120010375977, "step": 11250 }, { "epoch": 0.67, "learning_rate": 1.4717144822639988e-06, "logits/chosen": -2.9855000972747803, "logits/rejected": -2.8902859687805176, "logps/chosen": -42.67506790161133, "logps/rejected": -1119.3917236328125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.2061883509159088, "rewards/margins": 11.022645950317383, "rewards/rejected": -10.816457748413086, "step": 11260 }, { "epoch": 0.67, "learning_rate": 1.4669735514218709e-06, "logits/chosen": -2.9891791343688965, "logits/rejected": -2.909029960632324, "logps/chosen": -49.121482849121094, "logps/rejected": -1154.9688720703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.21645662188529968, "rewards/margins": 11.382869720458984, "rewards/rejected": -11.166413307189941, "step": 11270 }, { "epoch": 0.67, "learning_rate": 1.46223709626858e-06, "logits/chosen": -2.9526121616363525, "logits/rejected": -2.849761486053467, "logps/chosen": -47.366798400878906, "logps/rejected": -1202.530517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.22606079280376434, "rewards/margins": 11.865777015686035, "rewards/rejected": -11.639716148376465, "step": 11280 }, { "epoch": 0.67, "learning_rate": 1.457505137325283e-06, "logits/chosen": -2.981325387954712, "logits/rejected": -2.906311511993408, "logps/chosen": -47.57198715209961, "logps/rejected": -1143.587646484375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.15424416959285736, "rewards/margins": 11.235780715942383, "rewards/rejected": -11.081535339355469, "step": 11290 }, { "epoch": 0.67, "learning_rate": 1.452777695093659e-06, "logits/chosen": -2.9661917686462402, "logits/rejected": -2.873295545578003, "logps/chosen": -41.01392364501953, "logps/rejected": -1180.0885009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.20941004157066345, "rewards/margins": 11.622716903686523, "rewards/rejected": -11.41330623626709, "step": 11300 }, { "epoch": 0.67, "learning_rate": 1.448054790055817e-06, "logits/chosen": -2.968008041381836, "logits/rejected": -2.863581418991089, "logps/chosen": -62.19414138793945, "logps/rejected": -1132.3837890625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.008876070380210876, "rewards/margins": 10.944957733154297, "rewards/rejected": -10.93608283996582, "step": 11310 }, { "epoch": 0.68, "learning_rate": 1.443336442674208e-06, "logits/chosen": -2.966935634613037, "logits/rejected": -2.8853538036346436, "logps/chosen": -42.041561126708984, "logps/rejected": -1179.4193115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24466416239738464, "rewards/margins": 11.659932136535645, "rewards/rejected": -11.415267944335938, "step": 11320 }, { "epoch": 0.68, "learning_rate": 1.438622673391537e-06, "logits/chosen": -2.965528726577759, "logits/rejected": -2.864677906036377, "logps/chosen": -42.39848709106445, "logps/rejected": -1168.321533203125, "loss": 0.126, "rewards/accuracies": 1.0, "rewards/chosen": 0.19765041768550873, "rewards/margins": 11.512903213500977, "rewards/rejected": -11.315252304077148, "step": 11330 }, { "epoch": 0.68, "learning_rate": 1.4339135026306738e-06, "logits/chosen": -2.949399709701538, "logits/rejected": -2.8690638542175293, "logps/chosen": -45.273399353027344, "logps/rejected": -1133.8909912109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.21662819385528564, "rewards/margins": 11.17823314666748, "rewards/rejected": -10.961605072021484, "step": 11340 }, { "epoch": 0.68, "learning_rate": 1.4292089507945655e-06, "logits/chosen": -2.97699236869812, "logits/rejected": -2.9021215438842773, "logps/chosen": -47.91228103637695, "logps/rejected": -1116.572998046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.19935894012451172, "rewards/margins": 11.006315231323242, "rewards/rejected": -10.806957244873047, "step": 11350 }, { "epoch": 0.68, "learning_rate": 1.424509038266143e-06, "logits/chosen": -2.9339704513549805, "logits/rejected": -2.86683988571167, "logps/chosen": -38.939876556396484, "logps/rejected": -1115.972412109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.21837103366851807, "rewards/margins": 11.0088529586792, "rewards/rejected": -10.790481567382812, "step": 11360 }, { "epoch": 0.68, "learning_rate": 1.4198137854082443e-06, "logits/chosen": -2.9964206218719482, "logits/rejected": -2.8725571632385254, "logps/chosen": -46.660865783691406, "logps/rejected": -1150.775390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.21428155899047852, "rewards/margins": 11.351811408996582, "rewards/rejected": -11.137530326843262, "step": 11370 }, { "epoch": 0.68, "learning_rate": 1.4151232125635123e-06, "logits/chosen": -2.9937281608581543, "logits/rejected": -2.8985135555267334, "logps/chosen": -47.461830139160156, "logps/rejected": -1078.79296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20375847816467285, "rewards/margins": 10.602882385253906, "rewards/rejected": -10.399125099182129, "step": 11380 }, { "epoch": 0.68, "learning_rate": 1.4104373400543162e-06, "logits/chosen": -3.0143704414367676, "logits/rejected": -2.9041645526885986, "logps/chosen": -47.059898376464844, "logps/rejected": -1151.7337646484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.14604613184928894, "rewards/margins": 11.270441055297852, "rewards/rejected": -11.124395370483398, "step": 11390 }, { "epoch": 0.68, "learning_rate": 1.405756188182661e-06, "logits/chosen": -3.0073611736297607, "logits/rejected": -2.9055333137512207, "logps/chosen": -45.010902404785156, "logps/rejected": -1147.753662109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20738156139850616, "rewards/margins": 11.305407524108887, "rewards/rejected": -11.09802532196045, "step": 11400 }, { "epoch": 0.68, "learning_rate": 1.4010797772300972e-06, "logits/chosen": -2.986105442047119, "logits/rejected": -2.8776988983154297, "logps/chosen": -39.50450897216797, "logps/rejected": -1161.011962890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.22133250534534454, "rewards/margins": 11.463359832763672, "rewards/rejected": -11.242026329040527, "step": 11410 }, { "epoch": 0.68, "learning_rate": 1.396408127457637e-06, "logits/chosen": -2.982786178588867, "logits/rejected": -2.8583645820617676, "logps/chosen": -44.070350646972656, "logps/rejected": -1176.3333740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.20366895198822021, "rewards/margins": 11.588472366333008, "rewards/rejected": -11.384805679321289, "step": 11420 }, { "epoch": 0.68, "learning_rate": 1.3917412591056623e-06, "logits/chosen": -2.996035099029541, "logits/rejected": -2.8634631633758545, "logps/chosen": -44.008888244628906, "logps/rejected": -1124.511962890625, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": 0.21000048518180847, "rewards/margins": 11.079353332519531, "rewards/rejected": -10.869353294372559, "step": 11430 }, { "epoch": 0.68, "learning_rate": 1.3870791923938408e-06, "logits/chosen": -2.9895644187927246, "logits/rejected": -2.8870601654052734, "logps/chosen": -42.137813568115234, "logps/rejected": -1221.6668701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.20080363750457764, "rewards/margins": 12.042611122131348, "rewards/rejected": -11.841806411743164, "step": 11440 }, { "epoch": 0.68, "learning_rate": 1.3824219475210337e-06, "logits/chosen": -2.9699034690856934, "logits/rejected": -2.8661084175109863, "logps/chosen": -43.113285064697266, "logps/rejected": -1180.036376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.20199063420295715, "rewards/margins": 11.63547420501709, "rewards/rejected": -11.433484077453613, "step": 11450 }, { "epoch": 0.68, "learning_rate": 1.3777695446652167e-06, "logits/chosen": -2.9958674907684326, "logits/rejected": -2.8910489082336426, "logps/chosen": -51.863258361816406, "logps/rejected": -1180.840087890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.14342352747917175, "rewards/margins": 11.568155288696289, "rewards/rejected": -11.42473316192627, "step": 11460 }, { "epoch": 0.68, "learning_rate": 1.3731220039833798e-06, "logits/chosen": -2.971407175064087, "logits/rejected": -2.8658719062805176, "logps/chosen": -50.532127380371094, "logps/rejected": -1185.2158203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.17759762704372406, "rewards/margins": 11.6591796875, "rewards/rejected": -11.481581687927246, "step": 11470 }, { "epoch": 0.68, "learning_rate": 1.3684793456114526e-06, "logits/chosen": -2.985055446624756, "logits/rejected": -2.870612859725952, "logps/chosen": -44.08103942871094, "logps/rejected": -1208.422607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.19478361308574677, "rewards/margins": 11.8945894241333, "rewards/rejected": -11.699808120727539, "step": 11480 }, { "epoch": 0.69, "learning_rate": 1.3638415896642093e-06, "logits/chosen": -2.9767301082611084, "logits/rejected": -2.8622469902038574, "logps/chosen": -51.73183059692383, "logps/rejected": -1152.476318359375, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": 0.10901723802089691, "rewards/margins": 11.247450828552246, "rewards/rejected": -11.138433456420898, "step": 11490 }, { "epoch": 0.69, "learning_rate": 1.359208756235184e-06, "logits/chosen": -2.9875614643096924, "logits/rejected": -2.8916451930999756, "logps/chosen": -42.525455474853516, "logps/rejected": -1202.492431640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2360583245754242, "rewards/margins": 11.874776840209961, "rewards/rejected": -11.63871955871582, "step": 11500 }, { "epoch": 0.69, "learning_rate": 1.3545808653965847e-06, "logits/chosen": -2.953772783279419, "logits/rejected": -2.898655891418457, "logps/chosen": -38.51740646362305, "logps/rejected": -1151.892822265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2351156771183014, "rewards/margins": 11.357824325561523, "rewards/rejected": -11.122709274291992, "step": 11510 }, { "epoch": 0.69, "learning_rate": 1.349957937199204e-06, "logits/chosen": -2.993791341781616, "logits/rejected": -2.9190802574157715, "logps/chosen": -54.783912658691406, "logps/rejected": -1131.146240234375, "loss": 0.0141, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.10358577966690063, "rewards/margins": 11.025497436523438, "rewards/rejected": -10.921911239624023, "step": 11520 }, { "epoch": 0.69, "learning_rate": 1.3453399916723343e-06, "logits/chosen": -2.970876455307007, "logits/rejected": -2.8284363746643066, "logps/chosen": -43.61065673828125, "logps/rejected": -1148.99462890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.20133164525032043, "rewards/margins": 11.320257186889648, "rewards/rejected": -11.118927001953125, "step": 11530 }, { "epoch": 0.69, "learning_rate": 1.3407270488236769e-06, "logits/chosen": -2.9991328716278076, "logits/rejected": -2.8960061073303223, "logps/chosen": -44.195091247558594, "logps/rejected": -1171.778076171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24960783123970032, "rewards/margins": 11.57624626159668, "rewards/rejected": -11.326639175415039, "step": 11540 }, { "epoch": 0.69, "learning_rate": 1.3361191286392644e-06, "logits/chosen": -3.0043954849243164, "logits/rejected": -2.8925528526306152, "logps/chosen": -34.86273193359375, "logps/rejected": -1226.934814453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2291848361492157, "rewards/margins": 12.115181922912598, "rewards/rejected": -11.88599681854248, "step": 11550 }, { "epoch": 0.69, "learning_rate": 1.3315162510833623e-06, "logits/chosen": -2.994278907775879, "logits/rejected": -2.924050807952881, "logps/chosen": -35.32712173461914, "logps/rejected": -1202.3531494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.25929147005081177, "rewards/margins": 11.910421371459961, "rewards/rejected": -11.651130676269531, "step": 11560 }, { "epoch": 0.69, "learning_rate": 1.3269184360983919e-06, "logits/chosen": -2.973792314529419, "logits/rejected": -2.880009174346924, "logps/chosen": -43.15287399291992, "logps/rejected": -1138.9202880859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20293202996253967, "rewards/margins": 11.211197853088379, "rewards/rejected": -11.008264541625977, "step": 11570 }, { "epoch": 0.69, "learning_rate": 1.3223257036048395e-06, "logits/chosen": -2.9948344230651855, "logits/rejected": -2.8692779541015625, "logps/chosen": -47.26508712768555, "logps/rejected": -1176.089599609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23711225390434265, "rewards/margins": 11.619583129882812, "rewards/rejected": -11.382472038269043, "step": 11580 }, { "epoch": 0.69, "learning_rate": 1.3177380735011714e-06, "logits/chosen": -2.936007261276245, "logits/rejected": -2.842250347137451, "logps/chosen": -43.95637130737305, "logps/rejected": -1202.848388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2281177043914795, "rewards/margins": 11.879964828491211, "rewards/rejected": -11.651845932006836, "step": 11590 }, { "epoch": 0.69, "learning_rate": 1.3131555656637459e-06, "logits/chosen": -2.951169967651367, "logits/rejected": -2.8458704948425293, "logps/chosen": -43.742210388183594, "logps/rejected": -1114.5655517578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2354658842086792, "rewards/margins": 11.0015230178833, "rewards/rejected": -10.766057014465332, "step": 11600 }, { "epoch": 0.69, "learning_rate": 1.3085781999467303e-06, "logits/chosen": -2.9666428565979004, "logits/rejected": -2.8865151405334473, "logps/chosen": -46.862037658691406, "logps/rejected": -1195.6497802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.20852677524089813, "rewards/margins": 11.792407035827637, "rewards/rejected": -11.583879470825195, "step": 11610 }, { "epoch": 0.69, "learning_rate": 1.3040059961820135e-06, "logits/chosen": -2.9621939659118652, "logits/rejected": -2.8957417011260986, "logps/chosen": -40.90565872192383, "logps/rejected": -1197.643798828125, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": 0.24161338806152344, "rewards/margins": 11.858468055725098, "rewards/rejected": -11.61685562133789, "step": 11620 }, { "epoch": 0.69, "learning_rate": 1.2994389741791152e-06, "logits/chosen": -2.9473018646240234, "logits/rejected": -2.862879514694214, "logps/chosen": -39.148677825927734, "logps/rejected": -1157.0625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.22594675421714783, "rewards/margins": 11.427282333374023, "rewards/rejected": -11.201335906982422, "step": 11630 }, { "epoch": 0.69, "learning_rate": 1.294877153725112e-06, "logits/chosen": -2.9707465171813965, "logits/rejected": -2.89689302444458, "logps/chosen": -40.21677780151367, "logps/rejected": -1164.2886962890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2257329225540161, "rewards/margins": 11.487849235534668, "rewards/rejected": -11.262115478515625, "step": 11640 }, { "epoch": 0.69, "learning_rate": 1.2903205545845378e-06, "logits/chosen": -2.953613519668579, "logits/rejected": -2.8798699378967285, "logps/chosen": -40.79751968383789, "logps/rejected": -1132.746826171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.21749050915241241, "rewards/margins": 11.170268058776855, "rewards/rejected": -10.952778816223145, "step": 11650 }, { "epoch": 0.7, "learning_rate": 1.285769196499308e-06, "logits/chosen": -2.983457088470459, "logits/rejected": -2.8922226428985596, "logps/chosen": -47.60618591308594, "logps/rejected": -1182.3720703125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.20825329422950745, "rewards/margins": 11.651674270629883, "rewards/rejected": -11.44342041015625, "step": 11660 }, { "epoch": 0.7, "learning_rate": 1.28122309918863e-06, "logits/chosen": -2.964012622833252, "logits/rejected": -2.864854335784912, "logps/chosen": -45.7902717590332, "logps/rejected": -1165.3603515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.21136493980884552, "rewards/margins": 11.479959487915039, "rewards/rejected": -11.268595695495605, "step": 11670 }, { "epoch": 0.7, "learning_rate": 1.2766822823489175e-06, "logits/chosen": -2.9518961906433105, "logits/rejected": -2.8550055027008057, "logps/chosen": -46.69945526123047, "logps/rejected": -1168.0201416015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.19960328936576843, "rewards/margins": 11.499316215515137, "rewards/rejected": -11.299712181091309, "step": 11680 }, { "epoch": 0.7, "learning_rate": 1.2721467656537074e-06, "logits/chosen": -2.9930520057678223, "logits/rejected": -2.8923087120056152, "logps/chosen": -40.81746292114258, "logps/rejected": -1168.647705078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23054301738739014, "rewards/margins": 11.525400161743164, "rewards/rejected": -11.2948579788208, "step": 11690 }, { "epoch": 0.7, "learning_rate": 1.2676165687535719e-06, "logits/chosen": -2.9788079261779785, "logits/rejected": -2.9137725830078125, "logps/chosen": -43.557411193847656, "logps/rejected": -1152.1654052734375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.2078150510787964, "rewards/margins": 11.346084594726562, "rewards/rejected": -11.13826847076416, "step": 11700 }, { "epoch": 0.7, "learning_rate": 1.2630917112760365e-06, "logits/chosen": -2.978635311126709, "logits/rejected": -2.8734307289123535, "logps/chosen": -43.14468002319336, "logps/rejected": -1127.475341796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.1887015402317047, "rewards/margins": 11.078083038330078, "rewards/rejected": -10.889381408691406, "step": 11710 }, { "epoch": 0.7, "learning_rate": 1.2585722128254896e-06, "logits/chosen": -2.965806007385254, "logits/rejected": -2.8903889656066895, "logps/chosen": -49.46418380737305, "logps/rejected": -1115.8258056640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.16491682827472687, "rewards/margins": 10.95075798034668, "rewards/rejected": -10.78584098815918, "step": 11720 }, { "epoch": 0.7, "learning_rate": 1.2540580929831065e-06, "logits/chosen": -2.9566237926483154, "logits/rejected": -2.851381778717041, "logps/chosen": -44.47636413574219, "logps/rejected": -1181.746826171875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.23251020908355713, "rewards/margins": 11.655981063842773, "rewards/rejected": -11.423470497131348, "step": 11730 }, { "epoch": 0.7, "learning_rate": 1.249549371306753e-06, "logits/chosen": -2.9782683849334717, "logits/rejected": -2.8658106327056885, "logps/chosen": -53.157325744628906, "logps/rejected": -1119.99609375, "loss": 0.0162, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.13558395206928253, "rewards/margins": 10.949732780456543, "rewards/rejected": -10.814149856567383, "step": 11740 }, { "epoch": 0.7, "learning_rate": 1.2450460673309115e-06, "logits/chosen": -2.9673807621002197, "logits/rejected": -2.8922605514526367, "logps/chosen": -36.957366943359375, "logps/rejected": -1161.579345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.23830199241638184, "rewards/margins": 11.47529125213623, "rewards/rejected": -11.236989974975586, "step": 11750 }, { "epoch": 0.7, "learning_rate": 1.2405482005665894e-06, "logits/chosen": -2.9570083618164062, "logits/rejected": -2.8619301319122314, "logps/chosen": -65.73750305175781, "logps/rejected": -1157.44287109375, "loss": 0.0932, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.001805050647817552, "rewards/margins": 11.19676399230957, "rewards/rejected": -11.194957733154297, "step": 11760 }, { "epoch": 0.7, "learning_rate": 1.236055790501238e-06, "logits/chosen": -2.9959282875061035, "logits/rejected": -2.8633198738098145, "logps/chosen": -42.213966369628906, "logps/rejected": -1140.922119140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23485954105854034, "rewards/margins": 11.270306587219238, "rewards/rejected": -11.035447120666504, "step": 11770 }, { "epoch": 0.7, "learning_rate": 1.231568856598666e-06, "logits/chosen": -2.99499249458313, "logits/rejected": -2.9011573791503906, "logps/chosen": -38.46477508544922, "logps/rejected": -1167.1292724609375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.24254314601421356, "rewards/margins": 11.522289276123047, "rewards/rejected": -11.279746055603027, "step": 11780 }, { "epoch": 0.7, "learning_rate": 1.2270874182989566e-06, "logits/chosen": -2.9618537425994873, "logits/rejected": -2.861665725708008, "logps/chosen": -34.01465606689453, "logps/rejected": -1130.404296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23580500483512878, "rewards/margins": 11.167341232299805, "rewards/rejected": -10.931537628173828, "step": 11790 }, { "epoch": 0.7, "learning_rate": 1.2226114950183836e-06, "logits/chosen": -2.9822356700897217, "logits/rejected": -2.9043755531311035, "logps/chosen": -39.431556701660156, "logps/rejected": -1137.254150390625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.22944872081279755, "rewards/margins": 11.2265625, "rewards/rejected": -10.997115135192871, "step": 11800 }, { "epoch": 0.7, "learning_rate": 1.2181411061493229e-06, "logits/chosen": -2.9859983921051025, "logits/rejected": -2.8685221672058105, "logps/chosen": -36.24113845825195, "logps/rejected": -1121.3841552734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24744531512260437, "rewards/margins": 11.076356887817383, "rewards/rejected": -10.828911781311035, "step": 11810 }, { "epoch": 0.7, "learning_rate": 1.213676271060178e-06, "logits/chosen": -2.983349084854126, "logits/rejected": -2.8832027912139893, "logps/chosen": -36.741004943847656, "logps/rejected": -1174.3651123046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27064797282218933, "rewards/margins": 11.636423110961914, "rewards/rejected": -11.365774154663086, "step": 11820 }, { "epoch": 0.71, "learning_rate": 1.2092170090952838e-06, "logits/chosen": -2.9929051399230957, "logits/rejected": -2.9086177349090576, "logps/chosen": -40.043487548828125, "logps/rejected": -1189.5416259765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26555031538009644, "rewards/margins": 11.775030136108398, "rewards/rejected": -11.509480476379395, "step": 11830 }, { "epoch": 0.71, "learning_rate": 1.204763339574833e-06, "logits/chosen": -2.967975616455078, "logits/rejected": -2.8885464668273926, "logps/chosen": -36.68044662475586, "logps/rejected": -1184.0504150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26540905237197876, "rewards/margins": 11.73690128326416, "rewards/rejected": -11.471492767333984, "step": 11840 }, { "epoch": 0.71, "learning_rate": 1.2003152817947878e-06, "logits/chosen": -2.95615816116333, "logits/rejected": -2.861989974975586, "logps/chosen": -46.40386962890625, "logps/rejected": -1108.5872802734375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": 0.1844225823879242, "rewards/margins": 10.896265983581543, "rewards/rejected": -10.711844444274902, "step": 11850 }, { "epoch": 0.71, "learning_rate": 1.1958728550267958e-06, "logits/chosen": -2.9726595878601074, "logits/rejected": -2.8892173767089844, "logps/chosen": -53.17555618286133, "logps/rejected": -1101.4166259765625, "loss": 0.0111, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.1202959269285202, "rewards/margins": 10.761265754699707, "rewards/rejected": -10.640970230102539, "step": 11860 }, { "epoch": 0.71, "learning_rate": 1.1914360785181099e-06, "logits/chosen": -2.986640214920044, "logits/rejected": -2.869215726852417, "logps/chosen": -35.662235260009766, "logps/rejected": -1174.603271484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2559961676597595, "rewards/margins": 11.610061645507812, "rewards/rejected": -11.354065895080566, "step": 11870 }, { "epoch": 0.71, "learning_rate": 1.1870049714915e-06, "logits/chosen": -3.002635955810547, "logits/rejected": -2.911438226699829, "logps/chosen": -40.895050048828125, "logps/rejected": -1160.8365478515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2687997817993164, "rewards/margins": 11.509298324584961, "rewards/rejected": -11.240498542785645, "step": 11880 }, { "epoch": 0.71, "learning_rate": 1.182579553145175e-06, "logits/chosen": -2.969472885131836, "logits/rejected": -2.8841958045959473, "logps/chosen": -39.07511901855469, "logps/rejected": -1189.1009521484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2644314765930176, "rewards/margins": 11.773249626159668, "rewards/rejected": -11.508817672729492, "step": 11890 }, { "epoch": 0.71, "learning_rate": 1.1781598426526935e-06, "logits/chosen": -2.954538106918335, "logits/rejected": -2.910521984100342, "logps/chosen": -32.152137756347656, "logps/rejected": -1162.2843017578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.22496478259563446, "rewards/margins": 11.45982551574707, "rewards/rejected": -11.234861373901367, "step": 11900 }, { "epoch": 0.71, "learning_rate": 1.1737458591628898e-06, "logits/chosen": -2.987802028656006, "logits/rejected": -2.891878366470337, "logps/chosen": -40.500606536865234, "logps/rejected": -1174.0113525390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.28148946166038513, "rewards/margins": 11.641619682312012, "rewards/rejected": -11.360130310058594, "step": 11910 }, { "epoch": 0.71, "learning_rate": 1.1693376217997795e-06, "logits/chosen": -2.987597942352295, "logits/rejected": -2.8672566413879395, "logps/chosen": -35.482696533203125, "logps/rejected": -1139.1376953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27935850620269775, "rewards/margins": 11.298341751098633, "rewards/rejected": -11.018982887268066, "step": 11920 }, { "epoch": 0.71, "learning_rate": 1.164935149662485e-06, "logits/chosen": -2.9540762901306152, "logits/rejected": -2.881880283355713, "logps/chosen": -35.51407241821289, "logps/rejected": -1102.9366455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26043421030044556, "rewards/margins": 10.912699699401855, "rewards/rejected": -10.652265548706055, "step": 11930 }, { "epoch": 0.71, "learning_rate": 1.1605384618251533e-06, "logits/chosen": -2.993917465209961, "logits/rejected": -2.8904967308044434, "logps/chosen": -42.25178909301758, "logps/rejected": -1119.0673828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.24596843123435974, "rewards/margins": 11.062363624572754, "rewards/rejected": -10.81639575958252, "step": 11940 }, { "epoch": 0.71, "learning_rate": 1.156147577336865e-06, "logits/chosen": -2.9982552528381348, "logits/rejected": -2.876122236251831, "logps/chosen": -36.24424743652344, "logps/rejected": -1173.70263671875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.23458552360534668, "rewards/margins": 11.603422164916992, "rewards/rejected": -11.368837356567383, "step": 11950 }, { "epoch": 0.71, "learning_rate": 1.1517625152215603e-06, "logits/chosen": -2.972898006439209, "logits/rejected": -2.889772891998291, "logps/chosen": -47.55752944946289, "logps/rejected": -1176.8929443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.22611713409423828, "rewards/margins": 11.608799934387207, "rewards/rejected": -11.382681846618652, "step": 11960 }, { "epoch": 0.71, "learning_rate": 1.1473832944779525e-06, "logits/chosen": -2.972881317138672, "logits/rejected": -2.8689887523651123, "logps/chosen": -35.20206832885742, "logps/rejected": -1175.5858154296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2866462469100952, "rewards/margins": 11.654053688049316, "rewards/rejected": -11.367406845092773, "step": 11970 }, { "epoch": 0.71, "learning_rate": 1.1430099340794482e-06, "logits/chosen": -2.961158037185669, "logits/rejected": -2.865914821624756, "logps/chosen": -37.80493927001953, "logps/rejected": -1183.5911865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.22775602340698242, "rewards/margins": 11.68901538848877, "rewards/rejected": -11.461259841918945, "step": 11980 }, { "epoch": 0.71, "learning_rate": 1.138642452974059e-06, "logits/chosen": -2.9787769317626953, "logits/rejected": -2.880988359451294, "logps/chosen": -35.887550354003906, "logps/rejected": -1193.4393310546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24525102972984314, "rewards/margins": 11.80025577545166, "rewards/rejected": -11.555005073547363, "step": 11990 }, { "epoch": 0.72, "learning_rate": 1.1342808700843297e-06, "logits/chosen": -2.9695677757263184, "logits/rejected": -2.869367837905884, "logps/chosen": -38.45390701293945, "logps/rejected": -1159.9268798828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26684877276420593, "rewards/margins": 11.494234085083008, "rewards/rejected": -11.227384567260742, "step": 12000 }, { "epoch": 0.72, "learning_rate": 1.1299252043072478e-06, "logits/chosen": -2.9908947944641113, "logits/rejected": -2.8914685249328613, "logps/chosen": -33.186180114746094, "logps/rejected": -1194.9954833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2740139067173004, "rewards/margins": 11.848718643188477, "rewards/rejected": -11.574705123901367, "step": 12010 }, { "epoch": 0.72, "learning_rate": 1.1255754745141617e-06, "logits/chosen": -2.9684338569641113, "logits/rejected": -2.893120288848877, "logps/chosen": -36.8491325378418, "logps/rejected": -1207.61474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24972519278526306, "rewards/margins": 11.953694343566895, "rewards/rejected": -11.703967094421387, "step": 12020 }, { "epoch": 0.72, "learning_rate": 1.1212316995507079e-06, "logits/chosen": -2.9929397106170654, "logits/rejected": -2.896528482437134, "logps/chosen": -46.448753356933594, "logps/rejected": -1172.0018310546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25816696882247925, "rewards/margins": 11.611831665039062, "rewards/rejected": -11.35366439819336, "step": 12030 }, { "epoch": 0.72, "learning_rate": 1.1168938982367162e-06, "logits/chosen": -2.9523792266845703, "logits/rejected": -2.879960298538208, "logps/chosen": -36.3621940612793, "logps/rejected": -1170.0487060546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2707396447658539, "rewards/margins": 11.593080520629883, "rewards/rejected": -11.32234001159668, "step": 12040 }, { "epoch": 0.72, "learning_rate": 1.112562089366139e-06, "logits/chosen": -2.936981439590454, "logits/rejected": -2.853071451187134, "logps/chosen": -38.08700180053711, "logps/rejected": -1211.254150390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27414631843566895, "rewards/margins": 12.0153169631958, "rewards/rejected": -11.741170883178711, "step": 12050 }, { "epoch": 0.72, "learning_rate": 1.108236291706965e-06, "logits/chosen": -2.9489123821258545, "logits/rejected": -2.86153244972229, "logps/chosen": -36.734130859375, "logps/rejected": -1115.458740234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.25665873289108276, "rewards/margins": 11.046777725219727, "rewards/rejected": -10.790118217468262, "step": 12060 }, { "epoch": 0.72, "learning_rate": 1.1039165240011388e-06, "logits/chosen": -2.983938217163086, "logits/rejected": -2.8762736320495605, "logps/chosen": -37.92504119873047, "logps/rejected": -1147.318603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26469841599464417, "rewards/margins": 11.352068901062012, "rewards/rejected": -11.087371826171875, "step": 12070 }, { "epoch": 0.72, "learning_rate": 1.0996028049644792e-06, "logits/chosen": -2.958718776702881, "logits/rejected": -2.8768887519836426, "logps/chosen": -41.51806640625, "logps/rejected": -1118.773681640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.1928490549325943, "rewards/margins": 11.01382827758789, "rewards/rejected": -10.820979118347168, "step": 12080 }, { "epoch": 0.72, "learning_rate": 1.095295153286599e-06, "logits/chosen": -2.9928250312805176, "logits/rejected": -2.903658390045166, "logps/chosen": -40.29800796508789, "logps/rejected": -1130.06005859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2313009798526764, "rewards/margins": 11.176233291625977, "rewards/rejected": -10.944931983947754, "step": 12090 }, { "epoch": 0.72, "learning_rate": 1.090993587630824e-06, "logits/chosen": -2.9578380584716797, "logits/rejected": -2.839456558227539, "logps/chosen": -49.56802749633789, "logps/rejected": -1135.094970703125, "loss": 0.0257, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.17364776134490967, "rewards/margins": 11.150922775268555, "rewards/rejected": -10.977275848388672, "step": 12100 }, { "epoch": 0.72, "learning_rate": 1.0866981266341084e-06, "logits/chosen": -2.9836795330047607, "logits/rejected": -2.885863780975342, "logps/chosen": -47.33232879638672, "logps/rejected": -1188.791748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.293260395526886, "rewards/margins": 11.799732208251953, "rewards/rejected": -11.506471633911133, "step": 12110 }, { "epoch": 0.72, "learning_rate": 1.082408788906964e-06, "logits/chosen": -2.9591565132141113, "logits/rejected": -2.8527989387512207, "logps/chosen": -34.33154296875, "logps/rejected": -1214.028564453125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.26425081491470337, "rewards/margins": 12.034966468811035, "rewards/rejected": -11.770715713500977, "step": 12120 }, { "epoch": 0.72, "learning_rate": 1.078125593033366e-06, "logits/chosen": -2.959596633911133, "logits/rejected": -2.8661646842956543, "logps/chosen": -40.44380187988281, "logps/rejected": -1169.208740234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2741473913192749, "rewards/margins": 11.580734252929688, "rewards/rejected": -11.306586265563965, "step": 12130 }, { "epoch": 0.72, "learning_rate": 1.0738485575706834e-06, "logits/chosen": -2.9857029914855957, "logits/rejected": -2.9023795127868652, "logps/chosen": -40.11229705810547, "logps/rejected": -1188.66796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27432578802108765, "rewards/margins": 11.786205291748047, "rewards/rejected": -11.511880874633789, "step": 12140 }, { "epoch": 0.72, "learning_rate": 1.0695777010495936e-06, "logits/chosen": -2.9798636436462402, "logits/rejected": -2.8890364170074463, "logps/chosen": -54.93559646606445, "logps/rejected": -1142.947998046875, "loss": 0.0147, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.10827766358852386, "rewards/margins": 11.16468334197998, "rewards/rejected": -11.05640697479248, "step": 12150 }, { "epoch": 0.73, "learning_rate": 1.065313041974003e-06, "logits/chosen": -2.959062099456787, "logits/rejected": -2.861224889755249, "logps/chosen": -36.75259017944336, "logps/rejected": -1184.8089599609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2764037251472473, "rewards/margins": 11.743910789489746, "rewards/rejected": -11.467508316040039, "step": 12160 }, { "epoch": 0.73, "learning_rate": 1.0610545988209671e-06, "logits/chosen": -3.025331974029541, "logits/rejected": -2.9071688652038574, "logps/chosen": -37.47315216064453, "logps/rejected": -1195.7047119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2393031120300293, "rewards/margins": 11.817562103271484, "rewards/rejected": -11.57826042175293, "step": 12170 }, { "epoch": 0.73, "learning_rate": 1.0568023900406108e-06, "logits/chosen": -2.976341962814331, "logits/rejected": -2.857032060623169, "logps/chosen": -44.40494918823242, "logps/rejected": -1195.4232177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.25683727860450745, "rewards/margins": 11.834452629089355, "rewards/rejected": -11.577615737915039, "step": 12180 }, { "epoch": 0.73, "learning_rate": 1.0525564340560476e-06, "logits/chosen": -2.952303171157837, "logits/rejected": -2.851487636566162, "logps/chosen": -34.995338439941406, "logps/rejected": -1194.9796142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2309255301952362, "rewards/margins": 11.801692962646484, "rewards/rejected": -11.570768356323242, "step": 12190 }, { "epoch": 0.73, "learning_rate": 1.048316749263298e-06, "logits/chosen": -2.9381628036499023, "logits/rejected": -2.8619794845581055, "logps/chosen": -37.24252700805664, "logps/rejected": -1169.7255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.23713581264019012, "rewards/margins": 11.553815841674805, "rewards/rejected": -11.316679000854492, "step": 12200 }, { "epoch": 0.73, "learning_rate": 1.044083354031217e-06, "logits/chosen": -2.967029094696045, "logits/rejected": -2.893406867980957, "logps/chosen": -47.820701599121094, "logps/rejected": -1085.49609375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.18313643336296082, "rewards/margins": 10.656448364257812, "rewards/rejected": -10.473312377929688, "step": 12210 }, { "epoch": 0.73, "learning_rate": 1.039856266701404e-06, "logits/chosen": -2.9632041454315186, "logits/rejected": -2.857893228530884, "logps/chosen": -43.17525100708008, "logps/rejected": -1159.887939453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.22063703835010529, "rewards/margins": 11.443717002868652, "rewards/rejected": -11.223078727722168, "step": 12220 }, { "epoch": 0.73, "learning_rate": 1.035635505588132e-06, "logits/chosen": -2.971557855606079, "logits/rejected": -2.877236843109131, "logps/chosen": -32.1794548034668, "logps/rejected": -1173.7420654296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2600270211696625, "rewards/margins": 11.618906021118164, "rewards/rejected": -11.358879089355469, "step": 12230 }, { "epoch": 0.73, "learning_rate": 1.0314210889782642e-06, "logits/chosen": -2.9517247676849365, "logits/rejected": -2.859025239944458, "logps/chosen": -32.33240509033203, "logps/rejected": -1176.85107421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.28396862745285034, "rewards/margins": 11.674752235412598, "rewards/rejected": -11.390782356262207, "step": 12240 }, { "epoch": 0.73, "learning_rate": 1.0272130351311758e-06, "logits/chosen": -2.9752228260040283, "logits/rejected": -2.9064173698425293, "logps/chosen": -41.53142547607422, "logps/rejected": -1233.6319580078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24977794289588928, "rewards/margins": 12.196327209472656, "rewards/rejected": -11.946549415588379, "step": 12250 }, { "epoch": 0.73, "learning_rate": 1.0230113622786744e-06, "logits/chosen": -2.97226881980896, "logits/rejected": -2.9231579303741455, "logps/chosen": -40.39386749267578, "logps/rejected": -1179.3980712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24653077125549316, "rewards/margins": 11.675973892211914, "rewards/rejected": -11.429443359375, "step": 12260 }, { "epoch": 0.73, "learning_rate": 1.0188160886249219e-06, "logits/chosen": -2.985018253326416, "logits/rejected": -2.8760008811950684, "logps/chosen": -41.23345947265625, "logps/rejected": -1195.0472412109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24275727570056915, "rewards/margins": 11.821584701538086, "rewards/rejected": -11.578826904296875, "step": 12270 }, { "epoch": 0.73, "learning_rate": 1.0146272323463548e-06, "logits/chosen": -2.967759847640991, "logits/rejected": -2.865983724594116, "logps/chosen": -38.503990173339844, "logps/rejected": -1218.6331787109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.263762891292572, "rewards/margins": 12.059723854064941, "rewards/rejected": -11.795961380004883, "step": 12280 }, { "epoch": 0.73, "learning_rate": 1.0104448115916035e-06, "logits/chosen": -2.9755725860595703, "logits/rejected": -2.876980781555176, "logps/chosen": -38.954078674316406, "logps/rejected": -1194.80126953125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.25288230180740356, "rewards/margins": 11.831363677978516, "rewards/rejected": -11.578481674194336, "step": 12290 }, { "epoch": 0.73, "learning_rate": 1.0062688444814208e-06, "logits/chosen": -2.971505641937256, "logits/rejected": -2.8939414024353027, "logps/chosen": -60.00901412963867, "logps/rejected": -1127.25439453125, "loss": 0.0484, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.06358586251735687, "rewards/margins": 10.968308448791504, "rewards/rejected": -10.904722213745117, "step": 12300 }, { "epoch": 0.73, "learning_rate": 1.0020993491085936e-06, "logits/chosen": -2.96516752243042, "logits/rejected": -2.8684802055358887, "logps/chosen": -38.481597900390625, "logps/rejected": -1178.405517578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26165342330932617, "rewards/margins": 11.663217544555664, "rewards/rejected": -11.40156364440918, "step": 12310 }, { "epoch": 0.73, "learning_rate": 9.979363435378717e-07, "logits/chosen": -2.9567415714263916, "logits/rejected": -2.878603458404541, "logps/chosen": -38.567501068115234, "logps/rejected": -1129.0992431640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.23991826176643372, "rewards/margins": 11.158260345458984, "rewards/rejected": -10.918341636657715, "step": 12320 }, { "epoch": 0.74, "learning_rate": 9.937798458058864e-07, "logits/chosen": -2.947622776031494, "logits/rejected": -2.8692736625671387, "logps/chosen": -37.45185089111328, "logps/rejected": -1107.591064453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.24202676117420197, "rewards/margins": 10.94930648803711, "rewards/rejected": -10.707279205322266, "step": 12330 }, { "epoch": 0.74, "learning_rate": 9.896298739210745e-07, "logits/chosen": -2.986729383468628, "logits/rejected": -2.9139909744262695, "logps/chosen": -38.77259063720703, "logps/rejected": -1144.2406005859375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.23104842007160187, "rewards/margins": 11.291156768798828, "rewards/rejected": -11.06010913848877, "step": 12340 }, { "epoch": 0.74, "learning_rate": 9.85486445863597e-07, "logits/chosen": -2.9922842979431152, "logits/rejected": -2.8733534812927246, "logps/chosen": -34.517311096191406, "logps/rejected": -1187.5609130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.21110956370830536, "rewards/margins": 11.713881492614746, "rewards/rejected": -11.502772331237793, "step": 12350 }, { "epoch": 0.74, "learning_rate": 9.813495795852646e-07, "logits/chosen": -2.950638771057129, "logits/rejected": -2.870116710662842, "logps/chosen": -36.972991943359375, "logps/rejected": -1166.299072265625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": 0.24014706909656525, "rewards/margins": 11.522899627685547, "rewards/rejected": -11.282752990722656, "step": 12360 }, { "epoch": 0.74, "learning_rate": 9.772192930094588e-07, "logits/chosen": -2.955618381500244, "logits/rejected": -2.840933322906494, "logps/chosen": -37.2730598449707, "logps/rejected": -1219.2705078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2568295896053314, "rewards/margins": 12.07409381866455, "rewards/rejected": -11.817262649536133, "step": 12370 }, { "epoch": 0.74, "learning_rate": 9.730956040310499e-07, "logits/chosen": -2.991342544555664, "logits/rejected": -2.9050543308258057, "logps/chosen": -48.30164337158203, "logps/rejected": -1139.393798828125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.17863276600837708, "rewards/margins": 11.196471214294434, "rewards/rejected": -11.017838478088379, "step": 12380 }, { "epoch": 0.74, "learning_rate": 9.689785305163307e-07, "logits/chosen": -2.9603588581085205, "logits/rejected": -2.850473403930664, "logps/chosen": -37.47677230834961, "logps/rejected": -1137.758544921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.28734517097473145, "rewards/margins": 11.289468765258789, "rewards/rejected": -11.002123832702637, "step": 12390 }, { "epoch": 0.74, "learning_rate": 9.648680903029245e-07, "logits/chosen": -2.978358745574951, "logits/rejected": -2.896768093109131, "logps/chosen": -54.629180908203125, "logps/rejected": -1162.615478515625, "loss": 0.0428, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.11209311336278915, "rewards/margins": 11.363478660583496, "rewards/rejected": -11.251385688781738, "step": 12400 }, { "epoch": 0.74, "learning_rate": 9.607643011997195e-07, "logits/chosen": -2.9816641807556152, "logits/rejected": -2.88753080368042, "logps/chosen": -40.10388946533203, "logps/rejected": -1183.19873046875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2487632781267166, "rewards/margins": 11.692834854125977, "rewards/rejected": -11.444070816040039, "step": 12410 }, { "epoch": 0.74, "learning_rate": 9.566671809867864e-07, "logits/chosen": -2.984673261642456, "logits/rejected": -2.9047646522521973, "logps/chosen": -40.03968811035156, "logps/rejected": -1180.5390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2549511194229126, "rewards/margins": 11.680717468261719, "rewards/rejected": -11.425766944885254, "step": 12420 }, { "epoch": 0.74, "learning_rate": 9.52576747415302e-07, "logits/chosen": -2.9844226837158203, "logits/rejected": -2.9337713718414307, "logps/chosen": -39.459266662597656, "logps/rejected": -1177.5567626953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26081061363220215, "rewards/margins": 11.652107238769531, "rewards/rejected": -11.39129638671875, "step": 12430 }, { "epoch": 0.74, "learning_rate": 9.484930182074722e-07, "logits/chosen": -2.9697678089141846, "logits/rejected": -2.873410224914551, "logps/chosen": -37.36946105957031, "logps/rejected": -1167.4879150390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27488023042678833, "rewards/margins": 11.567070960998535, "rewards/rejected": -11.292190551757812, "step": 12440 }, { "epoch": 0.74, "learning_rate": 9.444160110564563e-07, "logits/chosen": -2.9714651107788086, "logits/rejected": -2.875026226043701, "logps/chosen": -40.845741271972656, "logps/rejected": -1109.27587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.27259570360183716, "rewards/margins": 10.985481262207031, "rewards/rejected": -10.712884902954102, "step": 12450 }, { "epoch": 0.74, "learning_rate": 9.403457436262906e-07, "logits/chosen": -2.968984842300415, "logits/rejected": -2.896516799926758, "logps/chosen": -38.262672424316406, "logps/rejected": -1155.863037109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23486852645874023, "rewards/margins": 11.420372009277344, "rewards/rejected": -11.185503959655762, "step": 12460 }, { "epoch": 0.74, "learning_rate": 9.362822335518062e-07, "logits/chosen": -2.9503836631774902, "logits/rejected": -2.839237689971924, "logps/chosen": -48.15040969848633, "logps/rejected": -1170.212158203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.21227622032165527, "rewards/margins": 11.537359237670898, "rewards/rejected": -11.325082778930664, "step": 12470 }, { "epoch": 0.74, "learning_rate": 9.322254984385651e-07, "logits/chosen": -2.9740242958068848, "logits/rejected": -2.897141218185425, "logps/chosen": -33.915748596191406, "logps/rejected": -1179.763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2372654676437378, "rewards/margins": 11.657106399536133, "rewards/rejected": -11.419840812683105, "step": 12480 }, { "epoch": 0.74, "learning_rate": 9.281755558627686e-07, "logits/chosen": -2.9605021476745605, "logits/rejected": -2.8578908443450928, "logps/chosen": -41.305763244628906, "logps/rejected": -1168.185302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2609034478664398, "rewards/margins": 11.571598052978516, "rewards/rejected": -11.310694694519043, "step": 12490 }, { "epoch": 0.75, "learning_rate": 9.241324233711929e-07, "logits/chosen": -2.951767683029175, "logits/rejected": -2.871333599090576, "logps/chosen": -30.266916275024414, "logps/rejected": -1190.376953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2597092390060425, "rewards/margins": 11.788762092590332, "rewards/rejected": -11.529052734375, "step": 12500 }, { "epoch": 0.75, "learning_rate": 9.200961184811075e-07, "logits/chosen": -2.942389488220215, "logits/rejected": -2.8651461601257324, "logps/chosen": -42.28224563598633, "logps/rejected": -1166.012939453125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.24014775454998016, "rewards/margins": 11.513397216796875, "rewards/rejected": -11.273247718811035, "step": 12510 }, { "epoch": 0.75, "learning_rate": 9.160666586802011e-07, "logits/chosen": -2.9826252460479736, "logits/rejected": -2.8799729347229004, "logps/chosen": -35.38467788696289, "logps/rejected": -1157.2989501953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25897639989852905, "rewards/margins": 11.450986862182617, "rewards/rejected": -11.192010879516602, "step": 12520 }, { "epoch": 0.75, "learning_rate": 9.12044061426505e-07, "logits/chosen": -2.9826321601867676, "logits/rejected": -2.9118235111236572, "logps/chosen": -37.193477630615234, "logps/rejected": -1212.234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.24778708815574646, "rewards/margins": 11.98847770690918, "rewards/rejected": -11.740690231323242, "step": 12530 }, { "epoch": 0.75, "learning_rate": 9.080283441483182e-07, "logits/chosen": -2.9722399711608887, "logits/rejected": -2.8850831985473633, "logps/chosen": -41.62861251831055, "logps/rejected": -1163.427001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2446100264787674, "rewards/margins": 11.501559257507324, "rewards/rejected": -11.256950378417969, "step": 12540 }, { "epoch": 0.75, "learning_rate": 9.040195242441322e-07, "logits/chosen": -2.9646708965301514, "logits/rejected": -2.8700923919677734, "logps/chosen": -38.2463264465332, "logps/rejected": -1182.385009765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24883432686328888, "rewards/margins": 11.692138671875, "rewards/rejected": -11.443305015563965, "step": 12550 }, { "epoch": 0.75, "learning_rate": 9.000176190825513e-07, "logits/chosen": -2.9940528869628906, "logits/rejected": -2.897892475128174, "logps/chosen": -44.93384552001953, "logps/rejected": -1173.1185302734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.2557370066642761, "rewards/margins": 11.61365795135498, "rewards/rejected": -11.35792064666748, "step": 12560 }, { "epoch": 0.75, "learning_rate": 8.960226460022272e-07, "logits/chosen": -2.9750049114227295, "logits/rejected": -2.8631157875061035, "logps/chosen": -39.03056335449219, "logps/rejected": -1101.9461669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24933266639709473, "rewards/margins": 10.88164234161377, "rewards/rejected": -10.63231086730957, "step": 12570 }, { "epoch": 0.75, "learning_rate": 8.920346223117721e-07, "logits/chosen": -2.9525625705718994, "logits/rejected": -2.8669161796569824, "logps/chosen": -38.90068817138672, "logps/rejected": -1127.7830810546875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.22416849434375763, "rewards/margins": 11.137886047363281, "rewards/rejected": -10.913717269897461, "step": 12580 }, { "epoch": 0.75, "learning_rate": 8.88053565289691e-07, "logits/chosen": -2.9766736030578613, "logits/rejected": -2.890133857727051, "logps/chosen": -34.43910598754883, "logps/rejected": -1117.6988525390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25915125012397766, "rewards/margins": 11.075946807861328, "rewards/rejected": -10.816795349121094, "step": 12590 }, { "epoch": 0.75, "learning_rate": 8.840794921843085e-07, "logits/chosen": -2.9437596797943115, "logits/rejected": -2.869204044342041, "logps/chosen": -30.868921279907227, "logps/rejected": -1149.90087890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.24720633029937744, "rewards/margins": 11.369736671447754, "rewards/rejected": -11.122529983520508, "step": 12600 }, { "epoch": 0.75, "learning_rate": 8.801124202136846e-07, "logits/chosen": -3.0011610984802246, "logits/rejected": -2.91206431388855, "logps/chosen": -46.17510223388672, "logps/rejected": -1194.6365966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2171311378479004, "rewards/margins": 11.791913986206055, "rewards/rejected": -11.574782371520996, "step": 12610 }, { "epoch": 0.75, "learning_rate": 8.761523665655508e-07, "logits/chosen": -2.9563121795654297, "logits/rejected": -2.904092788696289, "logps/chosen": -39.947879791259766, "logps/rejected": -1187.360595703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2310194969177246, "rewards/margins": 11.726469993591309, "rewards/rejected": -11.495450973510742, "step": 12620 }, { "epoch": 0.75, "learning_rate": 8.721993483972294e-07, "logits/chosen": -2.9383018016815186, "logits/rejected": -2.8935160636901855, "logps/chosen": -36.42755889892578, "logps/rejected": -1095.85107421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2517063021659851, "rewards/margins": 10.821484565734863, "rewards/rejected": -10.569779396057129, "step": 12630 }, { "epoch": 0.75, "learning_rate": 8.682533828355616e-07, "logits/chosen": -2.989175796508789, "logits/rejected": -2.8767189979553223, "logps/chosen": -40.821983337402344, "logps/rejected": -1147.575927734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2668913006782532, "rewards/margins": 11.371709823608398, "rewards/rejected": -11.104818344116211, "step": 12640 }, { "epoch": 0.75, "learning_rate": 8.643144869768294e-07, "logits/chosen": -2.994812488555908, "logits/rejected": -2.934831142425537, "logps/chosen": -39.99990463256836, "logps/rejected": -1170.942138671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2398485690355301, "rewards/margins": 11.563240051269531, "rewards/rejected": -11.323391914367676, "step": 12650 }, { "epoch": 0.75, "learning_rate": 8.6038267788669e-07, "logits/chosen": -2.9645984172821045, "logits/rejected": -2.881371021270752, "logps/chosen": -45.75031280517578, "logps/rejected": -1154.321533203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23920436203479767, "rewards/margins": 11.395447731018066, "rewards/rejected": -11.156240463256836, "step": 12660 }, { "epoch": 0.76, "learning_rate": 8.56457972600093e-07, "logits/chosen": -2.98840594291687, "logits/rejected": -2.9233546257019043, "logps/chosen": -36.95977020263672, "logps/rejected": -1113.468017578125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.22890691459178925, "rewards/margins": 10.99429702758789, "rewards/rejected": -10.765390396118164, "step": 12670 }, { "epoch": 0.76, "learning_rate": 8.525403881212083e-07, "logits/chosen": -2.9825987815856934, "logits/rejected": -2.8890469074249268, "logps/chosen": -42.40168762207031, "logps/rejected": -1143.2803955078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.22717733681201935, "rewards/margins": 11.25462818145752, "rewards/rejected": -11.027450561523438, "step": 12680 }, { "epoch": 0.76, "learning_rate": 8.486299414233598e-07, "logits/chosen": -2.982424259185791, "logits/rejected": -2.8843843936920166, "logps/chosen": -40.37944412231445, "logps/rejected": -1175.121337890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20340974628925323, "rewards/margins": 11.570235252380371, "rewards/rejected": -11.366826057434082, "step": 12690 }, { "epoch": 0.76, "learning_rate": 8.447266494489408e-07, "logits/chosen": -2.9896693229675293, "logits/rejected": -2.8988497257232666, "logps/chosen": -36.530799865722656, "logps/rejected": -1134.4080810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26738086342811584, "rewards/margins": 11.23652172088623, "rewards/rejected": -10.969141006469727, "step": 12700 }, { "epoch": 0.76, "learning_rate": 8.408305291093488e-07, "logits/chosen": -2.9627532958984375, "logits/rejected": -2.8922131061553955, "logps/chosen": -39.43797302246094, "logps/rejected": -1169.07080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.23556537926197052, "rewards/margins": 11.565427780151367, "rewards/rejected": -11.329861640930176, "step": 12710 }, { "epoch": 0.76, "learning_rate": 8.369415972849087e-07, "logits/chosen": -2.9630329608917236, "logits/rejected": -2.8846445083618164, "logps/chosen": -41.80946731567383, "logps/rejected": -1204.492431640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.27570343017578125, "rewards/margins": 11.947206497192383, "rewards/rejected": -11.671504020690918, "step": 12720 }, { "epoch": 0.76, "learning_rate": 8.330598708248011e-07, "logits/chosen": -2.9917588233947754, "logits/rejected": -2.8886303901672363, "logps/chosen": -35.02491760253906, "logps/rejected": -1190.856201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2939489483833313, "rewards/margins": 11.82092571258545, "rewards/rejected": -11.5269775390625, "step": 12730 }, { "epoch": 0.76, "learning_rate": 8.291853665469887e-07, "logits/chosen": -3.0022222995758057, "logits/rejected": -2.9099414348602295, "logps/chosen": -34.14080810546875, "logps/rejected": -1149.6165771484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25735577940940857, "rewards/margins": 11.375974655151367, "rewards/rejected": -11.11861801147461, "step": 12740 }, { "epoch": 0.76, "learning_rate": 8.253181012381409e-07, "logits/chosen": -2.967909097671509, "logits/rejected": -2.8771347999572754, "logps/chosen": -32.38801574707031, "logps/rejected": -1191.3057861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24263577163219452, "rewards/margins": 11.763463020324707, "rewards/rejected": -11.52082633972168, "step": 12750 }, { "epoch": 0.76, "learning_rate": 8.214580916535683e-07, "logits/chosen": -2.9858031272888184, "logits/rejected": -2.9003069400787354, "logps/chosen": -36.68400192260742, "logps/rejected": -1145.746826171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24204783141613007, "rewards/margins": 11.327195167541504, "rewards/rejected": -11.0851469039917, "step": 12760 }, { "epoch": 0.76, "learning_rate": 8.176053545171403e-07, "logits/chosen": -2.931042432785034, "logits/rejected": -2.848179578781128, "logps/chosen": -39.9108772277832, "logps/rejected": -1195.7877197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2667325437068939, "rewards/margins": 11.857017517089844, "rewards/rejected": -11.590283393859863, "step": 12770 }, { "epoch": 0.76, "learning_rate": 8.13759906521221e-07, "logits/chosen": -2.961855411529541, "logits/rejected": -2.8795785903930664, "logps/chosen": -36.678009033203125, "logps/rejected": -1099.8970947265625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.26351165771484375, "rewards/margins": 10.886760711669922, "rewards/rejected": -10.623248100280762, "step": 12780 }, { "epoch": 0.76, "learning_rate": 8.099217643265928e-07, "logits/chosen": -2.9696269035339355, "logits/rejected": -2.8655881881713867, "logps/chosen": -42.87749481201172, "logps/rejected": -1191.08935546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20433101058006287, "rewards/margins": 11.738983154296875, "rewards/rejected": -11.534652709960938, "step": 12790 }, { "epoch": 0.76, "learning_rate": 8.06090944562385e-07, "logits/chosen": -2.946380615234375, "logits/rejected": -2.8625597953796387, "logps/chosen": -38.646690368652344, "logps/rejected": -1153.239013671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.21835453808307648, "rewards/margins": 11.380167007446289, "rewards/rejected": -11.161811828613281, "step": 12800 }, { "epoch": 0.76, "learning_rate": 8.022674638259995e-07, "logits/chosen": -2.9987502098083496, "logits/rejected": -2.8940787315368652, "logps/chosen": -41.67390823364258, "logps/rejected": -1167.9033203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25852206349372864, "rewards/margins": 11.543627738952637, "rewards/rejected": -11.285106658935547, "step": 12810 }, { "epoch": 0.76, "learning_rate": 7.984513386830453e-07, "logits/chosen": -2.9981799125671387, "logits/rejected": -2.8899760246276855, "logps/chosen": -33.407527923583984, "logps/rejected": -1212.7255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2633565068244934, "rewards/margins": 12.022603034973145, "rewards/rejected": -11.759245872497559, "step": 12820 }, { "epoch": 0.77, "learning_rate": 7.94642585667261e-07, "logits/chosen": -2.9917218685150146, "logits/rejected": -2.886504888534546, "logps/chosen": -43.47898864746094, "logps/rejected": -1167.538330078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23688074946403503, "rewards/margins": 11.523409843444824, "rewards/rejected": -11.286529541015625, "step": 12830 }, { "epoch": 0.77, "learning_rate": 7.908412212804414e-07, "logits/chosen": -2.961561918258667, "logits/rejected": -2.8773391246795654, "logps/chosen": -36.449951171875, "logps/rejected": -1136.6986083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.253568172454834, "rewards/margins": 11.24908447265625, "rewards/rejected": -10.995516777038574, "step": 12840 }, { "epoch": 0.77, "learning_rate": 7.870472619923755e-07, "logits/chosen": -2.978184461593628, "logits/rejected": -2.8944172859191895, "logps/chosen": -35.462242126464844, "logps/rejected": -1217.478271484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.269324392080307, "rewards/margins": 12.061972618103027, "rewards/rejected": -11.792647361755371, "step": 12850 }, { "epoch": 0.77, "learning_rate": 7.832607242407631e-07, "logits/chosen": -2.9574227333068848, "logits/rejected": -2.873260974884033, "logps/chosen": -42.303531646728516, "logps/rejected": -1156.354736328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2686860263347626, "rewards/margins": 11.444948196411133, "rewards/rejected": -11.176262855529785, "step": 12860 }, { "epoch": 0.77, "learning_rate": 7.794816244311526e-07, "logits/chosen": -2.9495716094970703, "logits/rejected": -2.8776497840881348, "logps/chosen": -46.660858154296875, "logps/rejected": -1180.473876953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.14631585776805878, "rewards/margins": 11.579211235046387, "rewards/rejected": -11.432894706726074, "step": 12870 }, { "epoch": 0.77, "learning_rate": 7.757099789368663e-07, "logits/chosen": -2.9738612174987793, "logits/rejected": -2.9199798107147217, "logps/chosen": -39.99109649658203, "logps/rejected": -1132.2886962890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2528306543827057, "rewards/margins": 11.19813346862793, "rewards/rejected": -10.945302963256836, "step": 12880 }, { "epoch": 0.77, "learning_rate": 7.7194580409893e-07, "logits/chosen": -2.93312931060791, "logits/rejected": -2.864772081375122, "logps/chosen": -41.26976776123047, "logps/rejected": -1186.22509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24673688411712646, "rewards/margins": 11.730290412902832, "rewards/rejected": -11.483553886413574, "step": 12890 }, { "epoch": 0.77, "learning_rate": 7.681891162260016e-07, "logits/chosen": -2.9674689769744873, "logits/rejected": -2.866370677947998, "logps/chosen": -50.80043029785156, "logps/rejected": -1173.061767578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20062939822673798, "rewards/margins": 11.557558059692383, "rewards/rejected": -11.356928825378418, "step": 12900 }, { "epoch": 0.77, "learning_rate": 7.644399315943016e-07, "logits/chosen": -2.9826319217681885, "logits/rejected": -2.9009556770324707, "logps/chosen": -35.814727783203125, "logps/rejected": -1138.4031982421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.22952504456043243, "rewards/margins": 11.235901832580566, "rewards/rejected": -11.006376266479492, "step": 12910 }, { "epoch": 0.77, "learning_rate": 7.606982664475421e-07, "logits/chosen": -3.0007832050323486, "logits/rejected": -2.9029664993286133, "logps/chosen": -42.71171188354492, "logps/rejected": -1160.0499267578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2472195327281952, "rewards/margins": 11.453767776489258, "rewards/rejected": -11.206547737121582, "step": 12920 }, { "epoch": 0.77, "learning_rate": 7.569641369968539e-07, "logits/chosen": -2.9842305183410645, "logits/rejected": -2.9025282859802246, "logps/chosen": -41.08287811279297, "logps/rejected": -1165.5123291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.23395875096321106, "rewards/margins": 11.51643180847168, "rewards/rejected": -11.282472610473633, "step": 12930 }, { "epoch": 0.77, "learning_rate": 7.532375594207236e-07, "logits/chosen": -2.98160982131958, "logits/rejected": -2.8670520782470703, "logps/chosen": -42.616119384765625, "logps/rejected": -1165.1119384765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26418548822402954, "rewards/margins": 11.540108680725098, "rewards/rejected": -11.275923728942871, "step": 12940 }, { "epoch": 0.77, "learning_rate": 7.495185498649132e-07, "logits/chosen": -2.9818663597106934, "logits/rejected": -2.8885326385498047, "logps/chosen": -39.11552429199219, "logps/rejected": -1183.7349853515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23417524993419647, "rewards/margins": 11.688667297363281, "rewards/rejected": -11.454489707946777, "step": 12950 }, { "epoch": 0.77, "learning_rate": 7.45807124442399e-07, "logits/chosen": -2.974630355834961, "logits/rejected": -2.9097037315368652, "logps/chosen": -48.46276092529297, "logps/rejected": -1185.633544921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.18516448140144348, "rewards/margins": 11.674873352050781, "rewards/rejected": -11.489710807800293, "step": 12960 }, { "epoch": 0.77, "learning_rate": 7.421032992332967e-07, "logits/chosen": -2.9961819648742676, "logits/rejected": -2.900266170501709, "logps/chosen": -34.970909118652344, "logps/rejected": -1192.48779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26016154885292053, "rewards/margins": 11.820328712463379, "rewards/rejected": -11.560165405273438, "step": 12970 }, { "epoch": 0.77, "learning_rate": 7.384070902847943e-07, "logits/chosen": -2.9880919456481934, "logits/rejected": -2.9205920696258545, "logps/chosen": -44.4803352355957, "logps/rejected": -1191.315673828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2566242814064026, "rewards/margins": 11.794254302978516, "rewards/rejected": -11.537630081176758, "step": 12980 }, { "epoch": 0.77, "learning_rate": 7.347185136110808e-07, "logits/chosen": -2.9799933433532715, "logits/rejected": -2.8882176876068115, "logps/chosen": -52.24310302734375, "logps/rejected": -1208.7960205078125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.14129433035850525, "rewards/margins": 11.847726821899414, "rewards/rejected": -11.70643138885498, "step": 12990 }, { "epoch": 0.78, "learning_rate": 7.31037585193278e-07, "logits/chosen": -2.9599382877349854, "logits/rejected": -2.8870794773101807, "logps/chosen": -40.96083450317383, "logps/rejected": -1221.837158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2668150067329407, "rewards/margins": 12.122048377990723, "rewards/rejected": -11.855232238769531, "step": 13000 }, { "epoch": 0.78, "learning_rate": 7.273643209793719e-07, "logits/chosen": -2.9862351417541504, "logits/rejected": -2.8769006729125977, "logps/chosen": -39.10072708129883, "logps/rejected": -1170.840576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2883894145488739, "rewards/margins": 11.608667373657227, "rewards/rejected": -11.320279121398926, "step": 13010 }, { "epoch": 0.78, "learning_rate": 7.236987368841386e-07, "logits/chosen": -2.9622349739074707, "logits/rejected": -2.86673641204834, "logps/chosen": -30.428333282470703, "logps/rejected": -1206.8980712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2658829391002655, "rewards/margins": 11.95760726928711, "rewards/rejected": -11.69172477722168, "step": 13020 }, { "epoch": 0.78, "learning_rate": 7.200408487890859e-07, "logits/chosen": -3.006075382232666, "logits/rejected": -2.9050614833831787, "logps/chosen": -35.234127044677734, "logps/rejected": -1225.963623046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2573295533657074, "rewards/margins": 12.134730339050293, "rewards/rejected": -11.877401351928711, "step": 13030 }, { "epoch": 0.78, "learning_rate": 7.163906725423717e-07, "logits/chosen": -2.9853854179382324, "logits/rejected": -2.8901495933532715, "logps/chosen": -36.236148834228516, "logps/rejected": -1134.234130859375, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": 0.2383004128932953, "rewards/margins": 11.20942211151123, "rewards/rejected": -10.971121788024902, "step": 13040 }, { "epoch": 0.78, "learning_rate": 7.127482239587449e-07, "logits/chosen": -2.977261781692505, "logits/rejected": -2.897221326828003, "logps/chosen": -34.922325134277344, "logps/rejected": -1177.4359130859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24805620312690735, "rewards/margins": 11.641170501708984, "rewards/rejected": -11.393115043640137, "step": 13050 }, { "epoch": 0.78, "learning_rate": 7.091135188194729e-07, "logits/chosen": -2.962979793548584, "logits/rejected": -2.8422176837921143, "logps/chosen": -33.50499725341797, "logps/rejected": -1171.582275390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26243892312049866, "rewards/margins": 11.603673934936523, "rewards/rejected": -11.341236114501953, "step": 13060 }, { "epoch": 0.78, "learning_rate": 7.054865728722732e-07, "logits/chosen": -2.9959750175476074, "logits/rejected": -2.905747652053833, "logps/chosen": -37.749305725097656, "logps/rejected": -1235.416259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.22743234038352966, "rewards/margins": 12.205141067504883, "rewards/rejected": -11.977709770202637, "step": 13070 }, { "epoch": 0.78, "learning_rate": 7.018674018312468e-07, "logits/chosen": -2.9908010959625244, "logits/rejected": -2.8921220302581787, "logps/chosen": -42.15400314331055, "logps/rejected": -1175.144287109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2667926549911499, "rewards/margins": 11.64500904083252, "rewards/rejected": -11.378215789794922, "step": 13080 }, { "epoch": 0.78, "learning_rate": 6.982560213768088e-07, "logits/chosen": -2.9786126613616943, "logits/rejected": -2.9103143215179443, "logps/chosen": -35.540931701660156, "logps/rejected": -1200.62939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.263420045375824, "rewards/margins": 11.888856887817383, "rewards/rejected": -11.625435829162598, "step": 13090 }, { "epoch": 0.78, "learning_rate": 6.946524471556212e-07, "logits/chosen": -2.9769179821014404, "logits/rejected": -2.898912191390991, "logps/chosen": -45.150596618652344, "logps/rejected": -1179.6561279296875, "loss": 0.0291, "rewards/accuracies": 1.0, "rewards/chosen": 0.18312278389930725, "rewards/margins": 11.611780166625977, "rewards/rejected": -11.428657531738281, "step": 13100 }, { "epoch": 0.78, "learning_rate": 6.91056694780522e-07, "logits/chosen": -2.9817028045654297, "logits/rejected": -2.8993420600891113, "logps/chosen": -43.18596649169922, "logps/rejected": -1130.6590576171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2501266896724701, "rewards/margins": 11.170671463012695, "rewards/rejected": -10.92054557800293, "step": 13110 }, { "epoch": 0.78, "learning_rate": 6.874687798304657e-07, "logits/chosen": -2.9815475940704346, "logits/rejected": -2.89372181892395, "logps/chosen": -46.394866943359375, "logps/rejected": -1153.734375, "loss": 0.0344, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.160084530711174, "rewards/margins": 11.332670211791992, "rewards/rejected": -11.172586441040039, "step": 13120 }, { "epoch": 0.78, "learning_rate": 6.83888717850445e-07, "logits/chosen": -3.010356903076172, "logits/rejected": -2.8883392810821533, "logps/chosen": -45.560157775878906, "logps/rejected": -1187.464599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.25042614340782166, "rewards/margins": 11.764959335327148, "rewards/rejected": -11.514533996582031, "step": 13130 }, { "epoch": 0.78, "learning_rate": 6.803165243514315e-07, "logits/chosen": -2.982649087905884, "logits/rejected": -2.885910987854004, "logps/chosen": -39.95037078857422, "logps/rejected": -1204.854248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.22669526934623718, "rewards/margins": 11.903605461120605, "rewards/rejected": -11.676908493041992, "step": 13140 }, { "epoch": 0.78, "learning_rate": 6.767522148103054e-07, "logits/chosen": -2.959219217300415, "logits/rejected": -2.892970323562622, "logps/chosen": -52.755149841308594, "logps/rejected": -1143.560791015625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 0.11989717185497284, "rewards/margins": 11.185112953186035, "rewards/rejected": -11.065216064453125, "step": 13150 }, { "epoch": 0.78, "learning_rate": 6.731958046697893e-07, "logits/chosen": -3.003920793533325, "logits/rejected": -2.933013439178467, "logps/chosen": -44.93250274658203, "logps/rejected": -1171.6116943359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26744818687438965, "rewards/margins": 11.615091323852539, "rewards/rejected": -11.34764289855957, "step": 13160 }, { "epoch": 0.79, "learning_rate": 6.696473093383798e-07, "logits/chosen": -2.981093406677246, "logits/rejected": -2.8797850608825684, "logps/chosen": -50.78957748413086, "logps/rejected": -1134.0130615234375, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": 0.13959218561649323, "rewards/margins": 11.107871055603027, "rewards/rejected": -10.968278884887695, "step": 13170 }, { "epoch": 0.79, "learning_rate": 6.66106744190283e-07, "logits/chosen": -2.9505467414855957, "logits/rejected": -2.841791868209839, "logps/chosen": -35.058837890625, "logps/rejected": -1135.846923828125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.23811304569244385, "rewards/margins": 11.219001770019531, "rewards/rejected": -10.980887413024902, "step": 13180 }, { "epoch": 0.79, "learning_rate": 6.625741245653466e-07, "logits/chosen": -2.9578518867492676, "logits/rejected": -2.9062275886535645, "logps/chosen": -37.12611770629883, "logps/rejected": -1199.2882080078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24143870174884796, "rewards/margins": 11.844308853149414, "rewards/rejected": -11.602869033813477, "step": 13190 }, { "epoch": 0.79, "learning_rate": 6.590494657689909e-07, "logits/chosen": -2.9502573013305664, "logits/rejected": -2.885908603668213, "logps/chosen": -43.99097442626953, "logps/rejected": -1180.81884765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.16462095081806183, "rewards/margins": 11.601786613464355, "rewards/rejected": -11.437165260314941, "step": 13200 }, { "epoch": 0.79, "learning_rate": 6.5553278307215e-07, "logits/chosen": -3.021517753601074, "logits/rejected": -2.8670706748962402, "logps/chosen": -38.20450973510742, "logps/rejected": -1148.793701171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2596825063228607, "rewards/margins": 11.368005752563477, "rewards/rejected": -11.10832405090332, "step": 13210 }, { "epoch": 0.79, "learning_rate": 6.520240917111961e-07, "logits/chosen": -2.9677319526672363, "logits/rejected": -2.868558645248413, "logps/chosen": -40.249149322509766, "logps/rejected": -1184.193603515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.23464694619178772, "rewards/margins": 11.697311401367188, "rewards/rejected": -11.462663650512695, "step": 13220 }, { "epoch": 0.79, "learning_rate": 6.485234068878809e-07, "logits/chosen": -2.966905117034912, "logits/rejected": -2.858126640319824, "logps/chosen": -35.00957489013672, "logps/rejected": -1211.048095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2635464370250702, "rewards/margins": 11.994108200073242, "rewards/rejected": -11.730562210083008, "step": 13230 }, { "epoch": 0.79, "learning_rate": 6.450307437692663e-07, "logits/chosen": -2.9755916595458984, "logits/rejected": -2.9098384380340576, "logps/chosen": -35.93362808227539, "logps/rejected": -1171.087890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27591991424560547, "rewards/margins": 11.599993705749512, "rewards/rejected": -11.324073791503906, "step": 13240 }, { "epoch": 0.79, "learning_rate": 6.415461174876589e-07, "logits/chosen": -2.936924934387207, "logits/rejected": -2.8551576137542725, "logps/chosen": -39.1335563659668, "logps/rejected": -1140.230224609375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.2351628541946411, "rewards/margins": 11.246999740600586, "rewards/rejected": -11.01183795928955, "step": 13250 }, { "epoch": 0.79, "learning_rate": 6.380695431405453e-07, "logits/chosen": -3.007658004760742, "logits/rejected": -2.888615608215332, "logps/chosen": -56.616424560546875, "logps/rejected": -1182.036376953125, "loss": 0.0426, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.10373535007238388, "rewards/margins": 11.54834270477295, "rewards/rejected": -11.444607734680176, "step": 13260 }, { "epoch": 0.79, "learning_rate": 6.346010357905269e-07, "logits/chosen": -2.967711925506592, "logits/rejected": -2.8649659156799316, "logps/chosen": -41.1605110168457, "logps/rejected": -1158.9202880859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27703073620796204, "rewards/margins": 11.47865104675293, "rewards/rejected": -11.201620101928711, "step": 13270 }, { "epoch": 0.79, "learning_rate": 6.311406104652534e-07, "logits/chosen": -2.9286251068115234, "logits/rejected": -2.853178024291992, "logps/chosen": -34.32659149169922, "logps/rejected": -1182.890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26697105169296265, "rewards/margins": 11.712141036987305, "rewards/rejected": -11.445170402526855, "step": 13280 }, { "epoch": 0.79, "learning_rate": 6.276882821573566e-07, "logits/chosen": -2.981884479522705, "logits/rejected": -2.9003021717071533, "logps/chosen": -44.560428619384766, "logps/rejected": -1145.7440185546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26878196001052856, "rewards/margins": 11.354134559631348, "rewards/rejected": -11.085352897644043, "step": 13290 }, { "epoch": 0.79, "learning_rate": 6.242440658243915e-07, "logits/chosen": -3.0070464611053467, "logits/rejected": -2.9118566513061523, "logps/chosen": -43.006378173828125, "logps/rejected": -1182.9075927734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24991340935230255, "rewards/margins": 11.693563461303711, "rewards/rejected": -11.443650245666504, "step": 13300 }, { "epoch": 0.79, "learning_rate": 6.208079763887626e-07, "logits/chosen": -2.9662320613861084, "logits/rejected": -2.8882341384887695, "logps/chosen": -46.00581741333008, "logps/rejected": -1162.703125, "loss": 0.0114, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.17499391734600067, "rewards/margins": 11.422395706176758, "rewards/rejected": -11.247401237487793, "step": 13310 }, { "epoch": 0.79, "learning_rate": 6.173800287376669e-07, "logits/chosen": -2.970722198486328, "logits/rejected": -2.86197829246521, "logps/chosen": -43.18091583251953, "logps/rejected": -1163.251708984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23576636612415314, "rewards/margins": 11.49732780456543, "rewards/rejected": -11.26156234741211, "step": 13320 }, { "epoch": 0.79, "learning_rate": 6.139602377230247e-07, "logits/chosen": -3.000231981277466, "logits/rejected": -2.899887800216675, "logps/chosen": -34.10944366455078, "logps/rejected": -1230.47509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.25906723737716675, "rewards/margins": 12.179491996765137, "rewards/rejected": -11.920424461364746, "step": 13330 }, { "epoch": 0.8, "learning_rate": 6.105486181614176e-07, "logits/chosen": -2.976515769958496, "logits/rejected": -2.8901095390319824, "logps/chosen": -39.76033020019531, "logps/rejected": -1223.3797607421875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2533120810985565, "rewards/margins": 12.119939804077148, "rewards/rejected": -11.86662769317627, "step": 13340 }, { "epoch": 0.8, "learning_rate": 6.071451848340235e-07, "logits/chosen": -3.0024428367614746, "logits/rejected": -2.9125306606292725, "logps/chosen": -40.2620849609375, "logps/rejected": -1222.3720703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2690242528915405, "rewards/margins": 12.100770950317383, "rewards/rejected": -11.831746101379395, "step": 13350 }, { "epoch": 0.8, "learning_rate": 6.037499524865523e-07, "logits/chosen": -2.977649211883545, "logits/rejected": -2.9158577919006348, "logps/chosen": -36.540618896484375, "logps/rejected": -1145.9830322265625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.25233152508735657, "rewards/margins": 11.31787395477295, "rewards/rejected": -11.065542221069336, "step": 13360 }, { "epoch": 0.8, "learning_rate": 6.003629358291832e-07, "logits/chosen": -2.9660048484802246, "logits/rejected": -2.8753650188446045, "logps/chosen": -34.7838134765625, "logps/rejected": -1191.35302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2492460310459137, "rewards/margins": 11.784268379211426, "rewards/rejected": -11.535021781921387, "step": 13370 }, { "epoch": 0.8, "learning_rate": 5.969841495364978e-07, "logits/chosen": -3.0068647861480713, "logits/rejected": -2.894684076309204, "logps/chosen": -41.882354736328125, "logps/rejected": -1190.75390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.21789176762104034, "rewards/margins": 11.755881309509277, "rewards/rejected": -11.53799057006836, "step": 13380 }, { "epoch": 0.8, "learning_rate": 5.936136082474228e-07, "logits/chosen": -2.962165355682373, "logits/rejected": -2.8952765464782715, "logps/chosen": -35.17695236206055, "logps/rejected": -1119.89697265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.27416354417800903, "rewards/margins": 11.085711479187012, "rewards/rejected": -10.811548233032227, "step": 13390 }, { "epoch": 0.8, "learning_rate": 5.902513265651585e-07, "logits/chosen": -2.957274913787842, "logits/rejected": -2.8875694274902344, "logps/chosen": -51.1654052734375, "logps/rejected": -1127.357666015625, "loss": 0.0601, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.11431536823511124, "rewards/margins": 11.005800247192383, "rewards/rejected": -10.891483306884766, "step": 13400 }, { "epoch": 0.8, "learning_rate": 5.868973190571214e-07, "logits/chosen": -2.9975578784942627, "logits/rejected": -2.901479959487915, "logps/chosen": -37.99674987792969, "logps/rejected": -1213.0218505859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2573775053024292, "rewards/margins": 12.000085830688477, "rewards/rejected": -11.742708206176758, "step": 13410 }, { "epoch": 0.8, "learning_rate": 5.835516002548816e-07, "logits/chosen": -2.9739162921905518, "logits/rejected": -2.896472692489624, "logps/chosen": -36.52669143676758, "logps/rejected": -1174.554443359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2719309628009796, "rewards/margins": 11.639739036560059, "rewards/rejected": -11.367810249328613, "step": 13420 }, { "epoch": 0.8, "learning_rate": 5.802141846540932e-07, "logits/chosen": -2.9541172981262207, "logits/rejected": -2.8446662425994873, "logps/chosen": -35.76244354248047, "logps/rejected": -1160.61865234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24146828055381775, "rewards/margins": 11.458246231079102, "rewards/rejected": -11.216778755187988, "step": 13430 }, { "epoch": 0.8, "learning_rate": 5.768850867144385e-07, "logits/chosen": -2.9504048824310303, "logits/rejected": -2.8676021099090576, "logps/chosen": -44.78017044067383, "logps/rejected": -1136.851318359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23313739895820618, "rewards/margins": 11.232633590698242, "rewards/rejected": -10.999497413635254, "step": 13440 }, { "epoch": 0.8, "learning_rate": 5.735643208595623e-07, "logits/chosen": -2.9532885551452637, "logits/rejected": -2.8688886165618896, "logps/chosen": -40.60300827026367, "logps/rejected": -1152.687255859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.21493062376976013, "rewards/margins": 11.355000495910645, "rewards/rejected": -11.140069961547852, "step": 13450 }, { "epoch": 0.8, "learning_rate": 5.702519014770108e-07, "logits/chosen": -2.956547737121582, "logits/rejected": -2.883270263671875, "logps/chosen": -39.3338623046875, "logps/rejected": -1153.084228515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2551047205924988, "rewards/margins": 11.41288948059082, "rewards/rejected": -11.157785415649414, "step": 13460 }, { "epoch": 0.8, "learning_rate": 5.669478429181646e-07, "logits/chosen": -2.976693868637085, "logits/rejected": -2.9147417545318604, "logps/chosen": -40.274742126464844, "logps/rejected": -1158.727294921875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.2740509510040283, "rewards/margins": 11.483926773071289, "rewards/rejected": -11.20987606048584, "step": 13470 }, { "epoch": 0.8, "learning_rate": 5.636521594981851e-07, "logits/chosen": -3.001392364501953, "logits/rejected": -2.9010868072509766, "logps/chosen": -33.172786712646484, "logps/rejected": -1104.7462158203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24292877316474915, "rewards/margins": 10.913264274597168, "rewards/rejected": -10.67033576965332, "step": 13480 }, { "epoch": 0.8, "learning_rate": 5.603648654959454e-07, "logits/chosen": -2.966050148010254, "logits/rejected": -2.8624427318573, "logps/chosen": -44.26316452026367, "logps/rejected": -1164.3056640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2611693739891052, "rewards/margins": 11.533575057983398, "rewards/rejected": -11.272404670715332, "step": 13490 }, { "epoch": 0.81, "learning_rate": 5.570859751539687e-07, "logits/chosen": -2.998640298843384, "logits/rejected": -2.8845062255859375, "logps/chosen": -43.430152893066406, "logps/rejected": -1117.416748046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25751370191574097, "rewards/margins": 11.059386253356934, "rewards/rejected": -10.801873207092285, "step": 13500 }, { "epoch": 0.81, "learning_rate": 5.538155026783726e-07, "logits/chosen": -2.9563815593719482, "logits/rejected": -2.899205207824707, "logps/chosen": -42.38550567626953, "logps/rejected": -1106.20361328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.21064870059490204, "rewards/margins": 10.882638931274414, "rewards/rejected": -10.671991348266602, "step": 13510 }, { "epoch": 0.81, "learning_rate": 5.505534622387998e-07, "logits/chosen": -2.9694533348083496, "logits/rejected": -2.888462781906128, "logps/chosen": -41.302391052246094, "logps/rejected": -1144.881591796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.29146814346313477, "rewards/margins": 11.350311279296875, "rewards/rejected": -11.058843612670898, "step": 13520 }, { "epoch": 0.81, "learning_rate": 5.472998679683619e-07, "logits/chosen": -2.989863395690918, "logits/rejected": -2.8850631713867188, "logps/chosen": -46.51256561279297, "logps/rejected": -1135.556884765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2202836275100708, "rewards/margins": 11.197112083435059, "rewards/rejected": -10.976829528808594, "step": 13530 }, { "epoch": 0.81, "learning_rate": 5.440547339635769e-07, "logits/chosen": -2.988269805908203, "logits/rejected": -2.877368927001953, "logps/chosen": -47.24034881591797, "logps/rejected": -1196.1737060546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20407715439796448, "rewards/margins": 11.789624214172363, "rewards/rejected": -11.58554744720459, "step": 13540 }, { "epoch": 0.81, "learning_rate": 5.408180742843069e-07, "logits/chosen": -2.992255449295044, "logits/rejected": -2.8963570594787598, "logps/chosen": -35.81533432006836, "logps/rejected": -1181.76513671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.28264564275741577, "rewards/margins": 11.71831226348877, "rewards/rejected": -11.435664176940918, "step": 13550 }, { "epoch": 0.81, "learning_rate": 5.375899029536996e-07, "logits/chosen": -2.930659055709839, "logits/rejected": -2.8764328956604004, "logps/chosen": -37.68061447143555, "logps/rejected": -1136.305419921875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2518436312675476, "rewards/margins": 11.235445022583008, "rewards/rejected": -10.983601570129395, "step": 13560 }, { "epoch": 0.81, "learning_rate": 5.34370233958125e-07, "logits/chosen": -2.9996650218963623, "logits/rejected": -2.900214433670044, "logps/chosen": -38.755828857421875, "logps/rejected": -1173.5491943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.22721488773822784, "rewards/margins": 11.58405876159668, "rewards/rejected": -11.356842041015625, "step": 13570 }, { "epoch": 0.81, "learning_rate": 5.311590812471165e-07, "logits/chosen": -2.983910083770752, "logits/rejected": -2.866450548171997, "logps/chosen": -36.68812942504883, "logps/rejected": -1167.48486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24982011318206787, "rewards/margins": 11.546558380126953, "rewards/rejected": -11.296738624572754, "step": 13580 }, { "epoch": 0.81, "learning_rate": 5.279564587333077e-07, "logits/chosen": -2.973959445953369, "logits/rejected": -2.8823814392089844, "logps/chosen": -40.59172439575195, "logps/rejected": -1183.706787109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.16834965348243713, "rewards/margins": 11.632944107055664, "rewards/rejected": -11.464593887329102, "step": 13590 }, { "epoch": 0.81, "learning_rate": 5.247623802923788e-07, "logits/chosen": -2.99957275390625, "logits/rejected": -2.927642583847046, "logps/chosen": -37.5772705078125, "logps/rejected": -1169.551513671875, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.25102782249450684, "rewards/margins": 11.556093215942383, "rewards/rejected": -11.305065155029297, "step": 13600 }, { "epoch": 0.81, "learning_rate": 5.215768597629872e-07, "logits/chosen": -3.0213897228240967, "logits/rejected": -2.948288679122925, "logps/chosen": -38.369407653808594, "logps/rejected": -1166.837158203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.24890944361686707, "rewards/margins": 11.543437957763672, "rewards/rejected": -11.294528007507324, "step": 13610 }, { "epoch": 0.81, "learning_rate": 5.18399910946715e-07, "logits/chosen": -2.9764206409454346, "logits/rejected": -2.8837597370147705, "logps/chosen": -39.47169876098633, "logps/rejected": -1128.37939453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27628183364868164, "rewards/margins": 11.182417869567871, "rewards/rejected": -10.906135559082031, "step": 13620 }, { "epoch": 0.81, "learning_rate": 5.152315476080058e-07, "logits/chosen": -2.99161958694458, "logits/rejected": -2.888735055923462, "logps/chosen": -36.64089584350586, "logps/rejected": -1184.9560546875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26178884506225586, "rewards/margins": 11.722304344177246, "rewards/rejected": -11.460515975952148, "step": 13630 }, { "epoch": 0.81, "learning_rate": 5.12071783474106e-07, "logits/chosen": -2.9445621967315674, "logits/rejected": -2.8776438236236572, "logps/chosen": -32.51543426513672, "logps/rejected": -1178.79443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2428121119737625, "rewards/margins": 11.661029815673828, "rewards/rejected": -11.418217658996582, "step": 13640 }, { "epoch": 0.81, "learning_rate": 5.089206322350046e-07, "logits/chosen": -2.938230037689209, "logits/rejected": -2.8862781524658203, "logps/chosen": -36.7374267578125, "logps/rejected": -1174.1536865234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2473042905330658, "rewards/margins": 11.60869312286377, "rewards/rejected": -11.361387252807617, "step": 13650 }, { "epoch": 0.81, "learning_rate": 5.057781075433751e-07, "logits/chosen": -2.9784371852874756, "logits/rejected": -2.879121780395508, "logps/chosen": -34.29950714111328, "logps/rejected": -1198.2703857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2746104896068573, "rewards/margins": 11.869779586791992, "rewards/rejected": -11.595169067382812, "step": 13660 }, { "epoch": 0.82, "learning_rate": 5.026442230145157e-07, "logits/chosen": -3.013833522796631, "logits/rejected": -2.9210457801818848, "logps/chosen": -38.2286376953125, "logps/rejected": -1209.5413818359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.25799059867858887, "rewards/margins": 11.972813606262207, "rewards/rejected": -11.714822769165039, "step": 13670 }, { "epoch": 0.82, "learning_rate": 4.995189922262877e-07, "logits/chosen": -2.969776153564453, "logits/rejected": -2.8898282051086426, "logps/chosen": -37.518836975097656, "logps/rejected": -1160.535888671875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 0.28336313366889954, "rewards/margins": 11.51307487487793, "rewards/rejected": -11.229713439941406, "step": 13680 }, { "epoch": 0.82, "learning_rate": 4.964024287190644e-07, "logits/chosen": -3.0035648345947266, "logits/rejected": -2.877570867538452, "logps/chosen": -35.95438003540039, "logps/rejected": -1169.818115234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2181353121995926, "rewards/margins": 11.51692008972168, "rewards/rejected": -11.298784255981445, "step": 13690 }, { "epoch": 0.82, "learning_rate": 4.932945459956617e-07, "logits/chosen": -2.9910061359405518, "logits/rejected": -2.8866748809814453, "logps/chosen": -39.215965270996094, "logps/rejected": -1211.7373046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.23650667071342468, "rewards/margins": 11.977807998657227, "rewards/rejected": -11.741301536560059, "step": 13700 }, { "epoch": 0.82, "learning_rate": 4.901953575212884e-07, "logits/chosen": -2.955109119415283, "logits/rejected": -2.8593993186950684, "logps/chosen": -33.12800216674805, "logps/rejected": -1187.9454345703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2598869204521179, "rewards/margins": 11.754145622253418, "rewards/rejected": -11.49425983428955, "step": 13710 }, { "epoch": 0.82, "learning_rate": 4.87104876723484e-07, "logits/chosen": -2.97871470451355, "logits/rejected": -2.9106736183166504, "logps/chosen": -45.50674819946289, "logps/rejected": -1095.191162109375, "loss": 0.0224, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.19295263290405273, "rewards/margins": 10.765787124633789, "rewards/rejected": -10.572832107543945, "step": 13720 }, { "epoch": 0.82, "learning_rate": 4.840231169920609e-07, "logits/chosen": -2.9881467819213867, "logits/rejected": -2.8904480934143066, "logps/chosen": -42.759056091308594, "logps/rejected": -1159.2969970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26836416125297546, "rewards/margins": 11.489990234375, "rewards/rejected": -11.221624374389648, "step": 13730 }, { "epoch": 0.82, "learning_rate": 4.809500916790466e-07, "logits/chosen": -2.972191572189331, "logits/rejected": -2.8843045234680176, "logps/chosen": -41.01798629760742, "logps/rejected": -1183.128662109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.27396029233932495, "rewards/margins": 11.734207153320312, "rewards/rejected": -11.460246086120605, "step": 13740 }, { "epoch": 0.82, "learning_rate": 4.778858140986259e-07, "logits/chosen": -2.9863314628601074, "logits/rejected": -2.8930420875549316, "logps/chosen": -40.379005432128906, "logps/rejected": -1176.2315673828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.244021937251091, "rewards/margins": 11.61732292175293, "rewards/rejected": -11.373300552368164, "step": 13750 }, { "epoch": 0.82, "learning_rate": 4.748302975270838e-07, "logits/chosen": -2.966677188873291, "logits/rejected": -2.8717360496520996, "logps/chosen": -38.62614822387695, "logps/rejected": -1169.1295166015625, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": 0.23107238113880157, "rewards/margins": 11.557990074157715, "rewards/rejected": -11.32691764831543, "step": 13760 }, { "epoch": 0.82, "learning_rate": 4.71783555202745e-07, "logits/chosen": -2.9675755500793457, "logits/rejected": -2.8681981563568115, "logps/chosen": -33.303367614746094, "logps/rejected": -1172.384521484375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23977091908454895, "rewards/margins": 11.579814910888672, "rewards/rejected": -11.340044021606445, "step": 13770 }, { "epoch": 0.82, "learning_rate": 4.6874560032592333e-07, "logits/chosen": -2.9650886058807373, "logits/rejected": -2.901951313018799, "logps/chosen": -36.558921813964844, "logps/rejected": -1168.328369140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.23895962536334991, "rewards/margins": 11.552337646484375, "rewards/rejected": -11.31337833404541, "step": 13780 }, { "epoch": 0.82, "learning_rate": 4.6571644605885565e-07, "logits/chosen": -2.9955286979675293, "logits/rejected": -2.8998477458953857, "logps/chosen": -34.65021514892578, "logps/rejected": -1181.9649658203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26836657524108887, "rewards/margins": 11.719107627868652, "rewards/rejected": -11.450740814208984, "step": 13790 }, { "epoch": 0.82, "learning_rate": 4.6269610552565153e-07, "logits/chosen": -2.9508631229400635, "logits/rejected": -2.8615589141845703, "logps/chosen": -40.997074127197266, "logps/rejected": -1157.5054931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26562055945396423, "rewards/margins": 11.463315963745117, "rewards/rejected": -11.197696685791016, "step": 13800 }, { "epoch": 0.82, "learning_rate": 4.5968459181223416e-07, "logits/chosen": -2.998640537261963, "logits/rejected": -2.8936586380004883, "logps/chosen": -38.365074157714844, "logps/rejected": -1189.4290771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24750745296478271, "rewards/margins": 11.761906623840332, "rewards/rejected": -11.514399528503418, "step": 13810 }, { "epoch": 0.82, "learning_rate": 4.566819179662829e-07, "logits/chosen": -2.9771323204040527, "logits/rejected": -2.9159557819366455, "logps/chosen": -37.31814956665039, "logps/rejected": -1158.6568603515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2524333596229553, "rewards/margins": 11.465685844421387, "rewards/rejected": -11.213251113891602, "step": 13820 }, { "epoch": 0.82, "learning_rate": 4.5368809699717855e-07, "logits/chosen": -2.9688961505889893, "logits/rejected": -2.922121524810791, "logps/chosen": -43.86768341064453, "logps/rejected": -1082.0341796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.26399391889572144, "rewards/margins": 10.710981369018555, "rewards/rejected": -10.446988105773926, "step": 13830 }, { "epoch": 0.83, "learning_rate": 4.507031418759447e-07, "logits/chosen": -2.9917361736297607, "logits/rejected": -2.9121644496917725, "logps/chosen": -39.75749969482422, "logps/rejected": -1233.6302490234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2655358612537384, "rewards/margins": 12.222577095031738, "rewards/rejected": -11.957040786743164, "step": 13840 }, { "epoch": 0.83, "learning_rate": 4.477270655351942e-07, "logits/chosen": -2.9869115352630615, "logits/rejected": -2.8576526641845703, "logps/chosen": -35.602413177490234, "logps/rejected": -1161.718994140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.246074840426445, "rewards/margins": 11.48668384552002, "rewards/rejected": -11.240609169006348, "step": 13850 }, { "epoch": 0.83, "learning_rate": 4.447598808690695e-07, "logits/chosen": -2.9945571422576904, "logits/rejected": -2.8879222869873047, "logps/chosen": -38.63846206665039, "logps/rejected": -1166.460205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.23242774605751038, "rewards/margins": 11.529361724853516, "rewards/rejected": -11.296934127807617, "step": 13860 }, { "epoch": 0.83, "learning_rate": 4.418016007331924e-07, "logits/chosen": -2.989121913909912, "logits/rejected": -2.9054603576660156, "logps/chosen": -35.35349655151367, "logps/rejected": -1215.9620361328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2580859065055847, "rewards/margins": 12.039027214050293, "rewards/rejected": -11.7809419631958, "step": 13870 }, { "epoch": 0.83, "learning_rate": 4.3885223794460114e-07, "logits/chosen": -3.0032284259796143, "logits/rejected": -2.9015555381774902, "logps/chosen": -42.59313201904297, "logps/rejected": -1137.925048828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27779680490493774, "rewards/margins": 11.290609359741211, "rewards/rejected": -11.012812614440918, "step": 13880 }, { "epoch": 0.83, "learning_rate": 4.359118052817013e-07, "logits/chosen": -2.9686026573181152, "logits/rejected": -2.8423471450805664, "logps/chosen": -52.9781379699707, "logps/rejected": -1153.206298828125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.1409260332584381, "rewards/margins": 11.295442581176758, "rewards/rejected": -11.154516220092773, "step": 13890 }, { "epoch": 0.83, "learning_rate": 4.3298031548420716e-07, "logits/chosen": -2.98628568649292, "logits/rejected": -2.8753533363342285, "logps/chosen": -31.660253524780273, "logps/rejected": -1160.2275390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25196897983551025, "rewards/margins": 11.479164123535156, "rewards/rejected": -11.227193832397461, "step": 13900 }, { "epoch": 0.83, "learning_rate": 4.300577812530868e-07, "logits/chosen": -2.965773582458496, "logits/rejected": -2.8610873222351074, "logps/chosen": -35.574424743652344, "logps/rejected": -1130.3165283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.27412158250808716, "rewards/margins": 11.21101188659668, "rewards/rejected": -10.936891555786133, "step": 13910 }, { "epoch": 0.83, "learning_rate": 4.2714421525050734e-07, "logits/chosen": -2.9899356365203857, "logits/rejected": -2.9014055728912354, "logps/chosen": -51.366607666015625, "logps/rejected": -1134.1016845703125, "loss": 0.0098, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.19141089916229248, "rewards/margins": 11.15606689453125, "rewards/rejected": -10.964655876159668, "step": 13920 }, { "epoch": 0.83, "learning_rate": 4.242396300997809e-07, "logits/chosen": -2.9890694618225098, "logits/rejected": -2.8962855339050293, "logps/chosen": -38.18283462524414, "logps/rejected": -1163.6573486328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27921703457832336, "rewards/margins": 11.5448579788208, "rewards/rejected": -11.265640258789062, "step": 13930 }, { "epoch": 0.83, "learning_rate": 4.213440383853093e-07, "logits/chosen": -2.9614646434783936, "logits/rejected": -2.8839545249938965, "logps/chosen": -39.361228942871094, "logps/rejected": -1175.473876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24087107181549072, "rewards/margins": 11.62691879272461, "rewards/rejected": -11.386045455932617, "step": 13940 }, { "epoch": 0.83, "learning_rate": 4.1845745265252673e-07, "logits/chosen": -2.985910415649414, "logits/rejected": -2.8729171752929688, "logps/chosen": -32.21543502807617, "logps/rejected": -1187.541748046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2646552622318268, "rewards/margins": 11.759527206420898, "rewards/rejected": -11.494873046875, "step": 13950 }, { "epoch": 0.83, "learning_rate": 4.15579885407853e-07, "logits/chosen": -2.9922990798950195, "logits/rejected": -2.9121081829071045, "logps/chosen": -34.394615173339844, "logps/rejected": -1174.5438232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2593531012535095, "rewards/margins": 11.627801895141602, "rewards/rejected": -11.368449211120605, "step": 13960 }, { "epoch": 0.83, "learning_rate": 4.1271134911862936e-07, "logits/chosen": -2.979200839996338, "logits/rejected": -2.9005274772644043, "logps/chosen": -41.68241500854492, "logps/rejected": -1149.0047607421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2437378168106079, "rewards/margins": 11.348532676696777, "rewards/rejected": -11.104795455932617, "step": 13970 }, { "epoch": 0.83, "learning_rate": 4.0985185621307293e-07, "logits/chosen": -3.0019867420196533, "logits/rejected": -2.898019552230835, "logps/chosen": -38.264286041259766, "logps/rejected": -1233.1590576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2543830871582031, "rewards/margins": 12.202974319458008, "rewards/rejected": -11.948590278625488, "step": 13980 }, { "epoch": 0.83, "learning_rate": 4.0700141908021793e-07, "logits/chosen": -2.985034465789795, "logits/rejected": -2.8880136013031006, "logps/chosen": -46.12855911254883, "logps/rejected": -1136.7421875, "loss": 0.0708, "rewards/accuracies": 1.0, "rewards/chosen": 0.22140094637870789, "rewards/margins": 11.21202564239502, "rewards/rejected": -10.99062442779541, "step": 13990 }, { "epoch": 0.83, "learning_rate": 4.041600500698642e-07, "logits/chosen": -2.990851879119873, "logits/rejected": -2.8786635398864746, "logps/chosen": -38.150657653808594, "logps/rejected": -1181.6973876953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.22031119465827942, "rewards/margins": 11.647249221801758, "rewards/rejected": -11.426937103271484, "step": 14000 }, { "epoch": 0.83, "eval_logits/chosen": -2.9443416595458984, "eval_logits/rejected": -2.9319815635681152, "eval_logps/chosen": -39.6554069519043, "eval_logps/rejected": -1151.730224609375, "eval_loss": 2.481411866028793e-05, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.19713005423545837, "eval_rewards/margins": 11.332048416137695, "eval_rewards/rejected": -11.134918212890625, "eval_runtime": 3.9064, "eval_samples_per_second": 1.28, "eval_steps_per_second": 0.256, "step": 14000 }, { "epoch": 0.84, "learning_rate": 4.013277614925229e-07, "logits/chosen": -2.9556424617767334, "logits/rejected": -2.839322328567505, "logps/chosen": -40.40355682373047, "logps/rejected": -1174.1368408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24868285655975342, "rewards/margins": 11.604778289794922, "rewards/rejected": -11.356094360351562, "step": 14010 }, { "epoch": 0.84, "learning_rate": 3.985045656193631e-07, "logits/chosen": -2.975538492202759, "logits/rejected": -2.886845827102661, "logps/chosen": -43.87914276123047, "logps/rejected": -1170.378173828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2150866687297821, "rewards/margins": 11.536439895629883, "rewards/rejected": -11.3213529586792, "step": 14020 }, { "epoch": 0.84, "learning_rate": 3.9569047468215967e-07, "logits/chosen": -2.9608826637268066, "logits/rejected": -2.841970443725586, "logps/chosen": -41.944313049316406, "logps/rejected": -1140.113037109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24829331040382385, "rewards/margins": 11.283625602722168, "rewards/rejected": -11.035331726074219, "step": 14030 }, { "epoch": 0.84, "learning_rate": 3.9288550087323687e-07, "logits/chosen": -2.9494919776916504, "logits/rejected": -2.8754091262817383, "logps/chosen": -40.39307403564453, "logps/rejected": -1186.650634765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.22584083676338196, "rewards/margins": 11.709796905517578, "rewards/rejected": -11.483957290649414, "step": 14040 }, { "epoch": 0.84, "learning_rate": 3.900896563454226e-07, "logits/chosen": -2.9712491035461426, "logits/rejected": -2.8670074939727783, "logps/chosen": -33.02203369140625, "logps/rejected": -1185.712158203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26823145151138306, "rewards/margins": 11.751668930053711, "rewards/rejected": -11.483437538146973, "step": 14050 }, { "epoch": 0.84, "learning_rate": 3.873029532119868e-07, "logits/chosen": -2.9596848487854004, "logits/rejected": -2.8863582611083984, "logps/chosen": -33.85038375854492, "logps/rejected": -1170.5400390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27817752957344055, "rewards/margins": 11.604523658752441, "rewards/rejected": -11.326345443725586, "step": 14060 }, { "epoch": 0.84, "learning_rate": 3.845254035465951e-07, "logits/chosen": -2.989048719406128, "logits/rejected": -2.902933359146118, "logps/chosen": -41.78233337402344, "logps/rejected": -1186.1873779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.25399017333984375, "rewards/margins": 11.74703598022461, "rewards/rejected": -11.49304485321045, "step": 14070 }, { "epoch": 0.84, "learning_rate": 3.8175701938325677e-07, "logits/chosen": -2.9727742671966553, "logits/rejected": -2.857083320617676, "logps/chosen": -37.89759063720703, "logps/rejected": -1154.80126953125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2638583779335022, "rewards/margins": 11.439736366271973, "rewards/rejected": -11.175877571105957, "step": 14080 }, { "epoch": 0.84, "learning_rate": 3.7899781271626747e-07, "logits/chosen": -2.9848992824554443, "logits/rejected": -2.9070353507995605, "logps/chosen": -39.68728256225586, "logps/rejected": -1152.2689208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2539916932582855, "rewards/margins": 11.402501106262207, "rewards/rejected": -11.14850902557373, "step": 14090 }, { "epoch": 0.84, "learning_rate": 3.76247795500162e-07, "logits/chosen": -2.9581360816955566, "logits/rejected": -2.9032576084136963, "logps/chosen": -36.24940872192383, "logps/rejected": -1184.3323974609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2657966613769531, "rewards/margins": 11.734807968139648, "rewards/rejected": -11.469011306762695, "step": 14100 }, { "epoch": 0.84, "learning_rate": 3.73506979649661e-07, "logits/chosen": -2.969757556915283, "logits/rejected": -2.8629496097564697, "logps/chosen": -35.07985305786133, "logps/rejected": -1141.770751953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2474711835384369, "rewards/margins": 11.28460693359375, "rewards/rejected": -11.037135124206543, "step": 14110 }, { "epoch": 0.84, "learning_rate": 3.707753770396197e-07, "logits/chosen": -2.974464178085327, "logits/rejected": -2.894047737121582, "logps/chosen": -40.00037384033203, "logps/rejected": -1167.463623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.28203585743904114, "rewards/margins": 11.58220100402832, "rewards/rejected": -11.300166130065918, "step": 14120 }, { "epoch": 0.84, "learning_rate": 3.6805299950497366e-07, "logits/chosen": -2.9641175270080566, "logits/rejected": -2.8882346153259277, "logps/chosen": -41.379150390625, "logps/rejected": -1150.04150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2742035984992981, "rewards/margins": 11.38703727722168, "rewards/rejected": -11.112833023071289, "step": 14130 }, { "epoch": 0.84, "learning_rate": 3.653398588406937e-07, "logits/chosen": -2.976961374282837, "logits/rejected": -2.8812904357910156, "logps/chosen": -37.95551300048828, "logps/rejected": -1140.9989013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.27787384390830994, "rewards/margins": 11.29975414276123, "rewards/rejected": -11.021880149841309, "step": 14140 }, { "epoch": 0.84, "learning_rate": 3.626359668017285e-07, "logits/chosen": -2.9519410133361816, "logits/rejected": -2.8458609580993652, "logps/chosen": -40.797019958496094, "logps/rejected": -1108.572021484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2443278729915619, "rewards/margins": 10.946907043457031, "rewards/rejected": -10.702577590942383, "step": 14150 }, { "epoch": 0.84, "learning_rate": 3.5994133510295517e-07, "logits/chosen": -2.984022617340088, "logits/rejected": -2.9151902198791504, "logps/chosen": -38.43916320800781, "logps/rejected": -1119.72705078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2706991136074066, "rewards/margins": 11.086308479309082, "rewards/rejected": -10.815608978271484, "step": 14160 }, { "epoch": 0.84, "learning_rate": 3.572559754191332e-07, "logits/chosen": -2.962430953979492, "logits/rejected": -2.872950315475464, "logps/chosen": -43.51716613769531, "logps/rejected": -1168.561767578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.27181094884872437, "rewards/margins": 11.587163925170898, "rewards/rejected": -11.315354347229004, "step": 14170 }, { "epoch": 0.85, "learning_rate": 3.545798993848465e-07, "logits/chosen": -2.9340779781341553, "logits/rejected": -2.8606605529785156, "logps/chosen": -35.56151580810547, "logps/rejected": -1132.875732421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2693587839603424, "rewards/margins": 11.216402053833008, "rewards/rejected": -10.947042465209961, "step": 14180 }, { "epoch": 0.85, "learning_rate": 3.51913118594458e-07, "logits/chosen": -2.942863941192627, "logits/rejected": -2.9103007316589355, "logps/chosen": -35.48967742919922, "logps/rejected": -1202.595458984375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.24892444908618927, "rewards/margins": 11.903104782104492, "rewards/rejected": -11.654180526733398, "step": 14190 }, { "epoch": 0.85, "learning_rate": 3.492556446020587e-07, "logits/chosen": -2.9518089294433594, "logits/rejected": -2.8716187477111816, "logps/chosen": -39.486175537109375, "logps/rejected": -1154.579345703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25107064843177795, "rewards/margins": 11.425931930541992, "rewards/rejected": -11.174860000610352, "step": 14200 }, { "epoch": 0.85, "learning_rate": 3.466074889214169e-07, "logits/chosen": -2.9532928466796875, "logits/rejected": -2.881896495819092, "logps/chosen": -37.14575958251953, "logps/rejected": -1161.622802734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2576335370540619, "rewards/margins": 11.50275993347168, "rewards/rejected": -11.245124816894531, "step": 14210 }, { "epoch": 0.85, "learning_rate": 3.4396866302592593e-07, "logits/chosen": -2.992985486984253, "logits/rejected": -2.933152437210083, "logps/chosen": -33.65398025512695, "logps/rejected": -1186.126708984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2715386748313904, "rewards/margins": 11.755863189697266, "rewards/rejected": -11.48432445526123, "step": 14220 }, { "epoch": 0.85, "learning_rate": 3.413391783485606e-07, "logits/chosen": -2.9627413749694824, "logits/rejected": -2.894888401031494, "logps/chosen": -37.91114044189453, "logps/rejected": -1178.111083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.25796735286712646, "rewards/margins": 11.670048713684082, "rewards/rejected": -11.412080764770508, "step": 14230 }, { "epoch": 0.85, "learning_rate": 3.3871904628182267e-07, "logits/chosen": -2.9556427001953125, "logits/rejected": -2.874993324279785, "logps/chosen": -41.99294662475586, "logps/rejected": -1174.887939453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.23899154365062714, "rewards/margins": 11.62536334991455, "rewards/rejected": -11.386372566223145, "step": 14240 }, { "epoch": 0.85, "learning_rate": 3.361082781776906e-07, "logits/chosen": -2.9554123878479004, "logits/rejected": -2.8812153339385986, "logps/chosen": -32.15985870361328, "logps/rejected": -1119.5086669921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2603474259376526, "rewards/margins": 11.077900886535645, "rewards/rejected": -10.817553520202637, "step": 14250 }, { "epoch": 0.85, "learning_rate": 3.335068853475762e-07, "logits/chosen": -2.983323812484741, "logits/rejected": -2.8594462871551514, "logps/chosen": -40.57501983642578, "logps/rejected": -1196.447998046875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 0.24829277396202087, "rewards/margins": 11.836374282836914, "rewards/rejected": -11.588081359863281, "step": 14260 }, { "epoch": 0.85, "learning_rate": 3.309148790622688e-07, "logits/chosen": -2.9842529296875, "logits/rejected": -2.822277784347534, "logps/chosen": -36.70087814331055, "logps/rejected": -1176.931396484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.27445799112319946, "rewards/margins": 11.678621292114258, "rewards/rejected": -11.404162406921387, "step": 14270 }, { "epoch": 0.85, "learning_rate": 3.2833227055189126e-07, "logits/chosen": -2.9882633686065674, "logits/rejected": -2.8937246799468994, "logps/chosen": -37.624961853027344, "logps/rejected": -1211.5439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2561279833316803, "rewards/margins": 11.979304313659668, "rewards/rejected": -11.723176002502441, "step": 14280 }, { "epoch": 0.85, "learning_rate": 3.2575907100584976e-07, "logits/chosen": -2.9473347663879395, "logits/rejected": -2.833286762237549, "logps/chosen": -39.17856979370117, "logps/rejected": -1152.345947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24069365859031677, "rewards/margins": 11.378581047058105, "rewards/rejected": -11.137887954711914, "step": 14290 }, { "epoch": 0.85, "learning_rate": 3.2319529157278427e-07, "logits/chosen": -2.9895594120025635, "logits/rejected": -2.84694242477417, "logps/chosen": -35.13867950439453, "logps/rejected": -1149.5904541015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.26353392004966736, "rewards/margins": 11.373065948486328, "rewards/rejected": -11.109532356262207, "step": 14300 }, { "epoch": 0.85, "learning_rate": 3.2064094336052176e-07, "logits/chosen": -2.9752068519592285, "logits/rejected": -2.911365270614624, "logps/chosen": -40.975563049316406, "logps/rejected": -1111.7720947265625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.22282831370830536, "rewards/margins": 10.963251113891602, "rewards/rejected": -10.740423202514648, "step": 14310 }, { "epoch": 0.85, "learning_rate": 3.1809603743602783e-07, "logits/chosen": -2.9474854469299316, "logits/rejected": -2.8877720832824707, "logps/chosen": -36.8981819152832, "logps/rejected": -1163.843017578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2550462484359741, "rewards/margins": 11.521089553833008, "rewards/rejected": -11.266043663024902, "step": 14320 }, { "epoch": 0.85, "learning_rate": 3.1556058482535817e-07, "logits/chosen": -3.004861831665039, "logits/rejected": -2.8939802646636963, "logps/chosen": -41.1436653137207, "logps/rejected": -1109.649658203125, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": 0.27094608545303345, "rewards/margins": 10.997098922729492, "rewards/rejected": -10.726153373718262, "step": 14330 }, { "epoch": 0.86, "learning_rate": 3.1303459651361027e-07, "logits/chosen": -2.978245258331299, "logits/rejected": -2.8902478218078613, "logps/chosen": -41.618377685546875, "logps/rejected": -1166.655029296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24871960282325745, "rewards/margins": 11.52495288848877, "rewards/rejected": -11.276232719421387, "step": 14340 }, { "epoch": 0.86, "learning_rate": 3.105180834448776e-07, "logits/chosen": -2.9869320392608643, "logits/rejected": -2.8750998973846436, "logps/chosen": -38.59731674194336, "logps/rejected": -1196.451904296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.253936767578125, "rewards/margins": 11.83669662475586, "rewards/rejected": -11.58276081085205, "step": 14350 }, { "epoch": 0.86, "learning_rate": 3.080110565222008e-07, "logits/chosen": -2.9636287689208984, "logits/rejected": -2.8717257976531982, "logps/chosen": -38.56322479248047, "logps/rejected": -1183.343017578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25960105657577515, "rewards/margins": 11.711102485656738, "rewards/rejected": -11.45150089263916, "step": 14360 }, { "epoch": 0.86, "learning_rate": 3.05513526607521e-07, "logits/chosen": -2.9569036960601807, "logits/rejected": -2.85575532913208, "logps/chosen": -35.050025939941406, "logps/rejected": -1222.4166259765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.28773659467697144, "rewards/margins": 12.14677619934082, "rewards/rejected": -11.859039306640625, "step": 14370 }, { "epoch": 0.86, "learning_rate": 3.0302550452163294e-07, "logits/chosen": -2.972907304763794, "logits/rejected": -2.8789494037628174, "logps/chosen": -39.202415466308594, "logps/rejected": -1130.459228515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2843253016471863, "rewards/margins": 11.209595680236816, "rewards/rejected": -10.925270080566406, "step": 14380 }, { "epoch": 0.86, "learning_rate": 3.0054700104413666e-07, "logits/chosen": -2.972195863723755, "logits/rejected": -2.9086170196533203, "logps/chosen": -50.360572814941406, "logps/rejected": -1108.968505859375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.1804075539112091, "rewards/margins": 10.875568389892578, "rewards/rejected": -10.695159912109375, "step": 14390 }, { "epoch": 0.86, "learning_rate": 2.980780269133937e-07, "logits/chosen": -2.9625353813171387, "logits/rejected": -2.8621582984924316, "logps/chosen": -35.31135177612305, "logps/rejected": -1121.241943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2787972092628479, "rewards/margins": 11.112247467041016, "rewards/rejected": -10.833450317382812, "step": 14400 }, { "epoch": 0.86, "learning_rate": 2.956185928264757e-07, "logits/chosen": -2.9637696743011475, "logits/rejected": -2.887159585952759, "logps/chosen": -33.07411575317383, "logps/rejected": -1171.3399658203125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.2798188328742981, "rewards/margins": 11.609004020690918, "rewards/rejected": -11.329184532165527, "step": 14410 }, { "epoch": 0.86, "learning_rate": 2.9316870943912554e-07, "logits/chosen": -2.9970641136169434, "logits/rejected": -2.918379306793213, "logps/chosen": -44.4133415222168, "logps/rejected": -1214.63134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.23853567242622375, "rewards/margins": 11.98333740234375, "rewards/rejected": -11.74480152130127, "step": 14420 }, { "epoch": 0.86, "learning_rate": 2.9072838736570243e-07, "logits/chosen": -2.9777426719665527, "logits/rejected": -2.888695478439331, "logps/chosen": -37.00130844116211, "logps/rejected": -1168.811767578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2307082712650299, "rewards/margins": 11.531135559082031, "rewards/rejected": -11.300426483154297, "step": 14430 }, { "epoch": 0.86, "learning_rate": 2.8829763717914266e-07, "logits/chosen": -2.964890718460083, "logits/rejected": -2.850745677947998, "logps/chosen": -40.2948112487793, "logps/rejected": -1138.306884765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.25608593225479126, "rewards/margins": 11.243547439575195, "rewards/rejected": -10.987462043762207, "step": 14440 }, { "epoch": 0.86, "learning_rate": 2.8587646941091116e-07, "logits/chosen": -2.996582508087158, "logits/rejected": -2.900740623474121, "logps/chosen": -34.45864486694336, "logps/rejected": -1206.917236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2887122333049774, "rewards/margins": 11.97275447845459, "rewards/rejected": -11.684041976928711, "step": 14450 }, { "epoch": 0.86, "learning_rate": 2.834648945509552e-07, "logits/chosen": -2.9729394912719727, "logits/rejected": -2.879490375518799, "logps/chosen": -40.890499114990234, "logps/rejected": -1190.101318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26295238733291626, "rewards/margins": 11.77513313293457, "rewards/rejected": -11.512179374694824, "step": 14460 }, { "epoch": 0.86, "learning_rate": 2.810629230476611e-07, "logits/chosen": -2.9505562782287598, "logits/rejected": -2.872908115386963, "logps/chosen": -42.04741668701172, "logps/rejected": -1170.8892822265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24690935015678406, "rewards/margins": 11.566638946533203, "rewards/rejected": -11.319730758666992, "step": 14470 }, { "epoch": 0.86, "learning_rate": 2.786705653078062e-07, "logits/chosen": -2.992215633392334, "logits/rejected": -2.9260201454162598, "logps/chosen": -36.50754165649414, "logps/rejected": -1160.802001953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2316358983516693, "rewards/margins": 11.479607582092285, "rewards/rejected": -11.247971534729004, "step": 14480 }, { "epoch": 0.86, "learning_rate": 2.76287831696517e-07, "logits/chosen": -2.975308656692505, "logits/rejected": -2.8944661617279053, "logps/chosen": -38.54339599609375, "logps/rejected": -1199.462646484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24478037655353546, "rewards/margins": 11.867864608764648, "rewards/rejected": -11.623083114624023, "step": 14490 }, { "epoch": 0.86, "learning_rate": 2.7391473253722017e-07, "logits/chosen": -2.9583072662353516, "logits/rejected": -2.8763275146484375, "logps/chosen": -40.08979034423828, "logps/rejected": -1129.8218994140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.22717790305614471, "rewards/margins": 11.156317710876465, "rewards/rejected": -10.929140090942383, "step": 14500 }, { "epoch": 0.87, "learning_rate": 2.7155127811160336e-07, "logits/chosen": -2.987046003341675, "logits/rejected": -2.8624444007873535, "logps/chosen": -35.8136100769043, "logps/rejected": -1164.223876953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2851030230522156, "rewards/margins": 11.553810119628906, "rewards/rejected": -11.268708229064941, "step": 14510 }, { "epoch": 0.87, "learning_rate": 2.6919747865956413e-07, "logits/chosen": -3.009577512741089, "logits/rejected": -2.902857780456543, "logps/chosen": -44.49883270263672, "logps/rejected": -1184.5322265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.22864560782909393, "rewards/margins": 11.688360214233398, "rewards/rejected": -11.459714889526367, "step": 14520 }, { "epoch": 0.87, "learning_rate": 2.668533443791707e-07, "logits/chosen": -2.9741852283477783, "logits/rejected": -2.89213490486145, "logps/chosen": -42.90782165527344, "logps/rejected": -1165.458984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2349993735551834, "rewards/margins": 11.504727363586426, "rewards/rejected": -11.269726753234863, "step": 14530 }, { "epoch": 0.87, "learning_rate": 2.645188854266162e-07, "logits/chosen": -2.995941638946533, "logits/rejected": -2.9112987518310547, "logps/chosen": -40.149513244628906, "logps/rejected": -1175.6861572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26536229252815247, "rewards/margins": 11.627902030944824, "rewards/rejected": -11.362539291381836, "step": 14540 }, { "epoch": 0.87, "learning_rate": 2.621941119161739e-07, "logits/chosen": -2.9575822353363037, "logits/rejected": -2.847137212753296, "logps/chosen": -30.879901885986328, "logps/rejected": -1207.5631103515625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.2715495526790619, "rewards/margins": 11.965923309326172, "rewards/rejected": -11.694375038146973, "step": 14550 }, { "epoch": 0.87, "learning_rate": 2.598790339201537e-07, "logits/chosen": -2.973747730255127, "logits/rejected": -2.884227991104126, "logps/chosen": -31.298513412475586, "logps/rejected": -1144.3876953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25711923837661743, "rewards/margins": 11.321371078491211, "rewards/rejected": -11.064251899719238, "step": 14560 }, { "epoch": 0.87, "learning_rate": 2.575736614688595e-07, "logits/chosen": -2.974978446960449, "logits/rejected": -2.8833775520324707, "logps/chosen": -35.46416473388672, "logps/rejected": -1127.0582275390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2784484922885895, "rewards/margins": 11.167272567749023, "rewards/rejected": -10.888823509216309, "step": 14570 }, { "epoch": 0.87, "learning_rate": 2.552780045505446e-07, "logits/chosen": -2.974616289138794, "logits/rejected": -2.870253801345825, "logps/chosen": -39.09266662597656, "logps/rejected": -1194.9544677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24392390251159668, "rewards/margins": 11.817712783813477, "rewards/rejected": -11.573789596557617, "step": 14580 }, { "epoch": 0.87, "learning_rate": 2.529920731113672e-07, "logits/chosen": -2.9811716079711914, "logits/rejected": -2.883944272994995, "logps/chosen": -37.100830078125, "logps/rejected": -1185.7421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2681924104690552, "rewards/margins": 11.75524616241455, "rewards/rejected": -11.487053871154785, "step": 14590 }, { "epoch": 0.87, "learning_rate": 2.507158770553528e-07, "logits/chosen": -2.981268882751465, "logits/rejected": -2.8495049476623535, "logps/chosen": -61.57887649536133, "logps/rejected": -1122.2589111328125, "loss": 0.0527, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.10085967928171158, "rewards/margins": 10.931605339050293, "rewards/rejected": -10.8307466506958, "step": 14600 }, { "epoch": 0.87, "learning_rate": 2.484494262443429e-07, "logits/chosen": -2.9697985649108887, "logits/rejected": -2.8659613132476807, "logps/chosen": -38.73645782470703, "logps/rejected": -1163.5970458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2644786238670349, "rewards/margins": 11.512569427490234, "rewards/rejected": -11.248091697692871, "step": 14610 }, { "epoch": 0.87, "learning_rate": 2.4619273049796e-07, "logits/chosen": -2.9595065116882324, "logits/rejected": -2.872454881668091, "logps/chosen": -35.75692367553711, "logps/rejected": -1220.347900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.25217437744140625, "rewards/margins": 12.07624626159668, "rewards/rejected": -11.824071884155273, "step": 14620 }, { "epoch": 0.87, "learning_rate": 2.439457995935604e-07, "logits/chosen": -2.9820053577423096, "logits/rejected": -2.8725152015686035, "logps/chosen": -35.46991729736328, "logps/rejected": -1184.9896240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2573280930519104, "rewards/margins": 11.73294734954834, "rewards/rejected": -11.475618362426758, "step": 14630 }, { "epoch": 0.87, "learning_rate": 2.417086432661939e-07, "logits/chosen": -2.982407808303833, "logits/rejected": -2.886704921722412, "logps/chosen": -39.6124267578125, "logps/rejected": -1215.7908935546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2740919291973114, "rewards/margins": 12.04510498046875, "rewards/rejected": -11.771013259887695, "step": 14640 }, { "epoch": 0.87, "learning_rate": 2.394812712085598e-07, "logits/chosen": -2.967245578765869, "logits/rejected": -2.9065072536468506, "logps/chosen": -33.843048095703125, "logps/rejected": -1159.634521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2309783697128296, "rewards/margins": 11.448858261108398, "rewards/rejected": -11.217880249023438, "step": 14650 }, { "epoch": 0.87, "learning_rate": 2.3726369307096765e-07, "logits/chosen": -2.9690871238708496, "logits/rejected": -2.899435520172119, "logps/chosen": -42.27226638793945, "logps/rejected": -1227.180419921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.22535070776939392, "rewards/margins": 12.115138053894043, "rewards/rejected": -11.889787673950195, "step": 14660 }, { "epoch": 0.87, "learning_rate": 2.3505591846129356e-07, "logits/chosen": -2.982811450958252, "logits/rejected": -2.881304979324341, "logps/chosen": -40.921546936035156, "logps/rejected": -1200.5848388671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23757345974445343, "rewards/margins": 11.859773635864258, "rewards/rejected": -11.622200012207031, "step": 14670 }, { "epoch": 0.88, "learning_rate": 2.3285795694493686e-07, "logits/chosen": -2.9644010066986084, "logits/rejected": -2.889530658721924, "logps/chosen": -41.01499938964844, "logps/rejected": -1169.771484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25438714027404785, "rewards/margins": 11.576871871948242, "rewards/rejected": -11.322486877441406, "step": 14680 }, { "epoch": 0.88, "learning_rate": 2.3066981804478416e-07, "logits/chosen": -2.9530622959136963, "logits/rejected": -2.837897777557373, "logps/chosen": -36.175968170166016, "logps/rejected": -1217.08544921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.22331397235393524, "rewards/margins": 12.022293090820312, "rewards/rejected": -11.798978805541992, "step": 14690 }, { "epoch": 0.88, "learning_rate": 2.2849151124116148e-07, "logits/chosen": -2.9613192081451416, "logits/rejected": -2.908884048461914, "logps/chosen": -44.20573806762695, "logps/rejected": -1199.722412109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.1910620778799057, "rewards/margins": 11.815801620483398, "rewards/rejected": -11.624738693237305, "step": 14700 }, { "epoch": 0.88, "learning_rate": 2.2632304597179827e-07, "logits/chosen": -2.9538557529449463, "logits/rejected": -2.859138250350952, "logps/chosen": -37.532745361328125, "logps/rejected": -1170.2880859375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.2513587474822998, "rewards/margins": 11.572126388549805, "rewards/rejected": -11.320768356323242, "step": 14710 }, { "epoch": 0.88, "learning_rate": 2.2416443163178342e-07, "logits/chosen": -2.954435348510742, "logits/rejected": -2.8554751873016357, "logps/chosen": -41.557010650634766, "logps/rejected": -1208.505615234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24223406612873077, "rewards/margins": 11.942941665649414, "rewards/rejected": -11.70070743560791, "step": 14720 }, { "epoch": 0.88, "learning_rate": 2.2201567757352631e-07, "logits/chosen": -2.9838356971740723, "logits/rejected": -2.9026012420654297, "logps/chosen": -35.78371810913086, "logps/rejected": -1180.573486328125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.24237938225269318, "rewards/margins": 11.666749000549316, "rewards/rejected": -11.424368858337402, "step": 14730 }, { "epoch": 0.88, "learning_rate": 2.1987679310671582e-07, "logits/chosen": -2.988358974456787, "logits/rejected": -2.913644313812256, "logps/chosen": -41.052894592285156, "logps/rejected": -1163.731201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2496020346879959, "rewards/margins": 11.516433715820312, "rewards/rejected": -11.26683235168457, "step": 14740 }, { "epoch": 0.88, "learning_rate": 2.1774778749827946e-07, "logits/chosen": -2.9602668285369873, "logits/rejected": -2.8909010887145996, "logps/chosen": -30.24057388305664, "logps/rejected": -1145.8494873046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.27846235036849976, "rewards/margins": 11.355927467346191, "rewards/rejected": -11.077465057373047, "step": 14750 }, { "epoch": 0.88, "learning_rate": 2.1562866997234421e-07, "logits/chosen": -2.968945026397705, "logits/rejected": -2.881824016571045, "logps/chosen": -35.205352783203125, "logps/rejected": -1196.9407958984375, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": 0.24158112704753876, "rewards/margins": 11.824810981750488, "rewards/rejected": -11.58322811126709, "step": 14760 }, { "epoch": 0.88, "learning_rate": 2.1351944971019362e-07, "logits/chosen": -2.9869556427001953, "logits/rejected": -2.8871238231658936, "logps/chosen": -38.40692901611328, "logps/rejected": -1237.6851806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2754155695438385, "rewards/margins": 12.270101547241211, "rewards/rejected": -11.994685173034668, "step": 14770 }, { "epoch": 0.88, "learning_rate": 2.1142013585023464e-07, "logits/chosen": -2.9699816703796387, "logits/rejected": -2.8610036373138428, "logps/chosen": -40.453773498535156, "logps/rejected": -1206.207763671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2395583689212799, "rewards/margins": 11.93638801574707, "rewards/rejected": -11.696830749511719, "step": 14780 }, { "epoch": 0.88, "learning_rate": 2.0933073748794996e-07, "logits/chosen": -3.0146446228027344, "logits/rejected": -2.9502310752868652, "logps/chosen": -42.548622131347656, "logps/rejected": -1218.767822265625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.22917577624320984, "rewards/margins": 12.04952335357666, "rewards/rejected": -11.820348739624023, "step": 14790 }, { "epoch": 0.88, "learning_rate": 2.072512636758639e-07, "logits/chosen": -2.9659197330474854, "logits/rejected": -2.898606777191162, "logps/chosen": -39.56842803955078, "logps/rejected": -1148.098388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2633543610572815, "rewards/margins": 11.35074520111084, "rewards/rejected": -11.087390899658203, "step": 14800 }, { "epoch": 0.88, "learning_rate": 2.051817234235015e-07, "logits/chosen": -2.966994285583496, "logits/rejected": -2.871854782104492, "logps/chosen": -35.143802642822266, "logps/rejected": -1199.575927734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.2710846960544586, "rewards/margins": 11.889532089233398, "rewards/rejected": -11.618448257446289, "step": 14810 }, { "epoch": 0.88, "learning_rate": 2.0312212569735035e-07, "logits/chosen": -2.961574077606201, "logits/rejected": -2.8751587867736816, "logps/chosen": -39.212730407714844, "logps/rejected": -1177.6048583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26796579360961914, "rewards/margins": 11.67877197265625, "rewards/rejected": -11.410806655883789, "step": 14820 }, { "epoch": 0.88, "learning_rate": 2.0107247942081963e-07, "logits/chosen": -2.9954681396484375, "logits/rejected": -2.9002223014831543, "logps/chosen": -39.851036071777344, "logps/rejected": -1205.3831787109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2767341434955597, "rewards/margins": 11.95850944519043, "rewards/rejected": -11.681775093078613, "step": 14830 }, { "epoch": 0.88, "learning_rate": 1.990327934742045e-07, "logits/chosen": -2.9388582706451416, "logits/rejected": -2.8700079917907715, "logps/chosen": -38.4421272277832, "logps/rejected": -1158.03564453125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.23311813175678253, "rewards/margins": 11.44289493560791, "rewards/rejected": -11.20977783203125, "step": 14840 }, { "epoch": 0.89, "learning_rate": 1.9700307669464515e-07, "logits/chosen": -2.9476330280303955, "logits/rejected": -2.8464789390563965, "logps/chosen": -35.94663619995117, "logps/rejected": -1195.105224609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27594587206840515, "rewards/margins": 11.857394218444824, "rewards/rejected": -11.581449508666992, "step": 14850 }, { "epoch": 0.89, "learning_rate": 1.949833378760882e-07, "logits/chosen": -2.969691514968872, "logits/rejected": -2.8692784309387207, "logps/chosen": -40.49444580078125, "logps/rejected": -1145.8961181640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2370677888393402, "rewards/margins": 11.310900688171387, "rewards/rejected": -11.073832511901855, "step": 14860 }, { "epoch": 0.89, "learning_rate": 1.92973585769253e-07, "logits/chosen": -3.00011944770813, "logits/rejected": -2.907902479171753, "logps/chosen": -44.987762451171875, "logps/rejected": -1172.6527099609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24879412353038788, "rewards/margins": 11.594925880432129, "rewards/rejected": -11.346132278442383, "step": 14870 }, { "epoch": 0.89, "learning_rate": 1.9097382908158713e-07, "logits/chosen": -2.985194683074951, "logits/rejected": -2.9092354774475098, "logps/chosen": -37.39886474609375, "logps/rejected": -1128.885986328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2460823357105255, "rewards/margins": 11.151304244995117, "rewards/rejected": -10.905221939086914, "step": 14880 }, { "epoch": 0.89, "learning_rate": 1.8898407647723327e-07, "logits/chosen": -2.973564624786377, "logits/rejected": -2.939056158065796, "logps/chosen": -39.501564025878906, "logps/rejected": -1191.6790771484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2481163740158081, "rewards/margins": 11.788674354553223, "rewards/rejected": -11.540557861328125, "step": 14890 }, { "epoch": 0.89, "learning_rate": 1.8700433657699162e-07, "logits/chosen": -2.9694972038269043, "logits/rejected": -2.9102184772491455, "logps/chosen": -42.71367645263672, "logps/rejected": -1224.18896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.22102269530296326, "rewards/margins": 12.089110374450684, "rewards/rejected": -11.868088722229004, "step": 14900 }, { "epoch": 0.89, "learning_rate": 1.8503461795827958e-07, "logits/chosen": -2.9840550422668457, "logits/rejected": -2.900902271270752, "logps/chosen": -38.789344787597656, "logps/rejected": -1124.760498046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.273086279630661, "rewards/margins": 11.148600578308105, "rewards/rejected": -10.875513076782227, "step": 14910 }, { "epoch": 0.89, "learning_rate": 1.8307492915509705e-07, "logits/chosen": -2.9455766677856445, "logits/rejected": -2.8825197219848633, "logps/chosen": -42.33556365966797, "logps/rejected": -1176.556396484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.26457056403160095, "rewards/margins": 11.648407936096191, "rewards/rejected": -11.383837699890137, "step": 14920 }, { "epoch": 0.89, "learning_rate": 1.8112527865798896e-07, "logits/chosen": -2.948873996734619, "logits/rejected": -2.894749402999878, "logps/chosen": -40.089942932128906, "logps/rejected": -1178.2847900390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2340421974658966, "rewards/margins": 11.639849662780762, "rewards/rejected": -11.405806541442871, "step": 14930 }, { "epoch": 0.89, "learning_rate": 1.7918567491400862e-07, "logits/chosen": -2.949519634246826, "logits/rejected": -2.8578875064849854, "logps/chosen": -46.051795959472656, "logps/rejected": -1146.0572509765625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2160722315311432, "rewards/margins": 11.314178466796875, "rewards/rejected": -11.098104476928711, "step": 14940 }, { "epoch": 0.89, "learning_rate": 1.7725612632667895e-07, "logits/chosen": -2.9446334838867188, "logits/rejected": -2.868124485015869, "logps/chosen": -36.956199645996094, "logps/rejected": -1142.9837646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.25842082500457764, "rewards/margins": 11.314658164978027, "rewards/rejected": -11.056236267089844, "step": 14950 }, { "epoch": 0.89, "learning_rate": 1.7533664125596038e-07, "logits/chosen": -2.9455292224884033, "logits/rejected": -2.862055540084839, "logps/chosen": -40.35226821899414, "logps/rejected": -1159.81689453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2311546504497528, "rewards/margins": 11.452412605285645, "rewards/rejected": -11.221258163452148, "step": 14960 }, { "epoch": 0.89, "learning_rate": 1.7342722801821143e-07, "logits/chosen": -2.958644390106201, "logits/rejected": -2.8997268676757812, "logps/chosen": -35.28658676147461, "logps/rejected": -1199.81103515625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2610974907875061, "rewards/margins": 11.878003120422363, "rewards/rejected": -11.616905212402344, "step": 14970 }, { "epoch": 0.89, "learning_rate": 1.7152789488615124e-07, "logits/chosen": -2.9567859172821045, "logits/rejected": -2.8849568367004395, "logps/chosen": -41.155128479003906, "logps/rejected": -1174.941162109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23939356207847595, "rewards/margins": 11.610101699829102, "rewards/rejected": -11.370708465576172, "step": 14980 }, { "epoch": 0.89, "learning_rate": 1.6963865008882975e-07, "logits/chosen": -2.992448091506958, "logits/rejected": -2.874046802520752, "logps/chosen": -39.237152099609375, "logps/rejected": -1169.231689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.22706358134746552, "rewards/margins": 11.555395126342773, "rewards/rejected": -11.328330039978027, "step": 14990 }, { "epoch": 0.89, "learning_rate": 1.6775950181158462e-07, "logits/chosen": -2.954263210296631, "logits/rejected": -2.8583288192749023, "logps/chosen": -40.58722686767578, "logps/rejected": -1105.032958984375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.19396013021469116, "rewards/margins": 10.86297607421875, "rewards/rejected": -10.66901683807373, "step": 15000 }, { "epoch": 0.9, "learning_rate": 1.6589045819601134e-07, "logits/chosen": -2.9351258277893066, "logits/rejected": -2.846315622329712, "logps/chosen": -37.831668853759766, "logps/rejected": -1173.9010009765625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.24440331757068634, "rewards/margins": 11.618576049804688, "rewards/rejected": -11.37417221069336, "step": 15010 }, { "epoch": 0.9, "learning_rate": 1.640315273399254e-07, "logits/chosen": -2.9719841480255127, "logits/rejected": -2.8859450817108154, "logps/chosen": -36.954986572265625, "logps/rejected": -1187.863037109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2429029494524002, "rewards/margins": 11.754290580749512, "rewards/rejected": -11.511388778686523, "step": 15020 }, { "epoch": 0.9, "learning_rate": 1.621827172973281e-07, "logits/chosen": -2.960239887237549, "logits/rejected": -2.862933874130249, "logps/chosen": -39.555076599121094, "logps/rejected": -1165.606201171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23071518540382385, "rewards/margins": 11.514144897460938, "rewards/rejected": -11.283429145812988, "step": 15030 }, { "epoch": 0.9, "learning_rate": 1.603440360783709e-07, "logits/chosen": -2.9596478939056396, "logits/rejected": -2.869964599609375, "logps/chosen": -33.3579216003418, "logps/rejected": -1173.653076171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.26571035385131836, "rewards/margins": 11.616678237915039, "rewards/rejected": -11.350967407226562, "step": 15040 }, { "epoch": 0.9, "learning_rate": 1.5851549164932118e-07, "logits/chosen": -2.9776899814605713, "logits/rejected": -2.8980157375335693, "logps/chosen": -43.45653533935547, "logps/rejected": -1208.797119140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.20579838752746582, "rewards/margins": 11.928613662719727, "rewards/rejected": -11.722814559936523, "step": 15050 }, { "epoch": 0.9, "learning_rate": 1.5669709193252835e-07, "logits/chosen": -2.9606213569641113, "logits/rejected": -2.8818001747131348, "logps/chosen": -37.21210479736328, "logps/rejected": -1169.078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.250752717256546, "rewards/margins": 11.555463790893555, "rewards/rejected": -11.30471134185791, "step": 15060 }, { "epoch": 0.9, "learning_rate": 1.5488884480638677e-07, "logits/chosen": -2.9627280235290527, "logits/rejected": -2.881899356842041, "logps/chosen": -45.11912536621094, "logps/rejected": -1138.7672119140625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.22045183181762695, "rewards/margins": 11.231890678405762, "rewards/rejected": -11.011438369750977, "step": 15070 }, { "epoch": 0.9, "learning_rate": 1.5309075810530732e-07, "logits/chosen": -3.0030758380889893, "logits/rejected": -2.9120121002197266, "logps/chosen": -36.72823715209961, "logps/rejected": -1125.1456298828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26389291882514954, "rewards/margins": 11.139310836791992, "rewards/rejected": -10.875418663024902, "step": 15080 }, { "epoch": 0.9, "learning_rate": 1.5130283961967614e-07, "logits/chosen": -2.9662270545959473, "logits/rejected": -2.8749849796295166, "logps/chosen": -35.76933288574219, "logps/rejected": -1140.45654296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.257709801197052, "rewards/margins": 11.295709609985352, "rewards/rejected": -11.038000106811523, "step": 15090 }, { "epoch": 0.9, "learning_rate": 1.4952509709582673e-07, "logits/chosen": -2.9833168983459473, "logits/rejected": -2.8895130157470703, "logps/chosen": -37.73052215576172, "logps/rejected": -1205.6680908203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2419883757829666, "rewards/margins": 11.916754722595215, "rewards/rejected": -11.674766540527344, "step": 15100 }, { "epoch": 0.9, "learning_rate": 1.4775753823600359e-07, "logits/chosen": -2.9734091758728027, "logits/rejected": -2.901729106903076, "logps/chosen": -39.368141174316406, "logps/rejected": -1201.920166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2657638490200043, "rewards/margins": 11.923334121704102, "rewards/rejected": -11.657569885253906, "step": 15110 }, { "epoch": 0.9, "learning_rate": 1.460001706983294e-07, "logits/chosen": -2.9700160026550293, "logits/rejected": -2.881188154220581, "logps/chosen": -38.50746536254883, "logps/rejected": -1098.9444580078125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.23839297890663147, "rewards/margins": 10.855712890625, "rewards/rejected": -10.617321014404297, "step": 15120 }, { "epoch": 0.9, "learning_rate": 1.442530020967725e-07, "logits/chosen": -3.0056023597717285, "logits/rejected": -2.928194999694824, "logps/chosen": -38.074058532714844, "logps/rejected": -1169.90576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2577414810657501, "rewards/margins": 11.575519561767578, "rewards/rejected": -11.317777633666992, "step": 15130 }, { "epoch": 0.9, "learning_rate": 1.4251604000111275e-07, "logits/chosen": -2.958242654800415, "logits/rejected": -2.887268543243408, "logps/chosen": -36.58842468261719, "logps/rejected": -1224.310791015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2657166123390198, "rewards/margins": 12.132353782653809, "rewards/rejected": -11.866636276245117, "step": 15140 }, { "epoch": 0.9, "learning_rate": 1.4078929193691e-07, "logits/chosen": -2.9820125102996826, "logits/rejected": -2.8922102451324463, "logps/chosen": -39.72421646118164, "logps/rejected": -1165.865966796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.28552043437957764, "rewards/margins": 11.564339637756348, "rewards/rejected": -11.278820037841797, "step": 15150 }, { "epoch": 0.9, "learning_rate": 1.3907276538546898e-07, "logits/chosen": -2.9782519340515137, "logits/rejected": -2.903153896331787, "logps/chosen": -36.701133728027344, "logps/rejected": -1183.7110595703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.23683814704418182, "rewards/margins": 11.696184158325195, "rewards/rejected": -11.45934772491455, "step": 15160 }, { "epoch": 0.9, "learning_rate": 1.3736646778381159e-07, "logits/chosen": -2.9982497692108154, "logits/rejected": -2.9064948558807373, "logps/chosen": -41.206214904785156, "logps/rejected": -1147.576171875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": 0.20787262916564941, "rewards/margins": 11.289251327514648, "rewards/rejected": -11.081378936767578, "step": 15170 }, { "epoch": 0.91, "learning_rate": 1.3567040652463946e-07, "logits/chosen": -2.9708542823791504, "logits/rejected": -2.8830618858337402, "logps/chosen": -36.58932876586914, "logps/rejected": -1183.44921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.27233266830444336, "rewards/margins": 11.726694107055664, "rewards/rejected": -11.454360961914062, "step": 15180 }, { "epoch": 0.91, "learning_rate": 1.339845889563049e-07, "logits/chosen": -2.951820135116577, "logits/rejected": -2.871936082839966, "logps/chosen": -35.525264739990234, "logps/rejected": -1245.8509521484375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2576505243778229, "rewards/margins": 12.340564727783203, "rewards/rejected": -12.082914352416992, "step": 15190 }, { "epoch": 0.91, "learning_rate": 1.3230902238277887e-07, "logits/chosen": -2.985128879547119, "logits/rejected": -2.892003059387207, "logps/chosen": -35.87692642211914, "logps/rejected": -1210.2354736328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2674192786216736, "rewards/margins": 11.979203224182129, "rewards/rejected": -11.711782455444336, "step": 15200 }, { "epoch": 0.91, "learning_rate": 1.3064371406361854e-07, "logits/chosen": -2.9731171131134033, "logits/rejected": -2.8806004524230957, "logps/chosen": -31.557514190673828, "logps/rejected": -1201.3955078125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.27656641602516174, "rewards/margins": 11.917546272277832, "rewards/rejected": -11.640979766845703, "step": 15210 }, { "epoch": 0.91, "learning_rate": 1.2898867121393627e-07, "logits/chosen": -2.9013404846191406, "logits/rejected": -2.8142545223236084, "logps/chosen": -40.74055099487305, "logps/rejected": -1085.91162109375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.18874764442443848, "rewards/margins": 10.67133617401123, "rewards/rejected": -10.482587814331055, "step": 15220 }, { "epoch": 0.91, "learning_rate": 1.273439010043681e-07, "logits/chosen": -2.9742913246154785, "logits/rejected": -2.8742709159851074, "logps/chosen": -35.91184616088867, "logps/rejected": -1171.831298828125, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": 0.27519479393959045, "rewards/margins": 11.622952461242676, "rewards/rejected": -11.347757339477539, "step": 15230 }, { "epoch": 0.91, "learning_rate": 1.2570941056104348e-07, "logits/chosen": -2.966607093811035, "logits/rejected": -2.8784568309783936, "logps/chosen": -38.58753967285156, "logps/rejected": -1143.734375, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": 0.24940225481987, "rewards/margins": 11.30919075012207, "rewards/rejected": -11.059788703918457, "step": 15240 }, { "epoch": 0.91, "learning_rate": 1.2408520696555183e-07, "logits/chosen": -2.996110439300537, "logits/rejected": -2.914276599884033, "logps/chosen": -42.199668884277344, "logps/rejected": -1195.952392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2758926749229431, "rewards/margins": 11.853321075439453, "rewards/rejected": -11.577427864074707, "step": 15250 }, { "epoch": 0.91, "learning_rate": 1.224712972549172e-07, "logits/chosen": -2.9610772132873535, "logits/rejected": -2.890836715698242, "logps/chosen": -36.169464111328125, "logps/rejected": -1185.133544921875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.24974274635314941, "rewards/margins": 11.72623348236084, "rewards/rejected": -11.476491928100586, "step": 15260 }, { "epoch": 0.91, "learning_rate": 1.2086768842156065e-07, "logits/chosen": -2.984138011932373, "logits/rejected": -2.9071881771087646, "logps/chosen": -40.61865234375, "logps/rejected": -1194.735107421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2591859996318817, "rewards/margins": 11.83064079284668, "rewards/rejected": -11.571455001831055, "step": 15270 }, { "epoch": 0.91, "learning_rate": 1.1927438741327652e-07, "logits/chosen": -2.974619150161743, "logits/rejected": -2.8982224464416504, "logps/chosen": -51.818077087402344, "logps/rejected": -1122.6195068359375, "loss": 0.0218, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.12518063187599182, "rewards/margins": 10.98192024230957, "rewards/rejected": -10.856740951538086, "step": 15280 }, { "epoch": 0.91, "learning_rate": 1.1769140113319755e-07, "logits/chosen": -3.0144262313842773, "logits/rejected": -2.9155774116516113, "logps/chosen": -39.75475311279297, "logps/rejected": -1163.056884765625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2646397054195404, "rewards/margins": 11.516702651977539, "rewards/rejected": -11.25206184387207, "step": 15290 }, { "epoch": 0.91, "learning_rate": 1.1611873643976839e-07, "logits/chosen": -2.978649139404297, "logits/rejected": -2.890958070755005, "logps/chosen": -38.9640007019043, "logps/rejected": -1181.6832275390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.257240355014801, "rewards/margins": 11.698715209960938, "rewards/rejected": -11.441473960876465, "step": 15300 }, { "epoch": 0.91, "learning_rate": 1.145564001467131e-07, "logits/chosen": -2.9944519996643066, "logits/rejected": -2.8863461017608643, "logps/chosen": -40.759742736816406, "logps/rejected": -1231.613525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2693871855735779, "rewards/margins": 12.217482566833496, "rewards/rejected": -11.94809627532959, "step": 15310 }, { "epoch": 0.91, "learning_rate": 1.1300439902300814e-07, "logits/chosen": -2.989800453186035, "logits/rejected": -2.888291597366333, "logps/chosen": -40.778526306152344, "logps/rejected": -1175.2950439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2634030282497406, "rewards/margins": 11.647651672363281, "rewards/rejected": -11.384248733520508, "step": 15320 }, { "epoch": 0.91, "learning_rate": 1.1146273979285138e-07, "logits/chosen": -2.9748406410217285, "logits/rejected": -2.8863353729248047, "logps/chosen": -34.45737838745117, "logps/rejected": -1117.285888671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23594383895397186, "rewards/margins": 11.035730361938477, "rewards/rejected": -10.799786567687988, "step": 15330 }, { "epoch": 0.91, "learning_rate": 1.0993142913563209e-07, "logits/chosen": -2.9460413455963135, "logits/rejected": -2.8224503993988037, "logps/chosen": -37.8783073425293, "logps/rejected": -1125.140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.22235114872455597, "rewards/margins": 11.091314315795898, "rewards/rejected": -10.868963241577148, "step": 15340 }, { "epoch": 0.92, "learning_rate": 1.0841047368590596e-07, "logits/chosen": -2.9755117893218994, "logits/rejected": -2.9088165760040283, "logps/chosen": -38.083839416503906, "logps/rejected": -1112.820556640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24876773357391357, "rewards/margins": 10.99383544921875, "rewards/rejected": -10.74506664276123, "step": 15350 }, { "epoch": 0.92, "learning_rate": 1.0689988003336121e-07, "logits/chosen": -2.962559700012207, "logits/rejected": -2.8666887283325195, "logps/chosen": -40.437217712402344, "logps/rejected": -1116.868896484375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.26921284198760986, "rewards/margins": 11.06241226196289, "rewards/rejected": -10.793200492858887, "step": 15360 }, { "epoch": 0.92, "learning_rate": 1.0539965472279424e-07, "logits/chosen": -2.9444332122802734, "logits/rejected": -2.8394036293029785, "logps/chosen": -33.94709014892578, "logps/rejected": -1128.8128662109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.28288835287094116, "rewards/margins": 11.198755264282227, "rewards/rejected": -10.915865898132324, "step": 15370 }, { "epoch": 0.92, "learning_rate": 1.039098042540787e-07, "logits/chosen": -2.9871022701263428, "logits/rejected": -2.904219150543213, "logps/chosen": -46.9288215637207, "logps/rejected": -1166.7086181640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.19786928594112396, "rewards/margins": 11.483966827392578, "rewards/rejected": -11.286097526550293, "step": 15380 }, { "epoch": 0.92, "learning_rate": 1.0243033508213873e-07, "logits/chosen": -2.9489641189575195, "logits/rejected": -2.831801176071167, "logps/chosen": -39.442893981933594, "logps/rejected": -1145.763916015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2526959478855133, "rewards/margins": 11.317575454711914, "rewards/rejected": -11.064879417419434, "step": 15390 }, { "epoch": 0.92, "learning_rate": 1.0096125361691993e-07, "logits/chosen": -2.9813382625579834, "logits/rejected": -2.921874523162842, "logps/chosen": -42.09162521362305, "logps/rejected": -1158.1947021484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2582600712776184, "rewards/margins": 11.458087921142578, "rewards/rejected": -11.1998291015625, "step": 15400 }, { "epoch": 0.92, "learning_rate": 9.950256622336258e-08, "logits/chosen": -2.9925882816314697, "logits/rejected": -2.9062981605529785, "logps/chosen": -36.55866622924805, "logps/rejected": -1171.843505859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25989609956741333, "rewards/margins": 11.601727485656738, "rewards/rejected": -11.341832160949707, "step": 15410 }, { "epoch": 0.92, "learning_rate": 9.805427922137373e-08, "logits/chosen": -2.985926389694214, "logits/rejected": -2.8780596256256104, "logps/chosen": -35.72705841064453, "logps/rejected": -1162.277587890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2841925024986267, "rewards/margins": 11.520689964294434, "rewards/rejected": -11.23649787902832, "step": 15420 }, { "epoch": 0.92, "learning_rate": 9.661639888579877e-08, "logits/chosen": -2.934736728668213, "logits/rejected": -2.8498764038085938, "logps/chosen": -35.08636474609375, "logps/rejected": -1127.7509765625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 0.17307087779045105, "rewards/margins": 11.076457023620605, "rewards/rejected": -10.903387069702148, "step": 15430 }, { "epoch": 0.92, "learning_rate": 9.51889314463969e-08, "logits/chosen": -2.9981932640075684, "logits/rejected": -2.9080982208251953, "logps/chosen": -37.607521057128906, "logps/rejected": -1121.16162109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2565361261367798, "rewards/margins": 11.077008247375488, "rewards/rejected": -10.820473670959473, "step": 15440 }, { "epoch": 0.92, "learning_rate": 9.377188308781038e-08, "logits/chosen": -2.9733824729919434, "logits/rejected": -2.8770201206207275, "logps/chosen": -40.517799377441406, "logps/rejected": -1190.4144287109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.224281907081604, "rewards/margins": 11.735956192016602, "rewards/rejected": -11.511672973632812, "step": 15450 }, { "epoch": 0.92, "learning_rate": 9.236525994954142e-08, "logits/chosen": -2.9672045707702637, "logits/rejected": -2.8878884315490723, "logps/chosen": -39.7332649230957, "logps/rejected": -1134.380859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2319769412279129, "rewards/margins": 11.2018461227417, "rewards/rejected": -10.969869613647461, "step": 15460 }, { "epoch": 0.92, "learning_rate": 9.096906812592315e-08, "logits/chosen": -2.9887452125549316, "logits/rejected": -2.8882734775543213, "logps/chosen": -33.223121643066406, "logps/rejected": -1205.3875732421875, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": 0.23574571311473846, "rewards/margins": 11.896418571472168, "rewards/rejected": -11.660672187805176, "step": 15470 }, { "epoch": 0.92, "learning_rate": 8.958331366609424e-08, "logits/chosen": -2.9660253524780273, "logits/rejected": -2.896043300628662, "logps/chosen": -40.176963806152344, "logps/rejected": -1182.551025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24371537566184998, "rewards/margins": 11.687970161437988, "rewards/rejected": -11.444254875183105, "step": 15480 }, { "epoch": 0.92, "learning_rate": 8.820800257397205e-08, "logits/chosen": -2.9691712856292725, "logits/rejected": -2.8729615211486816, "logps/chosen": -38.7834358215332, "logps/rejected": -1143.245361328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.25553739070892334, "rewards/margins": 11.304418563842773, "rewards/rejected": -11.048881530761719, "step": 15490 }, { "epoch": 0.92, "learning_rate": 8.684314080822764e-08, "logits/chosen": -2.9784903526306152, "logits/rejected": -2.890383243560791, "logps/chosen": -37.947574615478516, "logps/rejected": -1186.1884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2642391324043274, "rewards/margins": 11.740569114685059, "rewards/rejected": -11.476330757141113, "step": 15500 }, { "epoch": 0.92, "learning_rate": 8.54887342822594e-08, "logits/chosen": -2.989776134490967, "logits/rejected": -2.892819881439209, "logps/chosen": -43.613285064697266, "logps/rejected": -1137.437744140625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": 0.2524784505367279, "rewards/margins": 11.245490074157715, "rewards/rejected": -10.993012428283691, "step": 15510 }, { "epoch": 0.93, "learning_rate": 8.414478886416611e-08, "logits/chosen": -2.9835121631622314, "logits/rejected": -2.9003612995147705, "logps/chosen": -35.15013122558594, "logps/rejected": -1144.12255859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24543063342571259, "rewards/margins": 11.30392074584961, "rewards/rejected": -11.058489799499512, "step": 15520 }, { "epoch": 0.93, "learning_rate": 8.281131037672474e-08, "logits/chosen": -2.9811716079711914, "logits/rejected": -2.9232337474823, "logps/chosen": -37.65182876586914, "logps/rejected": -1209.619873046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26974326372146606, "rewards/margins": 12.005250930786133, "rewards/rejected": -11.735507011413574, "step": 15530 }, { "epoch": 0.93, "learning_rate": 8.148830459736106e-08, "logits/chosen": -2.9730443954467773, "logits/rejected": -2.893909215927124, "logps/chosen": -41.03983688354492, "logps/rejected": -1192.9407958984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26030129194259644, "rewards/margins": 11.826200485229492, "rewards/rejected": -11.565900802612305, "step": 15540 }, { "epoch": 0.93, "learning_rate": 8.017577725812825e-08, "logits/chosen": -2.9574265480041504, "logits/rejected": -2.886592388153076, "logps/chosen": -41.14467239379883, "logps/rejected": -1138.6055908203125, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": 0.2535688579082489, "rewards/margins": 11.259025573730469, "rewards/rejected": -11.005456924438477, "step": 15550 }, { "epoch": 0.93, "learning_rate": 7.887373404568133e-08, "logits/chosen": -2.9514946937561035, "logits/rejected": -2.8436732292175293, "logps/chosen": -51.858482360839844, "logps/rejected": -1135.4613037109375, "loss": 0.0505, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.14882883429527283, "rewards/margins": 11.120051383972168, "rewards/rejected": -10.971221923828125, "step": 15560 }, { "epoch": 0.93, "learning_rate": 7.758218060124916e-08, "logits/chosen": -2.9595909118652344, "logits/rejected": -2.8829185962677, "logps/chosen": -42.34349822998047, "logps/rejected": -1200.5792236328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.250970721244812, "rewards/margins": 11.879231452941895, "rewards/rejected": -11.628260612487793, "step": 15570 }, { "epoch": 0.93, "learning_rate": 7.630112252061534e-08, "logits/chosen": -2.979240894317627, "logits/rejected": -2.8974432945251465, "logps/chosen": -35.00056838989258, "logps/rejected": -1199.154296875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.27570241689682007, "rewards/margins": 11.880024909973145, "rewards/rejected": -11.604321479797363, "step": 15580 }, { "epoch": 0.93, "learning_rate": 7.503056535408975e-08, "logits/chosen": -3.0068459510803223, "logits/rejected": -2.9129586219787598, "logps/chosen": -39.467857360839844, "logps/rejected": -1203.6297607421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25149697065353394, "rewards/margins": 11.9050874710083, "rewards/rejected": -11.653589248657227, "step": 15590 }, { "epoch": 0.93, "learning_rate": 7.377051460648682e-08, "logits/chosen": -2.927553176879883, "logits/rejected": -2.865756034851074, "logps/chosen": -36.8929328918457, "logps/rejected": -1194.6793212890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.24821588397026062, "rewards/margins": 11.8215913772583, "rewards/rejected": -11.573375701904297, "step": 15600 }, { "epoch": 0.93, "learning_rate": 7.252097573709982e-08, "logits/chosen": -3.0071861743927, "logits/rejected": -2.9123787879943848, "logps/chosen": -37.60557556152344, "logps/rejected": -1180.5096435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26357072591781616, "rewards/margins": 11.70094108581543, "rewards/rejected": -11.437371253967285, "step": 15610 }, { "epoch": 0.93, "learning_rate": 7.128195415967987e-08, "logits/chosen": -2.9733834266662598, "logits/rejected": -2.9045004844665527, "logps/chosen": -36.119014739990234, "logps/rejected": -1213.737060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.260175883769989, "rewards/margins": 12.008655548095703, "rewards/rejected": -11.748479843139648, "step": 15620 }, { "epoch": 0.93, "learning_rate": 7.005345524240926e-08, "logits/chosen": -2.981187582015991, "logits/rejected": -2.8913638591766357, "logps/chosen": -40.44076156616211, "logps/rejected": -1160.3564453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.28412964940071106, "rewards/margins": 11.505317687988281, "rewards/rejected": -11.221187591552734, "step": 15630 }, { "epoch": 0.93, "learning_rate": 6.883548430788062e-08, "logits/chosen": -2.9608731269836426, "logits/rejected": -2.9003024101257324, "logps/chosen": -35.694034576416016, "logps/rejected": -1155.848388671875, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": 0.26121994853019714, "rewards/margins": 11.4380464553833, "rewards/rejected": -11.176827430725098, "step": 15640 }, { "epoch": 0.93, "learning_rate": 6.762804663307365e-08, "logits/chosen": -2.9608874320983887, "logits/rejected": -2.8585567474365234, "logps/chosen": -42.31610870361328, "logps/rejected": -1176.7828369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2703687250614166, "rewards/margins": 11.65398120880127, "rewards/rejected": -11.383612632751465, "step": 15650 }, { "epoch": 0.93, "learning_rate": 6.643114744933038e-08, "logits/chosen": -2.956362009048462, "logits/rejected": -2.885291814804077, "logps/chosen": -39.40729522705078, "logps/rejected": -1182.58251953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23031875491142273, "rewards/margins": 11.694676399230957, "rewards/rejected": -11.464356422424316, "step": 15660 }, { "epoch": 0.93, "learning_rate": 6.524479194233463e-08, "logits/chosen": -3.0468926429748535, "logits/rejected": -2.9202284812927246, "logps/chosen": -41.790889739990234, "logps/rejected": -1219.505615234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24841301143169403, "rewards/margins": 12.062105178833008, "rewards/rejected": -11.81369400024414, "step": 15670 }, { "epoch": 0.94, "learning_rate": 6.406898525208843e-08, "logits/chosen": -2.945479154586792, "logits/rejected": -2.857006072998047, "logps/chosen": -42.10221481323242, "logps/rejected": -1171.552978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.21837592124938965, "rewards/margins": 11.560086250305176, "rewards/rejected": -11.34170913696289, "step": 15680 }, { "epoch": 0.94, "learning_rate": 6.290373247289012e-08, "logits/chosen": -2.95981502532959, "logits/rejected": -2.872325897216797, "logps/chosen": -37.947818756103516, "logps/rejected": -1187.836669921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25823599100112915, "rewards/margins": 11.765544891357422, "rewards/rejected": -11.507308959960938, "step": 15690 }, { "epoch": 0.94, "learning_rate": 6.174903865331177e-08, "logits/chosen": -3.0016162395477295, "logits/rejected": -2.884253978729248, "logps/chosen": -40.47675323486328, "logps/rejected": -1152.930419921875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.24836640059947968, "rewards/margins": 11.407809257507324, "rewards/rejected": -11.159442901611328, "step": 15700 }, { "epoch": 0.94, "learning_rate": 6.060490879617853e-08, "logits/chosen": -2.9558331966400146, "logits/rejected": -2.8886265754699707, "logps/chosen": -42.451087951660156, "logps/rejected": -1171.815673828125, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.24435396492481232, "rewards/margins": 11.594766616821289, "rewards/rejected": -11.35041332244873, "step": 15710 }, { "epoch": 0.94, "learning_rate": 5.947134785854597e-08, "logits/chosen": -2.966118335723877, "logits/rejected": -2.892910957336426, "logps/chosen": -38.194374084472656, "logps/rejected": -1125.1644287109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23814108967781067, "rewards/margins": 11.108500480651855, "rewards/rejected": -10.870360374450684, "step": 15720 }, { "epoch": 0.94, "learning_rate": 5.8348360751677435e-08, "logits/chosen": -2.963320016860962, "logits/rejected": -2.877458333969116, "logps/chosen": -40.509891510009766, "logps/rejected": -1138.3682861328125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.287384569644928, "rewards/margins": 11.290287017822266, "rewards/rejected": -11.002902030944824, "step": 15730 }, { "epoch": 0.94, "learning_rate": 5.7235952341026524e-08, "logits/chosen": -2.983717441558838, "logits/rejected": -2.864889144897461, "logps/chosen": -44.33399963378906, "logps/rejected": -1117.5361328125, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 0.20728027820587158, "rewards/margins": 11.006080627441406, "rewards/rejected": -10.79880142211914, "step": 15740 }, { "epoch": 0.94, "learning_rate": 5.6134127446211275e-08, "logits/chosen": -2.9662249088287354, "logits/rejected": -2.8873026371002197, "logps/chosen": -30.30438232421875, "logps/rejected": -1199.1956787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26358285546302795, "rewards/margins": 11.88241958618164, "rewards/rejected": -11.618837356567383, "step": 15750 }, { "epoch": 0.94, "learning_rate": 5.5042890840996676e-08, "logits/chosen": -3.00521183013916, "logits/rejected": -2.891458034515381, "logps/chosen": -33.87781524658203, "logps/rejected": -1187.2978515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2540748119354248, "rewards/margins": 11.746267318725586, "rewards/rejected": -11.492192268371582, "step": 15760 }, { "epoch": 0.94, "learning_rate": 5.3962247253273035e-08, "logits/chosen": -2.98069429397583, "logits/rejected": -2.893838405609131, "logps/chosen": -44.0556640625, "logps/rejected": -1148.7401123046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.20948807895183563, "rewards/margins": 11.322388648986816, "rewards/rejected": -11.112900733947754, "step": 15770 }, { "epoch": 0.94, "learning_rate": 5.2892201365035144e-08, "logits/chosen": -2.9887852668762207, "logits/rejected": -2.9215283393859863, "logps/chosen": -33.72168731689453, "logps/rejected": -1182.263427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26901835203170776, "rewards/margins": 11.727694511413574, "rewards/rejected": -11.458677291870117, "step": 15780 }, { "epoch": 0.94, "learning_rate": 5.18327578123623e-08, "logits/chosen": -2.967113494873047, "logits/rejected": -2.900663375854492, "logps/chosen": -38.1612663269043, "logps/rejected": -1129.042236328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2649931311607361, "rewards/margins": 11.192712783813477, "rewards/rejected": -10.927720069885254, "step": 15790 }, { "epoch": 0.94, "learning_rate": 5.078392118539777e-08, "logits/chosen": -2.9768669605255127, "logits/rejected": -2.903657913208008, "logps/chosen": -40.830810546875, "logps/rejected": -1159.336181640625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": 0.21502785384655, "rewards/margins": 11.42724609375, "rewards/rejected": -11.212220191955566, "step": 15800 }, { "epoch": 0.94, "learning_rate": 4.974569602832991e-08, "logits/chosen": -2.9659359455108643, "logits/rejected": -2.868215322494507, "logps/chosen": -38.496734619140625, "logps/rejected": -1166.22705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.21149468421936035, "rewards/margins": 11.494048118591309, "rewards/rejected": -11.282553672790527, "step": 15810 }, { "epoch": 0.94, "learning_rate": 4.8718086839370794e-08, "logits/chosen": -2.977973461151123, "logits/rejected": -2.874011516571045, "logps/chosen": -34.365379333496094, "logps/rejected": -1196.691162109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27920398116111755, "rewards/margins": 11.861215591430664, "rewards/rejected": -11.582010269165039, "step": 15820 }, { "epoch": 0.94, "learning_rate": 4.7701098070739304e-08, "logits/chosen": -2.971156358718872, "logits/rejected": -2.8932762145996094, "logps/chosen": -35.232139587402344, "logps/rejected": -1212.9073486328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26702433824539185, "rewards/margins": 12.026121139526367, "rewards/rejected": -11.7590970993042, "step": 15830 }, { "epoch": 0.94, "learning_rate": 4.66947341286389e-08, "logits/chosen": -2.9809508323669434, "logits/rejected": -2.867298126220703, "logps/chosen": -37.77024459838867, "logps/rejected": -1192.4586181640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.26922550797462463, "rewards/margins": 11.813329696655273, "rewards/rejected": -11.544105529785156, "step": 15840 }, { "epoch": 0.95, "learning_rate": 4.5698999373240404e-08, "logits/chosen": -2.946019172668457, "logits/rejected": -2.8675432205200195, "logps/chosen": -45.59740447998047, "logps/rejected": -1126.293212890625, "loss": 0.0188, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.17943409085273743, "rewards/margins": 11.056106567382812, "rewards/rejected": -10.876672744750977, "step": 15850 }, { "epoch": 0.95, "learning_rate": 4.471389811866289e-08, "logits/chosen": -2.9778640270233154, "logits/rejected": -2.889017105102539, "logps/chosen": -31.9451904296875, "logps/rejected": -1172.9234619140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27734583616256714, "rewards/margins": 11.626118659973145, "rewards/rejected": -11.348772048950195, "step": 15860 }, { "epoch": 0.95, "learning_rate": 4.373943463295477e-08, "logits/chosen": -2.961275100708008, "logits/rejected": -2.869232177734375, "logps/chosen": -36.65496063232422, "logps/rejected": -1185.671630859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2385808527469635, "rewards/margins": 11.730873107910156, "rewards/rejected": -11.492292404174805, "step": 15870 }, { "epoch": 0.95, "learning_rate": 4.277561313807493e-08, "logits/chosen": -2.9707489013671875, "logits/rejected": -2.873931646347046, "logps/chosen": -37.49226760864258, "logps/rejected": -1142.8839111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26636847853660583, "rewards/margins": 11.32282543182373, "rewards/rejected": -11.056455612182617, "step": 15880 }, { "epoch": 0.95, "learning_rate": 4.1822437809874994e-08, "logits/chosen": -2.9704997539520264, "logits/rejected": -2.8810935020446777, "logps/chosen": -38.36634063720703, "logps/rejected": -1167.6915283203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24661222100257874, "rewards/margins": 11.55004596710205, "rewards/rejected": -11.303433418273926, "step": 15890 }, { "epoch": 0.95, "learning_rate": 4.0879912778080956e-08, "logits/chosen": -2.974407196044922, "logits/rejected": -2.898735523223877, "logps/chosen": -35.60115432739258, "logps/rejected": -1173.841064453125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.22720551490783691, "rewards/margins": 11.601739883422852, "rewards/rejected": -11.374534606933594, "step": 15900 }, { "epoch": 0.95, "learning_rate": 3.994804212627462e-08, "logits/chosen": -2.955533504486084, "logits/rejected": -2.853759765625, "logps/chosen": -37.67648696899414, "logps/rejected": -1166.0150146484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2725805938243866, "rewards/margins": 11.555089950561523, "rewards/rejected": -11.282508850097656, "step": 15910 }, { "epoch": 0.95, "learning_rate": 3.902682989187889e-08, "logits/chosen": -2.9881701469421387, "logits/rejected": -2.882383346557617, "logps/chosen": -40.305213928222656, "logps/rejected": -1192.7138671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2590678036212921, "rewards/margins": 11.803757667541504, "rewards/rejected": -11.54469108581543, "step": 15920 }, { "epoch": 0.95, "learning_rate": 3.8116280066134994e-08, "logits/chosen": -2.982243776321411, "logits/rejected": -2.8969321250915527, "logps/chosen": -35.67623519897461, "logps/rejected": -1180.2974853515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2706843614578247, "rewards/margins": 11.699018478393555, "rewards/rejected": -11.42833423614502, "step": 15930 }, { "epoch": 0.95, "learning_rate": 3.721639659409054e-08, "logits/chosen": -2.9596426486968994, "logits/rejected": -2.898728609085083, "logps/chosen": -36.92039108276367, "logps/rejected": -1156.459716796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2683895230293274, "rewards/margins": 11.460421562194824, "rewards/rejected": -11.192031860351562, "step": 15940 }, { "epoch": 0.95, "learning_rate": 3.63271833745793e-08, "logits/chosen": -2.986008882522583, "logits/rejected": -2.884777545928955, "logps/chosen": -50.27680587768555, "logps/rejected": -1167.973388671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.17356684803962708, "rewards/margins": 11.470231056213379, "rewards/rejected": -11.296664237976074, "step": 15950 }, { "epoch": 0.95, "learning_rate": 3.544864426020478e-08, "logits/chosen": -2.9796030521392822, "logits/rejected": -2.9039759635925293, "logps/chosen": -43.04854202270508, "logps/rejected": -1174.984619140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2309647500514984, "rewards/margins": 11.60192584991455, "rewards/rejected": -11.37096118927002, "step": 15960 }, { "epoch": 0.95, "learning_rate": 3.4580783057324706e-08, "logits/chosen": -2.986281633377075, "logits/rejected": -2.8788909912109375, "logps/chosen": -42.02538299560547, "logps/rejected": -1119.833740234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2473711222410202, "rewards/margins": 11.07331657409668, "rewards/rejected": -10.825945854187012, "step": 15970 }, { "epoch": 0.95, "learning_rate": 3.3723603526032435e-08, "logits/chosen": -2.961486339569092, "logits/rejected": -2.844026565551758, "logps/chosen": -35.09319305419922, "logps/rejected": -1184.122314453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2800231873989105, "rewards/margins": 11.744056701660156, "rewards/rejected": -11.464034080505371, "step": 15980 }, { "epoch": 0.95, "learning_rate": 3.2877109380143604e-08, "logits/chosen": -2.9891583919525146, "logits/rejected": -2.878838300704956, "logps/chosen": -36.0637321472168, "logps/rejected": -1176.429443359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26586517691612244, "rewards/margins": 11.659668922424316, "rewards/rejected": -11.393804550170898, "step": 15990 }, { "epoch": 0.95, "learning_rate": 3.204130428717672e-08, "logits/chosen": -2.995089292526245, "logits/rejected": -2.891414165496826, "logps/chosen": -51.05775833129883, "logps/rejected": -1099.09228515625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.17868097126483917, "rewards/margins": 10.789872169494629, "rewards/rejected": -10.611190795898438, "step": 16000 }, { "epoch": 0.95, "learning_rate": 3.121619186834041e-08, "logits/chosen": -2.9871826171875, "logits/rejected": -2.9054388999938965, "logps/chosen": -39.936466217041016, "logps/rejected": -1155.6578369140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2073655128479004, "rewards/margins": 11.38557243347168, "rewards/rejected": -11.178205490112305, "step": 16010 }, { "epoch": 0.96, "learning_rate": 3.040177569851477e-08, "logits/chosen": -2.9579861164093018, "logits/rejected": -2.8813881874084473, "logps/chosen": -40.56143569946289, "logps/rejected": -1154.5406494140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25788116455078125, "rewards/margins": 11.435400009155273, "rewards/rejected": -11.177520751953125, "step": 16020 }, { "epoch": 0.96, "learning_rate": 2.9598059306238658e-08, "logits/chosen": -2.962851047515869, "logits/rejected": -2.8751142024993896, "logps/chosen": -42.170265197753906, "logps/rejected": -1160.629150390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.236520916223526, "rewards/margins": 11.468549728393555, "rewards/rejected": -11.232028007507324, "step": 16030 }, { "epoch": 0.96, "learning_rate": 2.8805046173692176e-08, "logits/chosen": -2.9814252853393555, "logits/rejected": -2.8869006633758545, "logps/chosen": -39.44782638549805, "logps/rejected": -1194.7626953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2726927697658539, "rewards/margins": 11.839091300964355, "rewards/rejected": -11.566398620605469, "step": 16040 }, { "epoch": 0.96, "learning_rate": 2.802273973668279e-08, "logits/chosen": -2.9722812175750732, "logits/rejected": -2.9029510021209717, "logps/chosen": -35.10037612915039, "logps/rejected": -1218.4908447265625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2789551019668579, "rewards/margins": 12.094807624816895, "rewards/rejected": -11.815852165222168, "step": 16050 }, { "epoch": 0.96, "learning_rate": 2.725114338463064e-08, "logits/chosen": -3.027747631072998, "logits/rejected": -2.915755271911621, "logps/chosen": -39.191490173339844, "logps/rejected": -1171.199951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.260113388299942, "rewards/margins": 11.59369945526123, "rewards/rejected": -11.333585739135742, "step": 16060 }, { "epoch": 0.96, "learning_rate": 2.6490260460552143e-08, "logits/chosen": -2.941401481628418, "logits/rejected": -2.8418667316436768, "logps/chosen": -44.06707000732422, "logps/rejected": -1196.9881591796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2438884973526001, "rewards/margins": 11.842870712280273, "rewards/rejected": -11.598981857299805, "step": 16070 }, { "epoch": 0.96, "learning_rate": 2.5740094261048342e-08, "logits/chosen": -2.9546456336975098, "logits/rejected": -2.8859753608703613, "logps/chosen": -40.389705657958984, "logps/rejected": -1146.1358642578125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.19589504599571228, "rewards/margins": 11.27999210357666, "rewards/rejected": -11.084096908569336, "step": 16080 }, { "epoch": 0.96, "learning_rate": 2.5000648036287712e-08, "logits/chosen": -2.9891114234924316, "logits/rejected": -2.9075865745544434, "logps/chosen": -35.88082504272461, "logps/rejected": -1164.2396240234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2526836097240448, "rewards/margins": 11.503988265991211, "rewards/rejected": -11.251304626464844, "step": 16090 }, { "epoch": 0.96, "learning_rate": 2.4271924989993646e-08, "logits/chosen": -2.9694743156433105, "logits/rejected": -2.881671905517578, "logps/chosen": -41.920188903808594, "logps/rejected": -1173.8359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23932182788848877, "rewards/margins": 11.600401878356934, "rewards/rejected": -11.361079216003418, "step": 16100 }, { "epoch": 0.96, "learning_rate": 2.3553928279431147e-08, "logits/chosen": -3.014561891555786, "logits/rejected": -2.9235734939575195, "logps/chosen": -39.641578674316406, "logps/rejected": -1176.0086669921875, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": 0.2537968158721924, "rewards/margins": 11.625482559204102, "rewards/rejected": -11.371685981750488, "step": 16110 }, { "epoch": 0.96, "learning_rate": 2.284666101539129e-08, "logits/chosen": -2.9624593257904053, "logits/rejected": -2.8559486865997314, "logps/chosen": -39.33770751953125, "logps/rejected": -1196.863525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.25342100858688354, "rewards/margins": 11.854436874389648, "rewards/rejected": -11.601015090942383, "step": 16120 }, { "epoch": 0.96, "learning_rate": 2.2150126262179273e-08, "logits/chosen": -2.9838085174560547, "logits/rejected": -2.9129385948181152, "logps/chosen": -32.820213317871094, "logps/rejected": -1186.082275390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26039576530456543, "rewards/margins": 11.752208709716797, "rewards/rejected": -11.491812705993652, "step": 16130 }, { "epoch": 0.96, "learning_rate": 2.1464327037600264e-08, "logits/chosen": -2.972182512283325, "logits/rejected": -2.855874538421631, "logps/chosen": -43.940040588378906, "logps/rejected": -1168.400146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2571015954017639, "rewards/margins": 11.569953918457031, "rewards/rejected": -11.31285285949707, "step": 16140 }, { "epoch": 0.96, "learning_rate": 2.0789266312947477e-08, "logits/chosen": -2.97621488571167, "logits/rejected": -2.9311749935150146, "logps/chosen": -52.071205139160156, "logps/rejected": -1099.9063720703125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.12108077853918076, "rewards/margins": 10.750368118286133, "rewards/rejected": -10.629287719726562, "step": 16150 }, { "epoch": 0.96, "learning_rate": 2.0124947012987172e-08, "logits/chosen": -2.9603981971740723, "logits/rejected": -2.891611337661743, "logps/chosen": -36.835731506347656, "logps/rejected": -1200.822998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2571175992488861, "rewards/margins": 11.879572868347168, "rewards/rejected": -11.622456550598145, "step": 16160 }, { "epoch": 0.96, "learning_rate": 1.947137201594923e-08, "logits/chosen": -2.9811434745788574, "logits/rejected": -2.9273011684417725, "logps/chosen": -58.503089904785156, "logps/rejected": -1137.667724609375, "loss": 0.0295, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.09261783212423325, "rewards/margins": 11.096138000488281, "rewards/rejected": -11.003519058227539, "step": 16170 }, { "epoch": 0.96, "learning_rate": 1.8828544153510765e-08, "logits/chosen": -2.990734577178955, "logits/rejected": -2.910461902618408, "logps/chosen": -34.00423049926758, "logps/rejected": -1179.03173828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27350011467933655, "rewards/margins": 11.685291290283203, "rewards/rejected": -11.41179084777832, "step": 16180 }, { "epoch": 0.97, "learning_rate": 1.8196466210787245e-08, "logits/chosen": -3.011914014816284, "logits/rejected": -2.909181833267212, "logps/chosen": -36.3491096496582, "logps/rejected": -1159.255126953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24578294157981873, "rewards/margins": 11.467875480651855, "rewards/rejected": -11.222091674804688, "step": 16190 }, { "epoch": 0.97, "learning_rate": 1.7575140926318346e-08, "logits/chosen": -2.949711322784424, "logits/rejected": -2.870286226272583, "logps/chosen": -39.97211837768555, "logps/rejected": -1157.61181640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2082953006029129, "rewards/margins": 11.412635803222656, "rewards/rejected": -11.204339981079102, "step": 16200 }, { "epoch": 0.97, "learning_rate": 1.6964570992057394e-08, "logits/chosen": -2.9782352447509766, "logits/rejected": -2.90010142326355, "logps/chosen": -35.14348602294922, "logps/rejected": -1170.9095458984375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.2308487445116043, "rewards/margins": 11.57271671295166, "rewards/rejected": -11.34186840057373, "step": 16210 }, { "epoch": 0.97, "learning_rate": 1.6364759053358603e-08, "logits/chosen": -2.9484035968780518, "logits/rejected": -2.8656609058380127, "logps/chosen": -34.03763198852539, "logps/rejected": -1153.902099609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27117204666137695, "rewards/margins": 11.42363166809082, "rewards/rejected": -11.152460098266602, "step": 16220 }, { "epoch": 0.97, "learning_rate": 1.5775707708966247e-08, "logits/chosen": -2.970412254333496, "logits/rejected": -2.8743820190429688, "logps/chosen": -45.48604965209961, "logps/rejected": -1115.433837890625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20096087455749512, "rewards/margins": 10.97292423248291, "rewards/rejected": -10.771963119506836, "step": 16230 }, { "epoch": 0.97, "learning_rate": 1.5197419511003564e-08, "logits/chosen": -2.9813241958618164, "logits/rejected": -2.9183154106140137, "logps/chosen": -37.35383224487305, "logps/rejected": -1206.75146484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2884852886199951, "rewards/margins": 11.980317115783691, "rewards/rejected": -11.691831588745117, "step": 16240 }, { "epoch": 0.97, "learning_rate": 1.4629896964960533e-08, "logits/chosen": -2.9362940788269043, "logits/rejected": -2.856788158416748, "logps/chosen": -35.481658935546875, "logps/rejected": -1181.284912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2726444602012634, "rewards/margins": 11.71674633026123, "rewards/rejected": -11.444101333618164, "step": 16250 }, { "epoch": 0.97, "learning_rate": 1.4073142529685003e-08, "logits/chosen": -2.9321391582489014, "logits/rejected": -2.8587870597839355, "logps/chosen": -43.0134391784668, "logps/rejected": -1222.186767578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.29056674242019653, "rewards/margins": 12.12594223022461, "rewards/rejected": -11.835375785827637, "step": 16260 }, { "epoch": 0.97, "learning_rate": 1.3527158617370196e-08, "logits/chosen": -2.9519286155700684, "logits/rejected": -2.8658111095428467, "logps/chosen": -34.260013580322266, "logps/rejected": -1149.9425048828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2419126033782959, "rewards/margins": 11.358358383178711, "rewards/rejected": -11.116446495056152, "step": 16270 }, { "epoch": 0.97, "learning_rate": 1.2991947593545273e-08, "logits/chosen": -2.987269878387451, "logits/rejected": -2.9012413024902344, "logps/chosen": -41.28490447998047, "logps/rejected": -1183.5062255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2618003785610199, "rewards/margins": 11.727599143981934, "rewards/rejected": -11.465797424316406, "step": 16280 }, { "epoch": 0.97, "learning_rate": 1.2467511777064789e-08, "logits/chosen": -2.9489338397979736, "logits/rejected": -2.8705973625183105, "logps/chosen": -44.069854736328125, "logps/rejected": -1168.801025390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.2709759473800659, "rewards/margins": 11.579866409301758, "rewards/rejected": -11.308889389038086, "step": 16290 }, { "epoch": 0.97, "learning_rate": 1.1953853440098418e-08, "logits/chosen": -2.9904797077178955, "logits/rejected": -2.9099016189575195, "logps/chosen": -42.23029708862305, "logps/rejected": -1199.8133544921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24413847923278809, "rewards/margins": 11.865653991699219, "rewards/rejected": -11.621516227722168, "step": 16300 }, { "epoch": 0.97, "learning_rate": 1.145097480812124e-08, "logits/chosen": -2.9608254432678223, "logits/rejected": -2.8798019886016846, "logps/chosen": -40.044334411621094, "logps/rejected": -1136.4405517578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.2493135929107666, "rewards/margins": 11.235330581665039, "rewards/rejected": -10.986016273498535, "step": 16310 }, { "epoch": 0.97, "learning_rate": 1.0958878059905143e-08, "logits/chosen": -3.001274824142456, "logits/rejected": -2.8766798973083496, "logps/chosen": -38.15780258178711, "logps/rejected": -1177.786865234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25999996066093445, "rewards/margins": 11.665762901306152, "rewards/rejected": -11.405763626098633, "step": 16320 }, { "epoch": 0.97, "learning_rate": 1.0477565327507155e-08, "logits/chosen": -3.022473096847534, "logits/rejected": -2.9047179222106934, "logps/chosen": -40.209774017333984, "logps/rejected": -1150.265869140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23502345383167267, "rewards/margins": 11.352181434631348, "rewards/rejected": -11.117156982421875, "step": 16330 }, { "epoch": 0.97, "learning_rate": 1.0007038696262517e-08, "logits/chosen": -3.013058662414551, "logits/rejected": -2.8661482334136963, "logps/chosen": -34.97023391723633, "logps/rejected": -1155.0009765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.275210440158844, "rewards/margins": 11.457010269165039, "rewards/rejected": -11.181798934936523, "step": 16340 }, { "epoch": 0.97, "learning_rate": 9.547300204773845e-09, "logits/chosen": -2.960892915725708, "logits/rejected": -2.8497262001037598, "logps/chosen": -38.67884826660156, "logps/rejected": -1145.352783203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26833951473236084, "rewards/margins": 11.351669311523438, "rewards/rejected": -11.083330154418945, "step": 16350 }, { "epoch": 0.98, "learning_rate": 9.098351844903653e-09, "logits/chosen": -2.948535442352295, "logits/rejected": -2.8796467781066895, "logps/chosen": -41.04463577270508, "logps/rejected": -1192.374267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2608184218406677, "rewards/margins": 11.793676376342773, "rewards/rejected": -11.532856941223145, "step": 16360 }, { "epoch": 0.98, "learning_rate": 8.660195561764617e-09, "logits/chosen": -2.981036901473999, "logits/rejected": -2.887997627258301, "logps/chosen": -41.90972137451172, "logps/rejected": -1125.4244384765625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.20678074657917023, "rewards/margins": 11.082629203796387, "rewards/rejected": -10.875848770141602, "step": 16370 }, { "epoch": 0.98, "learning_rate": 8.232833253712657e-09, "logits/chosen": -2.969620943069458, "logits/rejected": -2.887453079223633, "logps/chosen": -36.70418930053711, "logps/rejected": -1213.3831787109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23378881812095642, "rewards/margins": 12.001017570495605, "rewards/rejected": -11.767228126525879, "step": 16380 }, { "epoch": 0.98, "learning_rate": 7.816266772336378e-09, "logits/chosen": -2.9656527042388916, "logits/rejected": -2.861576557159424, "logps/chosen": -39.8578987121582, "logps/rejected": -1159.412109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.21195825934410095, "rewards/margins": 11.436799049377441, "rewards/rejected": -11.224841117858887, "step": 16390 }, { "epoch": 0.98, "learning_rate": 7.410497922451243e-09, "logits/chosen": -2.9760282039642334, "logits/rejected": -2.8868165016174316, "logps/chosen": -46.11217498779297, "logps/rejected": -1188.263916015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.17695455253124237, "rewards/margins": 11.68779182434082, "rewards/rejected": -11.510836601257324, "step": 16400 }, { "epoch": 0.98, "learning_rate": 7.015528462091248e-09, "logits/chosen": -2.9645702838897705, "logits/rejected": -2.8727362155914307, "logps/chosen": -40.766868591308594, "logps/rejected": -1193.519775390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26837486028671265, "rewards/margins": 11.831402778625488, "rewards/rejected": -11.563028335571289, "step": 16410 }, { "epoch": 0.98, "learning_rate": 6.63136010250004e-09, "logits/chosen": -2.9833381175994873, "logits/rejected": -2.889453411102295, "logps/chosen": -38.057273864746094, "logps/rejected": -1191.997802734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23931416869163513, "rewards/margins": 11.787971496582031, "rewards/rejected": -11.548656463623047, "step": 16420 }, { "epoch": 0.98, "learning_rate": 6.257994508124532e-09, "logits/chosen": -2.9598069190979004, "logits/rejected": -2.8813555240631104, "logps/chosen": -33.98401641845703, "logps/rejected": -1151.73876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.25832343101501465, "rewards/margins": 11.402207374572754, "rewards/rejected": -11.14388370513916, "step": 16430 }, { "epoch": 0.98, "learning_rate": 5.895433296608799e-09, "logits/chosen": -2.991654396057129, "logits/rejected": -2.900341510772705, "logps/chosen": -44.223487854003906, "logps/rejected": -1105.039306640625, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": 0.238483265042305, "rewards/margins": 10.919299125671387, "rewards/rejected": -10.680815696716309, "step": 16440 }, { "epoch": 0.98, "learning_rate": 5.543678038784361e-09, "logits/chosen": -2.9614899158477783, "logits/rejected": -2.8717026710510254, "logps/chosen": -37.13172149658203, "logps/rejected": -1191.864013671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23685677349567413, "rewards/margins": 11.78090763092041, "rewards/rejected": -11.544050216674805, "step": 16450 }, { "epoch": 0.98, "learning_rate": 5.202730258665745e-09, "logits/chosen": -2.970689296722412, "logits/rejected": -2.876586437225342, "logps/chosen": -32.83209228515625, "logps/rejected": -1168.235595703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27774155139923096, "rewards/margins": 11.589604377746582, "rewards/rejected": -11.311861991882324, "step": 16460 }, { "epoch": 0.98, "learning_rate": 4.872591433442708e-09, "logits/chosen": -2.9545395374298096, "logits/rejected": -2.871737003326416, "logps/chosen": -37.15007400512695, "logps/rejected": -1156.5916748046875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.2679993510246277, "rewards/margins": 11.456220626831055, "rewards/rejected": -11.188220977783203, "step": 16470 }, { "epoch": 0.98, "learning_rate": 4.5532629934744166e-09, "logits/chosen": -2.977311611175537, "logits/rejected": -2.883059024810791, "logps/chosen": -43.00679397583008, "logps/rejected": -1189.927490234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26236066222190857, "rewards/margins": 11.771711349487305, "rewards/rejected": -11.50935173034668, "step": 16480 }, { "epoch": 0.98, "learning_rate": 4.244746322282501e-09, "logits/chosen": -2.9868063926696777, "logits/rejected": -2.907980442047119, "logps/chosen": -36.86524200439453, "logps/rejected": -1151.93896484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23324015736579895, "rewards/margins": 11.366656303405762, "rewards/rejected": -11.133418083190918, "step": 16490 }, { "epoch": 0.98, "learning_rate": 3.94704275654606e-09, "logits/chosen": -2.9725594520568848, "logits/rejected": -2.885079860687256, "logps/chosen": -35.8339958190918, "logps/rejected": -1187.140380859375, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": 0.2880643606185913, "rewards/margins": 11.780933380126953, "rewards/rejected": -11.492868423461914, "step": 16500 }, { "epoch": 0.98, "learning_rate": 3.6601535860950053e-09, "logits/chosen": -2.9902501106262207, "logits/rejected": -2.8780014514923096, "logps/chosen": -46.48346710205078, "logps/rejected": -1166.72607421875, "loss": 0.0201, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.19129939377307892, "rewards/margins": 11.4829740524292, "rewards/rejected": -11.29167366027832, "step": 16510 }, { "epoch": 0.99, "learning_rate": 3.3840800539047815e-09, "logits/chosen": -2.965071678161621, "logits/rejected": -2.884244680404663, "logps/chosen": -36.220340728759766, "logps/rejected": -1200.587646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.25836434960365295, "rewards/margins": 11.886749267578125, "rewards/rejected": -11.628385543823242, "step": 16520 }, { "epoch": 0.99, "learning_rate": 3.1188233560913717e-09, "logits/chosen": -2.9709270000457764, "logits/rejected": -2.8743155002593994, "logps/chosen": -36.963584899902344, "logps/rejected": -1192.3828125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.2683446407318115, "rewards/margins": 11.827230453491211, "rewards/rejected": -11.55888557434082, "step": 16530 }, { "epoch": 0.99, "learning_rate": 2.8643846419057484e-09, "logits/chosen": -2.9808244705200195, "logits/rejected": -2.8893260955810547, "logps/chosen": -40.28797149658203, "logps/rejected": -1173.3895263671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.27610233426094055, "rewards/margins": 11.634004592895508, "rewards/rejected": -11.357901573181152, "step": 16540 }, { "epoch": 0.99, "learning_rate": 2.6207650137283215e-09, "logits/chosen": -2.9987943172454834, "logits/rejected": -2.9004480838775635, "logps/chosen": -41.34272384643555, "logps/rejected": -1219.3314208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.23778057098388672, "rewards/margins": 12.059732437133789, "rewards/rejected": -11.821952819824219, "step": 16550 }, { "epoch": 0.99, "learning_rate": 2.3879655270650504e-09, "logits/chosen": -3.0004405975341797, "logits/rejected": -2.862736225128174, "logps/chosen": -45.677555084228516, "logps/rejected": -1162.2830810546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.18425174057483673, "rewards/margins": 11.422266006469727, "rewards/rejected": -11.23801326751709, "step": 16560 }, { "epoch": 0.99, "learning_rate": 2.1659871905430064e-09, "logits/chosen": -2.9814231395721436, "logits/rejected": -2.939014196395874, "logps/chosen": -45.21931838989258, "logps/rejected": -1203.398681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2439136952161789, "rewards/margins": 11.889001846313477, "rewards/rejected": -11.645089149475098, "step": 16570 }, { "epoch": 0.99, "learning_rate": 1.954830965905097e-09, "logits/chosen": -2.9847636222839355, "logits/rejected": -2.884995937347412, "logps/chosen": -35.229042053222656, "logps/rejected": -1190.774169921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2573715150356293, "rewards/margins": 11.794328689575195, "rewards/rejected": -11.536958694458008, "step": 16580 }, { "epoch": 0.99, "learning_rate": 1.7544977680064578e-09, "logits/chosen": -2.971853017807007, "logits/rejected": -2.8907768726348877, "logps/chosen": -31.319881439208984, "logps/rejected": -1159.8157958984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.279995858669281, "rewards/margins": 11.489057540893555, "rewards/rejected": -11.209061622619629, "step": 16590 }, { "epoch": 0.99, "learning_rate": 1.564988464810291e-09, "logits/chosen": -2.9470179080963135, "logits/rejected": -2.885007619857788, "logps/chosen": -44.1840705871582, "logps/rejected": -1149.56396484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2008061408996582, "rewards/margins": 11.32608699798584, "rewards/rejected": -11.125282287597656, "step": 16600 }, { "epoch": 0.99, "learning_rate": 1.386303877384254e-09, "logits/chosen": -2.9641871452331543, "logits/rejected": -2.865833282470703, "logps/chosen": -32.89170837402344, "logps/rejected": -1214.0311279296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2978431284427643, "rewards/margins": 12.052046775817871, "rewards/rejected": -11.754203796386719, "step": 16610 }, { "epoch": 0.99, "learning_rate": 1.2184447798971322e-09, "logits/chosen": -2.9917538166046143, "logits/rejected": -2.8718719482421875, "logps/chosen": -36.97807312011719, "logps/rejected": -1190.203857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.27034687995910645, "rewards/margins": 11.7969970703125, "rewards/rejected": -11.526649475097656, "step": 16620 }, { "epoch": 0.99, "learning_rate": 1.0614118996146727e-09, "logits/chosen": -2.9826889038085938, "logits/rejected": -2.9171955585479736, "logps/chosen": -41.28940963745117, "logps/rejected": -1199.7974853515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.21507024765014648, "rewards/margins": 11.84539794921875, "rewards/rejected": -11.630327224731445, "step": 16630 }, { "epoch": 0.99, "learning_rate": 9.152059168976435e-10, "logits/chosen": -2.9621992111206055, "logits/rejected": -2.8757262229919434, "logps/chosen": -41.11307907104492, "logps/rejected": -1154.6234130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2536494731903076, "rewards/margins": 11.415281295776367, "rewards/rejected": -11.161632537841797, "step": 16640 }, { "epoch": 0.99, "learning_rate": 7.798274651979465e-10, "logits/chosen": -2.955575704574585, "logits/rejected": -2.8880972862243652, "logps/chosen": -41.12010955810547, "logps/rejected": -1189.9884033203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.26463547348976135, "rewards/margins": 11.763185501098633, "rewards/rejected": -11.498550415039062, "step": 16650 }, { "epoch": 0.99, "learning_rate": 6.552771310558426e-10, "logits/chosen": -2.997683048248291, "logits/rejected": -2.893223285675049, "logps/chosen": -38.611656188964844, "logps/rejected": -1099.108642578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.24326014518737793, "rewards/margins": 10.855090141296387, "rewards/rejected": -10.61182975769043, "step": 16660 }, { "epoch": 0.99, "learning_rate": 5.415554540977308e-10, "logits/chosen": -2.963106870651245, "logits/rejected": -2.8892219066619873, "logps/chosen": -37.929832458496094, "logps/rejected": -1151.049072265625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.2738068699836731, "rewards/margins": 11.413668632507324, "rewards/rejected": -11.139863014221191, "step": 16670 }, { "epoch": 0.99, "learning_rate": 4.386629270342058e-10, "logits/chosen": -2.990009069442749, "logits/rejected": -2.920884370803833, "logps/chosen": -37.45532989501953, "logps/rejected": -1216.5748291015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2602490484714508, "rewards/margins": 12.041473388671875, "rewards/rejected": -11.781225204467773, "step": 16680 }, { "epoch": 1.0, "learning_rate": 3.465999956575594e-10, "logits/chosen": -2.9748711585998535, "logits/rejected": -2.8943052291870117, "logps/chosen": -45.50844192504883, "logps/rejected": -1154.0252685546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.23798151314258575, "rewards/margins": 11.409109115600586, "rewards/rejected": -11.171126365661621, "step": 16690 }, { "epoch": 1.0, "learning_rate": 2.653670588390056e-10, "logits/chosen": -2.976849317550659, "logits/rejected": -2.8902807235717773, "logps/chosen": -41.94722366333008, "logps/rejected": -1177.258544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2085374891757965, "rewards/margins": 11.607073783874512, "rewards/rejected": -11.398536682128906, "step": 16700 }, { "epoch": 1.0, "learning_rate": 1.9496446852840244e-10, "logits/chosen": -2.9898717403411865, "logits/rejected": -2.905846357345581, "logps/chosen": -36.809471130371094, "logps/rejected": -1168.546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2536791265010834, "rewards/margins": 11.569839477539062, "rewards/rejected": -11.316161155700684, "step": 16710 }, { "epoch": 1.0, "learning_rate": 1.3539252975175442e-10, "logits/chosen": -2.9380404949188232, "logits/rejected": -2.86262845993042, "logps/chosen": -35.00012969970703, "logps/rejected": -1141.1968994140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2647295594215393, "rewards/margins": 11.30298137664795, "rewards/rejected": -11.038251876831055, "step": 16720 }, { "epoch": 1.0, "learning_rate": 8.665150061093475e-11, "logits/chosen": -2.9658541679382324, "logits/rejected": -2.8255176544189453, "logps/chosen": -45.386322021484375, "logps/rejected": -1217.061767578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.25928011536598206, "rewards/margins": 12.055723190307617, "rewards/rejected": -11.796443939208984, "step": 16730 }, { "epoch": 1.0, "learning_rate": 4.874159228063224e-11, "logits/chosen": -2.982390880584717, "logits/rejected": -2.9110467433929443, "logps/chosen": -38.99374008178711, "logps/rejected": -1175.9229736328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2805853486061096, "rewards/margins": 11.659303665161133, "rewards/rejected": -11.378717422485352, "step": 16740 }, { "epoch": 1.0, "learning_rate": 2.1662969009461632e-11, "logits/chosen": -2.9677343368530273, "logits/rejected": -2.8859071731567383, "logps/chosen": -39.74566650390625, "logps/rejected": -1166.9544677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26400846242904663, "rewards/margins": 11.574089050292969, "rewards/rejected": -11.31008243560791, "step": 16750 }, { "epoch": 1.0, "learning_rate": 5.415748118575703e-12, "logits/chosen": -2.9731383323669434, "logits/rejected": -2.911139965057373, "logps/chosen": -39.51188278198242, "logps/rejected": -1152.6669921875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.24081656336784363, "rewards/margins": 11.391138076782227, "rewards/rejected": -11.150320053100586, "step": 16760 }, { "epoch": 1.0, "learning_rate": 0.0, "logits/chosen": -2.9572763442993164, "logits/rejected": -2.8842952251434326, "logps/chosen": -38.780635833740234, "logps/rejected": -1146.466552734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2800725996494293, "rewards/margins": 11.371965408325195, "rewards/rejected": -11.091894149780273, "step": 16770 }, { "epoch": 1.0, "step": 16770, "total_flos": 0.0, "train_loss": 0.02337361431049117, "train_runtime": 68623.0806, "train_samples_per_second": 1.955, "train_steps_per_second": 0.244 } ], "logging_steps": 10, "max_steps": 16770, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }