diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,21 +1,21 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.9995965030262273, - "eval_steps": 300, - "global_step": 1858, + "epoch": 0.9996020692399522, + "eval_steps": 500, + "global_step": 1884, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, - "learning_rate": 2.6881720430107528e-09, - "logits/chosen": -2.670260429382324, - "logits/rejected": -2.1533777713775635, - "logps/chosen": -299.33551025390625, - "logps/rejected": -186.81130981445312, - "loss": 13125.0, + "learning_rate": 2.645502645502645e-09, + "logits/chosen": -1.8052858114242554, + "logits/rejected": -1.8250553607940674, + "logps/chosen": -201.6904296875, + "logps/rejected": -206.93157958984375, + "loss": 7734.375, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, @@ -26,3089 +26,3083 @@ }, { "epoch": 0.01, - "learning_rate": 2.6881720430107527e-08, - "logits/chosen": -2.4763858318328857, - "logits/rejected": -2.354341745376587, - "logps/chosen": -201.82504272460938, - "logps/rejected": -189.45822143554688, - "loss": 14971.9097, - "rewards/accuracies": 0.4930555522441864, - "rewards/chosen": 9.199242413160391e-06, - "rewards/margins": -8.218608854804188e-05, - "rewards/rejected": 9.138535824604332e-05, - "rewards/safe_rewards": 1.8223654478788376e-05, - "rewards/unsafe_rewards": 1.7482994962847442e-07, + "learning_rate": 2.6455026455026453e-08, + "logits/chosen": -2.025691032409668, + "logits/rejected": -1.8649556636810303, + "logps/chosen": -270.43963623046875, + "logps/rejected": -169.98423767089844, + "loss": 7727.0087, + "rewards/accuracies": 0.4027777910232544, + "rewards/chosen": 4.114356852369383e-05, + "rewards/margins": -0.0002653732954058796, + "rewards/rejected": 0.00030651676934212446, + "rewards/safe_rewards": -1.17086410682532e-05, + "rewards/unsafe_rewards": -0.0006500756135210395, "step": 10 }, { "epoch": 0.01, - "learning_rate": 5.3763440860215054e-08, - "logits/chosen": -2.44279408454895, - "logits/rejected": -2.218726873397827, - "logps/chosen": -226.34970092773438, - "logps/rejected": -181.1803436279297, - "loss": 15239.6516, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.0002221722388640046, - "rewards/margins": -0.0001655549422139302, - "rewards/rejected": -5.6617216614540666e-05, - "rewards/safe_rewards": -0.0002462912234477699, - "rewards/unsafe_rewards": -0.0001980532251764089, + "learning_rate": 5.2910052910052905e-08, + "logits/chosen": -1.961146593093872, + "logits/rejected": -1.873740553855896, + "logps/chosen": -189.17404174804688, + "logps/rejected": -176.31651306152344, + "loss": 7718.007, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -6.340327672660351e-06, + "rewards/margins": -0.00010152898175874725, + "rewards/rejected": 9.518869046587497e-05, + "rewards/safe_rewards": 0.00045737033360637724, + "rewards/unsafe_rewards": -8.718876051716506e-05, "step": 20 }, { "epoch": 0.02, - "learning_rate": 8.064516129032257e-08, - "logits/chosen": -2.4326512813568115, - "logits/rejected": -2.2918035984039307, - "logps/chosen": -215.0783233642578, - "logps/rejected": -189.28309631347656, - "loss": 14993.0922, - "rewards/accuracies": 0.375, - "rewards/chosen": -0.0004244670271873474, - "rewards/margins": -0.000839566346257925, - "rewards/rejected": 0.0004150994645897299, - "rewards/safe_rewards": -0.0002224749478045851, - "rewards/unsafe_rewards": -0.0006264590774662793, + "learning_rate": 7.936507936507936e-08, + "logits/chosen": -1.9912703037261963, + "logits/rejected": -1.883933424949646, + "logps/chosen": -198.4538116455078, + "logps/rejected": -183.28781127929688, + "loss": 7515.9359, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.0001133469631895423, + "rewards/margins": 0.0007399408495984972, + "rewards/rejected": -0.0006265938864089549, + "rewards/safe_rewards": 0.00022509883274324238, + "rewards/unsafe_rewards": 0.0002071214112220332, "step": 30 }, { "epoch": 0.02, - "learning_rate": 1.0752688172043011e-07, - "logits/chosen": -2.375096082687378, - "logits/rejected": -2.157045841217041, - "logps/chosen": -180.7657470703125, - "logps/rejected": -173.87054443359375, - "loss": 14723.7563, - "rewards/accuracies": 0.543749988079071, - "rewards/chosen": -0.00011304272629786283, - "rewards/margins": 0.0008593280799686909, - "rewards/rejected": -0.0009723707917146385, - "rewards/safe_rewards": 0.0004990470479242504, - "rewards/unsafe_rewards": -0.0007251326460391283, + "learning_rate": 1.0582010582010581e-07, + "logits/chosen": -1.927167534828186, + "logits/rejected": -1.8453724384307861, + "logps/chosen": -198.85276794433594, + "logps/rejected": -174.22967529296875, + "loss": 7334.5094, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.00027468582266010344, + "rewards/margins": 0.0014765586238354445, + "rewards/rejected": -0.0012018729466944933, + "rewards/safe_rewards": 0.0002533269871491939, + "rewards/unsafe_rewards": 0.00015336349315475672, "step": 40 }, { "epoch": 0.03, - "learning_rate": 1.3440860215053762e-07, - "logits/chosen": -2.489865303039551, - "logits/rejected": -2.1804325580596924, - "logps/chosen": -209.59664916992188, - "logps/rejected": -167.6984100341797, - "loss": 14816.0938, - "rewards/accuracies": 0.643750011920929, - "rewards/chosen": 0.0011160348076373339, - "rewards/margins": 0.0046949503012001514, - "rewards/rejected": -0.0035789154935628176, - "rewards/safe_rewards": 0.00042248546378687024, - "rewards/unsafe_rewards": 0.0018095843261107802, + "learning_rate": 1.3227513227513225e-07, + "logits/chosen": -2.037893533706665, + "logits/rejected": -1.8426322937011719, + "logps/chosen": -214.9281463623047, + "logps/rejected": -162.3707733154297, + "loss": 7399.5859, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": 0.0017435807967558503, + "rewards/margins": 0.001902301562950015, + "rewards/rejected": -0.00015872062067501247, + "rewards/safe_rewards": 0.002309921896085143, + "rewards/unsafe_rewards": 0.00044932105811312795, "step": 50 }, { "epoch": 0.03, - "learning_rate": 1.6129032258064515e-07, - "logits/chosen": -2.4289777278900146, - "logits/rejected": -2.2897238731384277, - "logps/chosen": -185.92630004882812, - "logps/rejected": -185.1912841796875, - "loss": 14593.3875, - "rewards/accuracies": 0.5562499761581421, - "rewards/chosen": -0.0020237788558006287, - "rewards/margins": 0.002217882312834263, - "rewards/rejected": -0.004241660702973604, - "rewards/safe_rewards": -0.001639070687815547, - "rewards/unsafe_rewards": -0.002408486558124423, + "learning_rate": 1.5873015873015872e-07, + "logits/chosen": -2.011747360229492, + "logits/rejected": -1.8823707103729248, + "logps/chosen": -182.73411560058594, + "logps/rejected": -155.423095703125, + "loss": 7214.4602, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.0006955948774702847, + "rewards/margins": 0.005063413176685572, + "rewards/rejected": -0.0057590072974562645, + "rewards/safe_rewards": -0.0021988481748849154, + "rewards/unsafe_rewards": 0.0001153635821538046, "step": 60 }, { "epoch": 0.04, - "learning_rate": 1.8817204301075268e-07, - "logits/chosen": -2.423598289489746, - "logits/rejected": -2.2320618629455566, - "logps/chosen": -202.7837677001953, - "logps/rejected": -184.62033081054688, - "loss": 14276.2172, - "rewards/accuracies": 0.5375000238418579, - "rewards/chosen": -0.005977807100862265, - "rewards/margins": 0.005661749746650457, - "rewards/rejected": -0.011639557778835297, - "rewards/safe_rewards": -0.00685838982462883, - "rewards/unsafe_rewards": -0.005097225774079561, + "learning_rate": 1.8518518518518516e-07, + "logits/chosen": -1.975612998008728, + "logits/rejected": -1.8158948421478271, + "logps/chosen": -186.48574829101562, + "logps/rejected": -168.57896423339844, + "loss": 7816.8766, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.007440758403390646, + "rewards/margins": 0.010602862574160099, + "rewards/rejected": -0.018043622374534607, + "rewards/safe_rewards": -0.010516250506043434, + "rewards/unsafe_rewards": -0.015666166320443153, "step": 70 }, { "epoch": 0.04, - "learning_rate": 2.1505376344086022e-07, - "logits/chosen": -2.4342398643493652, - "logits/rejected": -2.271530866622925, - "logps/chosen": -221.3511505126953, - "logps/rejected": -196.20684814453125, - "loss": 14430.5766, - "rewards/accuracies": 0.6312500238418579, - "rewards/chosen": -0.017918918281793594, - "rewards/margins": 0.013557764701545238, - "rewards/rejected": -0.03147668391466141, - "rewards/safe_rewards": -0.020387938246130943, - "rewards/unsafe_rewards": -0.015449894592165947, + "learning_rate": 2.1164021164021162e-07, + "logits/chosen": -1.9063125848770142, + "logits/rejected": -1.7897474765777588, + "logps/chosen": -210.2836151123047, + "logps/rejected": -180.822998046875, + "loss": 7304.9531, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.024481967091560364, + "rewards/margins": 0.016244709491729736, + "rewards/rejected": -0.0407266803085804, + "rewards/safe_rewards": -0.02365388534963131, + "rewards/unsafe_rewards": -0.0289783775806427, "step": 80 }, { "epoch": 0.05, - "learning_rate": 2.4193548387096775e-07, - "logits/chosen": -2.4415786266326904, - "logits/rejected": -2.301713466644287, - "logps/chosen": -211.0481414794922, - "logps/rejected": -171.99159240722656, - "loss": 14501.5938, + "learning_rate": 2.3809523809523806e-07, + "logits/chosen": -1.994605302810669, + "logits/rejected": -1.866681694984436, + "logps/chosen": -203.6532440185547, + "logps/rejected": -174.1517791748047, + "loss": 7251.9984, "rewards/accuracies": 0.625, - "rewards/chosen": -0.05115891247987747, - "rewards/margins": 0.025721188634634018, - "rewards/rejected": -0.07688009738922119, - "rewards/safe_rewards": -0.04981667920947075, - "rewards/unsafe_rewards": -0.05250114947557449, + "rewards/chosen": -0.06749475002288818, + "rewards/margins": 0.020768558606505394, + "rewards/rejected": -0.08826331794261932, + "rewards/safe_rewards": -0.06556878238916397, + "rewards/unsafe_rewards": -0.052192188799381256, "step": 90 }, { "epoch": 0.05, - "learning_rate": 2.6881720430107523e-07, - "logits/chosen": -2.4574618339538574, - "logits/rejected": -2.3291263580322266, - "logps/chosen": -204.63087463378906, - "logps/rejected": -179.63002014160156, - "loss": 14364.2516, - "rewards/accuracies": 0.59375, - "rewards/chosen": -0.08542316406965256, - "rewards/margins": 0.03905550017952919, - "rewards/rejected": -0.12447866052389145, - "rewards/safe_rewards": -0.08581903576850891, - "rewards/unsafe_rewards": -0.0850272923707962, + "learning_rate": 2.645502645502645e-07, + "logits/chosen": -1.9495357275009155, + "logits/rejected": -1.8006837368011475, + "logps/chosen": -205.99411010742188, + "logps/rejected": -192.54415893554688, + "loss": 6776.1008, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.11886356770992279, + "rewards/margins": 0.020749244838953018, + "rewards/rejected": -0.1396128088235855, + "rewards/safe_rewards": -0.11704058945178986, + "rewards/unsafe_rewards": -0.1348837912082672, "step": 100 }, { "epoch": 0.06, - "learning_rate": 2.956989247311828e-07, - "logits/chosen": -2.429097890853882, - "logits/rejected": -2.2684311866760254, - "logps/chosen": -222.95913696289062, - "logps/rejected": -187.91293334960938, - "loss": 14761.6688, - "rewards/accuracies": 0.6812499761581421, - "rewards/chosen": -0.11019313335418701, - "rewards/margins": 0.048916272819042206, - "rewards/rejected": -0.159109428524971, - "rewards/safe_rewards": -0.10632093250751495, - "rewards/unsafe_rewards": -0.11406532675027847, + "learning_rate": 2.9100529100529097e-07, + "logits/chosen": -1.9887052774429321, + "logits/rejected": -1.8671073913574219, + "logps/chosen": -226.98001098632812, + "logps/rejected": -217.73733520507812, + "loss": 6636.9766, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.11880362033843994, + "rewards/margins": 0.03935481607913971, + "rewards/rejected": -0.15815845131874084, + "rewards/safe_rewards": -0.14540424942970276, + "rewards/unsafe_rewards": -0.11240017414093018, "step": 110 }, { "epoch": 0.06, - "learning_rate": 3.225806451612903e-07, - "logits/chosen": -2.4783873558044434, - "logits/rejected": -2.262389659881592, - "logps/chosen": -220.35800170898438, - "logps/rejected": -169.4020233154297, - "loss": 13980.0906, - "rewards/accuracies": 0.731249988079071, - "rewards/chosen": -0.09087369590997696, - "rewards/margins": 0.07458756119012833, - "rewards/rejected": -0.1654612421989441, - "rewards/safe_rewards": -0.08417809009552002, - "rewards/unsafe_rewards": -0.0975693017244339, + "learning_rate": 3.1746031746031743e-07, + "logits/chosen": -1.8841511011123657, + "logits/rejected": -1.6952005624771118, + "logps/chosen": -235.6121368408203, + "logps/rejected": -192.76162719726562, + "loss": 6804.4828, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.1285235583782196, + "rewards/margins": 0.07450314611196518, + "rewards/rejected": -0.20302672684192657, + "rewards/safe_rewards": -0.12894900143146515, + "rewards/unsafe_rewards": -0.12272067368030548, "step": 120 }, { "epoch": 0.07, - "learning_rate": 3.4946236559139783e-07, - "logits/chosen": -2.47499942779541, - "logits/rejected": -2.3391237258911133, - "logps/chosen": -224.6880645751953, - "logps/rejected": -181.26388549804688, - "loss": 13439.1469, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.10466276109218597, - "rewards/margins": 0.0889045000076294, - "rewards/rejected": -0.19356727600097656, - "rewards/safe_rewards": -0.07197071611881256, - "rewards/unsafe_rewards": -0.13735483586788177, + "learning_rate": 3.439153439153439e-07, + "logits/chosen": -1.8711330890655518, + "logits/rejected": -1.6887938976287842, + "logps/chosen": -225.3953094482422, + "logps/rejected": -200.31997680664062, + "loss": 7036.6016, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.11849894374608994, + "rewards/margins": 0.05801115185022354, + "rewards/rejected": -0.17651011049747467, + "rewards/safe_rewards": -0.10611984878778458, + "rewards/unsafe_rewards": -0.14429841935634613, "step": 130 }, { - "epoch": 0.08, - "learning_rate": 3.7634408602150537e-07, - "logits/chosen": -2.5111544132232666, - "logits/rejected": -2.323676347732544, - "logps/chosen": -232.02120971679688, - "logps/rejected": -207.3649444580078, - "loss": 13156.9813, - "rewards/accuracies": 0.6812499761581421, - "rewards/chosen": -0.20595864951610565, - "rewards/margins": 0.0673115998506546, - "rewards/rejected": -0.27327024936676025, - "rewards/safe_rewards": -0.19269119203090668, - "rewards/unsafe_rewards": -0.21922609210014343, + "epoch": 0.07, + "learning_rate": 3.703703703703703e-07, + "logits/chosen": -1.826206922531128, + "logits/rejected": -1.6439968347549438, + "logps/chosen": -220.1838836669922, + "logps/rejected": -185.7141876220703, + "loss": 6936.9914, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.11376659572124481, + "rewards/margins": 0.0765247792005539, + "rewards/rejected": -0.1902913898229599, + "rewards/safe_rewards": -0.11482509225606918, + "rewards/unsafe_rewards": -0.09925278276205063, "step": 140 }, { "epoch": 0.08, - "learning_rate": 4.0322580645161285e-07, - "logits/chosen": -2.440796375274658, - "logits/rejected": -2.294173002243042, - "logps/chosen": -229.3905792236328, - "logps/rejected": -201.4041748046875, - "loss": 12286.168, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.2244747132062912, - "rewards/margins": 0.09208513051271439, - "rewards/rejected": -0.3165598511695862, - "rewards/safe_rewards": -0.2293473184108734, - "rewards/unsafe_rewards": -0.21960210800170898, + "learning_rate": 3.968253968253968e-07, + "logits/chosen": -1.7187334299087524, + "logits/rejected": -1.5741361379623413, + "logps/chosen": -211.09603881835938, + "logps/rejected": -203.66156005859375, + "loss": 6555.6867, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.19104455411434174, + "rewards/margins": 0.06891994178295135, + "rewards/rejected": -0.2599644958972931, + "rewards/safe_rewards": -0.20118245482444763, + "rewards/unsafe_rewards": -0.16981182992458344, "step": 150 }, { - "epoch": 0.09, - "learning_rate": 4.3010752688172043e-07, - "logits/chosen": -2.4001574516296387, - "logits/rejected": -2.2244515419006348, - "logps/chosen": -242.0061492919922, - "logps/rejected": -220.7064666748047, - "loss": 13450.5672, - "rewards/accuracies": 0.643750011920929, - "rewards/chosen": -0.2182481735944748, - "rewards/margins": 0.09150904417037964, - "rewards/rejected": -0.30975720286369324, - "rewards/safe_rewards": -0.20791450142860413, - "rewards/unsafe_rewards": -0.22858186066150665, + "epoch": 0.08, + "learning_rate": 4.2328042328042324e-07, + "logits/chosen": -1.7090606689453125, + "logits/rejected": -1.4574247598648071, + "logps/chosen": -231.1162567138672, + "logps/rejected": -197.13832092285156, + "loss": 6483.332, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.2999975085258484, + "rewards/margins": 0.08841492235660553, + "rewards/rejected": -0.3884124159812927, + "rewards/safe_rewards": -0.2963607907295227, + "rewards/unsafe_rewards": -0.2815978527069092, "step": 160 }, { "epoch": 0.09, - "learning_rate": 4.569892473118279e-07, - "logits/chosen": -2.4316458702087402, - "logits/rejected": -2.3187336921691895, - "logps/chosen": -229.7932891845703, - "logps/rejected": -191.04086303710938, - "loss": 12580.4609, + "learning_rate": 4.497354497354497e-07, + "logits/chosen": -1.7472738027572632, + "logits/rejected": -1.5065333843231201, + "logps/chosen": -255.1507110595703, + "logps/rejected": -221.82241821289062, + "loss": 6801.5375, "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.15405751764774323, - "rewards/margins": 0.08396536856889725, - "rewards/rejected": -0.23802292346954346, - "rewards/safe_rewards": -0.13653624057769775, - "rewards/unsafe_rewards": -0.1715788096189499, + "rewards/chosen": -0.23129959404468536, + "rewards/margins": 0.12043756246566772, + "rewards/rejected": -0.35173720121383667, + "rewards/safe_rewards": -0.22959312796592712, + "rewards/unsafe_rewards": -0.1985938847064972, "step": 170 }, { "epoch": 0.1, - "learning_rate": 4.838709677419355e-07, - "logits/chosen": -2.473118305206299, - "logits/rejected": -2.3191967010498047, - "logps/chosen": -217.89785766601562, - "logps/rejected": -209.47775268554688, - "loss": 12886.1875, - "rewards/accuracies": 0.731249988079071, - "rewards/chosen": -0.2591598629951477, - "rewards/margins": 0.1223045364022255, - "rewards/rejected": -0.3814643919467926, - "rewards/safe_rewards": -0.2669682800769806, - "rewards/unsafe_rewards": -0.25135138630867004, + "learning_rate": 4.761904761904761e-07, + "logits/chosen": -1.680676817893982, + "logits/rejected": -1.4166452884674072, + "logps/chosen": -216.8690948486328, + "logps/rejected": -191.8008270263672, + "loss": 6535.7055, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.26913732290267944, + "rewards/margins": 0.11233188211917877, + "rewards/rejected": -0.381469190120697, + "rewards/safe_rewards": -0.26176974177360535, + "rewards/unsafe_rewards": -0.23940448462963104, "step": 180 }, { "epoch": 0.1, - "learning_rate": 4.999929391798331e-07, - "logits/chosen": -2.4944000244140625, - "logits/rejected": -2.323819637298584, - "logps/chosen": -224.4022674560547, - "logps/rejected": -209.09341430664062, - "loss": 12964.0625, - "rewards/accuracies": 0.71875, - "rewards/chosen": -0.30063313245773315, - "rewards/margins": 0.11855147778987885, - "rewards/rejected": -0.4191845953464508, - "rewards/safe_rewards": -0.30268028378486633, - "rewards/unsafe_rewards": -0.2985859215259552, + "learning_rate": 4.999995705919032e-07, + "logits/chosen": -1.5433807373046875, + "logits/rejected": -1.2667306661605835, + "logps/chosen": -224.0026397705078, + "logps/rejected": -205.34414672851562, + "loss": 6409.0121, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.19693121314048767, + "rewards/margins": 0.09455744177103043, + "rewards/rejected": -0.2914886772632599, + "rewards/safe_rewards": -0.17649488151073456, + "rewards/unsafe_rewards": -0.18380855023860931, "step": 190 }, { "epoch": 0.11, - "learning_rate": 4.9991350953333e-07, - "logits/chosen": -2.4175021648406982, - "logits/rejected": -2.261751413345337, - "logps/chosen": -257.97100830078125, - "logps/rejected": -248.92697143554688, - "loss": 12408.5, + "learning_rate": 4.999480434051858e-07, + "logits/chosen": -1.5521910190582275, + "logits/rejected": -1.3097938299179077, + "logps/chosen": -225.257568359375, + "logps/rejected": -205.92129516601562, + "loss": 6576.5188, "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.3828560709953308, - "rewards/margins": 0.09374293684959412, - "rewards/rejected": -0.4765990376472473, - "rewards/safe_rewards": -0.37296319007873535, - "rewards/unsafe_rewards": -0.39274901151657104, + "rewards/chosen": -0.1997550129890442, + "rewards/margins": 0.0904761329293251, + "rewards/rejected": -0.2902311384677887, + "rewards/safe_rewards": -0.20136451721191406, + "rewards/unsafe_rewards": -0.21680407226085663, "step": 200 }, { "epoch": 0.11, - "learning_rate": 4.997458523498236e-07, - "logits/chosen": -2.4530012607574463, - "logits/rejected": -2.3131752014160156, - "logps/chosen": -222.4829559326172, - "logps/rejected": -198.89447021484375, - "loss": 12109.7687, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.3033100664615631, - "rewards/margins": 0.09060867875814438, - "rewards/rejected": -0.3939187526702881, - "rewards/safe_rewards": -0.29972246289253235, - "rewards/unsafe_rewards": -0.30689769983291626, + "learning_rate": 4.998106548810311e-07, + "logits/chosen": -1.3539698123931885, + "logits/rejected": -1.2038872241973877, + "logps/chosen": -212.8267364501953, + "logps/rejected": -220.0903778076172, + "loss": 6444.5828, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2437468022108078, + "rewards/margins": 0.14799915254116058, + "rewards/rejected": -0.3917458951473236, + "rewards/safe_rewards": -0.2773512601852417, + "rewards/unsafe_rewards": -0.2216939926147461, "step": 210 }, { "epoch": 0.12, - "learning_rate": 4.99490026817712e-07, - "logits/chosen": -2.415255308151245, - "logits/rejected": -2.290353775024414, - "logps/chosen": -225.1759490966797, - "logps/rejected": -215.8201446533203, - "loss": 12286.7969, - "rewards/accuracies": 0.668749988079071, - "rewards/chosen": -0.308417946100235, - "rewards/margins": 0.15336203575134277, - "rewards/rejected": -0.46177998185157776, - "rewards/safe_rewards": -0.2879489064216614, - "rewards/unsafe_rewards": -0.3288869559764862, + "learning_rate": 4.995874522146975e-07, + "logits/chosen": -1.503328561782837, + "logits/rejected": -1.3146250247955322, + "logps/chosen": -236.4509735107422, + "logps/rejected": -211.6634063720703, + "loss": 6233.5547, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.29747992753982544, + "rewards/margins": 0.13039958477020264, + "rewards/rejected": -0.4278795123100281, + "rewards/safe_rewards": -0.2768808901309967, + "rewards/unsafe_rewards": -0.3182833790779114, "step": 220 }, { "epoch": 0.12, - "learning_rate": 4.991461232516674e-07, - "logits/chosen": -2.403742551803589, - "logits/rejected": -2.2323994636535645, - "logps/chosen": -265.6297607421875, - "logps/rejected": -247.0828399658203, - "loss": 13271.4391, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.35555627942085266, - "rewards/margins": 0.13768498599529266, - "rewards/rejected": -0.49324122071266174, - "rewards/safe_rewards": -0.34540650248527527, - "rewards/unsafe_rewards": -0.36570602655410767, + "learning_rate": 4.992785120800375e-07, + "logits/chosen": -1.576887845993042, + "logits/rejected": -1.2664101123809814, + "logps/chosen": -237.9243621826172, + "logps/rejected": -213.4459991455078, + "loss": 6108.0914, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.23068375885486603, + "rewards/margins": 0.14957153797149658, + "rewards/rejected": -0.3802553117275238, + "rewards/safe_rewards": -0.22292426228523254, + "rewards/unsafe_rewards": -0.18162095546722412, "step": 230 }, { "epoch": 0.13, - "learning_rate": 4.98714263060751e-07, - "logits/chosen": -2.5071444511413574, - "logits/rejected": -2.31742525100708, - "logps/chosen": -209.05905151367188, - "logps/rejected": -179.74855041503906, - "loss": 12854.6859, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.2065323293209076, - "rewards/margins": 0.12446670234203339, - "rewards/rejected": -0.33099907636642456, - "rewards/safe_rewards": -0.21961939334869385, - "rewards/unsafe_rewards": -0.19344526529312134, + "learning_rate": 4.988839406031596e-07, + "logits/chosen": -1.515092134475708, + "logits/rejected": -1.2886550426483154, + "logps/chosen": -223.7300567626953, + "logps/rejected": -192.06324768066406, + "loss": 6310.6699, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.24790284037590027, + "rewards/margins": 0.1096932515501976, + "rewards/rejected": -0.3575960695743561, + "rewards/safe_rewards": -0.2673969864845276, + "rewards/unsafe_rewards": -0.24145250022411346, "step": 240 }, { "epoch": 0.13, - "learning_rate": 4.98194598705552e-07, - "logits/chosen": -2.460813045501709, - "logits/rejected": -2.3778514862060547, - "logps/chosen": -238.38693237304688, - "logps/rejected": -223.18795776367188, - "loss": 13078.5102, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.48650604486465454, - "rewards/margins": 0.07239247858524323, - "rewards/rejected": -0.5588985681533813, - "rewards/safe_rewards": -0.4960268437862396, - "rewards/unsafe_rewards": -0.47698527574539185, + "learning_rate": 4.98403873325972e-07, + "logits/chosen": -1.5146888494491577, + "logits/rejected": -1.3244738578796387, + "logps/chosen": -213.21694946289062, + "logps/rejected": -209.35061645507812, + "loss": 6209.5707, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.2071472406387329, + "rewards/margins": 0.16860046982765198, + "rewards/rejected": -0.3757476806640625, + "rewards/safe_rewards": -0.1998087763786316, + "rewards/unsafe_rewards": -0.20211009681224823, "step": 250 }, { "epoch": 0.14, - "learning_rate": 4.975873136443648e-07, - "logits/chosen": -2.5023112297058105, - "logits/rejected": -2.350739002227783, - "logps/chosen": -268.8946228027344, - "logps/rejected": -247.2296600341797, - "loss": 11995.3344, - "rewards/accuracies": 0.6937500238418579, - "rewards/chosen": -0.3785567581653595, - "rewards/margins": 0.12816599011421204, - "rewards/rejected": -0.5067228078842163, - "rewards/safe_rewards": -0.39122840762138367, - "rewards/unsafe_rewards": -0.36588507890701294, + "learning_rate": 4.978384751596212e-07, + "logits/chosen": -1.3180285692214966, + "logits/rejected": -1.1171799898147583, + "logps/chosen": -232.109375, + "logps/rejected": -236.84072875976562, + "loss": 6328.7531, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.32092350721359253, + "rewards/margins": 0.17156612873077393, + "rewards/rejected": -0.49248963594436646, + "rewards/safe_rewards": -0.4227983355522156, + "rewards/unsafe_rewards": -0.3325851559638977, "step": 260 }, { - "epoch": 0.15, - "learning_rate": 4.968926222684212e-07, - "logits/chosen": -2.4438796043395996, - "logits/rejected": -2.3461410999298096, - "logps/chosen": -229.61386108398438, - "logps/rejected": -223.92630004882812, - "loss": 12123.7047, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.294075608253479, - "rewards/margins": 0.13579340279102325, - "rewards/rejected": -0.42986902594566345, - "rewards/safe_rewards": -0.300296813249588, - "rewards/unsafe_rewards": -0.2878544330596924, + "epoch": 0.14, + "learning_rate": 4.971879403278432e-07, + "logits/chosen": -1.1372450590133667, + "logits/rejected": -0.9446180462837219, + "logps/chosen": -234.88888549804688, + "logps/rejected": -224.05886840820312, + "loss": 6312.1719, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.29563266038894653, + "rewards/margins": 0.12811212241649628, + "rewards/rejected": -0.4237447679042816, + "rewards/safe_rewards": -0.33217892050743103, + "rewards/unsafe_rewards": -0.27307888865470886, "step": 270 }, { "epoch": 0.15, - "learning_rate": 4.961107698262044e-07, - "logits/chosen": -2.4015512466430664, - "logits/rejected": -2.263901948928833, - "logps/chosen": -255.80538940429688, - "logps/rejected": -228.9020233154297, - "loss": 12352.2758, - "rewards/accuracies": 0.65625, - "rewards/chosen": -0.375516414642334, - "rewards/margins": 0.10626471042633057, - "rewards/rejected": -0.48178109526634216, - "rewards/safe_rewards": -0.37126559019088745, - "rewards/unsafe_rewards": -0.37976714968681335, + "learning_rate": 4.964524923002436e-07, + "logits/chosen": -1.415801763534546, + "logits/rejected": -1.1731336116790771, + "logps/chosen": -241.7359619140625, + "logps/rejected": -224.5096893310547, + "loss": 5974.0195, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.3053835928440094, + "rewards/margins": 0.16657045483589172, + "rewards/rejected": -0.4719540476799011, + "rewards/safe_rewards": -0.3295218348503113, + "rewards/unsafe_rewards": -0.30390697717666626, "step": 280 }, { - "epoch": 0.16, - "learning_rate": 4.952420323368673e-07, - "logits/chosen": -2.4498801231384277, - "logits/rejected": -2.382075548171997, - "logps/chosen": -239.36386108398438, - "logps/rejected": -241.67861938476562, - "loss": 12562.6453, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.47904032468795776, - "rewards/margins": 0.12548959255218506, - "rewards/rejected": -0.6045299172401428, - "rewards/safe_rewards": -0.4896617829799652, - "rewards/unsafe_rewards": -0.46841883659362793, + "epoch": 0.15, + "learning_rate": 4.956323837155325e-07, + "logits/chosen": -1.2966214418411255, + "logits/rejected": -1.1260521411895752, + "logps/chosen": -227.2568359375, + "logps/rejected": -214.1421661376953, + "loss": 6133.0227, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.261239230632782, + "rewards/margins": 0.15825437009334564, + "rewards/rejected": -0.4194936156272888, + "rewards/safe_rewards": -0.2375851422548294, + "rewards/unsafe_rewards": -0.2705303132534027, "step": 290 }, { "epoch": 0.16, - "learning_rate": 4.942867164927899e-07, - "logits/chosen": -2.448484182357788, - "logits/rejected": -2.3499300479888916, - "logps/chosen": -233.24447631835938, - "logps/rejected": -213.6923828125, - "loss": 13096.7359, - "rewards/accuracies": 0.668749988079071, - "rewards/chosen": -0.3095199167728424, - "rewards/margins": 0.13528046011924744, - "rewards/rejected": -0.44480031728744507, - "rewards/safe_rewards": -0.2981899082660675, - "rewards/unsafe_rewards": -0.3208498954772949, + "learning_rate": 4.947278962947386e-07, + "logits/chosen": -1.255904197692871, + "logits/rejected": -1.0300556421279907, + "logps/chosen": -231.86593627929688, + "logps/rejected": -213.03768920898438, + "loss": 5684.9316, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.30576351284980774, + "rewards/margins": 0.1560250222682953, + "rewards/rejected": -0.4617885649204254, + "rewards/safe_rewards": -0.3117372691631317, + "rewards/unsafe_rewards": -0.30344492197036743, "step": 300 }, { "epoch": 0.16, - "eval_logits/chosen": -2.319474458694458, - "eval_logits/rejected": -2.1815290451049805, - "eval_logps/chosen": -170.00274658203125, - "eval_logps/rejected": -140.18304443359375, - "eval_loss": 4529.67333984375, - "eval_rewards/accuracies": 0.6583978533744812, - "eval_rewards/chosen": -0.39572206139564514, - "eval_rewards/margins": 0.08145187050104141, - "eval_rewards/rejected": -0.4771738648414612, - "eval_rewards/safe_rewards": -0.39296460151672363, - "eval_rewards/unsafe_rewards": -0.3956325650215149, - "eval_runtime": 996.6135, - "eval_samples_per_second": 33.156, - "eval_steps_per_second": 1.037, - "step": 300 - }, - { - "epoch": 0.17, - "learning_rate": 4.932451595513062e-07, - "logits/chosen": -2.464543342590332, - "logits/rejected": -2.3167717456817627, - "logps/chosen": -244.10421752929688, - "logps/rejected": -229.1746368408203, - "loss": 11442.1586, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.25108686089515686, - "rewards/margins": 0.15766267478466034, - "rewards/rejected": -0.408749520778656, - "rewards/safe_rewards": -0.2550010681152344, - "rewards/unsafe_rewards": -0.24717266857624054, + "learning_rate": 4.937393407444337e-07, + "logits/chosen": -1.1847805976867676, + "logits/rejected": -0.8935750722885132, + "logps/chosen": -235.5170135498047, + "logps/rejected": -226.17910766601562, + "loss": 5606.7586, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.4436865746974945, + "rewards/margins": 0.12356774508953094, + "rewards/rejected": -0.5672543048858643, + "rewards/safe_rewards": -0.4222384989261627, + "rewards/unsafe_rewards": -0.49501723051071167, "step": 310 }, { "epoch": 0.17, - "learning_rate": 4.921177292156419e-07, - "logits/chosen": -2.486952543258667, - "logits/rejected": -2.317573070526123, - "logps/chosen": -234.8188934326172, - "logps/rejected": -228.7853240966797, - "loss": 11535.2578, - "rewards/accuracies": 0.7437499761581421, - "rewards/chosen": -0.2879149317741394, - "rewards/margins": 0.18317629396915436, - "rewards/rejected": -0.47109121084213257, - "rewards/safe_rewards": -0.2716086506843567, - "rewards/unsafe_rewards": -0.30422115325927734, + "learning_rate": 4.926670566499992e-07, + "logits/chosen": -0.6831132173538208, + "logits/rejected": -0.43409886956214905, + "logps/chosen": -230.1105499267578, + "logps/rejected": -223.13021850585938, + "loss": 6029.3086, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.4783251881599426, + "rewards/margins": 0.13184307515621185, + "rewards/rejected": -0.6101682782173157, + "rewards/safe_rewards": -0.46370235085487366, + "rewards/unsafe_rewards": -0.4838125705718994, "step": 320 }, { "epoch": 0.18, - "learning_rate": 4.909048235051033e-07, - "logits/chosen": -2.408730983734131, - "logits/rejected": -2.313384532928467, - "logps/chosen": -240.1051483154297, - "logps/rejected": -238.6286163330078, - "loss": 11998.6938, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.4070356488227844, - "rewards/margins": 0.15228822827339172, - "rewards/rejected": -0.559323787689209, - "rewards/safe_rewards": -0.38683491945266724, - "rewards/unsafe_rewards": -0.42723625898361206, + "learning_rate": 4.915114123589732e-07, + "logits/chosen": -0.5296390652656555, + "logits/rejected": -0.23315271735191345, + "logps/chosen": -264.1290588378906, + "logps/rejected": -222.7255401611328, + "loss": 6587.2148, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.49660125374794006, + "rewards/margins": 0.1269882619380951, + "rewards/rejected": -0.6235895156860352, + "rewards/safe_rewards": -0.5574027299880981, + "rewards/unsafe_rewards": -0.5570284128189087, "step": 330 }, { "epoch": 0.18, - "learning_rate": 4.896068706145631e-07, - "logits/chosen": -2.430915355682373, - "logits/rejected": -2.3069653511047363, - "logps/chosen": -263.82440185546875, - "logps/rejected": -228.1202392578125, - "loss": 12415.2109, - "rewards/accuracies": 0.668749988079071, - "rewards/chosen": -0.5314391255378723, - "rewards/margins": 0.14294834434986115, - "rewards/rejected": -0.674387514591217, - "rewards/safe_rewards": -0.5249046087265015, - "rewards/unsafe_rewards": -0.5379736423492432, + "learning_rate": 4.90272804854517e-07, + "logits/chosen": -0.20833459496498108, + "logits/rejected": 0.08662636578083038, + "logps/chosen": -271.68389892578125, + "logps/rejected": -259.1782531738281, + "loss": 6224.5324, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.5533224940299988, + "rewards/margins": 0.15772438049316406, + "rewards/rejected": -0.7110469341278076, + "rewards/safe_rewards": -0.5448375940322876, + "rewards/unsafe_rewards": -0.5393844842910767, "step": 340 }, { "epoch": 0.19, - "learning_rate": 4.882243287632946e-07, - "logits/chosen": -2.5057172775268555, - "logits/rejected": -2.3827576637268066, - "logps/chosen": -237.1790771484375, - "logps/rejected": -237.27392578125, - "loss": 11917.3391, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.44174814224243164, - "rewards/margins": 0.1188526302576065, - "rewards/rejected": -0.5606008172035217, - "rewards/safe_rewards": -0.4454471170902252, - "rewards/unsafe_rewards": -0.4380492568016052, + "learning_rate": 4.889516596190448e-07, + "logits/chosen": -0.7373126149177551, + "logits/rejected": -0.34005147218704224, + "logps/chosen": -293.0935363769531, + "logps/rejected": -241.9617156982422, + "loss": 6110.7906, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.5001389980316162, + "rewards/margins": 0.1725221574306488, + "rewards/rejected": -0.6726611852645874, + "rewards/safe_rewards": -0.4835886061191559, + "rewards/unsafe_rewards": -0.5382236838340759, "step": 350 }, { "epoch": 0.19, - "learning_rate": 4.867576860332048e-07, - "logits/chosen": -2.5187079906463623, - "logits/rejected": -2.4128689765930176, - "logps/chosen": -207.39404296875, - "logps/rejected": -220.6639404296875, - "loss": 11619.4062, - "rewards/accuracies": 0.706250011920929, - "rewards/chosen": -0.353740930557251, - "rewards/margins": 0.16230328381061554, - "rewards/rejected": -0.5160442590713501, - "rewards/safe_rewards": -0.37073829770088196, - "rewards/unsafe_rewards": -0.33674362301826477, + "learning_rate": 4.875484304880629e-07, + "logits/chosen": -0.8152839541435242, + "logits/rejected": -0.4126107096672058, + "logps/chosen": -302.5885314941406, + "logps/rejected": -256.1798095703125, + "loss": 6488.7234, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.48745980858802795, + "rewards/margins": 0.10641022026538849, + "rewards/rejected": -0.5938700437545776, + "rewards/safe_rewards": -0.449713796377182, + "rewards/unsafe_rewards": -0.48859700560569763, "step": 360 }, { "epoch": 0.2, - "learning_rate": 4.85207460196526e-07, - "logits/chosen": -2.5497612953186035, - "logits/rejected": -2.4256086349487305, - "logps/chosen": -251.84036254882812, - "logps/rejected": -246.384033203125, - "loss": 11846.7016, - "rewards/accuracies": 0.668749988079071, - "rewards/chosen": -0.47489339113235474, - "rewards/margins": 0.13903862237930298, - "rewards/rejected": -0.6139320135116577, - "rewards/safe_rewards": -0.4880181849002838, - "rewards/unsafe_rewards": -0.46176856756210327, + "learning_rate": 4.860635994942702e-07, + "logits/chosen": -0.47416171431541443, + "logits/rejected": 0.00913926400244236, + "logps/chosen": -258.38189697265625, + "logps/rejected": -230.67880249023438, + "loss": 5790.3816, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.5084312558174133, + "rewards/margins": 0.1444414108991623, + "rewards/rejected": -0.6528726816177368, + "rewards/safe_rewards": -0.5270028114318848, + "rewards/unsafe_rewards": -0.48991069197654724, "step": 370 }, { "epoch": 0.2, - "learning_rate": 4.835741985330259e-07, - "logits/chosen": -2.5404162406921387, - "logits/rejected": -2.4239704608917236, - "logps/chosen": -246.53939819335938, - "logps/rejected": -233.34353637695312, - "loss": 11351.725, - "rewards/accuracies": 0.7437499761581421, - "rewards/chosen": -0.45782560110092163, - "rewards/margins": 0.15662749111652374, - "rewards/rejected": -0.614453136920929, - "rewards/safe_rewards": -0.42890676856040955, - "rewards/unsafe_rewards": -0.48674440383911133, + "learning_rate": 4.844976767019714e-07, + "logits/chosen": -0.19216355681419373, + "logits/rejected": 0.15172423422336578, + "logps/chosen": -222.911865234375, + "logps/rejected": -202.00888061523438, + "loss": 5908.2133, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.5394010543823242, + "rewards/margins": 0.11715151369571686, + "rewards/rejected": -0.6565525531768799, + "rewards/safe_rewards": -0.5183984041213989, + "rewards/unsafe_rewards": -0.5164821743965149, "step": 380 }, { "epoch": 0.21, - "learning_rate": 4.818584776367992e-07, - "logits/chosen": -2.5068519115448, - "logits/rejected": -2.4303596019744873, - "logps/chosen": -244.3930206298828, - "logps/rejected": -249.14187622070312, - "loss": 11726.7609, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.40478819608688354, - "rewards/margins": 0.16650350391864777, - "rewards/rejected": -0.5712917447090149, - "rewards/safe_rewards": -0.43173927068710327, - "rewards/unsafe_rewards": -0.377837210893631, + "learning_rate": 4.828512000318616e-07, + "logits/chosen": -0.213291734457016, + "logits/rejected": 0.39291974902153015, + "logps/chosen": -303.5594177246094, + "logps/rejected": -259.14178466796875, + "loss": 6109.6039, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.5700324177742004, + "rewards/margins": 0.1927037090063095, + "rewards/rejected": -0.7627362012863159, + "rewards/safe_rewards": -0.5912032723426819, + "rewards/unsafe_rewards": -0.5395609140396118, "step": 390 }, { - "epoch": 0.22, - "learning_rate": 4.800609032127122e-07, - "logits/chosen": -2.4628491401672363, - "logits/rejected": -2.374331474304199, - "logps/chosen": -254.27822875976562, - "logps/rejected": -228.73330688476562, - "loss": 12046.5695, - "rewards/accuracies": 0.6312500238418579, - "rewards/chosen": -0.48730865120887756, - "rewards/margins": 0.11220350116491318, - "rewards/rejected": -0.5995121598243713, - "rewards/safe_rewards": -0.4591635763645172, - "rewards/unsafe_rewards": -0.5154536962509155, + "epoch": 0.21, + "learning_rate": 4.811247350762418e-07, + "logits/chosen": -0.36068278551101685, + "logits/rejected": 0.05598723143339157, + "logps/chosen": -240.6222381591797, + "logps/rejected": -234.20803833007812, + "loss": 5907.1703, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.554689347743988, + "rewards/margins": 0.17352624237537384, + "rewards/rejected": -0.7282156348228455, + "rewards/safe_rewards": -0.5173069834709167, + "rewards/unsafe_rewards": -0.5826700329780579, "step": 400 }, { "epoch": 0.22, - "learning_rate": 4.78182109862569e-07, - "logits/chosen": -2.4446866512298584, - "logits/rejected": -2.408660650253296, - "logps/chosen": -230.39208984375, - "logps/rejected": -231.1884765625, - "loss": 12854.2617, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.3896816670894623, - "rewards/margins": 0.12279312312602997, - "rewards/rejected": -0.5124748945236206, - "rewards/safe_rewards": -0.35095858573913574, - "rewards/unsafe_rewards": -0.4284047484397888, + "learning_rate": 4.79318874904728e-07, + "logits/chosen": -0.5469863414764404, + "logits/rejected": -0.3919845223426819, + "logps/chosen": -267.99761962890625, + "logps/rejected": -260.9379577636719, + "loss": 6323.5375, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.5513988137245178, + "rewards/margins": 0.16061297059059143, + "rewards/rejected": -0.7120116949081421, + "rewards/safe_rewards": -0.5992297530174255, + "rewards/unsafe_rewards": -0.5494996309280396, "step": 410 }, { - "epoch": 0.23, - "learning_rate": 4.7622276086107677e-07, - "logits/chosen": -2.4900124073028564, - "logits/rejected": -2.3827693462371826, - "logps/chosen": -251.73007202148438, - "logps/rejected": -243.5518341064453, - "loss": 12053.4797, - "rewards/accuracies": 0.6812499761581421, - "rewards/chosen": -0.4146080017089844, - "rewards/margins": 0.16079458594322205, - "rewards/rejected": -0.5754026174545288, - "rewards/safe_rewards": -0.4179013669490814, - "rewards/unsafe_rewards": -0.41131457686424255, + "epoch": 0.22, + "learning_rate": 4.774342398605221e-07, + "logits/chosen": -1.3936598300933838, + "logits/rejected": -1.0238125324249268, + "logps/chosen": -262.09033203125, + "logps/rejected": -221.07174682617188, + "loss": 5492.8094, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.5323154926300049, + "rewards/margins": 0.15208503603935242, + "rewards/rejected": -0.6844004988670349, + "rewards/safe_rewards": -0.5349102020263672, + "rewards/unsafe_rewards": -0.505738377571106, "step": 420 }, { "epoch": 0.23, - "learning_rate": 4.741835479216879e-07, - "logits/chosen": -2.445971727371216, - "logits/rejected": -2.286686420440674, - "logps/chosen": -294.9362487792969, - "logps/rejected": -262.58404541015625, - "loss": 11785.9859, - "rewards/accuracies": 0.668749988079071, - "rewards/chosen": -0.48062044382095337, - "rewards/margins": 0.16858412325382233, - "rewards/rejected": -0.6492044925689697, - "rewards/safe_rewards": -0.5107543468475342, - "rewards/unsafe_rewards": -0.450486421585083, + "learning_rate": 4.754714773473134e-07, + "logits/chosen": -1.2268015146255493, + "logits/rejected": -1.0391647815704346, + "logps/chosen": -248.2527313232422, + "logps/rejected": -258.4667663574219, + "loss": 6146.5922, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.5346105694770813, + "rewards/margins": 0.18027544021606445, + "rewards/rejected": -0.7148860692977905, + "rewards/safe_rewards": -0.4759598672389984, + "rewards/unsafe_rewards": -0.534007728099823, "step": 430 }, { - "epoch": 0.24, - "learning_rate": 4.720651909524036e-07, - "logits/chosen": -2.3577795028686523, - "logits/rejected": -2.2147059440612793, - "logps/chosen": -236.0032196044922, - "logps/rejected": -218.00820922851562, - "loss": 11980.575, - "rewards/accuracies": 0.706250011920929, - "rewards/chosen": -0.3994632661342621, - "rewards/margins": 0.1237812489271164, - "rewards/rejected": -0.5232445001602173, - "rewards/safe_rewards": -0.4401687681674957, - "rewards/unsafe_rewards": -0.3587578237056732, + "epoch": 0.23, + "learning_rate": 4.734312616068851e-07, + "logits/chosen": -1.2311909198760986, + "logits/rejected": -0.9865934252738953, + "logps/chosen": -214.25851440429688, + "logps/rejected": -198.68943786621094, + "loss": 5944.2828, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.3851444125175476, + "rewards/margins": 0.0964752659201622, + "rewards/rejected": -0.481619656085968, + "rewards/safe_rewards": -0.40014153718948364, + "rewards/unsafe_rewards": -0.4206266403198242, "step": 440 }, { "epoch": 0.24, - "learning_rate": 4.698684378016222e-07, - "logits/chosen": -2.2142910957336426, - "logits/rejected": -2.0808310508728027, - "logps/chosen": -249.1070556640625, - "logps/rejected": -246.5348663330078, - "loss": 12188.9062, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.6178480982780457, - "rewards/margins": 0.13151735067367554, - "rewards/rejected": -0.7493655681610107, - "rewards/safe_rewards": -0.6169500350952148, - "rewards/unsafe_rewards": -0.6187463402748108, + "learning_rate": 4.713142934875005e-07, + "logits/chosen": -0.7530995607376099, + "logits/rejected": -0.348047137260437, + "logps/chosen": -273.5533447265625, + "logps/rejected": -247.33377075195312, + "loss": 6019.3629, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.4809795916080475, + "rewards/margins": 0.16457389295101166, + "rewards/rejected": -0.645553469657898, + "rewards/safe_rewards": -0.4939555525779724, + "rewards/unsafe_rewards": -0.51116544008255, "step": 450 }, { - "epoch": 0.25, - "learning_rate": 4.675940639941256e-07, - "logits/chosen": -2.2552475929260254, - "logits/rejected": -2.087047815322876, - "logps/chosen": -268.5553283691406, - "logps/rejected": -256.51849365234375, - "loss": 11744.7563, - "rewards/accuracies": 0.706250011920929, - "rewards/chosen": -0.6386964917182922, - "rewards/margins": 0.15222086012363434, - "rewards/rejected": -0.7909173965454102, - "rewards/safe_rewards": -0.6151617765426636, - "rewards/unsafe_rewards": -0.6622311472892761, + "epoch": 0.24, + "learning_rate": 4.6912130020314996e-07, + "logits/chosen": 0.18566010892391205, + "logits/rejected": 0.4161214232444763, + "logps/chosen": -233.847900390625, + "logps/rejected": -238.5542755126953, + "loss": 5555.243, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.6200246810913086, + "rewards/margins": 0.13345691561698914, + "rewards/rejected": -0.7534815073013306, + "rewards/safe_rewards": -0.6095362901687622, + "rewards/unsafe_rewards": -0.6309984922409058, "step": 460 }, { "epoch": 0.25, - "learning_rate": 4.6524287245729286e-07, - "logits/chosen": -2.2173266410827637, - "logits/rejected": -2.0759875774383545, - "logps/chosen": -247.02444458007812, - "logps/rejected": -235.70431518554688, - "loss": 11539.7117, - "rewards/accuracies": 0.6937500238418579, - "rewards/chosen": -0.5241425633430481, - "rewards/margins": 0.1680615395307541, - "rewards/rejected": -0.6922041177749634, - "rewards/safe_rewards": -0.5475858449935913, - "rewards/unsafe_rewards": -0.5006993412971497, + "learning_rate": 4.668530350837408e-07, + "logits/chosen": 0.024336492642760277, + "logits/rejected": 0.4952603876590729, + "logps/chosen": -259.33697509765625, + "logps/rejected": -254.6613006591797, + "loss": 5726.7293, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.5721555948257446, + "rewards/margins": 0.12051858007907867, + "rewards/rejected": -0.6926741600036621, + "rewards/safe_rewards": -0.5316283702850342, + "rewards/unsafe_rewards": -0.5645433664321899, "step": 470 }, { - "epoch": 0.26, - "learning_rate": 4.628156932376418e-07, - "logits/chosen": -2.254505157470703, - "logits/rejected": -2.057737112045288, - "logps/chosen": -252.7125244140625, - "logps/rejected": -227.06204223632812, - "loss": 11626.5016, - "rewards/accuracies": 0.6937500238418579, - "rewards/chosen": -0.5887488126754761, - "rewards/margins": 0.16838543117046356, - "rewards/rejected": -0.7571342587471008, - "rewards/safe_rewards": -0.5993281602859497, - "rewards/unsafe_rewards": -0.5781695246696472, + "epoch": 0.25, + "learning_rate": 4.64510277316316e-07, + "logits/chosen": -0.0006995767471380532, + "logits/rejected": 0.4036879539489746, + "logps/chosen": -269.50482177734375, + "logps/rejected": -248.73434448242188, + "loss": 6012.2914, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.5171098113059998, + "rewards/margins": 0.20941033959388733, + "rewards/rejected": -0.7265201807022095, + "rewards/safe_rewards": -0.5066377520561218, + "rewards/unsafe_rewards": -0.4963339865207672, "step": 480 }, { "epoch": 0.26, - "learning_rate": 4.603133832077953e-07, - "logits/chosen": -2.2982699871063232, - "logits/rejected": -2.1872057914733887, - "logps/chosen": -293.5793762207031, - "logps/rejected": -291.3560791015625, - "loss": 11510.6367, - "rewards/accuracies": 0.643750011920929, - "rewards/chosen": -0.609082043170929, - "rewards/margins": 0.1590423583984375, - "rewards/rejected": -0.7681244015693665, - "rewards/safe_rewards": -0.6237068176269531, - "rewards/unsafe_rewards": -0.5944572687149048, + "learning_rate": 4.6209383167739015e-07, + "logits/chosen": -0.8723047971725464, + "logits/rejected": -0.47492194175720215, + "logps/chosen": -239.2227020263672, + "logps/rejected": -223.37191772460938, + "loss": 6090.4563, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.39161261916160583, + "rewards/margins": 0.16117171943187714, + "rewards/rejected": -0.5527843832969666, + "rewards/safe_rewards": -0.4009205400943756, + "rewards/unsafe_rewards": -0.4027668535709381, "step": 490 }, { "epoch": 0.27, - "learning_rate": 4.5773682576397776e-07, - "logits/chosen": -2.2795727252960205, - "logits/rejected": -2.150864839553833, - "logps/chosen": -247.63088989257812, - "logps/rejected": -234.78408813476562, - "loss": 11965.7203, - "rewards/accuracies": 0.65625, - "rewards/chosen": -0.47748714685440063, - "rewards/margins": 0.13509830832481384, - "rewards/rejected": -0.6125854849815369, - "rewards/safe_rewards": -0.48745498061180115, - "rewards/unsafe_rewards": -0.4675193428993225, + "learning_rate": 4.5960452825649526e-07, + "logits/chosen": -0.8613616228103638, + "logits/rejected": -0.5483921766281128, + "logps/chosen": -252.01095581054688, + "logps/rejected": -236.2162628173828, + "loss": 5410.1973, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.4818722605705261, + "rewards/margins": 0.12459783256053925, + "rewards/rejected": -0.606469988822937, + "rewards/safe_rewards": -0.4409845769405365, + "rewards/unsafe_rewards": -0.48863571882247925, "step": 500 }, { "epoch": 0.27, - "learning_rate": 4.5508693051414774e-07, - "logits/chosen": -2.3894762992858887, - "logits/rejected": -2.2791035175323486, - "logps/chosen": -240.63394165039062, - "logps/rejected": -236.6797332763672, - "loss": 11414.6078, - "rewards/accuracies": 0.6812499761581421, - "rewards/chosen": -0.4428652822971344, - "rewards/margins": 0.16295991837978363, - "rewards/rejected": -0.6058252453804016, - "rewards/safe_rewards": -0.4437985420227051, - "rewards/unsafe_rewards": -0.44193196296691895, + "eval_logits/chosen": -0.00993373803794384, + "eval_logits/rejected": 0.6948209404945374, + "eval_logps/chosen": -205.43228149414062, + "eval_logps/rejected": -177.0600128173828, + "eval_loss": 4657.333984375, + "eval_rewards/accuracies": 0.6367472410202026, + "eval_rewards/chosen": -0.6508274078369141, + "eval_rewards/margins": 0.09844248741865158, + "eval_rewards/rejected": -0.749269962310791, + "eval_rewards/safe_rewards": -0.6381882429122925, + "eval_rewards/unsafe_rewards": -0.6354333162307739, + "eval_runtime": 2355.0926, + "eval_samples_per_second": 14.88, + "eval_steps_per_second": 0.465, + "step": 500 + }, + { + "epoch": 0.27, + "learning_rate": 4.570432221710314e-07, + "logits/chosen": -0.2417004406452179, + "logits/rejected": 0.17007017135620117, + "logps/chosen": -273.1074523925781, + "logps/rejected": -236.8904266357422, + "loss": 6244.0367, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -0.5197592973709106, + "rewards/margins": 0.19909226894378662, + "rewards/rejected": -0.7188515067100525, + "rewards/safe_rewards": -0.6001642942428589, + "rewards/unsafe_rewards": -0.5492387413978577, "step": 510 }, { "epoch": 0.28, - "learning_rate": 4.52364632956877e-07, - "logits/chosen": -2.376743793487549, - "logits/rejected": -2.2216954231262207, - "logps/chosen": -262.9234313964844, - "logps/rejected": -226.41116333007812, - "loss": 12766.6898, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.4680143892765045, - "rewards/margins": 0.11080136150121689, - "rewards/rejected": -0.578815758228302, - "rewards/safe_rewards": -0.4691530764102936, - "rewards/unsafe_rewards": -0.4668757915496826, + "learning_rate": 4.5441079327251927e-07, + "logits/chosen": -0.3826223909854889, + "logits/rejected": 0.10965192317962646, + "logps/chosen": -261.4352722167969, + "logps/rejected": -251.9311065673828, + "loss": 5649.8195, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.49133262038230896, + "rewards/margins": 0.11736941337585449, + "rewards/rejected": -0.6087020635604858, + "rewards/safe_rewards": -0.4915240406990051, + "rewards/unsafe_rewards": -0.4991859793663025, "step": 520 }, { - "epoch": 0.29, - "learning_rate": 4.4957089415108895e-07, - "logits/chosen": -2.3553097248077393, - "logits/rejected": -2.2464723587036133, - "logps/chosen": -242.263671875, - "logps/rejected": -255.6951141357422, - "loss": 11431.9516, - "rewards/accuracies": 0.731249988079071, - "rewards/chosen": -0.4732840061187744, - "rewards/margins": 0.19163894653320312, - "rewards/rejected": -0.6649229526519775, - "rewards/safe_rewards": -0.4660876393318176, - "rewards/unsafe_rewards": -0.4804803729057312, + "epoch": 0.28, + "learning_rate": 4.5170814584435644e-07, + "logits/chosen": -0.1299566924571991, + "logits/rejected": 0.30430150032043457, + "logps/chosen": -281.5189514160156, + "logps/rejected": -248.9510040283203, + "loss": 6070.9859, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.5048553347587585, + "rewards/margins": 0.17633280158042908, + "rewards/rejected": -0.6811882257461548, + "rewards/safe_rewards": -0.45997923612594604, + "rewards/unsafe_rewards": -0.5042248964309692, "step": 530 }, { "epoch": 0.29, - "learning_rate": 4.467067003767745e-07, - "logits/chosen": -2.3562865257263184, - "logits/rejected": -2.181304931640625, - "logps/chosen": -247.70449829101562, - "logps/rejected": -240.8135528564453, - "loss": 12058.2938, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.5476623177528381, - "rewards/margins": 0.19659535586833954, - "rewards/rejected": -0.7442576885223389, - "rewards/safe_rewards": -0.5530461072921753, - "rewards/unsafe_rewards": -0.5422784686088562, + "learning_rate": 4.4893620829118124e-07, + "logits/chosen": 0.41155165433883667, + "logits/rejected": 0.7351133227348328, + "logps/chosen": -218.6739959716797, + "logps/rejected": -222.22238159179688, + "loss": 5773.9555, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.5764225721359253, + "rewards/margins": 0.17755261063575745, + "rewards/rejected": -0.7539752125740051, + "rewards/safe_rewards": -0.5707100033760071, + "rewards/unsafe_rewards": -0.5930426716804504, "step": 540 }, { - "epoch": 0.3, - "learning_rate": 4.437730627868027e-07, - "logits/chosen": -2.3266587257385254, - "logits/rejected": -2.120724678039551, - "logps/chosen": -231.5970916748047, - "logps/rejected": -221.3756103515625, - "loss": 11200.6187, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.5469081997871399, - "rewards/margins": 0.20715579390525818, - "rewards/rejected": -0.7540639638900757, - "rewards/safe_rewards": -0.5482960939407349, - "rewards/unsafe_rewards": -0.5455203652381897, + "epoch": 0.29, + "learning_rate": 4.460959328199497e-07, + "logits/chosen": 0.4961000382900238, + "logits/rejected": 0.9081694483757019, + "logps/chosen": -256.54791259765625, + "logps/rejected": -277.130126953125, + "loss": 6108.098, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.6318496465682983, + "rewards/margins": 0.2199208289384842, + "rewards/rejected": -0.8517705202102661, + "rewards/safe_rewards": -0.6448063850402832, + "rewards/unsafe_rewards": -0.5973528623580933, "step": 550 }, { "epoch": 0.3, - "learning_rate": 4.4077101704995163e-07, - "logits/chosen": -2.3897128105163574, - "logits/rejected": -2.2368927001953125, - "logps/chosen": -238.13796997070312, - "logps/rejected": -232.872314453125, - "loss": 11633.2375, - "rewards/accuracies": 0.65625, - "rewards/chosen": -0.4596262574195862, - "rewards/margins": 0.13524024188518524, - "rewards/rejected": -0.5948664546012878, - "rewards/safe_rewards": -0.4546341300010681, - "rewards/unsafe_rewards": -0.4646182954311371, + "learning_rate": 4.4318829511283707e-07, + "logits/chosen": 0.23597554862499237, + "logits/rejected": 0.5608280301094055, + "logps/chosen": -262.15960693359375, + "logps/rejected": -276.5953369140625, + "loss": 6017.0984, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.7231947183609009, + "rewards/margins": 0.16650545597076416, + "rewards/rejected": -0.8897002339363098, + "rewards/safe_rewards": -0.7144005298614502, + "rewards/unsafe_rewards": -0.6883742213249207, "step": 560 }, { - "epoch": 0.31, - "learning_rate": 4.3770162298528356e-07, - "logits/chosen": -2.396641969680786, - "logits/rejected": -2.2687771320343018, - "logps/chosen": -244.2262420654297, - "logps/rejected": -219.45248413085938, - "loss": 12088.3172, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.493887335062027, - "rewards/margins": 0.15973922610282898, - "rewards/rejected": -0.653626561164856, - "rewards/safe_rewards": -0.48037633299827576, - "rewards/unsafe_rewards": -0.5073983073234558, + "epoch": 0.3, + "learning_rate": 4.40214293992074e-07, + "logits/chosen": 0.30961090326309204, + "logits/rejected": 0.6938155889511108, + "logps/chosen": -267.58404541015625, + "logps/rejected": -252.78311157226562, + "loss": 6321.9309, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -0.5275936722755432, + "rewards/margins": 0.20575468242168427, + "rewards/rejected": -0.7333483099937439, + "rewards/safe_rewards": -0.5182517766952515, + "rewards/unsafe_rewards": -0.5568464994430542, "step": 570 }, { "epoch": 0.31, - "learning_rate": 4.3456596418799476e-07, - "logits/chosen": -2.3043763637542725, - "logits/rejected": -2.173457145690918, - "logps/chosen": -257.493896484375, - "logps/rejected": -241.50222778320312, - "loss": 10751.843, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.5369631052017212, - "rewards/margins": 0.15752388536930084, - "rewards/rejected": -0.6944869756698608, - "rewards/safe_rewards": -0.5310293436050415, - "rewards/unsafe_rewards": -0.5428968667984009, + "learning_rate": 4.3717495107683516e-07, + "logits/chosen": 0.2671489417552948, + "logits/rejected": 0.9092152714729309, + "logps/chosen": -250.55960083007812, + "logps/rejected": -235.89840698242188, + "loss": 5574.8402, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.5318346619606018, + "rewards/margins": 0.18946382403373718, + "rewards/rejected": -0.7212985157966614, + "rewards/safe_rewards": -0.5447245836257935, + "rewards/unsafe_rewards": -0.5725606083869934, "step": 580 }, { - "epoch": 0.32, - "learning_rate": 4.313651476468715e-07, - "logits/chosen": -2.270984172821045, - "logits/rejected": -2.1114678382873535, - "logps/chosen": -245.8052520751953, - "logps/rejected": -238.4367218017578, - "loss": 11156.9102, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.4919458329677582, - "rewards/margins": 0.16596952080726624, - "rewards/rejected": -0.6579153537750244, - "rewards/safe_rewards": -0.533513605594635, - "rewards/unsafe_rewards": -0.45037803053855896, + "epoch": 0.31, + "learning_rate": 4.340713104322953e-07, + "logits/chosen": 0.01171237975358963, + "logits/rejected": 0.4629115164279938, + "logps/chosen": -265.1495056152344, + "logps/rejected": -259.7709045410156, + "loss": 5202.8691, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.5935125946998596, + "rewards/margins": 0.18529286980628967, + "rewards/rejected": -0.7788054347038269, + "rewards/safe_rewards": -0.6250792741775513, + "rewards/unsafe_rewards": -0.6238072514533997, "step": 590 }, { "epoch": 0.32, - "learning_rate": 4.2810030335348693e-07, - "logits/chosen": -2.199920177459717, - "logits/rejected": -2.001626491546631, - "logps/chosen": -263.06634521484375, - "logps/rejected": -236.2901153564453, - "loss": 11584.7875, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.5653510689735413, - "rewards/margins": 0.14779801666736603, - "rewards/rejected": -0.7131490707397461, - "rewards/safe_rewards": -0.5651000142097473, - "rewards/unsafe_rewards": -0.56560218334198, + "learning_rate": 4.3090443821097566e-07, + "logits/chosen": 0.7814422845840454, + "logits/rejected": 1.1566433906555176, + "logps/chosen": -278.1474609375, + "logps/rejected": -280.3294677734375, + "loss": 5335.1562, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.6250512599945068, + "rewards/margins": 0.19450877606868744, + "rewards/rejected": -0.8195600509643555, + "rewards/safe_rewards": -0.5736940503120422, + "rewards/unsafe_rewards": -0.6311155557632446, "step": 600 }, { "epoch": 0.32, - "eval_logits/chosen": -1.993375301361084, - "eval_logits/rejected": -1.7938207387924194, - "eval_logps/chosen": -211.25747680664062, - "eval_logps/rejected": -180.6571044921875, - "eval_loss": 4406.71337890625, - "eval_rewards/accuracies": 0.6338334679603577, - "eval_rewards/chosen": -0.8082689642906189, - "eval_rewards/margins": 0.07364560663700104, - "eval_rewards/rejected": -0.8819145560264587, - "eval_rewards/safe_rewards": -0.8027816414833069, - "eval_rewards/unsafe_rewards": -0.8050090670585632, - "eval_runtime": 995.4426, - "eval_samples_per_second": 33.195, - "eval_steps_per_second": 1.038, - "step": 600 - }, - { - "epoch": 0.33, - "learning_rate": 4.2477258390327806e-07, - "logits/chosen": -2.2489686012268066, - "logits/rejected": -2.072410821914673, - "logps/chosen": -241.9006805419922, - "logps/rejected": -251.07376098632812, - "loss": 11304.5898, - "rewards/accuracies": 0.7437499761581421, - "rewards/chosen": -0.5690335035324097, - "rewards/margins": 0.20418229699134827, - "rewards/rejected": -0.7732157707214355, - "rewards/safe_rewards": -0.5881134271621704, - "rewards/unsafe_rewards": -0.5499535799026489, + "learning_rate": 4.276754222865029e-07, + "logits/chosen": 0.546709418296814, + "logits/rejected": 1.5038117170333862, + "logps/chosen": -284.0765075683594, + "logps/rejected": -235.79367065429688, + "loss": 5880.4258, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.6524443626403809, + "rewards/margins": 0.17251375317573547, + "rewards/rejected": -0.8249581456184387, + "rewards/safe_rewards": -0.6402295231819153, + "rewards/unsafe_rewards": -0.6277676224708557, "step": 610 }, { "epoch": 0.33, - "learning_rate": 4.2138316408864197e-07, - "logits/chosen": -2.334721803665161, - "logits/rejected": -2.150754928588867, - "logps/chosen": -246.8868865966797, - "logps/rejected": -237.44033813476562, - "loss": 10365.4461, - "rewards/accuracies": 0.731249988079071, - "rewards/chosen": -0.48651427030563354, - "rewards/margins": 0.2631617486476898, - "rewards/rejected": -0.7496760487556458, - "rewards/safe_rewards": -0.4882384240627289, - "rewards/unsafe_rewards": -0.4847901463508606, + "learning_rate": 4.2438537187990565e-07, + "logits/chosen": 0.7865768671035767, + "logits/rejected": 1.5061836242675781, + "logps/chosen": -283.3603820800781, + "logps/rejected": -251.56442260742188, + "loss": 5760.8687, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.658532977104187, + "rewards/margins": 0.21655750274658203, + "rewards/rejected": -0.875090479850769, + "rewards/safe_rewards": -0.6327935457229614, + "rewards/unsafe_rewards": -0.6471335291862488, "step": 620 }, { - "epoch": 0.34, - "learning_rate": 4.179332404841962e-07, - "logits/chosen": -2.2644031047821045, - "logits/rejected": -2.0903661251068115, - "logps/chosen": -267.8985290527344, - "logps/rejected": -251.5312042236328, - "loss": 10894.2219, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.5778486728668213, - "rewards/margins": 0.22166796028614044, - "rewards/rejected": -0.7995165586471558, - "rewards/safe_rewards": -0.5826883316040039, - "rewards/unsafe_rewards": -0.5730089545249939, + "epoch": 0.33, + "learning_rate": 4.210354171785795e-07, + "logits/chosen": 0.2993673086166382, + "logits/rejected": 0.7917363047599792, + "logps/chosen": -272.6424865722656, + "logps/rejected": -247.65853881835938, + "loss": 5872.0883, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.5130705833435059, + "rewards/margins": 0.1547364443540573, + "rewards/rejected": -0.6678069829940796, + "rewards/safe_rewards": -0.5059661269187927, + "rewards/unsafe_rewards": -0.5222837328910828, "step": 630 }, { "epoch": 0.34, - "learning_rate": 4.1442403102434954e-07, - "logits/chosen": -2.282378911972046, - "logits/rejected": -2.113847255706787, - "logps/chosen": -270.49737548828125, - "logps/rejected": -250.124755859375, - "loss": 11471.2906, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.5048245787620544, - "rewards/margins": 0.20923128724098206, - "rewards/rejected": -0.7140558958053589, - "rewards/safe_rewards": -0.5091570615768433, - "rewards/unsafe_rewards": -0.5004920959472656, + "learning_rate": 4.1762670894804775e-07, + "logits/chosen": 0.09364859014749527, + "logits/rejected": 0.5361107587814331, + "logps/chosen": -249.59634399414062, + "logps/rejected": -237.3841094970703, + "loss": 5896.1926, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -0.49201780557632446, + "rewards/margins": 0.16005203127861023, + "rewards/rejected": -0.6520698070526123, + "rewards/safe_rewards": -0.549709677696228, + "rewards/unsafe_rewards": -0.5637668967247009, "step": 640 }, { - "epoch": 0.35, - "learning_rate": 4.108567745733318e-07, - "logits/chosen": -2.283024311065674, - "logits/rejected": -2.1171913146972656, - "logps/chosen": -216.49264526367188, - "logps/rejected": -221.82421875, - "loss": 11842.7375, - "rewards/accuracies": 0.668749988079071, - "rewards/chosen": -0.4298115372657776, - "rewards/margins": 0.178109809756279, - "rewards/rejected": -0.6079213619232178, - "rewards/safe_rewards": -0.41675862669944763, - "rewards/unsafe_rewards": -0.4428643584251404, + "epoch": 0.34, + "learning_rate": 4.1416041813665493e-07, + "logits/chosen": -0.5552986860275269, + "logits/rejected": -0.25023895502090454, + "logps/chosen": -253.50790405273438, + "logps/rejected": -253.32583618164062, + "loss": 5920.0328, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.47500887513160706, + "rewards/margins": 0.12813320755958557, + "rewards/rejected": -0.6031420826911926, + "rewards/safe_rewards": -0.43845662474632263, + "rewards/unsafe_rewards": -0.45656904578208923, "step": 650 }, { - "epoch": 0.36, - "learning_rate": 4.0723273048783426e-07, - "logits/chosen": -2.3322463035583496, - "logits/rejected": -2.1912810802459717, - "logps/chosen": -271.88299560546875, - "logps/rejected": -232.814208984375, - "loss": 11859.6484, - "rewards/accuracies": 0.643750011920929, - "rewards/chosen": -0.47567135095596313, - "rewards/margins": 0.16224288940429688, - "rewards/rejected": -0.6379141807556152, - "rewards/safe_rewards": -0.4415673315525055, - "rewards/unsafe_rewards": -0.5097752809524536, + "epoch": 0.35, + "learning_rate": 4.1063773547332584e-07, + "logits/chosen": -0.46418723464012146, + "logits/rejected": -0.049189966171979904, + "logps/chosen": -267.15765380859375, + "logps/rejected": -243.20010375976562, + "loss": 6128.7578, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.6104855537414551, + "rewards/margins": 0.10687772184610367, + "rewards/rejected": -0.7173632383346558, + "rewards/safe_rewards": -0.5476406216621399, + "rewards/unsafe_rewards": -0.603262722492218, "step": 660 }, { "epoch": 0.36, - "learning_rate": 4.0355317817241697e-07, - "logits/chosen": -2.3272366523742676, - "logits/rejected": -2.1486003398895264, - "logps/chosen": -275.4678039550781, - "logps/rejected": -224.7365264892578, - "loss": 11542.4758, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.465405136346817, - "rewards/margins": 0.16741091012954712, - "rewards/rejected": -0.6328160166740417, - "rewards/safe_rewards": -0.43667951226234436, - "rewards/unsafe_rewards": -0.4941307008266449, + "learning_rate": 4.0705987105853077e-07, + "logits/chosen": -0.2697436213493347, + "logits/rejected": 0.344801664352417, + "logps/chosen": -252.3665313720703, + "logps/rejected": -232.3540496826172, + "loss": 5986.7625, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.5879735350608826, + "rewards/margins": 0.14302758872509003, + "rewards/rejected": -0.731001079082489, + "rewards/safe_rewards": -0.543707013130188, + "rewards/unsafe_rewards": -0.5482696294784546, "step": 670 }, { - "epoch": 0.37, - "learning_rate": 3.998194166278367e-07, - "logits/chosen": -2.378793239593506, - "logits/rejected": -2.2765145301818848, - "logps/chosen": -242.05722045898438, - "logps/rejected": -228.02822875976562, - "loss": 12028.9203, - "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.5077196359634399, - "rewards/margins": 0.10326583683490753, - "rewards/rejected": -0.6109854578971863, - "rewards/safe_rewards": -0.519899845123291, - "rewards/unsafe_rewards": -0.4955393671989441, + "epoch": 0.36, + "learning_rate": 4.034280539485952e-07, + "logits/chosen": -0.36558887362480164, + "logits/rejected": 0.18461750447750092, + "logps/chosen": -295.22119140625, + "logps/rejected": -274.0675354003906, + "loss": 5383.9453, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.5177947878837585, + "rewards/margins": 0.21047362685203552, + "rewards/rejected": -0.7282685041427612, + "rewards/safe_rewards": -0.5312758684158325, + "rewards/unsafe_rewards": -0.5633383393287659, "step": 680 }, { "epoch": 0.37, - "learning_rate": 3.9603276399245855e-07, - "logits/chosen": -2.3500876426696777, - "logits/rejected": -2.2054431438446045, - "logps/chosen": -266.26800537109375, - "logps/rejected": -240.6906280517578, - "loss": 11937.7102, - "rewards/accuracies": 0.668749988079071, - "rewards/chosen": -0.5368236303329468, - "rewards/margins": 0.19003283977508545, - "rewards/rejected": -0.7268564105033875, - "rewards/safe_rewards": -0.5230244994163513, - "rewards/unsafe_rewards": -0.5506226420402527, + "learning_rate": 3.997435317334988e-07, + "logits/chosen": 0.3039137125015259, + "logits/rejected": 0.7977389097213745, + "logps/chosen": -279.23187255859375, + "logps/rejected": -261.033935546875, + "loss": 5720.7707, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.5356379747390747, + "rewards/margins": 0.2088995724916458, + "rewards/rejected": -0.7445374131202698, + "rewards/safe_rewards": -0.5458201169967651, + "rewards/unsafe_rewards": -0.47182130813598633, "step": 690 }, { - "epoch": 0.38, - "learning_rate": 3.9219455707691e-07, - "logits/chosen": -2.3385093212127686, - "logits/rejected": -2.194088935852051, - "logps/chosen": -262.49957275390625, - "logps/rejected": -248.57937622070312, - "loss": 11237.9461, - "rewards/accuracies": 0.643750011920929, - "rewards/chosen": -0.5866261720657349, - "rewards/margins": 0.18088310956954956, - "rewards/rejected": -0.7675093412399292, - "rewards/safe_rewards": -0.5994827747344971, - "rewards/unsafe_rewards": -0.5737696886062622, + "epoch": 0.37, + "learning_rate": 3.960075701083074e-07, + "logits/chosen": 0.06580640375614166, + "logits/rejected": 0.28118953108787537, + "logps/chosen": -237.80581665039062, + "logps/rejected": -245.47216796875, + "loss": 5702.616, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.5484215021133423, + "rewards/margins": 0.16065733134746552, + "rewards/rejected": -0.709078848361969, + "rewards/safe_rewards": -0.5256644487380981, + "rewards/unsafe_rewards": -0.5779343247413635, "step": 700 }, { "epoch": 0.38, - "learning_rate": 3.883061508921439e-07, - "logits/chosen": -2.4109256267547607, - "logits/rejected": -2.309866428375244, - "logps/chosen": -253.4579620361328, - "logps/rejected": -269.43060302734375, - "loss": 11465.2805, - "rewards/accuracies": 0.65625, - "rewards/chosen": -0.5475128889083862, - "rewards/margins": 0.13771310448646545, - "rewards/rejected": -0.6852259635925293, - "rewards/safe_rewards": -0.5719602704048157, - "rewards/unsafe_rewards": -0.5230655670166016, + "learning_rate": 3.92221452438385e-07, + "logits/chosen": -0.6886399388313293, + "logits/rejected": -0.33862438797950745, + "logps/chosen": -255.33505249023438, + "logps/rejected": -234.041259765625, + "loss": 5505.9277, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -0.5285482406616211, + "rewards/margins": 0.18568384647369385, + "rewards/rejected": -0.7142320871353149, + "rewards/safe_rewards": -0.5484398007392883, + "rewards/unsafe_rewards": -0.5874748826026917, "step": 710 }, { - "epoch": 0.39, - "learning_rate": 3.8436891817107555e-07, - "logits/chosen": -2.3683512210845947, - "logits/rejected": -2.29433012008667, - "logps/chosen": -238.59597778320312, - "logps/rejected": -241.7389678955078, - "loss": 11748.5664, - "rewards/accuracies": 0.706250011920929, - "rewards/chosen": -0.481682687997818, - "rewards/margins": 0.18611426651477814, - "rewards/rejected": -0.6677969694137573, - "rewards/safe_rewards": -0.4765700697898865, - "rewards/unsafe_rewards": -0.4867952764034271, + "epoch": 0.38, + "learning_rate": 3.8838647931853684e-07, + "logits/chosen": -0.7950954437255859, + "logits/rejected": -0.4466307759284973, + "logps/chosen": -253.4489288330078, + "logps/rejected": -254.49813842773438, + "loss": 6030.682, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.5057817697525024, + "rewards/margins": 0.20095935463905334, + "rewards/rejected": -0.7067410945892334, + "rewards/safe_rewards": -0.5353250503540039, + "rewards/unsafe_rewards": -0.4995631277561188, "step": 720 }, { "epoch": 0.39, - "learning_rate": 3.8038424888396414e-07, - "logits/chosen": -2.3680453300476074, - "logits/rejected": -2.233586072921753, - "logps/chosen": -255.9365692138672, - "logps/rejected": -252.37393188476562, - "loss": 10985.4922, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.5011948347091675, - "rewards/margins": 0.1678631603717804, - "rewards/rejected": -0.6690580248832703, - "rewards/safe_rewards": -0.5107179880142212, - "rewards/unsafe_rewards": -0.49167174100875854, + "learning_rate": 3.845039681262332e-07, + "logits/chosen": -0.5698283910751343, + "logits/rejected": -0.1652621030807495, + "logps/chosen": -265.46368408203125, + "logps/rejected": -250.52951049804688, + "loss": 5514.4148, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.45593494176864624, + "rewards/margins": 0.1759863793849945, + "rewards/rejected": -0.6319212913513184, + "rewards/safe_rewards": -0.4363466799259186, + "rewards/unsafe_rewards": -0.4330349862575531, "step": 730 }, { - "epoch": 0.4, - "learning_rate": 3.763535497477079e-07, - "logits/chosen": -2.376601457595825, - "logits/rejected": -2.2197787761688232, - "logps/chosen": -260.691650390625, - "logps/rejected": -245.3418731689453, - "loss": 11114.0781, - "rewards/accuracies": 0.706250011920929, - "rewards/chosen": -0.5256686210632324, - "rewards/margins": 0.21716609597206116, - "rewards/rejected": -0.742834746837616, - "rewards/safe_rewards": -0.5268703103065491, - "rewards/unsafe_rewards": -0.524466872215271, + "epoch": 0.39, + "learning_rate": 3.805752525690681e-07, + "logits/chosen": 0.09326216578483582, + "logits/rejected": 0.7224725484848022, + "logps/chosen": -253.9232940673828, + "logps/rejected": -268.0160217285156, + "loss": 5160.3754, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.6526281237602234, + "rewards/margins": 0.22083961963653564, + "rewards/rejected": -0.8734676241874695, + "rewards/safe_rewards": -0.6421413421630859, + "rewards/unsafe_rewards": -0.6364503502845764, "step": 740 }, { "epoch": 0.4, - "learning_rate": 3.7227824372922795e-07, - "logits/chosen": -2.3355045318603516, - "logits/rejected": -2.202324867248535, - "logps/chosen": -244.21817016601562, - "logps/rejected": -234.26766967773438, - "loss": 11000.375, + "learning_rate": 3.7660168222660824e-07, + "logits/chosen": 0.43039554357528687, + "logits/rejected": 0.772833526134491, + "logps/chosen": -293.98541259765625, + "logps/rejected": -288.250732421875, + "loss": 5855.4879, "rewards/accuracies": 0.643750011920929, - "rewards/chosen": -0.5600683093070984, - "rewards/margins": 0.15717127919197083, - "rewards/rejected": -0.7172395586967468, - "rewards/safe_rewards": -0.5515316724777222, - "rewards/unsafe_rewards": -0.5686048269271851, + "rewards/chosen": -0.7387111783027649, + "rewards/margins": 0.16440826654434204, + "rewards/rejected": -0.9031193852424622, + "rewards/safe_rewards": -0.7269446849822998, + "rewards/unsafe_rewards": -0.6723185777664185, "step": 750 }, { - "epoch": 0.41, - "learning_rate": 3.681597695431148e-07, - "logits/chosen": -2.351304292678833, - "logits/rejected": -2.2070152759552, - "logps/chosen": -249.6879425048828, - "logps/rejected": -252.49722290039062, - "loss": 11395.6094, - "rewards/accuracies": 0.706250011920929, - "rewards/chosen": -0.5104349851608276, - "rewards/margins": 0.22875532507896423, - "rewards/rejected": -0.7391902804374695, - "rewards/safe_rewards": -0.5327340364456177, - "rewards/unsafe_rewards": -0.4881359040737152, + "epoch": 0.4, + "learning_rate": 3.725846220867901e-07, + "logits/chosen": -0.09916634857654572, + "logits/rejected": 0.4922304153442383, + "logps/chosen": -265.7640686035156, + "logps/rejected": -243.7411346435547, + "loss": 6137.0988, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.6147286295890808, + "rewards/margins": 0.14420659840106964, + "rewards/rejected": -0.7589352130889893, + "rewards/safe_rewards": -0.6549733877182007, + "rewards/unsafe_rewards": -0.6351133584976196, "step": 760 }, { "epoch": 0.41, - "learning_rate": 3.639995811437159e-07, - "logits/chosen": -2.3204877376556396, - "logits/rejected": -2.2027907371520996, - "logps/chosen": -252.81082153320312, - "logps/rejected": -258.1892395019531, - "loss": 11175.6797, - "rewards/accuracies": 0.6937500238418579, - "rewards/chosen": -0.5397385358810425, - "rewards/margins": 0.19889900088310242, - "rewards/rejected": -0.7386375069618225, - "rewards/safe_rewards": -0.5588012337684631, - "rewards/unsafe_rewards": -0.520675778388977, + "learning_rate": 3.6852545207702393e-07, + "logits/chosen": -0.18887875974178314, + "logits/rejected": 0.4651460647583008, + "logps/chosen": -300.3460998535156, + "logps/rejected": -247.0656280517578, + "loss": 5956.6977, + "rewards/accuracies": 0.7562500238418579, + "rewards/chosen": -0.5610722899436951, + "rewards/margins": 0.18032148480415344, + "rewards/rejected": -0.7413938641548157, + "rewards/safe_rewards": -0.5364476442337036, + "rewards/unsafe_rewards": -0.5671006441116333, "step": 770 }, { - "epoch": 0.42, - "learning_rate": 3.597991472118426e-07, - "logits/chosen": -2.3824949264526367, - "logits/rejected": -2.2542223930358887, - "logps/chosen": -267.04437255859375, - "logps/rejected": -254.86337280273438, - "loss": 11511.8547, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.5231243371963501, - "rewards/margins": 0.17550811171531677, - "rewards/rejected": -0.6986324191093445, - "rewards/safe_rewards": -0.5508791208267212, - "rewards/unsafe_rewards": -0.49536967277526855, + "epoch": 0.41, + "learning_rate": 3.6442556659016475e-07, + "logits/chosen": 0.3691898286342621, + "logits/rejected": 1.0192655324935913, + "logps/chosen": -278.3470458984375, + "logps/rejected": -240.86141967773438, + "loss": 5414.8289, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -0.5899799466133118, + "rewards/margins": 0.20228877663612366, + "rewards/rejected": -0.7922687530517578, + "rewards/safe_rewards": -0.5520480871200562, + "rewards/unsafe_rewards": -0.5946981906890869, "step": 780 }, { - "epoch": 0.43, - "learning_rate": 3.5555995063627836e-07, - "logits/chosen": -2.3838717937469482, - "logits/rejected": -2.264705181121826, - "logps/chosen": -288.45098876953125, - "logps/rejected": -261.7911071777344, - "loss": 11126.725, + "epoch": 0.42, + "learning_rate": 3.602863740055161e-07, + "logits/chosen": 1.002415418624878, + "logits/rejected": 1.6322085857391357, + "logps/chosen": -268.44488525390625, + "logps/rejected": -261.2592468261719, + "loss": 5358.4598, "rewards/accuracies": 0.6875, - "rewards/chosen": -0.5807030200958252, - "rewards/margins": 0.1779719889163971, - "rewards/rejected": -0.7586749792098999, - "rewards/safe_rewards": -0.5821424722671509, - "rewards/unsafe_rewards": -0.5792635679244995, + "rewards/chosen": -0.6824139356613159, + "rewards/margins": 0.22263555228710175, + "rewards/rejected": -0.9050495028495789, + "rewards/safe_rewards": -0.6642250418663025, + "rewards/unsafe_rewards": -0.6494946479797363, "step": 790 }, { - "epoch": 0.43, - "learning_rate": 3.512834879902715e-07, - "logits/chosen": -2.325700521469116, - "logits/rejected": -2.20318603515625, - "logps/chosen": -262.4079284667969, - "logps/rejected": -256.8218078613281, - "loss": 11113.3211, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.6240984797477722, - "rewards/margins": 0.1907762587070465, - "rewards/rejected": -0.8148747682571411, - "rewards/safe_rewards": -0.6181318759918213, - "rewards/unsafe_rewards": -0.6300650835037231, + "epoch": 0.42, + "learning_rate": 3.5610929620502747e-07, + "logits/chosen": 0.9502559900283813, + "logits/rejected": 1.4719197750091553, + "logps/chosen": -271.93231201171875, + "logps/rejected": -281.78125, + "loss": 5792.9727, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.7460067272186279, + "rewards/margins": 0.18493010103702545, + "rewards/rejected": -0.9309368133544922, + "rewards/safe_rewards": -0.7411947846412659, + "rewards/unsafe_rewards": -0.8093317151069641, "step": 800 }, { - "epoch": 0.44, - "learning_rate": 3.4697126900319616e-07, - "logits/chosen": -2.2967851161956787, - "logits/rejected": -2.1394267082214355, - "logps/chosen": -259.38018798828125, - "logps/rejected": -245.7625732421875, - "loss": 11771.4461, - "rewards/accuracies": 0.768750011920929, - "rewards/chosen": -0.5871919393539429, - "rewards/margins": 0.2232244461774826, - "rewards/rejected": -0.8104164004325867, - "rewards/safe_rewards": -0.5566633939743042, - "rewards/unsafe_rewards": -0.6177204251289368, + "epoch": 0.43, + "learning_rate": 3.5189576808485404e-07, + "logits/chosen": 0.7791315913200378, + "logits/rejected": 1.4415690898895264, + "logps/chosen": -300.54150390625, + "logps/rejected": -273.402587890625, + "loss": 5584.2125, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.7409987449645996, + "rewards/margins": 0.20648033916950226, + "rewards/rejected": -0.9474791288375854, + "rewards/safe_rewards": -0.726071834564209, + "rewards/unsafe_rewards": -0.8359003067016602, "step": 810 }, { "epoch": 0.44, - "learning_rate": 3.426248160275693e-07, - "logits/chosen": -2.33948016166687, - "logits/rejected": -2.2020907402038574, - "logps/chosen": -251.8537139892578, - "logps/rejected": -247.29443359375, - "loss": 11619.0344, + "learning_rate": 3.476472370624464e-07, + "logits/chosen": 0.40392106771469116, + "logits/rejected": 0.7413457632064819, + "logps/chosen": -254.9908905029297, + "logps/rejected": -251.4073028564453, + "loss": 6101.9039, "rewards/accuracies": 0.65625, - "rewards/chosen": -0.5438165068626404, - "rewards/margins": 0.17501583695411682, - "rewards/rejected": -0.7188323736190796, - "rewards/safe_rewards": -0.5689524412155151, - "rewards/unsafe_rewards": -0.5186805725097656, + "rewards/chosen": -0.6420382261276245, + "rewards/margins": 0.13990595936775208, + "rewards/rejected": -0.7819441556930542, + "rewards/safe_rewards": -0.5959726572036743, + "rewards/unsafe_rewards": -0.6521440744400024, "step": 820 }, { - "epoch": 0.45, - "learning_rate": 3.3824566350161094e-07, - "logits/chosen": -2.3640289306640625, - "logits/rejected": -2.195350408554077, - "logps/chosen": -256.7790222167969, - "logps/rejected": -235.6261444091797, - "loss": 11238.1625, - "rewards/accuracies": 0.731249988079071, - "rewards/chosen": -0.4845482409000397, - "rewards/margins": 0.1882091611623764, - "rewards/rejected": -0.6727573871612549, - "rewards/safe_rewards": -0.48699751496315, - "rewards/unsafe_rewards": -0.48209887742996216, + "epoch": 0.44, + "learning_rate": 3.43365162579338e-07, + "logits/chosen": 0.11586692184209824, + "logits/rejected": 0.49579864740371704, + "logps/chosen": -226.8084716796875, + "logps/rejected": -232.3746337890625, + "loss": 5837.0383, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.551177442073822, + "rewards/margins": 0.19108565151691437, + "rewards/rejected": -0.7422630190849304, + "rewards/safe_rewards": -0.5533746480941772, + "rewards/unsafe_rewards": -0.5072416663169861, "step": 830 }, { "epoch": 0.45, - "learning_rate": 3.338353574075381e-07, - "logits/chosen": -2.319687843322754, - "logits/rejected": -2.2427151203155518, - "logps/chosen": -229.2741241455078, - "logps/rejected": -230.6369171142578, - "loss": 13030.1109, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.5387877225875854, - "rewards/margins": 0.1425672322511673, - "rewards/rejected": -0.6813548803329468, - "rewards/safe_rewards": -0.5679432153701782, - "rewards/unsafe_rewards": -0.5096321702003479, + "learning_rate": 3.390510155998023e-07, + "logits/chosen": 0.24915654957294464, + "logits/rejected": 0.6536698341369629, + "logps/chosen": -277.9824523925781, + "logps/rejected": -249.2000732421875, + "loss": 5721.2586, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.648623526096344, + "rewards/margins": 0.12514245510101318, + "rewards/rejected": -0.7737659811973572, + "rewards/safe_rewards": -0.7092838287353516, + "rewards/unsafe_rewards": -0.6900613903999329, "step": 840 }, { - "epoch": 0.46, - "learning_rate": 3.2939545472578314e-07, - "logits/chosen": -2.3902430534362793, - "logits/rejected": -2.1937272548675537, - "logps/chosen": -279.47705078125, - "logps/rejected": -250.4775848388672, - "loss": 11581.1313, - "rewards/accuracies": 0.6312500238418579, - "rewards/chosen": -0.5328287482261658, - "rewards/margins": 0.13031499087810516, - "rewards/rejected": -0.6631438136100769, - "rewards/safe_rewards": -0.5133041143417358, - "rewards/unsafe_rewards": -0.5523533821105957, + "epoch": 0.45, + "learning_rate": 3.347062781055526e-07, + "logits/chosen": 0.5860965847969055, + "logits/rejected": 0.9803635478019714, + "logps/chosen": -245.1415252685547, + "logps/rejected": -272.01080322265625, + "loss": 5834.2676, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -0.6521397829055786, + "rewards/margins": 0.21285566687583923, + "rewards/rejected": -0.8649954795837402, + "rewards/safe_rewards": -0.6472452878952026, + "rewards/unsafe_rewards": -0.6902757883071899, "step": 850 }, { "epoch": 0.46, - "learning_rate": 3.2492752288532916e-07, - "logits/chosen": -2.325867176055908, - "logits/rejected": -2.1522111892700195, - "logps/chosen": -253.1811981201172, - "logps/rejected": -233.44174194335938, - "loss": 11311.8109, - "rewards/accuracies": 0.668749988079071, - "rewards/chosen": -0.4949401319026947, - "rewards/margins": 0.15677447617053986, - "rewards/rejected": -0.6517146825790405, - "rewards/safe_rewards": -0.47377505898475647, - "rewards/unsafe_rewards": -0.5161052942276001, + "learning_rate": 3.303324425866559e-07, + "logits/chosen": 0.6316410303115845, + "logits/rejected": 0.902866005897522, + "logps/chosen": -291.68597412109375, + "logps/rejected": -266.18585205078125, + "loss": 5964.1836, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.6263974905014038, + "rewards/margins": 0.17340168356895447, + "rewards/rejected": -0.7997991442680359, + "rewards/safe_rewards": -0.6621179580688477, + "rewards/unsafe_rewards": -0.6091993451118469, "step": 860 }, { - "epoch": 0.47, - "learning_rate": 3.204331392103574e-07, - "logits/chosen": -2.337698459625244, - "logits/rejected": -2.1352732181549072, - "logps/chosen": -268.2446594238281, - "logps/rejected": -234.0172882080078, - "loss": 11545.3641, - "rewards/accuracies": 0.71875, - "rewards/chosen": -0.5469434261322021, - "rewards/margins": 0.15391257405281067, - "rewards/rejected": -0.7008560299873352, - "rewards/safe_rewards": -0.5454758405685425, - "rewards/unsafe_rewards": -0.5484111905097961, + "epoch": 0.46, + "learning_rate": 3.2593101152883795e-07, + "logits/chosen": 0.6831669211387634, + "logits/rejected": 0.9902046918869019, + "logps/chosen": -256.2884521484375, + "logps/rejected": -279.5752868652344, + "loss": 5961.9836, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.6823039054870605, + "rewards/margins": 0.17010322213172913, + "rewards/rejected": -0.8524071574211121, + "rewards/safe_rewards": -0.6452068090438843, + "rewards/unsafe_rewards": -0.7062270641326904, "step": 870 }, { "epoch": 0.47, - "learning_rate": 3.159138903634006e-07, - "logits/chosen": -2.1978166103363037, - "logits/rejected": -2.0318470001220703, - "logps/chosen": -280.46319580078125, - "logps/rejected": -256.2702941894531, - "loss": 11248.9852, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.7179843187332153, - "rewards/margins": 0.11917723715305328, - "rewards/rejected": -0.837161660194397, - "rewards/safe_rewards": -0.7153393030166626, - "rewards/unsafe_rewards": -0.7206293940544128, + "learning_rate": 3.21503496897354e-07, + "logits/chosen": 0.48068660497665405, + "logits/rejected": 0.952492892742157, + "logps/chosen": -289.909423828125, + "logps/rejected": -262.1679992675781, + "loss": 6021.2465, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.7239787578582764, + "rewards/margins": 0.12146921455860138, + "rewards/rejected": -0.8454478979110718, + "rewards/safe_rewards": -0.7816897630691528, + "rewards/unsafe_rewards": -0.7392334938049316, "step": 880 }, { - "epoch": 0.48, - "learning_rate": 3.1137137178519977e-07, - "logits/chosen": -2.1384453773498535, - "logits/rejected": -2.003512382507324, - "logps/chosen": -253.1010284423828, - "logps/rejected": -262.6402893066406, - "loss": 11450.3414, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.7104918956756592, - "rewards/margins": 0.18712057173252106, - "rewards/rejected": -0.897612452507019, - "rewards/safe_rewards": -0.6939632296562195, - "rewards/unsafe_rewards": -0.7270206212997437, + "epoch": 0.47, + "learning_rate": 3.170514196176037e-07, + "logits/chosen": 0.28930729627609253, + "logits/rejected": 0.6634337902069092, + "logps/chosen": -267.9020080566406, + "logps/rejected": -267.813720703125, + "loss": 5325.9504, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.6826976537704468, + "rewards/margins": 0.18379981815814972, + "rewards/rejected": -0.8664973974227905, + "rewards/safe_rewards": -0.6970924139022827, + "rewards/unsafe_rewards": -0.6835001111030579, "step": 890 }, { "epoch": 0.48, - "learning_rate": 3.068071871314626e-07, - "logits/chosen": -2.135709524154663, - "logits/rejected": -2.028604507446289, - "logps/chosen": -242.0713653564453, - "logps/rejected": -236.3900909423828, - "loss": 10862.3484, - "rewards/accuracies": 0.643750011920929, - "rewards/chosen": -0.6334865689277649, - "rewards/margins": 0.13563254475593567, - "rewards/rejected": -0.7691190838813782, - "rewards/safe_rewards": -0.63458251953125, - "rewards/unsafe_rewards": -0.6323906183242798, + "learning_rate": 3.125763090526674e-07, + "logits/chosen": 0.21367737650871277, + "logits/rejected": 0.6621453166007996, + "logps/chosen": -278.2737731933594, + "logps/rejected": -269.89404296875, + "loss": 5261.0746, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.6765376329421997, + "rewards/margins": 0.20078134536743164, + "rewards/rejected": -0.8773189783096313, + "rewards/safe_rewards": -0.6867783665657043, + "rewards/unsafe_rewards": -0.6920818090438843, "step": 900 }, { "epoch": 0.48, - "eval_logits/chosen": -1.9376695156097412, - "eval_logits/rejected": -1.7468173503875732, - "eval_logps/chosen": -218.71400451660156, - "eval_logps/rejected": -187.7608642578125, - "eval_loss": 4377.5634765625, - "eval_rewards/accuracies": 0.6195546984672546, - "eval_rewards/chosen": -0.8828346133232117, - "eval_rewards/margins": 0.07011755555868149, - "eval_rewards/rejected": -0.9529521465301514, - "eval_rewards/safe_rewards": -0.8774688839912415, - "eval_rewards/unsafe_rewards": -0.8777852058410645, - "eval_runtime": 994.9448, - "eval_samples_per_second": 33.212, - "eval_steps_per_second": 1.038, - "step": 900 - }, - { - "epoch": 0.49, - "learning_rate": 3.022229477067205e-07, - "logits/chosen": -2.1737711429595947, - "logits/rejected": -1.9998136758804321, - "logps/chosen": -283.02313232421875, - "logps/rejected": -255.57656860351562, - "loss": 10507.2695, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.6587027311325073, - "rewards/margins": 0.193914994597435, - "rewards/rejected": -0.8526177406311035, - "rewards/safe_rewards": -0.6458637714385986, - "rewards/unsafe_rewards": -0.6715416312217712, + "learning_rate": 3.080797024779447e-07, + "logits/chosen": 0.19137686491012573, + "logits/rejected": 0.7889005541801453, + "logps/chosen": -253.41421508789062, + "logps/rejected": -236.6729278564453, + "loss": 5719.0418, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -0.6732780933380127, + "rewards/margins": 0.19284026324748993, + "rewards/rejected": -0.866118311882019, + "rewards/safe_rewards": -0.7765754461288452, + "rewards/unsafe_rewards": -0.682191014289856, "step": 910 }, { "epoch": 0.49, - "learning_rate": 2.976202718954869e-07, - "logits/chosen": -2.183156728744507, - "logits/rejected": -1.9853588342666626, - "logps/chosen": -282.3530578613281, - "logps/rejected": -271.27069091796875, - "loss": 11861.3852, - "rewards/accuracies": 0.65625, - "rewards/chosen": -0.7082624435424805, - "rewards/margins": 0.18091826140880585, - "rewards/rejected": -0.8891807794570923, - "rewards/safe_rewards": -0.7145110368728638, - "rewards/unsafe_rewards": -0.7020138502120972, + "learning_rate": 3.035631445530743e-07, + "logits/chosen": 0.4879905581474304, + "logits/rejected": 0.9158290028572083, + "logps/chosen": -290.2519226074219, + "logps/rejected": -284.17071533203125, + "loss": 5561.2797, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.7149994969367981, + "rewards/margins": 0.19377604126930237, + "rewards/rejected": -0.9087755084037781, + "rewards/safe_rewards": -0.6696754693984985, + "rewards/unsafe_rewards": -0.6708149313926697, "step": 920 }, { - "epoch": 0.5, - "learning_rate": 2.930007845909146e-07, - "logits/chosen": -2.177462339401245, - "logits/rejected": -2.0282797813415527, - "logps/chosen": -268.3103942871094, - "logps/rejected": -266.30548095703125, - "loss": 11683.3781, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.7064036726951599, - "rewards/margins": 0.13596151769161224, - "rewards/rejected": -0.8423651456832886, - "rewards/safe_rewards": -0.7049628496170044, - "rewards/unsafe_rewards": -0.7078445553779602, + "epoch": 0.49, + "learning_rate": 2.9902818679131775e-07, + "logits/chosen": 0.3951093852519989, + "logits/rejected": 0.8302197456359863, + "logps/chosen": -271.294189453125, + "logps/rejected": -253.5810546875, + "loss": 5419.4855, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.7780183553695679, + "rewards/margins": 0.17024961113929749, + "rewards/rejected": -0.9482680559158325, + "rewards/safe_rewards": -0.7877544164657593, + "rewards/unsafe_rewards": -0.7789348363876343, "step": 930 }, { - "epoch": 0.51, - "learning_rate": 2.8836611662115634e-07, - "logits/chosen": -2.1535885334014893, - "logits/rejected": -1.9535331726074219, - "logps/chosen": -287.9471130371094, - "logps/rejected": -254.46157836914062, - "loss": 12032.6703, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.6779440641403198, - "rewards/margins": 0.1851780265569687, - "rewards/rejected": -0.8631221652030945, - "rewards/safe_rewards": -0.6615537405014038, - "rewards/unsafe_rewards": -0.6943344473838806, + "epoch": 0.5, + "learning_rate": 2.944763870265886e-07, + "logits/chosen": -0.13839875161647797, + "logits/rejected": 0.3581174314022064, + "logps/chosen": -272.4313659667969, + "logps/rejected": -267.915771484375, + "loss": 5453.8977, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.6422435641288757, + "rewards/margins": 0.19745132327079773, + "rewards/rejected": -0.8396948575973511, + "rewards/safe_rewards": -0.6758723258972168, + "rewards/unsafe_rewards": -0.578320324420929, "step": 940 }, { - "epoch": 0.51, - "learning_rate": 2.8371790417362986e-07, - "logits/chosen": -2.1404051780700684, - "logits/rejected": -1.9921302795410156, - "logps/chosen": -250.58895874023438, - "logps/rejected": -252.75021362304688, - "loss": 12575.8664, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.6325485110282898, - "rewards/margins": 0.13314202427864075, - "rewards/rejected": -0.7656905651092529, - "rewards/safe_rewards": -0.615986168384552, - "rewards/unsafe_rewards": -0.6491108536720276, + "epoch": 0.5, + "learning_rate": 2.899093088783105e-07, + "logits/chosen": -0.06241287663578987, + "logits/rejected": 0.4015175700187683, + "logps/chosen": -294.8834533691406, + "logps/rejected": -279.0429382324219, + "loss": 5278.1754, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.6345726847648621, + "rewards/margins": 0.14065605401992798, + "rewards/rejected": -0.7752287983894348, + "rewards/safe_rewards": -0.6587311029434204, + "rewards/unsafe_rewards": -0.6476761102676392, "step": 950 }, { - "epoch": 0.52, - "learning_rate": 2.7905778821739056e-07, - "logits/chosen": -2.1147265434265137, - "logits/rejected": -1.9771724939346313, - "logps/chosen": -253.51351928710938, - "logps/rejected": -233.7639923095703, - "loss": 11670.4188, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.5872529745101929, - "rewards/margins": 0.1395425647497177, - "rewards/rejected": -0.7267955541610718, - "rewards/safe_rewards": -0.5655093193054199, - "rewards/unsafe_rewards": -0.6089966297149658, + "epoch": 0.51, + "learning_rate": 2.8532852121428733e-07, + "logits/chosen": -0.04936225712299347, + "logits/rejected": 0.38959282636642456, + "logps/chosen": -248.14639282226562, + "logps/rejected": -235.8994598388672, + "loss": 5653.668, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -0.5577735304832458, + "rewards/margins": 0.21775202453136444, + "rewards/rejected": -0.7755255699157715, + "rewards/safe_rewards": -0.55736243724823, + "rewards/unsafe_rewards": -0.5908164978027344, "step": 960 }, { - "epoch": 0.52, - "learning_rate": 2.74387413923817e-07, - "logits/chosen": -2.056212902069092, - "logits/rejected": -1.9303762912750244, - "logps/chosen": -294.0152893066406, - "logps/rejected": -272.24615478515625, - "loss": 11716.1664, - "rewards/accuracies": 0.6812499761581421, - "rewards/chosen": -0.6957898736000061, - "rewards/margins": 0.17264129221439362, - "rewards/rejected": -0.8684310913085938, - "rewards/safe_rewards": -0.688141405582428, - "rewards/unsafe_rewards": -0.7034383416175842, + "epoch": 0.51, + "learning_rate": 2.807355976117716e-07, + "logits/chosen": 0.11599000543355942, + "logits/rejected": 0.49212461709976196, + "logps/chosen": -284.78472900390625, + "logps/rejected": -265.7978515625, + "loss": 5924.3578, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.5290887355804443, + "rewards/margins": 0.22062186896800995, + "rewards/rejected": -0.7497105598449707, + "rewards/safe_rewards": -0.4509585499763489, + "rewards/unsafe_rewards": -0.5535848736763, "step": 970 }, { - "epoch": 0.53, - "learning_rate": 2.69708430085812e-07, - "logits/chosen": -2.0949666500091553, - "logits/rejected": -1.8358476161956787, - "logps/chosen": -303.7754821777344, - "logps/rejected": -279.3113708496094, - "loss": 12070.975, - "rewards/accuracies": 0.731249988079071, - "rewards/chosen": -0.8039963841438293, - "rewards/margins": 0.2226048707962036, - "rewards/rejected": -1.0266011953353882, - "rewards/safe_rewards": -0.7967602014541626, - "rewards/unsafe_rewards": -0.8112322688102722, + "epoch": 0.52, + "learning_rate": 2.761321158169134e-07, + "logits/chosen": -0.0665382593870163, + "logits/rejected": 0.4467547535896301, + "logps/chosen": -262.4479064941406, + "logps/rejected": -265.8846740722656, + "loss": 5391.7484, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.604932427406311, + "rewards/margins": 0.16624750196933746, + "rewards/rejected": -0.7711800336837769, + "rewards/safe_rewards": -0.570032000541687, + "rewards/unsafe_rewards": -0.6088122129440308, "step": 980 }, { "epoch": 0.53, - "learning_rate": 2.6502248853572504e-07, - "logits/chosen": -2.018751859664917, - "logits/rejected": -1.81708562374115, - "logps/chosen": -268.05535888671875, - "logps/rejected": -263.68487548828125, - "loss": 12117.2781, - "rewards/accuracies": 0.668749988079071, - "rewards/chosen": -0.8013098835945129, - "rewards/margins": 0.21267572045326233, - "rewards/rejected": -1.013985514640808, - "rewards/safe_rewards": -0.7930777668952942, - "rewards/unsafe_rewards": -0.8095420002937317, + "learning_rate": 2.715196572027789e-07, + "logits/chosen": 0.15862391889095306, + "logits/rejected": 0.511070966720581, + "logps/chosen": -252.94137573242188, + "logps/rejected": -255.08187866210938, + "loss": 5628.2164, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.6333836913108826, + "rewards/margins": 0.20889365673065186, + "rewards/rejected": -0.8422773480415344, + "rewards/safe_rewards": -0.6369217038154602, + "rewards/unsafe_rewards": -0.6703649163246155, "step": 990 }, { - "epoch": 0.54, - "learning_rate": 2.6033124356220325e-07, - "logits/chosen": -2.040283441543579, - "logits/rejected": -1.8361635208129883, - "logps/chosen": -266.68841552734375, - "logps/rejected": -244.3878631591797, - "loss": 11242.6422, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.639050304889679, - "rewards/margins": 0.21101757884025574, - "rewards/rejected": -0.8500677943229675, - "rewards/safe_rewards": -0.6206936836242676, - "rewards/unsafe_rewards": -0.6574069261550903, + "epoch": 0.53, + "learning_rate": 2.6689980622612204e-07, + "logits/chosen": 0.08565627038478851, + "logits/rejected": 0.5222666263580322, + "logps/chosen": -255.2662811279297, + "logps/rejected": -253.49105834960938, + "loss": 5634.6316, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.6020347476005554, + "rewards/margins": 0.19342327117919922, + "rewards/rejected": -0.7954580187797546, + "rewards/safe_rewards": -0.6501786708831787, + "rewards/unsafe_rewards": -0.6461445093154907, + "step": 1000 + }, + { + "epoch": 0.53, + "eval_logits/chosen": 0.41202229261398315, + "eval_logits/rejected": 1.1542474031448364, + "eval_logps/chosen": -220.34913635253906, + "eval_logps/rejected": -189.61671447753906, + "eval_loss": 4507.89453125, + "eval_rewards/accuracies": 0.6151915788650513, + "eval_rewards/chosen": -0.799996018409729, + "eval_rewards/margins": 0.07484080642461777, + "eval_rewards/rejected": -0.874836802482605, + "eval_rewards/safe_rewards": -0.7885684370994568, + "eval_rewards/unsafe_rewards": -0.784635066986084, + "eval_runtime": 2353.482, + "eval_samples_per_second": 14.89, + "eval_steps_per_second": 0.466, "step": 1000 }, { "epoch": 0.54, - "learning_rate": 2.55636351326173e-07, - "logits/chosen": -2.074812412261963, - "logits/rejected": -1.9070053100585938, - "logps/chosen": -267.3968505859375, - "logps/rejected": -242.2158660888672, - "loss": 10413.1977, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.6132654547691345, - "rewards/margins": 0.21324153244495392, - "rewards/rejected": -0.8265069723129272, - "rewards/safe_rewards": -0.638529360294342, - "rewards/unsafe_rewards": -0.588001549243927, + "learning_rate": 2.622741498830969e-07, + "logits/chosen": 0.2431926727294922, + "logits/rejected": 0.40795207023620605, + "logps/chosen": -279.1517333984375, + "logps/rejected": -271.7449645996094, + "loss": 5872.2367, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -0.6438090801239014, + "rewards/margins": 0.17429831624031067, + "rewards/rejected": -0.8181073069572449, + "rewards/safe_rewards": -0.6910767555236816, + "rewards/unsafe_rewards": -0.6460915803909302, "step": 1010 }, { - "epoch": 0.55, - "learning_rate": 2.509394692761622e-07, - "logits/chosen": -2.1109747886657715, - "logits/rejected": -1.8711456060409546, - "logps/chosen": -266.0138244628906, - "logps/rejected": -246.04159545898438, - "loss": 11730.4734, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.6028767824172974, - "rewards/margins": 0.20766966044902802, - "rewards/rejected": -0.8105465173721313, - "rewards/safe_rewards": -0.5901221632957458, - "rewards/unsafe_rewards": -0.6156314611434937, + "epoch": 0.54, + "learning_rate": 2.5764427716409815e-07, + "logits/chosen": -0.09687475860118866, + "logits/rejected": 0.4301505982875824, + "logps/chosen": -272.0554504394531, + "logps/rejected": -255.6719207763672, + "loss": 5816.6723, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.5806029438972473, + "rewards/margins": 0.19818606972694397, + "rewards/rejected": -0.7787889838218689, + "rewards/safe_rewards": -0.5169692635536194, + "rewards/unsafe_rewards": -0.5289751291275024, "step": 1020 }, { "epoch": 0.55, - "learning_rate": 2.462422555631674e-07, - "logits/chosen": -2.073171854019165, - "logits/rejected": -1.8314094543457031, - "logps/chosen": -267.2673034667969, - "logps/rejected": -234.9337921142578, - "loss": 11384.8359, - "rewards/accuracies": 0.6812499761581421, - "rewards/chosen": -0.6479398012161255, - "rewards/margins": 0.20152795314788818, - "rewards/rejected": -0.8494676351547241, - "rewards/safe_rewards": -0.6472254991531372, - "rewards/unsafe_rewards": -0.6486541032791138, + "learning_rate": 2.5301177850791616e-07, + "logits/chosen": 0.01663217321038246, + "logits/rejected": 0.6527854204177856, + "logps/chosen": -290.3711853027344, + "logps/rejected": -268.1048278808594, + "loss": 5912.7102, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.6295832395553589, + "rewards/margins": 0.20760869979858398, + "rewards/rejected": -0.8371919393539429, + "rewards/safe_rewards": -0.642471432685852, + "rewards/unsafe_rewards": -0.6146708726882935, "step": 1030 }, { - "epoch": 0.56, - "learning_rate": 2.415463684552728e-07, - "logits/chosen": -2.0377871990203857, - "logits/rejected": -1.8571240901947021, - "logps/chosen": -264.3353576660156, - "logps/rejected": -250.5574493408203, - "loss": 11869.4391, - "rewards/accuracies": 0.643750011920929, - "rewards/chosen": -0.6739367246627808, - "rewards/margins": 0.1584298014640808, - "rewards/rejected": -0.8323665857315063, - "rewards/safe_rewards": -0.6686100363731384, - "rewards/unsafe_rewards": -0.6792632341384888, + "epoch": 0.55, + "learning_rate": 2.4837824525539477e-07, + "logits/chosen": 0.17375509440898895, + "logits/rejected": 0.7390264272689819, + "logps/chosen": -270.261474609375, + "logps/rejected": -261.2465515136719, + "loss": 5659.6238, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.6727645993232727, + "rewards/margins": 0.17281220853328705, + "rewards/rejected": -0.8455768823623657, + "rewards/safe_rewards": -0.6424635052680969, + "rewards/unsafe_rewards": -0.6337414979934692, "step": 1040 }, { "epoch": 0.56, - "learning_rate": 2.3685346575222807e-07, - "logits/chosen": -2.111082077026367, - "logits/rejected": -1.8854366540908813, - "logps/chosen": -280.7798767089844, - "logps/rejected": -254.7810821533203, - "loss": 11278.0977, - "rewards/accuracies": 0.643750011920929, - "rewards/chosen": -0.6307443976402283, - "rewards/margins": 0.16730864346027374, - "rewards/rejected": -0.7980531454086304, - "rewards/safe_rewards": -0.6317691802978516, - "rewards/unsafe_rewards": -0.6297196745872498, + "learning_rate": 2.4374526910277886e-07, + "logits/chosen": 0.13272862136363983, + "logits/rejected": 0.57741779088974, + "logps/chosen": -270.9297790527344, + "logps/rejected": -267.14471435546875, + "loss": 5861.1039, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.6448026895523071, + "rewards/margins": 0.2006601095199585, + "rewards/rejected": -0.8454626798629761, + "rewards/safe_rewards": -0.6065593361854553, + "rewards/unsafe_rewards": -0.6479047536849976, "step": 1050 }, { - "epoch": 0.57, - "learning_rate": 2.321652042001919e-07, - "logits/chosen": -2.124929189682007, - "logits/rejected": -1.8838306665420532, - "logps/chosen": -272.2227783203125, - "logps/rejected": -273.8869323730469, - "loss": 11306.2313, - "rewards/accuracies": 0.643750011920929, - "rewards/chosen": -0.6101773381233215, - "rewards/margins": 0.20483005046844482, - "rewards/rejected": -0.8150073885917664, - "rewards/safe_rewards": -0.6046205759048462, - "rewards/unsafe_rewards": -0.6157340407371521, + "epoch": 0.56, + "learning_rate": 2.391144415549403e-07, + "logits/chosen": 0.2520432770252228, + "logits/rejected": 0.7386651039123535, + "logps/chosen": -256.0111389160156, + "logps/rejected": -244.1455535888672, + "loss": 5928.0605, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.6962358355522156, + "rewards/margins": 0.125870481133461, + "rewards/rejected": -0.8221063613891602, + "rewards/safe_rewards": -0.6803200244903564, + "rewards/unsafe_rewards": -0.6994472742080688, "step": 1060 }, { - "epoch": 0.58, - "learning_rate": 2.2748323890684662e-07, - "logits/chosen": -2.1761631965637207, - "logits/rejected": -1.8986377716064453, - "logps/chosen": -270.1111145019531, - "logps/rejected": -247.43576049804688, - "loss": 11255.1672, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.5767061710357666, - "rewards/margins": 0.2480432540178299, - "rewards/rejected": -0.8247492909431458, - "rewards/safe_rewards": -0.5869646072387695, - "rewards/unsafe_rewards": -0.5664476156234741, + "epoch": 0.57, + "learning_rate": 2.3448735337866919e-07, + "logits/chosen": 0.26303520798683167, + "logits/rejected": 0.7426208257675171, + "logps/chosen": -247.3863983154297, + "logps/rejected": -244.02392578125, + "loss": 5880.1039, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.6635211706161499, + "rewards/margins": 0.15260052680969238, + "rewards/rejected": -0.8161218762397766, + "rewards/safe_rewards": -0.706309974193573, + "rewards/unsafe_rewards": -0.6638337969779968, "step": 1070 }, { - "epoch": 0.58, - "learning_rate": 2.2280922275709213e-07, - "logits/chosen": -2.1585605144500732, - "logits/rejected": -2.006892681121826, - "logps/chosen": -272.51593017578125, - "logps/rejected": -259.193603515625, - "loss": 11610.1609, - "rewards/accuracies": 0.643750011920929, - "rewards/chosen": -0.6135168671607971, - "rewards/margins": 0.16986827552318573, - "rewards/rejected": -0.783385157585144, - "rewards/safe_rewards": -0.5932785272598267, - "rewards/unsafe_rewards": -0.6337552666664124, + "epoch": 0.57, + "learning_rate": 2.2986559405621886e-07, + "logits/chosen": 0.030937856063246727, + "logits/rejected": 0.47169026732444763, + "logps/chosen": -279.0972595214844, + "logps/rejected": -268.9930725097656, + "loss": 5616.6, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.6163111925125122, + "rewards/margins": 0.16996563971042633, + "rewards/rejected": -0.7862768173217773, + "rewards/safe_rewards": -0.6654713749885559, + "rewards/unsafe_rewards": -0.6399198770523071, "step": 1080 }, { - "epoch": 0.59, - "learning_rate": 2.1814480582952375e-07, - "logits/chosen": -2.176574230194092, - "logits/rejected": -2.0007762908935547, - "logps/chosen": -264.99627685546875, - "logps/rejected": -260.7152099609375, - "loss": 11244.9641, - "rewards/accuracies": 0.6812499761581421, - "rewards/chosen": -0.6294008493423462, - "rewards/margins": 0.1786467581987381, - "rewards/rejected": -0.8080476522445679, - "rewards/safe_rewards": -0.610293984413147, - "rewards/unsafe_rewards": -0.6485077738761902, + "epoch": 0.58, + "learning_rate": 2.2525075123929213e-07, + "logits/chosen": 0.43386760354042053, + "logits/rejected": 0.7538164258003235, + "logps/chosen": -267.44134521484375, + "logps/rejected": -258.99249267578125, + "loss": 5716.7879, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.6649960279464722, + "rewards/margins": 0.22522863745689392, + "rewards/rejected": -0.890224814414978, + "rewards/safe_rewards": -0.6375536322593689, + "rewards/unsafe_rewards": -0.6348733901977539, "step": 1090 }, { - "epoch": 0.59, - "learning_rate": 2.1349163481390187e-07, - "logits/chosen": -2.1403450965881348, - "logits/rejected": -1.9742145538330078, - "logps/chosen": -258.41851806640625, - "logps/rejected": -253.95596313476562, - "loss": 11432.9805, + "epoch": 0.58, + "learning_rate": 2.206444102036565e-07, + "logits/chosen": 0.6684126257896423, + "logits/rejected": 0.9879862666130066, + "logps/chosen": -267.1449279785156, + "logps/rejected": -270.4283752441406, + "loss": 5974.3918, "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.579275369644165, - "rewards/margins": 0.1978916972875595, - "rewards/rejected": -0.7771671414375305, - "rewards/safe_rewards": -0.5507979393005371, - "rewards/unsafe_rewards": -0.6077528595924377, + "rewards/chosen": -0.74274742603302, + "rewards/margins": 0.15645694732666016, + "rewards/rejected": -0.899204432964325, + "rewards/safe_rewards": -0.7267962694168091, + "rewards/unsafe_rewards": -0.6818505525588989, "step": 1100 }, { - "epoch": 0.6, - "learning_rate": 2.0885135242981647e-07, - "logits/chosen": -2.110644817352295, - "logits/rejected": -1.8921115398406982, - "logps/chosen": -295.12310791015625, - "logps/rejected": -242.7633056640625, - "loss": 10866.6008, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.6292051672935486, - "rewards/margins": 0.1930016279220581, - "rewards/rejected": -0.8222068548202515, - "rewards/safe_rewards": -0.6636418104171753, - "rewards/unsafe_rewards": -0.5947686433792114, + "epoch": 0.59, + "learning_rate": 2.160481533045751e-07, + "logits/chosen": 0.4061971604824066, + "logits/rejected": 0.9739459753036499, + "logps/chosen": -285.2103271484375, + "logps/rejected": -266.5544128417969, + "loss": 5749.7781, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.7457272410392761, + "rewards/margins": 0.2004440277814865, + "rewards/rejected": -0.9461711645126343, + "rewards/safe_rewards": -0.7860220670700073, + "rewards/unsafe_rewards": -0.7390663623809814, "step": 1110 }, { - "epoch": 0.6, - "learning_rate": 2.0422559684675494e-07, - "logits/chosen": -2.07668137550354, - "logits/rejected": -1.870774269104004, - "logps/chosen": -279.4585876464844, - "logps/rejected": -252.98422241210938, - "loss": 11123.7453, - "rewards/accuracies": 0.706250011920929, - "rewards/chosen": -0.622269868850708, - "rewards/margins": 0.18196968734264374, - "rewards/rejected": -0.8042396306991577, - "rewards/safe_rewards": -0.6149351596832275, - "rewards/unsafe_rewards": -0.629604697227478, + "epoch": 0.59, + "learning_rate": 2.1146355943324148e-07, + "logits/chosen": 0.48321422934532166, + "logits/rejected": 0.9058516621589661, + "logps/chosen": -271.53924560546875, + "logps/rejected": -259.0006103515625, + "loss": 5805.548, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.7600331902503967, + "rewards/margins": 0.13751891255378723, + "rewards/rejected": -0.8975521326065063, + "rewards/safe_rewards": -0.7516414523124695, + "rewards/unsafe_rewards": -0.7484757304191589, "step": 1120 }, { - "epoch": 0.61, - "learning_rate": 1.9961600110577457e-07, - "logits/chosen": -2.0477161407470703, - "logits/rejected": -1.8454376459121704, - "logps/chosen": -281.2010803222656, - "logps/rejected": -276.59503173828125, - "loss": 11451.4273, - "rewards/accuracies": 0.6812499761581421, - "rewards/chosen": -0.7229688763618469, - "rewards/margins": 0.15784484148025513, - "rewards/rejected": -0.880813717842102, - "rewards/safe_rewards": -0.6920644640922546, - "rewards/unsafe_rewards": -0.7538732290267944, + "epoch": 0.6, + "learning_rate": 2.0689220347440374e-07, + "logits/chosen": 0.1501261442899704, + "logits/rejected": 0.688166618347168, + "logps/chosen": -301.4822082519531, + "logps/rejected": -273.8033447265625, + "loss": 5622.9852, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.6868051290512085, + "rewards/margins": 0.17512689530849457, + "rewards/rejected": -0.8619319796562195, + "rewards/safe_rewards": -0.6461024284362793, + "rewards/unsafe_rewards": -0.6649470329284668, "step": 1130 }, { - "epoch": 0.61, - "learning_rate": 1.950241925429867e-07, - "logits/chosen": -2.1420865058898926, - "logits/rejected": -1.9303150177001953, - "logps/chosen": -264.2613830566406, - "logps/rejected": -247.3074493408203, - "loss": 11185.8914, - "rewards/accuracies": 0.6812499761581421, - "rewards/chosen": -0.6415218114852905, - "rewards/margins": 0.21913623809814453, - "rewards/rejected": -0.8606580495834351, - "rewards/safe_rewards": -0.6352616548538208, - "rewards/unsafe_rewards": -0.647782027721405, + "epoch": 0.6, + "learning_rate": 2.0233565576536564e-07, + "logits/chosen": 0.05991173908114433, + "logits/rejected": 0.42331352829933167, + "logps/chosen": -294.298095703125, + "logps/rejected": -287.5555419921875, + "loss": 5822.3992, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.7161829471588135, + "rewards/margins": 0.13876894116401672, + "rewards/rejected": -0.8549518585205078, + "rewards/safe_rewards": -0.7057495713233948, + "rewards/unsafe_rewards": -0.6698770523071289, "step": 1140 }, { - "epoch": 0.62, - "learning_rate": 1.9045179221505495e-07, - "logits/chosen": -2.1051979064941406, - "logits/rejected": -1.9602015018463135, - "logps/chosen": -292.8426513671875, - "logps/rejected": -272.5882568359375, - "loss": 10919.0359, - "rewards/accuracies": 0.7437499761581421, - "rewards/chosen": -0.6359490156173706, - "rewards/margins": 0.21196213364601135, - "rewards/rejected": -0.8479111790657043, - "rewards/safe_rewards": -0.605754554271698, - "rewards/unsafe_rewards": -0.666143536567688, + "epoch": 0.61, + "learning_rate": 1.97795481556549e-07, + "logits/chosen": -0.03588150069117546, + "logits/rejected": 0.400505006313324, + "logps/chosen": -277.2012023925781, + "logps/rejected": -247.14804077148438, + "loss": 5935.0914, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.6964778304100037, + "rewards/margins": 0.17653243243694305, + "rewards/rejected": -0.8730102777481079, + "rewards/safe_rewards": -0.6869702339172363, + "rewards/unsafe_rewards": -0.6601093411445618, "step": 1150 }, { "epoch": 0.62, - "learning_rate": 1.8590041432690893e-07, - "logits/chosen": -2.0412395000457764, - "logits/rejected": -1.9340324401855469, - "logps/chosen": -256.9598693847656, - "logps/rejected": -250.4047393798828, - "loss": 11277.7938, - "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.6930029988288879, - "rewards/margins": 0.13424697518348694, - "rewards/rejected": -0.8272498846054077, - "rewards/safe_rewards": -0.6941550970077515, - "rewards/unsafe_rewards": -0.6918508410453796, + "learning_rate": 1.9327324047380422e-07, + "logits/chosen": -0.08701475709676743, + "logits/rejected": 0.4873865246772766, + "logps/chosen": -263.2158203125, + "logps/rejected": -258.84039306640625, + "loss": 5564.0863, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.6252955198287964, + "rewards/margins": 0.22415871918201447, + "rewards/rejected": -0.8494542241096497, + "rewards/safe_rewards": -0.6420432329177856, + "rewards/unsafe_rewards": -0.6124902963638306, "step": 1160 }, { - "epoch": 0.63, - "learning_rate": 1.813716656618788e-07, - "logits/chosen": -2.0102741718292236, - "logits/rejected": -1.9039026498794556, - "logps/chosen": -259.2917175292969, - "logps/rejected": -252.873046875, - "loss": 11988.2266, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.7098476886749268, - "rewards/margins": 0.1709776222705841, - "rewards/rejected": -0.8808252215385437, - "rewards/safe_rewards": -0.7011247873306274, - "rewards/unsafe_rewards": -0.7185705304145813, + "epoch": 0.62, + "learning_rate": 1.887704859826528e-07, + "logits/chosen": 0.07522957026958466, + "logits/rejected": 0.3329767882823944, + "logps/chosen": -285.8026123046875, + "logps/rejected": -266.8732604980469, + "loss": 5750.982, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.6510334014892578, + "rewards/margins": 0.10930682718753815, + "rewards/rejected": -0.7603402137756348, + "rewards/safe_rewards": -0.6223952174186707, + "rewards/unsafe_rewards": -0.6682702302932739, "step": 1170 }, { "epoch": 0.63, - "learning_rate": 1.7686714501444788e-07, - "logits/chosen": -2.084367275238037, - "logits/rejected": -1.8202540874481201, - "logps/chosen": -288.35626220703125, - "logps/rejected": -254.33651733398438, - "loss": 11612.3062, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.7599023580551147, - "rewards/margins": 0.16640828549861908, - "rewards/rejected": -0.926310658454895, - "rewards/safe_rewards": -0.7765257954597473, - "rewards/unsafe_rewards": -0.7432790994644165, + "learning_rate": 1.8428876485464572e-07, + "logits/chosen": -0.15613001585006714, + "logits/rejected": 0.41360145807266235, + "logps/chosen": -238.16897583007812, + "logps/rejected": -225.97802734375, + "loss": 5979.2156, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.5804222822189331, + "rewards/margins": 0.1743427962064743, + "rewards/rejected": -0.7547650933265686, + "rewards/safe_rewards": -0.5962327718734741, + "rewards/unsafe_rewards": -0.6777797341346741, "step": 1180 }, { - "epoch": 0.64, - "learning_rate": 1.7238844262582768e-07, - "logits/chosen": -2.052704095840454, - "logits/rejected": -1.9864280223846436, - "logps/chosen": -275.9404602050781, - "logps/rejected": -276.5259094238281, - "loss": 10682.7672, - "rewards/accuracies": 0.6937500238418579, - "rewards/chosen": -0.6891142129898071, - "rewards/margins": 0.15954537689685822, - "rewards/rejected": -0.8486596345901489, - "rewards/safe_rewards": -0.6521228551864624, - "rewards/unsafe_rewards": -0.7261057496070862, + "epoch": 0.63, + "learning_rate": 1.798296166360216e-07, + "logits/chosen": -0.029682714492082596, + "logits/rejected": 0.5113533139228821, + "logps/chosen": -290.142822265625, + "logps/rejected": -269.4226989746094, + "loss": 6057.1922, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.6241404414176941, + "rewards/margins": 0.1994599997997284, + "rewards/rejected": -0.8236004114151001, + "rewards/safe_rewards": -0.6254442930221558, + "rewards/unsafe_rewards": -0.6271675229072571, "step": 1190 }, { - "epoch": 0.65, - "learning_rate": 1.679371396225504e-07, - "logits/chosen": -2.0941474437713623, - "logits/rejected": -1.8897063732147217, - "logps/chosen": -271.2891540527344, - "logps/rejected": -270.78924560546875, - "loss": 11671.4219, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.7015270590782166, - "rewards/margins": 0.19374321401119232, - "rewards/rejected": -0.8952702283859253, - "rewards/safe_rewards": -0.6884527802467346, - "rewards/unsafe_rewards": -0.7146013379096985, - "step": 1200 - }, - { - "epoch": 0.65, - "eval_logits/chosen": -1.8665262460708618, - "eval_logits/rejected": -1.6740189790725708, - "eval_logps/chosen": -228.5369415283203, - "eval_logps/rejected": -197.5587921142578, - "eval_loss": 4346.4052734375, - "eval_rewards/accuracies": 0.6158034801483154, - "eval_rewards/chosen": -0.9810636043548584, - "eval_rewards/margins": 0.06986771523952484, - "eval_rewards/rejected": -1.0509313344955444, - "eval_rewards/safe_rewards": -0.9763943552970886, - "eval_rewards/unsafe_rewards": -0.9767957329750061, - "eval_runtime": 992.8374, - "eval_samples_per_second": 33.282, - "eval_steps_per_second": 1.04, + "epoch": 0.64, + "learning_rate": 1.7539457311884675e-07, + "logits/chosen": 0.1500866711139679, + "logits/rejected": 0.5680428743362427, + "logps/chosen": -262.3311462402344, + "logps/rejected": -251.67489624023438, + "loss": 5421.8398, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.6509288549423218, + "rewards/margins": 0.2198909968137741, + "rewards/rejected": -0.8708198666572571, + "rewards/safe_rewards": -0.6651867032051086, + "rewards/unsafe_rewards": -0.6189877390861511, "step": 1200 }, { - "epoch": 0.65, - "learning_rate": 1.6351480745828096e-07, - "logits/chosen": -2.1039538383483887, - "logits/rejected": -1.9394419193267822, - "logps/chosen": -265.77984619140625, - "logps/rejected": -253.25137329101562, - "loss": 10276.6594, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.6483036279678345, - "rewards/margins": 0.1812000572681427, - "rewards/rejected": -0.8295037150382996, - "rewards/safe_rewards": -0.6423134803771973, - "rewards/unsafe_rewards": -0.6542937755584717, + "epoch": 0.64, + "learning_rate": 1.7098515781481883e-07, + "logits/chosen": 0.4903317987918854, + "logits/rejected": 0.883372962474823, + "logps/chosen": -272.56097412109375, + "logps/rejected": -241.92919921875, + "loss": 5678.3117, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.6993108987808228, + "rewards/margins": 0.11801446974277496, + "rewards/rejected": -0.8173252940177917, + "rewards/safe_rewards": -0.6638237237930298, + "rewards/unsafe_rewards": -0.6766722202301025, "step": 1210 }, { - "epoch": 0.66, - "learning_rate": 1.5912300735904248e-07, - "logits/chosen": -2.1374449729919434, - "logits/rejected": -1.983070969581604, - "logps/chosen": -290.1751403808594, - "logps/rejected": -259.19097900390625, - "loss": 11657.7609, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.6737682223320007, - "rewards/margins": 0.14703810214996338, - "rewards/rejected": -0.8208063244819641, - "rewards/safe_rewards": -0.6786805987358093, - "rewards/unsafe_rewards": -0.6688558459281921, + "epoch": 0.65, + "learning_rate": 1.6660288543191568e-07, + "logits/chosen": 0.20008230209350586, + "logits/rejected": 1.072401523590088, + "logps/chosen": -292.7231140136719, + "logps/rejected": -264.1849365234375, + "loss": 5411.0453, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.6634177565574646, + "rewards/margins": 0.19502988457679749, + "rewards/rejected": -0.8584476709365845, + "rewards/safe_rewards": -0.7102524638175964, + "rewards/unsafe_rewards": -0.6833497285842896, "step": 1220 }, { - "epoch": 0.66, - "learning_rate": 1.5476328977205395e-07, - "logits/chosen": -2.1445086002349854, - "logits/rejected": -1.9767332077026367, - "logps/chosen": -271.8307189941406, - "logps/rejected": -251.6882781982422, - "loss": 11008.7156, - "rewards/accuracies": 0.6937500238418579, - "rewards/chosen": -0.6544319987297058, - "rewards/margins": 0.21836614608764648, - "rewards/rejected": -0.8727981448173523, - "rewards/safe_rewards": -0.6966055631637573, - "rewards/unsafe_rewards": -0.6122584342956543, + "epoch": 0.65, + "learning_rate": 1.6224926135406693e-07, + "logits/chosen": 0.4110666811466217, + "logits/rejected": 0.9241645932197571, + "logps/chosen": -291.5517272949219, + "logps/rejected": -268.79437255859375, + "loss": 5535.6395, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.6780111193656921, + "rewards/margins": 0.2115507870912552, + "rewards/rejected": -0.8895619511604309, + "rewards/safe_rewards": -0.6748231053352356, + "rewards/unsafe_rewards": -0.7003692984580994, "step": 1230 }, { - "epoch": 0.67, - "learning_rate": 1.5043719381837112e-07, - "logits/chosen": -2.08951473236084, - "logits/rejected": -1.9547208547592163, - "logps/chosen": -287.0835266113281, - "logps/rejected": -271.43670654296875, - "loss": 11362.9422, - "rewards/accuracies": 0.6812499761581421, - "rewards/chosen": -0.6643306612968445, - "rewards/margins": 0.17835856974124908, - "rewards/rejected": -0.8426891565322876, - "rewards/safe_rewards": -0.6574736833572388, - "rewards/unsafe_rewards": -0.6711875200271606, + "epoch": 0.66, + "learning_rate": 1.579257811240298e-07, + "logits/chosen": 0.17879924178123474, + "logits/rejected": 0.82609623670578, + "logps/chosen": -283.47686767578125, + "logps/rejected": -269.6540832519531, + "loss": 5427.3156, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.7036404609680176, + "rewards/margins": 0.14344856142997742, + "rewards/rejected": -0.8470889925956726, + "rewards/safe_rewards": -0.6846009492874146, + "rewards/unsafe_rewards": -0.6783186197280884, "step": 1240 }, { - "epoch": 0.67, - "learning_rate": 1.461462467495284e-07, - "logits/chosen": -2.0471625328063965, - "logits/rejected": -1.8923746347427368, - "logps/chosen": -252.0188446044922, - "logps/rejected": -260.54522705078125, - "loss": 10503.6422, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.7411776185035706, - "rewards/margins": 0.21499836444854736, - "rewards/rejected": -0.9561759829521179, - "rewards/safe_rewards": -0.7554097175598145, - "rewards/unsafe_rewards": -0.7269455194473267, + "epoch": 0.66, + "learning_rate": 1.5363392992964523e-07, + "logits/chosen": 0.4139084815979004, + "logits/rejected": 0.7215920686721802, + "logps/chosen": -257.33319091796875, + "logps/rejected": -258.1666564941406, + "loss": 5595.8969, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.7196224927902222, + "rewards/margins": 0.11075691878795624, + "rewards/rejected": -0.8303793668746948, + "rewards/safe_rewards": -0.7594167590141296, + "rewards/unsafe_rewards": -0.7032173275947571, "step": 1250 }, { - "epoch": 0.68, - "learning_rate": 1.4189196340836865e-07, - "logits/chosen": -2.1763851642608643, - "logits/rejected": -1.9493157863616943, - "logps/chosen": -269.4758605957031, - "logps/rejected": -258.123291015625, - "loss": 10853.5781, - "rewards/accuracies": 0.6812499761581421, - "rewards/chosen": -0.7069443464279175, - "rewards/margins": 0.17843110859394073, - "rewards/rejected": -0.8853754997253418, - "rewards/safe_rewards": -0.7389132380485535, - "rewards/unsafe_rewards": -0.6749754548072815, + "epoch": 0.67, + "learning_rate": 1.4937518209365108e-07, + "logits/chosen": 0.2804068922996521, + "logits/rejected": 0.7492934465408325, + "logps/chosen": -299.9917297363281, + "logps/rejected": -274.86566162109375, + "loss": 5485.5156, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.6413429975509644, + "rewards/margins": 0.18771231174468994, + "rewards/rejected": -0.8290553092956543, + "rewards/safe_rewards": -0.6320935487747192, + "rewards/unsafe_rewards": -0.6288415789604187, "step": 1260 }, { - "epoch": 0.68, - "learning_rate": 1.3767584569425561e-07, - "logits/chosen": -2.19474458694458, - "logits/rejected": -1.9729722738265991, - "logps/chosen": -277.60992431640625, - "logps/rejected": -258.0458984375, - "loss": 11364.5312, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.6931368708610535, - "rewards/margins": 0.18750238418579102, - "rewards/rejected": -0.8806392550468445, - "rewards/safe_rewards": -0.7000595927238464, - "rewards/unsafe_rewards": -0.6862143278121948, + "epoch": 0.67, + "learning_rate": 1.4515100056722708e-07, + "logits/chosen": 0.49235549569129944, + "logits/rejected": 0.896806538105011, + "logps/chosen": -250.7898712158203, + "logps/rejected": -248.735107421875, + "loss": 5635.8461, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.6494947671890259, + "rewards/margins": 0.2068520337343216, + "rewards/rejected": -0.8563467860221863, + "rewards/safe_rewards": -0.6947168707847595, + "rewards/unsafe_rewards": -0.6628744602203369, "step": 1270 }, { - "epoch": 0.69, - "learning_rate": 1.334993820328541e-07, - "logits/chosen": -2.0907111167907715, - "logits/rejected": -1.9239038228988647, - "logps/chosen": -246.83642578125, - "logps/rejected": -245.0505828857422, - "loss": 10652.2328, - "rewards/accuracies": 0.7437499761581421, - "rewards/chosen": -0.7036882638931274, - "rewards/margins": 0.24492530524730682, - "rewards/rejected": -0.9486135244369507, - "rewards/safe_rewards": -0.6736655235290527, - "rewards/unsafe_rewards": -0.7337108850479126, + "epoch": 0.68, + "learning_rate": 1.4096283642744716e-07, + "logits/chosen": 0.564648449420929, + "logits/rejected": 1.1666864156723022, + "logps/chosen": -287.2496337890625, + "logps/rejected": -269.12689208984375, + "loss": 5744.0652, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.6512799263000488, + "rewards/margins": 0.23767797648906708, + "rewards/rejected": -0.8889577984809875, + "rewards/safe_rewards": -0.6507743000984192, + "rewards/unsafe_rewards": -0.6260145306587219, "step": 1280 }, { - "epoch": 0.69, - "learning_rate": 1.2936404685066852e-07, - "logits/chosen": -2.0505154132843018, - "logits/rejected": -1.9092705249786377, - "logps/chosen": -286.58990478515625, - "logps/rejected": -280.3271789550781, - "loss": 11526.9, - "rewards/accuracies": 0.6812499761581421, - "rewards/chosen": -0.68525230884552, - "rewards/margins": 0.18679992854595184, - "rewards/rejected": -0.8720521926879883, - "rewards/safe_rewards": -0.7125786542892456, - "rewards/unsafe_rewards": -0.6579257249832153, + "epoch": 0.68, + "learning_rate": 1.3681212837880977e-07, + "logits/chosen": 0.3310979902744293, + "logits/rejected": 0.946731686592102, + "logps/chosen": -283.14178466796875, + "logps/rejected": -268.6293029785156, + "loss": 5538.1773, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.6541503667831421, + "rewards/margins": 0.20235121250152588, + "rewards/rejected": -0.856501579284668, + "rewards/safe_rewards": -0.7126244902610779, + "rewards/unsafe_rewards": -0.6116858124732971, "step": 1290 }, { - "epoch": 0.7, - "learning_rate": 1.252713000545221e-07, - "logits/chosen": -2.2164149284362793, - "logits/rejected": -2.019528388977051, - "logps/chosen": -282.0648498535156, - "logps/rejected": -258.7922058105469, - "loss": 10187.7117, - "rewards/accuracies": 0.6937500238418579, - "rewards/chosen": -0.6330300569534302, - "rewards/margins": 0.23740728199481964, - "rewards/rejected": -0.8704373240470886, - "rewards/safe_rewards": -0.6281386613845825, - "rewards/unsafe_rewards": -0.6379214525222778, + "epoch": 0.69, + "learning_rate": 1.3270030225901908e-07, + "logits/chosen": 0.21446232497692108, + "logits/rejected": 0.9988247156143188, + "logps/chosen": -311.952392578125, + "logps/rejected": -264.99005126953125, + "loss": 5863.9875, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.6609299778938293, + "rewards/margins": 0.20790867507457733, + "rewards/rejected": -0.8688386678695679, + "rewards/safe_rewards": -0.6820018291473389, + "rewards/unsafe_rewards": -0.6768487691879272, "step": 1300 }, { "epoch": 0.7, - "learning_rate": 1.2122258651616304e-07, - "logits/chosen": -2.183960437774658, - "logits/rejected": -1.976144790649414, - "logps/chosen": -275.6617126464844, - "logps/rejected": -244.86434936523438, - "loss": 10813.9688, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.6538507342338562, - "rewards/margins": 0.17538292706012726, - "rewards/rejected": -0.8292337656021118, - "rewards/safe_rewards": -0.6337584853172302, - "rewards/unsafe_rewards": -0.673943042755127, + "learning_rate": 1.2862877054918572e-07, + "logits/chosen": 0.43877673149108887, + "logits/rejected": 0.7122836112976074, + "logps/chosen": -263.78924560546875, + "logps/rejected": -267.306884765625, + "loss": 5915.4555, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.6279779672622681, + "rewards/margins": 0.19203224778175354, + "rewards/rejected": -0.8200103044509888, + "rewards/safe_rewards": -0.5540001392364502, + "rewards/unsafe_rewards": -0.6103017926216125, "step": 1310 }, { - "epoch": 0.71, - "learning_rate": 1.1721933556217792e-07, - "logits/chosen": -2.167842388153076, - "logits/rejected": -2.0031533241271973, - "logps/chosen": -265.863525390625, - "logps/rejected": -261.1900329589844, - "loss": 11171.5523, - "rewards/accuracies": 0.6812499761581421, - "rewards/chosen": -0.6531133651733398, - "rewards/margins": 0.19861795008182526, - "rewards/rejected": -0.8517313003540039, - "rewards/safe_rewards": -0.6670490503311157, - "rewards/unsafe_rewards": -0.639177680015564, + "epoch": 0.7, + "learning_rate": 1.2459893188861613e-07, + "logits/chosen": 0.11050845682621002, + "logits/rejected": 0.638201117515564, + "logps/chosen": -230.92892456054688, + "logps/rejected": -223.246826171875, + "loss": 5522.6379, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.5677499771118164, + "rewards/margins": 0.1929033249616623, + "rewards/rejected": -0.7606532573699951, + "rewards/safe_rewards": -0.6029695272445679, + "rewards/unsafe_rewards": -0.6227617859840393, "step": 1320 }, { - "epoch": 0.72, - "learning_rate": 1.1326296046939333e-07, - "logits/chosen": -2.134734869003296, - "logits/rejected": -1.9698076248168945, - "logps/chosen": -257.0758972167969, - "logps/rejected": -242.34640502929688, - "loss": 10608.8219, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.6214333772659302, - "rewards/margins": 0.21942774951457977, - "rewards/rejected": -0.8408611416816711, - "rewards/safe_rewards": -0.6008444428443909, - "rewards/unsafe_rewards": -0.6420222520828247, + "epoch": 0.71, + "learning_rate": 1.206121705943558e-07, + "logits/chosen": 0.2380530834197998, + "logits/rejected": 0.772462785243988, + "logps/chosen": -265.9678039550781, + "logps/rejected": -236.330078125, + "loss": 5444.8687, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.5695582628250122, + "rewards/margins": 0.17861400544643402, + "rewards/rejected": -0.7481723427772522, + "rewards/safe_rewards": -0.4967488646507263, + "rewards/unsafe_rewards": -0.5609390139579773, "step": 1330 }, { - "epoch": 0.72, - "learning_rate": 1.0935485796594351e-07, - "logits/chosen": -2.174132823944092, - "logits/rejected": -1.9792499542236328, - "logps/chosen": -291.65814208984375, - "logps/rejected": -260.0698547363281, - "loss": 11895.8789, - "rewards/accuracies": 0.71875, - "rewards/chosen": -0.6717311143875122, - "rewards/margins": 0.22725185751914978, - "rewards/rejected": -0.8989830017089844, - "rewards/safe_rewards": -0.6783491373062134, - "rewards/unsafe_rewards": -0.665113091468811, + "epoch": 0.71, + "learning_rate": 1.1666985618565422e-07, + "logits/chosen": 0.7791303396224976, + "logits/rejected": 1.0070080757141113, + "logps/chosen": -239.6016082763672, + "logps/rejected": -250.1675567626953, + "loss": 5496.5402, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.643204391002655, + "rewards/margins": 0.212922140955925, + "rewards/rejected": -0.856126606464386, + "rewards/safe_rewards": -0.6307708024978638, + "rewards/unsafe_rewards": -0.6205247044563293, "step": 1340 }, { - "epoch": 0.73, - "learning_rate": 1.0549640773818028e-07, - "logits/chosen": -2.139070749282837, - "logits/rejected": -2.040194034576416, - "logps/chosen": -272.8217468261719, - "logps/rejected": -252.80606079101562, - "loss": 11776.8328, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.724553108215332, - "rewards/margins": 0.14907190203666687, - "rewards/rejected": -0.8736250996589661, - "rewards/safe_rewards": -0.764702320098877, - "rewards/unsafe_rewards": -0.6844038963317871, + "epoch": 0.72, + "learning_rate": 1.1277334291351145e-07, + "logits/chosen": 0.6811083555221558, + "logits/rejected": 1.2308669090270996, + "logps/chosen": -240.9481964111328, + "logps/rejected": -251.2366485595703, + "loss": 5451.2172, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.6521676778793335, + "rewards/margins": 0.1860547959804535, + "rewards/rejected": -0.8382223844528198, + "rewards/safe_rewards": -0.7259255647659302, + "rewards/unsafe_rewards": -0.6219838857650757, "step": 1350 }, { - "epoch": 0.73, - "learning_rate": 1.0168897194359921e-07, - "logits/chosen": -2.1789050102233887, - "logits/rejected": -1.9954169988632202, - "logps/chosen": -300.97711181640625, - "logps/rejected": -274.7542724609375, - "loss": 10877.8953, - "rewards/accuracies": 0.643750011920929, - "rewards/chosen": -0.7322704195976257, - "rewards/margins": 0.15806125104427338, - "rewards/rejected": -0.8903317451477051, - "rewards/safe_rewards": -0.6902467608451843, - "rewards/unsafe_rewards": -0.7742940783500671, + "epoch": 0.72, + "learning_rate": 1.089239692954701e-07, + "logits/chosen": 0.36615195870399475, + "logits/rejected": 0.9472381472587585, + "logps/chosen": -269.5465087890625, + "logps/rejected": -256.1499328613281, + "loss": 5717.6105, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.6657227873802185, + "rewards/margins": 0.15908706188201904, + "rewards/rejected": -0.8248098492622375, + "rewards/safe_rewards": -0.7341758012771606, + "rewards/unsafe_rewards": -0.6227680444717407, "step": 1360 }, { - "epoch": 0.74, - "learning_rate": 9.793389472995392e-08, - "logits/chosen": -2.155348539352417, - "logits/rejected": -1.9421207904815674, - "logps/chosen": -279.4602966308594, - "logps/rejected": -240.51791381835938, - "loss": 10089.3242, - "rewards/accuracies": 0.7437499761581421, - "rewards/chosen": -0.639030933380127, - "rewards/margins": 0.252288281917572, - "rewards/rejected": -0.8913192749023438, - "rewards/safe_rewards": -0.6204690933227539, - "rewards/unsafe_rewards": -0.6575928330421448, + "epoch": 0.73, + "learning_rate": 1.051230576558127e-07, + "logits/chosen": 0.7043350338935852, + "logits/rejected": 1.012446641921997, + "logps/chosen": -265.9175720214844, + "logps/rejected": -296.2731628417969, + "loss": 5307.2445, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.7264591455459595, + "rewards/margins": 0.1706809252500534, + "rewards/rejected": -0.8971401453018188, + "rewards/safe_rewards": -0.7796869277954102, + "rewards/unsafe_rewards": -0.7442405819892883, "step": 1370 }, { - "epoch": 0.74, - "learning_rate": 9.423250176072874e-08, - "logits/chosen": -2.1420297622680664, - "logits/rejected": -1.946412444114685, - "logps/chosen": -260.1219177246094, - "logps/rejected": -234.1204833984375, - "loss": 12708.7586, - "rewards/accuracies": 0.6187499761581421, - "rewards/chosen": -0.71236252784729, - "rewards/margins": 0.1536431610584259, - "rewards/rejected": -0.8660055994987488, - "rewards/safe_rewards": -0.6938896179199219, - "rewards/unsafe_rewards": -0.7308354377746582, + "epoch": 0.73, + "learning_rate": 1.0137191367132078e-07, + "logits/chosen": 0.5799378156661987, + "logits/rejected": 1.0962615013122559, + "logps/chosen": -280.27587890625, + "logps/rejected": -261.3016052246094, + "loss": 5462.4613, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.659958004951477, + "rewards/margins": 0.24963033199310303, + "rewards/rejected": -0.9095882177352905, + "rewards/safe_rewards": -0.6955925226211548, + "rewards/unsafe_rewards": -0.6324699521064758, "step": 1380 }, { - "epoch": 0.75, - "learning_rate": 9.058609974713654e-08, - "logits/chosen": -2.1647183895111084, - "logits/rejected": -1.9520107507705688, - "logps/chosen": -270.0279235839844, - "logps/rejected": -260.9911193847656, - "loss": 10654.6578, - "rewards/accuracies": 0.706250011920929, - "rewards/chosen": -0.6530768275260925, - "rewards/margins": 0.2193538248538971, - "rewards/rejected": -0.872430682182312, - "rewards/safe_rewards": -0.6400060653686523, - "rewards/unsafe_rewards": -0.6661475896835327, + "epoch": 0.74, + "learning_rate": 9.76718259227532e-08, + "logits/chosen": 0.498538076877594, + "logits/rejected": 0.9989287257194519, + "logps/chosen": -272.96820068359375, + "logps/rejected": -256.63140869140625, + "loss": 5331.4734, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.6205289363861084, + "rewards/margins": 0.21373698115348816, + "rewards/rejected": -0.8342660069465637, + "rewards/safe_rewards": -0.5949203372001648, + "rewards/unsafe_rewards": -0.6141771674156189, "step": 1390 }, { - "epoch": 0.75, - "learning_rate": 8.699597598680753e-08, - "logits/chosen": -2.1252872943878174, - "logits/rejected": -1.9515492916107178, - "logps/chosen": -257.53680419921875, - "logps/rejected": -247.7872314453125, - "loss": 10438.4352, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.6546493768692017, - "rewards/margins": 0.19867801666259766, - "rewards/rejected": -0.8533273935317993, - "rewards/safe_rewards": -0.6472882032394409, - "rewards/unsafe_rewards": -0.6620105504989624, + "epoch": 0.74, + "learning_rate": 9.402406545219676e-08, + "logits/chosen": 0.34590667486190796, + "logits/rejected": 0.8703553080558777, + "logps/chosen": -273.8531188964844, + "logps/rejected": -247.87466430664062, + "loss": 5546.1305, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.6622526049613953, + "rewards/margins": 0.1561700403690338, + "rewards/rejected": -0.8184226751327515, + "rewards/safe_rewards": -0.6668413281440735, + "rewards/unsafe_rewards": -0.6589676141738892, "step": 1400 }, { - "epoch": 0.76, - "learning_rate": 8.346339790933166e-08, - "logits/chosen": -2.163567543029785, - "logits/rejected": -1.9771531820297241, - "logps/chosen": -261.906982421875, - "logps/rejected": -245.43344116210938, - "loss": 11502.7297, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.6902798414230347, - "rewards/margins": 0.19094708561897278, - "rewards/rejected": -0.8812268376350403, - "rewards/safe_rewards": -0.6901260018348694, - "rewards/unsafe_rewards": -0.6904336214065552, + "epoch": 0.75, + "learning_rate": 9.042988532644249e-08, + "logits/chosen": 0.2142190933227539, + "logits/rejected": 0.5996747016906738, + "logps/chosen": -308.82635498046875, + "logps/rejected": -276.37823486328125, + "loss": 5583.4395, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.5863175392150879, + "rewards/margins": 0.23458845913410187, + "rewards/rejected": -0.8209059834480286, + "rewards/safe_rewards": -0.5638710260391235, + "rewards/unsafe_rewards": -0.5323917269706726, "step": 1410 }, { - "epoch": 0.76, - "learning_rate": 7.998961262881506e-08, - "logits/chosen": -2.136033535003662, - "logits/rejected": -1.915771245956421, - "logps/chosen": -282.3092346191406, - "logps/rejected": -245.1779022216797, - "loss": 10907.2641, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.6517400741577148, - "rewards/margins": 0.2158946692943573, - "rewards/rejected": -0.8676347732543945, - "rewards/safe_rewards": -0.6872564554214478, - "rewards/unsafe_rewards": -0.6162236928939819, + "epoch": 0.75, + "learning_rate": 8.689052020653592e-08, + "logits/chosen": -0.06605692207813263, + "logits/rejected": 0.6343873739242554, + "logps/chosen": -285.37225341796875, + "logps/rejected": -252.3105010986328, + "loss": 5576.0598, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.5753235816955566, + "rewards/margins": 0.2064014971256256, + "rewards/rejected": -0.7817251086235046, + "rewards/safe_rewards": -0.5231102705001831, + "rewards/unsafe_rewards": -0.5478030443191528, "step": 1420 }, { - "epoch": 0.77, - "learning_rate": 7.657584650360846e-08, - "logits/chosen": -2.068126678466797, - "logits/rejected": -1.9587552547454834, - "logps/chosen": -251.7869873046875, - "logps/rejected": -236.9489288330078, - "loss": 11409.3359, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.6544846296310425, - "rewards/margins": 0.2086174488067627, - "rewards/rejected": -0.8631020784378052, - "rewards/safe_rewards": -0.6824710965156555, - "rewards/unsafe_rewards": -0.6264981627464294, + "epoch": 0.76, + "learning_rate": 8.340718592365037e-08, + "logits/chosen": 0.4551053047180176, + "logits/rejected": 0.6916473507881165, + "logps/chosen": -259.25543212890625, + "logps/rejected": -269.81097412109375, + "loss": 5258.8734, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.6683470010757446, + "rewards/margins": 0.16762246191501617, + "rewards/rejected": -0.8359693288803101, + "rewards/safe_rewards": -0.6167613863945007, + "rewards/unsafe_rewards": -0.6983481645584106, "step": 1430 }, { - "epoch": 0.77, - "learning_rate": 7.322330470336313e-08, - "logits/chosen": -2.122067928314209, - "logits/rejected": -1.9209505319595337, - "logps/chosen": -269.77520751953125, - "logps/rejected": -268.40972900390625, - "loss": 10821.843, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.6080222129821777, - "rewards/margins": 0.25041937828063965, - "rewards/rejected": -0.8584416508674622, - "rewards/safe_rewards": -0.5671194195747375, - "rewards/unsafe_rewards": -0.6489250659942627, + "epoch": 0.76, + "learning_rate": 7.998107906142839e-08, + "logits/chosen": 0.4198254942893982, + "logits/rejected": 0.9249162673950195, + "logps/chosen": -256.2335205078125, + "logps/rejected": -243.9502716064453, + "loss": 5150.4359, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.6530503034591675, + "rewards/margins": 0.22125795483589172, + "rewards/rejected": -0.8743082880973816, + "rewards/safe_rewards": -0.6435777544975281, + "rewards/unsafe_rewards": -0.6962872743606567, "step": 1440 }, { - "epoch": 0.78, - "learning_rate": 6.993317078356709e-08, - "logits/chosen": -2.1524910926818848, - "logits/rejected": -2.056199789047241, - "logps/chosen": -279.29583740234375, - "logps/rejected": -255.84521484375, - "loss": 11391.4539, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.691224217414856, - "rewards/margins": 0.13461162149906158, - "rewards/rejected": -0.8258358240127563, - "rewards/safe_rewards": -0.7272582054138184, - "rewards/unsafe_rewards": -0.6551901698112488, + "epoch": 0.77, + "learning_rate": 7.661337654493575e-08, + "logits/chosen": 0.11405469477176666, + "logits/rejected": 0.8541787266731262, + "logps/chosen": -285.04632568359375, + "logps/rejected": -264.7653503417969, + "loss": 5838.1379, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.6224103569984436, + "rewards/margins": 0.20319974422454834, + "rewards/rejected": -0.8256100416183472, + "rewards/safe_rewards": -0.6171637773513794, + "rewards/unsafe_rewards": -0.5961381793022156, "step": 1450 }, { - "epoch": 0.79, - "learning_rate": 6.67066062677118e-08, - "logits/chosen": -2.191707134246826, - "logits/rejected": -2.0195794105529785, - "logps/chosen": -263.27642822265625, - "logps/rejected": -235.50973510742188, - "loss": 11926.3453, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.6236655116081238, - "rewards/margins": 0.16336306929588318, - "rewards/rejected": -0.7870286107063293, - "rewards/safe_rewards": -0.6308915615081787, - "rewards/unsafe_rewards": -0.6164394617080688, + "epoch": 0.77, + "learning_rate": 7.330523523636751e-08, + "logits/chosen": 0.33853933215141296, + "logits/rejected": 0.5890348553657532, + "logps/chosen": -267.7184753417969, + "logps/rejected": -279.6230163574219, + "loss": 5326.7477, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.6186683177947998, + "rewards/margins": 0.19817940890789032, + "rewards/rejected": -0.8168476819992065, + "rewards/safe_rewards": -0.6040722727775574, + "rewards/unsafe_rewards": -0.6181649565696716, "step": 1460 }, { - "epoch": 0.79, - "learning_rate": 6.354475023723685e-08, - "logits/chosen": -2.1155574321746826, - "logits/rejected": -1.9720067977905273, - "logps/chosen": -301.7005920410156, - "logps/rejected": -267.5169372558594, - "loss": 11094.9625, - "rewards/accuracies": 0.706250011920929, - "rewards/chosen": -0.6763675808906555, - "rewards/margins": 0.23334193229675293, - "rewards/rejected": -0.9097094535827637, - "rewards/safe_rewards": -0.6679859757423401, - "rewards/unsafe_rewards": -0.6847492456436157, + "epoch": 0.78, + "learning_rate": 7.005779153764682e-08, + "logits/chosen": 0.4181288182735443, + "logits/rejected": 0.7393978238105774, + "logps/chosen": -249.9525909423828, + "logps/rejected": -242.4307861328125, + "loss": 5633.5648, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.6368721723556519, + "rewards/margins": 0.15112480521202087, + "rewards/rejected": -0.7879970073699951, + "rewards/safe_rewards": -0.6358110308647156, + "rewards/unsafe_rewards": -0.6208546161651611, "step": 1470 }, { - "epoch": 0.8, - "learning_rate": 6.044871892939746e-08, - "logits/chosen": -2.184211254119873, - "logits/rejected": -1.9999923706054688, - "logps/chosen": -281.41900634765625, - "logps/rejected": -268.82464599609375, - "loss": 10924.2703, - "rewards/accuracies": 0.7437499761581421, - "rewards/chosen": -0.6950246691703796, - "rewards/margins": 0.19827482104301453, - "rewards/rejected": -0.8932995796203613, - "rewards/safe_rewards": -0.6960557699203491, - "rewards/unsafe_rewards": -0.6939936280250549, + "epoch": 0.79, + "learning_rate": 6.687216100005138e-08, + "logits/chosen": 0.6848994493484497, + "logits/rejected": 1.1733933687210083, + "logps/chosen": -284.51080322265625, + "logps/rejected": -288.7901916503906, + "loss": 5048.4258, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.6762335300445557, + "rewards/margins": 0.1719200611114502, + "rewards/rejected": -0.8481537103652954, + "rewards/safe_rewards": -0.6376355290412903, + "rewards/unsafe_rewards": -0.7184177041053772, "step": 1480 }, { - "epoch": 0.8, - "learning_rate": 5.741960534319676e-08, - "logits/chosen": -2.134298801422119, - "logits/rejected": -2.02238130569458, - "logps/chosen": -241.8282012939453, - "logps/rejected": -240.76290893554688, - "loss": 11064.8711, - "rewards/accuracies": 0.643750011920929, - "rewards/chosen": -0.687341570854187, - "rewards/margins": 0.16167931258678436, - "rewards/rejected": -0.8490209579467773, - "rewards/safe_rewards": -0.7233562469482422, - "rewards/unsafe_rewards": -0.6513269543647766, + "epoch": 0.79, + "learning_rate": 6.374943794100349e-08, + "logits/chosen": 0.48638778924942017, + "logits/rejected": 1.259670615196228, + "logps/chosen": -267.34588623046875, + "logps/rejected": -245.59756469726562, + "loss": 5545.4941, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.6003537178039551, + "rewards/margins": 0.22699756920337677, + "rewards/rejected": -0.8273512721061707, + "rewards/safe_rewards": -0.6312727332115173, + "rewards/unsafe_rewards": -0.6281502842903137, "step": 1490 }, { - "epoch": 0.81, - "learning_rate": 5.44584788535217e-08, - "logits/chosen": -2.1548972129821777, - "logits/rejected": -2.0020337104797363, - "logps/chosen": -288.1227111816406, - "logps/rejected": -265.52337646484375, - "loss": 10202.4125, - "rewards/accuracies": 0.6937500238418579, - "rewards/chosen": -0.7084766626358032, - "rewards/margins": 0.21978096663951874, - "rewards/rejected": -0.9282576441764832, - "rewards/safe_rewards": -0.6754325032234192, - "rewards/unsafe_rewards": -0.741520881652832, + "epoch": 0.8, + "learning_rate": 6.069069506815325e-08, + "logits/chosen": 0.7533052563667297, + "logits/rejected": 1.2028855085372925, + "logps/chosen": -251.12496948242188, + "logps/rejected": -253.78408813476562, + "loss": 5749.5141, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.6362664103507996, + "rewards/margins": 0.2198611944913864, + "rewards/rejected": -0.8561276197433472, + "rewards/safe_rewards": -0.622052013874054, + "rewards/unsafe_rewards": -0.704675555229187, "step": 1500 }, { - "epoch": 0.81, - "eval_logits/chosen": -1.9419655799865723, - "eval_logits/rejected": -1.764477252960205, - "eval_logps/chosen": -226.97747802734375, - "eval_logps/rejected": -195.17942810058594, - "eval_loss": 4320.98779296875, - "eval_rewards/accuracies": 0.6022507548332214, - "eval_rewards/chosen": -0.9654689431190491, - "eval_rewards/margins": 0.061668772250413895, - "eval_rewards/rejected": -1.0271376371383667, - "eval_rewards/safe_rewards": -0.9610660672187805, - "eval_rewards/unsafe_rewards": -0.9618369936943054, - "eval_runtime": 993.2958, - "eval_samples_per_second": 33.267, - "eval_steps_per_second": 1.04, + "epoch": 0.8, + "eval_logits/chosen": 1.0718276500701904, + "eval_logits/rejected": 1.9546749591827393, + "eval_logps/chosen": -228.9304656982422, + "eval_logps/rejected": -199.36412048339844, + "eval_loss": 4458.44287109375, + "eval_rewards/accuracies": 0.6194114685058594, + "eval_rewards/chosen": -0.8858092427253723, + "eval_rewards/margins": 0.0865015909075737, + "eval_rewards/rejected": -0.9723107814788818, + "eval_rewards/safe_rewards": -0.874053955078125, + "eval_rewards/unsafe_rewards": -0.8699882626533508, + "eval_runtime": 2349.2554, + "eval_samples_per_second": 14.917, + "eval_steps_per_second": 0.467, "step": 1500 }, { - "epoch": 0.81, - "learning_rate": 5.156638483361933e-08, - "logits/chosen": -2.2049641609191895, - "logits/rejected": -2.0106749534606934, - "logps/chosen": -285.2719421386719, - "logps/rejected": -273.9066467285156, - "loss": 11107.9672, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.6739949584007263, - "rewards/margins": 0.23203198611736298, - "rewards/rejected": -0.9060269594192505, - "rewards/safe_rewards": -0.6972517967224121, - "rewards/unsafe_rewards": -0.6507382988929749, + "epoch": 0.8, + "learning_rate": 5.7696983110885746e-08, + "logits/chosen": 1.0346394777297974, + "logits/rejected": 1.4075425863265991, + "logps/chosen": -264.0049133300781, + "logps/rejected": -256.81793212890625, + "loss": 5875.7254, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.7450360059738159, + "rewards/margins": 0.13777832686901093, + "rewards/rejected": -0.8828142881393433, + "rewards/safe_rewards": -0.6767371892929077, + "rewards/unsafe_rewards": -0.7506189942359924, "step": 1510 }, { - "epoch": 0.82, - "learning_rate": 4.8744344286046236e-08, - "logits/chosen": -2.148716926574707, - "logits/rejected": -2.0059280395507812, - "logps/chosen": -285.7190856933594, - "logps/rejected": -266.8186950683594, - "loss": 11427.2711, - "rewards/accuracies": 0.668749988079071, - "rewards/chosen": -0.6893398761749268, - "rewards/margins": 0.15746144950389862, - "rewards/rejected": -0.8468014001846313, - "rewards/safe_rewards": -0.7316367030143738, - "rewards/unsafe_rewards": -0.6470431089401245, + "epoch": 0.81, + "learning_rate": 5.47693304593777e-08, + "logits/chosen": 0.577034056186676, + "logits/rejected": 1.2275969982147217, + "logps/chosen": -280.673583984375, + "logps/rejected": -243.10635375976562, + "loss": 5531.6125, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.6422880291938782, + "rewards/margins": 0.22371160984039307, + "rewards/rejected": -0.8659995794296265, + "rewards/safe_rewards": -0.5432512164115906, + "rewards/unsafe_rewards": -0.6611617803573608, "step": 1520 }, { - "epoch": 0.82, - "learning_rate": 4.599335348222169e-08, - "logits/chosen": -2.174553871154785, - "logits/rejected": -2.05711030960083, - "logps/chosen": -288.70849609375, - "logps/rejected": -289.8013610839844, - "loss": 10682.943, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.6674157977104187, - "rewards/margins": 0.21851542592048645, - "rewards/rejected": -0.8859313130378723, - "rewards/safe_rewards": -0.6756128668785095, - "rewards/unsafe_rewards": -0.6592189073562622, + "epoch": 0.81, + "learning_rate": 5.190874281132851e-08, + "logits/chosen": 0.6209213733673096, + "logits/rejected": 0.9749325513839722, + "logps/chosen": -258.8196716308594, + "logps/rejected": -247.3189697265625, + "loss": 5541.2727, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.6575254201889038, + "rewards/margins": 0.12947872281074524, + "rewards/rejected": -0.7870042324066162, + "rewards/safe_rewards": -0.7655413746833801, + "rewards/unsafe_rewards": -0.7101870775222778, "step": 1530 }, { - "epoch": 0.83, - "learning_rate": 4.331438361071163e-08, - "logits/chosen": -2.1525204181671143, - "logits/rejected": -2.059199571609497, - "logps/chosen": -302.69195556640625, - "logps/rejected": -287.97161865234375, - "loss": 11478.0016, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.6294053792953491, - "rewards/margins": 0.17254997789859772, - "rewards/rejected": -0.8019553422927856, - "rewards/safe_rewards": -0.6268101334571838, - "rewards/unsafe_rewards": -0.6320004463195801, + "epoch": 0.82, + "learning_rate": 4.9116202826486045e-08, + "logits/chosen": 0.7310935258865356, + "logits/rejected": 1.0775771141052246, + "logps/chosen": -272.3906555175781, + "logps/rejected": -257.2728271484375, + "loss": 5545.8492, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.6876263618469238, + "rewards/margins": 0.16089771687984467, + "rewards/rejected": -0.8485240936279297, + "rewards/safe_rewards": -0.6295339465141296, + "rewards/unsafe_rewards": -0.7383956909179688, "step": 1540 }, { - "epoch": 0.83, - "learning_rate": 4.0708380434367864e-08, - "logits/chosen": -2.1821987628936768, - "logits/rejected": -1.9993159770965576, - "logps/chosen": -265.56854248046875, - "logps/rejected": -261.4121398925781, - "loss": 10532.0812, - "rewards/accuracies": 0.706250011920929, - "rewards/chosen": -0.6764332056045532, - "rewards/margins": 0.19545750319957733, - "rewards/rejected": -0.8718908429145813, - "rewards/safe_rewards": -0.6992810964584351, - "rewards/unsafe_rewards": -0.6535855531692505, + "epoch": 0.82, + "learning_rate": 4.639266978908676e-08, + "logits/chosen": 0.6267167329788208, + "logits/rejected": 1.1266528367996216, + "logps/chosen": -297.58380126953125, + "logps/rejected": -271.4803161621094, + "loss": 5131.627, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.6685757637023926, + "rewards/margins": 0.18729698657989502, + "rewards/rejected": -0.8558727502822876, + "rewards/safe_rewards": -0.6740354299545288, + "rewards/unsafe_rewards": -0.6281224489212036, "step": 1550 }, { - "epoch": 0.84, - "learning_rate": 3.817626395644305e-08, - "logits/chosen": -2.1872050762176514, - "logits/rejected": -2.0328307151794434, - "logps/chosen": -265.10205078125, - "logps/rejected": -248.8824462890625, - "loss": 11308.0203, - "rewards/accuracies": 0.606249988079071, - "rewards/chosen": -0.6716850399971008, - "rewards/margins": 0.13508987426757812, - "rewards/rejected": -0.806774914264679, - "rewards/safe_rewards": -0.6651741862297058, - "rewards/unsafe_rewards": -0.6781958341598511, + "epoch": 0.83, + "learning_rate": 4.373907927832513e-08, + "logits/chosen": 0.6049357056617737, + "logits/rejected": 0.9919975996017456, + "logps/chosen": -265.62481689453125, + "logps/rejected": -285.9028625488281, + "loss": 5640.1398, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.6182764172554016, + "rewards/margins": 0.22418944537639618, + "rewards/rejected": -0.842465877532959, + "rewards/safe_rewards": -0.6555901765823364, + "rewards/unsafe_rewards": -0.5656682848930359, "step": 1560 }, { - "epoch": 0.84, - "learning_rate": 3.571892809580013e-08, - "logits/chosen": -2.157172203063965, - "logits/rejected": -2.0089964866638184, - "logps/chosen": -260.57525634765625, - "logps/rejected": -251.6233367919922, - "loss": 11400.9055, - "rewards/accuracies": 0.6187499761581421, - "rewards/chosen": -0.6940633058547974, - "rewards/margins": 0.136986643075943, - "rewards/rejected": -0.831049919128418, - "rewards/safe_rewards": -0.6672950983047485, - "rewards/unsafe_rewards": -0.7208314538002014, + "epoch": 0.83, + "learning_rate": 4.115634284696698e-08, + "logits/chosen": 0.49705711007118225, + "logits/rejected": 0.9479654431343079, + "logps/chosen": -261.2461853027344, + "logps/rejected": -270.83331298828125, + "loss": 5189.8301, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -0.6632257699966431, + "rewards/margins": 0.21208517253398895, + "rewards/rejected": -0.8753108978271484, + "rewards/safe_rewards": -0.6663291454315186, + "rewards/unsafe_rewards": -0.6038998365402222, "step": 1570 }, { - "epoch": 0.85, - "learning_rate": 3.333724037132976e-08, - "logits/chosen": -2.180004596710205, - "logits/rejected": -2.0329430103302, - "logps/chosen": -274.8149108886719, - "logps/rejected": -270.170654296875, - "loss": 11022.3203, - "rewards/accuracies": 0.643750011920929, - "rewards/chosen": -0.6869722008705139, - "rewards/margins": 0.17142841219902039, - "rewards/rejected": -0.8584005236625671, - "rewards/safe_rewards": -0.664495587348938, - "rewards/unsafe_rewards": -0.7094486951828003, + "epoch": 0.84, + "learning_rate": 3.864534770821559e-08, + "logits/chosen": 0.6149829626083374, + "logits/rejected": 1.1939442157745361, + "logps/chosen": -262.00933837890625, + "logps/rejected": -240.24581909179688, + "loss": 5618.5883, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.6275893449783325, + "rewards/margins": 0.20411472022533417, + "rewards/rejected": -0.8317041397094727, + "rewards/safe_rewards": -0.6472023725509644, + "rewards/unsafe_rewards": -0.5557063817977905, "step": 1580 }, { - "epoch": 0.86, - "learning_rate": 3.1032041595688506e-08, - "logits/chosen": -2.107234477996826, - "logits/rejected": -1.9175293445587158, - "logps/chosen": -265.2936706542969, - "logps/rejected": -258.67987060546875, - "loss": 10853.2336, + "epoch": 0.84, + "learning_rate": 3.620695643093924e-08, + "logits/chosen": 0.43840399384498596, + "logits/rejected": 1.105423092842102, + "logps/chosen": -269.2837829589844, + "logps/rejected": -238.085205078125, + "loss": 5468.3313, "rewards/accuracies": 0.6812499761581421, - "rewards/chosen": -0.6839567422866821, - "rewards/margins": 0.1879226267337799, - "rewards/rejected": -0.8718793988227844, - "rewards/safe_rewards": -0.6944083571434021, - "rewards/unsafe_rewards": -0.6735051870346069, + "rewards/chosen": -0.6394304037094116, + "rewards/margins": 0.22106070816516876, + "rewards/rejected": -0.860491156578064, + "rewards/safe_rewards": -0.6031507849693298, + "rewards/unsafe_rewards": -0.6791771650314331, "step": 1590 }, { - "epoch": 0.86, - "learning_rate": 2.880414557846453e-08, - "logits/chosen": -2.131779432296753, - "logits/rejected": -2.045048236846924, - "logps/chosen": -249.33114624023438, - "logps/rejected": -240.8230438232422, - "loss": 10239.6547, - "rewards/accuracies": 0.643750011920929, - "rewards/chosen": -0.6429909467697144, - "rewards/margins": 0.1874755173921585, - "rewards/rejected": -0.8304663896560669, - "rewards/safe_rewards": -0.6290788650512695, - "rewards/unsafe_rewards": -0.6569029092788696, + "epoch": 0.85, + "learning_rate": 3.384200664336412e-08, + "logits/chosen": 0.5348480343818665, + "logits/rejected": 1.0058144330978394, + "logps/chosen": -268.3987731933594, + "logps/rejected": -247.79696655273438, + "loss": 5660.3645, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.5938838720321655, + "rewards/margins": 0.21732494235038757, + "rewards/rejected": -0.8112088441848755, + "rewards/safe_rewards": -0.5639302134513855, + "rewards/unsafe_rewards": -0.6350196599960327, "step": 1600 }, { - "epoch": 0.87, - "learning_rate": 2.6654338838876662e-08, - "logits/chosen": -2.2182207107543945, - "logits/rejected": -1.985608458518982, - "logps/chosen": -280.660400390625, - "logps/rejected": -240.7994384765625, - "loss": 11038.0031, - "rewards/accuracies": 0.668749988079071, - "rewards/chosen": -0.6624557971954346, - "rewards/margins": 0.23739679157733917, - "rewards/rejected": -0.8998525738716125, - "rewards/safe_rewards": -0.68045574426651, - "rewards/unsafe_rewards": -0.6444558501243591, + "epoch": 0.85, + "learning_rate": 3.155131074533529e-08, + "logits/chosen": 0.30334433913230896, + "logits/rejected": 0.9854658246040344, + "logps/chosen": -283.627685546875, + "logps/rejected": -263.83251953125, + "loss": 6043.9172, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.6394412517547607, + "rewards/margins": 0.1600230187177658, + "rewards/rejected": -0.7994643449783325, + "rewards/safe_rewards": -0.6199285387992859, + "rewards/unsafe_rewards": -0.6412296295166016, "step": 1610 }, { - "epoch": 0.87, - "learning_rate": 2.4583380328107805e-08, - "logits/chosen": -2.1470413208007812, - "logits/rejected": -1.985733985900879, - "logps/chosen": -291.9235534667969, - "logps/rejected": -258.4531555175781, - "loss": 11528.2828, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.6623538136482239, - "rewards/margins": 0.20262321829795837, - "rewards/rejected": -0.8649770021438599, - "rewards/safe_rewards": -0.6641503572463989, - "rewards/unsafe_rewards": -0.660557210445404, + "epoch": 0.86, + "learning_rate": 2.9335655629243645e-08, + "logits/chosen": 0.39362573623657227, + "logits/rejected": 0.9285033941268921, + "logps/chosen": -270.2079162597656, + "logps/rejected": -261.9796447753906, + "loss": 5957.5516, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.6112038493156433, + "rewards/margins": 0.18837173283100128, + "rewards/rejected": -0.7995756268501282, + "rewards/safe_rewards": -0.6032061576843262, + "rewards/unsafe_rewards": -0.6732661724090576, "step": 1620 }, { - "epoch": 0.88, - "learning_rate": 2.259200116137039e-08, - "logits/chosen": -2.1394782066345215, - "logits/rejected": -1.9972803592681885, - "logps/chosen": -284.652099609375, - "logps/rejected": -278.02423095703125, - "loss": 11545.0234, - "rewards/accuracies": 0.643750011920929, - "rewards/chosen": -0.6758708357810974, - "rewards/margins": 0.17819495499134064, - "rewards/rejected": -0.8540657758712769, - "rewards/safe_rewards": -0.6568797826766968, - "rewards/unsafe_rewards": -0.6948619484901428, + "epoch": 0.86, + "learning_rate": 2.7195802409715197e-08, + "logits/chosen": 0.2444291114807129, + "logits/rejected": 0.9499914050102234, + "logps/chosen": -298.4200134277344, + "logps/rejected": -249.72866821289062, + "loss": 5750.8313, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.6592567563056946, + "rewards/margins": 0.1407555341720581, + "rewards/rejected": -0.8000122904777527, + "rewards/safe_rewards": -0.7100226283073425, + "rewards/unsafe_rewards": -0.7015893459320068, "step": 1630 }, { - "epoch": 0.88, - "learning_rate": 2.068090435979958e-08, - "logits/chosen": -2.1189610958099365, - "logits/rejected": -2.0093135833740234, - "logps/chosen": -268.87371826171875, - "logps/rejected": -250.8845672607422, - "loss": 11243.7469, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.6307465434074402, - "rewards/margins": 0.16744786500930786, - "rewards/rejected": -0.798194408416748, - "rewards/safe_rewards": -0.6202843189239502, - "rewards/unsafe_rewards": -0.6412087678909302, + "epoch": 0.87, + "learning_rate": 2.513248616215527e-08, + "logits/chosen": 0.3666357100009918, + "logits/rejected": 0.9415947198867798, + "logps/chosen": -277.87518310546875, + "logps/rejected": -276.29119873046875, + "loss": 5205.8715, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6106274724006653, + "rewards/margins": 0.24805088341236115, + "rewards/rejected": -0.8586783409118652, + "rewards/safe_rewards": -0.6150985956192017, + "rewards/unsafe_rewards": -0.594727635383606, "step": 1640 }, { - "epoch": 0.89, - "learning_rate": 1.8850764602263423e-08, - "logits/chosen": -2.1497740745544434, - "logits/rejected": -1.9683231115341187, - "logps/chosen": -263.89495849609375, - "logps/rejected": -266.2047424316406, - "loss": 11196.7602, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.6697946786880493, - "rewards/margins": 0.1570041924715042, - "rewards/rejected": -0.8267987966537476, - "rewards/safe_rewards": -0.6728789806365967, - "rewards/unsafe_rewards": -0.666710376739502, + "epoch": 0.88, + "learning_rate": 2.31464156702382e-08, + "logits/chosen": 0.24014464020729065, + "logits/rejected": 0.9577549695968628, + "logps/chosen": -292.7112121582031, + "logps/rejected": -265.7065734863281, + "loss": 5896.8078, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.5955285429954529, + "rewards/margins": 0.2333928644657135, + "rewards/rejected": -0.8289214372634888, + "rewards/safe_rewards": -0.6319350600242615, + "rewards/unsafe_rewards": -0.5868616104125977, "step": 1650 }, { - "epoch": 0.89, - "learning_rate": 1.710222798718028e-08, - "logits/chosen": -2.16522479057312, - "logits/rejected": -2.045431613922119, - "logps/chosen": -276.67889404296875, - "logps/rejected": -280.6698303222656, - "loss": 10997.7469, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.6575156450271606, - "rewards/margins": 0.1838151067495346, - "rewards/rejected": -0.8413307070732117, - "rewards/safe_rewards": -0.6694937944412231, - "rewards/unsafe_rewards": -0.6455374360084534, + "epoch": 0.88, + "learning_rate": 2.1238273182427933e-08, + "logits/chosen": 0.6973511576652527, + "logits/rejected": 1.2915074825286865, + "logps/chosen": -265.3111572265625, + "logps/rejected": -251.41201782226562, + "loss": 5434.0336, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.6617192029953003, + "rewards/margins": 0.19598451256752014, + "rewards/rejected": -0.857703685760498, + "rewards/safe_rewards": -0.6422809362411499, + "rewards/unsafe_rewards": -0.6228102445602417, "step": 1660 }, { - "epoch": 0.9, - "learning_rate": 1.5435911804424356e-08, - "logits/chosen": -2.159721851348877, - "logits/rejected": -2.048668384552002, - "logps/chosen": -290.70916748046875, - "logps/rejected": -270.44683837890625, - "loss": 11924.0805, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.6324605345726013, - "rewards/margins": 0.19088464975357056, - "rewards/rejected": -0.8233451843261719, - "rewards/safe_rewards": -0.6401357054710388, - "rewards/unsafe_rewards": -0.624785304069519, + "epoch": 0.89, + "learning_rate": 1.9408714177614306e-08, + "logits/chosen": 0.5173779726028442, + "logits/rejected": 1.02643883228302, + "logps/chosen": -268.9621887207031, + "logps/rejected": -251.25808715820312, + "loss": 5243.4758, + "rewards/accuracies": 0.7562500238418579, + "rewards/chosen": -0.6187028288841248, + "rewards/margins": 0.22567462921142578, + "rewards/rejected": -0.8443773984909058, + "rewards/safe_rewards": -0.6375213265419006, + "rewards/unsafe_rewards": -0.6421637535095215, "step": 1670 }, { - "epoch": 0.9, - "learning_rate": 1.3852404317403199e-08, - "logits/chosen": -2.108271360397339, - "logits/rejected": -1.9915637969970703, - "logps/chosen": -250.95361328125, - "logps/rejected": -268.6639099121094, - "loss": 11402.2625, - "rewards/accuracies": 0.668749988079071, - "rewards/chosen": -0.6399115324020386, - "rewards/margins": 0.16807249188423157, - "rewards/rejected": -0.8079840540885925, - "rewards/safe_rewards": -0.6549087762832642, - "rewards/unsafe_rewards": -0.6249145269393921, + "epoch": 0.89, + "learning_rate": 1.7658367139945228e-08, + "logits/chosen": 0.6539649963378906, + "logits/rejected": 1.0953106880187988, + "logps/chosen": -288.9885559082031, + "logps/rejected": -259.146728515625, + "loss": 5246.4344, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.6686577200889587, + "rewards/margins": 0.19176754355430603, + "rewards/rejected": -0.8604252934455872, + "rewards/safe_rewards": -0.7045280933380127, + "rewards/unsafe_rewards": -0.7155130505561829, "step": 1680 }, { - "epoch": 0.91, - "learning_rate": 1.235226455538113e-08, - "logits/chosen": -2.1952316761016846, - "logits/rejected": -2.0874345302581787, - "logps/chosen": -270.4241943359375, - "logps/rejected": -263.0255432128906, - "loss": 11680.2766, - "rewards/accuracies": 0.6312500238418579, - "rewards/chosen": -0.6519337892532349, - "rewards/margins": 0.18009743094444275, - "rewards/rejected": -0.8320311307907104, - "rewards/safe_rewards": -0.662136435508728, - "rewards/unsafe_rewards": -0.6417311429977417, + "epoch": 0.9, + "learning_rate": 1.5987833342931745e-08, + "logits/chosen": 0.4664410650730133, + "logits/rejected": 1.215132236480713, + "logps/chosen": -284.1900939941406, + "logps/rejected": -251.48379516601562, + "loss": 5564.9324, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -0.6805782318115234, + "rewards/margins": 0.21095602214336395, + "rewards/rejected": -0.8915343284606934, + "rewards/safe_rewards": -0.67192143201828, + "rewards/unsafe_rewards": -0.6578537821769714, "step": 1690 }, { - "epoch": 0.91, - "learning_rate": 1.0936022116124321e-08, - "logits/chosen": -2.155649185180664, - "logits/rejected": -2.0004172325134277, - "logps/chosen": -262.668212890625, - "logps/rejected": -252.78994750976562, - "loss": 10840.3375, - "rewards/accuracies": 0.706250011920929, - "rewards/chosen": -0.6261528134346008, - "rewards/margins": 0.2126920521259308, - "rewards/rejected": -0.838844895362854, - "rewards/safe_rewards": -0.6410089731216431, - "rewards/unsafe_rewards": -0.6112965941429138, + "epoch": 0.9, + "learning_rate": 1.439768664290053e-08, + "logits/chosen": 0.48882967233657837, + "logits/rejected": 1.0205453634262085, + "logps/chosen": -288.0510559082031, + "logps/rejected": -263.57122802734375, + "loss": 5705.5039, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.6453284025192261, + "rewards/margins": 0.18227383494377136, + "rewards/rejected": -0.827602207660675, + "rewards/safe_rewards": -0.6023403406143188, + "rewards/unsafe_rewards": -0.6489912867546082, "step": 1700 }, { - "epoch": 0.92, - "learning_rate": 9.60417697893534e-09, - "logits/chosen": -2.155482292175293, - "logits/rejected": -2.0352649688720703, - "logps/chosen": -259.88397216796875, - "logps/rejected": -262.71746826171875, - "loss": 11428.7422, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.632883608341217, - "rewards/margins": 0.17666089534759521, - "rewards/rejected": -0.8095444440841675, - "rewards/safe_rewards": -0.5953446626663208, - "rewards/unsafe_rewards": -0.6704224348068237, + "epoch": 0.91, + "learning_rate": 1.2888473281864597e-08, + "logits/chosen": 0.3580858111381531, + "logits/rejected": 0.9355760812759399, + "logps/chosen": -252.00344848632812, + "logps/rejected": -256.7703552246094, + "loss": 5420.7055, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.6472461819648743, + "rewards/margins": 0.19622859358787537, + "rewards/rejected": -0.8434747457504272, + "rewards/safe_rewards": -0.6663787364959717, + "rewards/unsafe_rewards": -0.6997274160385132, "step": 1710 }, { - "epoch": 0.93, - "learning_rate": 8.357199328144576e-09, - "logits/chosen": -2.1424155235290527, - "logits/rejected": -2.0332658290863037, - "logps/chosen": -308.6863708496094, - "logps/rejected": -289.75830078125, - "loss": 10265.0281, - "rewards/accuracies": 0.643750011920929, - "rewards/chosen": -0.6669701337814331, - "rewards/margins": 0.17594949901103973, - "rewards/rejected": -0.8429197072982788, - "rewards/safe_rewards": -0.7015780210494995, - "rewards/unsafe_rewards": -0.6323622465133667, + "epoch": 0.91, + "learning_rate": 1.1460711699880082e-08, + "logits/chosen": 0.32274478673934937, + "logits/rejected": 0.9183855056762695, + "logps/chosen": -281.06304931640625, + "logps/rejected": -268.91278076171875, + "loss": 5609.357, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.5867010951042175, + "rewards/margins": 0.23433193564414978, + "rewards/rejected": -0.8210331201553345, + "rewards/safe_rewards": -0.5630391240119934, + "rewards/unsafe_rewards": -0.6277604103088379, "step": 1720 }, { - "epoch": 0.93, - "learning_rate": 7.1955293871198144e-09, - "logits/chosen": -2.112140417098999, - "logits/rejected": -2.030024528503418, - "logps/chosen": -241.85476684570312, - "logps/rejected": -241.1482391357422, - "loss": 11823.4781, - "rewards/accuracies": 0.6187499761581421, - "rewards/chosen": -0.6754211187362671, - "rewards/margins": 0.13721489906311035, - "rewards/rejected": -0.8126360177993774, - "rewards/safe_rewards": -0.6722853779792786, - "rewards/unsafe_rewards": -0.6785567998886108, + "epoch": 0.92, + "learning_rate": 1.0114892356953397e-08, + "logits/chosen": 0.381804883480072, + "logits/rejected": 0.9557956457138062, + "logps/chosen": -278.6263427734375, + "logps/rejected": -252.7932891845703, + "loss": 5676.834, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.6421754360198975, + "rewards/margins": 0.1775234043598175, + "rewards/rejected": -0.8196988105773926, + "rewards/safe_rewards": -0.6115553379058838, + "rewards/unsafe_rewards": -0.6476501226425171, "step": 1730 }, { - "epoch": 0.94, - "learning_rate": 6.119577262853254e-09, - "logits/chosen": -2.1218502521514893, - "logits/rejected": -1.944239854812622, - "logps/chosen": -251.3634490966797, - "logps/rejected": -230.8335723876953, - "loss": 11556.3273, - "rewards/accuracies": 0.65625, - "rewards/chosen": -0.638350248336792, - "rewards/margins": 0.21106202900409698, - "rewards/rejected": -0.8494122624397278, - "rewards/safe_rewards": -0.613598108291626, - "rewards/unsafe_rewards": -0.6631024479866028, + "epoch": 0.92, + "learning_rate": 8.851477564560061e-09, + "logits/chosen": 0.5100737810134888, + "logits/rejected": 0.932380199432373, + "logps/chosen": -263.25146484375, + "logps/rejected": -271.11676025390625, + "loss": 5593.4414, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.6300482749938965, + "rewards/margins": 0.25807589292526245, + "rewards/rejected": -0.8881241679191589, + "rewards/safe_rewards": -0.6826761960983276, + "rewards/unsafe_rewards": -0.6732330322265625, "step": 1740 }, { - "epoch": 0.94, - "learning_rate": 5.129722801180542e-09, - "logits/chosen": -2.1147098541259766, - "logits/rejected": -2.0030481815338135, - "logps/chosen": -270.8954162597656, - "logps/rejected": -255.99600219726562, - "loss": 10173.3906, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.6771684885025024, - "rewards/margins": 0.1781720221042633, - "rewards/rejected": -0.8553404808044434, - "rewards/safe_rewards": -0.7092324495315552, - "rewards/unsafe_rewards": -0.6451044082641602, + "epoch": 0.93, + "learning_rate": 7.670901326832763e-09, + "logits/chosen": 0.6556006669998169, + "logits/rejected": 1.0529851913452148, + "logps/chosen": -272.6200866699219, + "logps/rejected": -291.10101318359375, + "loss": 5333.684, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.7119321823120117, + "rewards/margins": 0.18222954869270325, + "rewards/rejected": -0.8941618204116821, + "rewards/safe_rewards": -0.7450841069221497, + "rewards/unsafe_rewards": -0.6783844232559204, "step": 1750 }, { - "epoch": 0.95, - "learning_rate": 4.226315452682816e-09, - "logits/chosen": -2.1494641304016113, - "logits/rejected": -2.025930404663086, - "logps/chosen": -264.47100830078125, - "logps/rejected": -256.65093994140625, - "loss": 10950.5703, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.6349852681159973, - "rewards/margins": 0.19823208451271057, - "rewards/rejected": -0.833217442035675, - "rewards/safe_rewards": -0.6437097787857056, - "rewards/unsafe_rewards": -0.6262607574462891, + "epoch": 0.93, + "learning_rate": 6.5735691914738936e-09, + "logits/chosen": 0.3428182005882263, + "logits/rejected": 0.6993114948272705, + "logps/chosen": -276.2501220703125, + "logps/rejected": -270.787841796875, + "loss": 6014.7414, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.6672028303146362, + "rewards/margins": 0.16263318061828613, + "rewards/rejected": -0.8298360109329224, + "rewards/safe_rewards": -0.6557270288467407, + "rewards/unsafe_rewards": -0.7067701816558838, "step": 1760 }, { - "epoch": 0.95, - "learning_rate": 3.4096741493194193e-09, - "logits/chosen": -2.2127685546875, - "logits/rejected": -2.0972578525543213, - "logps/chosen": -264.87530517578125, - "logps/rejected": -257.4256286621094, - "loss": 11913.2609, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.6370722651481628, - "rewards/margins": 0.15080325305461884, - "rewards/rejected": -0.7878755927085876, - "rewards/safe_rewards": -0.617976725101471, - "rewards/unsafe_rewards": -0.6561679840087891, + "epoch": 0.94, + "learning_rate": 5.559858110443016e-09, + "logits/chosen": 0.3265165388584137, + "logits/rejected": 0.9415761828422546, + "logps/chosen": -279.380615234375, + "logps/rejected": -258.53887939453125, + "loss": 5329.075, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.6516368985176086, + "rewards/margins": 0.22732026875019073, + "rewards/rejected": -0.8789570927619934, + "rewards/safe_rewards": -0.6853364706039429, + "rewards/unsafe_rewards": -0.6284711360931396, "step": 1770 }, { - "epoch": 0.96, - "learning_rate": 2.6800871918346846e-09, - "logits/chosen": -2.220578670501709, - "logits/rejected": -2.04236102104187, - "logps/chosen": -274.6324768066406, - "logps/rejected": -260.865234375, - "loss": 11005.4078, - "rewards/accuracies": 0.668749988079071, - "rewards/chosen": -0.6086142659187317, - "rewards/margins": 0.19890496134757996, - "rewards/rejected": -0.8075191378593445, - "rewards/safe_rewards": -0.6277071237564087, - "rewards/unsafe_rewards": -0.5895212888717651, + "epoch": 0.94, + "learning_rate": 4.6301163104676685e-09, + "logits/chosen": 0.5433076620101929, + "logits/rejected": 0.899452805519104, + "logps/chosen": -262.05511474609375, + "logps/rejected": -280.93658447265625, + "loss": 5452.5277, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.6632400751113892, + "rewards/margins": 0.19723954796791077, + "rewards/rejected": -0.8604797124862671, + "rewards/safe_rewards": -0.5747020244598389, + "rewards/unsafe_rewards": -0.6066412329673767, "step": 1780 }, { - "epoch": 0.96, - "learning_rate": 2.0378121479783796e-09, - "logits/chosen": -2.132037401199341, - "logits/rejected": -1.972651481628418, - "logps/chosen": -265.41961669921875, - "logps/rejected": -256.274658203125, - "loss": 11459.243, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.6822707056999207, - "rewards/margins": 0.2049897462129593, - "rewards/rejected": -0.8872605562210083, - "rewards/safe_rewards": -0.6853871941566467, - "rewards/unsafe_rewards": -0.6791542768478394, + "epoch": 0.95, + "learning_rate": 3.784663173421438e-09, + "logits/chosen": 0.47608470916748047, + "logits/rejected": 0.8737590909004211, + "logps/chosen": -294.0523376464844, + "logps/rejected": -280.8829650878906, + "loss": 5532.6391, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.6354952454566956, + "rewards/margins": 0.18091240525245667, + "rewards/rejected": -0.8164075613021851, + "rewards/safe_rewards": -0.6999973654747009, + "rewards/unsafe_rewards": -0.6226142644882202, "step": 1790 }, { - "epoch": 0.97, - "learning_rate": 1.4830757615760247e-09, - "logits/chosen": -2.1159682273864746, - "logits/rejected": -1.9551427364349365, - "logps/chosen": -281.0558166503906, - "logps/rejected": -260.3757629394531, - "loss": 11785.8336, - "rewards/accuracies": 0.6187499761581421, - "rewards/chosen": -0.6576239466667175, - "rewards/margins": 0.15307986736297607, - "rewards/rejected": -0.8107039332389832, - "rewards/safe_rewards": -0.689802348613739, - "rewards/unsafe_rewards": -0.6254457235336304, + "epoch": 0.96, + "learning_rate": 3.023789126611137e-09, + "logits/chosen": 0.6358956694602966, + "logits/rejected": 1.2913506031036377, + "logps/chosen": -276.2715148925781, + "logps/rejected": -243.6599884033203, + "loss": 5192.1734, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -0.6617811918258667, + "rewards/margins": 0.21255967020988464, + "rewards/rejected": -0.874340832233429, + "rewards/safe_rewards": -0.665223240852356, + "rewards/unsafe_rewards": -0.67181396484375, "step": 1800 }, + { + "epoch": 0.96, + "learning_rate": 2.3477555430100604e-09, + "logits/chosen": 0.5863360166549683, + "logits/rejected": 1.0950720310211182, + "logps/chosen": -270.6855773925781, + "logps/rejected": -254.65771484375, + "loss": 5546.9984, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -0.5831121206283569, + "rewards/margins": 0.2669592499732971, + "rewards/rejected": -0.8500713109970093, + "rewards/safe_rewards": -0.586032509803772, + "rewards/unsafe_rewards": -0.577675461769104, + "step": 1810 + }, { "epoch": 0.97, - "eval_logits/chosen": -1.954970121383667, - "eval_logits/rejected": -1.7745355367660522, - "eval_logps/chosen": -224.6013946533203, - "eval_logps/rejected": -193.1151123046875, - "eval_loss": 4320.82080078125, - "eval_rewards/accuracies": 0.6027347445487976, - "eval_rewards/chosen": -0.9417080879211426, - "eval_rewards/margins": 0.06478659808635712, - "eval_rewards/rejected": -1.0064946413040161, - "eval_rewards/safe_rewards": -0.9369282722473145, - "eval_rewards/unsafe_rewards": -0.9372634291648865, - "eval_runtime": 993.1393, - "eval_samples_per_second": 33.272, - "eval_steps_per_second": 1.04, - "step": 1800 + "learning_rate": 1.7567946514721322e-09, + "logits/chosen": 0.6444328427314758, + "logits/rejected": 1.0208208560943604, + "logps/chosen": -269.35577392578125, + "logps/rejected": -271.528564453125, + "loss": 5601.7539, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.6750708818435669, + "rewards/margins": 0.19110876321792603, + "rewards/rejected": -0.8661795854568481, + "rewards/safe_rewards": -0.6811034679412842, + "rewards/unsafe_rewards": -0.7294248342514038, + "step": 1820 }, { "epoch": 0.97, - "learning_rate": 1.0160738724809548e-09, - "logits/chosen": -2.1704697608947754, - "logits/rejected": -1.9731838703155518, - "logps/chosen": -259.1123046875, - "logps/rejected": -257.42254638671875, - "loss": 10364.2625, + "learning_rate": 1.2511094569571668e-09, + "logits/chosen": 0.3397526741027832, + "logits/rejected": 1.0616391897201538, + "logps/chosen": -257.86822509765625, + "logps/rejected": -244.8105926513672, + "loss": 5620.3375, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.631868302822113, + "rewards/margins": 0.2000071257352829, + "rewards/rejected": -0.8318754434585571, + "rewards/safe_rewards": -0.5972138047218323, + "rewards/unsafe_rewards": -0.6459835171699524, + "step": 1830 + }, + { + "epoch": 0.98, + "learning_rate": 8.308736707954289e-10, + "logits/chosen": 0.518609881401062, + "logits/rejected": 1.1488319635391235, + "logps/chosen": -273.81390380859375, + "logps/rejected": -240.91372680664062, + "loss": 5548.0289, "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.6492170691490173, - "rewards/margins": 0.19766607880592346, - "rewards/rejected": -0.8468831777572632, - "rewards/safe_rewards": -0.6583928465843201, - "rewards/unsafe_rewards": -0.6400412321090698, - "step": 1810 + "rewards/chosen": -0.6856581568717957, + "rewards/margins": 0.2014351636171341, + "rewards/rejected": -0.8870933651924133, + "rewards/safe_rewards": -0.6684737205505371, + "rewards/unsafe_rewards": -0.694146990776062, + "step": 1840 }, { "epoch": 0.98, - "learning_rate": 6.369713474366212e-10, - "logits/chosen": -2.1475830078125, - "logits/rejected": -1.9969937801361084, - "logps/chosen": -294.9414367675781, - "logps/rejected": -288.337890625, - "loss": 10068.2195, + "learning_rate": 4.962316510149222e-10, + "logits/chosen": 0.3395392894744873, + "logits/rejected": 1.0089718103408813, + "logps/chosen": -252.1464080810547, + "logps/rejected": -241.22982788085938, + "loss": 5356.7621, "rewards/accuracies": 0.6875, - "rewards/chosen": -0.6980792880058289, - "rewards/margins": 0.2098824679851532, - "rewards/rejected": -0.9079617261886597, - "rewards/safe_rewards": -0.7020989656448364, - "rewards/unsafe_rewards": -0.6940596699714661, - "step": 1820 + "rewards/chosen": -0.6337156295776367, + "rewards/margins": 0.2152295857667923, + "rewards/rejected": -0.8489452600479126, + "rewards/safe_rewards": -0.6431758403778076, + "rewards/unsafe_rewards": -0.6494039297103882, + "step": 1850 }, { - "epoch": 0.98, - "learning_rate": 3.459020218731512e-10, - "logits/chosen": -2.1376261711120605, - "logits/rejected": -2.0157809257507324, - "logps/chosen": -254.5150604248047, - "logps/rejected": -243.1461181640625, - "loss": 10131.9508, - "rewards/accuracies": 0.6312500238418579, - "rewards/chosen": -0.6527233719825745, - "rewards/margins": 0.2155275046825409, - "rewards/rejected": -0.8682507276535034, - "rewards/safe_rewards": -0.6437439322471619, - "rewards/unsafe_rewards": -0.6617026329040527, - "step": 1830 + "epoch": 0.99, + "learning_rate": 2.4729835275189016e-10, + "logits/chosen": 0.5798267722129822, + "logits/rejected": 0.9745955467224121, + "logps/chosen": -243.1245574951172, + "logps/rejected": -238.126220703125, + "loss": 5836.127, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -0.6284788846969604, + "rewards/margins": 0.2039627581834793, + "rewards/rejected": -0.8324416279792786, + "rewards/safe_rewards": -0.5914771556854248, + "rewards/unsafe_rewards": -0.6241937279701233, + "step": 1860 }, { "epoch": 0.99, - "learning_rate": 1.429686526593088e-10, - "logits/chosen": -2.116459608078003, - "logits/rejected": -1.9903713464736938, - "logps/chosen": -270.08428955078125, - "logps/rejected": -265.38336181640625, - "loss": 11322.0812, - "rewards/accuracies": 0.668749988079071, - "rewards/chosen": -0.6661794185638428, - "rewards/margins": 0.18438491225242615, - "rewards/rejected": -0.8505643606185913, - "rewards/safe_rewards": -0.6831308603286743, - "rewards/unsafe_rewards": -0.6492279767990112, - "step": 1840 + "learning_rate": 8.415928876176482e-11, + "logits/chosen": 0.4843016564846039, + "logits/rejected": 0.8851835131645203, + "logps/chosen": -258.23773193359375, + "logps/rejected": -251.73001098632812, + "loss": 6036.282, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.6951759457588196, + "rewards/margins": 0.1390235722064972, + "rewards/rejected": -0.8341996073722839, + "rewards/safe_rewards": -0.7087674140930176, + "rewards/unsafe_rewards": -0.712031900882721, + "step": 1870 }, { "epoch": 1.0, - "learning_rate": 2.824288182584622e-11, - "logits/chosen": -2.1975011825561523, - "logits/rejected": -2.020158290863037, - "logps/chosen": -284.1047668457031, - "logps/rejected": -259.551513671875, - "loss": 10472.5969, - "rewards/accuracies": 0.643750011920929, - "rewards/chosen": -0.6722114682197571, - "rewards/margins": 0.17123940587043762, - "rewards/rejected": -0.8434508442878723, - "rewards/safe_rewards": -0.6627709865570068, - "rewards/unsafe_rewards": -0.6816519498825073, - "step": 1850 + "learning_rate": 6.870500044303673e-12, + "logits/chosen": 0.5293042063713074, + "logits/rejected": 0.8430191874504089, + "logps/chosen": -253.91397094726562, + "logps/rejected": -270.7514953613281, + "loss": 5497.6977, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -0.5842832326889038, + "rewards/margins": 0.209587961435318, + "rewards/rejected": -0.7938712239265442, + "rewards/safe_rewards": -0.6020101308822632, + "rewards/unsafe_rewards": -0.6186091303825378, + "step": 1880 }, { "epoch": 1.0, - "step": 1858, + "step": 1884, "total_flos": 0.0, - "train_loss": 11692.422362755651, - "train_runtime": 23508.9776, - "train_samples_per_second": 2.53, - "train_steps_per_second": 0.079 + "train_loss": 5859.617769083399, + "train_runtime": 32772.3871, + "train_samples_per_second": 3.68, + "train_steps_per_second": 0.057 } ], "logging_steps": 10, - "max_steps": 1858, + "max_steps": 1884, "num_input_tokens_seen": 0, "num_train_epochs": 1, - "save_steps": 300, + "save_steps": 500, "total_flos": 0.0, - "train_batch_size": 2, + "train_batch_size": 4, "trial_name": null, "trial_params": null }