{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9996020692399522, "eval_steps": 500, "global_step": 1884, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.645502645502645e-09, "logits/chosen": -1.8052858114242554, "logits/rejected": -1.8250553607940674, "logps/chosen": -201.6904296875, "logps/rejected": -206.93157958984375, "loss": 7734.375, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "rewards/safe_rewards": 0.0, "rewards/unsafe_rewards": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 2.6455026455026453e-08, "logits/chosen": -2.025691032409668, "logits/rejected": -1.8649556636810303, "logps/chosen": -270.43963623046875, "logps/rejected": -169.98423767089844, "loss": 7727.0087, "rewards/accuracies": 0.4027777910232544, "rewards/chosen": 4.114356852369383e-05, "rewards/margins": -0.0002653732954058796, "rewards/rejected": 0.00030651676934212446, "rewards/safe_rewards": -1.17086410682532e-05, "rewards/unsafe_rewards": -0.0006500756135210395, "step": 10 }, { "epoch": 0.01, "learning_rate": 5.2910052910052905e-08, "logits/chosen": -1.961146593093872, "logits/rejected": -1.873740553855896, "logps/chosen": -189.17404174804688, "logps/rejected": -176.31651306152344, "loss": 7718.007, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -6.340327672660351e-06, "rewards/margins": -0.00010152898175874725, "rewards/rejected": 9.518869046587497e-05, "rewards/safe_rewards": 0.00045737033360637724, "rewards/unsafe_rewards": -8.718876051716506e-05, "step": 20 }, { "epoch": 0.02, "learning_rate": 7.936507936507936e-08, "logits/chosen": -1.9912703037261963, "logits/rejected": -1.883933424949646, "logps/chosen": -198.4538116455078, "logps/rejected": -183.28781127929688, "loss": 7515.9359, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0001133469631895423, "rewards/margins": 0.0007399408495984972, "rewards/rejected": -0.0006265938864089549, "rewards/safe_rewards": 0.00022509883274324238, "rewards/unsafe_rewards": 0.0002071214112220332, "step": 30 }, { "epoch": 0.02, "learning_rate": 1.0582010582010581e-07, "logits/chosen": -1.927167534828186, "logits/rejected": -1.8453724384307861, "logps/chosen": -198.85276794433594, "logps/rejected": -174.22967529296875, "loss": 7334.5094, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.00027468582266010344, "rewards/margins": 0.0014765586238354445, "rewards/rejected": -0.0012018729466944933, "rewards/safe_rewards": 0.0002533269871491939, "rewards/unsafe_rewards": 0.00015336349315475672, "step": 40 }, { "epoch": 0.03, "learning_rate": 1.3227513227513225e-07, "logits/chosen": -2.037893533706665, "logits/rejected": -1.8426322937011719, "logps/chosen": -214.9281463623047, "logps/rejected": -162.3707733154297, "loss": 7399.5859, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": 0.0017435807967558503, "rewards/margins": 0.001902301562950015, "rewards/rejected": -0.00015872062067501247, "rewards/safe_rewards": 0.002309921896085143, "rewards/unsafe_rewards": 0.00044932105811312795, "step": 50 }, { "epoch": 0.03, "learning_rate": 1.5873015873015872e-07, "logits/chosen": -2.011747360229492, "logits/rejected": -1.8823707103729248, "logps/chosen": -182.73411560058594, "logps/rejected": -155.423095703125, "loss": 7214.4602, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.0006955948774702847, "rewards/margins": 0.005063413176685572, "rewards/rejected": -0.0057590072974562645, "rewards/safe_rewards": -0.0021988481748849154, "rewards/unsafe_rewards": 0.0001153635821538046, "step": 60 }, { "epoch": 0.04, "learning_rate": 1.8518518518518516e-07, "logits/chosen": -1.975612998008728, "logits/rejected": -1.8158948421478271, "logps/chosen": -186.48574829101562, "logps/rejected": -168.57896423339844, "loss": 7816.8766, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.007440758403390646, "rewards/margins": 0.010602862574160099, "rewards/rejected": -0.018043622374534607, "rewards/safe_rewards": -0.010516250506043434, "rewards/unsafe_rewards": -0.015666166320443153, "step": 70 }, { "epoch": 0.04, "learning_rate": 2.1164021164021162e-07, "logits/chosen": -1.9063125848770142, "logits/rejected": -1.7897474765777588, "logps/chosen": -210.2836151123047, "logps/rejected": -180.822998046875, "loss": 7304.9531, "rewards/accuracies": 0.625, "rewards/chosen": -0.024481967091560364, "rewards/margins": 0.016244709491729736, "rewards/rejected": -0.0407266803085804, "rewards/safe_rewards": -0.02365388534963131, "rewards/unsafe_rewards": -0.0289783775806427, "step": 80 }, { "epoch": 0.05, "learning_rate": 2.3809523809523806e-07, "logits/chosen": -1.994605302810669, "logits/rejected": -1.866681694984436, "logps/chosen": -203.6532440185547, "logps/rejected": -174.1517791748047, "loss": 7251.9984, "rewards/accuracies": 0.625, "rewards/chosen": -0.06749475002288818, "rewards/margins": 0.020768558606505394, "rewards/rejected": -0.08826331794261932, "rewards/safe_rewards": -0.06556878238916397, "rewards/unsafe_rewards": -0.052192188799381256, "step": 90 }, { "epoch": 0.05, "learning_rate": 2.645502645502645e-07, "logits/chosen": -1.9495357275009155, "logits/rejected": -1.8006837368011475, "logps/chosen": -205.99411010742188, "logps/rejected": -192.54415893554688, "loss": 6776.1008, "rewards/accuracies": 0.5625, "rewards/chosen": -0.11886356770992279, "rewards/margins": 0.020749244838953018, "rewards/rejected": -0.1396128088235855, "rewards/safe_rewards": -0.11704058945178986, "rewards/unsafe_rewards": -0.1348837912082672, "step": 100 }, { "epoch": 0.06, "learning_rate": 2.9100529100529097e-07, "logits/chosen": -1.9887052774429321, "logits/rejected": -1.8671073913574219, "logps/chosen": -226.98001098632812, "logps/rejected": -217.73733520507812, "loss": 6636.9766, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.11880362033843994, "rewards/margins": 0.03935481607913971, "rewards/rejected": -0.15815845131874084, "rewards/safe_rewards": -0.14540424942970276, "rewards/unsafe_rewards": -0.11240017414093018, "step": 110 }, { "epoch": 0.06, "learning_rate": 3.1746031746031743e-07, "logits/chosen": -1.8841511011123657, "logits/rejected": -1.6952005624771118, "logps/chosen": -235.6121368408203, "logps/rejected": -192.76162719726562, "loss": 6804.4828, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.1285235583782196, "rewards/margins": 0.07450314611196518, "rewards/rejected": -0.20302672684192657, "rewards/safe_rewards": -0.12894900143146515, "rewards/unsafe_rewards": -0.12272067368030548, "step": 120 }, { "epoch": 0.07, "learning_rate": 3.439153439153439e-07, "logits/chosen": -1.8711330890655518, "logits/rejected": -1.6887938976287842, "logps/chosen": -225.3953094482422, "logps/rejected": -200.31997680664062, "loss": 7036.6016, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.11849894374608994, "rewards/margins": 0.05801115185022354, "rewards/rejected": -0.17651011049747467, "rewards/safe_rewards": -0.10611984878778458, "rewards/unsafe_rewards": -0.14429841935634613, "step": 130 }, { "epoch": 0.07, "learning_rate": 3.703703703703703e-07, "logits/chosen": -1.826206922531128, "logits/rejected": -1.6439968347549438, "logps/chosen": -220.1838836669922, "logps/rejected": -185.7141876220703, "loss": 6936.9914, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.11376659572124481, "rewards/margins": 0.0765247792005539, "rewards/rejected": -0.1902913898229599, "rewards/safe_rewards": -0.11482509225606918, "rewards/unsafe_rewards": -0.09925278276205063, "step": 140 }, { "epoch": 0.08, "learning_rate": 3.968253968253968e-07, "logits/chosen": -1.7187334299087524, "logits/rejected": -1.5741361379623413, "logps/chosen": -211.09603881835938, "logps/rejected": -203.66156005859375, "loss": 6555.6867, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.19104455411434174, "rewards/margins": 0.06891994178295135, "rewards/rejected": -0.2599644958972931, "rewards/safe_rewards": -0.20118245482444763, "rewards/unsafe_rewards": -0.16981182992458344, "step": 150 }, { "epoch": 0.08, "learning_rate": 4.2328042328042324e-07, "logits/chosen": -1.7090606689453125, "logits/rejected": -1.4574247598648071, "logps/chosen": -231.1162567138672, "logps/rejected": -197.13832092285156, "loss": 6483.332, "rewards/accuracies": 0.65625, "rewards/chosen": -0.2999975085258484, "rewards/margins": 0.08841492235660553, "rewards/rejected": -0.3884124159812927, "rewards/safe_rewards": -0.2963607907295227, "rewards/unsafe_rewards": -0.2815978527069092, "step": 160 }, { "epoch": 0.09, "learning_rate": 4.497354497354497e-07, "logits/chosen": -1.7472738027572632, "logits/rejected": -1.5065333843231201, "logps/chosen": -255.1507110595703, "logps/rejected": -221.82241821289062, "loss": 6801.5375, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23129959404468536, "rewards/margins": 0.12043756246566772, "rewards/rejected": -0.35173720121383667, "rewards/safe_rewards": -0.22959312796592712, "rewards/unsafe_rewards": -0.1985938847064972, "step": 170 }, { "epoch": 0.1, "learning_rate": 4.761904761904761e-07, "logits/chosen": -1.680676817893982, "logits/rejected": -1.4166452884674072, "logps/chosen": -216.8690948486328, "logps/rejected": -191.8008270263672, "loss": 6535.7055, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.26913732290267944, "rewards/margins": 0.11233188211917877, "rewards/rejected": -0.381469190120697, "rewards/safe_rewards": -0.26176974177360535, "rewards/unsafe_rewards": -0.23940448462963104, "step": 180 }, { "epoch": 0.1, "learning_rate": 4.999995705919032e-07, "logits/chosen": -1.5433807373046875, "logits/rejected": -1.2667306661605835, "logps/chosen": -224.0026397705078, "logps/rejected": -205.34414672851562, "loss": 6409.0121, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19693121314048767, "rewards/margins": 0.09455744177103043, "rewards/rejected": -0.2914886772632599, "rewards/safe_rewards": -0.17649488151073456, "rewards/unsafe_rewards": -0.18380855023860931, "step": 190 }, { "epoch": 0.11, "learning_rate": 4.999480434051858e-07, "logits/chosen": -1.5521910190582275, "logits/rejected": -1.3097938299179077, "logps/chosen": -225.257568359375, "logps/rejected": -205.92129516601562, "loss": 6576.5188, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1997550129890442, "rewards/margins": 0.0904761329293251, "rewards/rejected": -0.2902311384677887, "rewards/safe_rewards": -0.20136451721191406, "rewards/unsafe_rewards": -0.21680407226085663, "step": 200 }, { "epoch": 0.11, "learning_rate": 4.998106548810311e-07, "logits/chosen": -1.3539698123931885, "logits/rejected": -1.2038872241973877, "logps/chosen": -212.8267364501953, "logps/rejected": -220.0903778076172, "loss": 6444.5828, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2437468022108078, "rewards/margins": 0.14799915254116058, "rewards/rejected": -0.3917458951473236, "rewards/safe_rewards": -0.2773512601852417, "rewards/unsafe_rewards": -0.2216939926147461, "step": 210 }, { "epoch": 0.12, "learning_rate": 4.995874522146975e-07, "logits/chosen": -1.503328561782837, "logits/rejected": -1.3146250247955322, "logps/chosen": -236.4509735107422, "logps/rejected": -211.6634063720703, "loss": 6233.5547, "rewards/accuracies": 0.65625, "rewards/chosen": -0.29747992753982544, "rewards/margins": 0.13039958477020264, "rewards/rejected": -0.4278795123100281, "rewards/safe_rewards": -0.2768808901309967, "rewards/unsafe_rewards": -0.3182833790779114, "step": 220 }, { "epoch": 0.12, "learning_rate": 4.992785120800375e-07, "logits/chosen": -1.576887845993042, "logits/rejected": -1.2664101123809814, "logps/chosen": -237.9243621826172, "logps/rejected": -213.4459991455078, "loss": 6108.0914, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23068375885486603, "rewards/margins": 0.14957153797149658, "rewards/rejected": -0.3802553117275238, "rewards/safe_rewards": -0.22292426228523254, "rewards/unsafe_rewards": -0.18162095546722412, "step": 230 }, { "epoch": 0.13, "learning_rate": 4.988839406031596e-07, "logits/chosen": -1.515092134475708, "logits/rejected": -1.2886550426483154, "logps/chosen": -223.7300567626953, "logps/rejected": -192.06324768066406, "loss": 6310.6699, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.24790284037590027, "rewards/margins": 0.1096932515501976, "rewards/rejected": -0.3575960695743561, "rewards/safe_rewards": -0.2673969864845276, "rewards/unsafe_rewards": -0.24145250022411346, "step": 240 }, { "epoch": 0.13, "learning_rate": 4.98403873325972e-07, "logits/chosen": -1.5146888494491577, "logits/rejected": -1.3244738578796387, "logps/chosen": -213.21694946289062, "logps/rejected": -209.35061645507812, "loss": 6209.5707, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.2071472406387329, "rewards/margins": 0.16860046982765198, "rewards/rejected": -0.3757476806640625, "rewards/safe_rewards": -0.1998087763786316, "rewards/unsafe_rewards": -0.20211009681224823, "step": 250 }, { "epoch": 0.14, "learning_rate": 4.978384751596212e-07, "logits/chosen": -1.3180285692214966, "logits/rejected": -1.1171799898147583, "logps/chosen": -232.109375, "logps/rejected": -236.84072875976562, "loss": 6328.7531, "rewards/accuracies": 0.625, "rewards/chosen": -0.32092350721359253, "rewards/margins": 0.17156612873077393, "rewards/rejected": -0.49248963594436646, "rewards/safe_rewards": -0.4227983355522156, "rewards/unsafe_rewards": -0.3325851559638977, "step": 260 }, { "epoch": 0.14, "learning_rate": 4.971879403278432e-07, "logits/chosen": -1.1372450590133667, "logits/rejected": -0.9446180462837219, "logps/chosen": -234.88888549804688, "logps/rejected": -224.05886840820312, "loss": 6312.1719, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.29563266038894653, "rewards/margins": 0.12811212241649628, "rewards/rejected": -0.4237447679042816, "rewards/safe_rewards": -0.33217892050743103, "rewards/unsafe_rewards": -0.27307888865470886, "step": 270 }, { "epoch": 0.15, "learning_rate": 4.964524923002436e-07, "logits/chosen": -1.415801763534546, "logits/rejected": -1.1731336116790771, "logps/chosen": -241.7359619140625, "logps/rejected": -224.5096893310547, "loss": 5974.0195, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.3053835928440094, "rewards/margins": 0.16657045483589172, "rewards/rejected": -0.4719540476799011, "rewards/safe_rewards": -0.3295218348503113, "rewards/unsafe_rewards": -0.30390697717666626, "step": 280 }, { "epoch": 0.15, "learning_rate": 4.956323837155325e-07, "logits/chosen": -1.2966214418411255, "logits/rejected": -1.1260521411895752, "logps/chosen": -227.2568359375, "logps/rejected": -214.1421661376953, "loss": 6133.0227, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.261239230632782, "rewards/margins": 0.15825437009334564, "rewards/rejected": -0.4194936156272888, "rewards/safe_rewards": -0.2375851422548294, "rewards/unsafe_rewards": -0.2705303132534027, "step": 290 }, { "epoch": 0.16, "learning_rate": 4.947278962947386e-07, "logits/chosen": -1.255904197692871, "logits/rejected": -1.0300556421279907, "logps/chosen": -231.86593627929688, "logps/rejected": -213.03768920898438, "loss": 5684.9316, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.30576351284980774, "rewards/margins": 0.1560250222682953, "rewards/rejected": -0.4617885649204254, "rewards/safe_rewards": -0.3117372691631317, "rewards/unsafe_rewards": -0.30344492197036743, "step": 300 }, { "epoch": 0.16, "learning_rate": 4.937393407444337e-07, "logits/chosen": -1.1847805976867676, "logits/rejected": -0.8935750722885132, "logps/chosen": -235.5170135498047, "logps/rejected": -226.17910766601562, "loss": 5606.7586, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.4436865746974945, "rewards/margins": 0.12356774508953094, "rewards/rejected": -0.5672543048858643, "rewards/safe_rewards": -0.4222384989261627, "rewards/unsafe_rewards": -0.49501723051071167, "step": 310 }, { "epoch": 0.17, "learning_rate": 4.926670566499992e-07, "logits/chosen": -0.6831132173538208, "logits/rejected": -0.43409886956214905, "logps/chosen": -230.1105499267578, "logps/rejected": -223.13021850585938, "loss": 6029.3086, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.4783251881599426, "rewards/margins": 0.13184307515621185, "rewards/rejected": -0.6101682782173157, "rewards/safe_rewards": -0.46370235085487366, "rewards/unsafe_rewards": -0.4838125705718994, "step": 320 }, { "epoch": 0.18, "learning_rate": 4.915114123589732e-07, "logits/chosen": -0.5296390652656555, "logits/rejected": -0.23315271735191345, "logps/chosen": -264.1290588378906, "logps/rejected": -222.7255401611328, "loss": 6587.2148, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.49660125374794006, "rewards/margins": 0.1269882619380951, "rewards/rejected": -0.6235895156860352, "rewards/safe_rewards": -0.5574027299880981, "rewards/unsafe_rewards": -0.5570284128189087, "step": 330 }, { "epoch": 0.18, "learning_rate": 4.90272804854517e-07, "logits/chosen": -0.20833459496498108, "logits/rejected": 0.08662636578083038, "logps/chosen": -271.68389892578125, "logps/rejected": -259.1782531738281, "loss": 6224.5324, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.5533224940299988, "rewards/margins": 0.15772438049316406, "rewards/rejected": -0.7110469341278076, "rewards/safe_rewards": -0.5448375940322876, "rewards/unsafe_rewards": -0.5393844842910767, "step": 340 }, { "epoch": 0.19, "learning_rate": 4.889516596190448e-07, "logits/chosen": -0.7373126149177551, "logits/rejected": -0.34005147218704224, "logps/chosen": -293.0935363769531, "logps/rejected": -241.9617156982422, "loss": 6110.7906, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5001389980316162, "rewards/margins": 0.1725221574306488, "rewards/rejected": -0.6726611852645874, "rewards/safe_rewards": -0.4835886061191559, "rewards/unsafe_rewards": -0.5382236838340759, "step": 350 }, { "epoch": 0.19, "learning_rate": 4.875484304880629e-07, "logits/chosen": -0.8152839541435242, "logits/rejected": -0.4126107096672058, "logps/chosen": -302.5885314941406, "logps/rejected": -256.1798095703125, "loss": 6488.7234, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.48745980858802795, "rewards/margins": 0.10641022026538849, "rewards/rejected": -0.5938700437545776, "rewards/safe_rewards": -0.449713796377182, "rewards/unsafe_rewards": -0.48859700560569763, "step": 360 }, { "epoch": 0.2, "learning_rate": 4.860635994942702e-07, "logits/chosen": -0.47416171431541443, "logits/rejected": 0.00913926400244236, "logps/chosen": -258.38189697265625, "logps/rejected": -230.67880249023438, "loss": 5790.3816, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.5084312558174133, "rewards/margins": 0.1444414108991623, "rewards/rejected": -0.6528726816177368, "rewards/safe_rewards": -0.5270028114318848, "rewards/unsafe_rewards": -0.48991069197654724, "step": 370 }, { "epoch": 0.2, "learning_rate": 4.844976767019714e-07, "logits/chosen": -0.19216355681419373, "logits/rejected": 0.15172423422336578, "logps/chosen": -222.911865234375, "logps/rejected": -202.00888061523438, "loss": 5908.2133, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5394010543823242, "rewards/margins": 0.11715151369571686, "rewards/rejected": -0.6565525531768799, "rewards/safe_rewards": -0.5183984041213989, "rewards/unsafe_rewards": -0.5164821743965149, "step": 380 }, { "epoch": 0.21, "learning_rate": 4.828512000318616e-07, "logits/chosen": -0.213291734457016, "logits/rejected": 0.39291974902153015, "logps/chosen": -303.5594177246094, "logps/rejected": -259.14178466796875, "loss": 6109.6039, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5700324177742004, "rewards/margins": 0.1927037090063095, "rewards/rejected": -0.7627362012863159, "rewards/safe_rewards": -0.5912032723426819, "rewards/unsafe_rewards": -0.5395609140396118, "step": 390 }, { "epoch": 0.21, "learning_rate": 4.811247350762418e-07, "logits/chosen": -0.36068278551101685, "logits/rejected": 0.05598723143339157, "logps/chosen": -240.6222381591797, "logps/rejected": -234.20803833007812, "loss": 5907.1703, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.554689347743988, "rewards/margins": 0.17352624237537384, "rewards/rejected": -0.7282156348228455, "rewards/safe_rewards": -0.5173069834709167, "rewards/unsafe_rewards": -0.5826700329780579, "step": 400 }, { "epoch": 0.22, "learning_rate": 4.79318874904728e-07, "logits/chosen": -0.5469863414764404, "logits/rejected": -0.3919845223426819, "logps/chosen": -267.99761962890625, "logps/rejected": -260.9379577636719, "loss": 6323.5375, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5513988137245178, "rewards/margins": 0.16061297059059143, "rewards/rejected": -0.7120116949081421, "rewards/safe_rewards": -0.5992297530174255, "rewards/unsafe_rewards": -0.5494996309280396, "step": 410 }, { "epoch": 0.22, "learning_rate": 4.774342398605221e-07, "logits/chosen": -1.3936598300933838, "logits/rejected": -1.0238125324249268, "logps/chosen": -262.09033203125, "logps/rejected": -221.07174682617188, "loss": 5492.8094, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5323154926300049, "rewards/margins": 0.15208503603935242, "rewards/rejected": -0.6844004988670349, "rewards/safe_rewards": -0.5349102020263672, "rewards/unsafe_rewards": -0.505738377571106, "step": 420 }, { "epoch": 0.23, "learning_rate": 4.754714773473134e-07, "logits/chosen": -1.2268015146255493, "logits/rejected": -1.0391647815704346, "logps/chosen": -248.2527313232422, "logps/rejected": -258.4667663574219, "loss": 6146.5922, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.5346105694770813, "rewards/margins": 0.18027544021606445, "rewards/rejected": -0.7148860692977905, "rewards/safe_rewards": -0.4759598672389984, "rewards/unsafe_rewards": -0.534007728099823, "step": 430 }, { "epoch": 0.23, "learning_rate": 4.734312616068851e-07, "logits/chosen": -1.2311909198760986, "logits/rejected": -0.9865934252738953, "logps/chosen": -214.25851440429688, "logps/rejected": -198.68943786621094, "loss": 5944.2828, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.3851444125175476, "rewards/margins": 0.0964752659201622, "rewards/rejected": -0.481619656085968, "rewards/safe_rewards": -0.40014153718948364, "rewards/unsafe_rewards": -0.4206266403198242, "step": 440 }, { "epoch": 0.24, "learning_rate": 4.713142934875005e-07, "logits/chosen": -0.7530995607376099, "logits/rejected": -0.348047137260437, "logps/chosen": -273.5533447265625, "logps/rejected": -247.33377075195312, "loss": 6019.3629, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4809795916080475, "rewards/margins": 0.16457389295101166, "rewards/rejected": -0.645553469657898, "rewards/safe_rewards": -0.4939555525779724, "rewards/unsafe_rewards": -0.51116544008255, "step": 450 }, { "epoch": 0.24, "learning_rate": 4.6912130020314996e-07, "logits/chosen": 0.18566010892391205, "logits/rejected": 0.4161214232444763, "logps/chosen": -233.847900390625, "logps/rejected": -238.5542755126953, "loss": 5555.243, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6200246810913086, "rewards/margins": 0.13345691561698914, "rewards/rejected": -0.7534815073013306, "rewards/safe_rewards": -0.6095362901687622, "rewards/unsafe_rewards": -0.6309984922409058, "step": 460 }, { "epoch": 0.25, "learning_rate": 4.668530350837408e-07, "logits/chosen": 0.024336492642760277, "logits/rejected": 0.4952603876590729, "logps/chosen": -259.33697509765625, "logps/rejected": -254.6613006591797, "loss": 5726.7293, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.5721555948257446, "rewards/margins": 0.12051858007907867, "rewards/rejected": -0.6926741600036621, "rewards/safe_rewards": -0.5316283702850342, "rewards/unsafe_rewards": -0.5645433664321899, "step": 470 }, { "epoch": 0.25, "learning_rate": 4.64510277316316e-07, "logits/chosen": -0.0006995767471380532, "logits/rejected": 0.4036879539489746, "logps/chosen": -269.50482177734375, "logps/rejected": -248.73434448242188, "loss": 6012.2914, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5171098113059998, "rewards/margins": 0.20941033959388733, "rewards/rejected": -0.7265201807022095, "rewards/safe_rewards": -0.5066377520561218, "rewards/unsafe_rewards": -0.4963339865207672, "step": 480 }, { "epoch": 0.26, "learning_rate": 4.6209383167739015e-07, "logits/chosen": -0.8723047971725464, "logits/rejected": -0.47492194175720215, "logps/chosen": -239.2227020263672, "logps/rejected": -223.37191772460938, "loss": 6090.4563, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.39161261916160583, "rewards/margins": 0.16117171943187714, "rewards/rejected": -0.5527843832969666, "rewards/safe_rewards": -0.4009205400943756, "rewards/unsafe_rewards": -0.4027668535709381, "step": 490 }, { "epoch": 0.27, "learning_rate": 4.5960452825649526e-07, "logits/chosen": -0.8613616228103638, "logits/rejected": -0.5483921766281128, "logps/chosen": -252.01095581054688, "logps/rejected": -236.2162628173828, "loss": 5410.1973, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.4818722605705261, "rewards/margins": 0.12459783256053925, "rewards/rejected": -0.606469988822937, "rewards/safe_rewards": -0.4409845769405365, "rewards/unsafe_rewards": -0.48863571882247925, "step": 500 }, { "epoch": 0.27, "eval_logits/chosen": -0.00993373803794384, "eval_logits/rejected": 0.6948209404945374, "eval_logps/chosen": -205.43228149414062, "eval_logps/rejected": -177.0600128173828, "eval_loss": 4657.333984375, "eval_rewards/accuracies": 0.6367472410202026, "eval_rewards/chosen": -0.6508274078369141, "eval_rewards/margins": 0.09844248741865158, "eval_rewards/rejected": -0.749269962310791, "eval_rewards/safe_rewards": -0.6381882429122925, "eval_rewards/unsafe_rewards": -0.6354333162307739, "eval_runtime": 2355.0926, "eval_samples_per_second": 14.88, "eval_steps_per_second": 0.465, "step": 500 }, { "epoch": 0.27, "learning_rate": 4.570432221710314e-07, "logits/chosen": -0.2417004406452179, "logits/rejected": 0.17007017135620117, "logps/chosen": -273.1074523925781, "logps/rejected": -236.8904266357422, "loss": 6244.0367, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.5197592973709106, "rewards/margins": 0.19909226894378662, "rewards/rejected": -0.7188515067100525, "rewards/safe_rewards": -0.6001642942428589, "rewards/unsafe_rewards": -0.5492387413978577, "step": 510 }, { "epoch": 0.28, "learning_rate": 4.5441079327251927e-07, "logits/chosen": -0.3826223909854889, "logits/rejected": 0.10965192317962646, "logps/chosen": -261.4352722167969, "logps/rejected": -251.9311065673828, "loss": 5649.8195, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.49133262038230896, "rewards/margins": 0.11736941337585449, "rewards/rejected": -0.6087020635604858, "rewards/safe_rewards": -0.4915240406990051, "rewards/unsafe_rewards": -0.4991859793663025, "step": 520 }, { "epoch": 0.28, "learning_rate": 4.5170814584435644e-07, "logits/chosen": -0.1299566924571991, "logits/rejected": 0.30430150032043457, "logps/chosen": -281.5189514160156, "logps/rejected": -248.9510040283203, "loss": 6070.9859, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.5048553347587585, "rewards/margins": 0.17633280158042908, "rewards/rejected": -0.6811882257461548, "rewards/safe_rewards": -0.45997923612594604, "rewards/unsafe_rewards": -0.5042248964309692, "step": 530 }, { "epoch": 0.29, "learning_rate": 4.4893620829118124e-07, "logits/chosen": 0.41155165433883667, "logits/rejected": 0.7351133227348328, "logps/chosen": -218.6739959716797, "logps/rejected": -222.22238159179688, "loss": 5773.9555, "rewards/accuracies": 0.65625, "rewards/chosen": -0.5764225721359253, "rewards/margins": 0.17755261063575745, "rewards/rejected": -0.7539752125740051, "rewards/safe_rewards": -0.5707100033760071, "rewards/unsafe_rewards": -0.5930426716804504, "step": 540 }, { "epoch": 0.29, "learning_rate": 4.460959328199497e-07, "logits/chosen": 0.4961000382900238, "logits/rejected": 0.9081694483757019, "logps/chosen": -256.54791259765625, "logps/rejected": -277.130126953125, "loss": 6108.098, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.6318496465682983, "rewards/margins": 0.2199208289384842, "rewards/rejected": -0.8517705202102661, "rewards/safe_rewards": -0.6448063850402832, "rewards/unsafe_rewards": -0.5973528623580933, "step": 550 }, { "epoch": 0.3, "learning_rate": 4.4318829511283707e-07, "logits/chosen": 0.23597554862499237, "logits/rejected": 0.5608280301094055, "logps/chosen": -262.15960693359375, "logps/rejected": -276.5953369140625, "loss": 6017.0984, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.7231947183609009, "rewards/margins": 0.16650545597076416, "rewards/rejected": -0.8897002339363098, "rewards/safe_rewards": -0.7144005298614502, "rewards/unsafe_rewards": -0.6883742213249207, "step": 560 }, { "epoch": 0.3, "learning_rate": 4.40214293992074e-07, "logits/chosen": 0.30961090326309204, "logits/rejected": 0.6938155889511108, "logps/chosen": -267.58404541015625, "logps/rejected": -252.78311157226562, "loss": 6321.9309, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.5275936722755432, "rewards/margins": 0.20575468242168427, "rewards/rejected": -0.7333483099937439, "rewards/safe_rewards": -0.5182517766952515, "rewards/unsafe_rewards": -0.5568464994430542, "step": 570 }, { "epoch": 0.31, "learning_rate": 4.3717495107683516e-07, "logits/chosen": 0.2671489417552948, "logits/rejected": 0.9092152714729309, "logps/chosen": -250.55960083007812, "logps/rejected": -235.89840698242188, "loss": 5574.8402, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.5318346619606018, "rewards/margins": 0.18946382403373718, "rewards/rejected": -0.7212985157966614, "rewards/safe_rewards": -0.5447245836257935, "rewards/unsafe_rewards": -0.5725606083869934, "step": 580 }, { "epoch": 0.31, "learning_rate": 4.340713104322953e-07, "logits/chosen": 0.01171237975358963, "logits/rejected": 0.4629115164279938, "logps/chosen": -265.1495056152344, "logps/rejected": -259.7709045410156, "loss": 5202.8691, "rewards/accuracies": 0.65625, "rewards/chosen": -0.5935125946998596, "rewards/margins": 0.18529286980628967, "rewards/rejected": -0.7788054347038269, "rewards/safe_rewards": -0.6250792741775513, "rewards/unsafe_rewards": -0.6238072514533997, "step": 590 }, { "epoch": 0.32, "learning_rate": 4.3090443821097566e-07, "logits/chosen": 0.7814422845840454, "logits/rejected": 1.1566433906555176, "logps/chosen": -278.1474609375, "logps/rejected": -280.3294677734375, "loss": 5335.1562, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6250512599945068, "rewards/margins": 0.19450877606868744, "rewards/rejected": -0.8195600509643555, "rewards/safe_rewards": -0.5736940503120422, "rewards/unsafe_rewards": -0.6311155557632446, "step": 600 }, { "epoch": 0.32, "learning_rate": 4.276754222865029e-07, "logits/chosen": 0.546709418296814, "logits/rejected": 1.5038117170333862, "logps/chosen": -284.0765075683594, "logps/rejected": -235.79367065429688, "loss": 5880.4258, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6524443626403809, "rewards/margins": 0.17251375317573547, "rewards/rejected": -0.8249581456184387, "rewards/safe_rewards": -0.6402295231819153, "rewards/unsafe_rewards": -0.6277676224708557, "step": 610 }, { "epoch": 0.33, "learning_rate": 4.2438537187990565e-07, "logits/chosen": 0.7865768671035767, "logits/rejected": 1.5061836242675781, "logps/chosen": -283.3603820800781, "logps/rejected": -251.56442260742188, "loss": 5760.8687, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.658532977104187, "rewards/margins": 0.21655750274658203, "rewards/rejected": -0.875090479850769, "rewards/safe_rewards": -0.6327935457229614, "rewards/unsafe_rewards": -0.6471335291862488, "step": 620 }, { "epoch": 0.33, "learning_rate": 4.210354171785795e-07, "logits/chosen": 0.2993673086166382, "logits/rejected": 0.7917363047599792, "logps/chosen": -272.6424865722656, "logps/rejected": -247.65853881835938, "loss": 5872.0883, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5130705833435059, "rewards/margins": 0.1547364443540573, "rewards/rejected": -0.6678069829940796, "rewards/safe_rewards": -0.5059661269187927, "rewards/unsafe_rewards": -0.5222837328910828, "step": 630 }, { "epoch": 0.34, "learning_rate": 4.1762670894804775e-07, "logits/chosen": 0.09364859014749527, "logits/rejected": 0.5361107587814331, "logps/chosen": -249.59634399414062, "logps/rejected": -237.3841094970703, "loss": 5896.1926, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.49201780557632446, "rewards/margins": 0.16005203127861023, "rewards/rejected": -0.6520698070526123, "rewards/safe_rewards": -0.549709677696228, "rewards/unsafe_rewards": -0.5637668967247009, "step": 640 }, { "epoch": 0.34, "learning_rate": 4.1416041813665493e-07, "logits/chosen": -0.5552986860275269, "logits/rejected": -0.25023895502090454, "logps/chosen": -253.50790405273438, "logps/rejected": -253.32583618164062, "loss": 5920.0328, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.47500887513160706, "rewards/margins": 0.12813320755958557, "rewards/rejected": -0.6031420826911926, "rewards/safe_rewards": -0.43845662474632263, "rewards/unsafe_rewards": -0.45656904578208923, "step": 650 }, { "epoch": 0.35, "learning_rate": 4.1063773547332584e-07, "logits/chosen": -0.46418723464012146, "logits/rejected": -0.049189966171979904, "logps/chosen": -267.15765380859375, "logps/rejected": -243.20010375976562, "loss": 6128.7578, "rewards/accuracies": 0.59375, "rewards/chosen": -0.6104855537414551, "rewards/margins": 0.10687772184610367, "rewards/rejected": -0.7173632383346558, "rewards/safe_rewards": -0.5476406216621399, "rewards/unsafe_rewards": -0.603262722492218, "step": 660 }, { "epoch": 0.36, "learning_rate": 4.0705987105853077e-07, "logits/chosen": -0.2697436213493347, "logits/rejected": 0.344801664352417, "logps/chosen": -252.3665313720703, "logps/rejected": -232.3540496826172, "loss": 5986.7625, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5879735350608826, "rewards/margins": 0.14302758872509003, "rewards/rejected": -0.731001079082489, "rewards/safe_rewards": -0.543707013130188, "rewards/unsafe_rewards": -0.5482696294784546, "step": 670 }, { "epoch": 0.36, "learning_rate": 4.034280539485952e-07, "logits/chosen": -0.36558887362480164, "logits/rejected": 0.18461750447750092, "logps/chosen": -295.22119140625, "logps/rejected": -274.0675354003906, "loss": 5383.9453, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.5177947878837585, "rewards/margins": 0.21047362685203552, "rewards/rejected": -0.7282685041427612, "rewards/safe_rewards": -0.5312758684158325, "rewards/unsafe_rewards": -0.5633383393287659, "step": 680 }, { "epoch": 0.37, "learning_rate": 3.997435317334988e-07, "logits/chosen": 0.3039137125015259, "logits/rejected": 0.7977389097213745, "logps/chosen": -279.23187255859375, "logps/rejected": -261.033935546875, "loss": 5720.7707, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.5356379747390747, "rewards/margins": 0.2088995724916458, "rewards/rejected": -0.7445374131202698, "rewards/safe_rewards": -0.5458201169967651, "rewards/unsafe_rewards": -0.47182130813598633, "step": 690 }, { "epoch": 0.37, "learning_rate": 3.960075701083074e-07, "logits/chosen": 0.06580640375614166, "logits/rejected": 0.28118953108787537, "logps/chosen": -237.80581665039062, "logps/rejected": -245.47216796875, "loss": 5702.616, "rewards/accuracies": 0.65625, "rewards/chosen": -0.5484215021133423, "rewards/margins": 0.16065733134746552, "rewards/rejected": -0.709078848361969, "rewards/safe_rewards": -0.5256644487380981, "rewards/unsafe_rewards": -0.5779343247413635, "step": 700 }, { "epoch": 0.38, "learning_rate": 3.92221452438385e-07, "logits/chosen": -0.6886399388313293, "logits/rejected": -0.33862438797950745, "logps/chosen": -255.33505249023438, "logps/rejected": -234.041259765625, "loss": 5505.9277, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.5285482406616211, "rewards/margins": 0.18568384647369385, "rewards/rejected": -0.7142320871353149, "rewards/safe_rewards": -0.5484398007392883, "rewards/unsafe_rewards": -0.5874748826026917, "step": 710 }, { "epoch": 0.38, "learning_rate": 3.8838647931853684e-07, "logits/chosen": -0.7950954437255859, "logits/rejected": -0.4466307759284973, "logps/chosen": -253.4489288330078, "logps/rejected": -254.49813842773438, "loss": 6030.682, "rewards/accuracies": 0.71875, "rewards/chosen": -0.5057817697525024, "rewards/margins": 0.20095935463905334, "rewards/rejected": -0.7067410945892334, "rewards/safe_rewards": -0.5353250503540039, "rewards/unsafe_rewards": -0.4995631277561188, "step": 720 }, { "epoch": 0.39, "learning_rate": 3.845039681262332e-07, "logits/chosen": -0.5698283910751343, "logits/rejected": -0.1652621030807495, "logps/chosen": -265.46368408203125, "logps/rejected": -250.52951049804688, "loss": 5514.4148, "rewards/accuracies": 0.71875, "rewards/chosen": -0.45593494176864624, "rewards/margins": 0.1759863793849945, "rewards/rejected": -0.6319212913513184, "rewards/safe_rewards": -0.4363466799259186, "rewards/unsafe_rewards": -0.4330349862575531, "step": 730 }, { "epoch": 0.39, "learning_rate": 3.805752525690681e-07, "logits/chosen": 0.09326216578483582, "logits/rejected": 0.7224725484848022, "logps/chosen": -253.9232940673828, "logps/rejected": -268.0160217285156, "loss": 5160.3754, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.6526281237602234, "rewards/margins": 0.22083961963653564, "rewards/rejected": -0.8734676241874695, "rewards/safe_rewards": -0.6421413421630859, "rewards/unsafe_rewards": -0.6364503502845764, "step": 740 }, { "epoch": 0.4, "learning_rate": 3.7660168222660824e-07, "logits/chosen": 0.43039554357528687, "logits/rejected": 0.772833526134491, "logps/chosen": -293.98541259765625, "logps/rejected": -288.250732421875, "loss": 5855.4879, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.7387111783027649, "rewards/margins": 0.16440826654434204, "rewards/rejected": -0.9031193852424622, "rewards/safe_rewards": -0.7269446849822998, "rewards/unsafe_rewards": -0.6723185777664185, "step": 750 }, { "epoch": 0.4, "learning_rate": 3.725846220867901e-07, "logits/chosen": -0.09916634857654572, "logits/rejected": 0.4922304153442383, "logps/chosen": -265.7640686035156, "logps/rejected": -243.7411346435547, "loss": 6137.0988, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.6147286295890808, "rewards/margins": 0.14420659840106964, "rewards/rejected": -0.7589352130889893, "rewards/safe_rewards": -0.6549733877182007, "rewards/unsafe_rewards": -0.6351133584976196, "step": 760 }, { "epoch": 0.41, "learning_rate": 3.6852545207702393e-07, "logits/chosen": -0.18887875974178314, "logits/rejected": 0.4651460647583008, "logps/chosen": -300.3460998535156, "logps/rejected": -247.0656280517578, "loss": 5956.6977, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.5610722899436951, "rewards/margins": 0.18032148480415344, "rewards/rejected": -0.7413938641548157, "rewards/safe_rewards": -0.5364476442337036, "rewards/unsafe_rewards": -0.5671006441116333, "step": 770 }, { "epoch": 0.41, "learning_rate": 3.6442556659016475e-07, "logits/chosen": 0.3691898286342621, "logits/rejected": 1.0192655324935913, "logps/chosen": -278.3470458984375, "logps/rejected": -240.86141967773438, "loss": 5414.8289, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.5899799466133118, "rewards/margins": 0.20228877663612366, "rewards/rejected": -0.7922687530517578, "rewards/safe_rewards": -0.5520480871200562, "rewards/unsafe_rewards": -0.5946981906890869, "step": 780 }, { "epoch": 0.42, "learning_rate": 3.602863740055161e-07, "logits/chosen": 1.002415418624878, "logits/rejected": 1.6322085857391357, "logps/chosen": -268.44488525390625, "logps/rejected": -261.2592468261719, "loss": 5358.4598, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6824139356613159, "rewards/margins": 0.22263555228710175, "rewards/rejected": -0.9050495028495789, "rewards/safe_rewards": -0.6642250418663025, "rewards/unsafe_rewards": -0.6494946479797363, "step": 790 }, { "epoch": 0.42, "learning_rate": 3.5610929620502747e-07, "logits/chosen": 0.9502559900283813, "logits/rejected": 1.4719197750091553, "logps/chosen": -271.93231201171875, "logps/rejected": -281.78125, "loss": 5792.9727, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.7460067272186279, "rewards/margins": 0.18493010103702545, "rewards/rejected": -0.9309368133544922, "rewards/safe_rewards": -0.7411947846412659, "rewards/unsafe_rewards": -0.8093317151069641, "step": 800 }, { "epoch": 0.43, "learning_rate": 3.5189576808485404e-07, "logits/chosen": 0.7791315913200378, "logits/rejected": 1.4415690898895264, "logps/chosen": -300.54150390625, "logps/rejected": -273.402587890625, "loss": 5584.2125, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.7409987449645996, "rewards/margins": 0.20648033916950226, "rewards/rejected": -0.9474791288375854, "rewards/safe_rewards": -0.726071834564209, "rewards/unsafe_rewards": -0.8359003067016602, "step": 810 }, { "epoch": 0.44, "learning_rate": 3.476472370624464e-07, "logits/chosen": 0.40392106771469116, "logits/rejected": 0.7413457632064819, "logps/chosen": -254.9908905029297, "logps/rejected": -251.4073028564453, "loss": 6101.9039, "rewards/accuracies": 0.65625, "rewards/chosen": -0.6420382261276245, "rewards/margins": 0.13990595936775208, "rewards/rejected": -0.7819441556930542, "rewards/safe_rewards": -0.5959726572036743, "rewards/unsafe_rewards": -0.6521440744400024, "step": 820 }, { "epoch": 0.44, "learning_rate": 3.43365162579338e-07, "logits/chosen": 0.11586692184209824, "logits/rejected": 0.49579864740371704, "logps/chosen": -226.8084716796875, "logps/rejected": -232.3746337890625, "loss": 5837.0383, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.551177442073822, "rewards/margins": 0.19108565151691437, "rewards/rejected": -0.7422630190849304, "rewards/safe_rewards": -0.5533746480941772, "rewards/unsafe_rewards": -0.5072416663169861, "step": 830 }, { "epoch": 0.45, "learning_rate": 3.390510155998023e-07, "logits/chosen": 0.24915654957294464, "logits/rejected": 0.6536698341369629, "logps/chosen": -277.9824523925781, "logps/rejected": -249.2000732421875, "loss": 5721.2586, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.648623526096344, "rewards/margins": 0.12514245510101318, "rewards/rejected": -0.7737659811973572, "rewards/safe_rewards": -0.7092838287353516, "rewards/unsafe_rewards": -0.6900613903999329, "step": 840 }, { "epoch": 0.45, "learning_rate": 3.347062781055526e-07, "logits/chosen": 0.5860965847969055, "logits/rejected": 0.9803635478019714, "logps/chosen": -245.1415252685547, "logps/rejected": -272.01080322265625, "loss": 5834.2676, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.6521397829055786, "rewards/margins": 0.21285566687583923, "rewards/rejected": -0.8649954795837402, "rewards/safe_rewards": -0.6472452878952026, "rewards/unsafe_rewards": -0.6902757883071899, "step": 850 }, { "epoch": 0.46, "learning_rate": 3.303324425866559e-07, "logits/chosen": 0.6316410303115845, "logits/rejected": 0.902866005897522, "logps/chosen": -291.68597412109375, "logps/rejected": -266.18585205078125, "loss": 5964.1836, "rewards/accuracies": 0.65625, "rewards/chosen": -0.6263974905014038, "rewards/margins": 0.17340168356895447, "rewards/rejected": -0.7997991442680359, "rewards/safe_rewards": -0.6621179580688477, "rewards/unsafe_rewards": -0.6091993451118469, "step": 860 }, { "epoch": 0.46, "learning_rate": 3.2593101152883795e-07, "logits/chosen": 0.6831669211387634, "logits/rejected": 0.9902046918869019, "logps/chosen": -256.2884521484375, "logps/rejected": -279.5752868652344, "loss": 5961.9836, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.6823039054870605, "rewards/margins": 0.17010322213172913, "rewards/rejected": -0.8524071574211121, "rewards/safe_rewards": -0.6452068090438843, "rewards/unsafe_rewards": -0.7062270641326904, "step": 870 }, { "epoch": 0.47, "learning_rate": 3.21503496897354e-07, "logits/chosen": 0.48068660497665405, "logits/rejected": 0.952492892742157, "logps/chosen": -289.909423828125, "logps/rejected": -262.1679992675781, "loss": 6021.2465, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.7239787578582764, "rewards/margins": 0.12146921455860138, "rewards/rejected": -0.8454478979110718, "rewards/safe_rewards": -0.7816897630691528, "rewards/unsafe_rewards": -0.7392334938049316, "step": 880 }, { "epoch": 0.47, "learning_rate": 3.170514196176037e-07, "logits/chosen": 0.28930729627609253, "logits/rejected": 0.6634337902069092, "logps/chosen": -267.9020080566406, "logps/rejected": -267.813720703125, "loss": 5325.9504, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.6826976537704468, "rewards/margins": 0.18379981815814972, "rewards/rejected": -0.8664973974227905, "rewards/safe_rewards": -0.6970924139022827, "rewards/unsafe_rewards": -0.6835001111030579, "step": 890 }, { "epoch": 0.48, "learning_rate": 3.125763090526674e-07, "logits/chosen": 0.21367737650871277, "logits/rejected": 0.6621453166007996, "logps/chosen": -278.2737731933594, "logps/rejected": -269.89404296875, "loss": 5261.0746, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.6765376329421997, "rewards/margins": 0.20078134536743164, "rewards/rejected": -0.8773189783096313, "rewards/safe_rewards": -0.6867783665657043, "rewards/unsafe_rewards": -0.6920818090438843, "step": 900 }, { "epoch": 0.48, "learning_rate": 3.080797024779447e-07, "logits/chosen": 0.19137686491012573, "logits/rejected": 0.7889005541801453, "logps/chosen": -253.41421508789062, "logps/rejected": -236.6729278564453, "loss": 5719.0418, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.6732780933380127, "rewards/margins": 0.19284026324748993, "rewards/rejected": -0.866118311882019, "rewards/safe_rewards": -0.7765754461288452, "rewards/unsafe_rewards": -0.682191014289856, "step": 910 }, { "epoch": 0.49, "learning_rate": 3.035631445530743e-07, "logits/chosen": 0.4879905581474304, "logits/rejected": 0.9158290028572083, "logps/chosen": -290.2519226074219, "logps/rejected": -284.17071533203125, "loss": 5561.2797, "rewards/accuracies": 0.71875, "rewards/chosen": -0.7149994969367981, "rewards/margins": 0.19377604126930237, "rewards/rejected": -0.9087755084037781, "rewards/safe_rewards": -0.6696754693984985, "rewards/unsafe_rewards": -0.6708149313926697, "step": 920 }, { "epoch": 0.49, "learning_rate": 2.9902818679131775e-07, "logits/chosen": 0.3951093852519989, "logits/rejected": 0.8302197456359863, "logps/chosen": -271.294189453125, "logps/rejected": -253.5810546875, "loss": 5419.4855, "rewards/accuracies": 0.65625, "rewards/chosen": -0.7780183553695679, "rewards/margins": 0.17024961113929749, "rewards/rejected": -0.9482680559158325, "rewards/safe_rewards": -0.7877544164657593, "rewards/unsafe_rewards": -0.7789348363876343, "step": 930 }, { "epoch": 0.5, "learning_rate": 2.944763870265886e-07, "logits/chosen": -0.13839875161647797, "logits/rejected": 0.3581174314022064, "logps/chosen": -272.4313659667969, "logps/rejected": -267.915771484375, "loss": 5453.8977, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6422435641288757, "rewards/margins": 0.19745132327079773, "rewards/rejected": -0.8396948575973511, "rewards/safe_rewards": -0.6758723258972168, "rewards/unsafe_rewards": -0.578320324420929, "step": 940 }, { "epoch": 0.5, "learning_rate": 2.899093088783105e-07, "logits/chosen": -0.06241287663578987, "logits/rejected": 0.4015175700187683, "logps/chosen": -294.8834533691406, "logps/rejected": -279.0429382324219, "loss": 5278.1754, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.6345726847648621, "rewards/margins": 0.14065605401992798, "rewards/rejected": -0.7752287983894348, "rewards/safe_rewards": -0.6587311029434204, "rewards/unsafe_rewards": -0.6476761102676392, "step": 950 }, { "epoch": 0.51, "learning_rate": 2.8532852121428733e-07, "logits/chosen": -0.04936225712299347, "logits/rejected": 0.38959282636642456, "logps/chosen": -248.14639282226562, "logps/rejected": -235.8994598388672, "loss": 5653.668, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.5577735304832458, "rewards/margins": 0.21775202453136444, "rewards/rejected": -0.7755255699157715, "rewards/safe_rewards": -0.55736243724823, "rewards/unsafe_rewards": -0.5908164978027344, "step": 960 }, { "epoch": 0.51, "learning_rate": 2.807355976117716e-07, "logits/chosen": 0.11599000543355942, "logits/rejected": 0.49212461709976196, "logps/chosen": -284.78472900390625, "logps/rejected": -265.7978515625, "loss": 5924.3578, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.5290887355804443, "rewards/margins": 0.22062186896800995, "rewards/rejected": -0.7497105598449707, "rewards/safe_rewards": -0.4509585499763489, "rewards/unsafe_rewards": -0.5535848736763, "step": 970 }, { "epoch": 0.52, "learning_rate": 2.761321158169134e-07, "logits/chosen": -0.0665382593870163, "logits/rejected": 0.4467547535896301, "logps/chosen": -262.4479064941406, "logps/rejected": -265.8846740722656, "loss": 5391.7484, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.604932427406311, "rewards/margins": 0.16624750196933746, "rewards/rejected": -0.7711800336837769, "rewards/safe_rewards": -0.570032000541687, "rewards/unsafe_rewards": -0.6088122129440308, "step": 980 }, { "epoch": 0.53, "learning_rate": 2.715196572027789e-07, "logits/chosen": 0.15862391889095306, "logits/rejected": 0.511070966720581, "logps/chosen": -252.94137573242188, "logps/rejected": -255.08187866210938, "loss": 5628.2164, "rewards/accuracies": 0.65625, "rewards/chosen": -0.6333836913108826, "rewards/margins": 0.20889365673065186, "rewards/rejected": -0.8422773480415344, "rewards/safe_rewards": -0.6369217038154602, "rewards/unsafe_rewards": -0.6703649163246155, "step": 990 }, { "epoch": 0.53, "learning_rate": 2.6689980622612204e-07, "logits/chosen": 0.08565627038478851, "logits/rejected": 0.5222666263580322, "logps/chosen": -255.2662811279297, "logps/rejected": -253.49105834960938, "loss": 5634.6316, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.6020347476005554, "rewards/margins": 0.19342327117919922, "rewards/rejected": -0.7954580187797546, "rewards/safe_rewards": -0.6501786708831787, "rewards/unsafe_rewards": -0.6461445093154907, "step": 1000 }, { "epoch": 0.53, "eval_logits/chosen": 0.41202229261398315, "eval_logits/rejected": 1.1542474031448364, "eval_logps/chosen": -220.34913635253906, "eval_logps/rejected": -189.61671447753906, "eval_loss": 4507.89453125, "eval_rewards/accuracies": 0.6151915788650513, "eval_rewards/chosen": -0.799996018409729, "eval_rewards/margins": 0.07484080642461777, "eval_rewards/rejected": -0.874836802482605, "eval_rewards/safe_rewards": -0.7885684370994568, "eval_rewards/unsafe_rewards": -0.784635066986084, "eval_runtime": 2353.482, "eval_samples_per_second": 14.89, "eval_steps_per_second": 0.466, "step": 1000 }, { "epoch": 0.54, "learning_rate": 2.622741498830969e-07, "logits/chosen": 0.2431926727294922, "logits/rejected": 0.40795207023620605, "logps/chosen": -279.1517333984375, "logps/rejected": -271.7449645996094, "loss": 5872.2367, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.6438090801239014, "rewards/margins": 0.17429831624031067, "rewards/rejected": -0.8181073069572449, "rewards/safe_rewards": -0.6910767555236816, "rewards/unsafe_rewards": -0.6460915803909302, "step": 1010 }, { "epoch": 0.54, "learning_rate": 2.5764427716409815e-07, "logits/chosen": -0.09687475860118866, "logits/rejected": 0.4301505982875824, "logps/chosen": -272.0554504394531, "logps/rejected": -255.6719207763672, "loss": 5816.6723, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.5806029438972473, "rewards/margins": 0.19818606972694397, "rewards/rejected": -0.7787889838218689, "rewards/safe_rewards": -0.5169692635536194, "rewards/unsafe_rewards": -0.5289751291275024, "step": 1020 }, { "epoch": 0.55, "learning_rate": 2.5301177850791616e-07, "logits/chosen": 0.01663217321038246, "logits/rejected": 0.6527854204177856, "logps/chosen": -290.3711853027344, "logps/rejected": -268.1048278808594, "loss": 5912.7102, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.6295832395553589, "rewards/margins": 0.20760869979858398, "rewards/rejected": -0.8371919393539429, "rewards/safe_rewards": -0.642471432685852, "rewards/unsafe_rewards": -0.6146708726882935, "step": 1030 }, { "epoch": 0.55, "learning_rate": 2.4837824525539477e-07, "logits/chosen": 0.17375509440898895, "logits/rejected": 0.7390264272689819, "logps/chosen": -270.261474609375, "logps/rejected": -261.2465515136719, "loss": 5659.6238, "rewards/accuracies": 0.65625, "rewards/chosen": -0.6727645993232727, "rewards/margins": 0.17281220853328705, "rewards/rejected": -0.8455768823623657, "rewards/safe_rewards": -0.6424635052680969, "rewards/unsafe_rewards": -0.6337414979934692, "step": 1040 }, { "epoch": 0.56, "learning_rate": 2.4374526910277886e-07, "logits/chosen": 0.13272862136363983, "logits/rejected": 0.57741779088974, "logps/chosen": -270.9297790527344, "logps/rejected": -267.14471435546875, "loss": 5861.1039, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.6448026895523071, "rewards/margins": 0.2006601095199585, "rewards/rejected": -0.8454626798629761, "rewards/safe_rewards": -0.6065593361854553, "rewards/unsafe_rewards": -0.6479047536849976, "step": 1050 }, { "epoch": 0.56, "learning_rate": 2.391144415549403e-07, "logits/chosen": 0.2520432770252228, "logits/rejected": 0.7386651039123535, "logps/chosen": -256.0111389160156, "logps/rejected": -244.1455535888672, "loss": 5928.0605, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.6962358355522156, "rewards/margins": 0.125870481133461, "rewards/rejected": -0.8221063613891602, "rewards/safe_rewards": -0.6803200244903564, "rewards/unsafe_rewards": -0.6994472742080688, "step": 1060 }, { "epoch": 0.57, "learning_rate": 2.3448735337866919e-07, "logits/chosen": 0.26303520798683167, "logits/rejected": 0.7426208257675171, "logps/chosen": -247.3863983154297, "logps/rejected": -244.02392578125, "loss": 5880.1039, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.6635211706161499, "rewards/margins": 0.15260052680969238, "rewards/rejected": -0.8161218762397766, "rewards/safe_rewards": -0.706309974193573, "rewards/unsafe_rewards": -0.6638337969779968, "step": 1070 }, { "epoch": 0.57, "learning_rate": 2.2986559405621886e-07, "logits/chosen": 0.030937856063246727, "logits/rejected": 0.47169026732444763, "logps/chosen": -279.0972595214844, "logps/rejected": -268.9930725097656, "loss": 5616.6, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.6163111925125122, "rewards/margins": 0.16996563971042633, "rewards/rejected": -0.7862768173217773, "rewards/safe_rewards": -0.6654713749885559, "rewards/unsafe_rewards": -0.6399198770523071, "step": 1080 }, { "epoch": 0.58, "learning_rate": 2.2525075123929213e-07, "logits/chosen": 0.43386760354042053, "logits/rejected": 0.7538164258003235, "logps/chosen": -267.44134521484375, "logps/rejected": -258.99249267578125, "loss": 5716.7879, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.6649960279464722, "rewards/margins": 0.22522863745689392, "rewards/rejected": -0.890224814414978, "rewards/safe_rewards": -0.6375536322593689, "rewards/unsafe_rewards": -0.6348733901977539, "step": 1090 }, { "epoch": 0.58, "learning_rate": 2.206444102036565e-07, "logits/chosen": 0.6684126257896423, "logits/rejected": 0.9879862666130066, "logps/chosen": -267.1449279785156, "logps/rejected": -270.4283752441406, "loss": 5974.3918, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.74274742603302, "rewards/margins": 0.15645694732666016, "rewards/rejected": -0.899204432964325, "rewards/safe_rewards": -0.7267962694168091, "rewards/unsafe_rewards": -0.6818505525588989, "step": 1100 }, { "epoch": 0.59, "learning_rate": 2.160481533045751e-07, "logits/chosen": 0.4061971604824066, "logits/rejected": 0.9739459753036499, "logps/chosen": -285.2103271484375, "logps/rejected": -266.5544128417969, "loss": 5749.7781, "rewards/accuracies": 0.71875, "rewards/chosen": -0.7457272410392761, "rewards/margins": 0.2004440277814865, "rewards/rejected": -0.9461711645126343, "rewards/safe_rewards": -0.7860220670700073, "rewards/unsafe_rewards": -0.7390663623809814, "step": 1110 }, { "epoch": 0.59, "learning_rate": 2.1146355943324148e-07, "logits/chosen": 0.48321422934532166, "logits/rejected": 0.9058516621589661, "logps/chosen": -271.53924560546875, "logps/rejected": -259.0006103515625, "loss": 5805.548, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.7600331902503967, "rewards/margins": 0.13751891255378723, "rewards/rejected": -0.8975521326065063, "rewards/safe_rewards": -0.7516414523124695, "rewards/unsafe_rewards": -0.7484757304191589, "step": 1120 }, { "epoch": 0.6, "learning_rate": 2.0689220347440374e-07, "logits/chosen": 0.1501261442899704, "logits/rejected": 0.688166618347168, "logps/chosen": -301.4822082519531, "logps/rejected": -273.8033447265625, "loss": 5622.9852, "rewards/accuracies": 0.65625, "rewards/chosen": -0.6868051290512085, "rewards/margins": 0.17512689530849457, "rewards/rejected": -0.8619319796562195, "rewards/safe_rewards": -0.6461024284362793, "rewards/unsafe_rewards": -0.6649470329284668, "step": 1130 }, { "epoch": 0.6, "learning_rate": 2.0233565576536564e-07, "logits/chosen": 0.05991173908114433, "logits/rejected": 0.42331352829933167, "logps/chosen": -294.298095703125, "logps/rejected": -287.5555419921875, "loss": 5822.3992, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.7161829471588135, "rewards/margins": 0.13876894116401672, "rewards/rejected": -0.8549518585205078, "rewards/safe_rewards": -0.7057495713233948, "rewards/unsafe_rewards": -0.6698770523071289, "step": 1140 }, { "epoch": 0.61, "learning_rate": 1.97795481556549e-07, "logits/chosen": -0.03588150069117546, "logits/rejected": 0.400505006313324, "logps/chosen": -277.2012023925781, "logps/rejected": -247.14804077148438, "loss": 5935.0914, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.6964778304100037, "rewards/margins": 0.17653243243694305, "rewards/rejected": -0.8730102777481079, "rewards/safe_rewards": -0.6869702339172363, "rewards/unsafe_rewards": -0.6601093411445618, "step": 1150 }, { "epoch": 0.62, "learning_rate": 1.9327324047380422e-07, "logits/chosen": -0.08701475709676743, "logits/rejected": 0.4873865246772766, "logps/chosen": -263.2158203125, "logps/rejected": -258.84039306640625, "loss": 5564.0863, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.6252955198287964, "rewards/margins": 0.22415871918201447, "rewards/rejected": -0.8494542241096497, "rewards/safe_rewards": -0.6420432329177856, "rewards/unsafe_rewards": -0.6124902963638306, "step": 1160 }, { "epoch": 0.62, "learning_rate": 1.887704859826528e-07, "logits/chosen": 0.07522957026958466, "logits/rejected": 0.3329767882823944, "logps/chosen": -285.8026123046875, "logps/rejected": -266.8732604980469, "loss": 5750.982, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.6510334014892578, "rewards/margins": 0.10930682718753815, "rewards/rejected": -0.7603402137756348, "rewards/safe_rewards": -0.6223952174186707, "rewards/unsafe_rewards": -0.6682702302932739, "step": 1170 }, { "epoch": 0.63, "learning_rate": 1.8428876485464572e-07, "logits/chosen": -0.15613001585006714, "logits/rejected": 0.41360145807266235, "logps/chosen": -238.16897583007812, "logps/rejected": -225.97802734375, "loss": 5979.2156, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5804222822189331, "rewards/margins": 0.1743427962064743, "rewards/rejected": -0.7547650933265686, "rewards/safe_rewards": -0.5962327718734741, "rewards/unsafe_rewards": -0.6777797341346741, "step": 1180 }, { "epoch": 0.63, "learning_rate": 1.798296166360216e-07, "logits/chosen": -0.029682714492082596, "logits/rejected": 0.5113533139228821, "logps/chosen": -290.142822265625, "logps/rejected": -269.4226989746094, "loss": 6057.1922, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6241404414176941, "rewards/margins": 0.1994599997997284, "rewards/rejected": -0.8236004114151001, "rewards/safe_rewards": -0.6254442930221558, "rewards/unsafe_rewards": -0.6271675229072571, "step": 1190 }, { "epoch": 0.64, "learning_rate": 1.7539457311884675e-07, "logits/chosen": 0.1500866711139679, "logits/rejected": 0.5680428743362427, "logps/chosen": -262.3311462402344, "logps/rejected": -251.67489624023438, "loss": 5421.8398, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.6509288549423218, "rewards/margins": 0.2198909968137741, "rewards/rejected": -0.8708198666572571, "rewards/safe_rewards": -0.6651867032051086, "rewards/unsafe_rewards": -0.6189877390861511, "step": 1200 }, { "epoch": 0.64, "learning_rate": 1.7098515781481883e-07, "logits/chosen": 0.4903317987918854, "logits/rejected": 0.883372962474823, "logps/chosen": -272.56097412109375, "logps/rejected": -241.92919921875, "loss": 5678.3117, "rewards/accuracies": 0.59375, "rewards/chosen": -0.6993108987808228, "rewards/margins": 0.11801446974277496, "rewards/rejected": -0.8173252940177917, "rewards/safe_rewards": -0.6638237237930298, "rewards/unsafe_rewards": -0.6766722202301025, "step": 1210 }, { "epoch": 0.65, "learning_rate": 1.6660288543191568e-07, "logits/chosen": 0.20008230209350586, "logits/rejected": 1.072401523590088, "logps/chosen": -292.7231140136719, "logps/rejected": -264.1849365234375, "loss": 5411.0453, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.6634177565574646, "rewards/margins": 0.19502988457679749, "rewards/rejected": -0.8584476709365845, "rewards/safe_rewards": -0.7102524638175964, "rewards/unsafe_rewards": -0.6833497285842896, "step": 1220 }, { "epoch": 0.65, "learning_rate": 1.6224926135406693e-07, "logits/chosen": 0.4110666811466217, "logits/rejected": 0.9241645932197571, "logps/chosen": -291.5517272949219, "logps/rejected": -268.79437255859375, "loss": 5535.6395, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6780111193656921, "rewards/margins": 0.2115507870912552, "rewards/rejected": -0.8895619511604309, "rewards/safe_rewards": -0.6748231053352356, "rewards/unsafe_rewards": -0.7003692984580994, "step": 1230 }, { "epoch": 0.66, "learning_rate": 1.579257811240298e-07, "logits/chosen": 0.17879924178123474, "logits/rejected": 0.82609623670578, "logps/chosen": -283.47686767578125, "logps/rejected": -269.6540832519531, "loss": 5427.3156, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.7036404609680176, "rewards/margins": 0.14344856142997742, "rewards/rejected": -0.8470889925956726, "rewards/safe_rewards": -0.6846009492874146, "rewards/unsafe_rewards": -0.6783186197280884, "step": 1240 }, { "epoch": 0.66, "learning_rate": 1.5363392992964523e-07, "logits/chosen": 0.4139084815979004, "logits/rejected": 0.7215920686721802, "logps/chosen": -257.33319091796875, "logps/rejected": -258.1666564941406, "loss": 5595.8969, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.7196224927902222, "rewards/margins": 0.11075691878795624, "rewards/rejected": -0.8303793668746948, "rewards/safe_rewards": -0.7594167590141296, "rewards/unsafe_rewards": -0.7032173275947571, "step": 1250 }, { "epoch": 0.67, "learning_rate": 1.4937518209365108e-07, "logits/chosen": 0.2804068922996521, "logits/rejected": 0.7492934465408325, "logps/chosen": -299.9917297363281, "logps/rejected": -274.86566162109375, "loss": 5485.5156, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.6413429975509644, "rewards/margins": 0.18771231174468994, "rewards/rejected": -0.8290553092956543, "rewards/safe_rewards": -0.6320935487747192, "rewards/unsafe_rewards": -0.6288415789604187, "step": 1260 }, { "epoch": 0.67, "learning_rate": 1.4515100056722708e-07, "logits/chosen": 0.49235549569129944, "logits/rejected": 0.896806538105011, "logps/chosen": -250.7898712158203, "logps/rejected": -248.735107421875, "loss": 5635.8461, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6494947671890259, "rewards/margins": 0.2068520337343216, "rewards/rejected": -0.8563467860221863, "rewards/safe_rewards": -0.6947168707847595, "rewards/unsafe_rewards": -0.6628744602203369, "step": 1270 }, { "epoch": 0.68, "learning_rate": 1.4096283642744716e-07, "logits/chosen": 0.564648449420929, "logits/rejected": 1.1666864156723022, "logps/chosen": -287.2496337890625, "logps/rejected": -269.12689208984375, "loss": 5744.0652, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.6512799263000488, "rewards/margins": 0.23767797648906708, "rewards/rejected": -0.8889577984809875, "rewards/safe_rewards": -0.6507743000984192, "rewards/unsafe_rewards": -0.6260145306587219, "step": 1280 }, { "epoch": 0.68, "learning_rate": 1.3681212837880977e-07, "logits/chosen": 0.3310979902744293, "logits/rejected": 0.946731686592102, "logps/chosen": -283.14178466796875, "logps/rejected": -268.6293029785156, "loss": 5538.1773, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6541503667831421, "rewards/margins": 0.20235121250152588, "rewards/rejected": -0.856501579284668, "rewards/safe_rewards": -0.7126244902610779, "rewards/unsafe_rewards": -0.6116858124732971, "step": 1290 }, { "epoch": 0.69, "learning_rate": 1.3270030225901908e-07, "logits/chosen": 0.21446232497692108, "logits/rejected": 0.9988247156143188, "logps/chosen": -311.952392578125, "logps/rejected": -264.99005126953125, "loss": 5863.9875, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.6609299778938293, "rewards/margins": 0.20790867507457733, "rewards/rejected": -0.8688386678695679, "rewards/safe_rewards": -0.6820018291473389, "rewards/unsafe_rewards": -0.6768487691879272, "step": 1300 }, { "epoch": 0.7, "learning_rate": 1.2862877054918572e-07, "logits/chosen": 0.43877673149108887, "logits/rejected": 0.7122836112976074, "logps/chosen": -263.78924560546875, "logps/rejected": -267.306884765625, "loss": 5915.4555, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6279779672622681, "rewards/margins": 0.19203224778175354, "rewards/rejected": -0.8200103044509888, "rewards/safe_rewards": -0.5540001392364502, "rewards/unsafe_rewards": -0.6103017926216125, "step": 1310 }, { "epoch": 0.7, "learning_rate": 1.2459893188861613e-07, "logits/chosen": 0.11050845682621002, "logits/rejected": 0.638201117515564, "logps/chosen": -230.92892456054688, "logps/rejected": -223.246826171875, "loss": 5522.6379, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.5677499771118164, "rewards/margins": 0.1929033249616623, "rewards/rejected": -0.7606532573699951, "rewards/safe_rewards": -0.6029695272445679, "rewards/unsafe_rewards": -0.6227617859840393, "step": 1320 }, { "epoch": 0.71, "learning_rate": 1.206121705943558e-07, "logits/chosen": 0.2380530834197998, "logits/rejected": 0.772462785243988, "logps/chosen": -265.9678039550781, "logps/rejected": -236.330078125, "loss": 5444.8687, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.5695582628250122, "rewards/margins": 0.17861400544643402, "rewards/rejected": -0.7481723427772522, "rewards/safe_rewards": -0.4967488646507263, "rewards/unsafe_rewards": -0.5609390139579773, "step": 1330 }, { "epoch": 0.71, "learning_rate": 1.1666985618565422e-07, "logits/chosen": 0.7791303396224976, "logits/rejected": 1.0070080757141113, "logps/chosen": -239.6016082763672, "logps/rejected": -250.1675567626953, "loss": 5496.5402, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.643204391002655, "rewards/margins": 0.212922140955925, "rewards/rejected": -0.856126606464386, "rewards/safe_rewards": -0.6307708024978638, "rewards/unsafe_rewards": -0.6205247044563293, "step": 1340 }, { "epoch": 0.72, "learning_rate": 1.1277334291351145e-07, "logits/chosen": 0.6811083555221558, "logits/rejected": 1.2308669090270996, "logps/chosen": -240.9481964111328, "logps/rejected": -251.2366485595703, "loss": 5451.2172, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.6521676778793335, "rewards/margins": 0.1860547959804535, "rewards/rejected": -0.8382223844528198, "rewards/safe_rewards": -0.7259255647659302, "rewards/unsafe_rewards": -0.6219838857650757, "step": 1350 }, { "epoch": 0.72, "learning_rate": 1.089239692954701e-07, "logits/chosen": 0.36615195870399475, "logits/rejected": 0.9472381472587585, "logps/chosen": -269.5465087890625, "logps/rejected": -256.1499328613281, "loss": 5717.6105, "rewards/accuracies": 0.625, "rewards/chosen": -0.6657227873802185, "rewards/margins": 0.15908706188201904, "rewards/rejected": -0.8248098492622375, "rewards/safe_rewards": -0.7341758012771606, "rewards/unsafe_rewards": -0.6227680444717407, "step": 1360 }, { "epoch": 0.73, "learning_rate": 1.051230576558127e-07, "logits/chosen": 0.7043350338935852, "logits/rejected": 1.012446641921997, "logps/chosen": -265.9175720214844, "logps/rejected": -296.2731628417969, "loss": 5307.2445, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.7264591455459595, "rewards/margins": 0.1706809252500534, "rewards/rejected": -0.8971401453018188, "rewards/safe_rewards": -0.7796869277954102, "rewards/unsafe_rewards": -0.7442405819892883, "step": 1370 }, { "epoch": 0.73, "learning_rate": 1.0137191367132078e-07, "logits/chosen": 0.5799378156661987, "logits/rejected": 1.0962615013122559, "logps/chosen": -280.27587890625, "logps/rejected": -261.3016052246094, "loss": 5462.4613, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.659958004951477, "rewards/margins": 0.24963033199310303, "rewards/rejected": -0.9095882177352905, "rewards/safe_rewards": -0.6955925226211548, "rewards/unsafe_rewards": -0.6324699521064758, "step": 1380 }, { "epoch": 0.74, "learning_rate": 9.76718259227532e-08, "logits/chosen": 0.498538076877594, "logits/rejected": 0.9989287257194519, "logps/chosen": -272.96820068359375, "logps/rejected": -256.63140869140625, "loss": 5331.4734, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6205289363861084, "rewards/margins": 0.21373698115348816, "rewards/rejected": -0.8342660069465637, "rewards/safe_rewards": -0.5949203372001648, "rewards/unsafe_rewards": -0.6141771674156189, "step": 1390 }, { "epoch": 0.74, "learning_rate": 9.402406545219676e-08, "logits/chosen": 0.34590667486190796, "logits/rejected": 0.8703553080558777, "logps/chosen": -273.8531188964844, "logps/rejected": -247.87466430664062, "loss": 5546.1305, "rewards/accuracies": 0.625, "rewards/chosen": -0.6622526049613953, "rewards/margins": 0.1561700403690338, "rewards/rejected": -0.8184226751327515, "rewards/safe_rewards": -0.6668413281440735, "rewards/unsafe_rewards": -0.6589676141738892, "step": 1400 }, { "epoch": 0.75, "learning_rate": 9.042988532644249e-08, "logits/chosen": 0.2142190933227539, "logits/rejected": 0.5996747016906738, "logps/chosen": -308.82635498046875, "logps/rejected": -276.37823486328125, "loss": 5583.4395, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.5863175392150879, "rewards/margins": 0.23458845913410187, "rewards/rejected": -0.8209059834480286, "rewards/safe_rewards": -0.5638710260391235, "rewards/unsafe_rewards": -0.5323917269706726, "step": 1410 }, { "epoch": 0.75, "learning_rate": 8.689052020653592e-08, "logits/chosen": -0.06605692207813263, "logits/rejected": 0.6343873739242554, "logps/chosen": -285.37225341796875, "logps/rejected": -252.3105010986328, "loss": 5576.0598, "rewards/accuracies": 0.71875, "rewards/chosen": -0.5753235816955566, "rewards/margins": 0.2064014971256256, "rewards/rejected": -0.7817251086235046, "rewards/safe_rewards": -0.5231102705001831, "rewards/unsafe_rewards": -0.5478030443191528, "step": 1420 }, { "epoch": 0.76, "learning_rate": 8.340718592365037e-08, "logits/chosen": 0.4551053047180176, "logits/rejected": 0.6916473507881165, "logps/chosen": -259.25543212890625, "logps/rejected": -269.81097412109375, "loss": 5258.8734, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.6683470010757446, "rewards/margins": 0.16762246191501617, "rewards/rejected": -0.8359693288803101, "rewards/safe_rewards": -0.6167613863945007, "rewards/unsafe_rewards": -0.6983481645584106, "step": 1430 }, { "epoch": 0.76, "learning_rate": 7.998107906142839e-08, "logits/chosen": 0.4198254942893982, "logits/rejected": 0.9249162673950195, "logps/chosen": -256.2335205078125, "logps/rejected": -243.9502716064453, "loss": 5150.4359, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6530503034591675, "rewards/margins": 0.22125795483589172, "rewards/rejected": -0.8743082880973816, "rewards/safe_rewards": -0.6435777544975281, "rewards/unsafe_rewards": -0.6962872743606567, "step": 1440 }, { "epoch": 0.77, "learning_rate": 7.661337654493575e-08, "logits/chosen": 0.11405469477176666, "logits/rejected": 0.8541787266731262, "logps/chosen": -285.04632568359375, "logps/rejected": -264.7653503417969, "loss": 5838.1379, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.6224103569984436, "rewards/margins": 0.20319974422454834, "rewards/rejected": -0.8256100416183472, "rewards/safe_rewards": -0.6171637773513794, "rewards/unsafe_rewards": -0.5961381793022156, "step": 1450 }, { "epoch": 0.77, "learning_rate": 7.330523523636751e-08, "logits/chosen": 0.33853933215141296, "logits/rejected": 0.5890348553657532, "logps/chosen": -267.7184753417969, "logps/rejected": -279.6230163574219, "loss": 5326.7477, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6186683177947998, "rewards/margins": 0.19817940890789032, "rewards/rejected": -0.8168476819992065, "rewards/safe_rewards": -0.6040722727775574, "rewards/unsafe_rewards": -0.6181649565696716, "step": 1460 }, { "epoch": 0.78, "learning_rate": 7.005779153764682e-08, "logits/chosen": 0.4181288182735443, "logits/rejected": 0.7393978238105774, "logps/chosen": -249.9525909423828, "logps/rejected": -242.4307861328125, "loss": 5633.5648, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.6368721723556519, "rewards/margins": 0.15112480521202087, "rewards/rejected": -0.7879970073699951, "rewards/safe_rewards": -0.6358110308647156, "rewards/unsafe_rewards": -0.6208546161651611, "step": 1470 }, { "epoch": 0.79, "learning_rate": 6.687216100005138e-08, "logits/chosen": 0.6848994493484497, "logits/rejected": 1.1733933687210083, "logps/chosen": -284.51080322265625, "logps/rejected": -288.7901916503906, "loss": 5048.4258, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6762335300445557, "rewards/margins": 0.1719200611114502, "rewards/rejected": -0.8481537103652954, "rewards/safe_rewards": -0.6376355290412903, "rewards/unsafe_rewards": -0.7184177041053772, "step": 1480 }, { "epoch": 0.79, "learning_rate": 6.374943794100349e-08, "logits/chosen": 0.48638778924942017, "logits/rejected": 1.259670615196228, "logps/chosen": -267.34588623046875, "logps/rejected": -245.59756469726562, "loss": 5545.4941, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.6003537178039551, "rewards/margins": 0.22699756920337677, "rewards/rejected": -0.8273512721061707, "rewards/safe_rewards": -0.6312727332115173, "rewards/unsafe_rewards": -0.6281502842903137, "step": 1490 }, { "epoch": 0.8, "learning_rate": 6.069069506815325e-08, "logits/chosen": 0.7533052563667297, "logits/rejected": 1.2028855085372925, "logps/chosen": -251.12496948242188, "logps/rejected": -253.78408813476562, "loss": 5749.5141, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.6362664103507996, "rewards/margins": 0.2198611944913864, "rewards/rejected": -0.8561276197433472, "rewards/safe_rewards": -0.622052013874054, "rewards/unsafe_rewards": -0.704675555229187, "step": 1500 }, { "epoch": 0.8, "eval_logits/chosen": 1.0718276500701904, "eval_logits/rejected": 1.9546749591827393, "eval_logps/chosen": -228.9304656982422, "eval_logps/rejected": -199.36412048339844, "eval_loss": 4458.44287109375, "eval_rewards/accuracies": 0.6194114685058594, "eval_rewards/chosen": -0.8858092427253723, "eval_rewards/margins": 0.0865015909075737, "eval_rewards/rejected": -0.9723107814788818, "eval_rewards/safe_rewards": -0.874053955078125, "eval_rewards/unsafe_rewards": -0.8699882626533508, "eval_runtime": 2349.2554, "eval_samples_per_second": 14.917, "eval_steps_per_second": 0.467, "step": 1500 }, { "epoch": 0.8, "learning_rate": 5.7696983110885746e-08, "logits/chosen": 1.0346394777297974, "logits/rejected": 1.4075425863265991, "logps/chosen": -264.0049133300781, "logps/rejected": -256.81793212890625, "loss": 5875.7254, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.7450360059738159, "rewards/margins": 0.13777832686901093, "rewards/rejected": -0.8828142881393433, "rewards/safe_rewards": -0.6767371892929077, "rewards/unsafe_rewards": -0.7506189942359924, "step": 1510 }, { "epoch": 0.81, "learning_rate": 5.47693304593777e-08, "logits/chosen": 0.577034056186676, "logits/rejected": 1.2275969982147217, "logps/chosen": -280.673583984375, "logps/rejected": -243.10635375976562, "loss": 5531.6125, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6422880291938782, "rewards/margins": 0.22371160984039307, "rewards/rejected": -0.8659995794296265, "rewards/safe_rewards": -0.5432512164115906, "rewards/unsafe_rewards": -0.6611617803573608, "step": 1520 }, { "epoch": 0.81, "learning_rate": 5.190874281132851e-08, "logits/chosen": 0.6209213733673096, "logits/rejected": 0.9749325513839722, "logps/chosen": -258.8196716308594, "logps/rejected": -247.3189697265625, "loss": 5541.2727, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.6575254201889038, "rewards/margins": 0.12947872281074524, "rewards/rejected": -0.7870042324066162, "rewards/safe_rewards": -0.7655413746833801, "rewards/unsafe_rewards": -0.7101870775222778, "step": 1530 }, { "epoch": 0.82, "learning_rate": 4.9116202826486045e-08, "logits/chosen": 0.7310935258865356, "logits/rejected": 1.0775771141052246, "logps/chosen": -272.3906555175781, "logps/rejected": -257.2728271484375, "loss": 5545.8492, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.6876263618469238, "rewards/margins": 0.16089771687984467, "rewards/rejected": -0.8485240936279297, "rewards/safe_rewards": -0.6295339465141296, "rewards/unsafe_rewards": -0.7383956909179688, "step": 1540 }, { "epoch": 0.82, "learning_rate": 4.639266978908676e-08, "logits/chosen": 0.6267167329788208, "logits/rejected": 1.1266528367996216, "logps/chosen": -297.58380126953125, "logps/rejected": -271.4803161621094, "loss": 5131.627, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.6685757637023926, "rewards/margins": 0.18729698657989502, "rewards/rejected": -0.8558727502822876, "rewards/safe_rewards": -0.6740354299545288, "rewards/unsafe_rewards": -0.6281224489212036, "step": 1550 }, { "epoch": 0.83, "learning_rate": 4.373907927832513e-08, "logits/chosen": 0.6049357056617737, "logits/rejected": 0.9919975996017456, "logps/chosen": -265.62481689453125, "logps/rejected": -285.9028625488281, "loss": 5640.1398, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.6182764172554016, "rewards/margins": 0.22418944537639618, "rewards/rejected": -0.842465877532959, "rewards/safe_rewards": -0.6555901765823364, "rewards/unsafe_rewards": -0.5656682848930359, "step": 1560 }, { "epoch": 0.83, "learning_rate": 4.115634284696698e-08, "logits/chosen": 0.49705711007118225, "logits/rejected": 0.9479654431343079, "logps/chosen": -261.2461853027344, "logps/rejected": -270.83331298828125, "loss": 5189.8301, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.6632257699966431, "rewards/margins": 0.21208517253398895, "rewards/rejected": -0.8753108978271484, "rewards/safe_rewards": -0.6663291454315186, "rewards/unsafe_rewards": -0.6038998365402222, "step": 1570 }, { "epoch": 0.84, "learning_rate": 3.864534770821559e-08, "logits/chosen": 0.6149829626083374, "logits/rejected": 1.1939442157745361, "logps/chosen": -262.00933837890625, "logps/rejected": -240.24581909179688, "loss": 5618.5883, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6275893449783325, "rewards/margins": 0.20411472022533417, "rewards/rejected": -0.8317041397094727, "rewards/safe_rewards": -0.6472023725509644, "rewards/unsafe_rewards": -0.5557063817977905, "step": 1580 }, { "epoch": 0.84, "learning_rate": 3.620695643093924e-08, "logits/chosen": 0.43840399384498596, "logits/rejected": 1.105423092842102, "logps/chosen": -269.2837829589844, "logps/rejected": -238.085205078125, "loss": 5468.3313, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.6394304037094116, "rewards/margins": 0.22106070816516876, "rewards/rejected": -0.860491156578064, "rewards/safe_rewards": -0.6031507849693298, "rewards/unsafe_rewards": -0.6791771650314331, "step": 1590 }, { "epoch": 0.85, "learning_rate": 3.384200664336412e-08, "logits/chosen": 0.5348480343818665, "logits/rejected": 1.0058144330978394, "logps/chosen": -268.3987731933594, "logps/rejected": -247.79696655273438, "loss": 5660.3645, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.5938838720321655, "rewards/margins": 0.21732494235038757, "rewards/rejected": -0.8112088441848755, "rewards/safe_rewards": -0.5639302134513855, "rewards/unsafe_rewards": -0.6350196599960327, "step": 1600 }, { "epoch": 0.85, "learning_rate": 3.155131074533529e-08, "logits/chosen": 0.30334433913230896, "logits/rejected": 0.9854658246040344, "logps/chosen": -283.627685546875, "logps/rejected": -263.83251953125, "loss": 6043.9172, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.6394412517547607, "rewards/margins": 0.1600230187177658, "rewards/rejected": -0.7994643449783325, "rewards/safe_rewards": -0.6199285387992859, "rewards/unsafe_rewards": -0.6412296295166016, "step": 1610 }, { "epoch": 0.86, "learning_rate": 2.9335655629243645e-08, "logits/chosen": 0.39362573623657227, "logits/rejected": 0.9285033941268921, "logps/chosen": -270.2079162597656, "logps/rejected": -261.9796447753906, "loss": 5957.5516, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.6112038493156433, "rewards/margins": 0.18837173283100128, "rewards/rejected": -0.7995756268501282, "rewards/safe_rewards": -0.6032061576843262, "rewards/unsafe_rewards": -0.6732661724090576, "step": 1620 }, { "epoch": 0.86, "learning_rate": 2.7195802409715197e-08, "logits/chosen": 0.2444291114807129, "logits/rejected": 0.9499914050102234, "logps/chosen": -298.4200134277344, "logps/rejected": -249.72866821289062, "loss": 5750.8313, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.6592567563056946, "rewards/margins": 0.1407555341720581, "rewards/rejected": -0.8000122904777527, "rewards/safe_rewards": -0.7100226283073425, "rewards/unsafe_rewards": -0.7015893459320068, "step": 1630 }, { "epoch": 0.87, "learning_rate": 2.513248616215527e-08, "logits/chosen": 0.3666357100009918, "logits/rejected": 0.9415947198867798, "logps/chosen": -277.87518310546875, "logps/rejected": -276.29119873046875, "loss": 5205.8715, "rewards/accuracies": 0.75, "rewards/chosen": -0.6106274724006653, "rewards/margins": 0.24805088341236115, "rewards/rejected": -0.8586783409118652, "rewards/safe_rewards": -0.6150985956192017, "rewards/unsafe_rewards": -0.594727635383606, "step": 1640 }, { "epoch": 0.88, "learning_rate": 2.31464156702382e-08, "logits/chosen": 0.24014464020729065, "logits/rejected": 0.9577549695968628, "logps/chosen": -292.7112121582031, "logps/rejected": -265.7065734863281, "loss": 5896.8078, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5955285429954529, "rewards/margins": 0.2333928644657135, "rewards/rejected": -0.8289214372634888, "rewards/safe_rewards": -0.6319350600242615, "rewards/unsafe_rewards": -0.5868616104125977, "step": 1650 }, { "epoch": 0.88, "learning_rate": 2.1238273182427933e-08, "logits/chosen": 0.6973511576652527, "logits/rejected": 1.2915074825286865, "logps/chosen": -265.3111572265625, "logps/rejected": -251.41201782226562, "loss": 5434.0336, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.6617192029953003, "rewards/margins": 0.19598451256752014, "rewards/rejected": -0.857703685760498, "rewards/safe_rewards": -0.6422809362411499, "rewards/unsafe_rewards": -0.6228102445602417, "step": 1660 }, { "epoch": 0.89, "learning_rate": 1.9408714177614306e-08, "logits/chosen": 0.5173779726028442, "logits/rejected": 1.02643883228302, "logps/chosen": -268.9621887207031, "logps/rejected": -251.25808715820312, "loss": 5243.4758, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.6187028288841248, "rewards/margins": 0.22567462921142578, "rewards/rejected": -0.8443773984909058, "rewards/safe_rewards": -0.6375213265419006, "rewards/unsafe_rewards": -0.6421637535095215, "step": 1670 }, { "epoch": 0.89, "learning_rate": 1.7658367139945228e-08, "logits/chosen": 0.6539649963378906, "logits/rejected": 1.0953106880187988, "logps/chosen": -288.9885559082031, "logps/rejected": -259.146728515625, "loss": 5246.4344, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6686577200889587, "rewards/margins": 0.19176754355430603, "rewards/rejected": -0.8604252934455872, "rewards/safe_rewards": -0.7045280933380127, "rewards/unsafe_rewards": -0.7155130505561829, "step": 1680 }, { "epoch": 0.9, "learning_rate": 1.5987833342931745e-08, "logits/chosen": 0.4664410650730133, "logits/rejected": 1.215132236480713, "logps/chosen": -284.1900939941406, "logps/rejected": -251.48379516601562, "loss": 5564.9324, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.6805782318115234, "rewards/margins": 0.21095602214336395, "rewards/rejected": -0.8915343284606934, "rewards/safe_rewards": -0.67192143201828, "rewards/unsafe_rewards": -0.6578537821769714, "step": 1690 }, { "epoch": 0.9, "learning_rate": 1.439768664290053e-08, "logits/chosen": 0.48882967233657837, "logits/rejected": 1.0205453634262085, "logps/chosen": -288.0510559082031, "logps/rejected": -263.57122802734375, "loss": 5705.5039, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.6453284025192261, "rewards/margins": 0.18227383494377136, "rewards/rejected": -0.827602207660675, "rewards/safe_rewards": -0.6023403406143188, "rewards/unsafe_rewards": -0.6489912867546082, "step": 1700 }, { "epoch": 0.91, "learning_rate": 1.2888473281864597e-08, "logits/chosen": 0.3580858111381531, "logits/rejected": 0.9355760812759399, "logps/chosen": -252.00344848632812, "logps/rejected": -256.7703552246094, "loss": 5420.7055, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.6472461819648743, "rewards/margins": 0.19622859358787537, "rewards/rejected": -0.8434747457504272, "rewards/safe_rewards": -0.6663787364959717, "rewards/unsafe_rewards": -0.6997274160385132, "step": 1710 }, { "epoch": 0.91, "learning_rate": 1.1460711699880082e-08, "logits/chosen": 0.32274478673934937, "logits/rejected": 0.9183855056762695, "logps/chosen": -281.06304931640625, "logps/rejected": -268.91278076171875, "loss": 5609.357, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.5867010951042175, "rewards/margins": 0.23433193564414978, "rewards/rejected": -0.8210331201553345, "rewards/safe_rewards": -0.5630391240119934, "rewards/unsafe_rewards": -0.6277604103088379, "step": 1720 }, { "epoch": 0.92, "learning_rate": 1.0114892356953397e-08, "logits/chosen": 0.381804883480072, "logits/rejected": 0.9557956457138062, "logps/chosen": -278.6263427734375, "logps/rejected": -252.7932891845703, "loss": 5676.834, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6421754360198975, "rewards/margins": 0.1775234043598175, "rewards/rejected": -0.8196988105773926, "rewards/safe_rewards": -0.6115553379058838, "rewards/unsafe_rewards": -0.6476501226425171, "step": 1730 }, { "epoch": 0.92, "learning_rate": 8.851477564560061e-09, "logits/chosen": 0.5100737810134888, "logits/rejected": 0.932380199432373, "logps/chosen": -263.25146484375, "logps/rejected": -271.11676025390625, "loss": 5593.4414, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.6300482749938965, "rewards/margins": 0.25807589292526245, "rewards/rejected": -0.8881241679191589, "rewards/safe_rewards": -0.6826761960983276, "rewards/unsafe_rewards": -0.6732330322265625, "step": 1740 }, { "epoch": 0.93, "learning_rate": 7.670901326832763e-09, "logits/chosen": 0.6556006669998169, "logits/rejected": 1.0529851913452148, "logps/chosen": -272.6200866699219, "logps/rejected": -291.10101318359375, "loss": 5333.684, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.7119321823120117, "rewards/margins": 0.18222954869270325, "rewards/rejected": -0.8941618204116821, "rewards/safe_rewards": -0.7450841069221497, "rewards/unsafe_rewards": -0.6783844232559204, "step": 1750 }, { "epoch": 0.93, "learning_rate": 6.5735691914738936e-09, "logits/chosen": 0.3428182005882263, "logits/rejected": 0.6993114948272705, "logps/chosen": -276.2501220703125, "logps/rejected": -270.787841796875, "loss": 6014.7414, "rewards/accuracies": 0.625, "rewards/chosen": -0.6672028303146362, "rewards/margins": 0.16263318061828613, "rewards/rejected": -0.8298360109329224, "rewards/safe_rewards": -0.6557270288467407, "rewards/unsafe_rewards": -0.7067701816558838, "step": 1760 }, { "epoch": 0.94, "learning_rate": 5.559858110443016e-09, "logits/chosen": 0.3265165388584137, "logits/rejected": 0.9415761828422546, "logps/chosen": -279.380615234375, "logps/rejected": -258.53887939453125, "loss": 5329.075, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.6516368985176086, "rewards/margins": 0.22732026875019073, "rewards/rejected": -0.8789570927619934, "rewards/safe_rewards": -0.6853364706039429, "rewards/unsafe_rewards": -0.6284711360931396, "step": 1770 }, { "epoch": 0.94, "learning_rate": 4.6301163104676685e-09, "logits/chosen": 0.5433076620101929, "logits/rejected": 0.899452805519104, "logps/chosen": -262.05511474609375, "logps/rejected": -280.93658447265625, "loss": 5452.5277, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6632400751113892, "rewards/margins": 0.19723954796791077, "rewards/rejected": -0.8604797124862671, "rewards/safe_rewards": -0.5747020244598389, "rewards/unsafe_rewards": -0.6066412329673767, "step": 1780 }, { "epoch": 0.95, "learning_rate": 3.784663173421438e-09, "logits/chosen": 0.47608470916748047, "logits/rejected": 0.8737590909004211, "logps/chosen": -294.0523376464844, "logps/rejected": -280.8829650878906, "loss": 5532.6391, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.6354952454566956, "rewards/margins": 0.18091240525245667, "rewards/rejected": -0.8164075613021851, "rewards/safe_rewards": -0.6999973654747009, "rewards/unsafe_rewards": -0.6226142644882202, "step": 1790 }, { "epoch": 0.96, "learning_rate": 3.023789126611137e-09, "logits/chosen": 0.6358956694602966, "logits/rejected": 1.2913506031036377, "logps/chosen": -276.2715148925781, "logps/rejected": -243.6599884033203, "loss": 5192.1734, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.6617811918258667, "rewards/margins": 0.21255967020988464, "rewards/rejected": -0.874340832233429, "rewards/safe_rewards": -0.665223240852356, "rewards/unsafe_rewards": -0.67181396484375, "step": 1800 }, { "epoch": 0.96, "learning_rate": 2.3477555430100604e-09, "logits/chosen": 0.5863360166549683, "logits/rejected": 1.0950720310211182, "logps/chosen": -270.6855773925781, "logps/rejected": -254.65771484375, "loss": 5546.9984, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.5831121206283569, "rewards/margins": 0.2669592499732971, "rewards/rejected": -0.8500713109970093, "rewards/safe_rewards": -0.586032509803772, "rewards/unsafe_rewards": -0.577675461769104, "step": 1810 }, { "epoch": 0.97, "learning_rate": 1.7567946514721322e-09, "logits/chosen": 0.6444328427314758, "logits/rejected": 1.0208208560943604, "logps/chosen": -269.35577392578125, "logps/rejected": -271.528564453125, "loss": 5601.7539, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.6750708818435669, "rewards/margins": 0.19110876321792603, "rewards/rejected": -0.8661795854568481, "rewards/safe_rewards": -0.6811034679412842, "rewards/unsafe_rewards": -0.7294248342514038, "step": 1820 }, { "epoch": 0.97, "learning_rate": 1.2511094569571668e-09, "logits/chosen": 0.3397526741027832, "logits/rejected": 1.0616391897201538, "logps/chosen": -257.86822509765625, "logps/rejected": -244.8105926513672, "loss": 5620.3375, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.631868302822113, "rewards/margins": 0.2000071257352829, "rewards/rejected": -0.8318754434585571, "rewards/safe_rewards": -0.5972138047218323, "rewards/unsafe_rewards": -0.6459835171699524, "step": 1830 }, { "epoch": 0.98, "learning_rate": 8.308736707954289e-10, "logits/chosen": 0.518609881401062, "logits/rejected": 1.1488319635391235, "logps/chosen": -273.81390380859375, "logps/rejected": -240.91372680664062, "loss": 5548.0289, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6856581568717957, "rewards/margins": 0.2014351636171341, "rewards/rejected": -0.8870933651924133, "rewards/safe_rewards": -0.6684737205505371, "rewards/unsafe_rewards": -0.694146990776062, "step": 1840 }, { "epoch": 0.98, "learning_rate": 4.962316510149222e-10, "logits/chosen": 0.3395392894744873, "logits/rejected": 1.0089718103408813, "logps/chosen": -252.1464080810547, "logps/rejected": -241.22982788085938, "loss": 5356.7621, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6337156295776367, "rewards/margins": 0.2152295857667923, "rewards/rejected": -0.8489452600479126, "rewards/safe_rewards": -0.6431758403778076, "rewards/unsafe_rewards": -0.6494039297103882, "step": 1850 }, { "epoch": 0.99, "learning_rate": 2.4729835275189016e-10, "logits/chosen": 0.5798267722129822, "logits/rejected": 0.9745955467224121, "logps/chosen": -243.1245574951172, "logps/rejected": -238.126220703125, "loss": 5836.127, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.6284788846969604, "rewards/margins": 0.2039627581834793, "rewards/rejected": -0.8324416279792786, "rewards/safe_rewards": -0.5914771556854248, "rewards/unsafe_rewards": -0.6241937279701233, "step": 1860 }, { "epoch": 0.99, "learning_rate": 8.415928876176482e-11, "logits/chosen": 0.4843016564846039, "logits/rejected": 0.8851835131645203, "logps/chosen": -258.23773193359375, "logps/rejected": -251.73001098632812, "loss": 6036.282, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6951759457588196, "rewards/margins": 0.1390235722064972, "rewards/rejected": -0.8341996073722839, "rewards/safe_rewards": -0.7087674140930176, "rewards/unsafe_rewards": -0.712031900882721, "step": 1870 }, { "epoch": 1.0, "learning_rate": 6.870500044303673e-12, "logits/chosen": 0.5293042063713074, "logits/rejected": 0.8430191874504089, "logps/chosen": -253.91397094726562, "logps/rejected": -270.7514953613281, "loss": 5497.6977, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.5842832326889038, "rewards/margins": 0.209587961435318, "rewards/rejected": -0.7938712239265442, "rewards/safe_rewards": -0.6020101308822632, "rewards/unsafe_rewards": -0.6186091303825378, "step": 1880 }, { "epoch": 1.0, "step": 1884, "total_flos": 0.0, "train_loss": 5859.617769083399, "train_runtime": 32772.3871, "train_samples_per_second": 3.68, "train_steps_per_second": 0.057 } ], "logging_steps": 10, "max_steps": 1884, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }