|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9996020692399522, |
|
"eval_steps": 500, |
|
"global_step": 1884, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.645502645502645e-09, |
|
"logits/chosen": -1.8052858114242554, |
|
"logits/rejected": -1.8250553607940674, |
|
"logps/chosen": -201.6904296875, |
|
"logps/rejected": -206.93157958984375, |
|
"loss": 7734.375, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"rewards/safe_rewards": 0.0, |
|
"rewards/unsafe_rewards": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.6455026455026453e-08, |
|
"logits/chosen": -2.025691032409668, |
|
"logits/rejected": -1.8649556636810303, |
|
"logps/chosen": -270.43963623046875, |
|
"logps/rejected": -169.98423767089844, |
|
"loss": 7727.0087, |
|
"rewards/accuracies": 0.4027777910232544, |
|
"rewards/chosen": 4.114356852369383e-05, |
|
"rewards/margins": -0.0002653732954058796, |
|
"rewards/rejected": 0.00030651676934212446, |
|
"rewards/safe_rewards": -1.17086410682532e-05, |
|
"rewards/unsafe_rewards": -0.0006500756135210395, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.2910052910052905e-08, |
|
"logits/chosen": -1.961146593093872, |
|
"logits/rejected": -1.873740553855896, |
|
"logps/chosen": -189.17404174804688, |
|
"logps/rejected": -176.31651306152344, |
|
"loss": 7718.007, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -6.340327672660351e-06, |
|
"rewards/margins": -0.00010152898175874725, |
|
"rewards/rejected": 9.518869046587497e-05, |
|
"rewards/safe_rewards": 0.00045737033360637724, |
|
"rewards/unsafe_rewards": -8.718876051716506e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.936507936507936e-08, |
|
"logits/chosen": -1.9912703037261963, |
|
"logits/rejected": -1.883933424949646, |
|
"logps/chosen": -198.4538116455078, |
|
"logps/rejected": -183.28781127929688, |
|
"loss": 7515.9359, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0001133469631895423, |
|
"rewards/margins": 0.0007399408495984972, |
|
"rewards/rejected": -0.0006265938864089549, |
|
"rewards/safe_rewards": 0.00022509883274324238, |
|
"rewards/unsafe_rewards": 0.0002071214112220332, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0582010582010581e-07, |
|
"logits/chosen": -1.927167534828186, |
|
"logits/rejected": -1.8453724384307861, |
|
"logps/chosen": -198.85276794433594, |
|
"logps/rejected": -174.22967529296875, |
|
"loss": 7334.5094, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.00027468582266010344, |
|
"rewards/margins": 0.0014765586238354445, |
|
"rewards/rejected": -0.0012018729466944933, |
|
"rewards/safe_rewards": 0.0002533269871491939, |
|
"rewards/unsafe_rewards": 0.00015336349315475672, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3227513227513225e-07, |
|
"logits/chosen": -2.037893533706665, |
|
"logits/rejected": -1.8426322937011719, |
|
"logps/chosen": -214.9281463623047, |
|
"logps/rejected": -162.3707733154297, |
|
"loss": 7399.5859, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.0017435807967558503, |
|
"rewards/margins": 0.001902301562950015, |
|
"rewards/rejected": -0.00015872062067501247, |
|
"rewards/safe_rewards": 0.002309921896085143, |
|
"rewards/unsafe_rewards": 0.00044932105811312795, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5873015873015872e-07, |
|
"logits/chosen": -2.011747360229492, |
|
"logits/rejected": -1.8823707103729248, |
|
"logps/chosen": -182.73411560058594, |
|
"logps/rejected": -155.423095703125, |
|
"loss": 7214.4602, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.0006955948774702847, |
|
"rewards/margins": 0.005063413176685572, |
|
"rewards/rejected": -0.0057590072974562645, |
|
"rewards/safe_rewards": -0.0021988481748849154, |
|
"rewards/unsafe_rewards": 0.0001153635821538046, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8518518518518516e-07, |
|
"logits/chosen": -1.975612998008728, |
|
"logits/rejected": -1.8158948421478271, |
|
"logps/chosen": -186.48574829101562, |
|
"logps/rejected": -168.57896423339844, |
|
"loss": 7816.8766, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.007440758403390646, |
|
"rewards/margins": 0.010602862574160099, |
|
"rewards/rejected": -0.018043622374534607, |
|
"rewards/safe_rewards": -0.010516250506043434, |
|
"rewards/unsafe_rewards": -0.015666166320443153, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.1164021164021162e-07, |
|
"logits/chosen": -1.9063125848770142, |
|
"logits/rejected": -1.7897474765777588, |
|
"logps/chosen": -210.2836151123047, |
|
"logps/rejected": -180.822998046875, |
|
"loss": 7304.9531, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.024481967091560364, |
|
"rewards/margins": 0.016244709491729736, |
|
"rewards/rejected": -0.0407266803085804, |
|
"rewards/safe_rewards": -0.02365388534963131, |
|
"rewards/unsafe_rewards": -0.0289783775806427, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3809523809523806e-07, |
|
"logits/chosen": -1.994605302810669, |
|
"logits/rejected": -1.866681694984436, |
|
"logps/chosen": -203.6532440185547, |
|
"logps/rejected": -174.1517791748047, |
|
"loss": 7251.9984, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.06749475002288818, |
|
"rewards/margins": 0.020768558606505394, |
|
"rewards/rejected": -0.08826331794261932, |
|
"rewards/safe_rewards": -0.06556878238916397, |
|
"rewards/unsafe_rewards": -0.052192188799381256, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.645502645502645e-07, |
|
"logits/chosen": -1.9495357275009155, |
|
"logits/rejected": -1.8006837368011475, |
|
"logps/chosen": -205.99411010742188, |
|
"logps/rejected": -192.54415893554688, |
|
"loss": 6776.1008, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.11886356770992279, |
|
"rewards/margins": 0.020749244838953018, |
|
"rewards/rejected": -0.1396128088235855, |
|
"rewards/safe_rewards": -0.11704058945178986, |
|
"rewards/unsafe_rewards": -0.1348837912082672, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9100529100529097e-07, |
|
"logits/chosen": -1.9887052774429321, |
|
"logits/rejected": -1.8671073913574219, |
|
"logps/chosen": -226.98001098632812, |
|
"logps/rejected": -217.73733520507812, |
|
"loss": 6636.9766, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.11880362033843994, |
|
"rewards/margins": 0.03935481607913971, |
|
"rewards/rejected": -0.15815845131874084, |
|
"rewards/safe_rewards": -0.14540424942970276, |
|
"rewards/unsafe_rewards": -0.11240017414093018, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1746031746031743e-07, |
|
"logits/chosen": -1.8841511011123657, |
|
"logits/rejected": -1.6952005624771118, |
|
"logps/chosen": -235.6121368408203, |
|
"logps/rejected": -192.76162719726562, |
|
"loss": 6804.4828, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.1285235583782196, |
|
"rewards/margins": 0.07450314611196518, |
|
"rewards/rejected": -0.20302672684192657, |
|
"rewards/safe_rewards": -0.12894900143146515, |
|
"rewards/unsafe_rewards": -0.12272067368030548, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.439153439153439e-07, |
|
"logits/chosen": -1.8711330890655518, |
|
"logits/rejected": -1.6887938976287842, |
|
"logps/chosen": -225.3953094482422, |
|
"logps/rejected": -200.31997680664062, |
|
"loss": 7036.6016, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.11849894374608994, |
|
"rewards/margins": 0.05801115185022354, |
|
"rewards/rejected": -0.17651011049747467, |
|
"rewards/safe_rewards": -0.10611984878778458, |
|
"rewards/unsafe_rewards": -0.14429841935634613, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.703703703703703e-07, |
|
"logits/chosen": -1.826206922531128, |
|
"logits/rejected": -1.6439968347549438, |
|
"logps/chosen": -220.1838836669922, |
|
"logps/rejected": -185.7141876220703, |
|
"loss": 6936.9914, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.11376659572124481, |
|
"rewards/margins": 0.0765247792005539, |
|
"rewards/rejected": -0.1902913898229599, |
|
"rewards/safe_rewards": -0.11482509225606918, |
|
"rewards/unsafe_rewards": -0.09925278276205063, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.968253968253968e-07, |
|
"logits/chosen": -1.7187334299087524, |
|
"logits/rejected": -1.5741361379623413, |
|
"logps/chosen": -211.09603881835938, |
|
"logps/rejected": -203.66156005859375, |
|
"loss": 6555.6867, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.19104455411434174, |
|
"rewards/margins": 0.06891994178295135, |
|
"rewards/rejected": -0.2599644958972931, |
|
"rewards/safe_rewards": -0.20118245482444763, |
|
"rewards/unsafe_rewards": -0.16981182992458344, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.2328042328042324e-07, |
|
"logits/chosen": -1.7090606689453125, |
|
"logits/rejected": -1.4574247598648071, |
|
"logps/chosen": -231.1162567138672, |
|
"logps/rejected": -197.13832092285156, |
|
"loss": 6483.332, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.2999975085258484, |
|
"rewards/margins": 0.08841492235660553, |
|
"rewards/rejected": -0.3884124159812927, |
|
"rewards/safe_rewards": -0.2963607907295227, |
|
"rewards/unsafe_rewards": -0.2815978527069092, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.497354497354497e-07, |
|
"logits/chosen": -1.7472738027572632, |
|
"logits/rejected": -1.5065333843231201, |
|
"logps/chosen": -255.1507110595703, |
|
"logps/rejected": -221.82241821289062, |
|
"loss": 6801.5375, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.23129959404468536, |
|
"rewards/margins": 0.12043756246566772, |
|
"rewards/rejected": -0.35173720121383667, |
|
"rewards/safe_rewards": -0.22959312796592712, |
|
"rewards/unsafe_rewards": -0.1985938847064972, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.761904761904761e-07, |
|
"logits/chosen": -1.680676817893982, |
|
"logits/rejected": -1.4166452884674072, |
|
"logps/chosen": -216.8690948486328, |
|
"logps/rejected": -191.8008270263672, |
|
"loss": 6535.7055, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.26913732290267944, |
|
"rewards/margins": 0.11233188211917877, |
|
"rewards/rejected": -0.381469190120697, |
|
"rewards/safe_rewards": -0.26176974177360535, |
|
"rewards/unsafe_rewards": -0.23940448462963104, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999995705919032e-07, |
|
"logits/chosen": -1.5433807373046875, |
|
"logits/rejected": -1.2667306661605835, |
|
"logps/chosen": -224.0026397705078, |
|
"logps/rejected": -205.34414672851562, |
|
"loss": 6409.0121, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.19693121314048767, |
|
"rewards/margins": 0.09455744177103043, |
|
"rewards/rejected": -0.2914886772632599, |
|
"rewards/safe_rewards": -0.17649488151073456, |
|
"rewards/unsafe_rewards": -0.18380855023860931, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999480434051858e-07, |
|
"logits/chosen": -1.5521910190582275, |
|
"logits/rejected": -1.3097938299179077, |
|
"logps/chosen": -225.257568359375, |
|
"logps/rejected": -205.92129516601562, |
|
"loss": 6576.5188, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.1997550129890442, |
|
"rewards/margins": 0.0904761329293251, |
|
"rewards/rejected": -0.2902311384677887, |
|
"rewards/safe_rewards": -0.20136451721191406, |
|
"rewards/unsafe_rewards": -0.21680407226085663, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998106548810311e-07, |
|
"logits/chosen": -1.3539698123931885, |
|
"logits/rejected": -1.2038872241973877, |
|
"logps/chosen": -212.8267364501953, |
|
"logps/rejected": -220.0903778076172, |
|
"loss": 6444.5828, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2437468022108078, |
|
"rewards/margins": 0.14799915254116058, |
|
"rewards/rejected": -0.3917458951473236, |
|
"rewards/safe_rewards": -0.2773512601852417, |
|
"rewards/unsafe_rewards": -0.2216939926147461, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995874522146975e-07, |
|
"logits/chosen": -1.503328561782837, |
|
"logits/rejected": -1.3146250247955322, |
|
"logps/chosen": -236.4509735107422, |
|
"logps/rejected": -211.6634063720703, |
|
"loss": 6233.5547, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.29747992753982544, |
|
"rewards/margins": 0.13039958477020264, |
|
"rewards/rejected": -0.4278795123100281, |
|
"rewards/safe_rewards": -0.2768808901309967, |
|
"rewards/unsafe_rewards": -0.3182833790779114, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.992785120800375e-07, |
|
"logits/chosen": -1.576887845993042, |
|
"logits/rejected": -1.2664101123809814, |
|
"logps/chosen": -237.9243621826172, |
|
"logps/rejected": -213.4459991455078, |
|
"loss": 6108.0914, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.23068375885486603, |
|
"rewards/margins": 0.14957153797149658, |
|
"rewards/rejected": -0.3802553117275238, |
|
"rewards/safe_rewards": -0.22292426228523254, |
|
"rewards/unsafe_rewards": -0.18162095546722412, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.988839406031596e-07, |
|
"logits/chosen": -1.515092134475708, |
|
"logits/rejected": -1.2886550426483154, |
|
"logps/chosen": -223.7300567626953, |
|
"logps/rejected": -192.06324768066406, |
|
"loss": 6310.6699, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.24790284037590027, |
|
"rewards/margins": 0.1096932515501976, |
|
"rewards/rejected": -0.3575960695743561, |
|
"rewards/safe_rewards": -0.2673969864845276, |
|
"rewards/unsafe_rewards": -0.24145250022411346, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.98403873325972e-07, |
|
"logits/chosen": -1.5146888494491577, |
|
"logits/rejected": -1.3244738578796387, |
|
"logps/chosen": -213.21694946289062, |
|
"logps/rejected": -209.35061645507812, |
|
"loss": 6209.5707, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2071472406387329, |
|
"rewards/margins": 0.16860046982765198, |
|
"rewards/rejected": -0.3757476806640625, |
|
"rewards/safe_rewards": -0.1998087763786316, |
|
"rewards/unsafe_rewards": -0.20211009681224823, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.978384751596212e-07, |
|
"logits/chosen": -1.3180285692214966, |
|
"logits/rejected": -1.1171799898147583, |
|
"logps/chosen": -232.109375, |
|
"logps/rejected": -236.84072875976562, |
|
"loss": 6328.7531, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.32092350721359253, |
|
"rewards/margins": 0.17156612873077393, |
|
"rewards/rejected": -0.49248963594436646, |
|
"rewards/safe_rewards": -0.4227983355522156, |
|
"rewards/unsafe_rewards": -0.3325851559638977, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.971879403278432e-07, |
|
"logits/chosen": -1.1372450590133667, |
|
"logits/rejected": -0.9446180462837219, |
|
"logps/chosen": -234.88888549804688, |
|
"logps/rejected": -224.05886840820312, |
|
"loss": 6312.1719, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.29563266038894653, |
|
"rewards/margins": 0.12811212241649628, |
|
"rewards/rejected": -0.4237447679042816, |
|
"rewards/safe_rewards": -0.33217892050743103, |
|
"rewards/unsafe_rewards": -0.27307888865470886, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.964524923002436e-07, |
|
"logits/chosen": -1.415801763534546, |
|
"logits/rejected": -1.1731336116790771, |
|
"logps/chosen": -241.7359619140625, |
|
"logps/rejected": -224.5096893310547, |
|
"loss": 5974.0195, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.3053835928440094, |
|
"rewards/margins": 0.16657045483589172, |
|
"rewards/rejected": -0.4719540476799011, |
|
"rewards/safe_rewards": -0.3295218348503113, |
|
"rewards/unsafe_rewards": -0.30390697717666626, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.956323837155325e-07, |
|
"logits/chosen": -1.2966214418411255, |
|
"logits/rejected": -1.1260521411895752, |
|
"logps/chosen": -227.2568359375, |
|
"logps/rejected": -214.1421661376953, |
|
"loss": 6133.0227, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.261239230632782, |
|
"rewards/margins": 0.15825437009334564, |
|
"rewards/rejected": -0.4194936156272888, |
|
"rewards/safe_rewards": -0.2375851422548294, |
|
"rewards/unsafe_rewards": -0.2705303132534027, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.947278962947386e-07, |
|
"logits/chosen": -1.255904197692871, |
|
"logits/rejected": -1.0300556421279907, |
|
"logps/chosen": -231.86593627929688, |
|
"logps/rejected": -213.03768920898438, |
|
"loss": 5684.9316, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.30576351284980774, |
|
"rewards/margins": 0.1560250222682953, |
|
"rewards/rejected": -0.4617885649204254, |
|
"rewards/safe_rewards": -0.3117372691631317, |
|
"rewards/unsafe_rewards": -0.30344492197036743, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.937393407444337e-07, |
|
"logits/chosen": -1.1847805976867676, |
|
"logits/rejected": -0.8935750722885132, |
|
"logps/chosen": -235.5170135498047, |
|
"logps/rejected": -226.17910766601562, |
|
"loss": 5606.7586, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.4436865746974945, |
|
"rewards/margins": 0.12356774508953094, |
|
"rewards/rejected": -0.5672543048858643, |
|
"rewards/safe_rewards": -0.4222384989261627, |
|
"rewards/unsafe_rewards": -0.49501723051071167, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.926670566499992e-07, |
|
"logits/chosen": -0.6831132173538208, |
|
"logits/rejected": -0.43409886956214905, |
|
"logps/chosen": -230.1105499267578, |
|
"logps/rejected": -223.13021850585938, |
|
"loss": 6029.3086, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.4783251881599426, |
|
"rewards/margins": 0.13184307515621185, |
|
"rewards/rejected": -0.6101682782173157, |
|
"rewards/safe_rewards": -0.46370235085487366, |
|
"rewards/unsafe_rewards": -0.4838125705718994, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.915114123589732e-07, |
|
"logits/chosen": -0.5296390652656555, |
|
"logits/rejected": -0.23315271735191345, |
|
"logps/chosen": -264.1290588378906, |
|
"logps/rejected": -222.7255401611328, |
|
"loss": 6587.2148, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.49660125374794006, |
|
"rewards/margins": 0.1269882619380951, |
|
"rewards/rejected": -0.6235895156860352, |
|
"rewards/safe_rewards": -0.5574027299880981, |
|
"rewards/unsafe_rewards": -0.5570284128189087, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.90272804854517e-07, |
|
"logits/chosen": -0.20833459496498108, |
|
"logits/rejected": 0.08662636578083038, |
|
"logps/chosen": -271.68389892578125, |
|
"logps/rejected": -259.1782531738281, |
|
"loss": 6224.5324, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5533224940299988, |
|
"rewards/margins": 0.15772438049316406, |
|
"rewards/rejected": -0.7110469341278076, |
|
"rewards/safe_rewards": -0.5448375940322876, |
|
"rewards/unsafe_rewards": -0.5393844842910767, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.889516596190448e-07, |
|
"logits/chosen": -0.7373126149177551, |
|
"logits/rejected": -0.34005147218704224, |
|
"logps/chosen": -293.0935363769531, |
|
"logps/rejected": -241.9617156982422, |
|
"loss": 6110.7906, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5001389980316162, |
|
"rewards/margins": 0.1725221574306488, |
|
"rewards/rejected": -0.6726611852645874, |
|
"rewards/safe_rewards": -0.4835886061191559, |
|
"rewards/unsafe_rewards": -0.5382236838340759, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.875484304880629e-07, |
|
"logits/chosen": -0.8152839541435242, |
|
"logits/rejected": -0.4126107096672058, |
|
"logps/chosen": -302.5885314941406, |
|
"logps/rejected": -256.1798095703125, |
|
"loss": 6488.7234, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.48745980858802795, |
|
"rewards/margins": 0.10641022026538849, |
|
"rewards/rejected": -0.5938700437545776, |
|
"rewards/safe_rewards": -0.449713796377182, |
|
"rewards/unsafe_rewards": -0.48859700560569763, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.860635994942702e-07, |
|
"logits/chosen": -0.47416171431541443, |
|
"logits/rejected": 0.00913926400244236, |
|
"logps/chosen": -258.38189697265625, |
|
"logps/rejected": -230.67880249023438, |
|
"loss": 5790.3816, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.5084312558174133, |
|
"rewards/margins": 0.1444414108991623, |
|
"rewards/rejected": -0.6528726816177368, |
|
"rewards/safe_rewards": -0.5270028114318848, |
|
"rewards/unsafe_rewards": -0.48991069197654724, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.844976767019714e-07, |
|
"logits/chosen": -0.19216355681419373, |
|
"logits/rejected": 0.15172423422336578, |
|
"logps/chosen": -222.911865234375, |
|
"logps/rejected": -202.00888061523438, |
|
"loss": 5908.2133, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5394010543823242, |
|
"rewards/margins": 0.11715151369571686, |
|
"rewards/rejected": -0.6565525531768799, |
|
"rewards/safe_rewards": -0.5183984041213989, |
|
"rewards/unsafe_rewards": -0.5164821743965149, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.828512000318616e-07, |
|
"logits/chosen": -0.213291734457016, |
|
"logits/rejected": 0.39291974902153015, |
|
"logps/chosen": -303.5594177246094, |
|
"logps/rejected": -259.14178466796875, |
|
"loss": 6109.6039, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5700324177742004, |
|
"rewards/margins": 0.1927037090063095, |
|
"rewards/rejected": -0.7627362012863159, |
|
"rewards/safe_rewards": -0.5912032723426819, |
|
"rewards/unsafe_rewards": -0.5395609140396118, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.811247350762418e-07, |
|
"logits/chosen": -0.36068278551101685, |
|
"logits/rejected": 0.05598723143339157, |
|
"logps/chosen": -240.6222381591797, |
|
"logps/rejected": -234.20803833007812, |
|
"loss": 5907.1703, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.554689347743988, |
|
"rewards/margins": 0.17352624237537384, |
|
"rewards/rejected": -0.7282156348228455, |
|
"rewards/safe_rewards": -0.5173069834709167, |
|
"rewards/unsafe_rewards": -0.5826700329780579, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.79318874904728e-07, |
|
"logits/chosen": -0.5469863414764404, |
|
"logits/rejected": -0.3919845223426819, |
|
"logps/chosen": -267.99761962890625, |
|
"logps/rejected": -260.9379577636719, |
|
"loss": 6323.5375, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5513988137245178, |
|
"rewards/margins": 0.16061297059059143, |
|
"rewards/rejected": -0.7120116949081421, |
|
"rewards/safe_rewards": -0.5992297530174255, |
|
"rewards/unsafe_rewards": -0.5494996309280396, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.774342398605221e-07, |
|
"logits/chosen": -1.3936598300933838, |
|
"logits/rejected": -1.0238125324249268, |
|
"logps/chosen": -262.09033203125, |
|
"logps/rejected": -221.07174682617188, |
|
"loss": 5492.8094, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5323154926300049, |
|
"rewards/margins": 0.15208503603935242, |
|
"rewards/rejected": -0.6844004988670349, |
|
"rewards/safe_rewards": -0.5349102020263672, |
|
"rewards/unsafe_rewards": -0.505738377571106, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.754714773473134e-07, |
|
"logits/chosen": -1.2268015146255493, |
|
"logits/rejected": -1.0391647815704346, |
|
"logps/chosen": -248.2527313232422, |
|
"logps/rejected": -258.4667663574219, |
|
"loss": 6146.5922, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.5346105694770813, |
|
"rewards/margins": 0.18027544021606445, |
|
"rewards/rejected": -0.7148860692977905, |
|
"rewards/safe_rewards": -0.4759598672389984, |
|
"rewards/unsafe_rewards": -0.534007728099823, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.734312616068851e-07, |
|
"logits/chosen": -1.2311909198760986, |
|
"logits/rejected": -0.9865934252738953, |
|
"logps/chosen": -214.25851440429688, |
|
"logps/rejected": -198.68943786621094, |
|
"loss": 5944.2828, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.3851444125175476, |
|
"rewards/margins": 0.0964752659201622, |
|
"rewards/rejected": -0.481619656085968, |
|
"rewards/safe_rewards": -0.40014153718948364, |
|
"rewards/unsafe_rewards": -0.4206266403198242, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.713142934875005e-07, |
|
"logits/chosen": -0.7530995607376099, |
|
"logits/rejected": -0.348047137260437, |
|
"logps/chosen": -273.5533447265625, |
|
"logps/rejected": -247.33377075195312, |
|
"loss": 6019.3629, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4809795916080475, |
|
"rewards/margins": 0.16457389295101166, |
|
"rewards/rejected": -0.645553469657898, |
|
"rewards/safe_rewards": -0.4939555525779724, |
|
"rewards/unsafe_rewards": -0.51116544008255, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.6912130020314996e-07, |
|
"logits/chosen": 0.18566010892391205, |
|
"logits/rejected": 0.4161214232444763, |
|
"logps/chosen": -233.847900390625, |
|
"logps/rejected": -238.5542755126953, |
|
"loss": 5555.243, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6200246810913086, |
|
"rewards/margins": 0.13345691561698914, |
|
"rewards/rejected": -0.7534815073013306, |
|
"rewards/safe_rewards": -0.6095362901687622, |
|
"rewards/unsafe_rewards": -0.6309984922409058, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.668530350837408e-07, |
|
"logits/chosen": 0.024336492642760277, |
|
"logits/rejected": 0.4952603876590729, |
|
"logps/chosen": -259.33697509765625, |
|
"logps/rejected": -254.6613006591797, |
|
"loss": 5726.7293, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5721555948257446, |
|
"rewards/margins": 0.12051858007907867, |
|
"rewards/rejected": -0.6926741600036621, |
|
"rewards/safe_rewards": -0.5316283702850342, |
|
"rewards/unsafe_rewards": -0.5645433664321899, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.64510277316316e-07, |
|
"logits/chosen": -0.0006995767471380532, |
|
"logits/rejected": 0.4036879539489746, |
|
"logps/chosen": -269.50482177734375, |
|
"logps/rejected": -248.73434448242188, |
|
"loss": 6012.2914, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5171098113059998, |
|
"rewards/margins": 0.20941033959388733, |
|
"rewards/rejected": -0.7265201807022095, |
|
"rewards/safe_rewards": -0.5066377520561218, |
|
"rewards/unsafe_rewards": -0.4963339865207672, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6209383167739015e-07, |
|
"logits/chosen": -0.8723047971725464, |
|
"logits/rejected": -0.47492194175720215, |
|
"logps/chosen": -239.2227020263672, |
|
"logps/rejected": -223.37191772460938, |
|
"loss": 6090.4563, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.39161261916160583, |
|
"rewards/margins": 0.16117171943187714, |
|
"rewards/rejected": -0.5527843832969666, |
|
"rewards/safe_rewards": -0.4009205400943756, |
|
"rewards/unsafe_rewards": -0.4027668535709381, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5960452825649526e-07, |
|
"logits/chosen": -0.8613616228103638, |
|
"logits/rejected": -0.5483921766281128, |
|
"logps/chosen": -252.01095581054688, |
|
"logps/rejected": -236.2162628173828, |
|
"loss": 5410.1973, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.4818722605705261, |
|
"rewards/margins": 0.12459783256053925, |
|
"rewards/rejected": -0.606469988822937, |
|
"rewards/safe_rewards": -0.4409845769405365, |
|
"rewards/unsafe_rewards": -0.48863571882247925, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_logits/chosen": -0.00993373803794384, |
|
"eval_logits/rejected": 0.6948209404945374, |
|
"eval_logps/chosen": -205.43228149414062, |
|
"eval_logps/rejected": -177.0600128173828, |
|
"eval_loss": 4657.333984375, |
|
"eval_rewards/accuracies": 0.6367472410202026, |
|
"eval_rewards/chosen": -0.6508274078369141, |
|
"eval_rewards/margins": 0.09844248741865158, |
|
"eval_rewards/rejected": -0.749269962310791, |
|
"eval_rewards/safe_rewards": -0.6381882429122925, |
|
"eval_rewards/unsafe_rewards": -0.6354333162307739, |
|
"eval_runtime": 2355.0926, |
|
"eval_samples_per_second": 14.88, |
|
"eval_steps_per_second": 0.465, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.570432221710314e-07, |
|
"logits/chosen": -0.2417004406452179, |
|
"logits/rejected": 0.17007017135620117, |
|
"logps/chosen": -273.1074523925781, |
|
"logps/rejected": -236.8904266357422, |
|
"loss": 6244.0367, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.5197592973709106, |
|
"rewards/margins": 0.19909226894378662, |
|
"rewards/rejected": -0.7188515067100525, |
|
"rewards/safe_rewards": -0.6001642942428589, |
|
"rewards/unsafe_rewards": -0.5492387413978577, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5441079327251927e-07, |
|
"logits/chosen": -0.3826223909854889, |
|
"logits/rejected": 0.10965192317962646, |
|
"logps/chosen": -261.4352722167969, |
|
"logps/rejected": -251.9311065673828, |
|
"loss": 5649.8195, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.49133262038230896, |
|
"rewards/margins": 0.11736941337585449, |
|
"rewards/rejected": -0.6087020635604858, |
|
"rewards/safe_rewards": -0.4915240406990051, |
|
"rewards/unsafe_rewards": -0.4991859793663025, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5170814584435644e-07, |
|
"logits/chosen": -0.1299566924571991, |
|
"logits/rejected": 0.30430150032043457, |
|
"logps/chosen": -281.5189514160156, |
|
"logps/rejected": -248.9510040283203, |
|
"loss": 6070.9859, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5048553347587585, |
|
"rewards/margins": 0.17633280158042908, |
|
"rewards/rejected": -0.6811882257461548, |
|
"rewards/safe_rewards": -0.45997923612594604, |
|
"rewards/unsafe_rewards": -0.5042248964309692, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4893620829118124e-07, |
|
"logits/chosen": 0.41155165433883667, |
|
"logits/rejected": 0.7351133227348328, |
|
"logps/chosen": -218.6739959716797, |
|
"logps/rejected": -222.22238159179688, |
|
"loss": 5773.9555, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.5764225721359253, |
|
"rewards/margins": 0.17755261063575745, |
|
"rewards/rejected": -0.7539752125740051, |
|
"rewards/safe_rewards": -0.5707100033760071, |
|
"rewards/unsafe_rewards": -0.5930426716804504, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.460959328199497e-07, |
|
"logits/chosen": 0.4961000382900238, |
|
"logits/rejected": 0.9081694483757019, |
|
"logps/chosen": -256.54791259765625, |
|
"logps/rejected": -277.130126953125, |
|
"loss": 6108.098, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6318496465682983, |
|
"rewards/margins": 0.2199208289384842, |
|
"rewards/rejected": -0.8517705202102661, |
|
"rewards/safe_rewards": -0.6448063850402832, |
|
"rewards/unsafe_rewards": -0.5973528623580933, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4318829511283707e-07, |
|
"logits/chosen": 0.23597554862499237, |
|
"logits/rejected": 0.5608280301094055, |
|
"logps/chosen": -262.15960693359375, |
|
"logps/rejected": -276.5953369140625, |
|
"loss": 6017.0984, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7231947183609009, |
|
"rewards/margins": 0.16650545597076416, |
|
"rewards/rejected": -0.8897002339363098, |
|
"rewards/safe_rewards": -0.7144005298614502, |
|
"rewards/unsafe_rewards": -0.6883742213249207, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.40214293992074e-07, |
|
"logits/chosen": 0.30961090326309204, |
|
"logits/rejected": 0.6938155889511108, |
|
"logps/chosen": -267.58404541015625, |
|
"logps/rejected": -252.78311157226562, |
|
"loss": 6321.9309, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5275936722755432, |
|
"rewards/margins": 0.20575468242168427, |
|
"rewards/rejected": -0.7333483099937439, |
|
"rewards/safe_rewards": -0.5182517766952515, |
|
"rewards/unsafe_rewards": -0.5568464994430542, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3717495107683516e-07, |
|
"logits/chosen": 0.2671489417552948, |
|
"logits/rejected": 0.9092152714729309, |
|
"logps/chosen": -250.55960083007812, |
|
"logps/rejected": -235.89840698242188, |
|
"loss": 5574.8402, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.5318346619606018, |
|
"rewards/margins": 0.18946382403373718, |
|
"rewards/rejected": -0.7212985157966614, |
|
"rewards/safe_rewards": -0.5447245836257935, |
|
"rewards/unsafe_rewards": -0.5725606083869934, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.340713104322953e-07, |
|
"logits/chosen": 0.01171237975358963, |
|
"logits/rejected": 0.4629115164279938, |
|
"logps/chosen": -265.1495056152344, |
|
"logps/rejected": -259.7709045410156, |
|
"loss": 5202.8691, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.5935125946998596, |
|
"rewards/margins": 0.18529286980628967, |
|
"rewards/rejected": -0.7788054347038269, |
|
"rewards/safe_rewards": -0.6250792741775513, |
|
"rewards/unsafe_rewards": -0.6238072514533997, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3090443821097566e-07, |
|
"logits/chosen": 0.7814422845840454, |
|
"logits/rejected": 1.1566433906555176, |
|
"logps/chosen": -278.1474609375, |
|
"logps/rejected": -280.3294677734375, |
|
"loss": 5335.1562, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6250512599945068, |
|
"rewards/margins": 0.19450877606868744, |
|
"rewards/rejected": -0.8195600509643555, |
|
"rewards/safe_rewards": -0.5736940503120422, |
|
"rewards/unsafe_rewards": -0.6311155557632446, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.276754222865029e-07, |
|
"logits/chosen": 0.546709418296814, |
|
"logits/rejected": 1.5038117170333862, |
|
"logps/chosen": -284.0765075683594, |
|
"logps/rejected": -235.79367065429688, |
|
"loss": 5880.4258, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6524443626403809, |
|
"rewards/margins": 0.17251375317573547, |
|
"rewards/rejected": -0.8249581456184387, |
|
"rewards/safe_rewards": -0.6402295231819153, |
|
"rewards/unsafe_rewards": -0.6277676224708557, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2438537187990565e-07, |
|
"logits/chosen": 0.7865768671035767, |
|
"logits/rejected": 1.5061836242675781, |
|
"logps/chosen": -283.3603820800781, |
|
"logps/rejected": -251.56442260742188, |
|
"loss": 5760.8687, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.658532977104187, |
|
"rewards/margins": 0.21655750274658203, |
|
"rewards/rejected": -0.875090479850769, |
|
"rewards/safe_rewards": -0.6327935457229614, |
|
"rewards/unsafe_rewards": -0.6471335291862488, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.210354171785795e-07, |
|
"logits/chosen": 0.2993673086166382, |
|
"logits/rejected": 0.7917363047599792, |
|
"logps/chosen": -272.6424865722656, |
|
"logps/rejected": -247.65853881835938, |
|
"loss": 5872.0883, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5130705833435059, |
|
"rewards/margins": 0.1547364443540573, |
|
"rewards/rejected": -0.6678069829940796, |
|
"rewards/safe_rewards": -0.5059661269187927, |
|
"rewards/unsafe_rewards": -0.5222837328910828, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1762670894804775e-07, |
|
"logits/chosen": 0.09364859014749527, |
|
"logits/rejected": 0.5361107587814331, |
|
"logps/chosen": -249.59634399414062, |
|
"logps/rejected": -237.3841094970703, |
|
"loss": 5896.1926, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.49201780557632446, |
|
"rewards/margins": 0.16005203127861023, |
|
"rewards/rejected": -0.6520698070526123, |
|
"rewards/safe_rewards": -0.549709677696228, |
|
"rewards/unsafe_rewards": -0.5637668967247009, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1416041813665493e-07, |
|
"logits/chosen": -0.5552986860275269, |
|
"logits/rejected": -0.25023895502090454, |
|
"logps/chosen": -253.50790405273438, |
|
"logps/rejected": -253.32583618164062, |
|
"loss": 5920.0328, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.47500887513160706, |
|
"rewards/margins": 0.12813320755958557, |
|
"rewards/rejected": -0.6031420826911926, |
|
"rewards/safe_rewards": -0.43845662474632263, |
|
"rewards/unsafe_rewards": -0.45656904578208923, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.1063773547332584e-07, |
|
"logits/chosen": -0.46418723464012146, |
|
"logits/rejected": -0.049189966171979904, |
|
"logps/chosen": -267.15765380859375, |
|
"logps/rejected": -243.20010375976562, |
|
"loss": 6128.7578, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.6104855537414551, |
|
"rewards/margins": 0.10687772184610367, |
|
"rewards/rejected": -0.7173632383346558, |
|
"rewards/safe_rewards": -0.5476406216621399, |
|
"rewards/unsafe_rewards": -0.603262722492218, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0705987105853077e-07, |
|
"logits/chosen": -0.2697436213493347, |
|
"logits/rejected": 0.344801664352417, |
|
"logps/chosen": -252.3665313720703, |
|
"logps/rejected": -232.3540496826172, |
|
"loss": 5986.7625, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5879735350608826, |
|
"rewards/margins": 0.14302758872509003, |
|
"rewards/rejected": -0.731001079082489, |
|
"rewards/safe_rewards": -0.543707013130188, |
|
"rewards/unsafe_rewards": -0.5482696294784546, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.034280539485952e-07, |
|
"logits/chosen": -0.36558887362480164, |
|
"logits/rejected": 0.18461750447750092, |
|
"logps/chosen": -295.22119140625, |
|
"logps/rejected": -274.0675354003906, |
|
"loss": 5383.9453, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.5177947878837585, |
|
"rewards/margins": 0.21047362685203552, |
|
"rewards/rejected": -0.7282685041427612, |
|
"rewards/safe_rewards": -0.5312758684158325, |
|
"rewards/unsafe_rewards": -0.5633383393287659, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.997435317334988e-07, |
|
"logits/chosen": 0.3039137125015259, |
|
"logits/rejected": 0.7977389097213745, |
|
"logps/chosen": -279.23187255859375, |
|
"logps/rejected": -261.033935546875, |
|
"loss": 5720.7707, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5356379747390747, |
|
"rewards/margins": 0.2088995724916458, |
|
"rewards/rejected": -0.7445374131202698, |
|
"rewards/safe_rewards": -0.5458201169967651, |
|
"rewards/unsafe_rewards": -0.47182130813598633, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.960075701083074e-07, |
|
"logits/chosen": 0.06580640375614166, |
|
"logits/rejected": 0.28118953108787537, |
|
"logps/chosen": -237.80581665039062, |
|
"logps/rejected": -245.47216796875, |
|
"loss": 5702.616, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.5484215021133423, |
|
"rewards/margins": 0.16065733134746552, |
|
"rewards/rejected": -0.709078848361969, |
|
"rewards/safe_rewards": -0.5256644487380981, |
|
"rewards/unsafe_rewards": -0.5779343247413635, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.92221452438385e-07, |
|
"logits/chosen": -0.6886399388313293, |
|
"logits/rejected": -0.33862438797950745, |
|
"logps/chosen": -255.33505249023438, |
|
"logps/rejected": -234.041259765625, |
|
"loss": 5505.9277, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5285482406616211, |
|
"rewards/margins": 0.18568384647369385, |
|
"rewards/rejected": -0.7142320871353149, |
|
"rewards/safe_rewards": -0.5484398007392883, |
|
"rewards/unsafe_rewards": -0.5874748826026917, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.8838647931853684e-07, |
|
"logits/chosen": -0.7950954437255859, |
|
"logits/rejected": -0.4466307759284973, |
|
"logps/chosen": -253.4489288330078, |
|
"logps/rejected": -254.49813842773438, |
|
"loss": 6030.682, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5057817697525024, |
|
"rewards/margins": 0.20095935463905334, |
|
"rewards/rejected": -0.7067410945892334, |
|
"rewards/safe_rewards": -0.5353250503540039, |
|
"rewards/unsafe_rewards": -0.4995631277561188, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.845039681262332e-07, |
|
"logits/chosen": -0.5698283910751343, |
|
"logits/rejected": -0.1652621030807495, |
|
"logps/chosen": -265.46368408203125, |
|
"logps/rejected": -250.52951049804688, |
|
"loss": 5514.4148, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.45593494176864624, |
|
"rewards/margins": 0.1759863793849945, |
|
"rewards/rejected": -0.6319212913513184, |
|
"rewards/safe_rewards": -0.4363466799259186, |
|
"rewards/unsafe_rewards": -0.4330349862575531, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.805752525690681e-07, |
|
"logits/chosen": 0.09326216578483582, |
|
"logits/rejected": 0.7224725484848022, |
|
"logps/chosen": -253.9232940673828, |
|
"logps/rejected": -268.0160217285156, |
|
"loss": 5160.3754, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6526281237602234, |
|
"rewards/margins": 0.22083961963653564, |
|
"rewards/rejected": -0.8734676241874695, |
|
"rewards/safe_rewards": -0.6421413421630859, |
|
"rewards/unsafe_rewards": -0.6364503502845764, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7660168222660824e-07, |
|
"logits/chosen": 0.43039554357528687, |
|
"logits/rejected": 0.772833526134491, |
|
"logps/chosen": -293.98541259765625, |
|
"logps/rejected": -288.250732421875, |
|
"loss": 5855.4879, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.7387111783027649, |
|
"rewards/margins": 0.16440826654434204, |
|
"rewards/rejected": -0.9031193852424622, |
|
"rewards/safe_rewards": -0.7269446849822998, |
|
"rewards/unsafe_rewards": -0.6723185777664185, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.725846220867901e-07, |
|
"logits/chosen": -0.09916634857654572, |
|
"logits/rejected": 0.4922304153442383, |
|
"logps/chosen": -265.7640686035156, |
|
"logps/rejected": -243.7411346435547, |
|
"loss": 6137.0988, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.6147286295890808, |
|
"rewards/margins": 0.14420659840106964, |
|
"rewards/rejected": -0.7589352130889893, |
|
"rewards/safe_rewards": -0.6549733877182007, |
|
"rewards/unsafe_rewards": -0.6351133584976196, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6852545207702393e-07, |
|
"logits/chosen": -0.18887875974178314, |
|
"logits/rejected": 0.4651460647583008, |
|
"logps/chosen": -300.3460998535156, |
|
"logps/rejected": -247.0656280517578, |
|
"loss": 5956.6977, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.5610722899436951, |
|
"rewards/margins": 0.18032148480415344, |
|
"rewards/rejected": -0.7413938641548157, |
|
"rewards/safe_rewards": -0.5364476442337036, |
|
"rewards/unsafe_rewards": -0.5671006441116333, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6442556659016475e-07, |
|
"logits/chosen": 0.3691898286342621, |
|
"logits/rejected": 1.0192655324935913, |
|
"logps/chosen": -278.3470458984375, |
|
"logps/rejected": -240.86141967773438, |
|
"loss": 5414.8289, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5899799466133118, |
|
"rewards/margins": 0.20228877663612366, |
|
"rewards/rejected": -0.7922687530517578, |
|
"rewards/safe_rewards": -0.5520480871200562, |
|
"rewards/unsafe_rewards": -0.5946981906890869, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.602863740055161e-07, |
|
"logits/chosen": 1.002415418624878, |
|
"logits/rejected": 1.6322085857391357, |
|
"logps/chosen": -268.44488525390625, |
|
"logps/rejected": -261.2592468261719, |
|
"loss": 5358.4598, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6824139356613159, |
|
"rewards/margins": 0.22263555228710175, |
|
"rewards/rejected": -0.9050495028495789, |
|
"rewards/safe_rewards": -0.6642250418663025, |
|
"rewards/unsafe_rewards": -0.6494946479797363, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.5610929620502747e-07, |
|
"logits/chosen": 0.9502559900283813, |
|
"logits/rejected": 1.4719197750091553, |
|
"logps/chosen": -271.93231201171875, |
|
"logps/rejected": -281.78125, |
|
"loss": 5792.9727, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.7460067272186279, |
|
"rewards/margins": 0.18493010103702545, |
|
"rewards/rejected": -0.9309368133544922, |
|
"rewards/safe_rewards": -0.7411947846412659, |
|
"rewards/unsafe_rewards": -0.8093317151069641, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5189576808485404e-07, |
|
"logits/chosen": 0.7791315913200378, |
|
"logits/rejected": 1.4415690898895264, |
|
"logps/chosen": -300.54150390625, |
|
"logps/rejected": -273.402587890625, |
|
"loss": 5584.2125, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7409987449645996, |
|
"rewards/margins": 0.20648033916950226, |
|
"rewards/rejected": -0.9474791288375854, |
|
"rewards/safe_rewards": -0.726071834564209, |
|
"rewards/unsafe_rewards": -0.8359003067016602, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.476472370624464e-07, |
|
"logits/chosen": 0.40392106771469116, |
|
"logits/rejected": 0.7413457632064819, |
|
"logps/chosen": -254.9908905029297, |
|
"logps/rejected": -251.4073028564453, |
|
"loss": 6101.9039, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6420382261276245, |
|
"rewards/margins": 0.13990595936775208, |
|
"rewards/rejected": -0.7819441556930542, |
|
"rewards/safe_rewards": -0.5959726572036743, |
|
"rewards/unsafe_rewards": -0.6521440744400024, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.43365162579338e-07, |
|
"logits/chosen": 0.11586692184209824, |
|
"logits/rejected": 0.49579864740371704, |
|
"logps/chosen": -226.8084716796875, |
|
"logps/rejected": -232.3746337890625, |
|
"loss": 5837.0383, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.551177442073822, |
|
"rewards/margins": 0.19108565151691437, |
|
"rewards/rejected": -0.7422630190849304, |
|
"rewards/safe_rewards": -0.5533746480941772, |
|
"rewards/unsafe_rewards": -0.5072416663169861, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.390510155998023e-07, |
|
"logits/chosen": 0.24915654957294464, |
|
"logits/rejected": 0.6536698341369629, |
|
"logps/chosen": -277.9824523925781, |
|
"logps/rejected": -249.2000732421875, |
|
"loss": 5721.2586, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.648623526096344, |
|
"rewards/margins": 0.12514245510101318, |
|
"rewards/rejected": -0.7737659811973572, |
|
"rewards/safe_rewards": -0.7092838287353516, |
|
"rewards/unsafe_rewards": -0.6900613903999329, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.347062781055526e-07, |
|
"logits/chosen": 0.5860965847969055, |
|
"logits/rejected": 0.9803635478019714, |
|
"logps/chosen": -245.1415252685547, |
|
"logps/rejected": -272.01080322265625, |
|
"loss": 5834.2676, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6521397829055786, |
|
"rewards/margins": 0.21285566687583923, |
|
"rewards/rejected": -0.8649954795837402, |
|
"rewards/safe_rewards": -0.6472452878952026, |
|
"rewards/unsafe_rewards": -0.6902757883071899, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.303324425866559e-07, |
|
"logits/chosen": 0.6316410303115845, |
|
"logits/rejected": 0.902866005897522, |
|
"logps/chosen": -291.68597412109375, |
|
"logps/rejected": -266.18585205078125, |
|
"loss": 5964.1836, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6263974905014038, |
|
"rewards/margins": 0.17340168356895447, |
|
"rewards/rejected": -0.7997991442680359, |
|
"rewards/safe_rewards": -0.6621179580688477, |
|
"rewards/unsafe_rewards": -0.6091993451118469, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2593101152883795e-07, |
|
"logits/chosen": 0.6831669211387634, |
|
"logits/rejected": 0.9902046918869019, |
|
"logps/chosen": -256.2884521484375, |
|
"logps/rejected": -279.5752868652344, |
|
"loss": 5961.9836, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6823039054870605, |
|
"rewards/margins": 0.17010322213172913, |
|
"rewards/rejected": -0.8524071574211121, |
|
"rewards/safe_rewards": -0.6452068090438843, |
|
"rewards/unsafe_rewards": -0.7062270641326904, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.21503496897354e-07, |
|
"logits/chosen": 0.48068660497665405, |
|
"logits/rejected": 0.952492892742157, |
|
"logps/chosen": -289.909423828125, |
|
"logps/rejected": -262.1679992675781, |
|
"loss": 6021.2465, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7239787578582764, |
|
"rewards/margins": 0.12146921455860138, |
|
"rewards/rejected": -0.8454478979110718, |
|
"rewards/safe_rewards": -0.7816897630691528, |
|
"rewards/unsafe_rewards": -0.7392334938049316, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.170514196176037e-07, |
|
"logits/chosen": 0.28930729627609253, |
|
"logits/rejected": 0.6634337902069092, |
|
"logps/chosen": -267.9020080566406, |
|
"logps/rejected": -267.813720703125, |
|
"loss": 5325.9504, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6826976537704468, |
|
"rewards/margins": 0.18379981815814972, |
|
"rewards/rejected": -0.8664973974227905, |
|
"rewards/safe_rewards": -0.6970924139022827, |
|
"rewards/unsafe_rewards": -0.6835001111030579, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.125763090526674e-07, |
|
"logits/chosen": 0.21367737650871277, |
|
"logits/rejected": 0.6621453166007996, |
|
"logps/chosen": -278.2737731933594, |
|
"logps/rejected": -269.89404296875, |
|
"loss": 5261.0746, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6765376329421997, |
|
"rewards/margins": 0.20078134536743164, |
|
"rewards/rejected": -0.8773189783096313, |
|
"rewards/safe_rewards": -0.6867783665657043, |
|
"rewards/unsafe_rewards": -0.6920818090438843, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.080797024779447e-07, |
|
"logits/chosen": 0.19137686491012573, |
|
"logits/rejected": 0.7889005541801453, |
|
"logps/chosen": -253.41421508789062, |
|
"logps/rejected": -236.6729278564453, |
|
"loss": 5719.0418, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6732780933380127, |
|
"rewards/margins": 0.19284026324748993, |
|
"rewards/rejected": -0.866118311882019, |
|
"rewards/safe_rewards": -0.7765754461288452, |
|
"rewards/unsafe_rewards": -0.682191014289856, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.035631445530743e-07, |
|
"logits/chosen": 0.4879905581474304, |
|
"logits/rejected": 0.9158290028572083, |
|
"logps/chosen": -290.2519226074219, |
|
"logps/rejected": -284.17071533203125, |
|
"loss": 5561.2797, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7149994969367981, |
|
"rewards/margins": 0.19377604126930237, |
|
"rewards/rejected": -0.9087755084037781, |
|
"rewards/safe_rewards": -0.6696754693984985, |
|
"rewards/unsafe_rewards": -0.6708149313926697, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.9902818679131775e-07, |
|
"logits/chosen": 0.3951093852519989, |
|
"logits/rejected": 0.8302197456359863, |
|
"logps/chosen": -271.294189453125, |
|
"logps/rejected": -253.5810546875, |
|
"loss": 5419.4855, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7780183553695679, |
|
"rewards/margins": 0.17024961113929749, |
|
"rewards/rejected": -0.9482680559158325, |
|
"rewards/safe_rewards": -0.7877544164657593, |
|
"rewards/unsafe_rewards": -0.7789348363876343, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.944763870265886e-07, |
|
"logits/chosen": -0.13839875161647797, |
|
"logits/rejected": 0.3581174314022064, |
|
"logps/chosen": -272.4313659667969, |
|
"logps/rejected": -267.915771484375, |
|
"loss": 5453.8977, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6422435641288757, |
|
"rewards/margins": 0.19745132327079773, |
|
"rewards/rejected": -0.8396948575973511, |
|
"rewards/safe_rewards": -0.6758723258972168, |
|
"rewards/unsafe_rewards": -0.578320324420929, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.899093088783105e-07, |
|
"logits/chosen": -0.06241287663578987, |
|
"logits/rejected": 0.4015175700187683, |
|
"logps/chosen": -294.8834533691406, |
|
"logps/rejected": -279.0429382324219, |
|
"loss": 5278.1754, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.6345726847648621, |
|
"rewards/margins": 0.14065605401992798, |
|
"rewards/rejected": -0.7752287983894348, |
|
"rewards/safe_rewards": -0.6587311029434204, |
|
"rewards/unsafe_rewards": -0.6476761102676392, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8532852121428733e-07, |
|
"logits/chosen": -0.04936225712299347, |
|
"logits/rejected": 0.38959282636642456, |
|
"logps/chosen": -248.14639282226562, |
|
"logps/rejected": -235.8994598388672, |
|
"loss": 5653.668, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5577735304832458, |
|
"rewards/margins": 0.21775202453136444, |
|
"rewards/rejected": -0.7755255699157715, |
|
"rewards/safe_rewards": -0.55736243724823, |
|
"rewards/unsafe_rewards": -0.5908164978027344, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.807355976117716e-07, |
|
"logits/chosen": 0.11599000543355942, |
|
"logits/rejected": 0.49212461709976196, |
|
"logps/chosen": -284.78472900390625, |
|
"logps/rejected": -265.7978515625, |
|
"loss": 5924.3578, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5290887355804443, |
|
"rewards/margins": 0.22062186896800995, |
|
"rewards/rejected": -0.7497105598449707, |
|
"rewards/safe_rewards": -0.4509585499763489, |
|
"rewards/unsafe_rewards": -0.5535848736763, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.761321158169134e-07, |
|
"logits/chosen": -0.0665382593870163, |
|
"logits/rejected": 0.4467547535896301, |
|
"logps/chosen": -262.4479064941406, |
|
"logps/rejected": -265.8846740722656, |
|
"loss": 5391.7484, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.604932427406311, |
|
"rewards/margins": 0.16624750196933746, |
|
"rewards/rejected": -0.7711800336837769, |
|
"rewards/safe_rewards": -0.570032000541687, |
|
"rewards/unsafe_rewards": -0.6088122129440308, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.715196572027789e-07, |
|
"logits/chosen": 0.15862391889095306, |
|
"logits/rejected": 0.511070966720581, |
|
"logps/chosen": -252.94137573242188, |
|
"logps/rejected": -255.08187866210938, |
|
"loss": 5628.2164, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6333836913108826, |
|
"rewards/margins": 0.20889365673065186, |
|
"rewards/rejected": -0.8422773480415344, |
|
"rewards/safe_rewards": -0.6369217038154602, |
|
"rewards/unsafe_rewards": -0.6703649163246155, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6689980622612204e-07, |
|
"logits/chosen": 0.08565627038478851, |
|
"logits/rejected": 0.5222666263580322, |
|
"logps/chosen": -255.2662811279297, |
|
"logps/rejected": -253.49105834960938, |
|
"loss": 5634.6316, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.6020347476005554, |
|
"rewards/margins": 0.19342327117919922, |
|
"rewards/rejected": -0.7954580187797546, |
|
"rewards/safe_rewards": -0.6501786708831787, |
|
"rewards/unsafe_rewards": -0.6461445093154907, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_logits/chosen": 0.41202229261398315, |
|
"eval_logits/rejected": 1.1542474031448364, |
|
"eval_logps/chosen": -220.34913635253906, |
|
"eval_logps/rejected": -189.61671447753906, |
|
"eval_loss": 4507.89453125, |
|
"eval_rewards/accuracies": 0.6151915788650513, |
|
"eval_rewards/chosen": -0.799996018409729, |
|
"eval_rewards/margins": 0.07484080642461777, |
|
"eval_rewards/rejected": -0.874836802482605, |
|
"eval_rewards/safe_rewards": -0.7885684370994568, |
|
"eval_rewards/unsafe_rewards": -0.784635066986084, |
|
"eval_runtime": 2353.482, |
|
"eval_samples_per_second": 14.89, |
|
"eval_steps_per_second": 0.466, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.622741498830969e-07, |
|
"logits/chosen": 0.2431926727294922, |
|
"logits/rejected": 0.40795207023620605, |
|
"logps/chosen": -279.1517333984375, |
|
"logps/rejected": -271.7449645996094, |
|
"loss": 5872.2367, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6438090801239014, |
|
"rewards/margins": 0.17429831624031067, |
|
"rewards/rejected": -0.8181073069572449, |
|
"rewards/safe_rewards": -0.6910767555236816, |
|
"rewards/unsafe_rewards": -0.6460915803909302, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5764427716409815e-07, |
|
"logits/chosen": -0.09687475860118866, |
|
"logits/rejected": 0.4301505982875824, |
|
"logps/chosen": -272.0554504394531, |
|
"logps/rejected": -255.6719207763672, |
|
"loss": 5816.6723, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5806029438972473, |
|
"rewards/margins": 0.19818606972694397, |
|
"rewards/rejected": -0.7787889838218689, |
|
"rewards/safe_rewards": -0.5169692635536194, |
|
"rewards/unsafe_rewards": -0.5289751291275024, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5301177850791616e-07, |
|
"logits/chosen": 0.01663217321038246, |
|
"logits/rejected": 0.6527854204177856, |
|
"logps/chosen": -290.3711853027344, |
|
"logps/rejected": -268.1048278808594, |
|
"loss": 5912.7102, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6295832395553589, |
|
"rewards/margins": 0.20760869979858398, |
|
"rewards/rejected": -0.8371919393539429, |
|
"rewards/safe_rewards": -0.642471432685852, |
|
"rewards/unsafe_rewards": -0.6146708726882935, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4837824525539477e-07, |
|
"logits/chosen": 0.17375509440898895, |
|
"logits/rejected": 0.7390264272689819, |
|
"logps/chosen": -270.261474609375, |
|
"logps/rejected": -261.2465515136719, |
|
"loss": 5659.6238, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6727645993232727, |
|
"rewards/margins": 0.17281220853328705, |
|
"rewards/rejected": -0.8455768823623657, |
|
"rewards/safe_rewards": -0.6424635052680969, |
|
"rewards/unsafe_rewards": -0.6337414979934692, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4374526910277886e-07, |
|
"logits/chosen": 0.13272862136363983, |
|
"logits/rejected": 0.57741779088974, |
|
"logps/chosen": -270.9297790527344, |
|
"logps/rejected": -267.14471435546875, |
|
"loss": 5861.1039, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6448026895523071, |
|
"rewards/margins": 0.2006601095199585, |
|
"rewards/rejected": -0.8454626798629761, |
|
"rewards/safe_rewards": -0.6065593361854553, |
|
"rewards/unsafe_rewards": -0.6479047536849976, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.391144415549403e-07, |
|
"logits/chosen": 0.2520432770252228, |
|
"logits/rejected": 0.7386651039123535, |
|
"logps/chosen": -256.0111389160156, |
|
"logps/rejected": -244.1455535888672, |
|
"loss": 5928.0605, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.6962358355522156, |
|
"rewards/margins": 0.125870481133461, |
|
"rewards/rejected": -0.8221063613891602, |
|
"rewards/safe_rewards": -0.6803200244903564, |
|
"rewards/unsafe_rewards": -0.6994472742080688, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3448735337866919e-07, |
|
"logits/chosen": 0.26303520798683167, |
|
"logits/rejected": 0.7426208257675171, |
|
"logps/chosen": -247.3863983154297, |
|
"logps/rejected": -244.02392578125, |
|
"loss": 5880.1039, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6635211706161499, |
|
"rewards/margins": 0.15260052680969238, |
|
"rewards/rejected": -0.8161218762397766, |
|
"rewards/safe_rewards": -0.706309974193573, |
|
"rewards/unsafe_rewards": -0.6638337969779968, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.2986559405621886e-07, |
|
"logits/chosen": 0.030937856063246727, |
|
"logits/rejected": 0.47169026732444763, |
|
"logps/chosen": -279.0972595214844, |
|
"logps/rejected": -268.9930725097656, |
|
"loss": 5616.6, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6163111925125122, |
|
"rewards/margins": 0.16996563971042633, |
|
"rewards/rejected": -0.7862768173217773, |
|
"rewards/safe_rewards": -0.6654713749885559, |
|
"rewards/unsafe_rewards": -0.6399198770523071, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2525075123929213e-07, |
|
"logits/chosen": 0.43386760354042053, |
|
"logits/rejected": 0.7538164258003235, |
|
"logps/chosen": -267.44134521484375, |
|
"logps/rejected": -258.99249267578125, |
|
"loss": 5716.7879, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.6649960279464722, |
|
"rewards/margins": 0.22522863745689392, |
|
"rewards/rejected": -0.890224814414978, |
|
"rewards/safe_rewards": -0.6375536322593689, |
|
"rewards/unsafe_rewards": -0.6348733901977539, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.206444102036565e-07, |
|
"logits/chosen": 0.6684126257896423, |
|
"logits/rejected": 0.9879862666130066, |
|
"logps/chosen": -267.1449279785156, |
|
"logps/rejected": -270.4283752441406, |
|
"loss": 5974.3918, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.74274742603302, |
|
"rewards/margins": 0.15645694732666016, |
|
"rewards/rejected": -0.899204432964325, |
|
"rewards/safe_rewards": -0.7267962694168091, |
|
"rewards/unsafe_rewards": -0.6818505525588989, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.160481533045751e-07, |
|
"logits/chosen": 0.4061971604824066, |
|
"logits/rejected": 0.9739459753036499, |
|
"logps/chosen": -285.2103271484375, |
|
"logps/rejected": -266.5544128417969, |
|
"loss": 5749.7781, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7457272410392761, |
|
"rewards/margins": 0.2004440277814865, |
|
"rewards/rejected": -0.9461711645126343, |
|
"rewards/safe_rewards": -0.7860220670700073, |
|
"rewards/unsafe_rewards": -0.7390663623809814, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1146355943324148e-07, |
|
"logits/chosen": 0.48321422934532166, |
|
"logits/rejected": 0.9058516621589661, |
|
"logps/chosen": -271.53924560546875, |
|
"logps/rejected": -259.0006103515625, |
|
"loss": 5805.548, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.7600331902503967, |
|
"rewards/margins": 0.13751891255378723, |
|
"rewards/rejected": -0.8975521326065063, |
|
"rewards/safe_rewards": -0.7516414523124695, |
|
"rewards/unsafe_rewards": -0.7484757304191589, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0689220347440374e-07, |
|
"logits/chosen": 0.1501261442899704, |
|
"logits/rejected": 0.688166618347168, |
|
"logps/chosen": -301.4822082519531, |
|
"logps/rejected": -273.8033447265625, |
|
"loss": 5622.9852, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6868051290512085, |
|
"rewards/margins": 0.17512689530849457, |
|
"rewards/rejected": -0.8619319796562195, |
|
"rewards/safe_rewards": -0.6461024284362793, |
|
"rewards/unsafe_rewards": -0.6649470329284668, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0233565576536564e-07, |
|
"logits/chosen": 0.05991173908114433, |
|
"logits/rejected": 0.42331352829933167, |
|
"logps/chosen": -294.298095703125, |
|
"logps/rejected": -287.5555419921875, |
|
"loss": 5822.3992, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.7161829471588135, |
|
"rewards/margins": 0.13876894116401672, |
|
"rewards/rejected": -0.8549518585205078, |
|
"rewards/safe_rewards": -0.7057495713233948, |
|
"rewards/unsafe_rewards": -0.6698770523071289, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.97795481556549e-07, |
|
"logits/chosen": -0.03588150069117546, |
|
"logits/rejected": 0.400505006313324, |
|
"logps/chosen": -277.2012023925781, |
|
"logps/rejected": -247.14804077148438, |
|
"loss": 5935.0914, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6964778304100037, |
|
"rewards/margins": 0.17653243243694305, |
|
"rewards/rejected": -0.8730102777481079, |
|
"rewards/safe_rewards": -0.6869702339172363, |
|
"rewards/unsafe_rewards": -0.6601093411445618, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9327324047380422e-07, |
|
"logits/chosen": -0.08701475709676743, |
|
"logits/rejected": 0.4873865246772766, |
|
"logps/chosen": -263.2158203125, |
|
"logps/rejected": -258.84039306640625, |
|
"loss": 5564.0863, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6252955198287964, |
|
"rewards/margins": 0.22415871918201447, |
|
"rewards/rejected": -0.8494542241096497, |
|
"rewards/safe_rewards": -0.6420432329177856, |
|
"rewards/unsafe_rewards": -0.6124902963638306, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.887704859826528e-07, |
|
"logits/chosen": 0.07522957026958466, |
|
"logits/rejected": 0.3329767882823944, |
|
"logps/chosen": -285.8026123046875, |
|
"logps/rejected": -266.8732604980469, |
|
"loss": 5750.982, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6510334014892578, |
|
"rewards/margins": 0.10930682718753815, |
|
"rewards/rejected": -0.7603402137756348, |
|
"rewards/safe_rewards": -0.6223952174186707, |
|
"rewards/unsafe_rewards": -0.6682702302932739, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8428876485464572e-07, |
|
"logits/chosen": -0.15613001585006714, |
|
"logits/rejected": 0.41360145807266235, |
|
"logps/chosen": -238.16897583007812, |
|
"logps/rejected": -225.97802734375, |
|
"loss": 5979.2156, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5804222822189331, |
|
"rewards/margins": 0.1743427962064743, |
|
"rewards/rejected": -0.7547650933265686, |
|
"rewards/safe_rewards": -0.5962327718734741, |
|
"rewards/unsafe_rewards": -0.6777797341346741, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.798296166360216e-07, |
|
"logits/chosen": -0.029682714492082596, |
|
"logits/rejected": 0.5113533139228821, |
|
"logps/chosen": -290.142822265625, |
|
"logps/rejected": -269.4226989746094, |
|
"loss": 6057.1922, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6241404414176941, |
|
"rewards/margins": 0.1994599997997284, |
|
"rewards/rejected": -0.8236004114151001, |
|
"rewards/safe_rewards": -0.6254442930221558, |
|
"rewards/unsafe_rewards": -0.6271675229072571, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7539457311884675e-07, |
|
"logits/chosen": 0.1500866711139679, |
|
"logits/rejected": 0.5680428743362427, |
|
"logps/chosen": -262.3311462402344, |
|
"logps/rejected": -251.67489624023438, |
|
"loss": 5421.8398, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6509288549423218, |
|
"rewards/margins": 0.2198909968137741, |
|
"rewards/rejected": -0.8708198666572571, |
|
"rewards/safe_rewards": -0.6651867032051086, |
|
"rewards/unsafe_rewards": -0.6189877390861511, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7098515781481883e-07, |
|
"logits/chosen": 0.4903317987918854, |
|
"logits/rejected": 0.883372962474823, |
|
"logps/chosen": -272.56097412109375, |
|
"logps/rejected": -241.92919921875, |
|
"loss": 5678.3117, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.6993108987808228, |
|
"rewards/margins": 0.11801446974277496, |
|
"rewards/rejected": -0.8173252940177917, |
|
"rewards/safe_rewards": -0.6638237237930298, |
|
"rewards/unsafe_rewards": -0.6766722202301025, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6660288543191568e-07, |
|
"logits/chosen": 0.20008230209350586, |
|
"logits/rejected": 1.072401523590088, |
|
"logps/chosen": -292.7231140136719, |
|
"logps/rejected": -264.1849365234375, |
|
"loss": 5411.0453, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6634177565574646, |
|
"rewards/margins": 0.19502988457679749, |
|
"rewards/rejected": -0.8584476709365845, |
|
"rewards/safe_rewards": -0.7102524638175964, |
|
"rewards/unsafe_rewards": -0.6833497285842896, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6224926135406693e-07, |
|
"logits/chosen": 0.4110666811466217, |
|
"logits/rejected": 0.9241645932197571, |
|
"logps/chosen": -291.5517272949219, |
|
"logps/rejected": -268.79437255859375, |
|
"loss": 5535.6395, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6780111193656921, |
|
"rewards/margins": 0.2115507870912552, |
|
"rewards/rejected": -0.8895619511604309, |
|
"rewards/safe_rewards": -0.6748231053352356, |
|
"rewards/unsafe_rewards": -0.7003692984580994, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.579257811240298e-07, |
|
"logits/chosen": 0.17879924178123474, |
|
"logits/rejected": 0.82609623670578, |
|
"logps/chosen": -283.47686767578125, |
|
"logps/rejected": -269.6540832519531, |
|
"loss": 5427.3156, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.7036404609680176, |
|
"rewards/margins": 0.14344856142997742, |
|
"rewards/rejected": -0.8470889925956726, |
|
"rewards/safe_rewards": -0.6846009492874146, |
|
"rewards/unsafe_rewards": -0.6783186197280884, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5363392992964523e-07, |
|
"logits/chosen": 0.4139084815979004, |
|
"logits/rejected": 0.7215920686721802, |
|
"logps/chosen": -257.33319091796875, |
|
"logps/rejected": -258.1666564941406, |
|
"loss": 5595.8969, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.7196224927902222, |
|
"rewards/margins": 0.11075691878795624, |
|
"rewards/rejected": -0.8303793668746948, |
|
"rewards/safe_rewards": -0.7594167590141296, |
|
"rewards/unsafe_rewards": -0.7032173275947571, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4937518209365108e-07, |
|
"logits/chosen": 0.2804068922996521, |
|
"logits/rejected": 0.7492934465408325, |
|
"logps/chosen": -299.9917297363281, |
|
"logps/rejected": -274.86566162109375, |
|
"loss": 5485.5156, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6413429975509644, |
|
"rewards/margins": 0.18771231174468994, |
|
"rewards/rejected": -0.8290553092956543, |
|
"rewards/safe_rewards": -0.6320935487747192, |
|
"rewards/unsafe_rewards": -0.6288415789604187, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4515100056722708e-07, |
|
"logits/chosen": 0.49235549569129944, |
|
"logits/rejected": 0.896806538105011, |
|
"logps/chosen": -250.7898712158203, |
|
"logps/rejected": -248.735107421875, |
|
"loss": 5635.8461, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6494947671890259, |
|
"rewards/margins": 0.2068520337343216, |
|
"rewards/rejected": -0.8563467860221863, |
|
"rewards/safe_rewards": -0.6947168707847595, |
|
"rewards/unsafe_rewards": -0.6628744602203369, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4096283642744716e-07, |
|
"logits/chosen": 0.564648449420929, |
|
"logits/rejected": 1.1666864156723022, |
|
"logps/chosen": -287.2496337890625, |
|
"logps/rejected": -269.12689208984375, |
|
"loss": 5744.0652, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6512799263000488, |
|
"rewards/margins": 0.23767797648906708, |
|
"rewards/rejected": -0.8889577984809875, |
|
"rewards/safe_rewards": -0.6507743000984192, |
|
"rewards/unsafe_rewards": -0.6260145306587219, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3681212837880977e-07, |
|
"logits/chosen": 0.3310979902744293, |
|
"logits/rejected": 0.946731686592102, |
|
"logps/chosen": -283.14178466796875, |
|
"logps/rejected": -268.6293029785156, |
|
"loss": 5538.1773, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6541503667831421, |
|
"rewards/margins": 0.20235121250152588, |
|
"rewards/rejected": -0.856501579284668, |
|
"rewards/safe_rewards": -0.7126244902610779, |
|
"rewards/unsafe_rewards": -0.6116858124732971, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3270030225901908e-07, |
|
"logits/chosen": 0.21446232497692108, |
|
"logits/rejected": 0.9988247156143188, |
|
"logps/chosen": -311.952392578125, |
|
"logps/rejected": -264.99005126953125, |
|
"loss": 5863.9875, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6609299778938293, |
|
"rewards/margins": 0.20790867507457733, |
|
"rewards/rejected": -0.8688386678695679, |
|
"rewards/safe_rewards": -0.6820018291473389, |
|
"rewards/unsafe_rewards": -0.6768487691879272, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2862877054918572e-07, |
|
"logits/chosen": 0.43877673149108887, |
|
"logits/rejected": 0.7122836112976074, |
|
"logps/chosen": -263.78924560546875, |
|
"logps/rejected": -267.306884765625, |
|
"loss": 5915.4555, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6279779672622681, |
|
"rewards/margins": 0.19203224778175354, |
|
"rewards/rejected": -0.8200103044509888, |
|
"rewards/safe_rewards": -0.5540001392364502, |
|
"rewards/unsafe_rewards": -0.6103017926216125, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2459893188861613e-07, |
|
"logits/chosen": 0.11050845682621002, |
|
"logits/rejected": 0.638201117515564, |
|
"logps/chosen": -230.92892456054688, |
|
"logps/rejected": -223.246826171875, |
|
"loss": 5522.6379, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.5677499771118164, |
|
"rewards/margins": 0.1929033249616623, |
|
"rewards/rejected": -0.7606532573699951, |
|
"rewards/safe_rewards": -0.6029695272445679, |
|
"rewards/unsafe_rewards": -0.6227617859840393, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.206121705943558e-07, |
|
"logits/chosen": 0.2380530834197998, |
|
"logits/rejected": 0.772462785243988, |
|
"logps/chosen": -265.9678039550781, |
|
"logps/rejected": -236.330078125, |
|
"loss": 5444.8687, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5695582628250122, |
|
"rewards/margins": 0.17861400544643402, |
|
"rewards/rejected": -0.7481723427772522, |
|
"rewards/safe_rewards": -0.4967488646507263, |
|
"rewards/unsafe_rewards": -0.5609390139579773, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1666985618565422e-07, |
|
"logits/chosen": 0.7791303396224976, |
|
"logits/rejected": 1.0070080757141113, |
|
"logps/chosen": -239.6016082763672, |
|
"logps/rejected": -250.1675567626953, |
|
"loss": 5496.5402, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.643204391002655, |
|
"rewards/margins": 0.212922140955925, |
|
"rewards/rejected": -0.856126606464386, |
|
"rewards/safe_rewards": -0.6307708024978638, |
|
"rewards/unsafe_rewards": -0.6205247044563293, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1277334291351145e-07, |
|
"logits/chosen": 0.6811083555221558, |
|
"logits/rejected": 1.2308669090270996, |
|
"logps/chosen": -240.9481964111328, |
|
"logps/rejected": -251.2366485595703, |
|
"loss": 5451.2172, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6521676778793335, |
|
"rewards/margins": 0.1860547959804535, |
|
"rewards/rejected": -0.8382223844528198, |
|
"rewards/safe_rewards": -0.7259255647659302, |
|
"rewards/unsafe_rewards": -0.6219838857650757, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.089239692954701e-07, |
|
"logits/chosen": 0.36615195870399475, |
|
"logits/rejected": 0.9472381472587585, |
|
"logps/chosen": -269.5465087890625, |
|
"logps/rejected": -256.1499328613281, |
|
"loss": 5717.6105, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6657227873802185, |
|
"rewards/margins": 0.15908706188201904, |
|
"rewards/rejected": -0.8248098492622375, |
|
"rewards/safe_rewards": -0.7341758012771606, |
|
"rewards/unsafe_rewards": -0.6227680444717407, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.051230576558127e-07, |
|
"logits/chosen": 0.7043350338935852, |
|
"logits/rejected": 1.012446641921997, |
|
"logps/chosen": -265.9175720214844, |
|
"logps/rejected": -296.2731628417969, |
|
"loss": 5307.2445, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.7264591455459595, |
|
"rewards/margins": 0.1706809252500534, |
|
"rewards/rejected": -0.8971401453018188, |
|
"rewards/safe_rewards": -0.7796869277954102, |
|
"rewards/unsafe_rewards": -0.7442405819892883, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0137191367132078e-07, |
|
"logits/chosen": 0.5799378156661987, |
|
"logits/rejected": 1.0962615013122559, |
|
"logps/chosen": -280.27587890625, |
|
"logps/rejected": -261.3016052246094, |
|
"loss": 5462.4613, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.659958004951477, |
|
"rewards/margins": 0.24963033199310303, |
|
"rewards/rejected": -0.9095882177352905, |
|
"rewards/safe_rewards": -0.6955925226211548, |
|
"rewards/unsafe_rewards": -0.6324699521064758, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.76718259227532e-08, |
|
"logits/chosen": 0.498538076877594, |
|
"logits/rejected": 0.9989287257194519, |
|
"logps/chosen": -272.96820068359375, |
|
"logps/rejected": -256.63140869140625, |
|
"loss": 5331.4734, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6205289363861084, |
|
"rewards/margins": 0.21373698115348816, |
|
"rewards/rejected": -0.8342660069465637, |
|
"rewards/safe_rewards": -0.5949203372001648, |
|
"rewards/unsafe_rewards": -0.6141771674156189, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.402406545219676e-08, |
|
"logits/chosen": 0.34590667486190796, |
|
"logits/rejected": 0.8703553080558777, |
|
"logps/chosen": -273.8531188964844, |
|
"logps/rejected": -247.87466430664062, |
|
"loss": 5546.1305, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6622526049613953, |
|
"rewards/margins": 0.1561700403690338, |
|
"rewards/rejected": -0.8184226751327515, |
|
"rewards/safe_rewards": -0.6668413281440735, |
|
"rewards/unsafe_rewards": -0.6589676141738892, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.042988532644249e-08, |
|
"logits/chosen": 0.2142190933227539, |
|
"logits/rejected": 0.5996747016906738, |
|
"logps/chosen": -308.82635498046875, |
|
"logps/rejected": -276.37823486328125, |
|
"loss": 5583.4395, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5863175392150879, |
|
"rewards/margins": 0.23458845913410187, |
|
"rewards/rejected": -0.8209059834480286, |
|
"rewards/safe_rewards": -0.5638710260391235, |
|
"rewards/unsafe_rewards": -0.5323917269706726, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.689052020653592e-08, |
|
"logits/chosen": -0.06605692207813263, |
|
"logits/rejected": 0.6343873739242554, |
|
"logps/chosen": -285.37225341796875, |
|
"logps/rejected": -252.3105010986328, |
|
"loss": 5576.0598, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5753235816955566, |
|
"rewards/margins": 0.2064014971256256, |
|
"rewards/rejected": -0.7817251086235046, |
|
"rewards/safe_rewards": -0.5231102705001831, |
|
"rewards/unsafe_rewards": -0.5478030443191528, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.340718592365037e-08, |
|
"logits/chosen": 0.4551053047180176, |
|
"logits/rejected": 0.6916473507881165, |
|
"logps/chosen": -259.25543212890625, |
|
"logps/rejected": -269.81097412109375, |
|
"loss": 5258.8734, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.6683470010757446, |
|
"rewards/margins": 0.16762246191501617, |
|
"rewards/rejected": -0.8359693288803101, |
|
"rewards/safe_rewards": -0.6167613863945007, |
|
"rewards/unsafe_rewards": -0.6983481645584106, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.998107906142839e-08, |
|
"logits/chosen": 0.4198254942893982, |
|
"logits/rejected": 0.9249162673950195, |
|
"logps/chosen": -256.2335205078125, |
|
"logps/rejected": -243.9502716064453, |
|
"loss": 5150.4359, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6530503034591675, |
|
"rewards/margins": 0.22125795483589172, |
|
"rewards/rejected": -0.8743082880973816, |
|
"rewards/safe_rewards": -0.6435777544975281, |
|
"rewards/unsafe_rewards": -0.6962872743606567, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.661337654493575e-08, |
|
"logits/chosen": 0.11405469477176666, |
|
"logits/rejected": 0.8541787266731262, |
|
"logps/chosen": -285.04632568359375, |
|
"logps/rejected": -264.7653503417969, |
|
"loss": 5838.1379, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6224103569984436, |
|
"rewards/margins": 0.20319974422454834, |
|
"rewards/rejected": -0.8256100416183472, |
|
"rewards/safe_rewards": -0.6171637773513794, |
|
"rewards/unsafe_rewards": -0.5961381793022156, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.330523523636751e-08, |
|
"logits/chosen": 0.33853933215141296, |
|
"logits/rejected": 0.5890348553657532, |
|
"logps/chosen": -267.7184753417969, |
|
"logps/rejected": -279.6230163574219, |
|
"loss": 5326.7477, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6186683177947998, |
|
"rewards/margins": 0.19817940890789032, |
|
"rewards/rejected": -0.8168476819992065, |
|
"rewards/safe_rewards": -0.6040722727775574, |
|
"rewards/unsafe_rewards": -0.6181649565696716, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.005779153764682e-08, |
|
"logits/chosen": 0.4181288182735443, |
|
"logits/rejected": 0.7393978238105774, |
|
"logps/chosen": -249.9525909423828, |
|
"logps/rejected": -242.4307861328125, |
|
"loss": 5633.5648, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6368721723556519, |
|
"rewards/margins": 0.15112480521202087, |
|
"rewards/rejected": -0.7879970073699951, |
|
"rewards/safe_rewards": -0.6358110308647156, |
|
"rewards/unsafe_rewards": -0.6208546161651611, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.687216100005138e-08, |
|
"logits/chosen": 0.6848994493484497, |
|
"logits/rejected": 1.1733933687210083, |
|
"logps/chosen": -284.51080322265625, |
|
"logps/rejected": -288.7901916503906, |
|
"loss": 5048.4258, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6762335300445557, |
|
"rewards/margins": 0.1719200611114502, |
|
"rewards/rejected": -0.8481537103652954, |
|
"rewards/safe_rewards": -0.6376355290412903, |
|
"rewards/unsafe_rewards": -0.7184177041053772, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.374943794100349e-08, |
|
"logits/chosen": 0.48638778924942017, |
|
"logits/rejected": 1.259670615196228, |
|
"logps/chosen": -267.34588623046875, |
|
"logps/rejected": -245.59756469726562, |
|
"loss": 5545.4941, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.6003537178039551, |
|
"rewards/margins": 0.22699756920337677, |
|
"rewards/rejected": -0.8273512721061707, |
|
"rewards/safe_rewards": -0.6312727332115173, |
|
"rewards/unsafe_rewards": -0.6281502842903137, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.069069506815325e-08, |
|
"logits/chosen": 0.7533052563667297, |
|
"logits/rejected": 1.2028855085372925, |
|
"logps/chosen": -251.12496948242188, |
|
"logps/rejected": -253.78408813476562, |
|
"loss": 5749.5141, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6362664103507996, |
|
"rewards/margins": 0.2198611944913864, |
|
"rewards/rejected": -0.8561276197433472, |
|
"rewards/safe_rewards": -0.622052013874054, |
|
"rewards/unsafe_rewards": -0.704675555229187, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_logits/chosen": 1.0718276500701904, |
|
"eval_logits/rejected": 1.9546749591827393, |
|
"eval_logps/chosen": -228.9304656982422, |
|
"eval_logps/rejected": -199.36412048339844, |
|
"eval_loss": 4458.44287109375, |
|
"eval_rewards/accuracies": 0.6194114685058594, |
|
"eval_rewards/chosen": -0.8858092427253723, |
|
"eval_rewards/margins": 0.0865015909075737, |
|
"eval_rewards/rejected": -0.9723107814788818, |
|
"eval_rewards/safe_rewards": -0.874053955078125, |
|
"eval_rewards/unsafe_rewards": -0.8699882626533508, |
|
"eval_runtime": 2349.2554, |
|
"eval_samples_per_second": 14.917, |
|
"eval_steps_per_second": 0.467, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.7696983110885746e-08, |
|
"logits/chosen": 1.0346394777297974, |
|
"logits/rejected": 1.4075425863265991, |
|
"logps/chosen": -264.0049133300781, |
|
"logps/rejected": -256.81793212890625, |
|
"loss": 5875.7254, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.7450360059738159, |
|
"rewards/margins": 0.13777832686901093, |
|
"rewards/rejected": -0.8828142881393433, |
|
"rewards/safe_rewards": -0.6767371892929077, |
|
"rewards/unsafe_rewards": -0.7506189942359924, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.47693304593777e-08, |
|
"logits/chosen": 0.577034056186676, |
|
"logits/rejected": 1.2275969982147217, |
|
"logps/chosen": -280.673583984375, |
|
"logps/rejected": -243.10635375976562, |
|
"loss": 5531.6125, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6422880291938782, |
|
"rewards/margins": 0.22371160984039307, |
|
"rewards/rejected": -0.8659995794296265, |
|
"rewards/safe_rewards": -0.5432512164115906, |
|
"rewards/unsafe_rewards": -0.6611617803573608, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.190874281132851e-08, |
|
"logits/chosen": 0.6209213733673096, |
|
"logits/rejected": 0.9749325513839722, |
|
"logps/chosen": -258.8196716308594, |
|
"logps/rejected": -247.3189697265625, |
|
"loss": 5541.2727, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.6575254201889038, |
|
"rewards/margins": 0.12947872281074524, |
|
"rewards/rejected": -0.7870042324066162, |
|
"rewards/safe_rewards": -0.7655413746833801, |
|
"rewards/unsafe_rewards": -0.7101870775222778, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.9116202826486045e-08, |
|
"logits/chosen": 0.7310935258865356, |
|
"logits/rejected": 1.0775771141052246, |
|
"logps/chosen": -272.3906555175781, |
|
"logps/rejected": -257.2728271484375, |
|
"loss": 5545.8492, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.6876263618469238, |
|
"rewards/margins": 0.16089771687984467, |
|
"rewards/rejected": -0.8485240936279297, |
|
"rewards/safe_rewards": -0.6295339465141296, |
|
"rewards/unsafe_rewards": -0.7383956909179688, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.639266978908676e-08, |
|
"logits/chosen": 0.6267167329788208, |
|
"logits/rejected": 1.1266528367996216, |
|
"logps/chosen": -297.58380126953125, |
|
"logps/rejected": -271.4803161621094, |
|
"loss": 5131.627, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.6685757637023926, |
|
"rewards/margins": 0.18729698657989502, |
|
"rewards/rejected": -0.8558727502822876, |
|
"rewards/safe_rewards": -0.6740354299545288, |
|
"rewards/unsafe_rewards": -0.6281224489212036, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.373907927832513e-08, |
|
"logits/chosen": 0.6049357056617737, |
|
"logits/rejected": 0.9919975996017456, |
|
"logps/chosen": -265.62481689453125, |
|
"logps/rejected": -285.9028625488281, |
|
"loss": 5640.1398, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6182764172554016, |
|
"rewards/margins": 0.22418944537639618, |
|
"rewards/rejected": -0.842465877532959, |
|
"rewards/safe_rewards": -0.6555901765823364, |
|
"rewards/unsafe_rewards": -0.5656682848930359, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.115634284696698e-08, |
|
"logits/chosen": 0.49705711007118225, |
|
"logits/rejected": 0.9479654431343079, |
|
"logps/chosen": -261.2461853027344, |
|
"logps/rejected": -270.83331298828125, |
|
"loss": 5189.8301, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6632257699966431, |
|
"rewards/margins": 0.21208517253398895, |
|
"rewards/rejected": -0.8753108978271484, |
|
"rewards/safe_rewards": -0.6663291454315186, |
|
"rewards/unsafe_rewards": -0.6038998365402222, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.864534770821559e-08, |
|
"logits/chosen": 0.6149829626083374, |
|
"logits/rejected": 1.1939442157745361, |
|
"logps/chosen": -262.00933837890625, |
|
"logps/rejected": -240.24581909179688, |
|
"loss": 5618.5883, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6275893449783325, |
|
"rewards/margins": 0.20411472022533417, |
|
"rewards/rejected": -0.8317041397094727, |
|
"rewards/safe_rewards": -0.6472023725509644, |
|
"rewards/unsafe_rewards": -0.5557063817977905, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.620695643093924e-08, |
|
"logits/chosen": 0.43840399384498596, |
|
"logits/rejected": 1.105423092842102, |
|
"logps/chosen": -269.2837829589844, |
|
"logps/rejected": -238.085205078125, |
|
"loss": 5468.3313, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6394304037094116, |
|
"rewards/margins": 0.22106070816516876, |
|
"rewards/rejected": -0.860491156578064, |
|
"rewards/safe_rewards": -0.6031507849693298, |
|
"rewards/unsafe_rewards": -0.6791771650314331, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.384200664336412e-08, |
|
"logits/chosen": 0.5348480343818665, |
|
"logits/rejected": 1.0058144330978394, |
|
"logps/chosen": -268.3987731933594, |
|
"logps/rejected": -247.79696655273438, |
|
"loss": 5660.3645, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5938838720321655, |
|
"rewards/margins": 0.21732494235038757, |
|
"rewards/rejected": -0.8112088441848755, |
|
"rewards/safe_rewards": -0.5639302134513855, |
|
"rewards/unsafe_rewards": -0.6350196599960327, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.155131074533529e-08, |
|
"logits/chosen": 0.30334433913230896, |
|
"logits/rejected": 0.9854658246040344, |
|
"logps/chosen": -283.627685546875, |
|
"logps/rejected": -263.83251953125, |
|
"loss": 6043.9172, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.6394412517547607, |
|
"rewards/margins": 0.1600230187177658, |
|
"rewards/rejected": -0.7994643449783325, |
|
"rewards/safe_rewards": -0.6199285387992859, |
|
"rewards/unsafe_rewards": -0.6412296295166016, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9335655629243645e-08, |
|
"logits/chosen": 0.39362573623657227, |
|
"logits/rejected": 0.9285033941268921, |
|
"logps/chosen": -270.2079162597656, |
|
"logps/rejected": -261.9796447753906, |
|
"loss": 5957.5516, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6112038493156433, |
|
"rewards/margins": 0.18837173283100128, |
|
"rewards/rejected": -0.7995756268501282, |
|
"rewards/safe_rewards": -0.6032061576843262, |
|
"rewards/unsafe_rewards": -0.6732661724090576, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7195802409715197e-08, |
|
"logits/chosen": 0.2444291114807129, |
|
"logits/rejected": 0.9499914050102234, |
|
"logps/chosen": -298.4200134277344, |
|
"logps/rejected": -249.72866821289062, |
|
"loss": 5750.8313, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.6592567563056946, |
|
"rewards/margins": 0.1407555341720581, |
|
"rewards/rejected": -0.8000122904777527, |
|
"rewards/safe_rewards": -0.7100226283073425, |
|
"rewards/unsafe_rewards": -0.7015893459320068, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.513248616215527e-08, |
|
"logits/chosen": 0.3666357100009918, |
|
"logits/rejected": 0.9415947198867798, |
|
"logps/chosen": -277.87518310546875, |
|
"logps/rejected": -276.29119873046875, |
|
"loss": 5205.8715, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6106274724006653, |
|
"rewards/margins": 0.24805088341236115, |
|
"rewards/rejected": -0.8586783409118652, |
|
"rewards/safe_rewards": -0.6150985956192017, |
|
"rewards/unsafe_rewards": -0.594727635383606, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.31464156702382e-08, |
|
"logits/chosen": 0.24014464020729065, |
|
"logits/rejected": 0.9577549695968628, |
|
"logps/chosen": -292.7112121582031, |
|
"logps/rejected": -265.7065734863281, |
|
"loss": 5896.8078, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5955285429954529, |
|
"rewards/margins": 0.2333928644657135, |
|
"rewards/rejected": -0.8289214372634888, |
|
"rewards/safe_rewards": -0.6319350600242615, |
|
"rewards/unsafe_rewards": -0.5868616104125977, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1238273182427933e-08, |
|
"logits/chosen": 0.6973511576652527, |
|
"logits/rejected": 1.2915074825286865, |
|
"logps/chosen": -265.3111572265625, |
|
"logps/rejected": -251.41201782226562, |
|
"loss": 5434.0336, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6617192029953003, |
|
"rewards/margins": 0.19598451256752014, |
|
"rewards/rejected": -0.857703685760498, |
|
"rewards/safe_rewards": -0.6422809362411499, |
|
"rewards/unsafe_rewards": -0.6228102445602417, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.9408714177614306e-08, |
|
"logits/chosen": 0.5173779726028442, |
|
"logits/rejected": 1.02643883228302, |
|
"logps/chosen": -268.9621887207031, |
|
"logps/rejected": -251.25808715820312, |
|
"loss": 5243.4758, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.6187028288841248, |
|
"rewards/margins": 0.22567462921142578, |
|
"rewards/rejected": -0.8443773984909058, |
|
"rewards/safe_rewards": -0.6375213265419006, |
|
"rewards/unsafe_rewards": -0.6421637535095215, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.7658367139945228e-08, |
|
"logits/chosen": 0.6539649963378906, |
|
"logits/rejected": 1.0953106880187988, |
|
"logps/chosen": -288.9885559082031, |
|
"logps/rejected": -259.146728515625, |
|
"loss": 5246.4344, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6686577200889587, |
|
"rewards/margins": 0.19176754355430603, |
|
"rewards/rejected": -0.8604252934455872, |
|
"rewards/safe_rewards": -0.7045280933380127, |
|
"rewards/unsafe_rewards": -0.7155130505561829, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5987833342931745e-08, |
|
"logits/chosen": 0.4664410650730133, |
|
"logits/rejected": 1.215132236480713, |
|
"logps/chosen": -284.1900939941406, |
|
"logps/rejected": -251.48379516601562, |
|
"loss": 5564.9324, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.6805782318115234, |
|
"rewards/margins": 0.21095602214336395, |
|
"rewards/rejected": -0.8915343284606934, |
|
"rewards/safe_rewards": -0.67192143201828, |
|
"rewards/unsafe_rewards": -0.6578537821769714, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.439768664290053e-08, |
|
"logits/chosen": 0.48882967233657837, |
|
"logits/rejected": 1.0205453634262085, |
|
"logps/chosen": -288.0510559082031, |
|
"logps/rejected": -263.57122802734375, |
|
"loss": 5705.5039, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6453284025192261, |
|
"rewards/margins": 0.18227383494377136, |
|
"rewards/rejected": -0.827602207660675, |
|
"rewards/safe_rewards": -0.6023403406143188, |
|
"rewards/unsafe_rewards": -0.6489912867546082, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2888473281864597e-08, |
|
"logits/chosen": 0.3580858111381531, |
|
"logits/rejected": 0.9355760812759399, |
|
"logps/chosen": -252.00344848632812, |
|
"logps/rejected": -256.7703552246094, |
|
"loss": 5420.7055, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6472461819648743, |
|
"rewards/margins": 0.19622859358787537, |
|
"rewards/rejected": -0.8434747457504272, |
|
"rewards/safe_rewards": -0.6663787364959717, |
|
"rewards/unsafe_rewards": -0.6997274160385132, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1460711699880082e-08, |
|
"logits/chosen": 0.32274478673934937, |
|
"logits/rejected": 0.9183855056762695, |
|
"logps/chosen": -281.06304931640625, |
|
"logps/rejected": -268.91278076171875, |
|
"loss": 5609.357, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.5867010951042175, |
|
"rewards/margins": 0.23433193564414978, |
|
"rewards/rejected": -0.8210331201553345, |
|
"rewards/safe_rewards": -0.5630391240119934, |
|
"rewards/unsafe_rewards": -0.6277604103088379, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0114892356953397e-08, |
|
"logits/chosen": 0.381804883480072, |
|
"logits/rejected": 0.9557956457138062, |
|
"logps/chosen": -278.6263427734375, |
|
"logps/rejected": -252.7932891845703, |
|
"loss": 5676.834, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6421754360198975, |
|
"rewards/margins": 0.1775234043598175, |
|
"rewards/rejected": -0.8196988105773926, |
|
"rewards/safe_rewards": -0.6115553379058838, |
|
"rewards/unsafe_rewards": -0.6476501226425171, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.851477564560061e-09, |
|
"logits/chosen": 0.5100737810134888, |
|
"logits/rejected": 0.932380199432373, |
|
"logps/chosen": -263.25146484375, |
|
"logps/rejected": -271.11676025390625, |
|
"loss": 5593.4414, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6300482749938965, |
|
"rewards/margins": 0.25807589292526245, |
|
"rewards/rejected": -0.8881241679191589, |
|
"rewards/safe_rewards": -0.6826761960983276, |
|
"rewards/unsafe_rewards": -0.6732330322265625, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.670901326832763e-09, |
|
"logits/chosen": 0.6556006669998169, |
|
"logits/rejected": 1.0529851913452148, |
|
"logps/chosen": -272.6200866699219, |
|
"logps/rejected": -291.10101318359375, |
|
"loss": 5333.684, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.7119321823120117, |
|
"rewards/margins": 0.18222954869270325, |
|
"rewards/rejected": -0.8941618204116821, |
|
"rewards/safe_rewards": -0.7450841069221497, |
|
"rewards/unsafe_rewards": -0.6783844232559204, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.5735691914738936e-09, |
|
"logits/chosen": 0.3428182005882263, |
|
"logits/rejected": 0.6993114948272705, |
|
"logps/chosen": -276.2501220703125, |
|
"logps/rejected": -270.787841796875, |
|
"loss": 6014.7414, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6672028303146362, |
|
"rewards/margins": 0.16263318061828613, |
|
"rewards/rejected": -0.8298360109329224, |
|
"rewards/safe_rewards": -0.6557270288467407, |
|
"rewards/unsafe_rewards": -0.7067701816558838, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.559858110443016e-09, |
|
"logits/chosen": 0.3265165388584137, |
|
"logits/rejected": 0.9415761828422546, |
|
"logps/chosen": -279.380615234375, |
|
"logps/rejected": -258.53887939453125, |
|
"loss": 5329.075, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6516368985176086, |
|
"rewards/margins": 0.22732026875019073, |
|
"rewards/rejected": -0.8789570927619934, |
|
"rewards/safe_rewards": -0.6853364706039429, |
|
"rewards/unsafe_rewards": -0.6284711360931396, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.6301163104676685e-09, |
|
"logits/chosen": 0.5433076620101929, |
|
"logits/rejected": 0.899452805519104, |
|
"logps/chosen": -262.05511474609375, |
|
"logps/rejected": -280.93658447265625, |
|
"loss": 5452.5277, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6632400751113892, |
|
"rewards/margins": 0.19723954796791077, |
|
"rewards/rejected": -0.8604797124862671, |
|
"rewards/safe_rewards": -0.5747020244598389, |
|
"rewards/unsafe_rewards": -0.6066412329673767, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.784663173421438e-09, |
|
"logits/chosen": 0.47608470916748047, |
|
"logits/rejected": 0.8737590909004211, |
|
"logps/chosen": -294.0523376464844, |
|
"logps/rejected": -280.8829650878906, |
|
"loss": 5532.6391, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.6354952454566956, |
|
"rewards/margins": 0.18091240525245667, |
|
"rewards/rejected": -0.8164075613021851, |
|
"rewards/safe_rewards": -0.6999973654747009, |
|
"rewards/unsafe_rewards": -0.6226142644882202, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.023789126611137e-09, |
|
"logits/chosen": 0.6358956694602966, |
|
"logits/rejected": 1.2913506031036377, |
|
"logps/chosen": -276.2715148925781, |
|
"logps/rejected": -243.6599884033203, |
|
"loss": 5192.1734, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6617811918258667, |
|
"rewards/margins": 0.21255967020988464, |
|
"rewards/rejected": -0.874340832233429, |
|
"rewards/safe_rewards": -0.665223240852356, |
|
"rewards/unsafe_rewards": -0.67181396484375, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3477555430100604e-09, |
|
"logits/chosen": 0.5863360166549683, |
|
"logits/rejected": 1.0950720310211182, |
|
"logps/chosen": -270.6855773925781, |
|
"logps/rejected": -254.65771484375, |
|
"loss": 5546.9984, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5831121206283569, |
|
"rewards/margins": 0.2669592499732971, |
|
"rewards/rejected": -0.8500713109970093, |
|
"rewards/safe_rewards": -0.586032509803772, |
|
"rewards/unsafe_rewards": -0.577675461769104, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.7567946514721322e-09, |
|
"logits/chosen": 0.6444328427314758, |
|
"logits/rejected": 1.0208208560943604, |
|
"logps/chosen": -269.35577392578125, |
|
"logps/rejected": -271.528564453125, |
|
"loss": 5601.7539, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6750708818435669, |
|
"rewards/margins": 0.19110876321792603, |
|
"rewards/rejected": -0.8661795854568481, |
|
"rewards/safe_rewards": -0.6811034679412842, |
|
"rewards/unsafe_rewards": -0.7294248342514038, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.2511094569571668e-09, |
|
"logits/chosen": 0.3397526741027832, |
|
"logits/rejected": 1.0616391897201538, |
|
"logps/chosen": -257.86822509765625, |
|
"logps/rejected": -244.8105926513672, |
|
"loss": 5620.3375, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.631868302822113, |
|
"rewards/margins": 0.2000071257352829, |
|
"rewards/rejected": -0.8318754434585571, |
|
"rewards/safe_rewards": -0.5972138047218323, |
|
"rewards/unsafe_rewards": -0.6459835171699524, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.308736707954289e-10, |
|
"logits/chosen": 0.518609881401062, |
|
"logits/rejected": 1.1488319635391235, |
|
"logps/chosen": -273.81390380859375, |
|
"logps/rejected": -240.91372680664062, |
|
"loss": 5548.0289, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6856581568717957, |
|
"rewards/margins": 0.2014351636171341, |
|
"rewards/rejected": -0.8870933651924133, |
|
"rewards/safe_rewards": -0.6684737205505371, |
|
"rewards/unsafe_rewards": -0.694146990776062, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.962316510149222e-10, |
|
"logits/chosen": 0.3395392894744873, |
|
"logits/rejected": 1.0089718103408813, |
|
"logps/chosen": -252.1464080810547, |
|
"logps/rejected": -241.22982788085938, |
|
"loss": 5356.7621, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6337156295776367, |
|
"rewards/margins": 0.2152295857667923, |
|
"rewards/rejected": -0.8489452600479126, |
|
"rewards/safe_rewards": -0.6431758403778076, |
|
"rewards/unsafe_rewards": -0.6494039297103882, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.4729835275189016e-10, |
|
"logits/chosen": 0.5798267722129822, |
|
"logits/rejected": 0.9745955467224121, |
|
"logps/chosen": -243.1245574951172, |
|
"logps/rejected": -238.126220703125, |
|
"loss": 5836.127, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.6284788846969604, |
|
"rewards/margins": 0.2039627581834793, |
|
"rewards/rejected": -0.8324416279792786, |
|
"rewards/safe_rewards": -0.5914771556854248, |
|
"rewards/unsafe_rewards": -0.6241937279701233, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.415928876176482e-11, |
|
"logits/chosen": 0.4843016564846039, |
|
"logits/rejected": 0.8851835131645203, |
|
"logps/chosen": -258.23773193359375, |
|
"logps/rejected": -251.73001098632812, |
|
"loss": 6036.282, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6951759457588196, |
|
"rewards/margins": 0.1390235722064972, |
|
"rewards/rejected": -0.8341996073722839, |
|
"rewards/safe_rewards": -0.7087674140930176, |
|
"rewards/unsafe_rewards": -0.712031900882721, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.870500044303673e-12, |
|
"logits/chosen": 0.5293042063713074, |
|
"logits/rejected": 0.8430191874504089, |
|
"logps/chosen": -253.91397094726562, |
|
"logps/rejected": -270.7514953613281, |
|
"loss": 5497.6977, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.5842832326889038, |
|
"rewards/margins": 0.209587961435318, |
|
"rewards/rejected": -0.7938712239265442, |
|
"rewards/safe_rewards": -0.6020101308822632, |
|
"rewards/unsafe_rewards": -0.6186091303825378, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1884, |
|
"total_flos": 0.0, |
|
"train_loss": 5859.617769083399, |
|
"train_runtime": 32772.3871, |
|
"train_samples_per_second": 3.68, |
|
"train_steps_per_second": 0.057 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1884, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|