diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,21 +1,21 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.9999336738077867, + "epoch": 0.9994620763851533, "eval_steps": 500, - "global_step": 3769, + "global_step": 929, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, - "learning_rate": 1.326259946949602e-09, - "logits/chosen": -2.2208399772644043, - "logits/rejected": -2.206911087036133, - "logps/chosen": -260.99365234375, - "logps/rejected": -269.98907470703125, - "loss": 1953.125, + "learning_rate": 5.3763440860215056e-09, + "logits/chosen": -2.278595209121704, + "logits/rejected": -1.9600536823272705, + "logps/chosen": -233.26939392089844, + "logps/rejected": -152.44842529296875, + "loss": 13906.25, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, @@ -25,6164 +25,1512 @@ "step": 1 }, { - "epoch": 0.0, - "learning_rate": 1.3262599469496022e-08, - "logits/chosen": -2.3374857902526855, - "logits/rejected": -1.909049153327942, - "logps/chosen": -212.86407470703125, - "logps/rejected": -148.97979736328125, - "loss": 1808.9694, - "rewards/accuracies": 0.4722222089767456, - "rewards/chosen": -0.00013276383106131107, - "rewards/margins": 1.2069190233887639e-05, - "rewards/rejected": -0.0001448330731363967, - "rewards/safe_rewards": -6.142162601463497e-05, - "rewards/unsafe_rewards": -0.0002041060070041567, + "epoch": 0.01, + "learning_rate": 5.3763440860215054e-08, + "logits/chosen": -2.210334539413452, + "logits/rejected": -1.9666600227355957, + "logps/chosen": -224.4127655029297, + "logps/rejected": -183.17764282226562, + "loss": 15139.5625, + "rewards/accuracies": 0.3993055522441864, + "rewards/chosen": -0.00028303125873208046, + "rewards/margins": -0.0001712446683086455, + "rewards/rejected": -0.00011178661952726543, + "rewards/safe_rewards": -0.0003576676535885781, + "rewards/unsafe_rewards": -0.0006926239584572613, "step": 10 }, { - "epoch": 0.01, - "learning_rate": 2.6525198938992043e-08, - "logits/chosen": -2.3733057975769043, - "logits/rejected": -2.00811767578125, - "logps/chosen": -274.391845703125, - "logps/rejected": -189.80722045898438, - "loss": 1796.975, - "rewards/accuracies": 0.44999998807907104, - "rewards/chosen": -0.0007986313430592418, - "rewards/margins": -0.0008665307541377842, - "rewards/rejected": 6.789946928620338e-05, - "rewards/safe_rewards": -0.0007532857125625014, - "rewards/unsafe_rewards": -0.0008439767989329994, + "epoch": 0.02, + "learning_rate": 1.0752688172043011e-07, + "logits/chosen": -2.114291191101074, + "logits/rejected": -1.9261529445648193, + "logps/chosen": -179.12319946289062, + "logps/rejected": -167.4541778564453, + "loss": 14864.7687, + "rewards/accuracies": 0.53125, + "rewards/chosen": 3.5217970435041934e-05, + "rewards/margins": 0.0006419256096705794, + "rewards/rejected": -0.0006067077629268169, + "rewards/safe_rewards": -4.5623164623975754e-05, + "rewards/unsafe_rewards": -0.0002370497677475214, "step": 20 }, { - "epoch": 0.01, - "learning_rate": 3.978779840848806e-08, - "logits/chosen": -2.2258195877075195, - "logits/rejected": -2.080796480178833, - "logps/chosen": -202.92897033691406, - "logps/rejected": -163.88963317871094, - "loss": 1786.6912, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.00026543866260908544, - "rewards/margins": -0.0008989976486191154, - "rewards/rejected": 0.0006335589569061995, - "rewards/safe_rewards": 9.513014083495364e-05, - "rewards/unsafe_rewards": -0.0006260074442252517, + "epoch": 0.03, + "learning_rate": 1.6129032258064515e-07, + "logits/chosen": -2.190725564956665, + "logits/rejected": -1.97126042842865, + "logps/chosen": -201.92779541015625, + "logps/rejected": -175.68978881835938, + "loss": 14776.8203, + "rewards/accuracies": 0.578125, + "rewards/chosen": 0.0003970341640524566, + "rewards/margins": 0.0022382759489119053, + "rewards/rejected": -0.001841241493821144, + "rewards/safe_rewards": -4.6795281377853826e-05, + "rewards/unsafe_rewards": 0.00011917026859009638, "step": 30 }, { - "epoch": 0.01, - "learning_rate": 5.3050397877984086e-08, - "logits/chosen": -2.2862894535064697, - "logits/rejected": -2.148435354232788, - "logps/chosen": -211.01400756835938, - "logps/rejected": -196.8510284423828, - "loss": 1854.5207, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": 0.0002621179155539721, - "rewards/margins": 0.0010960950748994946, - "rewards/rejected": -0.0008339773048646748, - "rewards/safe_rewards": 0.0004316710983403027, - "rewards/unsafe_rewards": 9.256476914742962e-05, + "epoch": 0.04, + "learning_rate": 2.1505376344086022e-07, + "logits/chosen": -2.1837613582611084, + "logits/rejected": -1.9931201934814453, + "logps/chosen": -210.1522979736328, + "logps/rejected": -182.34120178222656, + "loss": 14512.05, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": 6.361771283991402e-06, + "rewards/margins": 0.005132108926773071, + "rewards/rejected": -0.005125747062265873, + "rewards/safe_rewards": -0.0015235814498737454, + "rewards/unsafe_rewards": 0.0014864765107631683, "step": 40 }, { - "epoch": 0.01, - "learning_rate": 6.631299734748011e-08, - "logits/chosen": -2.161708116531372, - "logits/rejected": -1.9565588235855103, - "logps/chosen": -156.20376586914062, - "logps/rejected": -183.1505584716797, - "loss": 1769.9344, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": 0.00037620964576490223, - "rewards/margins": 0.0014379946514964104, - "rewards/rejected": -0.0010617850348353386, - "rewards/safe_rewards": 6.0248847148614004e-05, - "rewards/unsafe_rewards": 0.0006921704625710845, + "epoch": 0.05, + "learning_rate": 2.6881720430107523e-07, + "logits/chosen": -2.142752170562744, + "logits/rejected": -1.9720497131347656, + "logps/chosen": -197.49960327148438, + "logps/rejected": -175.8955841064453, + "loss": 14817.1484, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.013221519999206066, + "rewards/margins": 0.014507068321108818, + "rewards/rejected": -0.02772858738899231, + "rewards/safe_rewards": -0.009249309077858925, + "rewards/unsafe_rewards": -0.012660175561904907, "step": 50 }, { - "epoch": 0.02, - "learning_rate": 7.957559681697612e-08, - "logits/chosen": -2.237748146057129, - "logits/rejected": -2.069594621658325, - "logps/chosen": -208.90628051757812, - "logps/rejected": -185.17343139648438, - "loss": 1729.4439, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": 0.0003722314431797713, - "rewards/margins": 0.0011089593172073364, - "rewards/rejected": -0.0007367279613390565, - "rewards/safe_rewards": -0.00010328351345378906, - "rewards/unsafe_rewards": 0.0008477465016767383, + "epoch": 0.06, + "learning_rate": 3.225806451612903e-07, + "logits/chosen": -2.1198599338531494, + "logits/rejected": -1.9124641418457031, + "logps/chosen": -212.4015655517578, + "logps/rejected": -182.197998046875, + "loss": 14732.3344, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.06461948156356812, + "rewards/margins": 0.029793167486786842, + "rewards/rejected": -0.0944126546382904, + "rewards/safe_rewards": -0.05795777961611748, + "rewards/unsafe_rewards": -0.06189362332224846, "step": 60 }, { - "epoch": 0.02, - "learning_rate": 9.283819628647215e-08, - "logits/chosen": -2.248917579650879, - "logits/rejected": -1.9423812627792358, - "logps/chosen": -210.68362426757812, - "logps/rejected": -152.62051391601562, - "loss": 1690.5527, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": 0.000262564979493618, - "rewards/margins": 0.001469696406275034, - "rewards/rejected": -0.0012071315431967378, - "rewards/safe_rewards": 0.0007048802217468619, - "rewards/unsafe_rewards": -0.00017975021910388023, + "epoch": 0.08, + "learning_rate": 3.7634408602150537e-07, + "logits/chosen": -2.1681621074676514, + "logits/rejected": -1.9275916814804077, + "logps/chosen": -224.36148071289062, + "logps/rejected": -199.69296264648438, + "loss": 13727.1641, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1377064734697342, + "rewards/margins": 0.04842432960867882, + "rewards/rejected": -0.1861308068037033, + "rewards/safe_rewards": -0.12104250490665436, + "rewards/unsafe_rewards": -0.15066877007484436, "step": 70 }, { - "epoch": 0.02, - "learning_rate": 1.0610079575596817e-07, - "logits/chosen": -2.19905948638916, - "logits/rejected": -2.0396084785461426, - "logps/chosen": -199.74111938476562, - "logps/rejected": -184.2566680908203, - "loss": 1778.007, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": 0.001033532666042447, - "rewards/margins": 0.004405138082802296, - "rewards/rejected": -0.003371605183929205, - "rewards/safe_rewards": 0.0015589881222695112, - "rewards/unsafe_rewards": 0.000508077209815383, + "epoch": 0.09, + "learning_rate": 4.3010752688172043e-07, + "logits/chosen": -2.03377103805542, + "logits/rejected": -1.8029301166534424, + "logps/chosen": -229.58056640625, + "logps/rejected": -207.89120483398438, + "loss": 13445.775, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.1712869554758072, + "rewards/margins": 0.07215174287557602, + "rewards/rejected": -0.24343867599964142, + "rewards/safe_rewards": -0.17058514058589935, + "rewards/unsafe_rewards": -0.1769869476556778, "step": 80 }, { - "epoch": 0.02, - "learning_rate": 1.1936339522546417e-07, - "logits/chosen": -2.2518837451934814, - "logits/rejected": -2.002791166305542, - "logps/chosen": -182.83538818359375, - "logps/rejected": -125.91290283203125, - "loss": 1781.3004, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.0012751545291393995, - "rewards/margins": 0.00354824704118073, - "rewards/rejected": -0.004823402035981417, - "rewards/safe_rewards": -0.001394549384713173, - "rewards/unsafe_rewards": -0.0011557595571503043, + "epoch": 0.1, + "learning_rate": 4.838709677419355e-07, + "logits/chosen": -1.9821720123291016, + "logits/rejected": -1.7775068283081055, + "logps/chosen": -234.10794067382812, + "logps/rejected": -203.7090606689453, + "loss": 13041.9695, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.15412810444831848, + "rewards/margins": 0.07884708791971207, + "rewards/rejected": -0.23297517001628876, + "rewards/safe_rewards": -0.1482992023229599, + "rewards/unsafe_rewards": -0.16599053144454956, "step": 90 }, { - "epoch": 0.03, - "learning_rate": 1.3262599469496022e-07, - "logits/chosen": -2.162238359451294, - "logits/rejected": -1.9656562805175781, - "logps/chosen": -203.97683715820312, - "logps/rejected": -183.8803253173828, - "loss": 1680.1902, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": 0.0023126702290028334, - "rewards/margins": 0.0064977919682860374, - "rewards/rejected": -0.004185121972113848, - "rewards/safe_rewards": 0.004085113760083914, - "rewards/unsafe_rewards": 0.0005402260576374829, + "epoch": 0.11, + "learning_rate": 4.9991350953333e-07, + "logits/chosen": -1.864263892173767, + "logits/rejected": -1.6071033477783203, + "logps/chosen": -227.64517211914062, + "logps/rejected": -227.03842163085938, + "loss": 12741.3586, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.270012766122818, + "rewards/margins": 0.09471780061721802, + "rewards/rejected": -0.3647305369377136, + "rewards/safe_rewards": -0.26627737283706665, + "rewards/unsafe_rewards": -0.2737300395965576, "step": 100 }, { - "epoch": 0.03, - "learning_rate": 1.4588859416445624e-07, - "logits/chosen": -2.1465296745300293, - "logits/rejected": -2.035858631134033, - "logps/chosen": -141.734619140625, - "logps/rejected": -142.47686767578125, - "loss": 1701.7199, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.005047277547419071, - "rewards/margins": 0.008131561800837517, - "rewards/rejected": -0.013178840279579163, - "rewards/safe_rewards": -0.007261062506586313, - "rewards/unsafe_rewards": -0.002833492122590542, + "epoch": 0.12, + "learning_rate": 4.99490026817712e-07, + "logits/chosen": -1.7934337854385376, + "logits/rejected": -1.497312307357788, + "logps/chosen": -220.6864471435547, + "logps/rejected": -202.70872497558594, + "loss": 12486.9359, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2676014304161072, + "rewards/margins": 0.11580581963062286, + "rewards/rejected": -0.38340726494789124, + "rewards/safe_rewards": -0.25149208307266235, + "rewards/unsafe_rewards": -0.29486384987831116, "step": 110 }, { - "epoch": 0.03, - "learning_rate": 1.5915119363395223e-07, - "logits/chosen": -2.2748847007751465, - "logits/rejected": -2.1074626445770264, - "logps/chosen": -207.0059051513672, - "logps/rejected": -190.84957885742188, - "loss": 1650.3488, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.011613449081778526, - "rewards/margins": 0.015729505568742752, - "rewards/rejected": -0.02734295465052128, - "rewards/safe_rewards": -0.014216238632798195, - "rewards/unsafe_rewards": -0.009010660462081432, + "epoch": 0.13, + "learning_rate": 4.98714263060751e-07, + "logits/chosen": -1.8139445781707764, + "logits/rejected": -1.4991350173950195, + "logps/chosen": -228.6189727783203, + "logps/rejected": -206.57583618164062, + "loss": 13012.8516, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.2460775375366211, + "rewards/margins": 0.11647327244281769, + "rewards/rejected": -0.3625508248806, + "rewards/safe_rewards": -0.2320813238620758, + "rewards/unsafe_rewards": -0.22151398658752441, "step": 120 }, { - "epoch": 0.03, - "learning_rate": 1.7241379310344828e-07, - "logits/chosen": -2.2503013610839844, - "logits/rejected": -1.9507869482040405, - "logps/chosen": -186.76370239257812, - "logps/rejected": -181.24496459960938, - "loss": 1673.5436, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.026033928617835045, - "rewards/margins": 0.02832433208823204, - "rewards/rejected": -0.054358262568712234, - "rewards/safe_rewards": -0.021366354078054428, - "rewards/unsafe_rewards": -0.03070150315761566, + "epoch": 0.14, + "learning_rate": 4.975873136443648e-07, + "logits/chosen": -1.975090742111206, + "logits/rejected": -1.6450752019882202, + "logps/chosen": -242.79067993164062, + "logps/rejected": -217.6595458984375, + "loss": 12543.5445, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -0.29524677991867065, + "rewards/margins": 0.09299459308385849, + "rewards/rejected": -0.38824135065078735, + "rewards/safe_rewards": -0.29185301065444946, + "rewards/unsafe_rewards": -0.2741631865501404, "step": 130 }, { - "epoch": 0.04, - "learning_rate": 1.856763925729443e-07, - "logits/chosen": -2.1347732543945312, - "logits/rejected": -2.097235918045044, - "logps/chosen": -177.59814453125, - "logps/rejected": -171.32421875, - "loss": 1876.7266, - "rewards/accuracies": 0.512499988079071, - "rewards/chosen": -0.05749493092298508, - "rewards/margins": 0.008745192550122738, - "rewards/rejected": -0.06624011695384979, - "rewards/safe_rewards": -0.05437786504626274, - "rewards/unsafe_rewards": -0.060611993074417114, + "epoch": 0.15, + "learning_rate": 4.961107698262044e-07, + "logits/chosen": -2.004521131515503, + "logits/rejected": -1.7787082195281982, + "logps/chosen": -219.97177124023438, + "logps/rejected": -200.43832397460938, + "loss": 12485.0078, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.27640992403030396, + "rewards/margins": 0.10548245906829834, + "rewards/rejected": -0.3818923830986023, + "rewards/safe_rewards": -0.2715495824813843, + "rewards/unsafe_rewards": -0.26330554485321045, "step": 140 }, { - "epoch": 0.04, - "learning_rate": 1.989389920424403e-07, - "logits/chosen": -2.1675941944122314, - "logits/rejected": -2.037122964859009, - "logps/chosen": -252.21517944335938, - "logps/rejected": -207.8942108154297, - "loss": 1692.059, - "rewards/accuracies": 0.5625, - "rewards/chosen": -0.04734355956315994, - "rewards/margins": 0.02409571036696434, - "rewards/rejected": -0.07143928110599518, - "rewards/safe_rewards": -0.04714367538690567, - "rewards/unsafe_rewards": -0.04754345491528511, + "epoch": 0.16, + "learning_rate": 4.942867164927899e-07, + "logits/chosen": -2.030977249145508, + "logits/rejected": -1.8493503332138062, + "logps/chosen": -248.6466522216797, + "logps/rejected": -229.3296356201172, + "loss": 12943.043, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.3637232780456543, + "rewards/margins": 0.12409548461437225, + "rewards/rejected": -0.48781871795654297, + "rewards/safe_rewards": -0.3475380539894104, + "rewards/unsafe_rewards": -0.36912816762924194, "step": 150 }, { - "epoch": 0.04, - "learning_rate": 2.1220159151193635e-07, - "logits/chosen": -2.2136006355285645, - "logits/rejected": -2.004173517227173, - "logps/chosen": -209.5802459716797, - "logps/rejected": -174.6752166748047, - "loss": 1616.4781, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.07595737278461456, - "rewards/margins": 0.018151089549064636, - "rewards/rejected": -0.094108447432518, - "rewards/safe_rewards": -0.07967563718557358, - "rewards/unsafe_rewards": -0.07223908603191376, + "epoch": 0.17, + "learning_rate": 4.921177292156419e-07, + "logits/chosen": -2.1098408699035645, + "logits/rejected": -1.8167873620986938, + "logps/chosen": -240.3527374267578, + "logps/rejected": -236.4432373046875, + "loss": 12041.1094, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.4361291825771332, + "rewards/margins": 0.12014999240636826, + "rewards/rejected": -0.5562791228294373, + "rewards/safe_rewards": -0.39963001012802124, + "rewards/unsafe_rewards": -0.4525947570800781, "step": 160 }, { - "epoch": 0.05, - "learning_rate": 2.2546419098143234e-07, - "logits/chosen": -2.271735429763794, - "logits/rejected": -2.0679004192352295, - "logps/chosen": -205.76113891601562, - "logps/rejected": -195.0201416015625, - "loss": 1666.1361, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.07126516848802567, - "rewards/margins": 0.03919476270675659, - "rewards/rejected": -0.11045993864536285, - "rewards/safe_rewards": -0.07663536816835403, - "rewards/unsafe_rewards": -0.0658949762582779, + "epoch": 0.18, + "learning_rate": 4.896068706145631e-07, + "logits/chosen": -2.022606372833252, + "logits/rejected": -1.7623529434204102, + "logps/chosen": -249.30545043945312, + "logps/rejected": -206.6702880859375, + "loss": 12467.793, + "rewards/accuracies": 0.640625, + "rewards/chosen": -0.24929103255271912, + "rewards/margins": 0.09386524558067322, + "rewards/rejected": -0.34315627813339233, + "rewards/safe_rewards": -0.2512078881263733, + "rewards/unsafe_rewards": -0.2514154314994812, "step": 170 }, { - "epoch": 0.05, - "learning_rate": 2.3872679045092834e-07, - "logits/chosen": -2.276427745819092, - "logits/rejected": -2.133946180343628, - "logps/chosen": -195.0485382080078, - "logps/rejected": -222.43124389648438, - "loss": 1623.3875, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.06365207582712173, - "rewards/margins": 0.03775962442159653, - "rewards/rejected": -0.10141168534755707, - "rewards/safe_rewards": -0.06803072988986969, - "rewards/unsafe_rewards": -0.059273410588502884, + "epoch": 0.19, + "learning_rate": 4.867576860332048e-07, + "logits/chosen": -1.9807233810424805, + "logits/rejected": -1.663428544998169, + "logps/chosen": -245.77587890625, + "logps/rejected": -237.46255493164062, + "loss": 12261.4297, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.46205464005470276, + "rewards/margins": 0.13240954279899597, + "rewards/rejected": -0.5944641828536987, + "rewards/safe_rewards": -0.48999160528182983, + "rewards/unsafe_rewards": -0.4135225713253021, "step": 180 }, { - "epoch": 0.05, - "learning_rate": 2.519893899204244e-07, - "logits/chosen": -2.2310173511505127, - "logits/rejected": -2.033411979675293, - "logps/chosen": -220.09750366210938, - "logps/rejected": -190.6726531982422, - "loss": 1516.2832, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.05845081806182861, - "rewards/margins": 0.047280453145504, - "rewards/rejected": -0.10573127120733261, - "rewards/safe_rewards": -0.058795563876628876, - "rewards/unsafe_rewards": -0.05810605362057686, + "epoch": 0.2, + "learning_rate": 4.835741985330259e-07, + "logits/chosen": -1.9643423557281494, + "logits/rejected": -1.6251189708709717, + "logps/chosen": -246.165771484375, + "logps/rejected": -218.6354522705078, + "loss": 11787.4242, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.3656853437423706, + "rewards/margins": 0.16899386048316956, + "rewards/rejected": -0.5346791744232178, + "rewards/safe_rewards": -0.3935268819332123, + "rewards/unsafe_rewards": -0.3631567358970642, "step": 190 }, { - "epoch": 0.05, - "learning_rate": 2.6525198938992043e-07, - "logits/chosen": -2.313551187515259, - "logits/rejected": -2.076124668121338, - "logps/chosen": -185.37484741210938, - "logps/rejected": -172.309814453125, - "loss": 1537.3717, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.09232227504253387, - "rewards/margins": 0.017361406236886978, - "rewards/rejected": -0.10968367755413055, - "rewards/safe_rewards": -0.09398721158504486, - "rewards/unsafe_rewards": -0.09065733850002289, + "epoch": 0.22, + "learning_rate": 4.800609032127122e-07, + "logits/chosen": -1.854522705078125, + "logits/rejected": -1.6063613891601562, + "logps/chosen": -220.02792358398438, + "logps/rejected": -214.9347381591797, + "loss": 12141.6414, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -0.29149216413497925, + "rewards/margins": 0.11372214555740356, + "rewards/rejected": -0.4052143096923828, + "rewards/safe_rewards": -0.24175524711608887, + "rewards/unsafe_rewards": -0.3217989504337311, "step": 200 }, { - "epoch": 0.06, - "learning_rate": 2.785145888594164e-07, - "logits/chosen": -2.1944236755371094, - "logits/rejected": -2.0413947105407715, - "logps/chosen": -231.95785522460938, - "logps/rejected": -202.9747314453125, - "loss": 1396.1459, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.06433193385601044, - "rewards/margins": 0.03484920412302017, - "rewards/rejected": -0.09918113797903061, - "rewards/safe_rewards": -0.07289481908082962, - "rewards/unsafe_rewards": -0.05576905608177185, + "epoch": 0.23, + "learning_rate": 4.7622276086107677e-07, + "logits/chosen": -1.7873353958129883, + "logits/rejected": -1.5099955797195435, + "logps/chosen": -252.8465576171875, + "logps/rejected": -223.00839233398438, + "loss": 12387.6477, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -0.32073110342025757, + "rewards/margins": 0.15121832489967346, + "rewards/rejected": -0.47194942831993103, + "rewards/safe_rewards": -0.29555749893188477, + "rewards/unsafe_rewards": -0.3283526599407196, "step": 210 }, { - "epoch": 0.06, - "learning_rate": 2.917771883289125e-07, - "logits/chosen": -2.194143772125244, - "logits/rejected": -1.9425199031829834, - "logps/chosen": -216.7147979736328, - "logps/rejected": -184.4503173828125, - "loss": 1588.6521, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.09734416007995605, - "rewards/margins": 0.042335640639066696, - "rewards/rejected": -0.13967978954315186, - "rewards/safe_rewards": -0.09563153237104416, - "rewards/unsafe_rewards": -0.09905678033828735, + "epoch": 0.24, + "learning_rate": 4.720651909524036e-07, + "logits/chosen": -1.6759564876556396, + "logits/rejected": -1.3835411071777344, + "logps/chosen": -240.4746856689453, + "logps/rejected": -228.14340209960938, + "loss": 12006.8687, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.3695080578327179, + "rewards/margins": 0.13010919094085693, + "rewards/rejected": -0.49961718916893005, + "rewards/safe_rewards": -0.4255983233451843, + "rewards/unsafe_rewards": -0.32201912999153137, "step": 220 }, { - "epoch": 0.06, - "learning_rate": 3.050397877984085e-07, - "logits/chosen": -2.211782932281494, - "logits/rejected": -2.0525803565979004, - "logps/chosen": -192.00868225097656, - "logps/rejected": -188.61412048339844, - "loss": 1473.4148, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.07546865940093994, - "rewards/margins": 0.03156990930438042, - "rewards/rejected": -0.10703857243061066, - "rewards/safe_rewards": -0.07242611050605774, - "rewards/unsafe_rewards": -0.07851120829582214, + "epoch": 0.25, + "learning_rate": 4.675940639941256e-07, + "logits/chosen": -1.616847038269043, + "logits/rejected": -1.2983253002166748, + "logps/chosen": -243.8197784423828, + "logps/rejected": -220.0131072998047, + "loss": 12132.225, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.32454174757003784, + "rewards/margins": 0.13894598186016083, + "rewards/rejected": -0.46348777413368225, + "rewards/safe_rewards": -0.32337862253189087, + "rewards/unsafe_rewards": -0.30096209049224854, "step": 230 }, { - "epoch": 0.06, - "learning_rate": 3.1830238726790447e-07, - "logits/chosen": -2.191246509552002, - "logits/rejected": -1.9216060638427734, - "logps/chosen": -207.90505981445312, - "logps/rejected": -186.59934997558594, - "loss": 1486.6319, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.07034306973218918, - "rewards/margins": 0.07047908008098602, - "rewards/rejected": -0.1408221423625946, - "rewards/safe_rewards": -0.06774644553661346, - "rewards/unsafe_rewards": -0.0729396864771843, + "epoch": 0.26, + "learning_rate": 4.628156932376418e-07, + "logits/chosen": -1.5430564880371094, + "logits/rejected": -1.2433820962905884, + "logps/chosen": -234.27780151367188, + "logps/rejected": -206.7601776123047, + "loss": 11772.8016, + "rewards/accuracies": 0.6968749761581421, + "rewards/chosen": -0.370536744594574, + "rewards/margins": 0.16985538601875305, + "rewards/rejected": -0.5403920412063599, + "rewards/safe_rewards": -0.3770383894443512, + "rewards/unsafe_rewards": -0.37265413999557495, "step": 240 }, { - "epoch": 0.07, - "learning_rate": 3.3156498673740054e-07, - "logits/chosen": -2.14121675491333, - "logits/rejected": -1.9854660034179688, - "logps/chosen": -205.254150390625, - "logps/rejected": -171.42398071289062, - "loss": 1604.483, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.11317384243011475, - "rewards/margins": 0.04803832247853279, - "rewards/rejected": -0.16121216118335724, - "rewards/safe_rewards": -0.1074928268790245, - "rewards/unsafe_rewards": -0.11885485798120499, + "epoch": 0.27, + "learning_rate": 4.5773682576397776e-07, + "logits/chosen": -1.5874181985855103, + "logits/rejected": -1.3577262163162231, + "logps/chosen": -238.10006713867188, + "logps/rejected": -229.5025634765625, + "loss": 11670.7086, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -0.3351340889930725, + "rewards/margins": 0.15698722004890442, + "rewards/rejected": -0.49212127923965454, + "rewards/safe_rewards": -0.3559727370738983, + "rewards/unsafe_rewards": -0.3131583034992218, "step": 250 }, { - "epoch": 0.07, - "learning_rate": 3.4482758620689656e-07, - "logits/chosen": -2.1345162391662598, - "logits/rejected": -1.9949233531951904, - "logps/chosen": -228.7440948486328, - "logps/rejected": -228.72872924804688, - "loss": 1606.9859, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.07242457568645477, - "rewards/margins": 0.05171200633049011, - "rewards/rejected": -0.12413656711578369, - "rewards/safe_rewards": -0.09204237163066864, - "rewards/unsafe_rewards": -0.05280677229166031, + "epoch": 0.28, + "learning_rate": 4.52364632956877e-07, + "logits/chosen": -1.621845006942749, + "logits/rejected": -1.392242193222046, + "logps/chosen": -229.9431915283203, + "logps/rejected": -224.5813751220703, + "loss": 12151.4023, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -0.41951942443847656, + "rewards/margins": 0.1129680722951889, + "rewards/rejected": -0.5324875116348267, + "rewards/safe_rewards": -0.4047684669494629, + "rewards/unsafe_rewards": -0.38962703943252563, "step": 260 }, { - "epoch": 0.07, - "learning_rate": 3.5809018567639253e-07, - "logits/chosen": -2.094165802001953, - "logits/rejected": -1.8853315114974976, - "logps/chosen": -193.77328491210938, - "logps/rejected": -179.89183044433594, - "loss": 1547.008, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.03563028201460838, - "rewards/margins": 0.06978610903024673, - "rewards/rejected": -0.10541639477014542, - "rewards/safe_rewards": -0.03551654890179634, - "rewards/unsafe_rewards": -0.035744018852710724, + "epoch": 0.29, + "learning_rate": 4.467067003767745e-07, + "logits/chosen": -1.688535451889038, + "logits/rejected": -1.3623073101043701, + "logps/chosen": -258.8652038574219, + "logps/rejected": -242.83126831054688, + "loss": 11759.0641, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.4135201871395111, + "rewards/margins": 0.18194760382175446, + "rewards/rejected": -0.5954678058624268, + "rewards/safe_rewards": -0.4168241024017334, + "rewards/unsafe_rewards": -0.398123562335968, "step": 270 }, { - "epoch": 0.07, - "learning_rate": 3.713527851458886e-07, - "logits/chosen": -2.1299290657043457, - "logits/rejected": -1.9036839008331299, - "logps/chosen": -183.91091918945312, - "logps/rejected": -159.7763214111328, - "loss": 1587.8945, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.052888333797454834, - "rewards/margins": 0.05135570093989372, - "rewards/rejected": -0.10424403101205826, - "rewards/safe_rewards": -0.048650674521923065, - "rewards/unsafe_rewards": -0.05712597817182541, + "epoch": 0.3, + "learning_rate": 4.4077101704995163e-07, + "logits/chosen": -1.7969125509262085, + "logits/rejected": -1.531200647354126, + "logps/chosen": -244.733642578125, + "logps/rejected": -229.079345703125, + "loss": 11477.5773, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.37101584672927856, + "rewards/margins": 0.15268611907958984, + "rewards/rejected": -0.5237019658088684, + "rewards/safe_rewards": -0.34663933515548706, + "rewards/unsafe_rewards": -0.36663565039634705, "step": 280 }, { - "epoch": 0.08, - "learning_rate": 3.8461538461538463e-07, - "logits/chosen": -2.0822203159332275, - "logits/rejected": -1.8311283588409424, - "logps/chosen": -179.03477478027344, - "logps/rejected": -190.87289428710938, - "loss": 1476.7474, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.056076426059007645, - "rewards/margins": 0.05921436473727226, - "rewards/rejected": -0.1152907982468605, - "rewards/safe_rewards": -0.04298359900712967, - "rewards/unsafe_rewards": -0.06916925311088562, + "epoch": 0.31, + "learning_rate": 4.3456596418799476e-07, + "logits/chosen": -1.7852989435195923, + "logits/rejected": -1.4842371940612793, + "logps/chosen": -242.92623901367188, + "logps/rejected": -208.17691040039062, + "loss": 11460.3641, + "rewards/accuracies": 0.690625011920929, + "rewards/chosen": -0.40203922986984253, + "rewards/margins": 0.14379718899726868, + "rewards/rejected": -0.5458364486694336, + "rewards/safe_rewards": -0.39511582255363464, + "rewards/unsafe_rewards": -0.4209752678871155, "step": 290 }, { - "epoch": 0.08, - "learning_rate": 3.978779840848806e-07, - "logits/chosen": -2.06008243560791, - "logits/rejected": -1.9192273616790771, - "logps/chosen": -235.5111846923828, - "logps/rejected": -226.0798797607422, - "loss": 1512.3354, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.12190475314855576, - "rewards/margins": 0.061126284301280975, - "rewards/rejected": -0.18303103744983673, - "rewards/safe_rewards": -0.13008055090904236, - "rewards/unsafe_rewards": -0.11372894048690796, + "epoch": 0.32, + "learning_rate": 4.2810030335348693e-07, + "logits/chosen": -1.7892544269561768, + "logits/rejected": -1.4415075778961182, + "logps/chosen": -232.9502716064453, + "logps/rejected": -216.12387084960938, + "loss": 11505.4461, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.3725574016571045, + "rewards/margins": 0.17103147506713867, + "rewards/rejected": -0.5435888767242432, + "rewards/safe_rewards": -0.39582887291908264, + "rewards/unsafe_rewards": -0.3782378137111664, "step": 300 }, { - "epoch": 0.08, - "learning_rate": 4.111405835543766e-07, - "logits/chosen": -2.1124207973480225, - "logits/rejected": -1.8679959774017334, - "logps/chosen": -226.321533203125, - "logps/rejected": -185.8311767578125, - "loss": 1386.0512, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.19944654405117035, - "rewards/margins": 0.06130058690905571, - "rewards/rejected": -0.26074710488319397, - "rewards/safe_rewards": -0.22467227280139923, - "rewards/unsafe_rewards": -0.17422081530094147, + "epoch": 0.33, + "learning_rate": 4.2138316408864197e-07, + "logits/chosen": -1.686356544494629, + "logits/rejected": -1.357250452041626, + "logps/chosen": -229.8454132080078, + "logps/rejected": -243.9219207763672, + "loss": 10935.6234, + "rewards/accuracies": 0.703125, + "rewards/chosen": -0.37233367562294006, + "rewards/margins": 0.21665258705615997, + "rewards/rejected": -0.5889862775802612, + "rewards/safe_rewards": -0.38686901330947876, + "rewards/unsafe_rewards": -0.34765738248825073, "step": 310 }, { - "epoch": 0.08, - "learning_rate": 4.244031830238727e-07, - "logits/chosen": -2.150132656097412, - "logits/rejected": -1.928096055984497, - "logps/chosen": -225.871337890625, - "logps/rejected": -196.04910278320312, - "loss": 1576.0187, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.1991344392299652, - "rewards/margins": 0.06467987596988678, - "rewards/rejected": -0.2638143002986908, - "rewards/safe_rewards": -0.22302480041980743, - "rewards/unsafe_rewards": -0.1752440631389618, + "epoch": 0.34, + "learning_rate": 4.1442403102434954e-07, + "logits/chosen": -1.6515910625457764, + "logits/rejected": -1.3183988332748413, + "logps/chosen": -255.7494659423828, + "logps/rejected": -243.8734130859375, + "loss": 11267.0859, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.408983051776886, + "rewards/margins": 0.20800288021564484, + "rewards/rejected": -0.6169859170913696, + "rewards/safe_rewards": -0.40380653738975525, + "rewards/unsafe_rewards": -0.409812867641449, "step": 320 }, { - "epoch": 0.09, - "learning_rate": 4.3766578249336866e-07, - "logits/chosen": -2.0506796836853027, - "logits/rejected": -1.8779433965682983, - "logps/chosen": -212.2921600341797, - "logps/rejected": -216.576904296875, - "loss": 1595.0307, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.19741246104240417, - "rewards/margins": 0.0619831383228302, - "rewards/rejected": -0.2593955993652344, - "rewards/safe_rewards": -0.20674028992652893, - "rewards/unsafe_rewards": -0.18808463215827942, + "epoch": 0.36, + "learning_rate": 4.0723273048783426e-07, + "logits/chosen": -1.6536251306533813, + "logits/rejected": -1.2981910705566406, + "logps/chosen": -237.6139373779297, + "logps/rejected": -205.27536010742188, + "loss": 12007.9836, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.3680744767189026, + "rewards/margins": 0.13818387687206268, + "rewards/rejected": -0.5062582492828369, + "rewards/safe_rewards": -0.34349921345710754, + "rewards/unsafe_rewards": -0.3608907461166382, "step": 330 }, { - "epoch": 0.09, - "learning_rate": 4.509283819628647e-07, - "logits/chosen": -2.2223238945007324, - "logits/rejected": -1.880895972251892, - "logps/chosen": -226.3984375, - "logps/rejected": -178.93789672851562, - "loss": 1535.964, - "rewards/accuracies": 0.762499988079071, - "rewards/chosen": -0.11265642940998077, - "rewards/margins": 0.0751795843243599, - "rewards/rejected": -0.18783600628376007, - "rewards/safe_rewards": -0.10846143960952759, - "rewards/unsafe_rewards": -0.11685142666101456, + "epoch": 0.37, + "learning_rate": 3.998194166278367e-07, + "logits/chosen": -1.6038053035736084, + "logits/rejected": -1.3264662027359009, + "logps/chosen": -248.21444702148438, + "logps/rejected": -211.14639282226562, + "loss": 11906.0977, + "rewards/accuracies": 0.609375, + "rewards/chosen": -0.34341517090797424, + "rewards/margins": 0.12123014777898788, + "rewards/rejected": -0.46464529633522034, + "rewards/safe_rewards": -0.3494574725627899, + "rewards/unsafe_rewards": -0.3820589482784271, "step": 340 }, { - "epoch": 0.09, - "learning_rate": 4.6419098143236076e-07, - "logits/chosen": -2.1769843101501465, - "logits/rejected": -1.9876177310943604, - "logps/chosen": -218.25308227539062, - "logps/rejected": -201.05123901367188, - "loss": 1550.5808, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.08950825035572052, - "rewards/margins": 0.07684050500392914, - "rewards/rejected": -0.16634874045848846, - "rewards/safe_rewards": -0.07859428226947784, - "rewards/unsafe_rewards": -0.1004222184419632, + "epoch": 0.38, + "learning_rate": 3.9219455707691e-07, + "logits/chosen": -1.6641082763671875, + "logits/rejected": -1.3824983835220337, + "logps/chosen": -261.64422607421875, + "logps/rejected": -247.2106170654297, + "loss": 11737.2516, + "rewards/accuracies": 0.690625011920929, + "rewards/chosen": -0.4960857331752777, + "rewards/margins": 0.17808444797992706, + "rewards/rejected": -0.674170196056366, + "rewards/safe_rewards": -0.48013004660606384, + "rewards/unsafe_rewards": -0.4812780022621155, "step": 350 }, { - "epoch": 0.1, - "learning_rate": 4.774535809018567e-07, - "logits/chosen": -2.1713149547576904, - "logits/rejected": -1.8387962579727173, - "logps/chosen": -216.64016723632812, - "logps/rejected": -177.36866760253906, - "loss": 1439.1969, - "rewards/accuracies": 0.762499988079071, - "rewards/chosen": -0.149744912981987, - "rewards/margins": 0.08795475214719772, - "rewards/rejected": -0.23769967257976532, - "rewards/safe_rewards": -0.15212786197662354, - "rewards/unsafe_rewards": -0.14736196398735046, + "epoch": 0.39, + "learning_rate": 3.8436891817107555e-07, + "logits/chosen": -1.5775325298309326, + "logits/rejected": -1.2561166286468506, + "logps/chosen": -253.5785675048828, + "logps/rejected": -248.65274047851562, + "loss": 11760.0016, + "rewards/accuracies": 0.690625011920929, + "rewards/chosen": -0.5550694465637207, + "rewards/margins": 0.16455566883087158, + "rewards/rejected": -0.7196251153945923, + "rewards/safe_rewards": -0.5541168451309204, + "rewards/unsafe_rewards": -0.558402955532074, "step": 360 }, { - "epoch": 0.1, - "learning_rate": 4.907161803713527e-07, - "logits/chosen": -2.1123909950256348, - "logits/rejected": -1.9190731048583984, - "logps/chosen": -223.93106079101562, - "logps/rejected": -184.443603515625, - "loss": 1801.4723, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.1042817011475563, - "rewards/margins": 0.08119568973779678, - "rewards/rejected": -0.18547740578651428, - "rewards/safe_rewards": -0.11494370549917221, - "rewards/unsafe_rewards": -0.09361971914768219, + "epoch": 0.4, + "learning_rate": 3.763535497477079e-07, + "logits/chosen": -1.6439412832260132, + "logits/rejected": -1.3449862003326416, + "logps/chosen": -241.54110717773438, + "logps/rejected": -243.0854949951172, + "loss": 11248.6063, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.4285287857055664, + "rewards/margins": 0.19106730818748474, + "rewards/rejected": -0.6195961236953735, + "rewards/safe_rewards": -0.4356611371040344, + "rewards/unsafe_rewards": -0.4376387596130371, "step": 370 }, { - "epoch": 0.1, - "learning_rate": 4.999990349711405e-07, - "logits/chosen": -2.0340590476989746, - "logits/rejected": -1.7972625494003296, - "logps/chosen": -208.8951416015625, - "logps/rejected": -193.8221435546875, - "loss": 1590.2652, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.054428935050964355, - "rewards/margins": 0.07977239042520523, - "rewards/rejected": -0.13420133292675018, - "rewards/safe_rewards": -0.05824419856071472, - "rewards/unsafe_rewards": -0.050613678991794586, + "epoch": 0.41, + "learning_rate": 3.681597695431148e-07, + "logits/chosen": -1.8219878673553467, + "logits/rejected": -1.5961018800735474, + "logps/chosen": -251.90487670898438, + "logps/rejected": -242.7232666015625, + "loss": 11304.5852, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -0.47497597336769104, + "rewards/margins": 0.1433551162481308, + "rewards/rejected": -0.6183310747146606, + "rewards/safe_rewards": -0.5098608732223511, + "rewards/unsafe_rewards": -0.44355297088623047, "step": 380 }, { - "epoch": 0.1, - "learning_rate": 4.999818791097839e-07, - "logits/chosen": -1.7782261371612549, - "logits/rejected": -1.6585538387298584, - "logps/chosen": -203.72482299804688, - "logps/rejected": -176.34144592285156, - "loss": 2397.1555, - "rewards/accuracies": 0.5625, - "rewards/chosen": -0.1552494913339615, - "rewards/margins": -0.01551857776939869, - "rewards/rejected": -0.13973090052604675, - "rewards/safe_rewards": -0.21709856390953064, - "rewards/unsafe_rewards": -0.09340039640665054, + "epoch": 0.42, + "learning_rate": 3.597991472118426e-07, + "logits/chosen": -1.8015209436416626, + "logits/rejected": -1.5055344104766846, + "logps/chosen": -256.70831298828125, + "logps/rejected": -232.10440063476562, + "loss": 11725.1453, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.4393288195133209, + "rewards/margins": 0.18009425699710846, + "rewards/rejected": -0.6194230914115906, + "rewards/safe_rewards": -0.43668675422668457, + "rewards/unsafe_rewards": -0.4189079701900482, "step": 390 }, { - "epoch": 0.11, - "learning_rate": 4.999432798565667e-07, - "logits/chosen": -1.8543522357940674, - "logits/rejected": -1.6574729681015015, - "logps/chosen": -259.066650390625, - "logps/rejected": -218.59951782226562, - "loss": 1563.419, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.08661986887454987, - "rewards/margins": 0.09774277359247208, - "rewards/rejected": -0.18436264991760254, - "rewards/safe_rewards": -0.080643430352211, - "rewards/unsafe_rewards": -0.09259630739688873, + "epoch": 0.43, + "learning_rate": 3.512834879902715e-07, + "logits/chosen": -1.9032500982284546, + "logits/rejected": -1.6418339014053345, + "logps/chosen": -259.31646728515625, + "logps/rejected": -236.5167999267578, + "loss": 11323.9117, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -0.5012739300727844, + "rewards/margins": 0.1391296088695526, + "rewards/rejected": -0.6404035091400146, + "rewards/safe_rewards": -0.5036896467208862, + "rewards/unsafe_rewards": -0.5019193887710571, "step": 400 }, { - "epoch": 0.11, - "learning_rate": 4.998832405225244e-07, - "logits/chosen": -1.901777982711792, - "logits/rejected": -1.9128797054290771, - "logps/chosen": -171.1883544921875, - "logps/rejected": -187.63064575195312, - "loss": 1463.8475, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.06575946509838104, - "rewards/margins": 0.049509741365909576, - "rewards/rejected": -0.11526919901371002, - "rewards/safe_rewards": -0.07565880566835403, - "rewards/unsafe_rewards": -0.055860113352537155, + "epoch": 0.44, + "learning_rate": 3.426248160275693e-07, + "logits/chosen": -1.7674633264541626, + "logits/rejected": -1.5076560974121094, + "logps/chosen": -252.45761108398438, + "logps/rejected": -228.1309814453125, + "loss": 11663.7305, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -0.4755610525608063, + "rewards/margins": 0.12453018128871918, + "rewards/rejected": -0.600091278553009, + "rewards/safe_rewards": -0.4826468825340271, + "rewards/unsafe_rewards": -0.46062642335891724, "step": 410 }, { - "epoch": 0.11, - "learning_rate": 4.998017662578182e-07, - "logits/chosen": -1.8893829584121704, - "logits/rejected": -1.7524290084838867, - "logps/chosen": -182.86929321289062, - "logps/rejected": -179.08619689941406, - "loss": 1636.8207, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.09507036209106445, - "rewards/margins": 0.12069718539714813, - "rewards/rejected": -0.21576757729053497, - "rewards/safe_rewards": -0.13712948560714722, - "rewards/unsafe_rewards": -0.05301126837730408, + "epoch": 0.45, + "learning_rate": 3.338353574075381e-07, + "logits/chosen": -1.7134525775909424, + "logits/rejected": -1.4433438777923584, + "logps/chosen": -219.3074951171875, + "logps/rejected": -220.09909057617188, + "loss": 12136.807, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.3855774998664856, + "rewards/margins": 0.14632567763328552, + "rewards/rejected": -0.5319031476974487, + "rewards/safe_rewards": -0.38518843054771423, + "rewards/unsafe_rewards": -0.3635571300983429, "step": 420 }, { - "epoch": 0.11, - "learning_rate": 4.996988640512931e-07, - "logits/chosen": -1.7616560459136963, - "logits/rejected": -1.5181421041488647, - "logps/chosen": -241.02487182617188, - "logps/rejected": -209.2313232421875, - "loss": 3314.1547, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.060619037598371506, - "rewards/margins": 0.06125183030962944, - "rewards/rejected": -0.12187085300683975, - "rewards/safe_rewards": -0.05368631333112717, - "rewards/unsafe_rewards": -0.06755176186561584, + "epoch": 0.46, + "learning_rate": 3.2492752288532916e-07, + "logits/chosen": -1.748968482017517, + "logits/rejected": -1.4328683614730835, + "logps/chosen": -238.69827270507812, + "logps/rejected": -224.539306640625, + "loss": 11666.757, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -0.37423551082611084, + "rewards/margins": 0.1460035890340805, + "rewards/rejected": -0.5202391743659973, + "rewards/safe_rewards": -0.3754008710384369, + "rewards/unsafe_rewards": -0.3486160933971405, "step": 430 }, { - "epoch": 0.12, - "learning_rate": 4.99574542729878e-07, - "logits/chosen": -2.117448091506958, - "logits/rejected": -1.952856421470642, - "logps/chosen": -215.55078125, - "logps/rejected": -177.14146423339844, - "loss": 1795.1945, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.07363261282444, - "rewards/margins": 0.07106383144855499, - "rewards/rejected": -0.144696444272995, - "rewards/safe_rewards": -0.08556940406560898, - "rewards/unsafe_rewards": -0.06169581413269043, + "epoch": 0.47, + "learning_rate": 3.159138903634006e-07, + "logits/chosen": -1.6538251638412476, + "logits/rejected": -1.3511749505996704, + "logps/chosen": -249.89956665039062, + "logps/rejected": -225.74087524414062, + "loss": 11565.893, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.46273189783096313, + "rewards/margins": 0.11824017763137817, + "rewards/rejected": -0.5809720158576965, + "rewards/safe_rewards": -0.4367545545101166, + "rewards/unsafe_rewards": -0.45838436484336853, "step": 440 }, { - "epoch": 0.12, - "learning_rate": 4.994288129578296e-07, - "logits/chosen": -2.3702845573425293, - "logits/rejected": -2.245753049850464, - "logps/chosen": -240.79183959960938, - "logps/rejected": -221.577880859375, - "loss": 1326.2917, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.09103454649448395, - "rewards/margins": 0.0868804007768631, - "rewards/rejected": -0.17791494727134705, - "rewards/safe_rewards": -0.08847610652446747, - "rewards/unsafe_rewards": -0.09359299391508102, + "epoch": 0.48, + "learning_rate": 3.068071871314626e-07, + "logits/chosen": -1.4675312042236328, + "logits/rejected": -1.178327202796936, + "logps/chosen": -228.97360229492188, + "logps/rejected": -227.25985717773438, + "loss": 11443.1875, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -0.5152639150619507, + "rewards/margins": 0.14481747150421143, + "rewards/rejected": -0.6600814461708069, + "rewards/safe_rewards": -0.528190016746521, + "rewards/unsafe_rewards": -0.5448918342590332, "step": 450 }, { - "epoch": 0.12, - "learning_rate": 4.99261687235816e-07, - "logits/chosen": -2.4940438270568848, - "logits/rejected": -2.2912464141845703, - "logps/chosen": -248.4958953857422, - "logps/rejected": -194.27401733398438, - "loss": 19418.6688, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.14589664340019226, - "rewards/margins": 0.016192104667425156, - "rewards/rejected": -0.16208875179290771, - "rewards/safe_rewards": -0.18939614295959473, - "rewards/unsafe_rewards": -0.10239718109369278, + "epoch": 0.49, + "learning_rate": 2.976202718954869e-07, + "logits/chosen": -1.680240273475647, + "logits/rejected": -1.292902946472168, + "logps/chosen": -257.8706359863281, + "logps/rejected": -234.29568481445312, + "loss": 11584.85, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.4649757444858551, + "rewards/margins": 0.16425415873527527, + "rewards/rejected": -0.6292298436164856, + "rewards/safe_rewards": -0.49089688062667847, + "rewards/unsafe_rewards": -0.4636811316013336, "step": 460 }, { - "epoch": 0.12, - "learning_rate": 4.990731798998458e-07, - "logits/chosen": -2.3484692573547363, - "logits/rejected": -2.2091269493103027, - "logps/chosen": -228.6166229248047, - "logps/rejected": -209.12295532226562, - "loss": 1498.4709, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.11217610538005829, - "rewards/margins": 0.06052281707525253, - "rewards/rejected": -0.17269891500473022, - "rewards/safe_rewards": -0.11405851691961288, - "rewards/unsafe_rewards": -0.11029370129108429, + "epoch": 0.51, + "learning_rate": 2.8836611662115634e-07, + "logits/chosen": -1.7547317743301392, + "logits/rejected": -1.4596977233886719, + "logps/chosen": -242.9713897705078, + "logps/rejected": -236.12692260742188, + "loss": 11948.3883, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.4835948944091797, + "rewards/margins": 0.15179117023944855, + "rewards/rejected": -0.6353861093521118, + "rewards/safe_rewards": -0.4963745176792145, + "rewards/unsafe_rewards": -0.505788266658783, "step": 470 }, { - "epoch": 0.13, - "learning_rate": 4.988633071200378e-07, - "logits/chosen": -2.1735734939575195, - "logits/rejected": -2.029167413711548, - "logps/chosen": -187.85093688964844, - "logps/rejected": -177.81895446777344, - "loss": 1690.9148, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.12577421963214874, - "rewards/margins": 0.04982226714491844, - "rewards/rejected": -0.17559649050235748, - "rewards/safe_rewards": -0.12387330830097198, - "rewards/unsafe_rewards": -0.1276751309633255, + "epoch": 0.52, + "learning_rate": 2.7905778821739056e-07, + "logits/chosen": -1.5380910634994507, + "logits/rejected": -1.2398124933242798, + "logps/chosen": -237.8711700439453, + "logps/rejected": -234.6429443359375, + "loss": 12133.4109, + "rewards/accuracies": 0.578125, + "rewards/chosen": -0.5105168223381042, + "rewards/margins": 0.13084134459495544, + "rewards/rejected": -0.6413581967353821, + "rewards/safe_rewards": -0.46697384119033813, + "rewards/unsafe_rewards": -0.5045939683914185, "step": 480 }, { - "epoch": 0.13, - "learning_rate": 4.98632086899234e-07, - "logits/chosen": -2.1900582313537598, - "logits/rejected": -2.1241798400878906, - "logps/chosen": -202.78292846679688, - "logps/rejected": -172.89369201660156, - "loss": 2924.7781, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.19147923588752747, - "rewards/margins": -0.01486232876777649, - "rewards/rejected": -0.1766168773174286, - "rewards/safe_rewards": -0.2867248058319092, - "rewards/unsafe_rewards": -0.09623364359140396, + "epoch": 0.53, + "learning_rate": 2.69708430085812e-07, + "logits/chosen": -1.3370671272277832, + "logits/rejected": -0.9828665852546692, + "logps/chosen": -247.05093383789062, + "logps/rejected": -231.03042602539062, + "loss": 12143.6172, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.5164421200752258, + "rewards/margins": 0.15838320553302765, + "rewards/rejected": -0.6748253107070923, + "rewards/safe_rewards": -0.5219355821609497, + "rewards/unsafe_rewards": -0.5064952969551086, "step": 490 }, { - "epoch": 0.13, - "learning_rate": 4.983795390714552e-07, - "logits/chosen": -2.264794111251831, - "logits/rejected": -2.1937930583953857, - "logps/chosen": -213.0334930419922, - "logps/rejected": -179.6028289794922, - "loss": 1504.5268, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.0930142030119896, - "rewards/margins": 0.08360128104686737, - "rewards/rejected": -0.17661550641059875, - "rewards/safe_rewards": -0.08157871663570404, - "rewards/unsafe_rewards": -0.10444968938827515, + "epoch": 0.54, + "learning_rate": 2.6033124356220325e-07, + "logits/chosen": -1.292763113975525, + "logits/rejected": -0.891495406627655, + "logps/chosen": -263.08270263671875, + "logps/rejected": -247.49874877929688, + "loss": 11841.1211, + "rewards/accuracies": 0.671875, + "rewards/chosen": -0.5315951704978943, + "rewards/margins": 0.1979287564754486, + "rewards/rejected": -0.7295239567756653, + "rewards/safe_rewards": -0.48640185594558716, + "rewards/unsafe_rewards": -0.5480044484138489, "step": 500 }, { - "epoch": 0.13, - "eval_logits/chosen": -2.1038978099823, - "eval_logits/rejected": -1.9343221187591553, - "eval_logps/chosen": -157.80984497070312, - "eval_logps/rejected": -124.56560516357422, - "eval_loss": 1034.4193115234375, - "eval_rewards/accuracies": 0.6808850169181824, - "eval_rewards/chosen": -0.17460301518440247, - "eval_rewards/margins": 0.04972272738814354, - "eval_rewards/rejected": -0.22432571649551392, - "eval_rewards/safe_rewards": -0.1710742712020874, - "eval_rewards/unsafe_rewards": -0.16879959404468536, - "eval_runtime": 2352.1255, - "eval_samples_per_second": 14.899, - "eval_steps_per_second": 0.466, + "epoch": 0.54, + "eval_logits/chosen": -0.6399103999137878, + "eval_logits/rejected": -0.03118806704878807, + "eval_logps/chosen": -202.84010314941406, + "eval_logps/rejected": -173.10743713378906, + "eval_loss": 4375.67431640625, + "eval_rewards/accuracies": 0.6439980864524841, + "eval_rewards/chosen": -0.724012017250061, + "eval_rewards/margins": 0.08234991878271103, + "eval_rewards/rejected": -0.8063618540763855, + "eval_rewards/safe_rewards": -0.717704176902771, + "eval_rewards/unsafe_rewards": -0.7185689806938171, + "eval_runtime": 1042.7523, + "eval_samples_per_second": 31.689, + "eval_steps_per_second": 0.991, "step": 500 }, { - "epoch": 0.14, - "learning_rate": 4.981056853001998e-07, - "logits/chosen": -2.2506051063537598, - "logits/rejected": -2.1504693031311035, - "logps/chosen": -216.8814239501953, - "logps/rejected": -204.0018310546875, - "loss": 1505.1768, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.15048286318778992, - "rewards/margins": 0.07545869052410126, - "rewards/rejected": -0.22594153881072998, - "rewards/safe_rewards": -0.15144427120685577, - "rewards/unsafe_rewards": -0.14952147006988525, + "epoch": 0.55, + "learning_rate": 2.509394692761622e-07, + "logits/chosen": -1.2631930112838745, + "logits/rejected": -0.8121662139892578, + "logps/chosen": -247.9859619140625, + "logps/rejected": -225.3314666748047, + "loss": 11225.3125, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.4896624982357025, + "rewards/margins": 0.20212964713573456, + "rewards/rejected": -0.6917921304702759, + "rewards/safe_rewards": -0.4899962544441223, + "rewards/unsafe_rewards": -0.48260608315467834, "step": 510 }, { - "epoch": 0.14, - "learning_rate": 4.978105490765854e-07, - "logits/chosen": -2.305607795715332, - "logits/rejected": -2.175187826156616, - "logps/chosen": -217.0155792236328, - "logps/rejected": -215.1630096435547, - "loss": 1556.0524, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.13201627135276794, - "rewards/margins": 0.0949278324842453, - "rewards/rejected": -0.22694408893585205, - "rewards/safe_rewards": -0.13568034768104553, - "rewards/unsafe_rewards": -0.12835213541984558, + "epoch": 0.56, + "learning_rate": 2.415463684552728e-07, + "logits/chosen": -1.3854891061782837, + "logits/rejected": -1.0462169647216797, + "logps/chosen": -257.8296813964844, + "logps/rejected": -243.88497924804688, + "loss": 11631.3773, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.5190461874008179, + "rewards/margins": 0.16427266597747803, + "rewards/rejected": -0.6833187937736511, + "rewards/safe_rewards": -0.5435765385627747, + "rewards/unsafe_rewards": -0.5212621688842773, "step": 520 }, { - "epoch": 0.14, - "learning_rate": 4.974941557173339e-07, - "logits/chosen": -2.287376642227173, - "logits/rejected": -2.190887928009033, - "logps/chosen": -215.2474365234375, - "logps/rejected": -184.8167266845703, - "loss": 1557.0826, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.10488133132457733, - "rewards/margins": 0.07161478698253632, - "rewards/rejected": -0.17649611830711365, - "rewards/safe_rewards": -0.11611838638782501, - "rewards/unsafe_rewards": -0.09364431351423264, + "epoch": 0.57, + "learning_rate": 2.321652042001919e-07, + "logits/chosen": -1.5475881099700928, + "logits/rejected": -1.1823749542236328, + "logps/chosen": -271.8938903808594, + "logps/rejected": -264.72967529296875, + "loss": 11306.525, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -0.5788856744766235, + "rewards/margins": 0.16636984050273895, + "rewards/rejected": -0.7452555298805237, + "rewards/safe_rewards": -0.5556532740592957, + "rewards/unsafe_rewards": -0.5943908095359802, "step": 530 }, { - "epoch": 0.14, - "learning_rate": 4.971565323625996e-07, - "logits/chosen": -2.3267552852630615, - "logits/rejected": -2.270484447479248, - "logps/chosen": -213.3306121826172, - "logps/rejected": -209.4958038330078, - "loss": 3809.1977, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.0942225232720375, - "rewards/margins": 0.06254059076309204, - "rewards/rejected": -0.15676310658454895, - "rewards/safe_rewards": -0.1080092191696167, - "rewards/unsafe_rewards": -0.08043583482503891, + "epoch": 0.58, + "learning_rate": 2.2280922275709213e-07, + "logits/chosen": -1.431162714958191, + "logits/rejected": -1.0832916498184204, + "logps/chosen": -261.7244567871094, + "logps/rejected": -256.2908630371094, + "loss": 11521.3875, + "rewards/accuracies": 0.6781250238418579, + "rewards/chosen": -0.6132346391677856, + "rewards/margins": 0.1872728019952774, + "rewards/rejected": -0.8005074262619019, + "rewards/safe_rewards": -0.5748257637023926, + "rewards/unsafe_rewards": -0.6018111109733582, "step": 540 }, { - "epoch": 0.15, - "learning_rate": 4.967977079736413e-07, - "logits/chosen": -2.281008243560791, - "logits/rejected": -2.1277031898498535, - "logps/chosen": -236.482666015625, - "logps/rejected": -203.95657348632812, - "loss": 1679.9348, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.09997034817934036, - "rewards/margins": 0.09865220636129379, - "rewards/rejected": -0.19862253963947296, - "rewards/safe_rewards": -0.08968280255794525, - "rewards/unsafe_rewards": -0.11025787889957428, + "epoch": 0.59, + "learning_rate": 2.1349163481390187e-07, + "logits/chosen": -1.6218006610870361, + "logits/rejected": -1.2075737714767456, + "logps/chosen": -254.3673095703125, + "logps/rejected": -242.2095947265625, + "loss": 11620.8375, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.5205433368682861, + "rewards/margins": 0.20417086780071259, + "rewards/rejected": -0.7247141599655151, + "rewards/safe_rewards": -0.5093175172805786, + "rewards/unsafe_rewards": -0.5348318219184875, "step": 550 }, { - "epoch": 0.15, - "learning_rate": 4.96417713330338e-07, - "logits/chosen": -2.178715705871582, - "logits/rejected": -2.0697832107543945, - "logps/chosen": -211.88461303710938, - "logps/rejected": -188.769775390625, - "loss": 2708.2738, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.10986654460430145, - "rewards/margins": 0.08689532428979874, - "rewards/rejected": -0.1967618614435196, - "rewards/safe_rewards": -0.11271083354949951, - "rewards/unsafe_rewards": -0.10702226310968399, + "epoch": 0.6, + "learning_rate": 2.0422559684675494e-07, + "logits/chosen": -1.6558773517608643, + "logits/rejected": -1.307812213897705, + "logps/chosen": -242.2835693359375, + "logps/rejected": -213.8196258544922, + "loss": 11084.5562, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.4332474172115326, + "rewards/margins": 0.16668307781219482, + "rewards/rejected": -0.599930465221405, + "rewards/safe_rewards": -0.4480403959751129, + "rewards/unsafe_rewards": -0.4273868203163147, "step": 560 }, { - "epoch": 0.15, - "learning_rate": 4.960165810285484e-07, - "logits/chosen": -2.1705868244171143, - "logits/rejected": -2.106612205505371, - "logps/chosen": -207.7125701904297, - "logps/rejected": -189.08287048339844, - "loss": 2088.3215, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.11839534342288971, - "rewards/margins": 0.06037949398159981, - "rewards/rejected": -0.17877483367919922, - "rewards/safe_rewards": -0.12084673345088959, - "rewards/unsafe_rewards": -0.11594392359256744, + "epoch": 0.61, + "learning_rate": 1.950241925429867e-07, + "logits/chosen": -1.5274425745010376, + "logits/rejected": -1.1276582479476929, + "logps/chosen": -240.20712280273438, + "logps/rejected": -234.57687377929688, + "loss": 11325.8641, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.46497711539268494, + "rewards/margins": 0.16491682827472687, + "rewards/rejected": -0.6298940181732178, + "rewards/safe_rewards": -0.41821298003196716, + "rewards/unsafe_rewards": -0.45536503195762634, "step": 570 }, { - "epoch": 0.15, - "learning_rate": 4.955943454773152e-07, - "logits/chosen": -2.127573251724243, - "logits/rejected": -2.103651762008667, - "logps/chosen": -228.0417938232422, - "logps/rejected": -221.4380645751953, - "loss": 1461.5396, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.13224250078201294, - "rewards/margins": 0.05800231173634529, - "rewards/rejected": -0.19024482369422913, - "rewards/safe_rewards": -0.13077394664287567, - "rewards/unsafe_rewards": -0.1337110698223114, + "epoch": 0.62, + "learning_rate": 1.8590041432690893e-07, + "logits/chosen": -1.3744151592254639, + "logits/rejected": -0.9575246572494507, + "logps/chosen": -260.49481201171875, + "logps/rejected": -244.38412475585938, + "loss": 11324.075, + "rewards/accuracies": 0.6781250238418579, + "rewards/chosen": -0.5417405962944031, + "rewards/margins": 0.17112591862678528, + "rewards/rejected": -0.712866485118866, + "rewards/safe_rewards": -0.5280636548995972, + "rewards/unsafe_rewards": -0.5438416004180908, "step": 580 }, { - "epoch": 0.16, - "learning_rate": 4.951510428959134e-07, - "logits/chosen": -2.2244675159454346, - "logits/rejected": -2.0955371856689453, - "logps/chosen": -216.9449462890625, - "logps/rejected": -191.6647186279297, - "loss": 1552.0829, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.12478175014257431, - "rewards/margins": 0.07837653160095215, - "rewards/rejected": -0.20315828919410706, - "rewards/safe_rewards": -0.12848195433616638, - "rewards/unsafe_rewards": -0.12108156830072403, + "epoch": 0.63, + "learning_rate": 1.7686714501444788e-07, + "logits/chosen": -1.4106223583221436, + "logits/rejected": -0.9382559657096863, + "logps/chosen": -262.56097412109375, + "logps/rejected": -243.7979278564453, + "loss": 11962.2859, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.5682249665260315, + "rewards/margins": 0.13025227189064026, + "rewards/rejected": -0.6984771490097046, + "rewards/safe_rewards": -0.6169765591621399, + "rewards/unsafe_rewards": -0.5568891763687134, "step": 590 }, { - "epoch": 0.16, - "learning_rate": 4.94686711310743e-07, - "logits/chosen": -2.2538208961486816, - "logits/rejected": -2.0603907108306885, - "logps/chosen": -255.35726928710938, - "logps/rejected": -186.90707397460938, - "loss": 1342.5938, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.1403053104877472, - "rewards/margins": 0.0783780962228775, - "rewards/rejected": -0.2186833918094635, - "rewards/safe_rewards": -0.13568279147148132, - "rewards/unsafe_rewards": -0.14492781460285187, + "epoch": 0.65, + "learning_rate": 1.679371396225504e-07, + "logits/chosen": -1.2628064155578613, + "logits/rejected": -0.8373771905899048, + "logps/chosen": -275.27001953125, + "logps/rejected": -267.8952331542969, + "loss": 11391.5367, + "rewards/accuracies": 0.684374988079071, + "rewards/chosen": -0.5605230927467346, + "rewards/margins": 0.17430099844932556, + "rewards/rejected": -0.7348241806030273, + "rewards/safe_rewards": -0.5333396792411804, + "rewards/unsafe_rewards": -0.5728257298469543, "step": 600 }, { - "epoch": 0.16, - "learning_rate": 4.942013905520675e-07, - "logits/chosen": -2.388282060623169, - "logits/rejected": -2.158958673477173, - "logps/chosen": -210.7412872314453, - "logps/rejected": -187.48587036132812, - "loss": 1318.3062, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.16068243980407715, - "rewards/margins": 0.06869255006313324, - "rewards/rejected": -0.2293749749660492, - "rewards/safe_rewards": -0.15053199231624603, - "rewards/unsafe_rewards": -0.17083287239074707, + "epoch": 0.66, + "learning_rate": 1.5912300735904248e-07, + "logits/chosen": -1.3267945051193237, + "logits/rejected": -0.7826977968215942, + "logps/chosen": -270.63555908203125, + "logps/rejected": -231.94143676757812, + "loss": 11024.2969, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -0.5260102152824402, + "rewards/margins": 0.1785580813884735, + "rewards/rejected": -0.7045683860778809, + "rewards/safe_rewards": -0.5124560594558716, + "rewards/unsafe_rewards": -0.5217950344085693, "step": 610 }, { - "epoch": 0.16, - "learning_rate": 4.936951222505975e-07, - "logits/chosen": -2.352155923843384, - "logits/rejected": -2.2548491954803467, - "logps/chosen": -196.47250366210938, - "logps/rejected": -187.747802734375, - "loss": 1434.7557, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.15289297699928284, - "rewards/margins": 0.0698348730802536, - "rewards/rejected": -0.22272785007953644, - "rewards/safe_rewards": -0.1387401819229126, - "rewards/unsafe_rewards": -0.16704575717449188, + "epoch": 0.67, + "learning_rate": 1.5043719381837112e-07, + "logits/chosen": -1.2252156734466553, + "logits/rejected": -0.7665490508079529, + "logps/chosen": -256.24664306640625, + "logps/rejected": -239.9573974609375, + "loss": 11326.3789, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -0.5171566009521484, + "rewards/margins": 0.1807917356491089, + "rewards/rejected": -0.6979483366012573, + "rewards/safe_rewards": -0.5192286372184753, + "rewards/unsafe_rewards": -0.4774685502052307, "step": 620 }, { - "epoch": 0.17, - "learning_rate": 4.931679498339189e-07, - "logits/chosen": -2.458136796951294, - "logits/rejected": -2.338815689086914, - "logps/chosen": -220.99252319335938, - "logps/rejected": -198.9859619140625, - "loss": 1429.4932, - "rewards/accuracies": 0.762499988079071, - "rewards/chosen": -0.11757110059261322, - "rewards/margins": 0.07899567484855652, - "rewards/rejected": -0.19656677544116974, - "rewards/safe_rewards": -0.12531983852386475, - "rewards/unsafe_rewards": -0.1098223477602005, + "epoch": 0.68, + "learning_rate": 1.4189196340836865e-07, + "logits/chosen": -1.1827423572540283, + "logits/rejected": -0.6304258704185486, + "logps/chosen": -248.7926025390625, + "logps/rejected": -238.5314178466797, + "loss": 10711.3141, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.5181415677070618, + "rewards/margins": 0.22150596976280212, + "rewards/rejected": -0.7396475076675415, + "rewards/safe_rewards": -0.5149692893028259, + "rewards/unsafe_rewards": -0.5179430246353149, "step": 630 }, { - "epoch": 0.17, - "learning_rate": 4.926199185227683e-07, - "logits/chosen": -2.2776482105255127, - "logits/rejected": -2.1631855964660645, - "logps/chosen": -169.3491668701172, - "logps/rejected": -162.9020233154297, - "loss": 1432.0049, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.15498077869415283, - "rewards/margins": 0.0734110176563263, - "rewards/rejected": -0.22839176654815674, - "rewards/safe_rewards": -0.14291585981845856, - "rewards/unsafe_rewards": -0.16704566776752472, + "epoch": 0.69, + "learning_rate": 1.334993820328541e-07, + "logits/chosen": -1.3094868659973145, + "logits/rejected": -0.920754075050354, + "logps/chosen": -239.2587890625, + "logps/rejected": -236.24411010742188, + "loss": 11078.3891, + "rewards/accuracies": 0.6968749761581421, + "rewards/chosen": -0.5026401281356812, + "rewards/margins": 0.20332691073417664, + "rewards/rejected": -0.7059669494628906, + "rewards/safe_rewards": -0.49299708008766174, + "rewards/unsafe_rewards": -0.5529029965400696, "step": 640 }, { - "epoch": 0.17, - "learning_rate": 4.920510753271539e-07, - "logits/chosen": -2.3641550540924072, - "logits/rejected": -2.2241387367248535, - "logps/chosen": -220.9196319580078, - "logps/rejected": -180.23648071289062, - "loss": 1676.902, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.15307243168354034, - "rewards/margins": 0.03921856731176376, - "rewards/rejected": -0.1922910064458847, - "rewards/safe_rewards": -0.17227527499198914, - "rewards/unsafe_rewards": -0.13386958837509155, + "epoch": 0.7, + "learning_rate": 1.252713000545221e-07, + "logits/chosen": -1.3778759241104126, + "logits/rejected": -0.8713847398757935, + "logps/chosen": -272.623779296875, + "logps/rejected": -254.25857543945312, + "loss": 10949.6641, + "rewards/accuracies": 0.671875, + "rewards/chosen": -0.5196093320846558, + "rewards/margins": 0.20374643802642822, + "rewards/rejected": -0.723355770111084, + "rewards/safe_rewards": -0.5185881853103638, + "rewards/unsafe_rewards": -0.5383481979370117, "step": 650 }, { - "epoch": 0.18, - "learning_rate": 4.914614690423226e-07, - "logits/chosen": -2.398496389389038, - "logits/rejected": -2.2206454277038574, - "logps/chosen": -243.0208740234375, - "logps/rejected": -176.14743041992188, - "loss": 1413.3051, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.1554284393787384, - "rewards/margins": 0.08044226467609406, - "rewards/rejected": -0.23587068915367126, - "rewards/safe_rewards": -0.14427784085273743, - "rewards/unsafe_rewards": -0.16657905280590057, + "epoch": 0.71, + "learning_rate": 1.1721933556217792e-07, + "logits/chosen": -1.3767964839935303, + "logits/rejected": -0.9163608551025391, + "logps/chosen": -265.26025390625, + "logps/rejected": -234.197509765625, + "loss": 11203.9594, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.5324097871780396, + "rewards/margins": 0.14683523774147034, + "rewards/rejected": -0.6792449951171875, + "rewards/safe_rewards": -0.4919240474700928, + "rewards/unsafe_rewards": -0.5484806299209595, "step": 660 }, { - "epoch": 0.18, - "learning_rate": 4.908511502445748e-07, - "logits/chosen": -2.340196371078491, - "logits/rejected": -2.163971185684204, - "logps/chosen": -203.44891357421875, - "logps/rejected": -176.7835235595703, - "loss": 1430.8182, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.15669873356819153, - "rewards/margins": 0.09174702316522598, - "rewards/rejected": -0.24844574928283691, - "rewards/safe_rewards": -0.1487455815076828, - "rewards/unsafe_rewards": -0.16465191543102264, + "epoch": 0.72, + "learning_rate": 1.0935485796594351e-07, + "logits/chosen": -1.2184772491455078, + "logits/rejected": -0.7559088468551636, + "logps/chosen": -255.09213256835938, + "logps/rejected": -233.2527313232422, + "loss": 11455.8781, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.5359419584274292, + "rewards/margins": 0.18527516722679138, + "rewards/rejected": -0.7212170958518982, + "rewards/safe_rewards": -0.4948631227016449, + "rewards/unsafe_rewards": -0.5253986716270447, "step": 670 }, { - "epoch": 0.18, - "learning_rate": 4.902201712869259e-07, - "logits/chosen": -2.3413987159729004, - "logits/rejected": -2.287224292755127, - "logps/chosen": -216.82315063476562, - "logps/rejected": -230.1833953857422, - "loss": 1610.7859, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.16405163705348969, - "rewards/margins": 0.07907158136367798, - "rewards/rejected": -0.24312321841716766, - "rewards/safe_rewards": -0.16942548751831055, - "rewards/unsafe_rewards": -0.15867780148983002, + "epoch": 0.73, + "learning_rate": 1.0168897194359921e-07, + "logits/chosen": -1.2939293384552002, + "logits/rejected": -0.9618832468986511, + "logps/chosen": -261.6130065917969, + "logps/rejected": -244.4445037841797, + "loss": 11458.9703, + "rewards/accuracies": 0.565625011920929, + "rewards/chosen": -0.5875786542892456, + "rewards/margins": 0.11981384456157684, + "rewards/rejected": -0.7073925137519836, + "rewards/safe_rewards": -0.5732249021530151, + "rewards/unsafe_rewards": -0.6051737070083618, "step": 680 }, { - "epoch": 0.18, - "learning_rate": 4.895685862946153e-07, - "logits/chosen": -2.3661742210388184, - "logits/rejected": -2.2336881160736084, - "logps/chosen": -227.04736328125, - "logps/rejected": -163.33116149902344, - "loss": 1423.0496, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.17147275805473328, - "rewards/margins": 0.07322325557470322, - "rewards/rejected": -0.2446960210800171, - "rewards/safe_rewards": -0.17245201766490936, - "rewards/unsafe_rewards": -0.1704934984445572, + "epoch": 0.74, + "learning_rate": 9.423250176072874e-08, + "logits/chosen": -1.3475579023361206, + "logits/rejected": -0.8516947031021118, + "logps/chosen": -245.6322021484375, + "logps/rejected": -219.94253540039062, + "loss": 11567.3188, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.5698672533035278, + "rewards/margins": 0.17955942451953888, + "rewards/rejected": -0.7494266629219055, + "rewards/safe_rewards": -0.5103119611740112, + "rewards/unsafe_rewards": -0.5987597703933716, "step": 690 }, { - "epoch": 0.19, - "learning_rate": 4.888964511604635e-07, - "logits/chosen": -2.397960662841797, - "logits/rejected": -2.265415668487549, - "logps/chosen": -220.89010620117188, - "logps/rejected": -177.60342407226562, - "loss": 1424.6546, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.11983241140842438, - "rewards/margins": 0.08319593220949173, - "rewards/rejected": -0.2030283510684967, - "rewards/safe_rewards": -0.10996539890766144, - "rewards/unsafe_rewards": -0.12969939410686493, + "epoch": 0.75, + "learning_rate": 8.699597598680753e-08, + "logits/chosen": -1.2689791917800903, + "logits/rejected": -0.7626538276672363, + "logps/chosen": -264.85296630859375, + "logps/rejected": -247.6870574951172, + "loss": 10655.6469, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.545720100402832, + "rewards/margins": 0.2018158882856369, + "rewards/rejected": -0.7475360035896301, + "rewards/safe_rewards": -0.4882515072822571, + "rewards/unsafe_rewards": -0.5326446294784546, "step": 700 }, { - "epoch": 0.19, - "learning_rate": 4.882038235400778e-07, - "logits/chosen": -2.4046547412872314, - "logits/rejected": -2.3042712211608887, - "logps/chosen": -292.0196838378906, - "logps/rejected": -202.9504852294922, - "loss": 1604.4137, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.14213845133781433, - "rewards/margins": 0.07890886813402176, - "rewards/rejected": -0.2210473120212555, - "rewards/safe_rewards": -0.1426847279071808, - "rewards/unsafe_rewards": -0.14159215986728668, + "epoch": 0.76, + "learning_rate": 7.998961262881506e-08, + "logits/chosen": -1.1607654094696045, + "logits/rejected": -0.5871553421020508, + "logps/chosen": -261.75592041015625, + "logps/rejected": -231.9571075439453, + "loss": 11412.2789, + "rewards/accuracies": 0.671875, + "rewards/chosen": -0.5627632141113281, + "rewards/margins": 0.17224544286727905, + "rewards/rejected": -0.7350085973739624, + "rewards/safe_rewards": -0.5951864719390869, + "rewards/unsafe_rewards": -0.5435152649879456, "step": 710 }, { - "epoch": 0.19, - "learning_rate": 4.874907628469068e-07, - "logits/chosen": -2.4545702934265137, - "logits/rejected": -2.370619297027588, - "logps/chosen": -225.6560516357422, - "logps/rejected": -189.28976440429688, - "loss": 1546.1886, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.13647596538066864, - "rewards/margins": 0.060147859156131744, - "rewards/rejected": -0.19662383198738098, - "rewards/safe_rewards": -0.13966143131256104, - "rewards/unsafe_rewards": -0.13329051434993744, + "epoch": 0.77, + "learning_rate": 7.322330470336313e-08, + "logits/chosen": -1.0268478393554688, + "logits/rejected": -0.5913484692573547, + "logps/chosen": -246.5682373046875, + "logps/rejected": -243.531494140625, + "loss": 11226.3484, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.5322012901306152, + "rewards/margins": 0.20058274269104004, + "rewards/rejected": -0.7327840924263, + "rewards/safe_rewards": -0.49175509810447693, + "rewards/unsafe_rewards": -0.5365317463874817, "step": 720 }, { - "epoch": 0.19, - "learning_rate": 4.867573302471433e-07, - "logits/chosen": -2.465315103530884, - "logits/rejected": -2.337440013885498, - "logps/chosen": -211.9778289794922, - "logps/rejected": -169.18666076660156, - "loss": 1432.6039, + "epoch": 0.79, + "learning_rate": 6.67066062677118e-08, + "logits/chosen": -1.1494791507720947, + "logits/rejected": -0.6350758075714111, + "logps/chosen": -267.52545166015625, + "logps/rejected": -236.26150512695312, + "loss": 11859.0688, "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.1557946652173996, - "rewards/margins": 0.06730376929044724, - "rewards/rejected": -0.22309847176074982, - "rewards/safe_rewards": -0.1758725792169571, - "rewards/unsafe_rewards": -0.13571679592132568, + "rewards/chosen": -0.5086154937744141, + "rewards/margins": 0.16134929656982422, + "rewards/rejected": -0.6699647307395935, + "rewards/safe_rewards": -0.5048638582229614, + "rewards/unsafe_rewards": -0.5199515223503113, "step": 730 }, { - "epoch": 0.2, - "learning_rate": 4.860035886544781e-07, - "logits/chosen": -2.4883487224578857, - "logits/rejected": -2.3331525325775146, - "logps/chosen": -223.3091278076172, - "logps/rejected": -192.04769897460938, - "loss": 1513.5076, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.11871711909770966, - "rewards/margins": 0.06358304619789124, - "rewards/rejected": -0.18230018019676208, - "rewards/safe_rewards": -0.12802672386169434, - "rewards/unsafe_rewards": -0.10940754413604736, + "epoch": 0.8, + "learning_rate": 6.044871892939746e-08, + "logits/chosen": -1.1767222881317139, + "logits/rejected": -0.7402639389038086, + "logps/chosen": -263.974365234375, + "logps/rejected": -254.4547576904297, + "loss": 11326.2469, + "rewards/accuracies": 0.6968749761581421, + "rewards/chosen": -0.5166382193565369, + "rewards/margins": 0.19398480653762817, + "rewards/rejected": -0.7106229662895203, + "rewards/safe_rewards": -0.5084013342857361, + "rewards/unsafe_rewards": -0.5374983549118042, "step": 740 }, { - "epoch": 0.2, - "learning_rate": 4.852296027247031e-07, - "logits/chosen": -2.3702211380004883, - "logits/rejected": -2.3365530967712402, - "logps/chosen": -176.70053100585938, - "logps/rejected": -187.47071838378906, - "loss": 1308.6031, - "rewards/accuracies": 0.7875000238418579, - "rewards/chosen": -0.10411804914474487, - "rewards/margins": 0.09238660335540771, - "rewards/rejected": -0.1965046375989914, - "rewards/safe_rewards": -0.09727554023265839, - "rewards/unsafe_rewards": -0.11096054315567017, + "epoch": 0.81, + "learning_rate": 5.44584788535217e-08, + "logits/chosen": -1.2126829624176025, + "logits/rejected": -0.7891534566879272, + "logps/chosen": -240.85501098632812, + "logps/rejected": -248.0969696044922, + "loss": 10754.8312, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -0.5264581441879272, + "rewards/margins": 0.19370315968990326, + "rewards/rejected": -0.7201613187789917, + "rewards/safe_rewards": -0.5348232984542847, + "rewards/unsafe_rewards": -0.5239699482917786, "step": 750 }, { - "epoch": 0.2, - "learning_rate": 4.844354388501649e-07, - "logits/chosen": -2.4354963302612305, - "logits/rejected": -2.2956669330596924, - "logps/chosen": -262.2779541015625, - "logps/rejected": -232.6850128173828, - "loss": 1492.725, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.10968796163797379, - "rewards/margins": 0.0626462996006012, - "rewards/rejected": -0.17233426868915558, - "rewards/safe_rewards": -0.12602491676807404, - "rewards/unsafe_rewards": -0.09335100650787354, + "epoch": 0.82, + "learning_rate": 4.8744344286046236e-08, + "logits/chosen": -1.2362258434295654, + "logits/rejected": -0.8547051548957825, + "logps/chosen": -263.15228271484375, + "logps/rejected": -246.9292755126953, + "loss": 11309.7617, + "rewards/accuracies": 0.6781250238418579, + "rewards/chosen": -0.5227540135383606, + "rewards/margins": 0.17927643656730652, + "rewards/rejected": -0.7020304203033447, + "rewards/safe_rewards": -0.5562368035316467, + "rewards/unsafe_rewards": -0.4761616587638855, "step": 760 }, { - "epoch": 0.2, - "learning_rate": 4.836211651540701e-07, - "logits/chosen": -2.481475830078125, - "logits/rejected": -2.2871978282928467, - "logps/chosen": -247.9012908935547, - "logps/rejected": -183.21054077148438, - "loss": 1427.1648, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.140279158949852, - "rewards/margins": 0.09081730991601944, - "rewards/rejected": -0.23109646141529083, - "rewards/safe_rewards": -0.1487794667482376, - "rewards/unsafe_rewards": -0.13177883625030518, + "epoch": 0.83, + "learning_rate": 4.331438361071163e-08, + "logits/chosen": -1.2906804084777832, + "logits/rejected": -0.957544207572937, + "logps/chosen": -268.247314453125, + "logps/rejected": -259.8026428222656, + "loss": 11313.5938, + "rewards/accuracies": 0.671875, + "rewards/chosen": -0.5142266154289246, + "rewards/margins": 0.1602473258972168, + "rewards/rejected": -0.6744739413261414, + "rewards/safe_rewards": -0.5065894722938538, + "rewards/unsafe_rewards": -0.4688842296600342, "step": 770 }, { - "epoch": 0.21, - "learning_rate": 4.827868514846412e-07, - "logits/chosen": -2.443751811981201, - "logits/rejected": -2.2596352100372314, - "logps/chosen": -232.2360382080078, - "logps/rejected": -214.2128448486328, - "loss": 1542.5896, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.16030152142047882, - "rewards/margins": 0.09534521400928497, - "rewards/rejected": -0.2556467652320862, - "rewards/safe_rewards": -0.1563524305820465, - "rewards/unsafe_rewards": -0.16425058245658875, + "epoch": 0.84, + "learning_rate": 3.817626395644305e-08, + "logits/chosen": -1.268654704093933, + "logits/rejected": -0.8672334551811218, + "logps/chosen": -258.80328369140625, + "logps/rejected": -236.2267608642578, + "loss": 11103.4828, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.5394198894500732, + "rewards/margins": 0.16181787848472595, + "rewards/rejected": -0.7012377381324768, + "rewards/safe_rewards": -0.5342508554458618, + "rewards/unsafe_rewards": -0.5185974836349487, "step": 780 }, { - "epoch": 0.21, - "learning_rate": 4.819325694091257e-07, - "logits/chosen": -2.411376476287842, - "logits/rejected": -2.2682785987854004, - "logps/chosen": -229.03219604492188, - "logps/rejected": -215.0069122314453, - "loss": 1477.9084, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.14824745059013367, - "rewards/margins": 0.10189710557460785, - "rewards/rejected": -0.2501445412635803, - "rewards/safe_rewards": -0.15226349234580994, - "rewards/unsafe_rewards": -0.1442314088344574, + "epoch": 0.85, + "learning_rate": 3.333724037132976e-08, + "logits/chosen": -1.2186919450759888, + "logits/rejected": -0.8310176134109497, + "logps/chosen": -249.523681640625, + "logps/rejected": -241.11837768554688, + "loss": 11373.9438, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -0.5439727306365967, + "rewards/margins": 0.15489128232002258, + "rewards/rejected": -0.6988639831542969, + "rewards/safe_rewards": -0.5334004163742065, + "rewards/unsafe_rewards": -0.5643824934959412, "step": 790 }, { - "epoch": 0.21, - "learning_rate": 4.810583922076559e-07, - "logits/chosen": -2.4613633155822754, - "logits/rejected": -2.35801362991333, - "logps/chosen": -207.6479949951172, - "logps/rejected": -187.14523315429688, - "loss": 1421.3367, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.13394489884376526, - "rewards/margins": 0.05777890235185623, - "rewards/rejected": -0.19172382354736328, - "rewards/safe_rewards": -0.11364112794399261, - "rewards/unsafe_rewards": -0.1542486697435379, + "epoch": 0.86, + "learning_rate": 2.880414557846453e-08, + "logits/chosen": -1.2295588254928589, + "logits/rejected": -0.822035014629364, + "logps/chosen": -279.32745361328125, + "logps/rejected": -260.31451416015625, + "loss": 10643.775, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.5332778692245483, + "rewards/margins": 0.1923495978116989, + "rewards/rejected": -0.7256275415420532, + "rewards/safe_rewards": -0.5375648736953735, + "rewards/unsafe_rewards": -0.5242511630058289, "step": 800 }, { - "epoch": 0.21, - "learning_rate": 4.801643948669649e-07, - "logits/chosen": -2.463087320327759, - "logits/rejected": -2.353064775466919, - "logps/chosen": -234.8894500732422, - "logps/rejected": -220.2030029296875, - "loss": 1476.6214, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.15638719499111176, - "rewards/margins": 0.07367660105228424, - "rewards/rejected": -0.2300637662410736, - "rewards/safe_rewards": -0.16454178094863892, - "rewards/unsafe_rewards": -0.1482326090335846, + "epoch": 0.87, + "learning_rate": 2.4583380328107805e-08, + "logits/chosen": -1.1883349418640137, + "logits/rejected": -0.6885582804679871, + "logps/chosen": -264.3476257324219, + "logps/rejected": -232.583984375, + "loss": 11377.8617, + "rewards/accuracies": 0.596875011920929, + "rewards/chosen": -0.5739254355430603, + "rewards/margins": 0.1415238082408905, + "rewards/rejected": -0.7154492139816284, + "rewards/safe_rewards": -0.5949059724807739, + "rewards/unsafe_rewards": -0.5514267086982727, "step": 810 }, { - "epoch": 0.22, - "learning_rate": 4.79250654073952e-07, - "logits/chosen": -2.3803513050079346, - "logits/rejected": -2.288015842437744, - "logps/chosen": -215.7942657470703, - "logps/rejected": -170.42996215820312, - "loss": 1670.1777, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.12045584619045258, - "rewards/margins": 0.08401370048522949, - "rewards/rejected": -0.20446953177452087, - "rewards/safe_rewards": -0.12237007915973663, - "rewards/unsafe_rewards": -0.11854162067174911, + "epoch": 0.88, + "learning_rate": 2.068090435979958e-08, + "logits/chosen": -1.1781421899795532, + "logits/rejected": -0.8313497304916382, + "logps/chosen": -257.832275390625, + "logps/rejected": -236.805908203125, + "loss": 11513.0695, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.5257682204246521, + "rewards/margins": 0.20051494240760803, + "rewards/rejected": -0.7262831926345825, + "rewards/safe_rewards": -0.5059546828269958, + "rewards/unsafe_rewards": -0.5593279004096985, "step": 820 }, { - "epoch": 0.22, - "learning_rate": 4.783172482091061e-07, - "logits/chosen": -2.472538471221924, - "logits/rejected": -2.264540433883667, - "logps/chosen": -236.84506225585938, - "logps/rejected": -167.0760498046875, - "loss": 1358.332, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.08792787790298462, - "rewards/margins": 0.09938761591911316, - "rewards/rejected": -0.18731549382209778, - "rewards/safe_rewards": -0.08768352121114731, - "rewards/unsafe_rewards": -0.08817222714424133, + "epoch": 0.89, + "learning_rate": 1.710222798718028e-08, + "logits/chosen": -1.2206294536590576, + "logits/rejected": -0.7893603444099426, + "logps/chosen": -244.8930206298828, + "logps/rejected": -253.31832885742188, + "loss": 11328.5289, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.5127435922622681, + "rewards/margins": 0.19607791304588318, + "rewards/rejected": -0.7088214755058289, + "rewards/safe_rewards": -0.5048956871032715, + "rewards/unsafe_rewards": -0.4982619285583496, "step": 830 }, { - "epoch": 0.22, - "learning_rate": 4.773642573397817e-07, - "logits/chosen": -2.4241604804992676, - "logits/rejected": -2.3057217597961426, - "logps/chosen": -204.02590942382812, - "logps/rejected": -196.35165405273438, - "loss": 1507.3554, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.14191725850105286, - "rewards/margins": 0.10284604877233505, - "rewards/rejected": -0.2447633296251297, - "rewards/safe_rewards": -0.1281767040491104, - "rewards/unsafe_rewards": -0.1556578129529953, + "epoch": 0.9, + "learning_rate": 1.3852404317403199e-08, + "logits/chosen": -1.2235485315322876, + "logits/rejected": -0.8346702456474304, + "logps/chosen": -251.68283081054688, + "logps/rejected": -244.3260498046875, + "loss": 11788.0469, + "rewards/accuracies": 0.671875, + "rewards/chosen": -0.528911292552948, + "rewards/margins": 0.16417936980724335, + "rewards/rejected": -0.6930907368659973, + "rewards/safe_rewards": -0.5117666125297546, + "rewards/unsafe_rewards": -0.5516358017921448, "step": 840 }, { - "epoch": 0.23, - "learning_rate": 4.7639176321333074e-07, - "logits/chosen": -2.4255623817443848, - "logits/rejected": -2.3934948444366455, - "logps/chosen": -187.81924438476562, - "logps/rejected": -222.54379272460938, - "loss": 1642.2771, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.11979615688323975, - "rewards/margins": 0.06656758487224579, - "rewards/rejected": -0.18636374175548553, - "rewards/safe_rewards": -0.09374654293060303, - "rewards/unsafe_rewards": -0.14584577083587646, + "epoch": 0.91, + "learning_rate": 1.0936022116124321e-08, + "logits/chosen": -1.2849012613296509, + "logits/rejected": -0.789454460144043, + "logps/chosen": -260.54486083984375, + "logps/rejected": -234.87911987304688, + "loss": 11366.6031, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.5101709961891174, + "rewards/margins": 0.21243497729301453, + "rewards/rejected": -0.7226060032844543, + "rewards/safe_rewards": -0.5518566370010376, + "rewards/unsafe_rewards": -0.5282014012336731, "step": 850 }, { - "epoch": 0.23, - "learning_rate": 4.7539984925009046e-07, - "logits/chosen": -2.4411637783050537, - "logits/rejected": -2.215834140777588, - "logps/chosen": -256.6996765136719, - "logps/rejected": -205.2888946533203, - "loss": 1407.9803, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.10490177571773529, - "rewards/margins": 0.10948599874973297, - "rewards/rejected": -0.21438780426979065, - "rewards/safe_rewards": -0.12185828387737274, - "rewards/unsafe_rewards": -0.08794528245925903, + "epoch": 0.93, + "learning_rate": 8.357199328144576e-09, + "logits/chosen": -1.184933066368103, + "logits/rejected": -0.7943442463874817, + "logps/chosen": -261.4819641113281, + "logps/rejected": -252.32974243164062, + "loss": 10942.9906, + "rewards/accuracies": 0.6781250238418579, + "rewards/chosen": -0.5542179346084595, + "rewards/margins": 0.1730877161026001, + "rewards/rejected": -0.7273055911064148, + "rewards/safe_rewards": -0.5397693514823914, + "rewards/unsafe_rewards": -0.573650062084198, "step": 860 }, { - "epoch": 0.23, - "learning_rate": 4.743886005362273e-07, - "logits/chosen": -2.5367350578308105, - "logits/rejected": -2.3308494091033936, - "logps/chosen": -196.14984130859375, - "logps/rejected": -164.1910400390625, - "loss": 1417.3312, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.1225946918129921, - "rewards/margins": 0.06280263513326645, - "rewards/rejected": -0.18539731204509735, - "rewards/safe_rewards": -0.10954533517360687, - "rewards/unsafe_rewards": -0.13564400374889374, + "epoch": 0.94, + "learning_rate": 6.119577262853254e-09, + "logits/chosen": -1.2122161388397217, + "logits/rejected": -0.900853157043457, + "logps/chosen": -234.4072723388672, + "logps/rejected": -235.3949737548828, + "loss": 11823.2984, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.5236800312995911, + "rewards/margins": 0.18882396817207336, + "rewards/rejected": -0.712503969669342, + "rewards/safe_rewards": -0.5278108716011047, + "rewards/unsafe_rewards": -0.5694692134857178, "step": 870 }, { - "epoch": 0.23, - "learning_rate": 4.733581038164387e-07, - "logits/chosen": -2.3952066898345947, - "logits/rejected": -2.3238577842712402, - "logps/chosen": -197.02769470214844, - "logps/rejected": -169.38760375976562, - "loss": 1470.4297, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.11463264375925064, - "rewards/margins": 0.07264948636293411, - "rewards/rejected": -0.18728210031986237, - "rewards/safe_rewards": -0.13909080624580383, - "rewards/unsafe_rewards": -0.09017445892095566, + "epoch": 0.95, + "learning_rate": 4.226315452682816e-09, + "logits/chosen": -1.232089638710022, + "logits/rejected": -0.7402358055114746, + "logps/chosen": -262.76580810546875, + "logps/rejected": -243.03829956054688, + "loss": 10768.5484, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.5418084859848022, + "rewards/margins": 0.17788514494895935, + "rewards/rejected": -0.719693660736084, + "rewards/safe_rewards": -0.5552460551261902, + "rewards/unsafe_rewards": -0.535815417766571, "step": 880 }, { - "epoch": 0.24, - "learning_rate": 4.7230844748651164e-07, - "logits/chosen": -2.4748780727386475, - "logits/rejected": -2.2969536781311035, - "logps/chosen": -244.0203857421875, - "logps/rejected": -205.35171508789062, - "loss": 1345.2395, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.08670853078365326, - "rewards/margins": 0.09622689336538315, - "rewards/rejected": -0.182935431599617, - "rewards/safe_rewards": -0.09526154398918152, - "rewards/unsafe_rewards": -0.078155517578125, + "epoch": 0.96, + "learning_rate": 2.6800871918346846e-09, + "logits/chosen": -1.2783154249191284, + "logits/rejected": -0.8303347826004028, + "logps/chosen": -254.61483764648438, + "logps/rejected": -248.0339813232422, + "loss": 11688.1625, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.5256968140602112, + "rewards/margins": 0.19851632416248322, + "rewards/rejected": -0.7242131233215332, + "rewards/safe_rewards": -0.5289116501808167, + "rewards/unsafe_rewards": -0.5006402730941772, "step": 890 }, - { - "epoch": 0.24, - "learning_rate": 4.7123972158574067e-07, - "logits/chosen": -2.449808120727539, - "logits/rejected": -2.390338182449341, - "logps/chosen": -199.15823364257812, - "logps/rejected": -167.0094757080078, - "loss": 1584.2579, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.11253806203603745, - "rewards/margins": 0.06539806723594666, - "rewards/rejected": -0.1779361218214035, - "rewards/safe_rewards": -0.10013983398675919, - "rewards/unsafe_rewards": -0.1249362975358963, - "step": 900 - }, - { - "epoch": 0.24, - "learning_rate": 4.7015201778920375e-07, - "logits/chosen": -2.4893672466278076, - "logits/rejected": -2.354015350341797, - "logps/chosen": -194.4624481201172, - "logps/rejected": -178.14991760253906, - "loss": 1358.5419, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.10997853428125381, - "rewards/margins": 0.09477359056472778, - "rewards/rejected": -0.204752117395401, - "rewards/safe_rewards": -0.11922027170658112, - "rewards/unsafe_rewards": -0.10073678195476532, - "step": 910 - }, - { - "epoch": 0.24, - "learning_rate": 4.690454293998988e-07, - "logits/chosen": -2.4665284156799316, - "logits/rejected": -2.3531289100646973, - "logps/chosen": -193.0448760986328, - "logps/rejected": -162.0513916015625, - "loss": 1342.7469, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.13531403243541718, - "rewards/margins": 0.06263645738363266, - "rewards/rejected": -0.19795049726963043, - "rewards/safe_rewards": -0.11679371446371078, - "rewards/unsafe_rewards": -0.15383434295654297, - "step": 920 - }, - { - "epoch": 0.25, - "learning_rate": 4.679200513407399e-07, - "logits/chosen": -2.522549629211426, - "logits/rejected": -2.356151580810547, - "logps/chosen": -219.11721801757812, - "logps/rejected": -203.88504028320312, - "loss": 1496.7359, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.1072319746017456, - "rewards/margins": 0.07359454035758972, - "rewards/rejected": -0.18082651495933533, - "rewards/safe_rewards": -0.1036672592163086, - "rewards/unsafe_rewards": -0.11079667508602142, - "step": 930 - }, - { - "epoch": 0.25, - "learning_rate": 4.667759801464153e-07, - "logits/chosen": -2.4822139739990234, - "logits/rejected": -2.408947229385376, - "logps/chosen": -225.04776000976562, - "logps/rejected": -201.6161346435547, - "loss": 1407.0704, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.061927296221256256, - "rewards/margins": 0.08713509887456894, - "rewards/rejected": -0.1490623950958252, - "rewards/safe_rewards": -0.0739717110991478, - "rewards/unsafe_rewards": -0.04988287016749382, - "step": 940 - }, - { - "epoch": 0.25, - "learning_rate": 4.6561331395510593e-07, - "logits/chosen": -2.497800827026367, - "logits/rejected": -2.403398036956787, - "logps/chosen": -214.0504608154297, - "logps/rejected": -213.86215209960938, - "loss": 1367.0844, - "rewards/accuracies": 0.762499988079071, - "rewards/chosen": -0.05656176060438156, - "rewards/margins": 0.11882082372903824, - "rewards/rejected": -0.175382599234581, - "rewards/safe_rewards": -0.03590674698352814, - "rewards/unsafe_rewards": -0.07721678167581558, - "step": 950 - }, - { - "epoch": 0.25, - "learning_rate": 4.64432152500068e-07, - "logits/chosen": -2.4828553199768066, - "logits/rejected": -2.4612109661102295, - "logps/chosen": -197.10595703125, - "logps/rejected": -182.43344116210938, - "loss": 1848.5477, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.10623642057180405, - "rewards/margins": 0.05376746505498886, - "rewards/rejected": -0.1600038707256317, - "rewards/safe_rewards": -0.13910634815692902, - "rewards/unsafe_rewards": -0.07336650788784027, - "step": 960 - }, - { - "epoch": 0.26, - "learning_rate": 4.632325971010771e-07, - "logits/chosen": -2.4305388927459717, - "logits/rejected": -2.4263224601745605, - "logps/chosen": -207.3190155029297, - "logps/rejected": -200.54698181152344, - "loss": 1384.7666, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.08703888952732086, - "rewards/margins": 0.08829597383737564, - "rewards/rejected": -0.1753348857164383, - "rewards/safe_rewards": -0.09495584666728973, - "rewards/unsafe_rewards": -0.0791219174861908, - "step": 970 - }, - { - "epoch": 0.26, - "learning_rate": 4.620147506557375e-07, - "logits/chosen": -2.390660524368286, - "logits/rejected": -2.3073678016662598, - "logps/chosen": -187.18136596679688, - "logps/rejected": -202.09402465820312, - "loss": 1488.5536, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.15783900022506714, - "rewards/margins": 0.06464293599128723, - "rewards/rejected": -0.22248193621635437, - "rewards/safe_rewards": -0.1454911231994629, - "rewards/unsafe_rewards": -0.1701868772506714, - "step": 980 - }, - { - "epoch": 0.26, - "learning_rate": 4.607787176306557e-07, - "logits/chosen": -2.42850661277771, - "logits/rejected": -2.3657379150390625, - "logps/chosen": -213.331298828125, - "logps/rejected": -185.62588500976562, - "loss": 1336.0273, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.12769411504268646, - "rewards/margins": 0.06851659715175629, - "rewards/rejected": -0.19621069729328156, - "rewards/safe_rewards": -0.12298530340194702, - "rewards/unsafe_rewards": -0.1324029117822647, - "step": 990 - }, - { - "epoch": 0.27, - "learning_rate": 4.595246040524788e-07, - "logits/chosen": -2.461402416229248, - "logits/rejected": -2.3818717002868652, - "logps/chosen": -236.6735382080078, - "logps/rejected": -198.68966674804688, - "loss": 1379.832, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.13558301329612732, - "rewards/margins": 0.06149093434214592, - "rewards/rejected": -0.19707392156124115, - "rewards/safe_rewards": -0.10347513109445572, - "rewards/unsafe_rewards": -0.16769087314605713, - "step": 1000 - }, - { - "epoch": 0.27, - "eval_logits/chosen": -2.2674007415771484, - "eval_logits/rejected": -2.1098780632019043, - "eval_logps/chosen": -155.80995178222656, - "eval_logps/rejected": -122.25984954833984, - "eval_loss": 1014.7579956054688, - "eval_rewards/accuracies": 0.6632071137428284, - "eval_rewards/chosen": -0.15460406243801117, - "eval_rewards/margins": 0.046664148569107056, - "eval_rewards/rejected": -0.20126819610595703, - "eval_rewards/safe_rewards": -0.15221725404262543, - "eval_rewards/unsafe_rewards": -0.150146484375, - "eval_runtime": 2349.1081, - "eval_samples_per_second": 14.918, - "eval_steps_per_second": 0.467, - "step": 1000 - }, - { - "epoch": 0.27, - "learning_rate": 4.582525174988e-07, - "logits/chosen": -2.4191925525665283, - "logits/rejected": -2.3445022106170654, - "logps/chosen": -256.0183410644531, - "logps/rejected": -187.84774780273438, - "loss": 1368.0787, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.10282210260629654, - "rewards/margins": 0.0956369936466217, - "rewards/rejected": -0.19845911860466003, - "rewards/safe_rewards": -0.10070834308862686, - "rewards/unsafe_rewards": -0.10493586212396622, - "step": 1010 - }, - { - "epoch": 0.27, - "learning_rate": 4.5696256708893044e-07, - "logits/chosen": -2.3807530403137207, - "logits/rejected": -2.296288013458252, - "logps/chosen": -194.9390411376953, - "logps/rejected": -176.4541473388672, - "loss": 1532.8088, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.10998891294002533, - "rewards/margins": 0.0663776695728302, - "rewards/rejected": -0.17636659741401672, - "rewards/safe_rewards": -0.11625208705663681, - "rewards/unsafe_rewards": -0.10372574627399445, - "step": 1020 - }, - { - "epoch": 0.27, - "learning_rate": 4.5565486347453914e-07, - "logits/chosen": -2.4518191814422607, - "logits/rejected": -2.316587209701538, - "logps/chosen": -235.9738006591797, - "logps/rejected": -178.3621826171875, - "loss": 1477.7941, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.09197047352790833, - "rewards/margins": 0.063409224152565, - "rewards/rejected": -0.15537969768047333, - "rewards/safe_rewards": -0.11687271296977997, - "rewards/unsafe_rewards": -0.06706823408603668, - "step": 1030 - }, - { - "epoch": 0.28, - "learning_rate": 4.543295188301609e-07, - "logits/chosen": -2.406092882156372, - "logits/rejected": -2.2680728435516357, - "logps/chosen": -217.89208984375, - "logps/rejected": -212.4451446533203, - "loss": 1449.3331, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.08368674665689468, - "rewards/margins": 0.08917780965566635, - "rewards/rejected": -0.17286452651023865, - "rewards/safe_rewards": -0.061650414019823074, - "rewards/unsafe_rewards": -0.1057230681180954, - "step": 1040 - }, - { - "epoch": 0.28, - "learning_rate": 4.5298664684357456e-07, - "logits/chosen": -2.4532692432403564, - "logits/rejected": -2.3210701942443848, - "logps/chosen": -214.07809448242188, - "logps/rejected": -189.19981384277344, - "loss": 1454.1278, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.08185698091983795, - "rewards/margins": 0.08191980421543121, - "rewards/rejected": -0.16377678513526917, - "rewards/safe_rewards": -0.07532165944576263, - "rewards/unsafe_rewards": -0.08839228749275208, - "step": 1050 - }, - { - "epoch": 0.28, - "learning_rate": 4.5162636270605057e-07, - "logits/chosen": -2.4193339347839355, - "logits/rejected": -2.351714849472046, - "logps/chosen": -235.9359588623047, - "logps/rejected": -204.4629364013672, - "loss": 1518.3445, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.0704740658402443, - "rewards/margins": 0.06885071098804474, - "rewards/rejected": -0.13932478427886963, - "rewards/safe_rewards": -0.06750814616680145, - "rewards/unsafe_rewards": -0.07344000041484833, - "step": 1060 - }, - { - "epoch": 0.28, - "learning_rate": 4.5024878310246974e-07, - "logits/chosen": -2.3434391021728516, - "logits/rejected": -2.306189775466919, - "logps/chosen": -195.5021209716797, - "logps/rejected": -175.94754028320312, - "loss": 1423.9838, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.10351769626140594, - "rewards/margins": 0.07599927484989166, - "rewards/rejected": -0.1795169711112976, - "rewards/safe_rewards": -0.0999629870057106, - "rewards/unsafe_rewards": -0.10707239806652069, - "step": 1070 - }, - { - "epoch": 0.29, - "learning_rate": 4.488540262013144e-07, - "logits/chosen": -2.456278085708618, - "logits/rejected": -2.361643075942993, - "logps/chosen": -178.5478973388672, - "logps/rejected": -153.2827911376953, - "loss": 1301.167, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.15519826114177704, - "rewards/margins": 0.05980435758829117, - "rewards/rejected": -0.2150026261806488, - "rewards/safe_rewards": -0.1449848711490631, - "rewards/unsafe_rewards": -0.16541168093681335, - "step": 1080 - }, - { - "epoch": 0.29, - "learning_rate": 4.474422116445319e-07, - "logits/chosen": -2.5001673698425293, - "logits/rejected": -2.3624913692474365, - "logps/chosen": -201.32215881347656, - "logps/rejected": -207.0705108642578, - "loss": 1596.0372, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.14134150743484497, - "rewards/margins": 0.08747919648885727, - "rewards/rejected": -0.22882068157196045, - "rewards/safe_rewards": -0.16872529685497284, - "rewards/unsafe_rewards": -0.11395768821239471, - "step": 1090 - }, - { - "epoch": 0.29, - "learning_rate": 4.460134605372712e-07, - "logits/chosen": -2.40895676612854, - "logits/rejected": -2.297924757003784, - "logps/chosen": -251.35595703125, - "logps/rejected": -206.1438446044922, - "loss": 1360.401, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.14097321033477783, - "rewards/margins": 0.11211182177066803, - "rewards/rejected": -0.25308501720428467, - "rewards/safe_rewards": -0.16153845191001892, - "rewards/unsafe_rewards": -0.12040798366069794, - "step": 1100 - }, - { - "epoch": 0.29, - "learning_rate": 4.445678954374955e-07, - "logits/chosen": -2.496764659881592, - "logits/rejected": -2.3692989349365234, - "logps/chosen": -242.0779571533203, - "logps/rejected": -201.42449951171875, - "loss": 1476.5371, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.173785001039505, - "rewards/margins": 0.06252308934926987, - "rewards/rejected": -0.23630812764167786, - "rewards/safe_rewards": -0.18339715898036957, - "rewards/unsafe_rewards": -0.16417287290096283, - "step": 1110 - }, - { - "epoch": 0.3, - "learning_rate": 4.4310564034546817e-07, - "logits/chosen": -2.4985318183898926, - "logits/rejected": -2.3886020183563232, - "logps/chosen": -198.29969787597656, - "logps/rejected": -186.4773712158203, - "loss": 1457.1787, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.16199859976768494, - "rewards/margins": 0.09775176644325256, - "rewards/rejected": -0.2597503960132599, - "rewards/safe_rewards": -0.15209682285785675, - "rewards/unsafe_rewards": -0.1719004064798355, - "step": 1120 - }, - { - "epoch": 0.3, - "learning_rate": 4.4162682069311676e-07, - "logits/chosen": -2.410400152206421, - "logits/rejected": -2.3624703884124756, - "logps/chosen": -205.81185913085938, - "logps/rejected": -184.69650268554688, - "loss": 1501.7895, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.12636300921440125, - "rewards/margins": 0.0889534279704094, - "rewards/rejected": -0.21531644463539124, - "rewards/safe_rewards": -0.11239534616470337, - "rewards/unsafe_rewards": -0.1403307020664215, - "step": 1130 - }, - { - "epoch": 0.3, - "learning_rate": 4.401315633332732e-07, - "logits/chosen": -2.4212632179260254, - "logits/rejected": -2.3753693103790283, - "logps/chosen": -181.30227661132812, - "logps/rejected": -191.0427703857422, - "loss": 1583.6492, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.15884152054786682, - "rewards/margins": 0.06376403570175171, - "rewards/rejected": -0.22260555624961853, - "rewards/safe_rewards": -0.15404076874256134, - "rewards/unsafe_rewards": -0.1636422872543335, - "step": 1140 - }, - { - "epoch": 0.31, - "learning_rate": 4.386199965287924e-07, - "logits/chosen": -2.4650323390960693, - "logits/rejected": -2.3289175033569336, - "logps/chosen": -180.3450927734375, - "logps/rejected": -171.68313598632812, - "loss": 1445.0677, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.12832841277122498, - "rewards/margins": 0.08213474601507187, - "rewards/rejected": -0.21046319603919983, - "rewards/safe_rewards": -0.1306324005126953, - "rewards/unsafe_rewards": -0.12602445483207703, - "step": 1150 - }, - { - "epoch": 0.31, - "learning_rate": 4.3709224994155003e-07, - "logits/chosen": -2.488968849182129, - "logits/rejected": -2.3922841548919678, - "logps/chosen": -172.24342346191406, - "logps/rejected": -156.87738037109375, - "loss": 1398.0659, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.17456158995628357, - "rewards/margins": 0.061097513884305954, - "rewards/rejected": -0.23565909266471863, - "rewards/safe_rewards": -0.18458858132362366, - "rewards/unsafe_rewards": -0.16453461349010468, - "step": 1160 - }, - { - "epoch": 0.31, - "learning_rate": 4.3554845462132e-07, - "logits/chosen": -2.4631857872009277, - "logits/rejected": -2.383366107940674, - "logps/chosen": -227.1753692626953, - "logps/rejected": -210.9671173095703, - "loss": 1101.0617, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.13971468806266785, - "rewards/margins": 0.10988861322402954, - "rewards/rejected": -0.249603271484375, - "rewards/safe_rewards": -0.1308557689189911, - "rewards/unsafe_rewards": -0.14857357740402222, - "step": 1170 - }, - { - "epoch": 0.31, - "learning_rate": 4.3398874299453296e-07, - "logits/chosen": -2.413116693496704, - "logits/rejected": -2.2758474349975586, - "logps/chosen": -231.05752563476562, - "logps/rejected": -198.37632751464844, - "loss": 1416.5892, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.21605177223682404, - "rewards/margins": 0.061402082443237305, - "rewards/rejected": -0.27745383977890015, - "rewards/safe_rewards": -0.21816666424274445, - "rewards/unsafe_rewards": -0.21393688023090363, - "step": 1180 - }, - { - "epoch": 0.32, - "learning_rate": 4.3241324885291715e-07, - "logits/chosen": -2.444124221801758, - "logits/rejected": -2.2451930046081543, - "logps/chosen": -256.014404296875, - "logps/rejected": -253.3253173828125, - "loss": 1429.2455, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.19230113923549652, - "rewards/margins": 0.09462329745292664, - "rewards/rejected": -0.28692445158958435, - "rewards/safe_rewards": -0.19065041840076447, - "rewards/unsafe_rewards": -0.19395187497138977, - "step": 1190 - }, - { - "epoch": 0.32, - "learning_rate": 4.3082210734202127e-07, - "logits/chosen": -2.294668674468994, - "logits/rejected": -2.2676291465759277, - "logps/chosen": -199.166259765625, - "logps/rejected": -202.34390258789062, - "loss": 1238.2642, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.21350398659706116, - "rewards/margins": 0.07355356961488724, - "rewards/rejected": -0.2870575487613678, - "rewards/safe_rewards": -0.22245021164417267, - "rewards/unsafe_rewards": -0.20455770194530487, - "step": 1200 - }, - { - "epoch": 0.32, - "learning_rate": 4.292154549496223e-07, - "logits/chosen": -2.358863353729248, - "logits/rejected": -2.1883394718170166, - "logps/chosen": -254.25607299804688, - "logps/rejected": -208.291015625, - "loss": 1331.5301, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.19084244966506958, - "rewards/margins": 0.12158522754907608, - "rewards/rejected": -0.31242766976356506, - "rewards/safe_rewards": -0.21271340548992157, - "rewards/unsafe_rewards": -0.1689714938402176, - "step": 1210 - }, - { - "epoch": 0.32, - "learning_rate": 4.27593429494017e-07, - "logits/chosen": -2.3900339603424072, - "logits/rejected": -2.301790714263916, - "logps/chosen": -225.9875946044922, - "logps/rejected": -175.69113159179688, - "loss": 1523.9205, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.2246415913105011, - "rewards/margins": 0.030890032649040222, - "rewards/rejected": -0.2555316388607025, - "rewards/safe_rewards": -0.21765148639678955, - "rewards/unsafe_rewards": -0.23163168132305145, - "step": 1220 - }, - { - "epoch": 0.33, - "learning_rate": 4.2595617011220043e-07, - "logits/chosen": -2.4709110260009766, - "logits/rejected": -2.365135669708252, - "logps/chosen": -186.94692993164062, - "logps/rejected": -162.97119140625, - "loss": 1331.2154, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.1904468536376953, - "rewards/margins": 0.08371935784816742, - "rewards/rejected": -0.27416619658470154, - "rewards/safe_rewards": -0.16846317052841187, - "rewards/unsafe_rewards": -0.21243055164813995, - "step": 1230 - }, - { - "epoch": 0.33, - "learning_rate": 4.243038172479303e-07, - "logits/chosen": -2.3970465660095215, - "logits/rejected": -2.2854819297790527, - "logps/chosen": -227.3101806640625, - "logps/rejected": -195.39407348632812, - "loss": 1520.9523, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.15089385211467743, - "rewards/margins": 0.0606367290019989, - "rewards/rejected": -0.21153059601783752, - "rewards/safe_rewards": -0.1677275449037552, - "rewards/unsafe_rewards": -0.13406017422676086, - "step": 1240 - }, - { - "epoch": 0.33, - "learning_rate": 4.2263651263968037e-07, - "logits/chosen": -2.374483585357666, - "logits/rejected": -2.2359116077423096, - "logps/chosen": -217.7379608154297, - "logps/rejected": -187.3532257080078, - "loss": 1397.1002, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.12568016350269318, - "rewards/margins": 0.13003575801849365, - "rewards/rejected": -0.255715936422348, - "rewards/safe_rewards": -0.13189849257469177, - "rewards/unsafe_rewards": -0.11946181952953339, - "step": 1250 - }, - { - "epoch": 0.33, - "learning_rate": 4.2095439930848145e-07, - "logits/chosen": -2.3493154048919678, - "logits/rejected": -2.3069698810577393, - "logps/chosen": -214.3734893798828, - "logps/rejected": -224.5817413330078, - "loss": 1592.5004, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.14046742022037506, - "rewards/margins": 0.049839336425065994, - "rewards/rejected": -0.19030675292015076, - "rewards/safe_rewards": -0.12052347511053085, - "rewards/unsafe_rewards": -0.16041132807731628, - "step": 1260 - }, - { - "epoch": 0.34, - "learning_rate": 4.192576215456536e-07, - "logits/chosen": -2.4549827575683594, - "logits/rejected": -2.3466134071350098, - "logps/chosen": -193.22189331054688, - "logps/rejected": -176.31777954101562, - "loss": 1623.3943, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.1287730634212494, - "rewards/margins": 0.06341226398944855, - "rewards/rejected": -0.19218535721302032, - "rewards/safe_rewards": -0.1322464942932129, - "rewards/unsafe_rewards": -0.12529967725276947, - "step": 1270 - }, - { - "epoch": 0.34, - "learning_rate": 4.175463249004285e-07, - "logits/chosen": -2.4853529930114746, - "logits/rejected": -2.3766117095947266, - "logps/chosen": -191.35366821289062, - "logps/rejected": -165.14627075195312, - "loss": 1252.7815, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.11302691698074341, - "rewards/margins": 0.11286221444606781, - "rewards/rejected": -0.22588913142681122, - "rewards/safe_rewards": -0.12079620361328125, - "rewards/unsafe_rewards": -0.10525763034820557, - "step": 1280 - }, - { - "epoch": 0.34, - "learning_rate": 4.1582065616746474e-07, - "logits/chosen": -2.385850429534912, - "logits/rejected": -2.3417530059814453, - "logps/chosen": -218.7587127685547, - "logps/rejected": -221.1854705810547, - "loss": 1431.1749, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.09808145463466644, - "rewards/margins": 0.10028932988643646, - "rewards/rejected": -0.1983707845211029, - "rewards/safe_rewards": -0.0970446914434433, - "rewards/unsafe_rewards": -0.09911822527647018, - "step": 1290 - }, - { - "epoch": 0.34, - "learning_rate": 4.1408076337425524e-07, - "logits/chosen": -2.492769241333008, - "logits/rejected": -2.391195774078369, - "logps/chosen": -211.1204071044922, - "logps/rejected": -172.81301879882812, - "loss": 1411.8387, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.07882460206747055, - "rewards/margins": 0.09220398962497711, - "rewards/rejected": -0.17102858424186707, - "rewards/safe_rewards": -0.08399553596973419, - "rewards/unsafe_rewards": -0.0736536905169487, - "step": 1300 - }, - { - "epoch": 0.35, - "learning_rate": 4.123267957684298e-07, - "logits/chosen": -2.5242888927459717, - "logits/rejected": -2.3797318935394287, - "logps/chosen": -223.8448028564453, - "logps/rejected": -174.67098999023438, - "loss": 1432.8117, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.13797348737716675, - "rewards/margins": 0.0580359622836113, - "rewards/rejected": -0.19600944221019745, - "rewards/safe_rewards": -0.15271785855293274, - "rewards/unsafe_rewards": -0.12322912365198135, - "step": 1310 - }, - { - "epoch": 0.35, - "learning_rate": 4.1055890380495276e-07, - "logits/chosen": -2.4992218017578125, - "logits/rejected": -2.4102420806884766, - "logps/chosen": -257.4089050292969, - "logps/rejected": -241.6340789794922, - "loss": 1529.5772, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.15251678228378296, - "rewards/margins": 0.088885597884655, - "rewards/rejected": -0.24140235781669617, - "rewards/safe_rewards": -0.14009423553943634, - "rewards/unsafe_rewards": -0.16493932902812958, - "step": 1320 - }, - { - "epoch": 0.35, - "learning_rate": 4.087772391332167e-07, - "logits/chosen": -2.4183342456817627, - "logits/rejected": -2.315364122390747, - "logps/chosen": -243.69442749023438, - "logps/rejected": -203.03564453125, - "loss": 1517.0742, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.12647080421447754, - "rewards/margins": 0.06979449093341827, - "rewards/rejected": -0.1962652951478958, - "rewards/safe_rewards": -0.1382220834493637, - "rewards/unsafe_rewards": -0.11471954733133316, - "step": 1330 - }, - { - "epoch": 0.36, - "learning_rate": 4.069819545840343e-07, - "logits/chosen": -2.5063109397888184, - "logits/rejected": -2.352687358856201, - "logps/chosen": -251.11776733398438, - "logps/rejected": -207.6747589111328, - "loss": 1363.18, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.1317937970161438, - "rewards/margins": 0.08164751529693604, - "rewards/rejected": -0.21344132721424103, - "rewards/safe_rewards": -0.13523849844932556, - "rewards/unsafe_rewards": -0.12834909558296204, - "step": 1340 - }, - { - "epoch": 0.36, - "learning_rate": 4.0517320415652824e-07, - "logits/chosen": -2.435338020324707, - "logits/rejected": -2.3286821842193604, - "logps/chosen": -228.76339721679688, - "logps/rejected": -210.0983428955078, - "loss": 1355.6145, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.10598523914813995, - "rewards/margins": 0.12724778056144714, - "rewards/rejected": -0.2332330197095871, - "rewards/safe_rewards": -0.11148416996002197, - "rewards/unsafe_rewards": -0.10048631578683853, - "step": 1350 - }, - { - "epoch": 0.36, - "learning_rate": 4.0335114300492146e-07, - "logits/chosen": -2.4345858097076416, - "logits/rejected": -2.3088791370391846, - "logps/chosen": -215.0584716796875, - "logps/rejected": -181.14930725097656, - "loss": 1397.8868, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.1455017328262329, - "rewards/margins": 0.07766609638929367, - "rewards/rejected": -0.2231678068637848, - "rewards/safe_rewards": -0.14700794219970703, - "rewards/unsafe_rewards": -0.14399553835391998, - "step": 1360 - }, - { - "epoch": 0.36, - "learning_rate": 4.01515927425228e-07, - "logits/chosen": -2.4371566772460938, - "logits/rejected": -2.3483874797821045, - "logps/chosen": -223.6343536376953, - "logps/rejected": -193.9138641357422, - "loss": 1287.7429, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.12743057310581207, - "rewards/margins": 0.08499239385128021, - "rewards/rejected": -0.21242296695709229, - "rewards/safe_rewards": -0.1057007759809494, - "rewards/unsafe_rewards": -0.14916035532951355, - "step": 1370 - }, - { - "epoch": 0.37, - "learning_rate": 3.9966771484184566e-07, - "logits/chosen": -2.2985615730285645, - "logits/rejected": -2.2442033290863037, - "logps/chosen": -224.89309692382812, - "logps/rejected": -214.9363250732422, - "loss": 1334.1297, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.13449935615062714, - "rewards/margins": 0.09597790241241455, - "rewards/rejected": -0.2304772585630417, - "rewards/safe_rewards": -0.15713343024253845, - "rewards/unsafe_rewards": -0.11186530441045761, - "step": 1380 - }, - { - "epoch": 0.37, - "learning_rate": 3.978066637940527e-07, - "logits/chosen": -2.2873928546905518, - "logits/rejected": -2.2894973754882812, - "logps/chosen": -220.40963745117188, - "logps/rejected": -205.80892944335938, - "loss": 1376.5348, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.1277601569890976, - "rewards/margins": 0.09008289873600006, - "rewards/rejected": -0.21784305572509766, - "rewards/safe_rewards": -0.12132422626018524, - "rewards/unsafe_rewards": -0.13419607281684875, - "step": 1390 - }, - { - "epoch": 0.37, - "learning_rate": 3.95932933922408e-07, - "logits/chosen": -2.43399977684021, - "logits/rejected": -2.3279237747192383, - "logps/chosen": -206.205322265625, - "logps/rejected": -192.04666137695312, - "loss": 1286.1419, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.1669175773859024, - "rewards/margins": 0.06349079310894012, - "rewards/rejected": -0.23040834069252014, - "rewards/safe_rewards": -0.16002705693244934, - "rewards/unsafe_rewards": -0.17380808293819427, - "step": 1400 - }, - { - "epoch": 0.37, - "learning_rate": 3.940466859550573e-07, - "logits/chosen": -2.490708112716675, - "logits/rejected": -2.321770191192627, - "logps/chosen": -229.590087890625, - "logps/rejected": -185.96902465820312, - "loss": 1420.8066, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.16178865730762482, - "rewards/margins": 0.103298619389534, - "rewards/rejected": -0.2650873064994812, - "rewards/safe_rewards": -0.16527915000915527, - "rewards/unsafe_rewards": -0.15829813480377197, - "step": 1410 - }, - { - "epoch": 0.38, - "learning_rate": 3.9214808169394577e-07, - "logits/chosen": -2.4113211631774902, - "logits/rejected": -2.316403865814209, - "logps/chosen": -215.38583374023438, - "logps/rejected": -196.53634643554688, - "loss": 1355.2844, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.17774160206317902, - "rewards/margins": 0.09554930776357651, - "rewards/rejected": -0.2732909023761749, - "rewards/safe_rewards": -0.1823187917470932, - "rewards/unsafe_rewards": -0.17316441237926483, - "step": 1420 - }, - { - "epoch": 0.38, - "learning_rate": 3.902372840009387e-07, - "logits/chosen": -2.326296329498291, - "logits/rejected": -2.2835748195648193, - "logps/chosen": -194.90737915039062, - "logps/rejected": -251.5759735107422, - "loss": 1494.2578, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.1927250325679779, - "rewards/margins": 0.08211047947406769, - "rewards/rejected": -0.2748354971408844, - "rewards/safe_rewards": -0.18806278705596924, - "rewards/unsafe_rewards": -0.19738724827766418, - "step": 1430 - }, - { - "epoch": 0.38, - "learning_rate": 3.8831445678385173e-07, - "logits/chosen": -2.5008461475372314, - "logits/rejected": -2.332751989364624, - "logps/chosen": -229.41397094726562, - "logps/rejected": -199.62095642089844, - "loss": 1423.0473, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.16933095455169678, - "rewards/margins": 0.078728586435318, - "rewards/rejected": -0.24805955588817596, - "rewards/safe_rewards": -0.18283183872699738, - "rewards/unsafe_rewards": -0.15583007037639618, - "step": 1440 - }, - { - "epoch": 0.38, - "learning_rate": 3.8637976498239023e-07, - "logits/chosen": -2.414363384246826, - "logits/rejected": -2.351238489151001, - "logps/chosen": -212.896484375, - "logps/rejected": -197.0729217529297, - "loss": 1410.9084, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.16504549980163574, - "rewards/margins": 0.08142076432704926, - "rewards/rejected": -0.2464662492275238, - "rewards/safe_rewards": -0.16218319535255432, - "rewards/unsafe_rewards": -0.16790780425071716, - "step": 1450 - }, - { - "epoch": 0.39, - "learning_rate": 3.844333745540009e-07, - "logits/chosen": -2.433485269546509, - "logits/rejected": -2.279266834259033, - "logps/chosen": -230.96707153320312, - "logps/rejected": -181.1007537841797, - "loss": 1321.0371, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.1405428647994995, - "rewards/margins": 0.10154906660318375, - "rewards/rejected": -0.24209193885326385, - "rewards/safe_rewards": -0.1429021954536438, - "rewards/unsafe_rewards": -0.13818354904651642, - "step": 1460 - }, - { - "epoch": 0.39, - "learning_rate": 3.8247545245963654e-07, - "logits/chosen": -2.3225536346435547, - "logits/rejected": -2.2147715091705322, - "logps/chosen": -195.6408233642578, - "logps/rejected": -200.17922973632812, - "loss": 1268.9027, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.1782570630311966, - "rewards/margins": 0.0737292543053627, - "rewards/rejected": -0.2519863247871399, - "rewards/safe_rewards": -0.18971732258796692, - "rewards/unsafe_rewards": -0.16679677367210388, - "step": 1470 - }, - { - "epoch": 0.39, - "learning_rate": 3.805061666494336e-07, - "logits/chosen": -2.4033913612365723, - "logits/rejected": -2.2910096645355225, - "logps/chosen": -196.27236938476562, - "logps/rejected": -175.74136352539062, - "loss": 1254.0469, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.13998866081237793, - "rewards/margins": 0.12497708946466446, - "rewards/rejected": -0.2649657428264618, - "rewards/safe_rewards": -0.12438102811574936, - "rewards/unsafe_rewards": -0.1555963158607483, - "step": 1480 - }, - { - "epoch": 0.4, - "learning_rate": 3.7852568604830535e-07, - "logits/chosen": -2.456437110900879, - "logits/rejected": -2.3128790855407715, - "logps/chosen": -223.6847381591797, - "logps/rejected": -198.30300903320312, - "loss": 1402.8721, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.1824633926153183, - "rewards/margins": 0.08513672649860382, - "rewards/rejected": -0.2676001191139221, - "rewards/safe_rewards": -0.17308323085308075, - "rewards/unsafe_rewards": -0.19184358417987823, - "step": 1490 - }, - { - "epoch": 0.4, - "learning_rate": 3.765341805414525e-07, - "logits/chosen": -2.446403741836548, - "logits/rejected": -2.3814282417297363, - "logps/chosen": -274.417236328125, - "logps/rejected": -249.2129364013672, - "loss": 1404.9199, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.1514512300491333, - "rewards/margins": 0.08991163969039917, - "rewards/rejected": -0.24136288464069366, - "rewards/safe_rewards": -0.15311299264431, - "rewards/unsafe_rewards": -0.1497894823551178, - "step": 1500 - }, - { - "epoch": 0.4, - "eval_logits/chosen": -2.216228723526001, - "eval_logits/rejected": -2.0713047981262207, - "eval_logps/chosen": -162.53675842285156, - "eval_logps/rejected": -128.91456604003906, - "eval_loss": 997.0103759765625, - "eval_rewards/accuracies": 0.6677691340446472, - "eval_rewards/chosen": -0.22187212109565735, - "eval_rewards/margins": 0.04594320431351662, - "eval_rewards/rejected": -0.2678152918815613, - "eval_rewards/safe_rewards": -0.21887338161468506, - "eval_rewards/unsafe_rewards": -0.21648073196411133, - "eval_runtime": 2349.0522, - "eval_samples_per_second": 14.918, - "eval_steps_per_second": 0.467, - "step": 1500 - }, - { - "epoch": 0.4, - "learning_rate": 3.7453182095978906e-07, - "logits/chosen": -2.3663973808288574, - "logits/rejected": -2.244286060333252, - "logps/chosen": -193.31201171875, - "logps/rejected": -202.28622436523438, - "loss": 1661.423, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.14921161532402039, - "rewards/margins": 0.054558612406253815, - "rewards/rejected": -0.2037702053785324, - "rewards/safe_rewards": -0.13778987526893616, - "rewards/unsafe_rewards": -0.160633385181427, - "step": 1510 - }, - { - "epoch": 0.4, - "learning_rate": 3.725187790652897e-07, - "logits/chosen": -2.416991949081421, - "logits/rejected": -2.3046176433563232, - "logps/chosen": -178.9683380126953, - "logps/rejected": -174.53445434570312, - "loss": 1374.422, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.14774398505687714, - "rewards/margins": 0.07564280182123184, - "rewards/rejected": -0.22338679432868958, - "rewards/safe_rewards": -0.15105345845222473, - "rewards/unsafe_rewards": -0.14443449676036835, - "step": 1520 - }, - { - "epoch": 0.41, - "learning_rate": 3.704952275362554e-07, - "logits/chosen": -2.465121030807495, - "logits/rejected": -2.3682198524475098, - "logps/chosen": -242.96194458007812, - "logps/rejected": -207.06246948242188, - "loss": 1569.5816, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.12511205673217773, - "rewards/margins": 0.07490274310112, - "rewards/rejected": -0.20001479983329773, - "rewards/safe_rewards": -0.11417423188686371, - "rewards/unsafe_rewards": -0.13604989647865295, - "step": 1530 - }, - { - "epoch": 0.41, - "learning_rate": 3.6846133995250146e-07, - "logits/chosen": -2.4363303184509277, - "logits/rejected": -2.2767231464385986, - "logps/chosen": -237.9536895751953, - "logps/rejected": -177.0271759033203, - "loss": 1369.3324, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.13186858594417572, - "rewards/margins": 0.08251256495714188, - "rewards/rejected": -0.214381143450737, - "rewards/safe_rewards": -0.1466236710548401, - "rewards/unsafe_rewards": -0.11711349338293076, - "step": 1540 - }, - { - "epoch": 0.41, - "learning_rate": 3.6641729078046763e-07, - "logits/chosen": -2.489834785461426, - "logits/rejected": -2.352896213531494, - "logps/chosen": -207.2012176513672, - "logps/rejected": -173.3971405029297, - "loss": 1332.681, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.1464800387620926, - "rewards/margins": 0.11185964196920395, - "rewards/rejected": -0.25833967328071594, - "rewards/safe_rewards": -0.11964575201272964, - "rewards/unsafe_rewards": -0.17331431806087494, - "step": 1550 - }, - { - "epoch": 0.41, - "learning_rate": 3.6436325535825255e-07, - "logits/chosen": -2.49906063079834, - "logits/rejected": -2.3646252155303955, - "logps/chosen": -217.7091827392578, - "logps/rejected": -199.86734008789062, - "loss": 1295.2139, - "rewards/accuracies": 0.7875000238418579, - "rewards/chosen": -0.1840960681438446, - "rewards/margins": 0.09847177565097809, - "rewards/rejected": -0.2825678288936615, - "rewards/safe_rewards": -0.18202707171440125, - "rewards/unsafe_rewards": -0.18616504967212677, - "step": 1560 - }, - { - "epoch": 0.42, - "learning_rate": 3.6229940988057326e-07, - "logits/chosen": -2.4578747749328613, - "logits/rejected": -2.3097221851348877, - "logps/chosen": -198.35903930664062, - "logps/rejected": -176.7674560546875, - "loss": 1444.0348, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.1905469447374344, - "rewards/margins": 0.095194511115551, - "rewards/rejected": -0.2857414186000824, - "rewards/safe_rewards": -0.19387492537498474, - "rewards/unsafe_rewards": -0.18721893429756165, - "step": 1570 - }, - { - "epoch": 0.42, - "learning_rate": 3.6022593138365124e-07, - "logits/chosen": -2.481733560562134, - "logits/rejected": -2.3595964908599854, - "logps/chosen": -190.13658142089844, - "logps/rejected": -198.0826873779297, - "loss": 1263.6074, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.1874133050441742, - "rewards/margins": 0.08709780871868134, - "rewards/rejected": -0.2745111286640167, - "rewards/safe_rewards": -0.18422745168209076, - "rewards/unsafe_rewards": -0.19059917330741882, - "step": 1580 - }, - { - "epoch": 0.42, - "learning_rate": 3.581429977300263e-07, - "logits/chosen": -2.446432590484619, - "logits/rejected": -2.3499393463134766, - "logps/chosen": -186.4178009033203, - "logps/rejected": -199.3746337890625, - "loss": 1313.7977, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.16946353018283844, - "rewards/margins": 0.08198583126068115, - "rewards/rejected": -0.2514493763446808, - "rewards/safe_rewards": -0.1607791930437088, - "rewards/unsafe_rewards": -0.17814788222312927, - "step": 1590 - }, - { - "epoch": 0.42, - "learning_rate": 3.5605078759329963e-07, - "logits/chosen": -2.4075934886932373, - "logits/rejected": -2.31242036819458, - "logps/chosen": -185.78793334960938, - "logps/rejected": -171.9018096923828, - "loss": 1547.1016, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.2110448181629181, - "rewards/margins": 0.07007146626710892, - "rewards/rejected": -0.2811163067817688, - "rewards/safe_rewards": -0.20154133439064026, - "rewards/unsafe_rewards": -0.22054831683635712, - "step": 1600 - }, - { - "epoch": 0.43, - "learning_rate": 3.5394948044280705e-07, - "logits/chosen": -2.4385690689086914, - "logits/rejected": -2.352933883666992, - "logps/chosen": -215.37673950195312, - "logps/rejected": -204.38522338867188, - "loss": 1397.4956, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.1870114952325821, - "rewards/margins": 0.07196725159883499, - "rewards/rejected": -0.2589787244796753, - "rewards/safe_rewards": -0.16818994283676147, - "rewards/unsafe_rewards": -0.20583298802375793, - "step": 1610 - }, - { - "epoch": 0.43, - "learning_rate": 3.5183925652822415e-07, - "logits/chosen": -2.4011077880859375, - "logits/rejected": -2.1952881813049316, - "logps/chosen": -255.5830078125, - "logps/rejected": -240.0943145751953, - "loss": 1474.7205, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.1567767709493637, - "rewards/margins": 0.1131705492734909, - "rewards/rejected": -0.269947350025177, - "rewards/safe_rewards": -0.1465783417224884, - "rewards/unsafe_rewards": -0.1669752150774002, - "step": 1620 - }, - { - "epoch": 0.43, - "learning_rate": 3.4972029686410495e-07, - "logits/chosen": -2.431394338607788, - "logits/rejected": -2.268126964569092, - "logps/chosen": -222.8999481201172, - "logps/rejected": -182.3618927001953, - "loss": 1480.7025, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.18097783625125885, - "rewards/margins": 0.0637437254190445, - "rewards/rejected": -0.24472156167030334, - "rewards/safe_rewards": -0.1764880269765854, - "rewards/unsafe_rewards": -0.18546763062477112, - "step": 1630 - }, - { - "epoch": 0.44, - "learning_rate": 3.475927832143538e-07, - "logits/chosen": -2.326904773712158, - "logits/rejected": -2.241318941116333, - "logps/chosen": -220.22116088867188, - "logps/rejected": -193.65753173828125, - "loss": 1555.6601, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.16184373199939728, - "rewards/margins": 0.07990992814302444, - "rewards/rejected": -0.24175365269184113, - "rewards/safe_rewards": -0.1610107123851776, - "rewards/unsafe_rewards": -0.16267673671245575, - "step": 1640 - }, - { - "epoch": 0.44, - "learning_rate": 3.454568980766345e-07, - "logits/chosen": -2.3985440731048584, - "logits/rejected": -2.1752164363861084, - "logps/chosen": -208.9574432373047, - "logps/rejected": -169.05763244628906, - "loss": 1415.1885, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.12837891280651093, - "rewards/margins": 0.11940275132656097, - "rewards/rejected": -0.2477816641330719, - "rewards/safe_rewards": -0.13213534653186798, - "rewards/unsafe_rewards": -0.12462246417999268, - "step": 1650 - }, - { - "epoch": 0.44, - "learning_rate": 3.433128246667151e-07, - "logits/chosen": -2.5162148475646973, - "logits/rejected": -2.4004786014556885, - "logps/chosen": -197.87667846679688, - "logps/rejected": -178.853515625, - "loss": 1731.5818, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.1466478407382965, - "rewards/margins": 0.06997327506542206, - "rewards/rejected": -0.21662113070487976, - "rewards/safe_rewards": -0.14021913707256317, - "rewards/unsafe_rewards": -0.15307654440402985, - "step": 1660 - }, - { - "epoch": 0.44, - "learning_rate": 3.4116074690275165e-07, - "logits/chosen": -2.5400516986846924, - "logits/rejected": -2.3341615200042725, - "logps/chosen": -216.02420043945312, - "logps/rejected": -174.03268432617188, - "loss": 1440.6035, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.18382161855697632, - "rewards/margins": 0.04007343575358391, - "rewards/rejected": -0.22389504313468933, - "rewards/safe_rewards": -0.1840430200099945, - "rewards/unsafe_rewards": -0.18360023200511932, - "step": 1670 - }, - { - "epoch": 0.45, - "learning_rate": 3.3900084938951235e-07, - "logits/chosen": -2.4770917892456055, - "logits/rejected": -2.3432071208953857, - "logps/chosen": -260.0920715332031, - "logps/rejected": -212.26296997070312, - "loss": 1441.55, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.1706467866897583, - "rewards/margins": 0.08704660832881927, - "rewards/rejected": -0.2576933801174164, - "rewards/safe_rewards": -0.16985921561717987, - "rewards/unsafe_rewards": -0.17143437266349792, - "step": 1680 - }, - { - "epoch": 0.45, - "learning_rate": 3.368333174025416e-07, - "logits/chosen": -2.529247283935547, - "logits/rejected": -2.353431463241577, - "logps/chosen": -228.86593627929688, - "logps/rejected": -210.79916381835938, - "loss": 1441.8045, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.16985967755317688, - "rewards/margins": 0.1146186962723732, - "rewards/rejected": -0.2844783663749695, - "rewards/safe_rewards": -0.14971952140331268, - "rewards/unsafe_rewards": -0.18999984860420227, - "step": 1690 - }, - { - "epoch": 0.45, - "learning_rate": 3.3465833687226744e-07, - "logits/chosen": -2.482921838760376, - "logits/rejected": -2.447489023208618, - "logps/chosen": -217.0481719970703, - "logps/rejected": -208.802734375, - "loss": 1427.9241, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.20472776889801025, - "rewards/margins": 0.05187666416168213, - "rewards/rejected": -0.2566044330596924, - "rewards/safe_rewards": -0.20269599556922913, - "rewards/unsafe_rewards": -0.2067594975233078, - "step": 1700 - }, - { - "epoch": 0.45, - "learning_rate": 3.324760943680524e-07, - "logits/chosen": -2.5520997047424316, - "logits/rejected": -2.3803460597991943, - "logps/chosen": -210.81594848632812, - "logps/rejected": -166.60635375976562, - "loss": 1379.4057, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.1449248492717743, - "rewards/margins": 0.09905349463224411, - "rewards/rejected": -0.243978351354599, - "rewards/safe_rewards": -0.11834564059972763, - "rewards/unsafe_rewards": -0.17150405049324036, - "step": 1710 - }, - { - "epoch": 0.46, - "learning_rate": 3.302867770821891e-07, - "logits/chosen": -2.510335922241211, - "logits/rejected": -2.4587719440460205, - "logps/chosen": -236.3642578125, - "logps/rejected": -211.8437042236328, - "loss": 1548.6289, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.17973656952381134, - "rewards/margins": 0.052177559584379196, - "rewards/rejected": -0.23191413283348083, - "rewards/safe_rewards": -0.17798998951911926, - "rewards/unsafe_rewards": -0.1814831793308258, - "step": 1720 - }, - { - "epoch": 0.46, - "learning_rate": 3.28090572813844e-07, - "logits/chosen": -2.516510486602783, - "logits/rejected": -2.423905849456787, - "logps/chosen": -229.9568634033203, - "logps/rejected": -220.5032196044922, - "loss": 1444.9046, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.1410938799381256, - "rewards/margins": 0.099317267537117, - "rewards/rejected": -0.2404111623764038, - "rewards/safe_rewards": -0.1352454423904419, - "rewards/unsafe_rewards": -0.14694230258464813, - "step": 1730 - }, - { - "epoch": 0.46, - "learning_rate": 3.2588766995294714e-07, - "logits/chosen": -2.423407554626465, - "logits/rejected": -2.3955535888671875, - "logps/chosen": -174.02529907226562, - "logps/rejected": -213.25942993164062, - "loss": 1524.6137, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.152995765209198, - "rewards/margins": 0.0417090579867363, - "rewards/rejected": -0.1947048008441925, - "rewards/safe_rewards": -0.14165517687797546, - "rewards/unsafe_rewards": -0.16433633863925934, - "step": 1740 - }, - { - "epoch": 0.46, - "learning_rate": 3.2367825746403247e-07, - "logits/chosen": -2.5194592475891113, - "logits/rejected": -2.4487204551696777, - "logps/chosen": -238.40542602539062, - "logps/rejected": -235.1794891357422, - "loss": 1481.2777, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.11255230754613876, - "rewards/margins": 0.05513427406549454, - "rewards/rejected": -0.1676865816116333, - "rewards/safe_rewards": -0.12173326313495636, - "rewards/unsafe_rewards": -0.10337134450674057, - "step": 1750 - }, - { - "epoch": 0.47, - "learning_rate": 3.214625248700285e-07, - "logits/chosen": -2.4446468353271484, - "logits/rejected": -2.3627326488494873, - "logps/chosen": -194.45687866210938, - "logps/rejected": -172.0442657470703, - "loss": 1399.8721, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.1163901686668396, - "rewards/margins": 0.04747793823480606, - "rewards/rejected": -0.16386809945106506, - "rewards/safe_rewards": -0.1143939346075058, - "rewards/unsafe_rewards": -0.118386410176754, - "step": 1760 - }, - { - "epoch": 0.47, - "learning_rate": 3.1924066223600075e-07, - "logits/chosen": -2.5814857482910156, - "logits/rejected": -2.4635584354400635, - "logps/chosen": -219.8085174560547, - "logps/rejected": -223.7118377685547, - "loss": 1451.0927, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.08994387090206146, - "rewards/margins": 0.09653481841087341, - "rewards/rejected": -0.18647868931293488, - "rewards/safe_rewards": -0.09047529101371765, - "rewards/unsafe_rewards": -0.08941245079040527, - "step": 1770 - }, - { - "epoch": 0.47, - "learning_rate": 3.170128601528486e-07, - "logits/chosen": -2.542621612548828, - "logits/rejected": -2.4229633808135986, - "logps/chosen": -227.93252563476562, - "logps/rejected": -183.18423461914062, - "loss": 1254.8991, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.12791630625724792, - "rewards/margins": 0.060995329171419144, - "rewards/rejected": -0.18891164660453796, - "rewards/safe_rewards": -0.11581780761480331, - "rewards/unsafe_rewards": -0.14001484215259552, - "step": 1780 - }, - { - "epoch": 0.47, - "learning_rate": 3.147793097209557e-07, - "logits/chosen": -2.5026376247406006, - "logits/rejected": -2.3890738487243652, - "logps/chosen": -200.47068786621094, - "logps/rejected": -173.41432189941406, - "loss": 1274.9061, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.17685821652412415, - "rewards/margins": 0.09471040964126587, - "rewards/rejected": -0.2715685963630676, - "rewards/safe_rewards": -0.1810494065284729, - "rewards/unsafe_rewards": -0.172666996717453, - "step": 1790 - }, - { - "epoch": 0.48, - "learning_rate": 3.1254020253379816e-07, - "logits/chosen": -2.637838840484619, - "logits/rejected": -2.4623403549194336, - "logps/chosen": -200.8404998779297, - "logps/rejected": -175.89923095703125, - "loss": 1289.3171, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.15527309477329254, - "rewards/margins": 0.07773787528276443, - "rewards/rejected": -0.23301096260547638, - "rewards/safe_rewards": -0.1603432595729828, - "rewards/unsafe_rewards": -0.15020295977592468, - "step": 1800 - }, - { - "epoch": 0.48, - "learning_rate": 3.1029573066150927e-07, - "logits/chosen": -2.6081745624542236, - "logits/rejected": -2.446638822555542, - "logps/chosen": -236.24258422851562, - "logps/rejected": -201.07171630859375, - "loss": 1530.2842, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.14817751944065094, - "rewards/margins": 0.10973908007144928, - "rewards/rejected": -0.2579166293144226, - "rewards/safe_rewards": -0.1428823620080948, - "rewards/unsafe_rewards": -0.1534726619720459, - "step": 1810 - }, - { - "epoch": 0.48, - "learning_rate": 3.0804608663440375e-07, - "logits/chosen": -2.60705304145813, - "logits/rejected": -2.3964991569519043, - "logps/chosen": -221.8950653076172, - "logps/rejected": -150.5135498046875, - "loss": 1270.9918, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.16347292065620422, - "rewards/margins": 0.08766381442546844, - "rewards/rejected": -0.2511367201805115, - "rewards/safe_rewards": -0.16431590914726257, - "rewards/unsafe_rewards": -0.1626299023628235, - "step": 1820 - }, - { - "epoch": 0.49, - "learning_rate": 3.0579146342646267e-07, - "logits/chosen": -2.6254096031188965, - "logits/rejected": -2.4774088859558105, - "logps/chosen": -242.34536743164062, - "logps/rejected": -198.2445068359375, - "loss": 1345.3914, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.18678438663482666, - "rewards/margins": 0.10494716465473175, - "rewards/rejected": -0.2917315363883972, - "rewards/safe_rewards": -0.20530100166797638, - "rewards/unsafe_rewards": -0.16826775670051575, - "step": 1830 - }, - { - "epoch": 0.49, - "learning_rate": 3.0353205443878033e-07, - "logits/chosen": -2.5342631340026855, - "logits/rejected": -2.358438014984131, - "logps/chosen": -252.79507446289062, - "logps/rejected": -217.93978881835938, - "loss": 1550.9495, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.1841137856245041, - "rewards/margins": 0.08327686786651611, - "rewards/rejected": -0.267390638589859, - "rewards/safe_rewards": -0.1832982748746872, - "rewards/unsafe_rewards": -0.1849292814731598, - "step": 1840 - }, - { - "epoch": 0.49, - "learning_rate": 3.012680534829741e-07, - "logits/chosen": -2.400489091873169, - "logits/rejected": -2.3201334476470947, - "logps/chosen": -252.9420928955078, - "logps/rejected": -230.3839569091797, - "loss": 1455.5671, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.21103501319885254, - "rewards/margins": 0.11814670264720917, - "rewards/rejected": -0.3291817307472229, - "rewards/safe_rewards": -0.2489803284406662, - "rewards/unsafe_rewards": -0.1730896681547165, - "step": 1850 - }, - { - "epoch": 0.49, - "learning_rate": 2.9899965476455923e-07, - "logits/chosen": -2.4957633018493652, - "logits/rejected": -2.3630380630493164, - "logps/chosen": -234.0614471435547, - "logps/rejected": -219.897216796875, - "loss": 1266.9163, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.1988244354724884, - "rewards/margins": 0.1296526938676834, - "rewards/rejected": -0.3284771144390106, - "rewards/safe_rewards": -0.1971742808818817, - "rewards/unsafe_rewards": -0.2004745751619339, - "step": 1860 - }, - { - "epoch": 0.5, - "learning_rate": 2.967270528662903e-07, - "logits/chosen": -2.4429516792297363, - "logits/rejected": -2.334066867828369, - "logps/chosen": -223.6156768798828, - "logps/rejected": -235.7895965576172, - "loss": 1393.4368, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.20147797465324402, - "rewards/margins": 0.09983384609222412, - "rewards/rejected": -0.30131182074546814, - "rewards/safe_rewards": -0.22797062993049622, - "rewards/unsafe_rewards": -0.17498531937599182, - "step": 1870 - }, - { - "epoch": 0.5, - "learning_rate": 2.944504427314697e-07, - "logits/chosen": -2.525000810623169, - "logits/rejected": -2.390749931335449, - "logps/chosen": -231.87167358398438, - "logps/rejected": -191.01589965820312, - "loss": 1288.1688, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.1958586573600769, - "rewards/margins": 0.05801469087600708, - "rewards/rejected": -0.253873348236084, - "rewards/safe_rewards": -0.19562391936779022, - "rewards/unsafe_rewards": -0.19609341025352478, - "step": 1880 - }, - { - "epoch": 0.5, - "learning_rate": 2.921700196472254e-07, - "logits/chosen": -2.517301559448242, - "logits/rejected": -2.412170648574829, - "logps/chosen": -231.212646484375, - "logps/rejected": -219.40554809570312, - "loss": 1231.8775, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.13757678866386414, - "rewards/margins": 0.08369623124599457, - "rewards/rejected": -0.2212730199098587, - "rewards/safe_rewards": -0.13803748786449432, - "rewards/unsafe_rewards": -0.13711608946323395, - "step": 1890 - }, - { - "epoch": 0.5, - "learning_rate": 2.8988597922775957e-07, - "logits/chosen": -2.499457836151123, - "logits/rejected": -2.4003498554229736, - "logps/chosen": -223.5936279296875, - "logps/rejected": -194.99151611328125, - "loss": 1317.801, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.1991351693868637, - "rewards/margins": 0.07430565357208252, - "rewards/rejected": -0.2734408378601074, - "rewards/safe_rewards": -0.1947377473115921, - "rewards/unsafe_rewards": -0.20353257656097412, - "step": 1900 - }, - { - "epoch": 0.51, - "learning_rate": 2.875985173975684e-07, - "logits/chosen": -2.5241916179656982, - "logits/rejected": -2.349404811859131, - "logps/chosen": -185.50253295898438, - "logps/rejected": -175.8238067626953, - "loss": 1450.6304, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.18669968843460083, - "rewards/margins": 0.08765646070241928, - "rewards/rejected": -0.2743561863899231, - "rewards/safe_rewards": -0.18601247668266296, - "rewards/unsafe_rewards": -0.1873868703842163, - "step": 1910 - }, - { - "epoch": 0.51, - "learning_rate": 2.8530783037463597e-07, - "logits/chosen": -2.5135693550109863, - "logits/rejected": -2.485405445098877, - "logps/chosen": -240.469482421875, - "logps/rejected": -229.724609375, - "loss": 1349.291, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.13673213124275208, - "rewards/margins": 0.10208648443222046, - "rewards/rejected": -0.23881861567497253, - "rewards/safe_rewards": -0.14533600211143494, - "rewards/unsafe_rewards": -0.1281282603740692, - "step": 1920 - }, - { - "epoch": 0.51, - "learning_rate": 2.830141146536028e-07, - "logits/chosen": -2.490551710128784, - "logits/rejected": -2.3557827472686768, - "logps/chosen": -257.0555114746094, - "logps/rejected": -178.37991333007812, - "loss": 1635.4246, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.15669485926628113, - "rewards/margins": 0.08748017251491547, - "rewards/rejected": -0.2441750466823578, - "rewards/safe_rewards": -0.14599820971488953, - "rewards/unsafe_rewards": -0.16739150881767273, - "step": 1930 - }, - { - "epoch": 0.51, - "learning_rate": 2.807175669889105e-07, - "logits/chosen": -2.487661838531494, - "logits/rejected": -2.412285089492798, - "logps/chosen": -246.3243865966797, - "logps/rejected": -209.6628875732422, - "loss": 1310.3097, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.10567233711481094, - "rewards/margins": 0.10232706367969513, - "rewards/rejected": -0.20799939334392548, - "rewards/safe_rewards": -0.09429927170276642, - "rewards/unsafe_rewards": -0.11704540252685547, - "step": 1940 - }, - { - "epoch": 0.52, - "learning_rate": 2.78418384377924e-07, - "logits/chosen": -2.4737987518310547, - "logits/rejected": -2.353318929672241, - "logps/chosen": -208.8633575439453, - "logps/rejected": -201.98355102539062, - "loss": 1337.6302, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.15721412003040314, - "rewards/margins": 0.07190976291894913, - "rewards/rejected": -0.22912387549877167, - "rewards/safe_rewards": -0.1655702292919159, - "rewards/unsafe_rewards": -0.148857980966568, - "step": 1950 - }, - { - "epoch": 0.52, - "learning_rate": 2.761167640440338e-07, - "logits/chosen": -2.581913471221924, - "logits/rejected": -2.504181146621704, - "logps/chosen": -210.75180053710938, - "logps/rejected": -182.91021728515625, - "loss": 1285.468, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.14732445776462555, - "rewards/margins": 0.07644233852624893, - "rewards/rejected": -0.2237667739391327, - "rewards/safe_rewards": -0.1480104625225067, - "rewards/unsafe_rewards": -0.146638423204422, - "step": 1960 - }, - { - "epoch": 0.52, - "learning_rate": 2.738129034197371e-07, - "logits/chosen": -2.540590524673462, - "logits/rejected": -2.4330615997314453, - "logps/chosen": -233.14291381835938, - "logps/rejected": -210.0241241455078, - "loss": 1379.4145, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.16095609962940216, - "rewards/margins": 0.0881221741437912, - "rewards/rejected": -0.24907827377319336, - "rewards/safe_rewards": -0.15186931192874908, - "rewards/unsafe_rewards": -0.17004290223121643, - "step": 1970 - }, - { - "epoch": 0.53, - "learning_rate": 2.7150700012970335e-07, - "logits/chosen": -2.4416656494140625, - "logits/rejected": -2.37373423576355, - "logps/chosen": -164.47569274902344, - "logps/rejected": -171.78884887695312, - "loss": 1296.12, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.15709151327610016, - "rewards/margins": 0.09523182362318039, - "rewards/rejected": -0.25232332944869995, - "rewards/safe_rewards": -0.1535656601190567, - "rewards/unsafe_rewards": -0.1606173813343048, - "step": 1980 - }, - { - "epoch": 0.53, - "learning_rate": 2.69199251973821e-07, - "logits/chosen": -2.429625988006592, - "logits/rejected": -2.3488945960998535, - "logps/chosen": -210.1746368408203, - "logps/rejected": -179.38394165039062, - "loss": 1459.6894, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.1717846691608429, - "rewards/margins": 0.060426510870456696, - "rewards/rejected": -0.2322111874818802, - "rewards/safe_rewards": -0.17486758530139923, - "rewards/unsafe_rewards": -0.16870179772377014, - "step": 1990 - }, - { - "epoch": 0.53, - "learning_rate": 2.668898569102308e-07, - "logits/chosen": -2.4625418186187744, - "logits/rejected": -2.2672998905181885, - "logps/chosen": -203.03599548339844, - "logps/rejected": -193.73269653320312, - "loss": 1361.9422, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.1658346951007843, - "rewards/margins": 0.14335204660892487, - "rewards/rejected": -0.309186726808548, - "rewards/safe_rewards": -0.1529822200536728, - "rewards/unsafe_rewards": -0.17868714034557343, - "step": 2000 - }, - { - "epoch": 0.53, - "eval_logits/chosen": -2.3435044288635254, - "eval_logits/rejected": -2.198021650314331, - "eval_logps/chosen": -164.1644744873047, - "eval_logps/rejected": -130.7617645263672, - "eval_loss": 991.2021484375, - "eval_rewards/accuracies": 0.6685675382614136, - "eval_rewards/chosen": -0.23814915120601654, - "eval_rewards/margins": 0.04813806340098381, - "eval_rewards/rejected": -0.28628724813461304, - "eval_rewards/safe_rewards": -0.23559458553791046, - "eval_rewards/unsafe_rewards": -0.23301181197166443, - "eval_runtime": 2350.2776, - "eval_samples_per_second": 14.911, - "eval_steps_per_second": 0.466, - "step": 2000 - }, - { - "epoch": 0.53, - "learning_rate": 2.6457901303834515e-07, - "logits/chosen": -2.4759714603424072, - "logits/rejected": -2.374565839767456, - "logps/chosen": -209.5516815185547, - "logps/rejected": -195.5994415283203, - "loss": 1240.9788, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.19912727177143097, - "rewards/margins": 0.10102291405200958, - "rewards/rejected": -0.30015018582344055, - "rewards/safe_rewards": -0.21168968081474304, - "rewards/unsafe_rewards": -0.1865648478269577, - "step": 2010 - }, - { - "epoch": 0.54, - "learning_rate": 2.6226691858185454e-07, - "logits/chosen": -2.4506821632385254, - "logits/rejected": -2.426478385925293, - "logps/chosen": -211.47244262695312, - "logps/rejected": -182.9574432373047, - "loss": 1611.4539, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.1903916895389557, - "rewards/margins": 0.08754824101924896, - "rewards/rejected": -0.27793991565704346, - "rewards/safe_rewards": -0.22850194573402405, - "rewards/unsafe_rewards": -0.15228143334388733, - "step": 2020 - }, - { - "epoch": 0.54, - "learning_rate": 2.599537718717245e-07, - "logits/chosen": -2.4043211936950684, - "logits/rejected": -2.291820764541626, - "logps/chosen": -243.07333374023438, - "logps/rejected": -228.57217407226562, - "loss": 1374.3383, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.147500142455101, - "rewards/margins": 0.13482965528964996, - "rewards/rejected": -0.282329797744751, - "rewards/safe_rewards": -0.14832225441932678, - "rewards/unsafe_rewards": -0.14667803049087524, - "step": 2030 - }, - { - "epoch": 0.54, - "learning_rate": 2.5763977132918267e-07, - "logits/chosen": -2.453399658203125, - "logits/rejected": -2.2970356941223145, - "logps/chosen": -206.1725311279297, - "logps/rejected": -189.53208923339844, - "loss": 1491.7768, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.16579872369766235, - "rewards/margins": 0.09684140235185623, - "rewards/rejected": -0.2626401484012604, - "rewards/safe_rewards": -0.15709422528743744, - "rewards/unsafe_rewards": -0.17450323700904846, - "step": 2040 - }, - { - "epoch": 0.54, - "learning_rate": 2.5532511544869835e-07, - "logits/chosen": -2.4601545333862305, - "logits/rejected": -2.3518226146698, - "logps/chosen": -235.8297882080078, - "logps/rejected": -198.88648986816406, - "loss": 1436.6111, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.1235668808221817, - "rewards/margins": 0.08136168867349625, - "rewards/rejected": -0.20492854714393616, - "rewards/safe_rewards": -0.13408291339874268, - "rewards/unsafe_rewards": -0.11305083334445953, - "step": 2050 - }, - { - "epoch": 0.55, - "learning_rate": 2.5301000278095535e-07, - "logits/chosen": -2.402427911758423, - "logits/rejected": -2.3180432319641113, - "logps/chosen": -193.18194580078125, - "logps/rejected": -173.34707641601562, - "loss": 1458.4996, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.17313452064990997, - "rewards/margins": 0.07035806775093079, - "rewards/rejected": -0.24349257349967957, - "rewards/safe_rewards": -0.17968951165676117, - "rewards/unsafe_rewards": -0.16657951474189758, - "step": 2060 - }, - { - "epoch": 0.55, - "learning_rate": 2.506946319158208e-07, - "logits/chosen": -2.51488995552063, - "logits/rejected": -2.3887581825256348, - "logps/chosen": -232.5959930419922, - "logps/rejected": -203.18624877929688, - "loss": 1375.0532, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.16676241159439087, - "rewards/margins": 0.07408100366592407, - "rewards/rejected": -0.24084338545799255, - "rewards/safe_rewards": -0.18878403306007385, - "rewards/unsafe_rewards": -0.1447407752275467, - "step": 2070 - }, - { - "epoch": 0.55, - "learning_rate": 2.483792014653097e-07, - "logits/chosen": -2.4196736812591553, - "logits/rejected": -2.320687770843506, - "logps/chosen": -247.30160522460938, - "logps/rejected": -207.07156372070312, - "loss": 1506.3441, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.16045354306697845, - "rewards/margins": 0.05838986113667488, - "rewards/rejected": -0.21884341537952423, - "rewards/safe_rewards": -0.1661694347858429, - "rewards/unsafe_rewards": -0.1547376811504364, - "step": 2080 - }, - { - "epoch": 0.55, - "learning_rate": 2.4606391004654855e-07, - "logits/chosen": -2.437570095062256, - "logits/rejected": -2.330148935317993, - "logps/chosen": -209.78018188476562, - "logps/rejected": -210.4960479736328, - "loss": 1494.784, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.136993408203125, - "rewards/margins": 0.06869462877511978, - "rewards/rejected": -0.20568804442882538, - "rewards/safe_rewards": -0.14101234078407288, - "rewards/unsafe_rewards": -0.13297446072101593, - "step": 2090 - }, - { - "epoch": 0.56, - "learning_rate": 2.4374895626473773e-07, - "logits/chosen": -2.420156955718994, - "logits/rejected": -2.321631669998169, - "logps/chosen": -251.2263641357422, - "logps/rejected": -221.1849822998047, - "loss": 1481.2426, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.1329573392868042, - "rewards/margins": 0.10544238239526749, - "rewards/rejected": -0.2383997142314911, - "rewards/safe_rewards": -0.13209089636802673, - "rewards/unsafe_rewards": -0.13382378220558167, - "step": 2100 - }, - { - "epoch": 0.56, - "learning_rate": 2.414345386961149e-07, - "logits/chosen": -2.4364171028137207, - "logits/rejected": -2.3148913383483887, - "logps/chosen": -227.43582153320312, - "logps/rejected": -208.8198699951172, - "loss": 1432.4787, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.1349310725927353, - "rewards/margins": 0.06205032393336296, - "rewards/rejected": -0.19698138535022736, - "rewards/safe_rewards": -0.15002097189426422, - "rewards/unsafe_rewards": -0.11984117329120636, - "step": 2110 - }, - { - "epoch": 0.56, - "learning_rate": 2.391208558709218e-07, - "logits/chosen": -2.5004663467407227, - "logits/rejected": -2.4065101146698, - "logps/chosen": -192.84033203125, - "logps/rejected": -189.07659912109375, - "loss": 1408.4769, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.12439978122711182, - "rewards/margins": 0.10417710244655609, - "rewards/rejected": -0.2285768985748291, - "rewards/safe_rewards": -0.1236104816198349, - "rewards/unsafe_rewards": -0.12518908083438873, - "step": 2120 - }, - { - "epoch": 0.57, - "learning_rate": 2.3680810625637368e-07, - "logits/chosen": -2.480620861053467, - "logits/rejected": -2.385749101638794, - "logps/chosen": -178.59791564941406, - "logps/rejected": -167.1098175048828, - "loss": 1417.9316, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.15111371874809265, - "rewards/margins": 0.07442240417003632, - "rewards/rejected": -0.22553610801696777, - "rewards/safe_rewards": -0.13198518753051758, - "rewards/unsafe_rewards": -0.17024224996566772, - "step": 2130 - }, - { - "epoch": 0.57, - "learning_rate": 2.3449648823963553e-07, - "logits/chosen": -2.4995198249816895, - "logits/rejected": -2.369597911834717, - "logps/chosen": -196.19549560546875, - "logps/rejected": -189.19076538085938, - "loss": 1466.058, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.12030693143606186, - "rewards/margins": 0.08176928013563156, - "rewards/rejected": -0.20207622647285461, - "rewards/safe_rewards": -0.1256714165210724, - "rewards/unsafe_rewards": -0.11494243144989014, - "step": 2140 - }, - { - "epoch": 0.57, - "learning_rate": 2.3218620011080398e-07, - "logits/chosen": -2.5006802082061768, - "logits/rejected": -2.3316831588745117, - "logps/chosen": -232.531982421875, - "logps/rejected": -201.3978729248047, - "loss": 1350.8678, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.1268027126789093, - "rewards/margins": 0.09993503987789154, - "rewards/rejected": -0.22673776745796204, - "rewards/safe_rewards": -0.14591991901397705, - "rewards/unsafe_rewards": -0.10768552124500275, - "step": 2150 - }, - { - "epoch": 0.57, - "learning_rate": 2.2987744004589814e-07, - "logits/chosen": -2.4784650802612305, - "logits/rejected": -2.276946783065796, - "logps/chosen": -234.5005340576172, - "logps/rejected": -193.0286102294922, - "loss": 1321.8506, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.13791488111019135, - "rewards/margins": 0.08454158902168274, - "rewards/rejected": -0.2224564552307129, - "rewards/safe_rewards": -0.14481723308563232, - "rewards/unsafe_rewards": -0.13101252913475037, - "step": 2160 - }, - { - "epoch": 0.58, - "learning_rate": 2.2757040608986023e-07, - "logits/chosen": -2.471007823944092, - "logits/rejected": -2.3761425018310547, - "logps/chosen": -232.3048858642578, - "logps/rejected": -205.1953887939453, - "loss": 1429.8463, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.12129487842321396, - "rewards/margins": 0.10116429626941681, - "rewards/rejected": -0.22245916724205017, - "rewards/safe_rewards": -0.13418254256248474, - "rewards/unsafe_rewards": -0.10840721428394318, - "step": 2170 - }, - { - "epoch": 0.58, - "learning_rate": 2.25265296139567e-07, - "logits/chosen": -2.4910647869110107, - "logits/rejected": -2.41190767288208, - "logps/chosen": -265.6110534667969, - "logps/rejected": -195.3870391845703, - "loss": 1318.8509, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.15205354988574982, - "rewards/margins": 0.0821566954255104, - "rewards/rejected": -0.23421022295951843, - "rewards/safe_rewards": -0.1293485164642334, - "rewards/unsafe_rewards": -0.17475856840610504, - "step": 2180 - }, - { - "epoch": 0.58, - "learning_rate": 2.2296230792685467e-07, - "logits/chosen": -2.412083864212036, - "logits/rejected": -2.269346237182617, - "logps/chosen": -211.48635864257812, - "logps/rejected": -191.59848022460938, - "loss": 1390.2391, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.1491389125585556, - "rewards/margins": 0.08274741470813751, - "rewards/rejected": -0.2318863421678543, - "rewards/safe_rewards": -0.1368299424648285, - "rewards/unsafe_rewards": -0.16144788265228271, - "step": 2190 - }, - { - "epoch": 0.58, - "learning_rate": 2.2066163900155704e-07, - "logits/chosen": -2.4881632328033447, - "logits/rejected": -2.3802459239959717, - "logps/chosen": -187.48193359375, - "logps/rejected": -211.3712921142578, - "loss": 1566.0426, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.14224691689014435, - "rewards/margins": 0.0761914774775505, - "rewards/rejected": -0.21843838691711426, - "rewards/safe_rewards": -0.1473698914051056, - "rewards/unsafe_rewards": -0.1371239423751831, - "step": 2200 - }, - { - "epoch": 0.59, - "learning_rate": 2.1836348671456014e-07, - "logits/chosen": -2.389488697052002, - "logits/rejected": -2.2177658081054688, - "logps/chosen": -198.92556762695312, - "logps/rejected": -169.1580352783203, - "loss": 1477.3396, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.14946946501731873, - "rewards/margins": 0.07665406167507172, - "rewards/rejected": -0.22612352669239044, - "rewards/safe_rewards": -0.1635880470275879, - "rewards/unsafe_rewards": -0.13535086810588837, - "step": 2210 - }, - { - "epoch": 0.59, - "learning_rate": 2.160680482008731e-07, - "logits/chosen": -2.4867992401123047, - "logits/rejected": -2.315603017807007, - "logps/chosen": -211.35055541992188, - "logps/rejected": -198.93075561523438, - "loss": 1371.9206, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.10739324241876602, - "rewards/margins": 0.09028784930706024, - "rewards/rejected": -0.19768109917640686, - "rewards/safe_rewards": -0.1019827127456665, - "rewards/unsafe_rewards": -0.11280377954244614, - "step": 2220 - }, - { - "epoch": 0.59, - "learning_rate": 2.1377552036271825e-07, - "logits/chosen": -2.4671566486358643, - "logits/rejected": -2.4183859825134277, - "logps/chosen": -194.6284637451172, - "logps/rejected": -188.99264526367188, - "loss": 1379.8354, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.12507639825344086, - "rewards/margins": 0.08070269972085953, - "rewards/rejected": -0.2057790756225586, - "rewards/safe_rewards": -0.1028589978814125, - "rewards/unsafe_rewards": -0.1472938060760498, - "step": 2230 - }, - { - "epoch": 0.59, - "learning_rate": 2.114860998526409e-07, - "logits/chosen": -2.488299608230591, - "logits/rejected": -2.3015782833099365, - "logps/chosen": -216.9426727294922, - "logps/rejected": -167.46730041503906, - "loss": 1451.6342, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.12344807386398315, - "rewards/margins": 0.07239849865436554, - "rewards/rejected": -0.1958465576171875, - "rewards/safe_rewards": -0.12569786608219147, - "rewards/unsafe_rewards": -0.12119831889867783, - "step": 2240 - }, - { - "epoch": 0.6, - "learning_rate": 2.0919998305664028e-07, - "logits/chosen": -2.5245299339294434, - "logits/rejected": -2.3502886295318604, - "logps/chosen": -221.5634307861328, - "logps/rejected": -165.2393035888672, - "loss": 1407.3666, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.12156929820775986, - "rewards/margins": 0.080097496509552, - "rewards/rejected": -0.20166680216789246, - "rewards/safe_rewards": -0.12132598459720612, - "rewards/unsafe_rewards": -0.121812604367733, - "step": 2250 - }, - { - "epoch": 0.6, - "learning_rate": 2.0691736607732381e-07, - "logits/chosen": -2.543241024017334, - "logits/rejected": -2.4274001121520996, - "logps/chosen": -191.79177856445312, - "logps/rejected": -157.69992065429688, - "loss": 1418.5166, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.10374467074871063, - "rewards/margins": 0.09375988692045212, - "rewards/rejected": -0.19750455021858215, - "rewards/safe_rewards": -0.09328322112560272, - "rewards/unsafe_rewards": -0.11420612037181854, - "step": 2260 - }, - { - "epoch": 0.6, - "learning_rate": 2.0463844471708534e-07, - "logits/chosen": -2.5539581775665283, - "logits/rejected": -2.3504045009613037, - "logps/chosen": -215.0828094482422, - "logps/rejected": -186.6298828125, - "loss": 1386.757, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.1426406055688858, - "rewards/margins": 0.08760237693786621, - "rewards/rejected": -0.2302429974079132, - "rewards/safe_rewards": -0.1647053062915802, - "rewards/unsafe_rewards": -0.12057588994503021, - "step": 2270 - }, - { - "epoch": 0.6, - "learning_rate": 2.0236341446130933e-07, - "logits/chosen": -2.4923171997070312, - "logits/rejected": -2.414064407348633, - "logps/chosen": -218.45681762695312, - "logps/rejected": -212.07241821289062, - "loss": 1365.6666, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.12126747518777847, - "rewards/margins": 0.07703817635774612, - "rewards/rejected": -0.1983056366443634, - "rewards/safe_rewards": -0.10720603168010712, - "rewards/unsafe_rewards": -0.13532891869544983, - "step": 2280 - }, - { - "epoch": 0.61, - "learning_rate": 2.0009247046160228e-07, - "logits/chosen": -2.4711546897888184, - "logits/rejected": -2.437969446182251, - "logps/chosen": -253.02798461914062, - "logps/rejected": -211.25405883789062, - "loss": 1569.0791, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.11968283355236053, - "rewards/margins": 0.10151016712188721, - "rewards/rejected": -0.22119303047657013, - "rewards/safe_rewards": -0.11287388950586319, - "rewards/unsafe_rewards": -0.12649178504943848, - "step": 2290 - }, - { - "epoch": 0.61, - "learning_rate": 1.9782580751905216e-07, - "logits/chosen": -2.388043165206909, - "logits/rejected": -2.3421542644500732, - "logps/chosen": -201.22665405273438, - "logps/rejected": -214.58596801757812, - "loss": 1326.9529, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.13698357343673706, - "rewards/margins": 0.11646846681833267, - "rewards/rejected": -0.25345203280448914, - "rewards/safe_rewards": -0.14084574580192566, - "rewards/unsafe_rewards": -0.13312140107154846, - "step": 2300 - }, - { - "epoch": 0.61, - "learning_rate": 1.9556362006751905e-07, - "logits/chosen": -2.391787052154541, - "logits/rejected": -2.301476001739502, - "logps/chosen": -200.34669494628906, - "logps/rejected": -163.69271850585938, - "loss": 1367.249, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.09764213114976883, - "rewards/margins": 0.09558195620775223, - "rewards/rejected": -0.19322410225868225, - "rewards/safe_rewards": -0.0991140753030777, - "rewards/unsafe_rewards": -0.09617018699645996, - "step": 2310 - }, - { - "epoch": 0.62, - "learning_rate": 1.9330610215695647e-07, - "logits/chosen": -2.6220991611480713, - "logits/rejected": -2.393228530883789, - "logps/chosen": -248.3136444091797, - "logps/rejected": -192.6009521484375, - "loss": 1391.333, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.11033634841442108, - "rewards/margins": 0.09691871702671051, - "rewards/rejected": -0.20725508034229279, - "rewards/safe_rewards": -0.12268439680337906, - "rewards/unsafe_rewards": -0.0979883000254631, - "step": 2320 - }, - { - "epoch": 0.62, - "learning_rate": 1.9105344743676544e-07, - "logits/chosen": -2.582388162612915, - "logits/rejected": -2.3894667625427246, - "logps/chosen": -234.9110870361328, - "logps/rejected": -214.70773315429688, - "loss": 1231.0263, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.11229635775089264, - "rewards/margins": 0.10913494974374771, - "rewards/rejected": -0.22143130004405975, - "rewards/safe_rewards": -0.1089363843202591, - "rewards/unsafe_rewards": -0.11565636098384857, - "step": 2330 - }, - { - "epoch": 0.62, - "learning_rate": 1.888058491391837e-07, - "logits/chosen": -2.340135335922241, - "logits/rejected": -2.3757290840148926, - "logps/chosen": -206.0988311767578, - "logps/rejected": -220.0872802734375, - "loss": 1708.1523, - "rewards/accuracies": 0.512499988079071, - "rewards/chosen": -0.17391082644462585, - "rewards/margins": 0.031197816133499146, - "rewards/rejected": -0.2051086127758026, - "rewards/safe_rewards": -0.16137436032295227, - "rewards/unsafe_rewards": -0.18644729256629944, - "step": 2340 - }, - { - "epoch": 0.62, - "learning_rate": 1.865635000627102e-07, - "logits/chosen": -2.4265928268432617, - "logits/rejected": -2.319833993911743, - "logps/chosen": -167.71585083007812, - "logps/rejected": -166.10971069335938, - "loss": 1545.3021, - "rewards/accuracies": 0.762499988079071, - "rewards/chosen": -0.13507337868213654, - "rewards/margins": 0.11923108994960785, - "rewards/rejected": -0.2543044686317444, - "rewards/safe_rewards": -0.1444559395313263, - "rewards/unsafe_rewards": -0.12569081783294678, - "step": 2350 - }, - { - "epoch": 0.63, - "learning_rate": 1.843265925555667e-07, - "logits/chosen": -2.4492783546447754, - "logits/rejected": -2.3472743034362793, - "logps/chosen": -173.5474090576172, - "logps/rejected": -168.97134399414062, - "loss": 1342.7734, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.13487228751182556, - "rewards/margins": 0.0858498215675354, - "rewards/rejected": -0.22072210907936096, - "rewards/safe_rewards": -0.13287201523780823, - "rewards/unsafe_rewards": -0.1368725299835205, - "step": 2360 - }, - { - "epoch": 0.63, - "learning_rate": 1.8209531849919848e-07, - "logits/chosen": -2.552664279937744, - "logits/rejected": -2.376894950866699, - "logps/chosen": -251.29867553710938, - "logps/rejected": -202.37380981445312, - "loss": 1553.0475, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.17678940296173096, - "rewards/margins": 0.07482634484767914, - "rewards/rejected": -0.2516157329082489, - "rewards/safe_rewards": -0.16200976073741913, - "rewards/unsafe_rewards": -0.19156906008720398, - "step": 2370 - }, - { - "epoch": 0.63, - "learning_rate": 1.798698692918144e-07, - "logits/chosen": -2.407911777496338, - "logits/rejected": -2.2484326362609863, - "logps/chosen": -238.79421997070312, - "logps/rejected": -183.09768676757812, - "loss": 1404.5992, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.16303926706314087, - "rewards/margins": 0.1185038834810257, - "rewards/rejected": -0.281543105840683, - "rewards/safe_rewards": -0.1675606667995453, - "rewards/unsafe_rewards": -0.15851789712905884, - "step": 2380 - }, - { - "epoch": 0.63, - "learning_rate": 1.7765043583196918e-07, - "logits/chosen": -2.4979567527770996, - "logits/rejected": -2.33666729927063, - "logps/chosen": -229.1798553466797, - "logps/rejected": -204.34994506835938, - "loss": 1399.3854, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.17812839150428772, - "rewards/margins": 0.11284424364566803, - "rewards/rejected": -0.29097265005111694, - "rewards/safe_rewards": -0.17087377607822418, - "rewards/unsafe_rewards": -0.18538297712802887, - "step": 2390 - }, - { - "epoch": 0.64, - "learning_rate": 1.75437208502188e-07, - "logits/chosen": -2.5016016960144043, - "logits/rejected": -2.411349058151245, - "logps/chosen": -211.0176239013672, - "logps/rejected": -192.70266723632812, - "loss": 1380.9002, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.15835638344287872, - "rewards/margins": 0.09648066759109497, - "rewards/rejected": -0.2548370361328125, - "rewards/safe_rewards": -0.1672056019306183, - "rewards/unsafe_rewards": -0.14950717985630035, - "step": 2400 - }, - { - "epoch": 0.64, - "learning_rate": 1.7323037715263556e-07, - "logits/chosen": -2.506575345993042, - "logits/rejected": -2.403273105621338, - "logps/chosen": -209.0438690185547, - "logps/rejected": -209.6345977783203, - "loss": 1349.2687, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.16800788044929504, - "rewards/margins": 0.08734369277954102, - "rewards/rejected": -0.25535157322883606, - "rewards/safe_rewards": -0.14462462067604065, - "rewards/unsafe_rewards": -0.19139111042022705, - "step": 2410 - }, - { - "epoch": 0.64, - "learning_rate": 1.7103013108483055e-07, - "logits/chosen": -2.4804399013519287, - "logits/rejected": -2.291785717010498, - "logps/chosen": -285.30364990234375, - "logps/rejected": -178.92327880859375, - "loss": 1397.3707, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.11621476709842682, - "rewards/margins": 0.0993955209851265, - "rewards/rejected": -0.21561026573181152, - "rewards/safe_rewards": -0.1270422637462616, - "rewards/unsafe_rewards": -0.10538727045059204, - "step": 2420 - }, - { - "epoch": 0.64, - "learning_rate": 1.6883665903540788e-07, - "logits/chosen": -2.5082926750183105, - "logits/rejected": -2.3166751861572266, - "logps/chosen": -261.8489990234375, - "logps/rejected": -213.2135009765625, - "loss": 1332.2781, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.1385953426361084, - "rewards/margins": 0.0877176895737648, - "rewards/rejected": -0.2263130396604538, - "rewards/safe_rewards": -0.12270998954772949, - "rewards/unsafe_rewards": -0.1544807106256485, - "step": 2430 - }, - { - "epoch": 0.65, - "learning_rate": 1.6665014915992854e-07, - "logits/chosen": -2.405097246170044, - "logits/rejected": -2.3065104484558105, - "logps/chosen": -209.33700561523438, - "logps/rejected": -190.3739013671875, - "loss": 1369.9449, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.1621173769235611, - "rewards/margins": 0.06979227811098099, - "rewards/rejected": -0.23190966248512268, - "rewards/safe_rewards": -0.16811616718769073, - "rewards/unsafe_rewards": -0.15611855685710907, - "step": 2440 - }, - { - "epoch": 0.65, - "learning_rate": 1.644707890167396e-07, - "logits/chosen": -2.4365737438201904, - "logits/rejected": -2.3153648376464844, - "logps/chosen": -205.60171508789062, - "logps/rejected": -184.92398071289062, - "loss": 1375.2531, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.15144333243370056, - "rewards/margins": 0.09435336291790009, - "rewards/rejected": -0.24579668045043945, - "rewards/safe_rewards": -0.1532522588968277, - "rewards/unsafe_rewards": -0.14963440597057343, - "step": 2450 - }, - { - "epoch": 0.65, - "learning_rate": 1.6229876555088578e-07, - "logits/chosen": -2.3271708488464355, - "logits/rejected": -2.234588623046875, - "logps/chosen": -200.21229553222656, - "logps/rejected": -174.22476196289062, - "loss": 1396.0794, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.12566322088241577, - "rewards/margins": 0.08187476545572281, - "rewards/rejected": -0.20753800868988037, - "rewards/safe_rewards": -0.10973338037729263, - "rewards/unsafe_rewards": -0.1415930539369583, - "step": 2460 - }, - { - "epoch": 0.66, - "learning_rate": 1.6013426507807315e-07, - "logits/chosen": -2.455134868621826, - "logits/rejected": -2.392401933670044, - "logps/chosen": -255.32589721679688, - "logps/rejected": -244.7705078125, - "loss": 1264.645, - "rewards/accuracies": 0.762499988079071, - "rewards/chosen": -0.10595989227294922, - "rewards/margins": 0.10324098914861679, - "rewards/rejected": -0.2092008888721466, - "rewards/safe_rewards": -0.1368628889322281, - "rewards/unsafe_rewards": -0.07505688071250916, - "step": 2470 - }, - { - "epoch": 0.66, - "learning_rate": 1.57977473268687e-07, - "logits/chosen": -2.4642786979675293, - "logits/rejected": -2.3196756839752197, - "logps/chosen": -229.4205780029297, - "logps/rejected": -220.3321533203125, - "loss": 1310.6853, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.14078626036643982, - "rewards/margins": 0.07878123223781586, - "rewards/rejected": -0.21956749260425568, - "rewards/safe_rewards": -0.11206815391778946, - "rewards/unsafe_rewards": -0.16950435936450958, - "step": 2480 - }, - { - "epoch": 0.66, - "learning_rate": 1.5582857513186517e-07, - "logits/chosen": -2.39992356300354, - "logits/rejected": -2.30269193649292, - "logps/chosen": -186.98617553710938, - "logps/rejected": -190.1416473388672, - "loss": 1350.9332, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.1544695794582367, - "rewards/margins": 0.07934041321277618, - "rewards/rejected": -0.23380997776985168, - "rewards/safe_rewards": -0.19125720858573914, - "rewards/unsafe_rewards": -0.11768193542957306, - "step": 2490 - }, - { - "epoch": 0.66, - "learning_rate": 1.53687754999628e-07, - "logits/chosen": -2.4800846576690674, - "logits/rejected": -2.3339457511901855, - "logps/chosen": -218.6508331298828, - "logps/rejected": -212.5465545654297, - "loss": 1406.6168, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.14865832030773163, - "rewards/margins": 0.0916738510131836, - "rewards/rejected": -0.24033217132091522, - "rewards/safe_rewards": -0.14618083834648132, - "rewards/unsafe_rewards": -0.15113580226898193, - "step": 2500 - }, - { - "epoch": 0.66, - "eval_logits/chosen": -2.243939161300659, - "eval_logits/rejected": -2.0826327800750732, - "eval_logps/chosen": -161.87469482421875, - "eval_logps/rejected": -128.15350341796875, - "eval_loss": 981.6749267578125, - "eval_rewards/accuracies": 0.6503193378448486, - "eval_rewards/chosen": -0.21525155007839203, - "eval_rewards/margins": 0.044953055679798126, - "eval_rewards/rejected": -0.260204553604126, - "eval_rewards/safe_rewards": -0.21263450384140015, - "eval_rewards/unsafe_rewards": -0.21035312116146088, - "eval_runtime": 2349.5049, - "eval_samples_per_second": 14.915, - "eval_steps_per_second": 0.466, - "step": 2500 - }, - { - "epoch": 0.67, - "learning_rate": 1.5155519651106623e-07, - "logits/chosen": -2.464728593826294, - "logits/rejected": -2.339097499847412, - "logps/chosen": -234.0205078125, - "logps/rejected": -171.7035369873047, - "loss": 1462.4503, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.1519336998462677, - "rewards/margins": 0.04750160500407219, - "rewards/rejected": -0.199435293674469, - "rewards/safe_rewards": -0.15348905324935913, - "rewards/unsafe_rewards": -0.15037833154201508, - "step": 2510 - }, - { - "epoch": 0.67, - "learning_rate": 1.4943108259658883e-07, - "logits/chosen": -2.475980758666992, - "logits/rejected": -2.2519335746765137, - "logps/chosen": -252.41091918945312, - "logps/rejected": -194.72401428222656, - "loss": 1376.6281, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.1694948673248291, - "rewards/margins": 0.09449909627437592, - "rewards/rejected": -0.2639939785003662, - "rewards/safe_rewards": -0.17862732708454132, - "rewards/unsafe_rewards": -0.16036242246627808, - "step": 2520 - }, - { - "epoch": 0.67, - "learning_rate": 1.4731559546223062e-07, - "logits/chosen": -2.424410581588745, - "logits/rejected": -2.2859268188476562, - "logps/chosen": -199.07345581054688, - "logps/rejected": -168.55819702148438, - "loss": 1513.6159, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.16565127670764923, - "rewards/margins": 0.07521404325962067, - "rewards/rejected": -0.24086534976959229, - "rewards/safe_rewards": -0.1696147620677948, - "rewards/unsafe_rewards": -0.16168779134750366, - "step": 2530 - }, - { - "epoch": 0.67, - "learning_rate": 1.452089165740235e-07, - "logits/chosen": -2.477186918258667, - "logits/rejected": -2.3892674446105957, - "logps/chosen": -194.59176635742188, - "logps/rejected": -186.26873779296875, - "loss": 1390.9826, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.16222994029521942, - "rewards/margins": 0.06926704943180084, - "rewards/rejected": -0.23149700462818146, - "rewards/safe_rewards": -0.1704310178756714, - "rewards/unsafe_rewards": -0.15402889251708984, - "step": 2540 - }, - { - "epoch": 0.68, - "learning_rate": 1.4311122664242953e-07, - "logits/chosen": -2.426283121109009, - "logits/rejected": -2.2988548278808594, - "logps/chosen": -234.96871948242188, - "logps/rejected": -220.08065795898438, - "loss": 1313.4455, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.13547977805137634, - "rewards/margins": 0.10146719217300415, - "rewards/rejected": -0.2369469851255417, - "rewards/safe_rewards": -0.13394209742546082, - "rewards/unsafe_rewards": -0.13701748847961426, - "step": 2550 - }, - { - "epoch": 0.68, - "learning_rate": 1.4102270560684026e-07, - "logits/chosen": -2.4530446529388428, - "logits/rejected": -2.3543193340301514, - "logps/chosen": -212.0669403076172, - "logps/rejected": -194.79942321777344, - "loss": 1499.7798, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.1316794753074646, - "rewards/margins": 0.07953207194805145, - "rewards/rejected": -0.21121153235435486, - "rewards/safe_rewards": -0.14874553680419922, - "rewards/unsafe_rewards": -0.11461341381072998, - "step": 2560 - }, - { - "epoch": 0.68, - "learning_rate": 1.3894353262014125e-07, - "logits/chosen": -2.3019938468933105, - "logits/rejected": -2.2461485862731934, - "logps/chosen": -250.4305877685547, - "logps/rejected": -244.29580688476562, - "loss": 1567.9445, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.13926860690116882, - "rewards/margins": 0.09388790279626846, - "rewards/rejected": -0.2331564873456955, - "rewards/safe_rewards": -0.13534750044345856, - "rewards/unsafe_rewards": -0.1431896984577179, - "step": 2570 - }, - { - "epoch": 0.68, - "learning_rate": 1.3687388603334435e-07, - "logits/chosen": -2.5282301902770996, - "logits/rejected": -2.3112215995788574, - "logps/chosen": -223.5190887451172, - "logps/rejected": -178.07382202148438, - "loss": 1292.8661, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.14245255291461945, - "rewards/margins": 0.07912611216306686, - "rewards/rejected": -0.2215786725282669, - "rewards/safe_rewards": -0.14102299511432648, - "rewards/unsafe_rewards": -0.1438821256160736, - "step": 2580 - }, - { - "epoch": 0.69, - "learning_rate": 1.34813943380289e-07, - "logits/chosen": -2.479917287826538, - "logits/rejected": -2.3745007514953613, - "logps/chosen": -213.238525390625, - "logps/rejected": -207.45150756835938, - "loss": 1395.3248, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.17697656154632568, - "rewards/margins": 0.057517070323228836, - "rewards/rejected": -0.23449364304542542, - "rewards/safe_rewards": -0.18230494856834412, - "rewards/unsafe_rewards": -0.17164818942546844, - "step": 2590 - }, - { - "epoch": 0.69, - "learning_rate": 1.3276388136241324e-07, - "logits/chosen": -2.4198577404022217, - "logits/rejected": -2.285285711288452, - "logps/chosen": -231.4438934326172, - "logps/rejected": -208.94692993164062, - "loss": 1347.6217, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.1924552470445633, - "rewards/margins": 0.09498604387044907, - "rewards/rejected": -0.28744131326675415, - "rewards/safe_rewards": -0.20564106106758118, - "rewards/unsafe_rewards": -0.1792694330215454, - "step": 2600 - }, - { - "epoch": 0.69, - "learning_rate": 1.3072387583359627e-07, - "logits/chosen": -2.457505226135254, - "logits/rejected": -2.395055055618286, - "logps/chosen": -239.54806518554688, - "logps/rejected": -218.69680786132812, - "loss": 1379.4445, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.13240113854408264, - "rewards/margins": 0.11632535606622696, - "rewards/rejected": -0.24872645735740662, - "rewards/safe_rewards": -0.12846654653549194, - "rewards/unsafe_rewards": -0.13633571565151215, - "step": 2610 - }, - { - "epoch": 0.7, - "learning_rate": 1.2869410178507397e-07, - "logits/chosen": -2.53021502494812, - "logits/rejected": -2.396454334259033, - "logps/chosen": -207.0546417236328, - "logps/rejected": -192.90869140625, - "loss": 1531.1809, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.13072463870048523, - "rewards/margins": 0.1065887063741684, - "rewards/rejected": -0.23731334507465363, - "rewards/safe_rewards": -0.1105370745062828, - "rewards/unsafe_rewards": -0.15091219544410706, - "step": 2620 - }, - { - "epoch": 0.7, - "learning_rate": 1.26674733330428e-07, - "logits/chosen": -2.4712276458740234, - "logits/rejected": -2.380847692489624, - "logps/chosen": -217.5896759033203, - "logps/rejected": -200.88526916503906, - "loss": 1352.8295, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.12428609281778336, - "rewards/margins": 0.09996049106121063, - "rewards/rejected": -0.22424659132957458, - "rewards/safe_rewards": -0.11259852349758148, - "rewards/unsafe_rewards": -0.13597366213798523, - "step": 2630 - }, - { - "epoch": 0.7, - "learning_rate": 1.246659436906502e-07, - "logits/chosen": -2.4063479900360107, - "logits/rejected": -2.3740413188934326, - "logps/chosen": -210.17025756835938, - "logps/rejected": -220.21932983398438, - "loss": 1295.2228, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.11398371309041977, - "rewards/margins": 0.08700254559516907, - "rewards/rejected": -0.20098623633384705, - "rewards/safe_rewards": -0.12532182037830353, - "rewards/unsafe_rewards": -0.10264559835195541, - "step": 2640 - }, - { - "epoch": 0.7, - "learning_rate": 1.2266790517928426e-07, - "logits/chosen": -2.348674774169922, - "logits/rejected": -2.3197035789489746, - "logps/chosen": -181.04849243164062, - "logps/rejected": -191.56727600097656, - "loss": 1380.1684, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.09642624855041504, - "rewards/margins": 0.1325521171092987, - "rewards/rejected": -0.22897835075855255, - "rewards/safe_rewards": -0.09433227777481079, - "rewards/unsafe_rewards": -0.09852023422718048, - "step": 2650 - }, - { - "epoch": 0.71, - "learning_rate": 1.2068078918764413e-07, - "logits/chosen": -2.443033456802368, - "logits/rejected": -2.3605430126190186, - "logps/chosen": -251.41671752929688, - "logps/rejected": -216.9822235107422, - "loss": 1305.0855, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.14169490337371826, - "rewards/margins": 0.11782296001911163, - "rewards/rejected": -0.2595178782939911, - "rewards/safe_rewards": -0.135724276304245, - "rewards/unsafe_rewards": -0.14766556024551392, - "step": 2660 - }, - { - "epoch": 0.71, - "learning_rate": 1.1870476617011251e-07, - "logits/chosen": -2.4785828590393066, - "logits/rejected": -2.372243881225586, - "logps/chosen": -225.03781127929688, - "logps/rejected": -212.93954467773438, - "loss": 1371.8355, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.1279604136943817, - "rewards/margins": 0.122574582695961, - "rewards/rejected": -0.2505349814891815, - "rewards/safe_rewards": -0.12024296820163727, - "rewards/unsafe_rewards": -0.13567785918712616, - "step": 2670 - }, - { - "epoch": 0.71, - "learning_rate": 1.1674000562951916e-07, - "logits/chosen": -2.4538979530334473, - "logits/rejected": -2.30354380607605, - "logps/chosen": -196.8118133544922, - "logps/rejected": -181.60252380371094, - "loss": 1339.2882, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.14127959311008453, - "rewards/margins": 0.06795055419206619, - "rewards/rejected": -0.20923013985157013, - "rewards/safe_rewards": -0.15531083941459656, - "rewards/unsafe_rewards": -0.1272483468055725, - "step": 2680 - }, - { - "epoch": 0.71, - "learning_rate": 1.1478667610260095e-07, - "logits/chosen": -2.3940882682800293, - "logits/rejected": -2.3024888038635254, - "logps/chosen": -189.18226623535156, - "logps/rejected": -229.0049591064453, - "loss": 1326.5836, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.14425766468048096, - "rewards/margins": 0.07828481495380402, - "rewards/rejected": -0.22254247963428497, - "rewards/safe_rewards": -0.1324750930070877, - "rewards/unsafe_rewards": -0.156040221452713, - "step": 2690 - }, - { - "epoch": 0.72, - "learning_rate": 1.1284494514554488e-07, - "logits/chosen": -2.510636806488037, - "logits/rejected": -2.346837043762207, - "logps/chosen": -221.3477020263672, - "logps/rejected": -187.18638610839844, - "loss": 1335.1368, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.1462404876947403, - "rewards/margins": 0.12342081218957901, - "rewards/rejected": -0.2696612775325775, - "rewards/safe_rewards": -0.15262550115585327, - "rewards/unsafe_rewards": -0.13985547423362732, - "step": 2700 - }, - { - "epoch": 0.72, - "learning_rate": 1.1091497931961505e-07, - "logits/chosen": -2.4788074493408203, - "logits/rejected": -2.3865175247192383, - "logps/chosen": -231.6941680908203, - "logps/rejected": -218.86721801757812, - "loss": 1402.5593, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.1444820612668991, - "rewards/margins": 0.06018822267651558, - "rewards/rejected": -0.2046702802181244, - "rewards/safe_rewards": -0.1351994276046753, - "rewards/unsafe_rewards": -0.1537647247314453, - "step": 2710 - }, - { - "epoch": 0.72, - "learning_rate": 1.0899694417686529e-07, - "logits/chosen": -2.404553174972534, - "logits/rejected": -2.271867275238037, - "logps/chosen": -180.56387329101562, - "logps/rejected": -165.1314697265625, - "loss": 1394.0041, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.1627023071050644, - "rewards/margins": 0.08744940161705017, - "rewards/rejected": -0.25015169382095337, - "rewards/safe_rewards": -0.1710779070854187, - "rewards/unsafe_rewards": -0.15432670712471008, - "step": 2720 - }, - { - "epoch": 0.72, - "learning_rate": 1.0709100424593778e-07, - "logits/chosen": -2.459942579269409, - "logits/rejected": -2.2597813606262207, - "logps/chosen": -213.2707977294922, - "logps/rejected": -191.6422119140625, - "loss": 1247.3009, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.16189788281917572, - "rewards/margins": 0.12105657160282135, - "rewards/rejected": -0.28295445442199707, - "rewards/safe_rewards": -0.14164146780967712, - "rewards/unsafe_rewards": -0.18215428292751312, - "step": 2730 - }, - { - "epoch": 0.73, - "learning_rate": 1.0519732301795034e-07, - "logits/chosen": -2.4104714393615723, - "logits/rejected": -2.3298792839050293, - "logps/chosen": -211.53988647460938, - "logps/rejected": -288.78521728515625, - "loss": 1241.3158, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.14982062578201294, - "rewards/margins": 0.09874819964170456, - "rewards/rejected": -0.2485688477754593, - "rewards/safe_rewards": -0.1431150734424591, - "rewards/unsafe_rewards": -0.15652617812156677, - "step": 2740 - }, - { - "epoch": 0.73, - "learning_rate": 1.0331606293247144e-07, - "logits/chosen": -2.4398903846740723, - "logits/rejected": -2.262406349182129, - "logps/chosen": -221.0254364013672, - "logps/rejected": -168.57070922851562, - "loss": 1373.3396, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.18362286686897278, - "rewards/margins": 0.10515341907739639, - "rewards/rejected": -0.28877630829811096, - "rewards/safe_rewards": -0.2037414014339447, - "rewards/unsafe_rewards": -0.16350433230400085, - "step": 2750 - }, - { - "epoch": 0.73, - "learning_rate": 1.0144738536358702e-07, - "logits/chosen": -2.333991527557373, - "logits/rejected": -2.3055646419525146, - "logps/chosen": -212.1750030517578, - "logps/rejected": -200.6303253173828, - "loss": 1342.8592, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.1498975157737732, - "rewards/margins": 0.0784512534737587, - "rewards/rejected": -0.2283487766981125, - "rewards/safe_rewards": -0.1601577252149582, - "rewards/unsafe_rewards": -0.1396373212337494, - "step": 2760 - }, - { - "epoch": 0.73, - "learning_rate": 9.959145060605725e-08, - "logits/chosen": -2.4126687049865723, - "logits/rejected": -2.3219704627990723, - "logps/chosen": -194.3618621826172, - "logps/rejected": -182.04273986816406, - "loss": 1147.2762, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.1351696103811264, - "rewards/margins": 0.08169806748628616, - "rewards/rejected": -0.21686765551567078, - "rewards/safe_rewards": -0.13592712581157684, - "rewards/unsafe_rewards": -0.13441209495067596, - "step": 2770 - }, - { - "epoch": 0.74, - "learning_rate": 9.774841786156674e-08, - "logits/chosen": -2.469726085662842, - "logits/rejected": -2.2745542526245117, - "logps/chosen": -212.5373077392578, - "logps/rejected": -205.0717315673828, - "loss": 1448.8548, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.15598392486572266, - "rewards/margins": 0.1351768970489502, - "rewards/rejected": -0.29116082191467285, - "rewards/safe_rewards": -0.1800118386745453, - "rewards/unsafe_rewards": -0.13195599615573883, - "step": 2780 - }, - { - "epoch": 0.74, - "learning_rate": 9.591844522506817e-08, - "logits/chosen": -2.4468955993652344, - "logits/rejected": -2.371422052383423, - "logps/chosen": -209.7718963623047, - "logps/rejected": -176.19900512695312, - "loss": 1366.3584, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.17508932948112488, - "rewards/margins": 0.09766259789466858, - "rewards/rejected": -0.27275195717811584, - "rewards/safe_rewards": -0.20015151798725128, - "rewards/unsafe_rewards": -0.15002720057964325, - "step": 2790 - }, - { - "epoch": 0.74, - "learning_rate": 9.410168967122103e-08, - "logits/chosen": -2.4192795753479004, - "logits/rejected": -2.3287782669067383, - "logps/chosen": -205.4970703125, - "logps/rejected": -180.14083862304688, - "loss": 1445.3749, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.14298854768276215, - "rewards/margins": 0.0911317765712738, - "rewards/rejected": -0.23412032425403595, - "rewards/safe_rewards": -0.13888195157051086, - "rewards/unsafe_rewards": -0.14709511399269104, - "step": 2800 - }, - { - "epoch": 0.75, - "learning_rate": 9.22983070409262e-08, - "logits/chosen": -2.3851230144500732, - "logits/rejected": -2.273139476776123, - "logps/chosen": -224.5166473388672, - "logps/rejected": -182.35377502441406, - "loss": 1283.4182, - "rewards/accuracies": 0.5625, - "rewards/chosen": -0.1512645184993744, - "rewards/margins": 0.06428150087594986, - "rewards/rejected": -0.21554601192474365, - "rewards/safe_rewards": -0.1517799198627472, - "rewards/unsafe_rewards": -0.1507490873336792, - "step": 2810 - }, - { - "epoch": 0.75, - "learning_rate": 9.05084520279582e-08, - "logits/chosen": -2.4455227851867676, - "logits/rejected": -2.3816440105438232, - "logps/chosen": -232.181396484375, - "logps/rejected": -190.38851928710938, - "loss": 1510.3462, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.15054091811180115, - "rewards/margins": 0.06891327351331711, - "rewards/rejected": -0.21945419907569885, - "rewards/safe_rewards": -0.13940033316612244, - "rewards/unsafe_rewards": -0.16168153285980225, - "step": 2820 - }, - { - "epoch": 0.75, - "learning_rate": 8.87322781656952e-08, - "logits/chosen": -2.5001027584075928, - "logits/rejected": -2.4006972312927246, - "logps/chosen": -232.4515838623047, - "logps/rejected": -194.35205078125, - "loss": 1262.9369, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.1305830180644989, - "rewards/margins": 0.09128337353467941, - "rewards/rejected": -0.2218663990497589, - "rewards/safe_rewards": -0.1377422958612442, - "rewards/unsafe_rewards": -0.1234237402677536, - "step": 2830 - }, - { - "epoch": 0.75, - "learning_rate": 8.696993781394938e-08, - "logits/chosen": -2.437107563018799, - "logits/rejected": -2.332440137863159, - "logps/chosen": -193.6494598388672, - "logps/rejected": -200.15139770507812, - "loss": 1521.0893, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.16311237215995789, - "rewards/margins": 0.07338700443506241, - "rewards/rejected": -0.2364993840456009, - "rewards/safe_rewards": -0.17912645637989044, - "rewards/unsafe_rewards": -0.14709830284118652, - "step": 2840 - }, - { - "epoch": 0.76, - "learning_rate": 8.522158214589744e-08, - "logits/chosen": -2.349147319793701, - "logits/rejected": -2.2065956592559814, - "logps/chosen": -216.84085083007812, - "logps/rejected": -218.7882843017578, - "loss": 1378.4679, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.17699700593948364, - "rewards/margins": 0.10931912809610367, - "rewards/rejected": -0.2863161563873291, - "rewards/safe_rewards": -0.1544623225927353, - "rewards/unsafe_rewards": -0.199531689286232, - "step": 2850 - }, - { - "epoch": 0.76, - "learning_rate": 8.348736113511265e-08, - "logits/chosen": -2.455056667327881, - "logits/rejected": -2.3617424964904785, - "logps/chosen": -193.42210388183594, - "logps/rejected": -190.78219604492188, - "loss": 1231.9676, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.1572413146495819, - "rewards/margins": 0.10474126040935516, - "rewards/rejected": -0.26198258996009827, - "rewards/safe_rewards": -0.13097065687179565, - "rewards/unsafe_rewards": -0.18351195752620697, - "step": 2860 - }, - { - "epoch": 0.76, - "learning_rate": 8.17674235427006e-08, - "logits/chosen": -2.3932228088378906, - "logits/rejected": -2.2564892768859863, - "logps/chosen": -226.3605499267578, - "logps/rejected": -201.9912872314453, - "loss": 1432.8261, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.18014100193977356, - "rewards/margins": 0.06338997185230255, - "rewards/rejected": -0.2435309886932373, - "rewards/safe_rewards": -0.1822229027748108, - "rewards/unsafe_rewards": -0.1780591458082199, - "step": 2870 - }, - { - "epoch": 0.76, - "learning_rate": 8.006191690453839e-08, - "logits/chosen": -2.4417896270751953, - "logits/rejected": -2.2340636253356934, - "logps/chosen": -206.8270721435547, - "logps/rejected": -164.54788208007812, - "loss": 1136.0954, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.18152788281440735, - "rewards/margins": 0.07710777223110199, - "rewards/rejected": -0.25863566994667053, - "rewards/safe_rewards": -0.16479837894439697, - "rewards/unsafe_rewards": -0.1982574164867401, - "step": 2880 - }, - { - "epoch": 0.77, - "learning_rate": 7.837098751861882e-08, - "logits/chosen": -2.4111363887786865, - "logits/rejected": -2.3041722774505615, - "logps/chosen": -228.0347442626953, - "logps/rejected": -212.8562469482422, - "loss": 1474.6887, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.15939395129680634, - "rewards/margins": 0.05863727256655693, - "rewards/rejected": -0.21803121268749237, - "rewards/safe_rewards": -0.15683451294898987, - "rewards/unsafe_rewards": -0.161953404545784, - "step": 2890 - }, - { - "epoch": 0.77, - "learning_rate": 7.669478043250116e-08, - "logits/chosen": -2.5126185417175293, - "logits/rejected": -2.385205030441284, - "logps/chosen": -216.42385864257812, - "logps/rejected": -213.50888061523438, - "loss": 1395.615, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.15492045879364014, - "rewards/margins": 0.11109232902526855, - "rewards/rejected": -0.2660127878189087, - "rewards/safe_rewards": -0.16570965945720673, - "rewards/unsafe_rewards": -0.14413125813007355, - "step": 2900 - }, - { - "epoch": 0.77, - "learning_rate": 7.503343943086901e-08, - "logits/chosen": -2.45739483833313, - "logits/rejected": -2.343709707260132, - "logps/chosen": -259.11566162109375, - "logps/rejected": -213.95068359375, - "loss": 1425.7808, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.1624491959810257, - "rewards/margins": 0.07001639902591705, - "rewards/rejected": -0.23246559500694275, - "rewards/safe_rewards": -0.18054285645484924, - "rewards/unsafe_rewards": -0.14435555040836334, - "step": 2910 - }, - { - "epoch": 0.77, - "learning_rate": 7.338710702319639e-08, - "logits/chosen": -2.502403974533081, - "logits/rejected": -2.39176607131958, - "logps/chosen": -246.1416473388672, - "logps/rejected": -202.51943969726562, - "loss": 1142.2257, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.14090880751609802, - "rewards/margins": 0.08934492617845535, - "rewards/rejected": -0.2302536964416504, - "rewards/safe_rewards": -0.14837698638439178, - "rewards/unsafe_rewards": -0.13344059884548187, - "step": 2920 - }, - { - "epoch": 0.78, - "learning_rate": 7.175592443152342e-08, - "logits/chosen": -2.5229859352111816, - "logits/rejected": -2.4320898056030273, - "logps/chosen": -207.9595184326172, - "logps/rejected": -207.3093719482422, - "loss": 1448.7569, - "rewards/accuracies": 0.7875000238418579, - "rewards/chosen": -0.12007516622543335, - "rewards/margins": 0.10768391191959381, - "rewards/rejected": -0.22775907814502716, - "rewards/safe_rewards": -0.12211060523986816, - "rewards/unsafe_rewards": -0.11803972721099854, - "step": 2930 - }, - { - "epoch": 0.78, - "learning_rate": 7.014003157834228e-08, - "logits/chosen": -2.4394707679748535, - "logits/rejected": -2.316734790802002, - "logps/chosen": -191.43057250976562, - "logps/rejected": -164.4339141845703, - "loss": 1373.3023, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.13677676022052765, - "rewards/margins": 0.10454187542200089, - "rewards/rejected": -0.24131861329078674, - "rewards/safe_rewards": -0.14201681315898895, - "rewards/unsafe_rewards": -0.13153666257858276, - "step": 2940 - }, - { - "epoch": 0.78, - "learning_rate": 6.85395670745944e-08, - "logits/chosen": -2.470555543899536, - "logits/rejected": -2.3573083877563477, - "logps/chosen": -242.80484008789062, - "logps/rejected": -217.9771728515625, - "loss": 1330.0771, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.15035180747509003, - "rewards/margins": 0.08213526010513306, - "rewards/rejected": -0.2324870526790619, - "rewards/safe_rewards": -0.12931732833385468, - "rewards/unsafe_rewards": -0.17138628661632538, - "step": 2950 - }, - { - "epoch": 0.79, - "learning_rate": 6.695466820778109e-08, - "logits/chosen": -2.3521533012390137, - "logits/rejected": -2.2585196495056152, - "logps/chosen": -240.77523803710938, - "logps/rejected": -238.1952667236328, - "loss": 1207.3693, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.15088975429534912, - "rewards/margins": 0.09504391998052597, - "rewards/rejected": -0.2459336817264557, - "rewards/safe_rewards": -0.11259335279464722, - "rewards/unsafe_rewards": -0.18918615579605103, - "step": 2960 - }, - { - "epoch": 0.79, - "learning_rate": 6.538547093018642e-08, - "logits/chosen": -2.4960103034973145, - "logits/rejected": -2.310760736465454, - "logps/chosen": -238.7340545654297, - "logps/rejected": -199.38058471679688, - "loss": 1369.2449, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.1267164647579193, - "rewards/margins": 0.09891035407781601, - "rewards/rejected": -0.22562682628631592, - "rewards/safe_rewards": -0.08501541614532471, - "rewards/unsafe_rewards": -0.16841748356819153, - "step": 2970 - }, - { - "epoch": 0.79, - "learning_rate": 6.383210984721565e-08, - "logits/chosen": -2.464294672012329, - "logits/rejected": -2.2916579246520996, - "logps/chosen": -186.31362915039062, - "logps/rejected": -167.91604614257812, - "loss": 1315.7424, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.1632445752620697, - "rewards/margins": 0.09135641157627106, - "rewards/rejected": -0.25460100173950195, - "rewards/safe_rewards": -0.16177797317504883, - "rewards/unsafe_rewards": -0.16471122205257416, - "step": 2980 - }, - { - "epoch": 0.79, - "learning_rate": 6.229471820584858e-08, - "logits/chosen": -2.5637383460998535, - "logits/rejected": -2.4811198711395264, - "logps/chosen": -208.72158813476562, - "logps/rejected": -192.3394317626953, - "loss": 1554.5842, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.1505751609802246, - "rewards/margins": 0.06785817444324493, - "rewards/rejected": -0.21843330562114716, - "rewards/safe_rewards": -0.13245508074760437, - "rewards/unsafe_rewards": -0.16869522631168365, - "step": 2990 - }, - { - "epoch": 0.8, - "learning_rate": 6.077342788320983e-08, - "logits/chosen": -2.4934017658233643, - "logits/rejected": -2.3998332023620605, - "logps/chosen": -241.533447265625, - "logps/rejected": -224.54196166992188, - "loss": 1365.8523, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.14114201068878174, - "rewards/margins": 0.0711778774857521, - "rewards/rejected": -0.21231989562511444, - "rewards/safe_rewards": -0.133954256772995, - "rewards/unsafe_rewards": -0.14832976460456848, - "step": 3000 - }, - { - "epoch": 0.8, - "eval_logits/chosen": -2.2556862831115723, - "eval_logits/rejected": -2.0925025939941406, - "eval_logps/chosen": -161.99745178222656, - "eval_logps/rejected": -128.58595275878906, - "eval_loss": 980.28076171875, - "eval_rewards/accuracies": 0.6565921306610107, - "eval_rewards/chosen": -0.21647901833057404, - "eval_rewards/margins": 0.048050109297037125, - "eval_rewards/rejected": -0.26452910900115967, - "eval_rewards/safe_rewards": -0.21315838396549225, - "eval_rewards/unsafe_rewards": -0.21106599271297455, - "eval_runtime": 2349.3779, - "eval_samples_per_second": 14.916, - "eval_steps_per_second": 0.467, - "step": 3000 - }, - { - "epoch": 0.8, - "learning_rate": 5.926836937525614e-08, - "logits/chosen": -2.451995372772217, - "logits/rejected": -2.3129286766052246, - "logps/chosen": -224.66635131835938, - "logps/rejected": -190.92153930664062, - "loss": 1380.1385, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.1415611207485199, - "rewards/margins": 0.11827559769153595, - "rewards/rejected": -0.25983673334121704, - "rewards/safe_rewards": -0.12462811172008514, - "rewards/unsafe_rewards": -0.15849414467811584, - "step": 3010 - }, - { - "epoch": 0.8, - "learning_rate": 5.777967178558299e-08, - "logits/chosen": -2.3881709575653076, - "logits/rejected": -2.3278565406799316, - "logps/chosen": -184.0283203125, - "logps/rejected": -188.0187530517578, - "loss": 1510.0844, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.18465296924114227, - "rewards/margins": 0.05129767209291458, - "rewards/rejected": -0.23595066368579865, - "rewards/safe_rewards": -0.18172645568847656, - "rewards/unsafe_rewards": -0.18757948279380798, - "step": 3020 - }, - { - "epoch": 0.8, - "learning_rate": 5.630746281434956e-08, - "logits/chosen": -2.4901344776153564, - "logits/rejected": -2.2834839820861816, - "logps/chosen": -249.99526977539062, - "logps/rejected": -155.13876342773438, - "loss": 1337.4982, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.15860740840435028, - "rewards/margins": 0.08081382513046265, - "rewards/rejected": -0.23942121863365173, - "rewards/safe_rewards": -0.15060675144195557, - "rewards/unsafe_rewards": -0.166608065366745, - "step": 3030 - }, - { - "epoch": 0.81, - "learning_rate": 5.485186874732503e-08, - "logits/chosen": -2.390209674835205, - "logits/rejected": -2.3439838886260986, - "logps/chosen": -193.0906982421875, - "logps/rejected": -198.14846801757812, - "loss": 1389.8686, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.13740174472332, - "rewards/margins": 0.12461180984973907, - "rewards/rejected": -0.26201358437538147, - "rewards/safe_rewards": -0.12184695899486542, - "rewards/unsafe_rewards": -0.1529565304517746, - "step": 3040 - }, - { - "epoch": 0.81, - "learning_rate": 5.341301444505566e-08, - "logits/chosen": -2.489290952682495, - "logits/rejected": -2.4091949462890625, - "logps/chosen": -251.19100952148438, - "logps/rejected": -237.19985961914062, - "loss": 1320.9967, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.15607410669326782, - "rewards/margins": 0.100197933614254, - "rewards/rejected": -0.2562720477581024, - "rewards/safe_rewards": -0.1376083940267563, - "rewards/unsafe_rewards": -0.17453980445861816, - "step": 3050 - }, - { - "epoch": 0.81, - "learning_rate": 5.1991023332154107e-08, - "logits/chosen": -2.4318363666534424, - "logits/rejected": -2.317143440246582, - "logps/chosen": -208.0008544921875, - "logps/rejected": -182.1240234375, - "loss": 1399.8946, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.15874066948890686, - "rewards/margins": 0.08130695670843124, - "rewards/rejected": -0.2400476485490799, - "rewards/safe_rewards": -0.17512984573841095, - "rewards/unsafe_rewards": -0.14235153794288635, - "step": 3060 - }, - { - "epoch": 0.81, - "learning_rate": 5.058601738671248e-08, - "logits/chosen": -2.437701463699341, - "logits/rejected": -2.381761074066162, - "logps/chosen": -240.52670288085938, - "logps/rejected": -244.4202117919922, - "loss": 1411.272, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.1465824395418167, - "rewards/margins": 0.09980867803096771, - "rewards/rejected": -0.24639113247394562, - "rewards/safe_rewards": -0.12034926563501358, - "rewards/unsafe_rewards": -0.17281560599803925, - "step": 3070 - }, - { - "epoch": 0.82, - "learning_rate": 4.919811712983879e-08, - "logits/chosen": -2.5081167221069336, - "logits/rejected": -2.352241039276123, - "logps/chosen": -230.8387908935547, - "logps/rejected": -167.4381866455078, - "loss": 1321.1836, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.18383675813674927, - "rewards/margins": 0.08120562881231308, - "rewards/rejected": -0.26504239439964294, - "rewards/safe_rewards": -0.1794658750295639, - "rewards/unsafe_rewards": -0.18820765614509583, - "step": 3080 - }, - { - "epoch": 0.82, - "learning_rate": 4.782744161531877e-08, - "logits/chosen": -2.412121534347534, - "logits/rejected": -2.329601764678955, - "logps/chosen": -194.43106079101562, - "logps/rejected": -175.35232543945312, - "loss": 1210.5296, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.19490277767181396, - "rewards/margins": 0.08997827023267746, - "rewards/rejected": -0.284881055355072, - "rewards/safe_rewards": -0.16397520899772644, - "rewards/unsafe_rewards": -0.22583036124706268, - "step": 3090 - }, - { - "epoch": 0.82, - "learning_rate": 4.647410841940347e-08, - "logits/chosen": -2.4340522289276123, - "logits/rejected": -2.361701250076294, - "logps/chosen": -233.0377960205078, - "logps/rejected": -214.0589141845703, - "loss": 1340.0141, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.14663763344287872, - "rewards/margins": 0.11184868961572647, - "rewards/rejected": -0.2584863305091858, - "rewards/safe_rewards": -0.16188395023345947, - "rewards/unsafe_rewards": -0.13139131665229797, - "step": 3100 - }, - { - "epoch": 0.83, - "learning_rate": 4.5138233630723525e-08, - "logits/chosen": -2.393258571624756, - "logits/rejected": -2.3822903633117676, - "logps/chosen": -206.7146759033203, - "logps/rejected": -248.88919067382812, - "loss": 1343.6238, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.15200984477996826, - "rewards/margins": 0.09580516815185547, - "rewards/rejected": -0.24781498312950134, - "rewards/safe_rewards": -0.1634071171283722, - "rewards/unsafe_rewards": -0.14061257243156433, - "step": 3110 - }, - { - "epoch": 0.83, - "learning_rate": 4.3819931840331195e-08, - "logits/chosen": -2.415590524673462, - "logits/rejected": -2.2940897941589355, - "logps/chosen": -227.73519897460938, - "logps/rejected": -182.935302734375, - "loss": 1285.1371, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.1379997283220291, - "rewards/margins": 0.11923094093799591, - "rewards/rejected": -0.25723063945770264, - "rewards/safe_rewards": -0.15660221874713898, - "rewards/unsafe_rewards": -0.11939723789691925, - "step": 3120 - }, - { - "epoch": 0.83, - "learning_rate": 4.2519316131870735e-08, - "logits/chosen": -2.4957242012023926, - "logits/rejected": -2.341954231262207, - "logps/chosen": -229.7425079345703, - "logps/rejected": -212.257080078125, - "loss": 1288.0851, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.1558004915714264, - "rewards/margins": 0.08406776189804077, - "rewards/rejected": -0.23986823856830597, - "rewards/safe_rewards": -0.13923819363117218, - "rewards/unsafe_rewards": -0.1723627895116806, - "step": 3130 - }, - { - "epoch": 0.83, - "learning_rate": 4.12364980718781e-08, - "logits/chosen": -2.4079928398132324, - "logits/rejected": -2.300352096557617, - "logps/chosen": -225.2753448486328, - "logps/rejected": -226.75888061523438, - "loss": 1197.7185, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.1341785490512848, - "rewards/margins": 0.12418261915445328, - "rewards/rejected": -0.25836116075515747, - "rewards/safe_rewards": -0.14844010770320892, - "rewards/unsafe_rewards": -0.11991697549819946, - "step": 3140 - }, - { - "epoch": 0.84, - "learning_rate": 3.997158770021067e-08, - "logits/chosen": -2.410720109939575, - "logits/rejected": -2.2520899772644043, - "logps/chosen": -219.7996826171875, - "logps/rejected": -191.2012939453125, - "loss": 1384.5195, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.14747990667819977, - "rewards/margins": 0.11562303453683853, - "rewards/rejected": -0.2631029188632965, - "rewards/safe_rewards": -0.17096179723739624, - "rewards/unsafe_rewards": -0.12399798631668091, - "step": 3150 - }, - { - "epoch": 0.84, - "learning_rate": 3.872469352060828e-08, - "logits/chosen": -2.410459041595459, - "logits/rejected": -2.2988898754119873, - "logps/chosen": -232.65878295898438, - "logps/rejected": -211.06570434570312, - "loss": 1485.1102, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.15507188439369202, - "rewards/margins": 0.08375488966703415, - "rewards/rejected": -0.23882675170898438, - "rewards/safe_rewards": -0.15433767437934875, - "rewards/unsafe_rewards": -0.1558060646057129, - "step": 3160 - }, - { - "epoch": 0.84, - "learning_rate": 3.749592249138575e-08, - "logits/chosen": -2.448688268661499, - "logits/rejected": -2.358865261077881, - "logps/chosen": -206.31863403320312, - "logps/rejected": -176.6393280029297, - "loss": 1507.397, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.14200535416603088, - "rewards/margins": 0.09720391780138016, - "rewards/rejected": -0.23920929431915283, - "rewards/safe_rewards": -0.12844650447368622, - "rewards/unsafe_rewards": -0.15556421875953674, - "step": 3170 - }, - { - "epoch": 0.84, - "learning_rate": 3.6285380016257724e-08, - "logits/chosen": -2.5111165046691895, - "logits/rejected": -2.3816025257110596, - "logps/chosen": -195.72584533691406, - "logps/rejected": -179.56805419921875, - "loss": 1197.7672, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.1556689590215683, - "rewards/margins": 0.08882570266723633, - "rewards/rejected": -0.24449467658996582, - "rewards/safe_rewards": -0.154417484998703, - "rewards/unsafe_rewards": -0.1569204330444336, - "step": 3180 - }, - { - "epoch": 0.85, - "learning_rate": 3.50931699352976e-08, - "logits/chosen": -2.4040238857269287, - "logits/rejected": -2.2769551277160645, - "logps/chosen": -214.8437042236328, - "logps/rejected": -185.93856811523438, - "loss": 1308.7559, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.17890039086341858, - "rewards/margins": 0.10006201267242432, - "rewards/rejected": -0.2789623737335205, - "rewards/safe_rewards": -0.17131975293159485, - "rewards/unsafe_rewards": -0.1864810287952423, - "step": 3190 - }, - { - "epoch": 0.85, - "learning_rate": 3.3919394516029876e-08, - "logits/chosen": -2.413229465484619, - "logits/rejected": -2.3423690795898438, - "logps/chosen": -196.32373046875, - "logps/rejected": -183.0098419189453, - "loss": 1423.8119, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.13929566740989685, - "rewards/margins": 0.08123189210891724, - "rewards/rejected": -0.22052757441997528, - "rewards/safe_rewards": -0.16768403351306915, - "rewards/unsafe_rewards": -0.11090729385614395, - "step": 3200 - }, - { - "epoch": 0.85, - "learning_rate": 3.276415444465772e-08, - "logits/chosen": -2.495835065841675, - "logits/rejected": -2.4049930572509766, - "logps/chosen": -208.24893188476562, - "logps/rejected": -191.79994201660156, - "loss": 1386.1934, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.15787570178508759, - "rewards/margins": 0.07763205468654633, - "rewards/rejected": -0.2355077564716339, - "rewards/safe_rewards": -0.1333656758069992, - "rewards/unsafe_rewards": -0.18238575756549835, - "step": 3210 - }, - { - "epoch": 0.85, - "learning_rate": 3.1627548817426186e-08, - "logits/chosen": -2.468461751937866, - "logits/rejected": -2.272341251373291, - "logps/chosen": -231.1334686279297, - "logps/rejected": -194.1231231689453, - "loss": 1477.6265, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.1542334407567978, - "rewards/margins": 0.09861287474632263, - "rewards/rejected": -0.2528463304042816, - "rewards/safe_rewards": -0.15952548384666443, - "rewards/unsafe_rewards": -0.14894141256809235, - "step": 3220 - }, - { - "epoch": 0.86, - "learning_rate": 3.050967513212166e-08, - "logits/chosen": -2.4564671516418457, - "logits/rejected": -2.3349785804748535, - "logps/chosen": -218.11080932617188, - "logps/rejected": -182.4496307373047, - "loss": 1431.5375, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.15404048562049866, - "rewards/margins": 0.10540225356817245, - "rewards/rejected": -0.2594427466392517, - "rewards/safe_rewards": -0.14628000557422638, - "rewards/unsafe_rewards": -0.16180095076560974, - "step": 3230 - }, - { - "epoch": 0.86, - "learning_rate": 2.9410629279708608e-08, - "logits/chosen": -2.513301372528076, - "logits/rejected": -2.3546433448791504, - "logps/chosen": -180.7330322265625, - "logps/rejected": -203.46548461914062, - "loss": 1350.4557, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.1651059091091156, - "rewards/margins": 0.10105645656585693, - "rewards/rejected": -0.26616233587265015, - "rewards/safe_rewards": -0.15542066097259521, - "rewards/unsafe_rewards": -0.1747911423444748, - "step": 3240 - }, - { - "epoch": 0.86, - "learning_rate": 2.8330505536104055e-08, - "logits/chosen": -2.462486743927002, - "logits/rejected": -2.3805766105651855, - "logps/chosen": -204.26365661621094, - "logps/rejected": -197.9345245361328, - "loss": 1411.0915, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.22317290306091309, - "rewards/margins": 0.04581516236066818, - "rewards/rejected": -0.2689880430698395, - "rewards/safe_rewards": -0.21146826446056366, - "rewards/unsafe_rewards": -0.23487751185894012, - "step": 3250 - }, - { - "epoch": 0.86, - "learning_rate": 2.7269396554090525e-08, - "logits/chosen": -2.5127105712890625, - "logits/rejected": -2.3288822174072266, - "logps/chosen": -231.49972534179688, - "logps/rejected": -192.39596557617188, - "loss": 1408.9844, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.19172994792461395, - "rewards/margins": 0.04586008936166763, - "rewards/rejected": -0.237590029835701, - "rewards/safe_rewards": -0.17916744947433472, - "rewards/unsafe_rewards": -0.2042924463748932, - "step": 3260 - }, - { - "epoch": 0.87, - "learning_rate": 2.6227393355368445e-08, - "logits/chosen": -2.414293050765991, - "logits/rejected": -2.3348193168640137, - "logps/chosen": -242.31307983398438, - "logps/rejected": -246.9089813232422, - "loss": 1304.8733, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.1281508058309555, - "rewards/margins": 0.1268887221813202, - "rewards/rejected": -0.2550395429134369, - "rewards/safe_rewards": -0.12770487368106842, - "rewards/unsafe_rewards": -0.12859676778316498, - "step": 3270 - }, - { - "epoch": 0.87, - "learning_rate": 2.5204585322748246e-08, - "logits/chosen": -2.531818389892578, - "logits/rejected": -2.4052319526672363, - "logps/chosen": -252.3133087158203, - "logps/rejected": -233.08560180664062, - "loss": 1354.6002, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.1706986427307129, - "rewards/margins": 0.0687142163515091, - "rewards/rejected": -0.2394128292798996, - "rewards/safe_rewards": -0.1839136779308319, - "rewards/unsafe_rewards": -0.15748362243175507, - "step": 3280 - }, - { - "epoch": 0.87, - "learning_rate": 2.4201060192483107e-08, - "logits/chosen": -2.3884761333465576, - "logits/rejected": -2.160569667816162, - "logps/chosen": -286.6214904785156, - "logps/rejected": -196.3474884033203, - "loss": 1377.1116, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.19051751494407654, - "rewards/margins": 0.10852286964654922, - "rewards/rejected": -0.29904037714004517, - "rewards/safe_rewards": -0.21324869990348816, - "rewards/unsafe_rewards": -0.16778631508350372, - "step": 3290 - }, - { - "epoch": 0.88, - "learning_rate": 2.3216904046743035e-08, - "logits/chosen": -2.4478347301483154, - "logits/rejected": -2.352735996246338, - "logps/chosen": -206.50216674804688, - "logps/rejected": -192.98875427246094, - "loss": 1461.5512, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.19327738881111145, - "rewards/margins": 0.07878383249044418, - "rewards/rejected": -0.27206119894981384, - "rewards/safe_rewards": -0.2037767916917801, - "rewards/unsafe_rewards": -0.18277797102928162, - "step": 3300 - }, - { - "epoch": 0.88, - "learning_rate": 2.2252201306230704e-08, - "logits/chosen": -2.430565118789673, - "logits/rejected": -2.2724251747131348, - "logps/chosen": -244.2122802734375, - "logps/rejected": -199.22972106933594, - "loss": 1347.5625, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.15452131628990173, - "rewards/margins": 0.14309212565422058, - "rewards/rejected": -0.2976134717464447, - "rewards/safe_rewards": -0.11681215465068817, - "rewards/unsafe_rewards": -0.1922304928302765, - "step": 3310 - }, - { - "epoch": 0.88, - "learning_rate": 2.130703472293982e-08, - "logits/chosen": -2.5068135261535645, - "logits/rejected": -2.3027968406677246, - "logps/chosen": -258.24615478515625, - "logps/rejected": -212.1205596923828, - "loss": 1343.9468, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.14981429278850555, - "rewards/margins": 0.128998264670372, - "rewards/rejected": -0.27881258726119995, - "rewards/safe_rewards": -0.1735188364982605, - "rewards/unsafe_rewards": -0.1261097639799118, - "step": 3320 - }, - { - "epoch": 0.88, - "learning_rate": 2.0381485373056757e-08, - "logits/chosen": -2.446154832839966, - "logits/rejected": -2.386603593826294, - "logps/chosen": -199.39512634277344, - "logps/rejected": -186.36338806152344, - "loss": 1441.9888, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.17589536309242249, - "rewards/margins": 0.08219923824071884, - "rewards/rejected": -0.2580946087837219, - "rewards/safe_rewards": -0.15976838767528534, - "rewards/unsafe_rewards": -0.19202235341072083, - "step": 3330 - }, - { - "epoch": 0.89, - "learning_rate": 1.9475632650005848e-08, - "logits/chosen": -2.4040284156799316, - "logits/rejected": -2.2748074531555176, - "logps/chosen": -238.0712890625, - "logps/rejected": -232.64834594726562, - "loss": 1156.846, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.157466858625412, - "rewards/margins": 0.12707573175430298, - "rewards/rejected": -0.2845425605773926, - "rewards/safe_rewards": -0.17175695300102234, - "rewards/unsafe_rewards": -0.14317676424980164, - "step": 3340 - }, - { - "epoch": 0.89, - "learning_rate": 1.8589554257638918e-08, - "logits/chosen": -2.4485044479370117, - "logits/rejected": -2.287540912628174, - "logps/chosen": -233.2196044921875, - "logps/rejected": -193.11607360839844, - "loss": 1214.5244, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.16832873225212097, - "rewards/margins": 0.12234596908092499, - "rewards/rejected": -0.29067474603652954, - "rewards/safe_rewards": -0.16399753093719482, - "rewards/unsafe_rewards": -0.1726599633693695, - "step": 3350 - }, - { - "epoch": 0.89, - "learning_rate": 1.772332620357009e-08, - "logits/chosen": -2.4108593463897705, - "logits/rejected": -2.3884873390197754, - "logps/chosen": -243.2872772216797, - "logps/rejected": -218.4895477294922, - "loss": 1331.0887, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.15621154010295868, - "rewards/margins": 0.07616924494504929, - "rewards/rejected": -0.23238077759742737, - "rewards/safe_rewards": -0.16866862773895264, - "rewards/unsafe_rewards": -0.14375443756580353, - "step": 3360 - }, - { - "epoch": 0.89, - "learning_rate": 1.687702279265557e-08, - "logits/chosen": -2.461092233657837, - "logits/rejected": -2.313662052154541, - "logps/chosen": -202.67105102539062, - "logps/rejected": -184.63352966308594, - "loss": 1377.7719, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.19350698590278625, - "rewards/margins": 0.08659729361534119, - "rewards/rejected": -0.28010427951812744, - "rewards/safe_rewards": -0.19646957516670227, - "rewards/unsafe_rewards": -0.19054444134235382, - "step": 3370 - }, - { - "epoch": 0.9, - "learning_rate": 1.6050716620620102e-08, - "logits/chosen": -2.4286372661590576, - "logits/rejected": -2.2866196632385254, - "logps/chosen": -222.2981719970703, - "logps/rejected": -187.40615844726562, - "loss": 1415.531, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.15618988871574402, - "rewards/margins": 0.08528248965740204, - "rewards/rejected": -0.24147239327430725, - "rewards/safe_rewards": -0.15167191624641418, - "rewards/unsafe_rewards": -0.16070786118507385, - "step": 3380 - }, - { - "epoch": 0.9, - "learning_rate": 1.524447856782951e-08, - "logits/chosen": -2.4636127948760986, - "logits/rejected": -2.2639200687408447, - "logps/chosen": -196.055908203125, - "logps/rejected": -165.3712921142578, - "loss": 1450.9697, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.17779284715652466, - "rewards/margins": 0.07517805695533752, - "rewards/rejected": -0.2529709041118622, - "rewards/safe_rewards": -0.18910861015319824, - "rewards/unsafe_rewards": -0.16647711396217346, - "step": 3390 - }, - { - "epoch": 0.9, - "learning_rate": 1.4458377793210686e-08, - "logits/chosen": -2.4318699836730957, - "logits/rejected": -2.345082998275757, - "logps/chosen": -236.27169799804688, - "logps/rejected": -195.14306640625, - "loss": 1429.526, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.152841255068779, - "rewards/margins": 0.10460162162780762, - "rewards/rejected": -0.2574428915977478, - "rewards/safe_rewards": -0.13186484575271606, - "rewards/unsafe_rewards": -0.1738176792860031, - "step": 3400 - }, - { - "epoch": 0.9, - "learning_rate": 1.3692481728319133e-08, - "logits/chosen": -2.470095157623291, - "logits/rejected": -2.3147940635681152, - "logps/chosen": -195.31842041015625, - "logps/rejected": -157.31295776367188, - "loss": 1286.7398, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.2032199203968048, - "rewards/margins": 0.08531437069177628, - "rewards/rejected": -0.2885342538356781, - "rewards/safe_rewards": -0.18005777895450592, - "rewards/unsafe_rewards": -0.2263820618391037, - "step": 3410 - }, - { - "epoch": 0.91, - "learning_rate": 1.2946856071554629e-08, - "logits/chosen": -2.393263578414917, - "logits/rejected": -2.3455214500427246, - "logps/chosen": -193.7395477294922, - "logps/rejected": -204.05914306640625, - "loss": 1432.0641, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.15744832158088684, - "rewards/margins": 0.10759691894054413, - "rewards/rejected": -0.2650452256202698, - "rewards/safe_rewards": -0.17261706292629242, - "rewards/unsafe_rewards": -0.14227959513664246, - "step": 3420 - }, - { - "epoch": 0.91, - "learning_rate": 1.2221564782525812e-08, - "logits/chosen": -2.439044237136841, - "logits/rejected": -2.3702192306518555, - "logps/chosen": -257.65582275390625, - "logps/rejected": -245.55148315429688, - "loss": 1479.7575, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.15701845288276672, - "rewards/margins": 0.07186228781938553, - "rewards/rejected": -0.22888073325157166, - "rewards/safe_rewards": -0.1657995879650116, - "rewards/unsafe_rewards": -0.14823730289936066, - "step": 3430 - }, - { - "epoch": 0.91, - "learning_rate": 1.1516670076563512e-08, - "logits/chosen": -2.5074591636657715, - "logits/rejected": -2.3347530364990234, - "logps/chosen": -233.8135223388672, - "logps/rejected": -189.03053283691406, - "loss": 1425.1625, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.16578319668769836, - "rewards/margins": 0.078920379281044, - "rewards/rejected": -0.24470360577106476, - "rewards/safe_rewards": -0.13891756534576416, - "rewards/unsafe_rewards": -0.19264887273311615, - "step": 3440 - }, - { - "epoch": 0.92, - "learning_rate": 1.0832232419384162e-08, - "logits/chosen": -2.4321517944335938, - "logits/rejected": -2.3104190826416016, - "logps/chosen": -234.95669555664062, - "logps/rejected": -195.9945831298828, - "loss": 1415.4658, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.14400199055671692, - "rewards/margins": 0.1053481325507164, - "rewards/rejected": -0.24935011565685272, - "rewards/safe_rewards": -0.14105159044265747, - "rewards/unsafe_rewards": -0.14695239067077637, - "step": 3450 - }, - { - "epoch": 0.92, - "learning_rate": 1.016831052190284e-08, - "logits/chosen": -2.4439263343811035, - "logits/rejected": -2.307915687561035, - "logps/chosen": -236.1189727783203, - "logps/rejected": -199.6556396484375, - "loss": 1319.6274, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.18588881194591522, - "rewards/margins": 0.06768302619457245, - "rewards/rejected": -0.2535718083381653, - "rewards/safe_rewards": -0.17641659080982208, - "rewards/unsafe_rewards": -0.19536103308200836, - "step": 3460 - }, - { - "epoch": 0.92, - "learning_rate": 9.524961335197228e-09, - "logits/chosen": -2.388155460357666, - "logits/rejected": -2.3003084659576416, - "logps/chosen": -188.40737915039062, - "logps/rejected": -186.982421875, - "loss": 1422.0898, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.21522195637226105, - "rewards/margins": 0.05756833404302597, - "rewards/rejected": -0.2727903127670288, - "rewards/safe_rewards": -0.20854274928569794, - "rewards/unsafe_rewards": -0.22190114855766296, - "step": 3470 - }, - { - "epoch": 0.92, - "learning_rate": 8.902240045622261e-09, - "logits/chosen": -2.4832816123962402, - "logits/rejected": -2.303886890411377, - "logps/chosen": -234.3775177001953, - "logps/rejected": -217.74874877929688, - "loss": 1320.4361, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.1496528536081314, - "rewards/margins": 0.11930356919765472, - "rewards/rejected": -0.26895642280578613, - "rewards/safe_rewards": -0.16557829082012177, - "rewards/unsafe_rewards": -0.13372741639614105, - "step": 3480 - }, - { - "epoch": 0.93, - "learning_rate": 8.300200070076368e-09, - "logits/chosen": -2.432081460952759, - "logits/rejected": -2.2677602767944336, - "logps/chosen": -218.06222534179688, - "logps/rejected": -191.58758544921875, - "loss": 1292.1002, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.17285116016864777, - "rewards/margins": 0.10961610078811646, - "rewards/rejected": -0.2824672758579254, - "rewards/safe_rewards": -0.18338334560394287, - "rewards/unsafe_rewards": -0.16231897473335266, - "step": 3490 - }, - { - "epoch": 0.93, - "learning_rate": 7.718893051419206e-09, - "logits/chosen": -2.5006446838378906, - "logits/rejected": -2.3779196739196777, - "logps/chosen": -220.433349609375, - "logps/rejected": -207.16708374023438, - "loss": 1242.352, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.1803065687417984, - "rewards/margins": 0.09627944231033325, - "rewards/rejected": -0.27658599615097046, - "rewards/safe_rewards": -0.1697949320077896, - "rewards/unsafe_rewards": -0.1908182054758072, - "step": 3500 - }, - { - "epoch": 0.93, - "eval_logits/chosen": -2.247601270675659, - "eval_logits/rejected": -2.083954095840454, - "eval_logps/chosen": -164.99008178710938, - "eval_logps/rejected": -131.5628204345703, - "eval_loss": 978.79296875, - "eval_rewards/accuracies": 0.6612682342529297, - "eval_rewards/chosen": -0.2464052140712738, - "eval_rewards/margins": 0.047892529517412186, - "eval_rewards/rejected": -0.2942977547645569, - "eval_rewards/safe_rewards": -0.2429383248090744, - "eval_rewards/unsafe_rewards": -0.24054411053657532, - "eval_runtime": 2349.504, - "eval_samples_per_second": 14.915, - "eval_steps_per_second": 0.466, - "step": 3500 - }, - { - "epoch": 0.93, - "learning_rate": 7.158368854041891e-09, - "logits/chosen": -2.4209144115448, - "logits/rejected": -2.2624826431274414, - "logps/chosen": -261.0871276855469, - "logps/rejected": -215.70654296875, - "loss": 1381.3367, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.19456197321414948, - "rewards/margins": 0.06059097498655319, - "rewards/rejected": -0.25515297055244446, - "rewards/safe_rewards": -0.17220795154571533, - "rewards/unsafe_rewards": -0.2169160097837448, - "step": 3510 - }, - { - "epoch": 0.93, - "learning_rate": 6.6186755595896425e-09, - "logits/chosen": -2.412825345993042, - "logits/rejected": -2.423764705657959, - "logps/chosen": -206.2213897705078, - "logps/rejected": -185.2373046875, - "loss": 1626.3111, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.20289865136146545, - "rewards/margins": 0.03895517438650131, - "rewards/rejected": -0.24185380339622498, - "rewards/safe_rewards": -0.18121817708015442, - "rewards/unsafe_rewards": -0.2245790958404541, - "step": 3520 - }, - { - "epoch": 0.94, - "learning_rate": 6.099859462837253e-09, - "logits/chosen": -2.5304067134857178, - "logits/rejected": -2.324051856994629, - "logps/chosen": -204.20230102539062, - "logps/rejected": -180.61900329589844, - "loss": 1189.8152, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.14979711174964905, - "rewards/margins": 0.11737529933452606, - "rewards/rejected": -0.2671724259853363, - "rewards/safe_rewards": -0.15904760360717773, - "rewards/unsafe_rewards": -0.14054660499095917, - "step": 3530 - }, - { - "epoch": 0.94, - "learning_rate": 5.601965067717956e-09, - "logits/chosen": -2.3917431831359863, - "logits/rejected": -2.3045060634613037, - "logps/chosen": -217.7050018310547, - "logps/rejected": -222.43222045898438, - "loss": 1399.3263, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.17963740229606628, - "rewards/margins": 0.08732176572084427, - "rewards/rejected": -0.26695919036865234, - "rewards/safe_rewards": -0.1852760761976242, - "rewards/unsafe_rewards": -0.17399874329566956, - "step": 3540 - }, - { - "epoch": 0.94, - "learning_rate": 5.125035083505952e-09, - "logits/chosen": -2.3796796798706055, - "logits/rejected": -2.2818305492401123, - "logps/chosen": -205.2272186279297, - "logps/rejected": -195.5888214111328, - "loss": 1344.6176, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.14692382514476776, - "rewards/margins": 0.1033494621515274, - "rewards/rejected": -0.25027328729629517, - "rewards/safe_rewards": -0.13294745981693268, - "rewards/unsafe_rewards": -0.16090020537376404, - "step": 3550 - }, - { - "epoch": 0.94, - "learning_rate": 4.669110421152839e-09, - "logits/chosen": -2.4372589588165283, - "logits/rejected": -2.317784070968628, - "logps/chosen": -220.8054656982422, - "logps/rejected": -186.9831085205078, - "loss": 1259.3619, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.15995241701602936, - "rewards/margins": 0.10238592326641083, - "rewards/rejected": -0.2623383402824402, - "rewards/safe_rewards": -0.1529613733291626, - "rewards/unsafe_rewards": -0.16694346070289612, - "step": 3560 - }, - { - "epoch": 0.95, - "learning_rate": 4.234230189778087e-09, - "logits/chosen": -2.349630355834961, - "logits/rejected": -2.253180742263794, - "logps/chosen": -259.094482421875, - "logps/rejected": -235.8646697998047, - "loss": 1270.7289, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.16730989515781403, - "rewards/margins": 0.13173995912075043, - "rewards/rejected": -0.29904982447624207, - "rewards/safe_rewards": -0.16842129826545715, - "rewards/unsafe_rewards": -0.1661984622478485, - "step": 3570 - }, - { - "epoch": 0.95, - "learning_rate": 3.820431693314502e-09, - "logits/chosen": -2.4993271827697754, - "logits/rejected": -2.4081571102142334, - "logps/chosen": -258.88446044921875, - "logps/rejected": -229.48171997070312, - "loss": 1419.102, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.17407792806625366, - "rewards/margins": 0.08947981894016266, - "rewards/rejected": -0.2635577321052551, - "rewards/safe_rewards": -0.17494182288646698, - "rewards/unsafe_rewards": -0.17321403324604034, - "step": 3580 - }, - { - "epoch": 0.95, - "learning_rate": 3.427750427308168e-09, - "logits/chosen": -2.4362523555755615, - "logits/rejected": -2.258880138397217, - "logps/chosen": -189.74847412109375, - "logps/rejected": -164.9255828857422, - "loss": 1346.8125, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.17817720770835876, - "rewards/margins": 0.10320146381855011, - "rewards/rejected": -0.2813786566257477, - "rewards/safe_rewards": -0.17518207430839539, - "rewards/unsafe_rewards": -0.18117234110832214, - "step": 3590 - }, - { - "epoch": 0.96, - "learning_rate": 3.0562200758736943e-09, - "logits/chosen": -2.462249755859375, - "logits/rejected": -2.4308812618255615, - "logps/chosen": -189.85296630859375, - "logps/rejected": -199.70693969726562, - "loss": 1228.7508, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.1690368950366974, - "rewards/margins": 0.0722934901714325, - "rewards/rejected": -0.24133038520812988, - "rewards/safe_rewards": -0.15611490607261658, - "rewards/unsafe_rewards": -0.1819588541984558, - "step": 3600 - }, - { - "epoch": 0.96, - "learning_rate": 2.7058725088047464e-09, - "logits/chosen": -2.471168279647827, - "logits/rejected": -2.3537933826446533, - "logps/chosen": -208.574462890625, - "logps/rejected": -171.46145629882812, - "loss": 1363.9238, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.13693037629127502, - "rewards/margins": 0.09251546114683151, - "rewards/rejected": -0.22944584488868713, - "rewards/safe_rewards": -0.1513444036245346, - "rewards/unsafe_rewards": -0.12251631915569305, - "step": 3610 - }, - { - "epoch": 0.96, - "learning_rate": 2.3767377788403708e-09, - "logits/chosen": -2.454720973968506, - "logits/rejected": -2.297811985015869, - "logps/chosen": -201.7960662841797, - "logps/rejected": -184.08580017089844, - "loss": 1417.3523, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.16122351586818695, - "rewards/margins": 0.129957914352417, - "rewards/rejected": -0.29118138551712036, - "rewards/safe_rewards": -0.16436359286308289, - "rewards/unsafe_rewards": -0.1580834537744522, - "step": 3620 - }, - { - "epoch": 0.96, - "learning_rate": 2.0688441190869487e-09, - "logits/chosen": -2.3349967002868652, - "logits/rejected": -2.281348705291748, - "logps/chosen": -202.84152221679688, - "logps/rejected": -197.86302185058594, - "loss": 1350.7608, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.18241648375988007, - "rewards/margins": 0.09049202501773834, - "rewards/rejected": -0.2729085087776184, - "rewards/safe_rewards": -0.1941876858472824, - "rewards/unsafe_rewards": -0.17064529657363892, - "step": 3630 - }, - { - "epoch": 0.97, - "learning_rate": 1.7822179405964088e-09, - "logits/chosen": -2.488349437713623, - "logits/rejected": -2.334028959274292, - "logps/chosen": -194.80850219726562, - "logps/rejected": -177.6859588623047, - "loss": 1576.401, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.18650823831558228, - "rewards/margins": 0.0688927099108696, - "rewards/rejected": -0.25540095567703247, - "rewards/safe_rewards": -0.15030570328235626, - "rewards/unsafe_rewards": -0.22271080315113068, - "step": 3640 - }, - { - "epoch": 0.97, - "learning_rate": 1.5168838301007625e-09, - "logits/chosen": -2.4326846599578857, - "logits/rejected": -2.2965712547302246, - "logps/chosen": -176.73690795898438, - "logps/rejected": -155.05499267578125, - "loss": 1272.0088, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.1667037308216095, - "rewards/margins": 0.10573437064886093, - "rewards/rejected": -0.272438108921051, - "rewards/safe_rewards": -0.16226044297218323, - "rewards/unsafe_rewards": -0.17114701867103577, - "step": 3650 - }, - { - "epoch": 0.97, - "learning_rate": 1.2728645479029577e-09, - "logits/chosen": -2.4225659370422363, - "logits/rejected": -2.2661726474761963, - "logps/chosen": -238.89779663085938, - "logps/rejected": -221.12057495117188, - "loss": 1440.7905, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.16345509886741638, - "rewards/margins": 0.1080976277589798, - "rewards/rejected": -0.271552711725235, - "rewards/safe_rewards": -0.14329004287719727, - "rewards/unsafe_rewards": -0.18362018465995789, - "step": 3660 - }, { "epoch": 0.97, - "learning_rate": 1.0501810259246069e-09, - "logits/chosen": -2.4249961376190186, - "logits/rejected": -2.2478957176208496, - "logps/chosen": -221.49649047851562, - "logps/rejected": -171.78646850585938, - "loss": 1355.9148, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.16756613552570343, - "rewards/margins": 0.12343313544988632, - "rewards/rejected": -0.29099923372268677, - "rewards/safe_rewards": -0.16715478897094727, - "rewards/unsafe_rewards": -0.1679774820804596, - "step": 3670 - }, - { - "epoch": 0.98, - "learning_rate": 8.488523659103131e-10, - "logits/chosen": -2.3797924518585205, - "logits/rejected": -2.2979321479797363, - "logps/chosen": -228.2520294189453, - "logps/rejected": -206.13528442382812, - "loss": 1395.9512, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.16874518990516663, - "rewards/margins": 0.0921625867486, - "rewards/rejected": -0.26090773940086365, - "rewards/safe_rewards": -0.18567213416099548, - "rewards/unsafe_rewards": -0.15181824564933777, - "step": 3680 - }, - { - "epoch": 0.98, - "learning_rate": 6.688958377893128e-10, - "logits/chosen": -2.474550724029541, - "logits/rejected": -2.317873239517212, - "logps/chosen": -198.04312133789062, - "logps/rejected": -182.05682373046875, - "loss": 1254.8896, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.14315231144428253, - "rewards/margins": 0.12811912596225739, - "rewards/rejected": -0.2712714374065399, - "rewards/safe_rewards": -0.12252838909626007, - "rewards/unsafe_rewards": -0.1637762486934662, - "step": 3690 - }, - { - "epoch": 0.98, - "learning_rate": 5.103268781938841e-10, - "logits/chosen": -2.4605064392089844, - "logits/rejected": -2.351224184036255, - "logps/chosen": -193.74368286132812, - "logps/rejected": -177.18992614746094, - "loss": 1396.3022, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.1977364718914032, - "rewards/margins": 0.06358613818883896, - "rewards/rejected": -0.26132261753082275, - "rewards/safe_rewards": -0.2148222029209137, - "rewards/unsafe_rewards": -0.1806507259607315, - "step": 3700 + "learning_rate": 1.4830757615760247e-09, + "logits/chosen": -1.1881390810012817, + "logits/rejected": -0.671101987361908, + "logps/chosen": -256.49505615234375, + "logps/rejected": -243.5143585205078, + "loss": 11675.8547, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -0.539075493812561, + "rewards/margins": 0.1820039451122284, + "rewards/rejected": -0.7210793495178223, + "rewards/safe_rewards": -0.5390331745147705, + "rewards/unsafe_rewards": -0.5225407481193542, + "step": 900 }, { "epoch": 0.98, - "learning_rate": 3.7315908913529425e-10, - "logits/chosen": -2.4676260948181152, - "logits/rejected": -2.339677095413208, - "logps/chosen": -196.89321899414062, - "logps/rejected": -172.80345153808594, - "loss": 1320.3254, - "rewards/accuracies": 0.762499988079071, - "rewards/chosen": -0.16787005960941315, - "rewards/margins": 0.11410045623779297, - "rewards/rejected": -0.2819705009460449, - "rewards/safe_rewards": -0.16832561790943146, - "rewards/unsafe_rewards": -0.16741445660591125, - "step": 3710 - }, - { - "epoch": 0.99, - "learning_rate": 2.5740423683703885e-10, - "logits/chosen": -2.396761417388916, - "logits/rejected": -2.3387227058410645, - "logps/chosen": -202.34774780273438, - "logps/rejected": -201.5135040283203, - "loss": 1462.3891, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.16167563199996948, - "rewards/margins": 0.08906832337379456, - "rewards/rejected": -0.25074395537376404, - "rewards/safe_rewards": -0.15829941630363464, - "rewards/unsafe_rewards": -0.16505186259746552, - "step": 3720 - }, - { - "epoch": 0.99, - "learning_rate": 1.6307225072542675e-10, - "logits/chosen": -2.492969274520874, - "logits/rejected": -2.364241123199463, - "logps/chosen": -233.29605102539062, - "logps/rejected": -201.5292510986328, - "loss": 1391.1713, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.20615990459918976, - "rewards/margins": 0.0416078120470047, - "rewards/rejected": -0.24776773154735565, - "rewards/safe_rewards": -0.21639379858970642, - "rewards/unsafe_rewards": -0.1959260255098343, - "step": 3730 - }, - { - "epoch": 0.99, - "learning_rate": 9.017122257795606e-11, - "logits/chosen": -2.3729546070098877, - "logits/rejected": -2.239258289337158, - "logps/chosen": -213.7554473876953, - "logps/rejected": -196.91310119628906, - "loss": 1443.4098, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.17523546516895294, - "rewards/margins": 0.08719383925199509, - "rewards/rejected": -0.26242929697036743, - "rewards/safe_rewards": -0.18039007484912872, - "rewards/unsafe_rewards": -0.17008087038993835, - "step": 3740 + "learning_rate": 6.369713474366212e-10, + "logits/chosen": -1.215127944946289, + "logits/rejected": -0.6859368681907654, + "logps/chosen": -265.4532470703125, + "logps/rejected": -244.2112274169922, + "loss": 10490.1055, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.5391587018966675, + "rewards/margins": 0.18435393273830414, + "rewards/rejected": -0.7235127091407776, + "rewards/safe_rewards": -0.5553447604179382, + "rewards/unsafe_rewards": -0.5444998741149902, + "step": 910 }, { "epoch": 0.99, - "learning_rate": 3.8707405829174754e-11, - "logits/chosen": -2.4977498054504395, - "logits/rejected": -2.411048650741577, - "logps/chosen": -243.7748565673828, - "logps/rejected": -213.8860626220703, - "loss": 1417.3644, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.14790478348731995, - "rewards/margins": 0.10260061174631119, - "rewards/rejected": -0.25050538778305054, - "rewards/safe_rewards": -0.15742835402488708, - "rewards/unsafe_rewards": -0.1383811980485916, - "step": 3750 - }, - { - "epoch": 1.0, - "learning_rate": 8.685215034220927e-12, - "logits/chosen": -2.470888376235962, - "logits/rejected": -2.3292765617370605, - "logps/chosen": -234.9925994873047, - "logps/rejected": -256.25494384765625, - "loss": 1315.9805, - "rewards/accuracies": 0.7875000238418579, - "rewards/chosen": -0.14897190034389496, - "rewards/margins": 0.11322913318872452, - "rewards/rejected": -0.2622010111808777, - "rewards/safe_rewards": -0.15524552762508392, - "rewards/unsafe_rewards": -0.1426982879638672, - "step": 3760 + "learning_rate": 1.429686526593088e-10, + "logits/chosen": -1.2494490146636963, + "logits/rejected": -0.796093761920929, + "logps/chosen": -265.77630615234375, + "logps/rejected": -247.41171264648438, + "loss": 10829.4625, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.5216090679168701, + "rewards/margins": 0.20224042236804962, + "rewards/rejected": -0.7238494753837585, + "rewards/safe_rewards": -0.5372604131698608, + "rewards/unsafe_rewards": -0.5229144096374512, + "step": 920 }, { "epoch": 1.0, - "step": 3769, + "step": 929, "total_flos": 0.0, - "train_loss": 1500.3740138506566, - "train_runtime": 40588.9258, - "train_samples_per_second": 2.972, - "train_steps_per_second": 0.093 + "train_loss": 11837.785513657158, + "train_runtime": 18541.4057, + "train_samples_per_second": 3.208, + "train_steps_per_second": 0.05 } ], "logging_steps": 10, - "max_steps": 3769, + "max_steps": 929, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 0.0, - "train_batch_size": 2, + "train_batch_size": 4, "trial_name": null, "trial_params": null }