{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 13557, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.6873156342182892e-09, "logits/chosen": -2.4788765907287598, "logits/rejected": -1.3292487859725952, "logps/chosen": -409.9538269042969, "logps/rejected": -238.84312438964844, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 3.687315634218289e-08, "logits/chosen": -1.7213034629821777, "logits/rejected": -1.4421601295471191, "logps/chosen": -164.38905334472656, "logps/rejected": -148.8963165283203, "loss": 0.6932, "rewards/accuracies": 0.3333333432674408, "rewards/chosen": -5.6421176850562915e-05, "rewards/margins": -9.963046613847837e-05, "rewards/rejected": 4.320928928791545e-05, "step": 10 }, { "epoch": 0.0, "learning_rate": 7.374631268436579e-08, "logits/chosen": -2.01934552192688, "logits/rejected": -1.5760900974273682, "logps/chosen": -162.64013671875, "logps/rejected": -124.38932800292969, "loss": 0.6931, "rewards/accuracies": 0.375, "rewards/chosen": 3.287907020421699e-05, "rewards/margins": 8.663302287459373e-05, "rewards/rejected": -5.375394903239794e-05, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.1061946902654869e-07, "logits/chosen": -1.9525632858276367, "logits/rejected": -1.6103423833847046, "logps/chosen": -161.3494110107422, "logps/rejected": -124.24827575683594, "loss": 0.6931, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -5.152312223799527e-05, "rewards/margins": 2.178902468585875e-05, "rewards/rejected": -7.331215601880103e-05, "step": 30 }, { "epoch": 0.0, "learning_rate": 1.4749262536873157e-07, "logits/chosen": -2.1486165523529053, "logits/rejected": -1.4793721437454224, "logps/chosen": -209.8780059814453, "logps/rejected": -144.09156799316406, "loss": 0.6931, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.00011835995974252, "rewards/margins": 2.2976688342168927e-05, "rewards/rejected": 9.538327140035108e-05, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.8436578171091446e-07, "logits/chosen": -1.8638836145401, "logits/rejected": -1.4776787757873535, "logps/chosen": -132.535888671875, "logps/rejected": -95.51261138916016, "loss": 0.6931, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -7.551767339464277e-06, "rewards/margins": -2.612471507745795e-05, "rewards/rejected": 1.8572953194961883e-05, "step": 50 }, { "epoch": 0.0, "learning_rate": 2.2123893805309737e-07, "logits/chosen": -1.9905354976654053, "logits/rejected": -1.4940404891967773, "logps/chosen": -144.68663024902344, "logps/rejected": -106.77079772949219, "loss": 0.6931, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 1.2859722119173966e-05, "rewards/margins": 1.1314773473714013e-05, "rewards/rejected": 1.5449517150045722e-06, "step": 60 }, { "epoch": 0.01, "learning_rate": 2.5811209439528026e-07, "logits/chosen": -1.8072818517684937, "logits/rejected": -1.543416976928711, "logps/chosen": -110.77174377441406, "logps/rejected": -107.84919738769531, "loss": 0.6931, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 3.55792508344166e-05, "rewards/margins": -1.4290347280621063e-05, "rewards/rejected": 4.986958811059594e-05, "step": 70 }, { "epoch": 0.01, "learning_rate": 2.9498525073746315e-07, "logits/chosen": -2.057982921600342, "logits/rejected": -1.6788095235824585, "logps/chosen": -110.81111145019531, "logps/rejected": -92.771728515625, "loss": 0.6931, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.00024706899421289563, "rewards/margins": 0.00020930128812324256, "rewards/rejected": 3.776769881369546e-05, "step": 80 }, { "epoch": 0.01, "learning_rate": 3.318584070796461e-07, "logits/chosen": -1.9507834911346436, "logits/rejected": -1.5142406225204468, "logps/chosen": -142.8982696533203, "logps/rejected": -112.25239562988281, "loss": 0.6931, "rewards/accuracies": 0.625, "rewards/chosen": 0.00032218400156125426, "rewards/margins": 0.00020215558470226824, "rewards/rejected": 0.00012002838775515556, "step": 90 }, { "epoch": 0.01, "learning_rate": 3.687315634218289e-07, "logits/chosen": -1.9335477352142334, "logits/rejected": -1.6856454610824585, "logps/chosen": -137.237060546875, "logps/rejected": -114.21199798583984, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": 0.0005608827341347933, "rewards/margins": 0.00022660612012259662, "rewards/rejected": 0.0003342765849083662, "step": 100 }, { "epoch": 0.01, "learning_rate": 4.0560471976401186e-07, "logits/chosen": -1.790734052658081, "logits/rejected": -1.4557358026504517, "logps/chosen": -124.97225189208984, "logps/rejected": -116.9600830078125, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": 0.000406478822696954, "rewards/margins": 0.00018111658573616296, "rewards/rejected": 0.00022536217875313014, "step": 110 }, { "epoch": 0.01, "learning_rate": 4.4247787610619474e-07, "logits/chosen": -1.8989839553833008, "logits/rejected": -1.4581637382507324, "logps/chosen": -173.8742218017578, "logps/rejected": -144.89381408691406, "loss": 0.6931, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0007460180204361677, "rewards/margins": 0.0004257112159393728, "rewards/rejected": 0.00032030680449679494, "step": 120 }, { "epoch": 0.01, "learning_rate": 4.793510324483777e-07, "logits/chosen": -1.98974609375, "logits/rejected": -1.7140281200408936, "logps/chosen": -140.48538208007812, "logps/rejected": -116.86686706542969, "loss": 0.6931, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0010246632155030966, "rewards/margins": 0.0005635916022583842, "rewards/rejected": 0.00046107155503705144, "step": 130 }, { "epoch": 0.01, "learning_rate": 5.162241887905605e-07, "logits/chosen": -1.9790418148040771, "logits/rejected": -1.6778980493545532, "logps/chosen": -116.15827941894531, "logps/rejected": -130.74288940429688, "loss": 0.693, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0011597160482779145, "rewards/margins": 0.0013133505126461387, "rewards/rejected": -0.00015363460988737643, "step": 140 }, { "epoch": 0.01, "learning_rate": 5.530973451327435e-07, "logits/chosen": -2.088108777999878, "logits/rejected": -1.896816611289978, "logps/chosen": -163.32858276367188, "logps/rejected": -118.4830322265625, "loss": 0.6931, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.0008649303345009685, "rewards/margins": 0.0004281798901502043, "rewards/rejected": 0.0004367504734545946, "step": 150 }, { "epoch": 0.01, "learning_rate": 5.899705014749263e-07, "logits/chosen": -2.0121922492980957, "logits/rejected": -1.5529156923294067, "logps/chosen": -137.92465209960938, "logps/rejected": -119.03995513916016, "loss": 0.693, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0017403883393853903, "rewards/margins": 0.0012056892737746239, "rewards/rejected": 0.0005346991238184273, "step": 160 }, { "epoch": 0.01, "learning_rate": 6.268436578171091e-07, "logits/chosen": -1.8290131092071533, "logits/rejected": -1.3624205589294434, "logps/chosen": -189.79788208007812, "logps/rejected": -142.59107971191406, "loss": 0.6929, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.0011178858112543821, "rewards/margins": 0.0016311698127537966, "rewards/rejected": -0.0005132838268764317, "step": 170 }, { "epoch": 0.01, "learning_rate": 6.637168141592922e-07, "logits/chosen": -2.152608871459961, "logits/rejected": -1.6454375982284546, "logps/chosen": -168.7275390625, "logps/rejected": -141.51376342773438, "loss": 0.6925, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0024649477563798428, "rewards/margins": 0.0052183400839567184, "rewards/rejected": -0.002753392094746232, "step": 180 }, { "epoch": 0.01, "learning_rate": 7.00589970501475e-07, "logits/chosen": -1.9579254388809204, "logits/rejected": -1.600041389465332, "logps/chosen": -131.2523956298828, "logps/rejected": -96.4346694946289, "loss": 0.6926, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.005869925953447819, "rewards/margins": 0.001955586252734065, "rewards/rejected": -0.007825511507689953, "step": 190 }, { "epoch": 0.01, "learning_rate": 7.374631268436578e-07, "logits/chosen": -1.9982858896255493, "logits/rejected": -1.459930181503296, "logps/chosen": -227.6873779296875, "logps/rejected": -161.56661987304688, "loss": 0.6917, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.004850572440773249, "rewards/margins": 0.011479116976261139, "rewards/rejected": -0.0163296889513731, "step": 200 }, { "epoch": 0.02, "learning_rate": 7.743362831858408e-07, "logits/chosen": -1.9859434366226196, "logits/rejected": -1.5732555389404297, "logps/chosen": -189.78469848632812, "logps/rejected": -174.94448852539062, "loss": 0.6917, "rewards/accuracies": 0.625, "rewards/chosen": -0.014642874710261822, "rewards/margins": 0.015566508285701275, "rewards/rejected": -0.030209382995963097, "step": 210 }, { "epoch": 0.02, "learning_rate": 8.112094395280237e-07, "logits/chosen": -1.7946741580963135, "logits/rejected": -1.3940023183822632, "logps/chosen": -219.55935668945312, "logps/rejected": -197.7119140625, "loss": 0.6881, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03116585873067379, "rewards/margins": 0.04071114584803581, "rewards/rejected": -0.07187700271606445, "step": 220 }, { "epoch": 0.02, "learning_rate": 8.480825958702065e-07, "logits/chosen": -1.9493271112442017, "logits/rejected": -1.535683274269104, "logps/chosen": -266.67242431640625, "logps/rejected": -264.09295654296875, "loss": 0.6889, "rewards/accuracies": 0.625, "rewards/chosen": -0.11858478933572769, "rewards/margins": 0.03607611358165741, "rewards/rejected": -0.1546609103679657, "step": 230 }, { "epoch": 0.02, "learning_rate": 8.849557522123895e-07, "logits/chosen": -1.7229340076446533, "logits/rejected": -1.2285921573638916, "logps/chosen": -243.61837768554688, "logps/rejected": -325.18341064453125, "loss": 0.6835, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12535437941551208, "rewards/margins": 0.09356291592121124, "rewards/rejected": -0.21891728043556213, "step": 240 }, { "epoch": 0.02, "learning_rate": 9.218289085545723e-07, "logits/chosen": -1.9300626516342163, "logits/rejected": -1.3733972311019897, "logps/chosen": -355.2098693847656, "logps/rejected": -420.8204040527344, "loss": 0.6844, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1748114675283432, "rewards/margins": 0.09310068190097809, "rewards/rejected": -0.2679121494293213, "step": 250 }, { "epoch": 0.02, "learning_rate": 9.587020648967554e-07, "logits/chosen": -1.8784319162368774, "logits/rejected": -1.602791428565979, "logps/chosen": -312.51275634765625, "logps/rejected": -401.20184326171875, "loss": 0.6869, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13590438663959503, "rewards/margins": 0.0805073007941246, "rewards/rejected": -0.21641167998313904, "step": 260 }, { "epoch": 0.02, "learning_rate": 9.95575221238938e-07, "logits/chosen": -2.046440839767456, "logits/rejected": -1.7027628421783447, "logps/chosen": -344.7609558105469, "logps/rejected": -442.35748291015625, "loss": 0.6825, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18791678547859192, "rewards/margins": 0.11440564692020416, "rewards/rejected": -0.3023224472999573, "step": 270 }, { "epoch": 0.02, "learning_rate": 1.032448377581121e-06, "logits/chosen": -1.8106426000595093, "logits/rejected": -1.401421308517456, "logps/chosen": -349.99639892578125, "logps/rejected": -449.09979248046875, "loss": 0.6867, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.182583749294281, "rewards/margins": 0.11998214572668076, "rewards/rejected": -0.30256590247154236, "step": 280 }, { "epoch": 0.02, "learning_rate": 1.069321533923304e-06, "logits/chosen": -1.820173978805542, "logits/rejected": -1.4687297344207764, "logps/chosen": -464.2765197753906, "logps/rejected": -560.2600708007812, "loss": 0.6829, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.30065301060676575, "rewards/margins": 0.11552959680557251, "rewards/rejected": -0.41618260741233826, "step": 290 }, { "epoch": 0.02, "learning_rate": 1.106194690265487e-06, "logits/chosen": -1.8119823932647705, "logits/rejected": -1.1425138711929321, "logps/chosen": -595.22412109375, "logps/rejected": -680.9646606445312, "loss": 0.6838, "rewards/accuracies": 0.625, "rewards/chosen": -0.44082874059677124, "rewards/margins": 0.13283680379390717, "rewards/rejected": -0.5736656188964844, "step": 300 }, { "epoch": 0.02, "learning_rate": 1.1430678466076696e-06, "logits/chosen": -1.8266925811767578, "logits/rejected": -1.5036365985870361, "logps/chosen": -465.07952880859375, "logps/rejected": -522.3955688476562, "loss": 0.6858, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.326357364654541, "rewards/margins": 0.08478839695453644, "rewards/rejected": -0.41114577651023865, "step": 310 }, { "epoch": 0.02, "learning_rate": 1.1799410029498526e-06, "logits/chosen": -2.0068325996398926, "logits/rejected": -1.4224900007247925, "logps/chosen": -402.070556640625, "logps/rejected": -468.43896484375, "loss": 0.6839, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2398921698331833, "rewards/margins": 0.10939452797174454, "rewards/rejected": -0.34928667545318604, "step": 320 }, { "epoch": 0.02, "learning_rate": 1.2168141592920355e-06, "logits/chosen": -1.8652881383895874, "logits/rejected": -1.5493414402008057, "logps/chosen": -325.8789367675781, "logps/rejected": -345.3408203125, "loss": 0.6892, "rewards/accuracies": 0.5, "rewards/chosen": -0.16698017716407776, "rewards/margins": 0.05401788279414177, "rewards/rejected": -0.22099804878234863, "step": 330 }, { "epoch": 0.03, "learning_rate": 1.2536873156342182e-06, "logits/chosen": -1.8522043228149414, "logits/rejected": -1.3583829402923584, "logps/chosen": -378.66827392578125, "logps/rejected": -463.7928771972656, "loss": 0.6837, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21790365874767303, "rewards/margins": 0.10478894412517548, "rewards/rejected": -0.3226926326751709, "step": 340 }, { "epoch": 0.03, "learning_rate": 1.2905604719764012e-06, "logits/chosen": -1.934893250465393, "logits/rejected": -1.2708985805511475, "logps/chosen": -361.7750549316406, "logps/rejected": -448.55224609375, "loss": 0.6805, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.18481972813606262, "rewards/margins": 0.1347089409828186, "rewards/rejected": -0.31952863931655884, "step": 350 }, { "epoch": 0.03, "learning_rate": 1.3274336283185843e-06, "logits/chosen": -1.771085500717163, "logits/rejected": -1.2255713939666748, "logps/chosen": -441.59454345703125, "logps/rejected": -492.6583557128906, "loss": 0.6831, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2711867690086365, "rewards/margins": 0.10351588577032089, "rewards/rejected": -0.3747026324272156, "step": 360 }, { "epoch": 0.03, "learning_rate": 1.364306784660767e-06, "logits/chosen": -1.7396430969238281, "logits/rejected": -1.4632437229156494, "logps/chosen": -470.85003662109375, "logps/rejected": -551.9163208007812, "loss": 0.6856, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.31487274169921875, "rewards/margins": 0.09510970115661621, "rewards/rejected": -0.40998244285583496, "step": 370 }, { "epoch": 0.03, "learning_rate": 1.40117994100295e-06, "logits/chosen": -2.0234246253967285, "logits/rejected": -1.628557562828064, "logps/chosen": -518.6569213867188, "logps/rejected": -588.3342895507812, "loss": 0.6842, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3469838798046112, "rewards/margins": 0.09814401715993881, "rewards/rejected": -0.4451279044151306, "step": 380 }, { "epoch": 0.03, "learning_rate": 1.438053097345133e-06, "logits/chosen": -2.0046608448028564, "logits/rejected": -1.5576605796813965, "logps/chosen": -451.94696044921875, "logps/rejected": -520.5955200195312, "loss": 0.6822, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.33083003759384155, "rewards/margins": 0.11202055215835571, "rewards/rejected": -0.44285058975219727, "step": 390 }, { "epoch": 0.03, "learning_rate": 1.4749262536873157e-06, "logits/chosen": -1.867279052734375, "logits/rejected": -1.2544457912445068, "logps/chosen": -516.2893676757812, "logps/rejected": -615.3094482421875, "loss": 0.682, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3608855605125427, "rewards/margins": 0.15538203716278076, "rewards/rejected": -0.5162675976753235, "step": 400 }, { "epoch": 0.03, "learning_rate": 1.5117994100294986e-06, "logits/chosen": -1.907383680343628, "logits/rejected": -1.577263593673706, "logps/chosen": -530.252685546875, "logps/rejected": -585.1219482421875, "loss": 0.6851, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.37288641929626465, "rewards/margins": 0.06618603318929672, "rewards/rejected": -0.43907251954078674, "step": 410 }, { "epoch": 0.03, "learning_rate": 1.5486725663716816e-06, "logits/chosen": -1.854128122329712, "logits/rejected": -1.2359414100646973, "logps/chosen": -498.1875, "logps/rejected": -616.3113403320312, "loss": 0.6765, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.32451412081718445, "rewards/margins": 0.1810571849346161, "rewards/rejected": -0.5055713057518005, "step": 420 }, { "epoch": 0.03, "learning_rate": 1.5855457227138643e-06, "logits/chosen": -1.6812435388565063, "logits/rejected": -1.2456939220428467, "logps/chosen": -528.3734130859375, "logps/rejected": -660.8782958984375, "loss": 0.6792, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.39390069246292114, "rewards/margins": 0.1637260913848877, "rewards/rejected": -0.5576268434524536, "step": 430 }, { "epoch": 0.03, "learning_rate": 1.6224188790560474e-06, "logits/chosen": -1.8546499013900757, "logits/rejected": -1.4410336017608643, "logps/chosen": -539.2593383789062, "logps/rejected": -622.1941528320312, "loss": 0.6825, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3938086926937103, "rewards/margins": 0.10453224182128906, "rewards/rejected": -0.4983409345149994, "step": 440 }, { "epoch": 0.03, "learning_rate": 1.6592920353982304e-06, "logits/chosen": -1.7073545455932617, "logits/rejected": -1.6687370538711548, "logps/chosen": -422.2709045410156, "logps/rejected": -465.105224609375, "loss": 0.6907, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.30252018570899963, "rewards/margins": 0.05136919021606445, "rewards/rejected": -0.3538893163204193, "step": 450 }, { "epoch": 0.03, "learning_rate": 1.696165191740413e-06, "logits/chosen": -2.160515069961548, "logits/rejected": -1.420827031135559, "logps/chosen": -379.50299072265625, "logps/rejected": -419.890625, "loss": 0.6831, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19716520607471466, "rewards/margins": 0.11601902544498444, "rewards/rejected": -0.3131842017173767, "step": 460 }, { "epoch": 0.03, "learning_rate": 1.733038348082596e-06, "logits/chosen": -2.045727491378784, "logits/rejected": -1.7110141515731812, "logps/chosen": -434.781982421875, "logps/rejected": -467.20709228515625, "loss": 0.6881, "rewards/accuracies": 0.625, "rewards/chosen": -0.26196640729904175, "rewards/margins": 0.08277492225170135, "rewards/rejected": -0.3447413742542267, "step": 470 }, { "epoch": 0.04, "learning_rate": 1.769911504424779e-06, "logits/chosen": -1.9494785070419312, "logits/rejected": -1.7097057104110718, "logps/chosen": -505.980712890625, "logps/rejected": -525.5626220703125, "loss": 0.6924, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.37043842673301697, "rewards/margins": 0.027207273989915848, "rewards/rejected": -0.3976457118988037, "step": 480 }, { "epoch": 0.04, "learning_rate": 1.8067846607669617e-06, "logits/chosen": -2.045279026031494, "logits/rejected": -1.6425273418426514, "logps/chosen": -452.57891845703125, "logps/rejected": -533.9486083984375, "loss": 0.6868, "rewards/accuracies": 0.625, "rewards/chosen": -0.30547982454299927, "rewards/margins": 0.08933992683887482, "rewards/rejected": -0.39481979608535767, "step": 490 }, { "epoch": 0.04, "learning_rate": 1.8436578171091446e-06, "logits/chosen": -1.8627780675888062, "logits/rejected": -1.5311658382415771, "logps/chosen": -421.89227294921875, "logps/rejected": -524.7391357421875, "loss": 0.6794, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2568140923976898, "rewards/margins": 0.13950814306735992, "rewards/rejected": -0.39632219076156616, "step": 500 }, { "epoch": 0.04, "learning_rate": 1.8805309734513274e-06, "logits/chosen": -1.9864403009414673, "logits/rejected": -1.535455346107483, "logps/chosen": -525.3858642578125, "logps/rejected": -621.3438720703125, "loss": 0.6799, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3375723958015442, "rewards/margins": 0.13930204510688782, "rewards/rejected": -0.476874440908432, "step": 510 }, { "epoch": 0.04, "learning_rate": 1.9174041297935107e-06, "logits/chosen": -2.0065884590148926, "logits/rejected": -1.4865143299102783, "logps/chosen": -411.04638671875, "logps/rejected": -561.2197875976562, "loss": 0.6764, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2829917073249817, "rewards/margins": 0.168602854013443, "rewards/rejected": -0.4515945315361023, "step": 520 }, { "epoch": 0.04, "learning_rate": 1.9542772861356935e-06, "logits/chosen": -1.9803001880645752, "logits/rejected": -1.798586130142212, "logps/chosen": -585.0508422851562, "logps/rejected": -617.8795776367188, "loss": 0.6853, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.39687082171440125, "rewards/margins": 0.08475859463214874, "rewards/rejected": -0.4816294312477112, "step": 530 }, { "epoch": 0.04, "learning_rate": 1.991150442477876e-06, "logits/chosen": -1.8822046518325806, "logits/rejected": -1.3915908336639404, "logps/chosen": -534.4107666015625, "logps/rejected": -680.7306518554688, "loss": 0.6777, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.36314496397972107, "rewards/margins": 0.1954086273908615, "rewards/rejected": -0.5585536360740662, "step": 540 }, { "epoch": 0.04, "learning_rate": 2.0280235988200593e-06, "logits/chosen": -1.9563770294189453, "logits/rejected": -1.471461296081543, "logps/chosen": -485.81689453125, "logps/rejected": -563.0682373046875, "loss": 0.6816, "rewards/accuracies": 0.75, "rewards/chosen": -0.3260261118412018, "rewards/margins": 0.12986885011196136, "rewards/rejected": -0.45589500665664673, "step": 550 }, { "epoch": 0.04, "learning_rate": 2.064896755162242e-06, "logits/chosen": -1.8118394613265991, "logits/rejected": -1.638448715209961, "logps/chosen": -394.0333251953125, "logps/rejected": -453.635009765625, "loss": 0.6856, "rewards/accuracies": 0.625, "rewards/chosen": -0.21587488055229187, "rewards/margins": 0.09306404739618301, "rewards/rejected": -0.3089389204978943, "step": 560 }, { "epoch": 0.04, "learning_rate": 2.101769911504425e-06, "logits/chosen": -2.137202739715576, "logits/rejected": -1.4553264379501343, "logps/chosen": -446.45086669921875, "logps/rejected": -566.4642333984375, "loss": 0.6797, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.29202860593795776, "rewards/margins": 0.14904475212097168, "rewards/rejected": -0.44107335805892944, "step": 570 }, { "epoch": 0.04, "learning_rate": 2.138643067846608e-06, "logits/chosen": -1.7823879718780518, "logits/rejected": -1.4595364332199097, "logps/chosen": -607.0121459960938, "logps/rejected": -737.3904418945312, "loss": 0.6789, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.46222734451293945, "rewards/margins": 0.1648208051919937, "rewards/rejected": -0.627048134803772, "step": 580 }, { "epoch": 0.04, "learning_rate": 2.1755162241887907e-06, "logits/chosen": -1.9204248189926147, "logits/rejected": -1.478869915008545, "logps/chosen": -462.30218505859375, "logps/rejected": -582.520263671875, "loss": 0.6819, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.26998433470726013, "rewards/margins": 0.15396475791931152, "rewards/rejected": -0.42394909262657166, "step": 590 }, { "epoch": 0.04, "learning_rate": 2.212389380530974e-06, "logits/chosen": -2.0838003158569336, "logits/rejected": -1.5276468992233276, "logps/chosen": -434.4398498535156, "logps/rejected": -526.2811279296875, "loss": 0.6843, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2667337656021118, "rewards/margins": 0.1474871188402176, "rewards/rejected": -0.4142208695411682, "step": 600 }, { "epoch": 0.04, "learning_rate": 2.2492625368731566e-06, "logits/chosen": -2.005659580230713, "logits/rejected": -1.6580346822738647, "logps/chosen": -391.08837890625, "logps/rejected": -449.92022705078125, "loss": 0.6879, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.25623375177383423, "rewards/margins": 0.0901186540722847, "rewards/rejected": -0.34635239839553833, "step": 610 }, { "epoch": 0.05, "learning_rate": 2.2861356932153393e-06, "logits/chosen": -2.0478882789611816, "logits/rejected": -1.418408751487732, "logps/chosen": -374.15185546875, "logps/rejected": -457.21209716796875, "loss": 0.6767, "rewards/accuracies": 0.75, "rewards/chosen": -0.2127421796321869, "rewards/margins": 0.15468503534793854, "rewards/rejected": -0.367427259683609, "step": 620 }, { "epoch": 0.05, "learning_rate": 2.3230088495575224e-06, "logits/chosen": -2.0864791870117188, "logits/rejected": -1.809248685836792, "logps/chosen": -415.10296630859375, "logps/rejected": -477.59033203125, "loss": 0.6867, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2516500949859619, "rewards/margins": 0.10447762906551361, "rewards/rejected": -0.3561277389526367, "step": 630 }, { "epoch": 0.05, "learning_rate": 2.359882005899705e-06, "logits/chosen": -2.234788656234741, "logits/rejected": -1.7704509496688843, "logps/chosen": -381.68524169921875, "logps/rejected": -445.52032470703125, "loss": 0.6814, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21832557022571564, "rewards/margins": 0.11144711822271347, "rewards/rejected": -0.3297726511955261, "step": 640 }, { "epoch": 0.05, "learning_rate": 2.396755162241888e-06, "logits/chosen": -2.3688178062438965, "logits/rejected": -1.7699651718139648, "logps/chosen": -404.2710266113281, "logps/rejected": -519.812255859375, "loss": 0.6792, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24311621487140656, "rewards/margins": 0.1357416957616806, "rewards/rejected": -0.37885794043540955, "step": 650 }, { "epoch": 0.05, "learning_rate": 2.433628318584071e-06, "logits/chosen": -2.2430148124694824, "logits/rejected": -1.7135947942733765, "logps/chosen": -348.75555419921875, "logps/rejected": -471.67999267578125, "loss": 0.6823, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21386036276817322, "rewards/margins": 0.13416607677936554, "rewards/rejected": -0.34802645444869995, "step": 660 }, { "epoch": 0.05, "learning_rate": 2.4705014749262538e-06, "logits/chosen": -2.2484421730041504, "logits/rejected": -1.8406680822372437, "logps/chosen": -479.677978515625, "logps/rejected": -536.4678344726562, "loss": 0.682, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3293268382549286, "rewards/margins": 0.10312734544277191, "rewards/rejected": -0.4324541985988617, "step": 670 }, { "epoch": 0.05, "learning_rate": 2.5073746312684365e-06, "logits/chosen": -2.155977487564087, "logits/rejected": -1.5651214122772217, "logps/chosen": -468.7041931152344, "logps/rejected": -574.7459716796875, "loss": 0.6815, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3216959834098816, "rewards/margins": 0.1411447823047638, "rewards/rejected": -0.4628407955169678, "step": 680 }, { "epoch": 0.05, "learning_rate": 2.5442477876106196e-06, "logits/chosen": -2.393779993057251, "logits/rejected": -1.7583000659942627, "logps/chosen": -402.8919372558594, "logps/rejected": -537.0436401367188, "loss": 0.6786, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22199706733226776, "rewards/margins": 0.18561187386512756, "rewards/rejected": -0.4076089859008789, "step": 690 }, { "epoch": 0.05, "learning_rate": 2.5811209439528024e-06, "logits/chosen": -2.1495628356933594, "logits/rejected": -1.747648000717163, "logps/chosen": -472.1742248535156, "logps/rejected": -565.7252197265625, "loss": 0.6834, "rewards/accuracies": 0.625, "rewards/chosen": -0.3417675495147705, "rewards/margins": 0.1234516054391861, "rewards/rejected": -0.46521908044815063, "step": 700 }, { "epoch": 0.05, "learning_rate": 2.6179941002949855e-06, "logits/chosen": -2.0047028064727783, "logits/rejected": -1.8650157451629639, "logps/chosen": -463.42938232421875, "logps/rejected": -598.6697387695312, "loss": 0.6825, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3318346440792084, "rewards/margins": 0.14977237582206726, "rewards/rejected": -0.48160701990127563, "step": 710 }, { "epoch": 0.05, "learning_rate": 2.6548672566371687e-06, "logits/chosen": -2.200765371322632, "logits/rejected": -1.4630253314971924, "logps/chosen": -412.4142150878906, "logps/rejected": -576.6033325195312, "loss": 0.6774, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2577304542064667, "rewards/margins": 0.2198226898908615, "rewards/rejected": -0.477553129196167, "step": 720 }, { "epoch": 0.05, "learning_rate": 2.691740412979351e-06, "logits/chosen": -2.31023907661438, "logits/rejected": -2.064582347869873, "logps/chosen": -295.4432678222656, "logps/rejected": -355.28131103515625, "loss": 0.6885, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.11662834882736206, "rewards/margins": 0.08975906670093536, "rewards/rejected": -0.20638743042945862, "step": 730 }, { "epoch": 0.05, "learning_rate": 2.728613569321534e-06, "logits/chosen": -2.357072591781616, "logits/rejected": -1.980004906654358, "logps/chosen": -328.4388732910156, "logps/rejected": -394.37469482421875, "loss": 0.6837, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.15705019235610962, "rewards/margins": 0.0898301973938942, "rewards/rejected": -0.24688038229942322, "step": 740 }, { "epoch": 0.06, "learning_rate": 2.765486725663717e-06, "logits/chosen": -2.008437156677246, "logits/rejected": -1.53653085231781, "logps/chosen": -413.0132751464844, "logps/rejected": -496.81884765625, "loss": 0.6792, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18872979283332825, "rewards/margins": 0.14261528849601746, "rewards/rejected": -0.3313451111316681, "step": 750 }, { "epoch": 0.06, "learning_rate": 2.8023598820059e-06, "logits/chosen": -2.0243372917175293, "logits/rejected": -1.554598093032837, "logps/chosen": -575.1486206054688, "logps/rejected": -657.1636962890625, "loss": 0.6835, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.41017085313796997, "rewards/margins": 0.1301431804895401, "rewards/rejected": -0.5403140783309937, "step": 760 }, { "epoch": 0.06, "learning_rate": 2.8392330383480827e-06, "logits/chosen": -2.05902099609375, "logits/rejected": -1.6450536251068115, "logps/chosen": -587.5533447265625, "logps/rejected": -688.0265502929688, "loss": 0.6836, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4509713053703308, "rewards/margins": 0.15440842509269714, "rewards/rejected": -0.6053797006607056, "step": 770 }, { "epoch": 0.06, "learning_rate": 2.876106194690266e-06, "logits/chosen": -2.2170329093933105, "logits/rejected": -1.56217622756958, "logps/chosen": -439.9380798339844, "logps/rejected": -531.1654052734375, "loss": 0.6827, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2894015312194824, "rewards/margins": 0.14231763780117035, "rewards/rejected": -0.43171921372413635, "step": 780 }, { "epoch": 0.06, "learning_rate": 2.912979351032449e-06, "logits/chosen": -2.2244606018066406, "logits/rejected": -2.0276694297790527, "logps/chosen": -442.876708984375, "logps/rejected": -519.1077880859375, "loss": 0.6836, "rewards/accuracies": 0.5, "rewards/chosen": -0.3157954216003418, "rewards/margins": 0.0947733223438263, "rewards/rejected": -0.4105687737464905, "step": 790 }, { "epoch": 0.06, "learning_rate": 2.9498525073746313e-06, "logits/chosen": -2.000953197479248, "logits/rejected": -1.5047358274459839, "logps/chosen": -516.7780151367188, "logps/rejected": -596.3982543945312, "loss": 0.6835, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3590072691440582, "rewards/margins": 0.11872158944606781, "rewards/rejected": -0.477728933095932, "step": 800 }, { "epoch": 0.06, "learning_rate": 2.9867256637168145e-06, "logits/chosen": -2.2532784938812256, "logits/rejected": -1.9786303043365479, "logps/chosen": -408.4088439941406, "logps/rejected": -502.13885498046875, "loss": 0.685, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24340829253196716, "rewards/margins": 0.10179108381271362, "rewards/rejected": -0.3451994061470032, "step": 810 }, { "epoch": 0.06, "learning_rate": 3.0235988200589972e-06, "logits/chosen": -2.2390873432159424, "logits/rejected": -1.9252674579620361, "logps/chosen": -386.433349609375, "logps/rejected": -445.1131896972656, "loss": 0.6835, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24789729714393616, "rewards/margins": 0.0969977155327797, "rewards/rejected": -0.34489500522613525, "step": 820 }, { "epoch": 0.06, "learning_rate": 3.0604719764011804e-06, "logits/chosen": -2.05058217048645, "logits/rejected": -1.6930038928985596, "logps/chosen": -572.7770385742188, "logps/rejected": -715.8151245117188, "loss": 0.6812, "rewards/accuracies": 0.625, "rewards/chosen": -0.4364268183708191, "rewards/margins": 0.11795775592327118, "rewards/rejected": -0.5543845891952515, "step": 830 }, { "epoch": 0.06, "learning_rate": 3.097345132743363e-06, "logits/chosen": -2.226712703704834, "logits/rejected": -1.6917667388916016, "logps/chosen": -592.3433837890625, "logps/rejected": -660.2044677734375, "loss": 0.685, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.41704291105270386, "rewards/margins": 0.12907369434833527, "rewards/rejected": -0.5461165308952332, "step": 840 }, { "epoch": 0.06, "learning_rate": 3.134218289085546e-06, "logits/chosen": -2.214346408843994, "logits/rejected": -1.7724472284317017, "logps/chosen": -623.2235717773438, "logps/rejected": -697.6844482421875, "loss": 0.6861, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4432212710380554, "rewards/margins": 0.09432069957256317, "rewards/rejected": -0.537541925907135, "step": 850 }, { "epoch": 0.06, "learning_rate": 3.1710914454277286e-06, "logits/chosen": -2.1937968730926514, "logits/rejected": -1.809098243713379, "logps/chosen": -589.6400146484375, "logps/rejected": -666.11962890625, "loss": 0.6858, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4135228097438812, "rewards/margins": 0.1091686263680458, "rewards/rejected": -0.5226914286613464, "step": 860 }, { "epoch": 0.06, "learning_rate": 3.2079646017699117e-06, "logits/chosen": -2.309391498565674, "logits/rejected": -1.983241081237793, "logps/chosen": -486.3695373535156, "logps/rejected": -532.5344848632812, "loss": 0.6874, "rewards/accuracies": 0.625, "rewards/chosen": -0.3160635828971863, "rewards/margins": 0.05506597086787224, "rewards/rejected": -0.37112957239151, "step": 870 }, { "epoch": 0.06, "learning_rate": 3.244837758112095e-06, "logits/chosen": -2.1608099937438965, "logits/rejected": -1.8877979516983032, "logps/chosen": -466.25439453125, "logps/rejected": -553.8875732421875, "loss": 0.6842, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3247961401939392, "rewards/margins": 0.09231477975845337, "rewards/rejected": -0.41711097955703735, "step": 880 }, { "epoch": 0.07, "learning_rate": 3.2817109144542776e-06, "logits/chosen": -2.380635976791382, "logits/rejected": -1.7805322408676147, "logps/chosen": -466.38983154296875, "logps/rejected": -616.3292236328125, "loss": 0.678, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.29668086767196655, "rewards/margins": 0.18588514626026154, "rewards/rejected": -0.48256605863571167, "step": 890 }, { "epoch": 0.07, "learning_rate": 3.3185840707964607e-06, "logits/chosen": -2.4013314247131348, "logits/rejected": -1.9716737270355225, "logps/chosen": -447.8121032714844, "logps/rejected": -542.5108642578125, "loss": 0.6817, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.26833659410476685, "rewards/margins": 0.12271261215209961, "rewards/rejected": -0.39104920625686646, "step": 900 }, { "epoch": 0.07, "learning_rate": 3.355457227138643e-06, "logits/chosen": -2.162184000015259, "logits/rejected": -1.6158878803253174, "logps/chosen": -462.2652282714844, "logps/rejected": -625.208984375, "loss": 0.6759, "rewards/accuracies": 0.75, "rewards/chosen": -0.30406153202056885, "rewards/margins": 0.19953803718090057, "rewards/rejected": -0.5035995244979858, "step": 910 }, { "epoch": 0.07, "learning_rate": 3.392330383480826e-06, "logits/chosen": -2.2231879234313965, "logits/rejected": -1.597755789756775, "logps/chosen": -605.8489990234375, "logps/rejected": -732.6864624023438, "loss": 0.6815, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4336002767086029, "rewards/margins": 0.16193921864032745, "rewards/rejected": -0.5955394506454468, "step": 920 }, { "epoch": 0.07, "learning_rate": 3.429203539823009e-06, "logits/chosen": -2.1933951377868652, "logits/rejected": -1.711941123008728, "logps/chosen": -557.3629150390625, "logps/rejected": -663.8624267578125, "loss": 0.6834, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.39116525650024414, "rewards/margins": 0.14357350766658783, "rewards/rejected": -0.5347387790679932, "step": 930 }, { "epoch": 0.07, "learning_rate": 3.466076696165192e-06, "logits/chosen": -2.1235463619232178, "logits/rejected": -1.6818050146102905, "logps/chosen": -503.57977294921875, "logps/rejected": -626.7802734375, "loss": 0.6806, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.33053433895111084, "rewards/margins": 0.1512194126844406, "rewards/rejected": -0.48175373673439026, "step": 940 }, { "epoch": 0.07, "learning_rate": 3.5029498525073752e-06, "logits/chosen": -2.3788585662841797, "logits/rejected": -1.547771692276001, "logps/chosen": -464.90838623046875, "logps/rejected": -615.4306640625, "loss": 0.6821, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3018938899040222, "rewards/margins": 0.200532004237175, "rewards/rejected": -0.5024259090423584, "step": 950 }, { "epoch": 0.07, "learning_rate": 3.539823008849558e-06, "logits/chosen": -2.2488982677459717, "logits/rejected": -1.705553650856018, "logps/chosen": -363.9290466308594, "logps/rejected": -515.9874267578125, "loss": 0.6796, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2080310881137848, "rewards/margins": 0.1806962490081787, "rewards/rejected": -0.3887273371219635, "step": 960 }, { "epoch": 0.07, "learning_rate": 3.576696165191741e-06, "logits/chosen": -2.3850619792938232, "logits/rejected": -1.9208471775054932, "logps/chosen": -437.1626892089844, "logps/rejected": -518.014892578125, "loss": 0.6817, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2581685185432434, "rewards/margins": 0.11867034435272217, "rewards/rejected": -0.3768388628959656, "step": 970 }, { "epoch": 0.07, "learning_rate": 3.6135693215339234e-06, "logits/chosen": -2.041639804840088, "logits/rejected": -1.674957275390625, "logps/chosen": -612.27392578125, "logps/rejected": -665.5975952148438, "loss": 0.6874, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.42934975028038025, "rewards/margins": 0.11096705496311188, "rewards/rejected": -0.5403168201446533, "step": 980 }, { "epoch": 0.07, "learning_rate": 3.6504424778761066e-06, "logits/chosen": -2.2475674152374268, "logits/rejected": -1.7906007766723633, "logps/chosen": -393.2090759277344, "logps/rejected": -470.3455505371094, "loss": 0.6826, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24841980636119843, "rewards/margins": 0.11014661937952042, "rewards/rejected": -0.35856643319129944, "step": 990 }, { "epoch": 0.07, "learning_rate": 3.6873156342182893e-06, "logits/chosen": -2.4129297733306885, "logits/rejected": -1.8394804000854492, "logps/chosen": -352.4050598144531, "logps/rejected": -491.90753173828125, "loss": 0.6784, "rewards/accuracies": 0.75, "rewards/chosen": -0.2062232792377472, "rewards/margins": 0.1854531466960907, "rewards/rejected": -0.39167648553848267, "step": 1000 }, { "epoch": 0.07, "learning_rate": 3.7241887905604724e-06, "logits/chosen": -2.1150364875793457, "logits/rejected": -1.6016334295272827, "logps/chosen": -446.64080810546875, "logps/rejected": -589.5285034179688, "loss": 0.6794, "rewards/accuracies": 0.75, "rewards/chosen": -0.31510692834854126, "rewards/margins": 0.1597348153591156, "rewards/rejected": -0.47484177350997925, "step": 1010 }, { "epoch": 0.08, "learning_rate": 3.7610619469026547e-06, "logits/chosen": -2.196310043334961, "logits/rejected": -1.6023523807525635, "logps/chosen": -607.8207397460938, "logps/rejected": -715.1724853515625, "loss": 0.6819, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.40577611327171326, "rewards/margins": 0.15393945574760437, "rewards/rejected": -0.5597155094146729, "step": 1020 }, { "epoch": 0.08, "learning_rate": 3.797935103244838e-06, "logits/chosen": -2.15210223197937, "logits/rejected": -1.9206292629241943, "logps/chosen": -409.7123107910156, "logps/rejected": -546.5791625976562, "loss": 0.6813, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.26498034596443176, "rewards/margins": 0.1483624428510666, "rewards/rejected": -0.41334280371665955, "step": 1030 }, { "epoch": 0.08, "learning_rate": 3.8348082595870215e-06, "logits/chosen": -2.0452022552490234, "logits/rejected": -1.3880304098129272, "logps/chosen": -733.4439697265625, "logps/rejected": -877.87841796875, "loss": 0.6841, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5820300579071045, "rewards/margins": 0.17910122871398926, "rewards/rejected": -0.7611313462257385, "step": 1040 }, { "epoch": 0.08, "learning_rate": 3.871681415929203e-06, "logits/chosen": -2.3809990882873535, "logits/rejected": -1.8621429204940796, "logps/chosen": -677.9534301757812, "logps/rejected": -749.090576171875, "loss": 0.684, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5204936861991882, "rewards/margins": 0.11613865941762924, "rewards/rejected": -0.6366323232650757, "step": 1050 }, { "epoch": 0.08, "learning_rate": 3.908554572271387e-06, "logits/chosen": -2.2403066158294678, "logits/rejected": -1.5676146745681763, "logps/chosen": -530.4642333984375, "logps/rejected": -637.6590576171875, "loss": 0.6803, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.32126384973526, "rewards/margins": 0.19333605468273163, "rewards/rejected": -0.5145999193191528, "step": 1060 }, { "epoch": 0.08, "learning_rate": 3.94542772861357e-06, "logits/chosen": -2.2002460956573486, "logits/rejected": -1.6595430374145508, "logps/chosen": -502.99078369140625, "logps/rejected": -579.5460205078125, "loss": 0.6847, "rewards/accuracies": 0.625, "rewards/chosen": -0.3592817187309265, "rewards/margins": 0.1258457899093628, "rewards/rejected": -0.4851275384426117, "step": 1070 }, { "epoch": 0.08, "learning_rate": 3.982300884955752e-06, "logits/chosen": -2.548799514770508, "logits/rejected": -1.917360544204712, "logps/chosen": -346.53302001953125, "logps/rejected": -482.77984619140625, "loss": 0.6799, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18105648458003998, "rewards/margins": 0.17847676575183868, "rewards/rejected": -0.3595332205295563, "step": 1080 }, { "epoch": 0.08, "learning_rate": 4.019174041297935e-06, "logits/chosen": -2.4243979454040527, "logits/rejected": -1.8537073135375977, "logps/chosen": -526.8582763671875, "logps/rejected": -584.9217529296875, "loss": 0.6841, "rewards/accuracies": 0.625, "rewards/chosen": -0.3302248418331146, "rewards/margins": 0.14908406138420105, "rewards/rejected": -0.47930899262428284, "step": 1090 }, { "epoch": 0.08, "learning_rate": 4.056047197640119e-06, "logits/chosen": -2.4029576778411865, "logits/rejected": -1.6008878946304321, "logps/chosen": -419.37872314453125, "logps/rejected": -508.1085510253906, "loss": 0.6787, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2175808697938919, "rewards/margins": 0.16375145316123962, "rewards/rejected": -0.38133230805397034, "step": 1100 }, { "epoch": 0.08, "learning_rate": 4.092920353982301e-06, "logits/chosen": -2.1061878204345703, "logits/rejected": -1.5732238292694092, "logps/chosen": -555.63134765625, "logps/rejected": -651.214111328125, "loss": 0.6886, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3517112135887146, "rewards/margins": 0.13245999813079834, "rewards/rejected": -0.48417121171951294, "step": 1110 }, { "epoch": 0.08, "learning_rate": 4.129793510324484e-06, "logits/chosen": -2.2837748527526855, "logits/rejected": -1.6197831630706787, "logps/chosen": -426.89190673828125, "logps/rejected": -557.3675537109375, "loss": 0.6779, "rewards/accuracies": 0.75, "rewards/chosen": -0.25485527515411377, "rewards/margins": 0.18350477516651154, "rewards/rejected": -0.4383600652217865, "step": 1120 }, { "epoch": 0.08, "learning_rate": 4.166666666666667e-06, "logits/chosen": -2.3311004638671875, "logits/rejected": -1.7740538120269775, "logps/chosen": -407.7346496582031, "logps/rejected": -492.531982421875, "loss": 0.6828, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24309399724006653, "rewards/margins": 0.12386111915111542, "rewards/rejected": -0.36695510149002075, "step": 1130 }, { "epoch": 0.08, "learning_rate": 4.20353982300885e-06, "logits/chosen": -2.2422280311584473, "logits/rejected": -1.726387619972229, "logps/chosen": -354.3223571777344, "logps/rejected": -436.421630859375, "loss": 0.6812, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18939505517482758, "rewards/margins": 0.14567282795906067, "rewards/rejected": -0.33506789803504944, "step": 1140 }, { "epoch": 0.08, "learning_rate": 4.240412979351033e-06, "logits/chosen": -2.3866708278656006, "logits/rejected": -1.8238232135772705, "logps/chosen": -361.3077697753906, "logps/rejected": -457.0265197753906, "loss": 0.6802, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2011115998029709, "rewards/margins": 0.16210977733135223, "rewards/rejected": -0.3632214069366455, "step": 1150 }, { "epoch": 0.09, "learning_rate": 4.277286135693216e-06, "logits/chosen": -2.0799617767333984, "logits/rejected": -1.6660614013671875, "logps/chosen": -500.35498046875, "logps/rejected": -595.1367797851562, "loss": 0.6821, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35124877095222473, "rewards/margins": 0.12214861810207367, "rewards/rejected": -0.4733973443508148, "step": 1160 }, { "epoch": 0.09, "learning_rate": 4.314159292035399e-06, "logits/chosen": -2.3800015449523926, "logits/rejected": -1.7481025457382202, "logps/chosen": -649.2769775390625, "logps/rejected": -761.2306518554688, "loss": 0.68, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.45055022835731506, "rewards/margins": 0.17093273997306824, "rewards/rejected": -0.6214829087257385, "step": 1170 }, { "epoch": 0.09, "learning_rate": 4.351032448377581e-06, "logits/chosen": -2.3247861862182617, "logits/rejected": -1.9131265878677368, "logps/chosen": -595.1065673828125, "logps/rejected": -683.60302734375, "loss": 0.6837, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.40310144424438477, "rewards/margins": 0.11408121883869171, "rewards/rejected": -0.5171826481819153, "step": 1180 }, { "epoch": 0.09, "learning_rate": 4.387905604719764e-06, "logits/chosen": -2.258409023284912, "logits/rejected": -1.5694068670272827, "logps/chosen": -496.86688232421875, "logps/rejected": -660.8016967773438, "loss": 0.6789, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3202557861804962, "rewards/margins": 0.18456882238388062, "rewards/rejected": -0.5048245787620544, "step": 1190 }, { "epoch": 0.09, "learning_rate": 4.424778761061948e-06, "logits/chosen": -2.450465679168701, "logits/rejected": -1.9164297580718994, "logps/chosen": -352.10906982421875, "logps/rejected": -495.384521484375, "loss": 0.678, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1752665638923645, "rewards/margins": 0.16207031905651093, "rewards/rejected": -0.3373368978500366, "step": 1200 }, { "epoch": 0.09, "learning_rate": 4.46165191740413e-06, "logits/chosen": -2.292287826538086, "logits/rejected": -1.6534093618392944, "logps/chosen": -317.5352478027344, "logps/rejected": -437.82855224609375, "loss": 0.683, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12927882373332977, "rewards/margins": 0.1847599595785141, "rewards/rejected": -0.31403878331184387, "step": 1210 }, { "epoch": 0.09, "learning_rate": 4.498525073746313e-06, "logits/chosen": -2.2857823371887207, "logits/rejected": -1.7767950296401978, "logps/chosen": -413.69171142578125, "logps/rejected": -466.34100341796875, "loss": 0.6809, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20510134100914001, "rewards/margins": 0.12268351018428802, "rewards/rejected": -0.32778483629226685, "step": 1220 }, { "epoch": 0.09, "learning_rate": 4.535398230088496e-06, "logits/chosen": -2.414311170578003, "logits/rejected": -1.8810408115386963, "logps/chosen": -470.18109130859375, "logps/rejected": -634.0155029296875, "loss": 0.6786, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.27094560861587524, "rewards/margins": 0.18956677615642548, "rewards/rejected": -0.46051234006881714, "step": 1230 }, { "epoch": 0.09, "learning_rate": 4.5722713864306786e-06, "logits/chosen": -2.3036561012268066, "logits/rejected": -1.8412039279937744, "logps/chosen": -755.4630126953125, "logps/rejected": -850.5755615234375, "loss": 0.685, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5288673639297485, "rewards/margins": 0.12043911218643188, "rewards/rejected": -0.6493064165115356, "step": 1240 }, { "epoch": 0.09, "learning_rate": 4.609144542772861e-06, "logits/chosen": -2.1148083209991455, "logits/rejected": -1.7618348598480225, "logps/chosen": -526.0289306640625, "logps/rejected": -619.3069458007812, "loss": 0.6835, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3862895667552948, "rewards/margins": 0.12125791609287262, "rewards/rejected": -0.5075474977493286, "step": 1250 }, { "epoch": 0.09, "learning_rate": 4.646017699115045e-06, "logits/chosen": -2.11248517036438, "logits/rejected": -1.8414957523345947, "logps/chosen": -445.2958068847656, "logps/rejected": -518.60400390625, "loss": 0.6877, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.31975361704826355, "rewards/margins": 0.08607500046491623, "rewards/rejected": -0.4058286249637604, "step": 1260 }, { "epoch": 0.09, "learning_rate": 4.682890855457228e-06, "logits/chosen": -2.1566081047058105, "logits/rejected": -1.4720865488052368, "logps/chosen": -426.45184326171875, "logps/rejected": -560.9105834960938, "loss": 0.6786, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2327970564365387, "rewards/margins": 0.16142287850379944, "rewards/rejected": -0.3942199945449829, "step": 1270 }, { "epoch": 0.09, "learning_rate": 4.71976401179941e-06, "logits/chosen": -2.0924675464630127, "logits/rejected": -1.6457946300506592, "logps/chosen": -439.336181640625, "logps/rejected": -541.0797119140625, "loss": 0.6842, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.27090153098106384, "rewards/margins": 0.13351964950561523, "rewards/rejected": -0.4044211506843567, "step": 1280 }, { "epoch": 0.1, "learning_rate": 4.756637168141594e-06, "logits/chosen": -2.4377827644348145, "logits/rejected": -1.861657738685608, "logps/chosen": -423.02587890625, "logps/rejected": -604.1359252929688, "loss": 0.677, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2630787193775177, "rewards/margins": 0.21312209963798523, "rewards/rejected": -0.47620075941085815, "step": 1290 }, { "epoch": 0.1, "learning_rate": 4.793510324483776e-06, "logits/chosen": -1.9843218326568604, "logits/rejected": -1.4334615468978882, "logps/chosen": -588.7548217773438, "logps/rejected": -761.0653686523438, "loss": 0.6759, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.45404118299484253, "rewards/margins": 0.19474050402641296, "rewards/rejected": -0.6487816572189331, "step": 1300 }, { "epoch": 0.1, "learning_rate": 4.830383480825959e-06, "logits/chosen": -2.3460376262664795, "logits/rejected": -2.0704102516174316, "logps/chosen": -545.052490234375, "logps/rejected": -649.2657470703125, "loss": 0.6812, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.34260955452919006, "rewards/margins": 0.14627234637737274, "rewards/rejected": -0.4888818860054016, "step": 1310 }, { "epoch": 0.1, "learning_rate": 4.867256637168142e-06, "logits/chosen": -1.9571654796600342, "logits/rejected": -1.4925177097320557, "logps/chosen": -645.50634765625, "logps/rejected": -774.8655395507812, "loss": 0.6797, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.51459801197052, "rewards/margins": 0.14269261062145233, "rewards/rejected": -0.6572905778884888, "step": 1320 }, { "epoch": 0.1, "learning_rate": 4.904129793510325e-06, "logits/chosen": -2.40010404586792, "logits/rejected": -1.677821159362793, "logps/chosen": -504.69622802734375, "logps/rejected": -675.3890380859375, "loss": 0.6756, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.36652663350105286, "rewards/margins": 0.20285025238990784, "rewards/rejected": -0.5693768858909607, "step": 1330 }, { "epoch": 0.1, "learning_rate": 4.9410029498525075e-06, "logits/chosen": -2.3933727741241455, "logits/rejected": -1.8548587560653687, "logps/chosen": -516.3531494140625, "logps/rejected": -672.4573974609375, "loss": 0.6787, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3870595097541809, "rewards/margins": 0.16110952198505402, "rewards/rejected": -0.5481690764427185, "step": 1340 }, { "epoch": 0.1, "learning_rate": 4.97787610619469e-06, "logits/chosen": -2.023017168045044, "logits/rejected": -1.5751224756240845, "logps/chosen": -664.4542236328125, "logps/rejected": -731.3201293945312, "loss": 0.6843, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5082038044929504, "rewards/margins": 0.10646750032901764, "rewards/rejected": -0.6146713495254517, "step": 1350 }, { "epoch": 0.1, "learning_rate": 4.9999986740142755e-06, "logits/chosen": -2.203518867492676, "logits/rejected": -1.6165403127670288, "logps/chosen": -579.8915405273438, "logps/rejected": -722.529296875, "loss": 0.6808, "rewards/accuracies": 0.75, "rewards/chosen": -0.42667460441589355, "rewards/margins": 0.17639054358005524, "rewards/rejected": -0.6030651330947876, "step": 1360 }, { "epoch": 0.1, "learning_rate": 4.999983756691033e-06, "logits/chosen": -2.162766933441162, "logits/rejected": -1.740363359451294, "logps/chosen": -504.3063049316406, "logps/rejected": -615.3010864257812, "loss": 0.6856, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.38426119089126587, "rewards/margins": 0.10067132860422134, "rewards/rejected": -0.4849325716495514, "step": 1370 }, { "epoch": 0.1, "learning_rate": 4.99995226466162e-06, "logits/chosen": -2.436600923538208, "logits/rejected": -1.8865001201629639, "logps/chosen": -458.67626953125, "logps/rejected": -556.364013671875, "loss": 0.6801, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.26274651288986206, "rewards/margins": 0.17691782116889954, "rewards/rejected": -0.4396643042564392, "step": 1380 }, { "epoch": 0.1, "learning_rate": 4.9999041981348285e-06, "logits/chosen": -2.3823444843292236, "logits/rejected": -1.9588959217071533, "logps/chosen": -511.38580322265625, "logps/rejected": -667.13134765625, "loss": 0.6778, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.33402758836746216, "rewards/margins": 0.14249157905578613, "rewards/rejected": -0.4765191674232483, "step": 1390 }, { "epoch": 0.1, "learning_rate": 4.999839557429336e-06, "logits/chosen": -2.097839832305908, "logits/rejected": -1.772637128829956, "logps/chosen": -674.7249755859375, "logps/rejected": -794.2020874023438, "loss": 0.685, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.5652109384536743, "rewards/margins": 0.1257316917181015, "rewards/rejected": -0.6909425854682922, "step": 1400 }, { "epoch": 0.1, "learning_rate": 4.999758342973705e-06, "logits/chosen": -2.3492026329040527, "logits/rejected": -1.5902773141860962, "logps/chosen": -629.7722778320312, "logps/rejected": -843.4520263671875, "loss": 0.6847, "rewards/accuracies": 0.75, "rewards/chosen": -0.45843490958213806, "rewards/margins": 0.24973320960998535, "rewards/rejected": -0.7081681489944458, "step": 1410 }, { "epoch": 0.1, "learning_rate": 4.999660555306381e-06, "logits/chosen": -2.3812742233276367, "logits/rejected": -1.657311201095581, "logps/chosen": -424.9800720214844, "logps/rejected": -512.1578979492188, "loss": 0.6794, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2824508547782898, "rewards/margins": 0.14605812728405, "rewards/rejected": -0.428508996963501, "step": 1420 }, { "epoch": 0.11, "learning_rate": 4.999546195075689e-06, "logits/chosen": -2.340485095977783, "logits/rejected": -1.9159908294677734, "logps/chosen": -446.9598083496094, "logps/rejected": -532.621826171875, "loss": 0.6816, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3186238706111908, "rewards/margins": 0.11151720583438873, "rewards/rejected": -0.4301411211490631, "step": 1430 }, { "epoch": 0.11, "learning_rate": 4.999415263039831e-06, "logits/chosen": -2.312147855758667, "logits/rejected": -1.8298180103302002, "logps/chosen": -398.1839294433594, "logps/rejected": -487.27789306640625, "loss": 0.6779, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24725277721881866, "rewards/margins": 0.15804463624954224, "rewards/rejected": -0.4052974283695221, "step": 1440 }, { "epoch": 0.11, "learning_rate": 4.999267760066874e-06, "logits/chosen": -2.4051074981689453, "logits/rejected": -1.734379529953003, "logps/chosen": -512.5437622070312, "logps/rejected": -577.84423828125, "loss": 0.6803, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.29005980491638184, "rewards/margins": 0.12892451882362366, "rewards/rejected": -0.4189843237400055, "step": 1450 }, { "epoch": 0.11, "learning_rate": 4.999103687134752e-06, "logits/chosen": -2.2535529136657715, "logits/rejected": -1.7716491222381592, "logps/chosen": -561.8475952148438, "logps/rejected": -702.7966918945312, "loss": 0.6827, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.46509090065956116, "rewards/margins": 0.14355400204658508, "rewards/rejected": -0.6086449027061462, "step": 1460 }, { "epoch": 0.11, "learning_rate": 4.998923045331258e-06, "logits/chosen": -2.1069467067718506, "logits/rejected": -1.6116771697998047, "logps/chosen": -629.4188232421875, "logps/rejected": -750.4222412109375, "loss": 0.6818, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4656476080417633, "rewards/margins": 0.15705880522727966, "rewards/rejected": -0.622706413269043, "step": 1470 }, { "epoch": 0.11, "learning_rate": 4.998725835854031e-06, "logits/chosen": -2.234212875366211, "logits/rejected": -1.6214549541473389, "logps/chosen": -558.7271118164062, "logps/rejected": -705.9249877929688, "loss": 0.6784, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.40690669417381287, "rewards/margins": 0.1894649714231491, "rewards/rejected": -0.5963717103004456, "step": 1480 }, { "epoch": 0.11, "learning_rate": 4.998512060010559e-06, "logits/chosen": -2.4973812103271484, "logits/rejected": -1.9107367992401123, "logps/chosen": -398.1483154296875, "logps/rejected": -500.00250244140625, "loss": 0.6863, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2567351758480072, "rewards/margins": 0.1284058392047882, "rewards/rejected": -0.385140985250473, "step": 1490 }, { "epoch": 0.11, "learning_rate": 4.998281719218156e-06, "logits/chosen": -2.372483968734741, "logits/rejected": -1.7946888208389282, "logps/chosen": -406.0853576660156, "logps/rejected": -476.80950927734375, "loss": 0.6821, "rewards/accuracies": 0.75, "rewards/chosen": -0.19788196682929993, "rewards/margins": 0.13486209511756897, "rewards/rejected": -0.3327440917491913, "step": 1500 }, { "epoch": 0.11, "learning_rate": 4.998034815003967e-06, "logits/chosen": -2.1081461906433105, "logits/rejected": -1.9837589263916016, "logps/chosen": -404.35400390625, "logps/rejected": -488.4461364746094, "loss": 0.687, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2517507076263428, "rewards/margins": 0.06289561092853546, "rewards/rejected": -0.31464633345603943, "step": 1510 }, { "epoch": 0.11, "learning_rate": 4.9977713490049475e-06, "logits/chosen": -2.0314440727233887, "logits/rejected": -1.8076646327972412, "logps/chosen": -527.1669311523438, "logps/rejected": -607.9647216796875, "loss": 0.6873, "rewards/accuracies": 0.625, "rewards/chosen": -0.37386995553970337, "rewards/margins": 0.08554521203041077, "rewards/rejected": -0.45941513776779175, "step": 1520 }, { "epoch": 0.11, "learning_rate": 4.997491322967857e-06, "logits/chosen": -2.430171489715576, "logits/rejected": -1.8794803619384766, "logps/chosen": -426.8192443847656, "logps/rejected": -565.7362670898438, "loss": 0.681, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2968435287475586, "rewards/margins": 0.14570651948451996, "rewards/rejected": -0.44255003333091736, "step": 1530 }, { "epoch": 0.11, "learning_rate": 4.9971947387492485e-06, "logits/chosen": -2.104213237762451, "logits/rejected": -1.7967092990875244, "logps/chosen": -619.91064453125, "logps/rejected": -715.5885009765625, "loss": 0.6837, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4618099629878998, "rewards/margins": 0.1259414106607437, "rewards/rejected": -0.5877513885498047, "step": 1540 }, { "epoch": 0.11, "learning_rate": 4.996881598315452e-06, "logits/chosen": -2.348878860473633, "logits/rejected": -1.3788615465164185, "logps/chosen": -645.3602294921875, "logps/rejected": -776.603759765625, "loss": 0.6775, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.44906121492385864, "rewards/margins": 0.2209613025188446, "rewards/rejected": -0.6700226068496704, "step": 1550 }, { "epoch": 0.12, "learning_rate": 4.996551903742566e-06, "logits/chosen": -2.2943968772888184, "logits/rejected": -1.547495722770691, "logps/chosen": -569.0314331054688, "logps/rejected": -656.4037475585938, "loss": 0.6804, "rewards/accuracies": 0.75, "rewards/chosen": -0.39018338918685913, "rewards/margins": 0.14852651953697205, "rewards/rejected": -0.5387099385261536, "step": 1560 }, { "epoch": 0.12, "learning_rate": 4.996205657216441e-06, "logits/chosen": -2.163938522338867, "logits/rejected": -1.9062862396240234, "logps/chosen": -540.4224853515625, "logps/rejected": -636.2813720703125, "loss": 0.6807, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.41015562415122986, "rewards/margins": 0.10229100286960602, "rewards/rejected": -0.5124467015266418, "step": 1570 }, { "epoch": 0.12, "learning_rate": 4.995842861032665e-06, "logits/chosen": -2.6782803535461426, "logits/rejected": -2.22416353225708, "logps/chosen": -431.62738037109375, "logps/rejected": -494.66229248046875, "loss": 0.683, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23850715160369873, "rewards/margins": 0.10862990468740463, "rewards/rejected": -0.34713706374168396, "step": 1580 }, { "epoch": 0.12, "learning_rate": 4.9954635175965505e-06, "logits/chosen": -2.3216071128845215, "logits/rejected": -2.018397092819214, "logps/chosen": -433.97625732421875, "logps/rejected": -614.004638671875, "loss": 0.6827, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.26365989446640015, "rewards/margins": 0.20244243741035461, "rewards/rejected": -0.46610236167907715, "step": 1590 }, { "epoch": 0.12, "learning_rate": 4.995067629423116e-06, "logits/chosen": -2.3307671546936035, "logits/rejected": -1.978761911392212, "logps/chosen": -385.2848815917969, "logps/rejected": -537.2488403320312, "loss": 0.6798, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22916534543037415, "rewards/margins": 0.18945543467998505, "rewards/rejected": -0.4186207354068756, "step": 1600 }, { "epoch": 0.12, "learning_rate": 4.994655199137071e-06, "logits/chosen": -2.2158093452453613, "logits/rejected": -1.5250235795974731, "logps/chosen": -391.8963623046875, "logps/rejected": -543.1158447265625, "loss": 0.6814, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24648427963256836, "rewards/margins": 0.17746660113334656, "rewards/rejected": -0.42395085096359253, "step": 1610 }, { "epoch": 0.12, "learning_rate": 4.9942262294727966e-06, "logits/chosen": -2.3120718002319336, "logits/rejected": -1.5733588933944702, "logps/chosen": -423.83447265625, "logps/rejected": -563.5753173828125, "loss": 0.6778, "rewards/accuracies": 0.625, "rewards/chosen": -0.28222206234931946, "rewards/margins": 0.2009235918521881, "rewards/rejected": -0.4831456243991852, "step": 1620 }, { "epoch": 0.12, "learning_rate": 4.993780723274331e-06, "logits/chosen": -2.2065627574920654, "logits/rejected": -1.6709009408950806, "logps/chosen": -488.39788818359375, "logps/rejected": -612.5750122070312, "loss": 0.6767, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.335333913564682, "rewards/margins": 0.16509655117988586, "rewards/rejected": -0.5004304647445679, "step": 1630 }, { "epoch": 0.12, "learning_rate": 4.993318683495345e-06, "logits/chosen": -2.001959800720215, "logits/rejected": -1.5622756481170654, "logps/chosen": -429.49432373046875, "logps/rejected": -553.2626953125, "loss": 0.6847, "rewards/accuracies": 0.75, "rewards/chosen": -0.27471622824668884, "rewards/margins": 0.147655189037323, "rewards/rejected": -0.42237138748168945, "step": 1640 }, { "epoch": 0.12, "learning_rate": 4.992840113199131e-06, "logits/chosen": -2.1642422676086426, "logits/rejected": -1.3439184427261353, "logps/chosen": -442.06463623046875, "logps/rejected": -550.1664428710938, "loss": 0.6769, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.26380711793899536, "rewards/margins": 0.15656888484954834, "rewards/rejected": -0.4203759729862213, "step": 1650 }, { "epoch": 0.12, "learning_rate": 4.992345015558572e-06, "logits/chosen": -2.3754501342773438, "logits/rejected": -1.8901695013046265, "logps/chosen": -410.57330322265625, "logps/rejected": -553.8487548828125, "loss": 0.6824, "rewards/accuracies": 0.75, "rewards/chosen": -0.25150084495544434, "rewards/margins": 0.1453794538974762, "rewards/rejected": -0.39688029885292053, "step": 1660 }, { "epoch": 0.12, "learning_rate": 4.991833393856129e-06, "logits/chosen": -2.31489896774292, "logits/rejected": -1.775649070739746, "logps/chosen": -452.884033203125, "logps/rejected": -503.65240478515625, "loss": 0.6848, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.27159538865089417, "rewards/margins": 0.11033513396978378, "rewards/rejected": -0.38193053007125854, "step": 1670 }, { "epoch": 0.12, "learning_rate": 4.9913052514838155e-06, "logits/chosen": -1.9960263967514038, "logits/rejected": -1.5168339014053345, "logps/chosen": -435.7156677246094, "logps/rejected": -542.5836791992188, "loss": 0.6755, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2650299668312073, "rewards/margins": 0.14305618405342102, "rewards/rejected": -0.4080861508846283, "step": 1680 }, { "epoch": 0.12, "learning_rate": 4.9907605919431776e-06, "logits/chosen": -2.120023012161255, "logits/rejected": -1.8562999963760376, "logps/chosen": -359.0880126953125, "logps/rejected": -538.4150390625, "loss": 0.6762, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2090507447719574, "rewards/margins": 0.15755270421504974, "rewards/rejected": -0.3666034936904907, "step": 1690 }, { "epoch": 0.13, "learning_rate": 4.990199418845267e-06, "logits/chosen": -1.9755375385284424, "logits/rejected": -1.525000810623169, "logps/chosen": -479.3567810058594, "logps/rejected": -582.7515869140625, "loss": 0.6814, "rewards/accuracies": 0.625, "rewards/chosen": -0.3163890540599823, "rewards/margins": 0.1619480401277542, "rewards/rejected": -0.47833719849586487, "step": 1700 }, { "epoch": 0.13, "learning_rate": 4.989621735910618e-06, "logits/chosen": -2.113497257232666, "logits/rejected": -1.5300015211105347, "logps/chosen": -480.3477478027344, "logps/rejected": -604.3723754882812, "loss": 0.6828, "rewards/accuracies": 0.625, "rewards/chosen": -0.3521083891391754, "rewards/margins": 0.1560477614402771, "rewards/rejected": -0.5081561207771301, "step": 1710 }, { "epoch": 0.13, "learning_rate": 4.989027546969227e-06, "logits/chosen": -1.8463550806045532, "logits/rejected": -1.6906096935272217, "logps/chosen": -467.3606872558594, "logps/rejected": -571.8316650390625, "loss": 0.6838, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3562212288379669, "rewards/margins": 0.10015169531106949, "rewards/rejected": -0.4563729763031006, "step": 1720 }, { "epoch": 0.13, "learning_rate": 4.988416855960523e-06, "logits/chosen": -1.8992698192596436, "logits/rejected": -1.7155160903930664, "logps/chosen": -555.6666259765625, "logps/rejected": -718.392822265625, "loss": 0.6814, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3875238001346588, "rewards/margins": 0.1414606124162674, "rewards/rejected": -0.5289844274520874, "step": 1730 }, { "epoch": 0.13, "learning_rate": 4.98778966693334e-06, "logits/chosen": -1.8190828561782837, "logits/rejected": -1.4704898595809937, "logps/chosen": -563.0492553710938, "logps/rejected": -746.2356567382812, "loss": 0.6811, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.38192418217658997, "rewards/margins": 0.1680021435022354, "rewards/rejected": -0.5499263405799866, "step": 1740 }, { "epoch": 0.13, "learning_rate": 4.987145984045896e-06, "logits/chosen": -2.3461108207702637, "logits/rejected": -1.6553192138671875, "logps/chosen": -443.93817138671875, "logps/rejected": -510.0069274902344, "loss": 0.6836, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24022233486175537, "rewards/margins": 0.12798359990119934, "rewards/rejected": -0.3682059645652771, "step": 1750 }, { "epoch": 0.13, "learning_rate": 4.986485811565759e-06, "logits/chosen": -2.1806788444519043, "logits/rejected": -1.6117578744888306, "logps/chosen": -422.3408203125, "logps/rejected": -541.0440063476562, "loss": 0.6791, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2597504258155823, "rewards/margins": 0.1776871383190155, "rewards/rejected": -0.4374374747276306, "step": 1760 }, { "epoch": 0.13, "learning_rate": 4.985809153869825e-06, "logits/chosen": -2.0932490825653076, "logits/rejected": -1.468040108680725, "logps/chosen": -367.44647216796875, "logps/rejected": -572.0079345703125, "loss": 0.6694, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23136265575885773, "rewards/margins": 0.2348192036151886, "rewards/rejected": -0.46618181467056274, "step": 1770 }, { "epoch": 0.13, "learning_rate": 4.985116015444283e-06, "logits/chosen": -2.1668906211853027, "logits/rejected": -1.609058141708374, "logps/chosen": -399.7013244628906, "logps/rejected": -569.4808959960938, "loss": 0.6766, "rewards/accuracies": 0.625, "rewards/chosen": -0.2272004783153534, "rewards/margins": 0.22542881965637207, "rewards/rejected": -0.45262932777404785, "step": 1780 }, { "epoch": 0.13, "learning_rate": 4.984406400884592e-06, "logits/chosen": -2.2610116004943848, "logits/rejected": -1.8019282817840576, "logps/chosen": -412.24688720703125, "logps/rejected": -514.0255737304688, "loss": 0.6759, "rewards/accuracies": 0.625, "rewards/chosen": -0.21862578392028809, "rewards/margins": 0.16368992626667023, "rewards/rejected": -0.3823156952857971, "step": 1790 }, { "epoch": 0.13, "learning_rate": 4.983680314895439e-06, "logits/chosen": -2.1407687664031982, "logits/rejected": -1.4283413887023926, "logps/chosen": -410.50885009765625, "logps/rejected": -563.0276489257812, "loss": 0.6824, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2649034559726715, "rewards/margins": 0.1948792040348053, "rewards/rejected": -0.4597826898097992, "step": 1800 }, { "epoch": 0.13, "learning_rate": 4.9829377622907235e-06, "logits/chosen": -2.109602928161621, "logits/rejected": -1.7818371057510376, "logps/chosen": -453.5262145996094, "logps/rejected": -626.7955932617188, "loss": 0.6777, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3216005861759186, "rewards/margins": 0.17053891718387604, "rewards/rejected": -0.4921395182609558, "step": 1810 }, { "epoch": 0.13, "learning_rate": 4.982178747993513e-06, "logits/chosen": -1.7917007207870483, "logits/rejected": -1.654822587966919, "logps/chosen": -661.7495727539062, "logps/rejected": -765.7009887695312, "loss": 0.6826, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4959152638912201, "rewards/margins": 0.10898177325725555, "rewards/rejected": -0.6048970222473145, "step": 1820 }, { "epoch": 0.13, "learning_rate": 4.981403277036017e-06, "logits/chosen": -1.933488130569458, "logits/rejected": -1.7148497104644775, "logps/chosen": -619.5118408203125, "logps/rejected": -724.2005615234375, "loss": 0.6825, "rewards/accuracies": 0.625, "rewards/chosen": -0.48629769682884216, "rewards/margins": 0.10983828455209732, "rewards/rejected": -0.5961359739303589, "step": 1830 }, { "epoch": 0.14, "learning_rate": 4.980611354559547e-06, "logits/chosen": -2.133218288421631, "logits/rejected": -1.6012442111968994, "logps/chosen": -639.1763916015625, "logps/rejected": -769.0061645507812, "loss": 0.6819, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.43269115686416626, "rewards/margins": 0.16681592166423798, "rewards/rejected": -0.5995070338249207, "step": 1840 }, { "epoch": 0.14, "learning_rate": 4.979802985814493e-06, "logits/chosen": -2.0501911640167236, "logits/rejected": -1.4100950956344604, "logps/chosen": -416.9120178222656, "logps/rejected": -578.8995361328125, "loss": 0.6776, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23287971317768097, "rewards/margins": 0.19540268182754517, "rewards/rejected": -0.42828240990638733, "step": 1850 }, { "epoch": 0.14, "learning_rate": 4.978978176160279e-06, "logits/chosen": -2.089853525161743, "logits/rejected": -1.5786672830581665, "logps/chosen": -487.01715087890625, "logps/rejected": -657.8482666015625, "loss": 0.6801, "rewards/accuracies": 0.75, "rewards/chosen": -0.34207263588905334, "rewards/margins": 0.2067478895187378, "rewards/rejected": -0.5488205552101135, "step": 1860 }, { "epoch": 0.14, "learning_rate": 4.978136931065331e-06, "logits/chosen": -2.0291571617126465, "logits/rejected": -1.5240166187286377, "logps/chosen": -333.6182556152344, "logps/rejected": -433.76708984375, "loss": 0.6809, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19811056554317474, "rewards/margins": 0.11670652776956558, "rewards/rejected": -0.31481707096099854, "step": 1870 }, { "epoch": 0.14, "learning_rate": 4.97727925610704e-06, "logits/chosen": -2.1615371704101562, "logits/rejected": -1.7094619274139404, "logps/chosen": -286.234619140625, "logps/rejected": -417.7418518066406, "loss": 0.6807, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18134553730487823, "rewards/margins": 0.14025284349918365, "rewards/rejected": -0.3215983808040619, "step": 1880 }, { "epoch": 0.14, "learning_rate": 4.9764051569717306e-06, "logits/chosen": -2.1933484077453613, "logits/rejected": -1.6335248947143555, "logps/chosen": -390.2818603515625, "logps/rejected": -569.0899658203125, "loss": 0.6777, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22306689620018005, "rewards/margins": 0.2175847291946411, "rewards/rejected": -0.44065165519714355, "step": 1890 }, { "epoch": 0.14, "learning_rate": 4.975514639454613e-06, "logits/chosen": -2.2326772212982178, "logits/rejected": -1.9248530864715576, "logps/chosen": -503.9562072753906, "logps/rejected": -579.269287109375, "loss": 0.6832, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.27650490403175354, "rewards/margins": 0.13666778802871704, "rewards/rejected": -0.4131726622581482, "step": 1900 }, { "epoch": 0.14, "learning_rate": 4.974607709459751e-06, "logits/chosen": -2.414437770843506, "logits/rejected": -1.786085844039917, "logps/chosen": -384.1307067871094, "logps/rejected": -501.8348083496094, "loss": 0.684, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1756352335214615, "rewards/margins": 0.17062394320964813, "rewards/rejected": -0.3462591767311096, "step": 1910 }, { "epoch": 0.14, "learning_rate": 4.973684373000025e-06, "logits/chosen": -2.307424306869507, "logits/rejected": -1.8163970708847046, "logps/chosen": -332.5538024902344, "logps/rejected": -441.11956787109375, "loss": 0.6825, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1627238690853119, "rewards/margins": 0.12535423040390015, "rewards/rejected": -0.28807809948921204, "step": 1920 }, { "epoch": 0.14, "learning_rate": 4.9727446361970855e-06, "logits/chosen": -2.1561312675476074, "logits/rejected": -1.7931798696517944, "logps/chosen": -405.51068115234375, "logps/rejected": -543.1822509765625, "loss": 0.6804, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.259022980928421, "rewards/margins": 0.1413717418909073, "rewards/rejected": -0.4003947377204895, "step": 1930 }, { "epoch": 0.14, "learning_rate": 4.971788505281319e-06, "logits/chosen": -2.176706075668335, "logits/rejected": -1.7507728338241577, "logps/chosen": -514.9395751953125, "logps/rejected": -606.131591796875, "loss": 0.6874, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.30425262451171875, "rewards/margins": 0.11466152966022491, "rewards/rejected": -0.41891416907310486, "step": 1940 }, { "epoch": 0.14, "learning_rate": 4.970815986591801e-06, "logits/chosen": -2.28208589553833, "logits/rejected": -1.7452185153961182, "logps/chosen": -485.8348083496094, "logps/rejected": -608.4963989257812, "loss": 0.6807, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.32239046692848206, "rewards/margins": 0.16924962401390076, "rewards/rejected": -0.4916400909423828, "step": 1950 }, { "epoch": 0.14, "learning_rate": 4.969827086576257e-06, "logits/chosen": -2.1605095863342285, "logits/rejected": -1.787091612815857, "logps/chosen": -396.59051513671875, "logps/rejected": -439.9991149902344, "loss": 0.6875, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18570740520954132, "rewards/margins": 0.09025705605745316, "rewards/rejected": -0.2759644389152527, "step": 1960 }, { "epoch": 0.15, "learning_rate": 4.968821811791022e-06, "logits/chosen": -2.1473629474639893, "logits/rejected": -1.7801759243011475, "logps/chosen": -372.379638671875, "logps/rejected": -500.5992736816406, "loss": 0.6821, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23996667563915253, "rewards/margins": 0.13499900698661804, "rewards/rejected": -0.3749656677246094, "step": 1970 }, { "epoch": 0.15, "learning_rate": 4.967800168900995e-06, "logits/chosen": -2.1184182167053223, "logits/rejected": -1.6724315881729126, "logps/chosen": -322.10064697265625, "logps/rejected": -410.78045654296875, "loss": 0.6853, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18654826283454895, "rewards/margins": 0.10092733800411224, "rewards/rejected": -0.2874756157398224, "step": 1980 }, { "epoch": 0.15, "learning_rate": 4.966762164679588e-06, "logits/chosen": -2.117048740386963, "logits/rejected": -1.4557167291641235, "logps/chosen": -359.3560485839844, "logps/rejected": -544.7975463867188, "loss": 0.6762, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21094302833080292, "rewards/margins": 0.21019192039966583, "rewards/rejected": -0.4211350083351135, "step": 1990 }, { "epoch": 0.15, "learning_rate": 4.965707806008695e-06, "logits/chosen": -2.0802249908447266, "logits/rejected": -1.810981035232544, "logps/chosen": -476.354248046875, "logps/rejected": -592.7877197265625, "loss": 0.6829, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3367021679878235, "rewards/margins": 0.13525357842445374, "rewards/rejected": -0.4719557762145996, "step": 2000 }, { "epoch": 0.15, "learning_rate": 4.964637099878634e-06, "logits/chosen": -2.3518433570861816, "logits/rejected": -1.6228231191635132, "logps/chosen": -486.20111083984375, "logps/rejected": -628.6722412109375, "loss": 0.6811, "rewards/accuracies": 0.75, "rewards/chosen": -0.2916795015335083, "rewards/margins": 0.20430822670459747, "rewards/rejected": -0.49598774313926697, "step": 2010 }, { "epoch": 0.15, "learning_rate": 4.9635500533881065e-06, "logits/chosen": -2.1780433654785156, "logits/rejected": -1.5268675088882446, "logps/chosen": -417.7193908691406, "logps/rejected": -568.0610961914062, "loss": 0.6769, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.26322078704833984, "rewards/margins": 0.19765083491802216, "rewards/rejected": -0.4608716070652008, "step": 2020 }, { "epoch": 0.15, "learning_rate": 4.962446673744151e-06, "logits/chosen": -2.182119369506836, "logits/rejected": -1.6661262512207031, "logps/chosen": -457.11944580078125, "logps/rejected": -606.3179321289062, "loss": 0.6784, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2587498128414154, "rewards/margins": 0.1764868199825287, "rewards/rejected": -0.4352366328239441, "step": 2030 }, { "epoch": 0.15, "learning_rate": 4.961326968262092e-06, "logits/chosen": -2.066885232925415, "logits/rejected": -1.6241986751556396, "logps/chosen": -436.0508728027344, "logps/rejected": -552.0404663085938, "loss": 0.6826, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.26955652236938477, "rewards/margins": 0.1401553452014923, "rewards/rejected": -0.4097118377685547, "step": 2040 }, { "epoch": 0.15, "learning_rate": 4.960190944365494e-06, "logits/chosen": -2.2318403720855713, "logits/rejected": -1.5527019500732422, "logps/chosen": -346.50836181640625, "logps/rejected": -491.1309509277344, "loss": 0.6816, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2079625129699707, "rewards/margins": 0.174115389585495, "rewards/rejected": -0.3820779025554657, "step": 2050 }, { "epoch": 0.15, "learning_rate": 4.95903860958611e-06, "logits/chosen": -1.9849462509155273, "logits/rejected": -1.547700047492981, "logps/chosen": -402.04718017578125, "logps/rejected": -546.7498779296875, "loss": 0.6812, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23943808674812317, "rewards/margins": 0.17839458584785461, "rewards/rejected": -0.4178326725959778, "step": 2060 }, { "epoch": 0.15, "learning_rate": 4.957869971563833e-06, "logits/chosen": -2.124795436859131, "logits/rejected": -1.705963373184204, "logps/chosen": -467.2562561035156, "logps/rejected": -561.8336181640625, "loss": 0.6803, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.29554709792137146, "rewards/margins": 0.14544320106506348, "rewards/rejected": -0.44099029898643494, "step": 2070 }, { "epoch": 0.15, "learning_rate": 4.9566850380466474e-06, "logits/chosen": -1.9426883459091187, "logits/rejected": -1.8043241500854492, "logps/chosen": -451.750244140625, "logps/rejected": -569.9832763671875, "loss": 0.6823, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.30993664264678955, "rewards/margins": 0.10469113290309906, "rewards/rejected": -0.41462773084640503, "step": 2080 }, { "epoch": 0.15, "learning_rate": 4.955483816890574e-06, "logits/chosen": -2.122488021850586, "logits/rejected": -1.8387778997421265, "logps/chosen": -467.6241149902344, "logps/rejected": -535.9173583984375, "loss": 0.6844, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2748633921146393, "rewards/margins": 0.09479127824306488, "rewards/rejected": -0.36965465545654297, "step": 2090 }, { "epoch": 0.15, "learning_rate": 4.95426631605962e-06, "logits/chosen": -2.26001238822937, "logits/rejected": -1.9809119701385498, "logps/chosen": -319.6461486816406, "logps/rejected": -392.9462890625, "loss": 0.6852, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1728908121585846, "rewards/margins": 0.10622484982013702, "rewards/rejected": -0.2791156470775604, "step": 2100 }, { "epoch": 0.16, "learning_rate": 4.953032543625724e-06, "logits/chosen": -2.0260212421417236, "logits/rejected": -1.580176591873169, "logps/chosen": -475.50238037109375, "logps/rejected": -570.3038330078125, "loss": 0.6854, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.30151301622390747, "rewards/margins": 0.12283631414175034, "rewards/rejected": -0.424349308013916, "step": 2110 }, { "epoch": 0.16, "learning_rate": 4.951782507768707e-06, "logits/chosen": -1.6824731826782227, "logits/rejected": -1.34992253780365, "logps/chosen": -444.5293884277344, "logps/rejected": -579.5582275390625, "loss": 0.6777, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2782571017742157, "rewards/margins": 0.16650907695293427, "rewards/rejected": -0.4447661340236664, "step": 2120 }, { "epoch": 0.16, "learning_rate": 4.950516216776212e-06, "logits/chosen": -2.009174346923828, "logits/rejected": -1.5655643939971924, "logps/chosen": -520.9489135742188, "logps/rejected": -618.607666015625, "loss": 0.6778, "rewards/accuracies": 0.75, "rewards/chosen": -0.34799692034721375, "rewards/margins": 0.14664670825004578, "rewards/rejected": -0.4946436285972595, "step": 2130 }, { "epoch": 0.16, "learning_rate": 4.949233679043655e-06, "logits/chosen": -1.9361282587051392, "logits/rejected": -1.3391456604003906, "logps/chosen": -541.0728759765625, "logps/rejected": -647.0679931640625, "loss": 0.6779, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3605917990207672, "rewards/margins": 0.17967431247234344, "rewards/rejected": -0.5402660369873047, "step": 2140 }, { "epoch": 0.16, "learning_rate": 4.947934903074166e-06, "logits/chosen": -1.9031493663787842, "logits/rejected": -1.4256095886230469, "logps/chosen": -546.0966186523438, "logps/rejected": -646.6503295898438, "loss": 0.6825, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3771653175354004, "rewards/margins": 0.14095620810985565, "rewards/rejected": -0.5181214809417725, "step": 2150 }, { "epoch": 0.16, "learning_rate": 4.946619897478532e-06, "logits/chosen": -1.9977432489395142, "logits/rejected": -1.48088800907135, "logps/chosen": -563.3961181640625, "logps/rejected": -737.5870361328125, "loss": 0.6797, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.381928414106369, "rewards/margins": 0.18436439335346222, "rewards/rejected": -0.5662928819656372, "step": 2160 }, { "epoch": 0.16, "learning_rate": 4.945288670975142e-06, "logits/chosen": -2.1250360012054443, "logits/rejected": -1.6041091680526733, "logps/chosen": -439.52655029296875, "logps/rejected": -605.1467895507812, "loss": 0.6793, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.29807743430137634, "rewards/margins": 0.16770774126052856, "rewards/rejected": -0.4657851755619049, "step": 2170 }, { "epoch": 0.16, "learning_rate": 4.943941232389931e-06, "logits/chosen": -2.141709089279175, "logits/rejected": -1.7756226062774658, "logps/chosen": -463.3104553222656, "logps/rejected": -616.2620849609375, "loss": 0.6816, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3089509904384613, "rewards/margins": 0.150753915309906, "rewards/rejected": -0.4597049355506897, "step": 2180 }, { "epoch": 0.16, "learning_rate": 4.9425775906563135e-06, "logits/chosen": -1.8617595434188843, "logits/rejected": -1.3774664402008057, "logps/chosen": -436.5328063964844, "logps/rejected": -604.3076782226562, "loss": 0.6804, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2912885248661041, "rewards/margins": 0.1966002881526947, "rewards/rejected": -0.4878888726234436, "step": 2190 }, { "epoch": 0.16, "learning_rate": 4.941197754815133e-06, "logits/chosen": -2.019990921020508, "logits/rejected": -1.6650947332382202, "logps/chosen": -533.4175415039062, "logps/rejected": -647.5643310546875, "loss": 0.6811, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3930707573890686, "rewards/margins": 0.1602892428636551, "rewards/rejected": -0.5533599257469177, "step": 2200 }, { "epoch": 0.16, "learning_rate": 4.939801734014601e-06, "logits/chosen": -1.8616033792495728, "logits/rejected": -1.7899806499481201, "logps/chosen": -586.544921875, "logps/rejected": -706.3464965820312, "loss": 0.6844, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.42945584654808044, "rewards/margins": 0.12044302374124527, "rewards/rejected": -0.5498989224433899, "step": 2210 }, { "epoch": 0.16, "learning_rate": 4.9383895375102294e-06, "logits/chosen": -2.0268666744232178, "logits/rejected": -1.4953985214233398, "logps/chosen": -518.0233154296875, "logps/rejected": -647.19970703125, "loss": 0.6813, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.34401828050613403, "rewards/margins": 0.16802045702934265, "rewards/rejected": -0.5120387673377991, "step": 2220 }, { "epoch": 0.16, "learning_rate": 4.936961174664776e-06, "logits/chosen": -2.3613927364349365, "logits/rejected": -1.8390963077545166, "logps/chosen": -362.2242431640625, "logps/rejected": -455.7596740722656, "loss": 0.6849, "rewards/accuracies": 0.625, "rewards/chosen": -0.15420767664909363, "rewards/margins": 0.134483203291893, "rewards/rejected": -0.2886908948421478, "step": 2230 }, { "epoch": 0.17, "learning_rate": 4.935516654948181e-06, "logits/chosen": -2.0761280059814453, "logits/rejected": -1.6711753606796265, "logps/chosen": -418.2381286621094, "logps/rejected": -531.6326904296875, "loss": 0.6801, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.273235023021698, "rewards/margins": 0.14172202348709106, "rewards/rejected": -0.41495704650878906, "step": 2240 }, { "epoch": 0.17, "learning_rate": 4.934055987937501e-06, "logits/chosen": -2.110250949859619, "logits/rejected": -1.7663872241973877, "logps/chosen": -415.91436767578125, "logps/rejected": -513.2429809570312, "loss": 0.6839, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2829795777797699, "rewards/margins": 0.11236218363046646, "rewards/rejected": -0.39534175395965576, "step": 2250 }, { "epoch": 0.17, "learning_rate": 4.93257918331685e-06, "logits/chosen": -2.0139827728271484, "logits/rejected": -1.8694069385528564, "logps/chosen": -520.3421630859375, "logps/rejected": -649.1539306640625, "loss": 0.6798, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.38664454221725464, "rewards/margins": 0.13381239771842957, "rewards/rejected": -0.5204569101333618, "step": 2260 }, { "epoch": 0.17, "learning_rate": 4.931086250877334e-06, "logits/chosen": -2.2461929321289062, "logits/rejected": -1.7968038320541382, "logps/chosen": -565.0711669921875, "logps/rejected": -642.9112548828125, "loss": 0.6868, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.39696604013442993, "rewards/margins": 0.1443590670824051, "rewards/rejected": -0.541325032711029, "step": 2270 }, { "epoch": 0.17, "learning_rate": 4.929577200516983e-06, "logits/chosen": -2.0329434871673584, "logits/rejected": -1.6964260339736938, "logps/chosen": -540.699951171875, "logps/rejected": -669.5889282226562, "loss": 0.6803, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.39678332209587097, "rewards/margins": 0.1187615767121315, "rewards/rejected": -0.5155448913574219, "step": 2280 }, { "epoch": 0.17, "learning_rate": 4.928052042240688e-06, "logits/chosen": -1.9604429006576538, "logits/rejected": -1.4864501953125, "logps/chosen": -544.8407592773438, "logps/rejected": -724.1475830078125, "loss": 0.6789, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.40707021951675415, "rewards/margins": 0.19425468146800995, "rewards/rejected": -0.6013249158859253, "step": 2290 }, { "epoch": 0.17, "learning_rate": 4.926510786160136e-06, "logits/chosen": -2.0992865562438965, "logits/rejected": -1.6196266412734985, "logps/chosen": -482.9034118652344, "logps/rejected": -635.896728515625, "loss": 0.6812, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.32695919275283813, "rewards/margins": 0.1822510063648224, "rewards/rejected": -0.5092101693153381, "step": 2300 }, { "epoch": 0.17, "learning_rate": 4.924953442493739e-06, "logits/chosen": -1.7546985149383545, "logits/rejected": -1.2381360530853271, "logps/chosen": -495.43121337890625, "logps/rejected": -641.9857788085938, "loss": 0.6801, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3383128046989441, "rewards/margins": 0.19699563086032867, "rewards/rejected": -0.5353084802627563, "step": 2310 }, { "epoch": 0.17, "learning_rate": 4.92338002156657e-06, "logits/chosen": -1.9251667261123657, "logits/rejected": -1.4100152254104614, "logps/chosen": -487.7589416503906, "logps/rejected": -629.0174560546875, "loss": 0.6811, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3087880313396454, "rewards/margins": 0.16659896075725555, "rewards/rejected": -0.47538700699806213, "step": 2320 }, { "epoch": 0.17, "learning_rate": 4.921790533810293e-06, "logits/chosen": -1.8623743057250977, "logits/rejected": -1.4733120203018188, "logps/chosen": -584.7200317382812, "logps/rejected": -719.0805053710938, "loss": 0.6836, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.44738397002220154, "rewards/margins": 0.13917413353919983, "rewards/rejected": -0.5865581035614014, "step": 2330 }, { "epoch": 0.17, "learning_rate": 4.920184989763092e-06, "logits/chosen": -1.8426357507705688, "logits/rejected": -1.2346632480621338, "logps/chosen": -523.5276489257812, "logps/rejected": -742.7013549804688, "loss": 0.6724, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3561467230319977, "rewards/margins": 0.27055758237838745, "rewards/rejected": -0.6267042756080627, "step": 2340 }, { "epoch": 0.17, "learning_rate": 4.918563400069607e-06, "logits/chosen": -1.9781579971313477, "logits/rejected": -1.589500069618225, "logps/chosen": -590.6767578125, "logps/rejected": -691.7242431640625, "loss": 0.6856, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.44454675912857056, "rewards/margins": 0.12111939489841461, "rewards/rejected": -0.5656661987304688, "step": 2350 }, { "epoch": 0.17, "learning_rate": 4.916925775480855e-06, "logits/chosen": -1.933618187904358, "logits/rejected": -1.3225958347320557, "logps/chosen": -570.9900512695312, "logps/rejected": -759.4656372070312, "loss": 0.6759, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4296819567680359, "rewards/margins": 0.21529288589954376, "rewards/rejected": -0.6449748277664185, "step": 2360 }, { "epoch": 0.17, "learning_rate": 4.915272126854166e-06, "logits/chosen": -1.9328773021697998, "logits/rejected": -1.356515645980835, "logps/chosen": -539.2330322265625, "logps/rejected": -710.1680908203125, "loss": 0.679, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4058482050895691, "rewards/margins": 0.21512186527252197, "rewards/rejected": -0.6209701299667358, "step": 2370 }, { "epoch": 0.18, "learning_rate": 4.913602465153105e-06, "logits/chosen": -2.017155170440674, "logits/rejected": -1.5753262042999268, "logps/chosen": -438.91070556640625, "logps/rejected": -574.7557373046875, "loss": 0.679, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.26696544885635376, "rewards/margins": 0.16357631981372833, "rewards/rejected": -0.4305417537689209, "step": 2380 }, { "epoch": 0.18, "learning_rate": 4.9119168014474085e-06, "logits/chosen": -1.9465776681900024, "logits/rejected": -1.5304466485977173, "logps/chosen": -437.0359802246094, "logps/rejected": -646.6826782226562, "loss": 0.6782, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2755880653858185, "rewards/margins": 0.24534741044044495, "rewards/rejected": -0.5209354758262634, "step": 2390 }, { "epoch": 0.18, "learning_rate": 4.9102151469129e-06, "logits/chosen": -2.0569117069244385, "logits/rejected": -1.6413549184799194, "logps/chosen": -382.51715087890625, "logps/rejected": -496.6520080566406, "loss": 0.6827, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.232109934091568, "rewards/margins": 0.17673490941524506, "rewards/rejected": -0.40884479880332947, "step": 2400 }, { "epoch": 0.18, "learning_rate": 4.90849751283142e-06, "logits/chosen": -2.122424364089966, "logits/rejected": -1.6203460693359375, "logps/chosen": -508.6318359375, "logps/rejected": -621.3282470703125, "loss": 0.6792, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3398881256580353, "rewards/margins": 0.15040136873722076, "rewards/rejected": -0.49028950929641724, "step": 2410 }, { "epoch": 0.18, "learning_rate": 4.906763910590757e-06, "logits/chosen": -2.100635051727295, "logits/rejected": -1.5242019891738892, "logps/chosen": -585.4560546875, "logps/rejected": -725.6268310546875, "loss": 0.6785, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.42005258798599243, "rewards/margins": 0.18844790756702423, "rewards/rejected": -0.6085005402565002, "step": 2420 }, { "epoch": 0.18, "learning_rate": 4.905014351684565e-06, "logits/chosen": -1.9709398746490479, "logits/rejected": -1.6309223175048828, "logps/chosen": -604.32861328125, "logps/rejected": -661.5999755859375, "loss": 0.6828, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4530222415924072, "rewards/margins": 0.11189214140176773, "rewards/rejected": -0.5649144649505615, "step": 2430 }, { "epoch": 0.18, "learning_rate": 4.9032488477122876e-06, "logits/chosen": -2.0830225944519043, "logits/rejected": -1.4696404933929443, "logps/chosen": -435.38665771484375, "logps/rejected": -585.2053833007812, "loss": 0.6747, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.26529568433761597, "rewards/margins": 0.2149817943572998, "rewards/rejected": -0.48027747869491577, "step": 2440 }, { "epoch": 0.18, "learning_rate": 4.901467410379086e-06, "logits/chosen": -1.9383325576782227, "logits/rejected": -1.591639518737793, "logps/chosen": -419.5369567871094, "logps/rejected": -446.53741455078125, "loss": 0.6866, "rewards/accuracies": 0.625, "rewards/chosen": -0.19528073072433472, "rewards/margins": 0.09744925051927567, "rewards/rejected": -0.2927299439907074, "step": 2450 }, { "epoch": 0.18, "learning_rate": 4.899670051495756e-06, "logits/chosen": -2.2215495109558105, "logits/rejected": -1.741744041442871, "logps/chosen": -388.2371520996094, "logps/rejected": -532.0533447265625, "loss": 0.6753, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14740610122680664, "rewards/margins": 0.22941145300865173, "rewards/rejected": -0.376817524433136, "step": 2460 }, { "epoch": 0.18, "learning_rate": 4.897856782978655e-06, "logits/chosen": -2.1520392894744873, "logits/rejected": -1.426025629043579, "logps/chosen": -440.3724060058594, "logps/rejected": -630.6165771484375, "loss": 0.6745, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.307037889957428, "rewards/margins": 0.23167535662651062, "rewards/rejected": -0.5387132167816162, "step": 2470 }, { "epoch": 0.18, "learning_rate": 4.896027616849616e-06, "logits/chosen": -1.9238615036010742, "logits/rejected": -1.5554096698760986, "logps/chosen": -493.9755859375, "logps/rejected": -670.4956665039062, "loss": 0.6799, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.37134337425231934, "rewards/margins": 0.1654943823814392, "rewards/rejected": -0.5368377566337585, "step": 2480 }, { "epoch": 0.18, "learning_rate": 4.894182565235876e-06, "logits/chosen": -1.9413474798202515, "logits/rejected": -1.7075296640396118, "logps/chosen": -374.39910888671875, "logps/rejected": -442.74493408203125, "loss": 0.6874, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20813293755054474, "rewards/margins": 0.08636487275362015, "rewards/rejected": -0.2944977879524231, "step": 2490 }, { "epoch": 0.18, "learning_rate": 4.892321640369989e-06, "logits/chosen": -1.989893913269043, "logits/rejected": -1.522605538368225, "logps/chosen": -471.76397705078125, "logps/rejected": -642.712890625, "loss": 0.6773, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.30697718262672424, "rewards/margins": 0.2053314447402954, "rewards/rejected": -0.5123087167739868, "step": 2500 }, { "epoch": 0.19, "learning_rate": 4.8904448545897485e-06, "logits/chosen": -1.8910133838653564, "logits/rejected": -1.4781734943389893, "logps/chosen": -601.6436157226562, "logps/rejected": -683.3905639648438, "loss": 0.6812, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4163696765899658, "rewards/margins": 0.1305966079235077, "rewards/rejected": -0.5469661951065063, "step": 2510 }, { "epoch": 0.19, "learning_rate": 4.888552220338104e-06, "logits/chosen": -1.8071016073226929, "logits/rejected": -1.333792805671692, "logps/chosen": -574.151611328125, "logps/rejected": -748.5635986328125, "loss": 0.6858, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4424493908882141, "rewards/margins": 0.16324040293693542, "rewards/rejected": -0.6056898236274719, "step": 2520 }, { "epoch": 0.19, "learning_rate": 4.8866437501630806e-06, "logits/chosen": -1.9753434658050537, "logits/rejected": -1.538064956665039, "logps/chosen": -542.749755859375, "logps/rejected": -665.6854248046875, "loss": 0.6831, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3674880862236023, "rewards/margins": 0.16926974058151245, "rewards/rejected": -0.5367578268051147, "step": 2530 }, { "epoch": 0.19, "learning_rate": 4.884719456717692e-06, "logits/chosen": -1.9499969482421875, "logits/rejected": -1.4029819965362549, "logps/chosen": -457.060791015625, "logps/rejected": -579.0198974609375, "loss": 0.6804, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2731768488883972, "rewards/margins": 0.16305680572986603, "rewards/rejected": -0.43623366951942444, "step": 2540 }, { "epoch": 0.19, "learning_rate": 4.882779352759863e-06, "logits/chosen": -1.9861555099487305, "logits/rejected": -1.5487226247787476, "logps/chosen": -493.06103515625, "logps/rejected": -663.3422241210938, "loss": 0.6753, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.33946704864501953, "rewards/margins": 0.19553466141223907, "rewards/rejected": -0.5350016951560974, "step": 2550 }, { "epoch": 0.19, "learning_rate": 4.880823451152336e-06, "logits/chosen": -1.9287666082382202, "logits/rejected": -1.3858563899993896, "logps/chosen": -448.2491760253906, "logps/rejected": -620.9635009765625, "loss": 0.6787, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3086509108543396, "rewards/margins": 0.1940041482448578, "rewards/rejected": -0.502655029296875, "step": 2560 }, { "epoch": 0.19, "learning_rate": 4.878851764862593e-06, "logits/chosen": -1.9542789459228516, "logits/rejected": -1.5536928176879883, "logps/chosen": -580.6978759765625, "logps/rejected": -709.4466552734375, "loss": 0.6793, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3658314645290375, "rewards/margins": 0.17895950376987457, "rewards/rejected": -0.5447909832000732, "step": 2570 }, { "epoch": 0.19, "learning_rate": 4.87686430696277e-06, "logits/chosen": -2.072807550430298, "logits/rejected": -1.5429219007492065, "logps/chosen": -507.7230529785156, "logps/rejected": -656.1878662109375, "loss": 0.6803, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.34848642349243164, "rewards/margins": 0.1958581954240799, "rewards/rejected": -0.5443445444107056, "step": 2580 }, { "epoch": 0.19, "learning_rate": 4.874861090629561e-06, "logits/chosen": -1.8816547393798828, "logits/rejected": -1.3930195569992065, "logps/chosen": -629.2784423828125, "logps/rejected": -794.1343994140625, "loss": 0.6726, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4584957957267761, "rewards/margins": 0.21472418308258057, "rewards/rejected": -0.6732200384140015, "step": 2590 }, { "epoch": 0.19, "learning_rate": 4.872842129144145e-06, "logits/chosen": -2.0225672721862793, "logits/rejected": -1.3642065525054932, "logps/chosen": -576.2217407226562, "logps/rejected": -723.9624633789062, "loss": 0.6828, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3863699734210968, "rewards/margins": 0.2021186798810959, "rewards/rejected": -0.5884886980056763, "step": 2600 }, { "epoch": 0.19, "learning_rate": 4.870807435892083e-06, "logits/chosen": -1.8965297937393188, "logits/rejected": -1.5271927118301392, "logps/chosen": -496.9197692871094, "logps/rejected": -599.1149291992188, "loss": 0.6803, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3564664125442505, "rewards/margins": 0.11064217984676361, "rewards/rejected": -0.4671086370944977, "step": 2610 }, { "epoch": 0.19, "learning_rate": 4.868757024363242e-06, "logits/chosen": -2.0839314460754395, "logits/rejected": -1.7193305492401123, "logps/chosen": -460.3458557128906, "logps/rejected": -587.4038696289062, "loss": 0.6795, "rewards/accuracies": 0.75, "rewards/chosen": -0.29591497778892517, "rewards/margins": 0.1804284304380417, "rewards/rejected": -0.47634339332580566, "step": 2620 }, { "epoch": 0.19, "learning_rate": 4.866690908151698e-06, "logits/chosen": -2.0424046516418457, "logits/rejected": -1.6916013956069946, "logps/chosen": -474.6332092285156, "logps/rejected": -627.3050537109375, "loss": 0.6805, "rewards/accuracies": 0.75, "rewards/chosen": -0.3274575173854828, "rewards/margins": 0.18328411877155304, "rewards/rejected": -0.5107415914535522, "step": 2630 }, { "epoch": 0.19, "learning_rate": 4.864609100955646e-06, "logits/chosen": -1.9282909631729126, "logits/rejected": -1.3194262981414795, "logps/chosen": -536.0843505859375, "logps/rejected": -710.6387939453125, "loss": 0.6801, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3801216781139374, "rewards/margins": 0.20628586411476135, "rewards/rejected": -0.5864075422286987, "step": 2640 }, { "epoch": 0.2, "learning_rate": 4.8625116165773125e-06, "logits/chosen": -1.898563027381897, "logits/rejected": -1.7702791690826416, "logps/chosen": -534.3336791992188, "logps/rejected": -621.9896240234375, "loss": 0.6814, "rewards/accuracies": 0.625, "rewards/chosen": -0.3835286796092987, "rewards/margins": 0.11995543539524078, "rewards/rejected": -0.5034841299057007, "step": 2650 }, { "epoch": 0.2, "learning_rate": 4.8603984689228645e-06, "logits/chosen": -1.9916276931762695, "logits/rejected": -1.5071265697479248, "logps/chosen": -504.18170166015625, "logps/rejected": -659.8911743164062, "loss": 0.6799, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3125648498535156, "rewards/margins": 0.18450579047203064, "rewards/rejected": -0.49707064032554626, "step": 2660 }, { "epoch": 0.2, "learning_rate": 4.858269672002312e-06, "logits/chosen": -2.1071367263793945, "logits/rejected": -1.7717968225479126, "logps/chosen": -398.2330017089844, "logps/rejected": -549.4989013671875, "loss": 0.6786, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24701924622058868, "rewards/margins": 0.18079087138175964, "rewards/rejected": -0.42781004309654236, "step": 2670 }, { "epoch": 0.2, "learning_rate": 4.856125239929423e-06, "logits/chosen": -1.8983643054962158, "logits/rejected": -1.5166361331939697, "logps/chosen": -481.17138671875, "logps/rejected": -560.0281372070312, "loss": 0.6832, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.28725457191467285, "rewards/margins": 0.1478346884250641, "rewards/rejected": -0.43508926033973694, "step": 2680 }, { "epoch": 0.2, "learning_rate": 4.853965186921619e-06, "logits/chosen": -2.0506086349487305, "logits/rejected": -1.3681724071502686, "logps/chosen": -403.7994689941406, "logps/rejected": -597.2457275390625, "loss": 0.6733, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2552160918712616, "rewards/margins": 0.22805039584636688, "rewards/rejected": -0.4832665026187897, "step": 2690 }, { "epoch": 0.2, "learning_rate": 4.851789527299892e-06, "logits/chosen": -1.7372363805770874, "logits/rejected": -1.3611010313034058, "logps/chosen": -405.5017395019531, "logps/rejected": -569.43896484375, "loss": 0.6786, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2813628017902374, "rewards/margins": 0.16514047980308533, "rewards/rejected": -0.44650331139564514, "step": 2700 }, { "epoch": 0.2, "learning_rate": 4.849598275488702e-06, "logits/chosen": -2.1841342449188232, "logits/rejected": -1.6578344106674194, "logps/chosen": -468.8514099121094, "logps/rejected": -590.2703247070312, "loss": 0.6808, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.31932029128074646, "rewards/margins": 0.16815465688705444, "rewards/rejected": -0.4874749183654785, "step": 2710 }, { "epoch": 0.2, "learning_rate": 4.847391446015888e-06, "logits/chosen": -1.9254611730575562, "logits/rejected": -1.4339749813079834, "logps/chosen": -471.6788024902344, "logps/rejected": -696.6669921875, "loss": 0.6813, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3501111567020416, "rewards/margins": 0.25220435857772827, "rewards/rejected": -0.6023155450820923, "step": 2720 }, { "epoch": 0.2, "learning_rate": 4.8451690535125634e-06, "logits/chosen": -1.895127296447754, "logits/rejected": -1.338257074356079, "logps/chosen": -603.3584594726562, "logps/rejected": -744.2330322265625, "loss": 0.6776, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4162725806236267, "rewards/margins": 0.21060046553611755, "rewards/rejected": -0.6268731355667114, "step": 2730 }, { "epoch": 0.2, "learning_rate": 4.842931112713026e-06, "logits/chosen": -2.1715359687805176, "logits/rejected": -1.6297610998153687, "logps/chosen": -594.4014892578125, "logps/rejected": -690.8668823242188, "loss": 0.678, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3530123829841614, "rewards/margins": 0.16728246212005615, "rewards/rejected": -0.5202948451042175, "step": 2740 }, { "epoch": 0.2, "learning_rate": 4.840677638454655e-06, "logits/chosen": -2.2008841037750244, "logits/rejected": -1.6693460941314697, "logps/chosen": -514.0177001953125, "logps/rejected": -624.3578491210938, "loss": 0.6813, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3487882614135742, "rewards/margins": 0.13509999215602875, "rewards/rejected": -0.4838882386684418, "step": 2750 }, { "epoch": 0.2, "learning_rate": 4.838408645677818e-06, "logits/chosen": -2.0401546955108643, "logits/rejected": -1.674791693687439, "logps/chosen": -457.62420654296875, "logps/rejected": -654.6641845703125, "loss": 0.6782, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.29688000679016113, "rewards/margins": 0.1917741596698761, "rewards/rejected": -0.4886541962623596, "step": 2760 }, { "epoch": 0.2, "learning_rate": 4.836124149425768e-06, "logits/chosen": -2.064950466156006, "logits/rejected": -1.505575180053711, "logps/chosen": -452.13616943359375, "logps/rejected": -624.4116821289062, "loss": 0.6815, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.30180710554122925, "rewards/margins": 0.20856180787086487, "rewards/rejected": -0.5103689432144165, "step": 2770 }, { "epoch": 0.21, "learning_rate": 4.833824164844546e-06, "logits/chosen": -2.055421829223633, "logits/rejected": -1.657907485961914, "logps/chosen": -512.58544921875, "logps/rejected": -638.5838012695312, "loss": 0.6806, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.32626938819885254, "rewards/margins": 0.15966089069843292, "rewards/rejected": -0.48593029379844666, "step": 2780 }, { "epoch": 0.21, "learning_rate": 4.8315087071828775e-06, "logits/chosen": -2.097433090209961, "logits/rejected": -1.4763990640640259, "logps/chosen": -462.60760498046875, "logps/rejected": -649.4661254882812, "loss": 0.6729, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.32824015617370605, "rewards/margins": 0.23077067732810974, "rewards/rejected": -0.5590108633041382, "step": 2790 }, { "epoch": 0.21, "learning_rate": 4.829177791792075e-06, "logits/chosen": -1.9530103206634521, "logits/rejected": -1.3779066801071167, "logps/chosen": -464.0525817871094, "logps/rejected": -651.7576904296875, "loss": 0.6706, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.28285345435142517, "rewards/margins": 0.2412024736404419, "rewards/rejected": -0.5240559577941895, "step": 2800 }, { "epoch": 0.21, "learning_rate": 4.826831434125932e-06, "logits/chosen": -1.8675224781036377, "logits/rejected": -1.490267038345337, "logps/chosen": -519.0450439453125, "logps/rejected": -663.8321533203125, "loss": 0.6798, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.34535500407218933, "rewards/margins": 0.1770472377538681, "rewards/rejected": -0.5224022269248962, "step": 2810 }, { "epoch": 0.21, "learning_rate": 4.824469649740628e-06, "logits/chosen": -1.8160769939422607, "logits/rejected": -1.331425428390503, "logps/chosen": -496.47442626953125, "logps/rejected": -692.4678955078125, "loss": 0.6749, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3228204846382141, "rewards/margins": 0.1910255402326584, "rewards/rejected": -0.5138460397720337, "step": 2820 }, { "epoch": 0.21, "learning_rate": 4.822092454294617e-06, "logits/chosen": -2.0351576805114746, "logits/rejected": -1.4493870735168457, "logps/chosen": -634.1026611328125, "logps/rejected": -733.4759521484375, "loss": 0.6821, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.449013888835907, "rewards/margins": 0.14797066152095795, "rewards/rejected": -0.5969845056533813, "step": 2830 }, { "epoch": 0.21, "learning_rate": 4.819699863548526e-06, "logits/chosen": -1.9995062351226807, "logits/rejected": -1.7789589166641235, "logps/chosen": -489.53143310546875, "logps/rejected": -637.5982666015625, "loss": 0.6822, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3231997489929199, "rewards/margins": 0.13145264983177185, "rewards/rejected": -0.4546523988246918, "step": 2840 }, { "epoch": 0.21, "learning_rate": 4.817291893365055e-06, "logits/chosen": -1.9488105773925781, "logits/rejected": -1.5790774822235107, "logps/chosen": -538.74072265625, "logps/rejected": -647.57080078125, "loss": 0.6816, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3774206042289734, "rewards/margins": 0.15588635206222534, "rewards/rejected": -0.5333069562911987, "step": 2850 }, { "epoch": 0.21, "learning_rate": 4.814868559708866e-06, "logits/chosen": -2.1347949504852295, "logits/rejected": -1.6634502410888672, "logps/chosen": -391.906982421875, "logps/rejected": -666.5550537109375, "loss": 0.6736, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2492218017578125, "rewards/margins": 0.1816580593585968, "rewards/rejected": -0.4308798313140869, "step": 2860 }, { "epoch": 0.21, "learning_rate": 4.812429878646483e-06, "logits/chosen": -2.059627056121826, "logits/rejected": -1.6293795108795166, "logps/chosen": -482.15863037109375, "logps/rejected": -618.1506958007812, "loss": 0.6819, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.30036407709121704, "rewards/margins": 0.16744975745677948, "rewards/rejected": -0.4678138792514801, "step": 2870 }, { "epoch": 0.21, "learning_rate": 4.809975866346177e-06, "logits/chosen": -1.7948729991912842, "logits/rejected": -1.2370790243148804, "logps/chosen": -451.3946838378906, "logps/rejected": -619.2628173828125, "loss": 0.6804, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3274025321006775, "rewards/margins": 0.18716736137866974, "rewards/rejected": -0.514569878578186, "step": 2880 }, { "epoch": 0.21, "learning_rate": 4.807506539077869e-06, "logits/chosen": -2.160552501678467, "logits/rejected": -1.744652509689331, "logps/chosen": -460.280517578125, "logps/rejected": -546.5147705078125, "loss": 0.6874, "rewards/accuracies": 0.625, "rewards/chosen": -0.3053615987300873, "rewards/margins": 0.10155130922794342, "rewards/rejected": -0.4069128930568695, "step": 2890 }, { "epoch": 0.21, "learning_rate": 4.805021913213013e-06, "logits/chosen": -2.072697401046753, "logits/rejected": -1.603668212890625, "logps/chosen": -383.42803955078125, "logps/rejected": -514.6353759765625, "loss": 0.6808, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22464211285114288, "rewards/margins": 0.14382584393024445, "rewards/rejected": -0.3684679865837097, "step": 2900 }, { "epoch": 0.21, "learning_rate": 4.8025220052244945e-06, "logits/chosen": -2.1265883445739746, "logits/rejected": -1.9659149646759033, "logps/chosen": -539.4525756835938, "logps/rejected": -650.6953735351562, "loss": 0.6836, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.34373611211776733, "rewards/margins": 0.12058810889720917, "rewards/rejected": -0.46432414650917053, "step": 2910 }, { "epoch": 0.22, "learning_rate": 4.800006831686518e-06, "logits/chosen": -2.0773959159851074, "logits/rejected": -1.5966839790344238, "logps/chosen": -479.4004821777344, "logps/rejected": -642.3235473632812, "loss": 0.6751, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.29926925897598267, "rewards/margins": 0.2080642431974411, "rewards/rejected": -0.5073334574699402, "step": 2920 }, { "epoch": 0.22, "learning_rate": 4.797476409274496e-06, "logits/chosen": -2.1119143962860107, "logits/rejected": -1.7216190099716187, "logps/chosen": -508.1703186035156, "logps/rejected": -633.275146484375, "loss": 0.6796, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3717537522315979, "rewards/margins": 0.15455105900764465, "rewards/rejected": -0.5263047814369202, "step": 2930 }, { "epoch": 0.22, "learning_rate": 4.794930754764942e-06, "logits/chosen": -2.1477837562561035, "logits/rejected": -1.6326879262924194, "logps/chosen": -557.4747924804688, "logps/rejected": -674.22314453125, "loss": 0.6781, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3927028775215149, "rewards/margins": 0.16167061030864716, "rewards/rejected": -0.5543734431266785, "step": 2940 }, { "epoch": 0.22, "learning_rate": 4.792369885035353e-06, "logits/chosen": -2.1052510738372803, "logits/rejected": -1.489867925643921, "logps/chosen": -509.635986328125, "logps/rejected": -642.829833984375, "loss": 0.6763, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3661060333251953, "rewards/margins": 0.1702888309955597, "rewards/rejected": -0.5363948941230774, "step": 2950 }, { "epoch": 0.22, "learning_rate": 4.789793817064107e-06, "logits/chosen": -2.2182796001434326, "logits/rejected": -1.7777637243270874, "logps/chosen": -459.7296447753906, "logps/rejected": -634.8563232421875, "loss": 0.6799, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.30696818232536316, "rewards/margins": 0.190567746758461, "rewards/rejected": -0.4975360035896301, "step": 2960 }, { "epoch": 0.22, "learning_rate": 4.787202567930342e-06, "logits/chosen": -2.1613125801086426, "logits/rejected": -1.5353490114212036, "logps/chosen": -388.54339599609375, "logps/rejected": -538.5556030273438, "loss": 0.679, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22435393929481506, "rewards/margins": 0.2205592840909958, "rewards/rejected": -0.44491320848464966, "step": 2970 }, { "epoch": 0.22, "learning_rate": 4.784596154813847e-06, "logits/chosen": -2.1657166481018066, "logits/rejected": -1.6417877674102783, "logps/chosen": -499.8614196777344, "logps/rejected": -570.353515625, "loss": 0.6862, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3289027810096741, "rewards/margins": 0.09487798810005188, "rewards/rejected": -0.42378076910972595, "step": 2980 }, { "epoch": 0.22, "learning_rate": 4.781974594994947e-06, "logits/chosen": -1.937658667564392, "logits/rejected": -1.5452995300292969, "logps/chosen": -511.8965759277344, "logps/rejected": -642.66650390625, "loss": 0.6799, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.38078927993774414, "rewards/margins": 0.15459299087524414, "rewards/rejected": -0.5353822708129883, "step": 2990 }, { "epoch": 0.22, "learning_rate": 4.7793379058543885e-06, "logits/chosen": -2.1129038333892822, "logits/rejected": -1.7680890560150146, "logps/chosen": -533.2216186523438, "logps/rejected": -607.221923828125, "loss": 0.6866, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3710973560810089, "rewards/margins": 0.10324674844741821, "rewards/rejected": -0.47434407472610474, "step": 3000 }, { "epoch": 0.22, "learning_rate": 4.776686104873224e-06, "logits/chosen": -2.1473326683044434, "logits/rejected": -1.8521807193756104, "logps/chosen": -533.1450805664062, "logps/rejected": -615.6654052734375, "loss": 0.6841, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3725462555885315, "rewards/margins": 0.11305709928274155, "rewards/rejected": -0.485603392124176, "step": 3010 }, { "epoch": 0.22, "learning_rate": 4.774019209632696e-06, "logits/chosen": -2.313849925994873, "logits/rejected": -1.5469987392425537, "logps/chosen": -436.65667724609375, "logps/rejected": -590.8704833984375, "loss": 0.6793, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2851754128932953, "rewards/margins": 0.190791517496109, "rewards/rejected": -0.4759669303894043, "step": 3020 }, { "epoch": 0.22, "learning_rate": 4.771337237814123e-06, "logits/chosen": -2.063269853591919, "logits/rejected": -1.4987871646881104, "logps/chosen": -417.1874084472656, "logps/rejected": -604.644775390625, "loss": 0.6778, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.27084892988204956, "rewards/margins": 0.2177794873714447, "rewards/rejected": -0.48862844705581665, "step": 3030 }, { "epoch": 0.22, "learning_rate": 4.768640207198776e-06, "logits/chosen": -1.9888442754745483, "logits/rejected": -1.4431921243667603, "logps/chosen": -475.28900146484375, "logps/rejected": -662.6255493164062, "loss": 0.6784, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.34199684858322144, "rewards/margins": 0.22598235309123993, "rewards/rejected": -0.5679792165756226, "step": 3040 }, { "epoch": 0.22, "learning_rate": 4.76592813566777e-06, "logits/chosen": -1.979185700416565, "logits/rejected": -2.09032940864563, "logps/chosen": -406.5249328613281, "logps/rejected": -539.317626953125, "loss": 0.682, "rewards/accuracies": 0.625, "rewards/chosen": -0.26280340552330017, "rewards/margins": 0.11721128225326538, "rewards/rejected": -0.38001465797424316, "step": 3050 }, { "epoch": 0.23, "learning_rate": 4.763201041201935e-06, "logits/chosen": -2.3927273750305176, "logits/rejected": -1.8055158853530884, "logps/chosen": -459.36773681640625, "logps/rejected": -561.9493408203125, "loss": 0.6777, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2648261785507202, "rewards/margins": 0.17458853125572205, "rewards/rejected": -0.43941473960876465, "step": 3060 }, { "epoch": 0.23, "learning_rate": 4.760458941881706e-06, "logits/chosen": -2.143461227416992, "logits/rejected": -1.7503830194473267, "logps/chosen": -478.701171875, "logps/rejected": -636.1018676757812, "loss": 0.6804, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3179198205471039, "rewards/margins": 0.16323554515838623, "rewards/rejected": -0.4811553955078125, "step": 3070 }, { "epoch": 0.23, "learning_rate": 4.757701855886996e-06, "logits/chosen": -2.1002535820007324, "logits/rejected": -1.599218726158142, "logps/chosen": -421.5325622558594, "logps/rejected": -579.0523681640625, "loss": 0.679, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2491656094789505, "rewards/margins": 0.18271759152412415, "rewards/rejected": -0.43188318610191345, "step": 3080 }, { "epoch": 0.23, "learning_rate": 4.7549298014970804e-06, "logits/chosen": -2.2895026206970215, "logits/rejected": -1.6674244403839111, "logps/chosen": -485.49774169921875, "logps/rejected": -632.1036376953125, "loss": 0.6799, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.33800673484802246, "rewards/margins": 0.1954016387462616, "rewards/rejected": -0.5334084033966064, "step": 3090 }, { "epoch": 0.23, "learning_rate": 4.752142797090472e-06, "logits/chosen": -2.06559681892395, "logits/rejected": -1.7571942806243896, "logps/chosen": -419.2191467285156, "logps/rejected": -515.0830688476562, "loss": 0.6839, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2297629415988922, "rewards/margins": 0.14261119067668915, "rewards/rejected": -0.37237411737442017, "step": 3100 }, { "epoch": 0.23, "learning_rate": 4.749340861144806e-06, "logits/chosen": -1.9626312255859375, "logits/rejected": -1.6184285879135132, "logps/chosen": -409.90789794921875, "logps/rejected": -649.4979858398438, "loss": 0.673, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.24048984050750732, "rewards/margins": 0.22875627875328064, "rewards/rejected": -0.46924614906311035, "step": 3110 }, { "epoch": 0.23, "learning_rate": 4.746524012236706e-06, "logits/chosen": -2.033668041229248, "logits/rejected": -1.3973853588104248, "logps/chosen": -449.2447814941406, "logps/rejected": -605.2628784179688, "loss": 0.6758, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2808469235897064, "rewards/margins": 0.19546134769916534, "rewards/rejected": -0.47630825638771057, "step": 3120 }, { "epoch": 0.23, "learning_rate": 4.743692269041671e-06, "logits/chosen": -2.060668468475342, "logits/rejected": -1.4617725610733032, "logps/chosen": -506.12469482421875, "logps/rejected": -612.0767211914062, "loss": 0.6778, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3265566825866699, "rewards/margins": 0.17371772229671478, "rewards/rejected": -0.5002744197845459, "step": 3130 }, { "epoch": 0.23, "learning_rate": 4.740845650333949e-06, "logits/chosen": -2.2750937938690186, "logits/rejected": -1.9038136005401611, "logps/chosen": -371.82830810546875, "logps/rejected": -511.453369140625, "loss": 0.6774, "rewards/accuracies": 0.625, "rewards/chosen": -0.20027348399162292, "rewards/margins": 0.1926852911710739, "rewards/rejected": -0.3929588198661804, "step": 3140 }, { "epoch": 0.23, "learning_rate": 4.737984174986409e-06, "logits/chosen": -2.1563897132873535, "logits/rejected": -1.844500184059143, "logps/chosen": -442.1204528808594, "logps/rejected": -533.384765625, "loss": 0.6826, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.30399438738822937, "rewards/margins": 0.12155342102050781, "rewards/rejected": -0.4255477786064148, "step": 3150 }, { "epoch": 0.23, "learning_rate": 4.73510786197042e-06, "logits/chosen": -2.1086549758911133, "logits/rejected": -1.6035468578338623, "logps/chosen": -469.06396484375, "logps/rejected": -575.51953125, "loss": 0.6831, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2740177512168884, "rewards/margins": 0.15295372903347015, "rewards/rejected": -0.426971435546875, "step": 3160 }, { "epoch": 0.23, "learning_rate": 4.732216730355721e-06, "logits/chosen": -2.0455493927001953, "logits/rejected": -1.6433073282241821, "logps/chosen": -474.9363708496094, "logps/rejected": -574.0584716796875, "loss": 0.6822, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3139690160751343, "rewards/margins": 0.13201819360256195, "rewards/rejected": -0.4459872245788574, "step": 3170 }, { "epoch": 0.23, "learning_rate": 4.7293107993103026e-06, "logits/chosen": -2.1597962379455566, "logits/rejected": -1.6508777141571045, "logps/chosen": -399.7731018066406, "logps/rejected": -495.54742431640625, "loss": 0.6815, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24799981713294983, "rewards/margins": 0.1359129697084427, "rewards/rejected": -0.3839127719402313, "step": 3180 }, { "epoch": 0.24, "learning_rate": 4.72639008810027e-06, "logits/chosen": -2.206812620162964, "logits/rejected": -1.818541169166565, "logps/chosen": -431.3895568847656, "logps/rejected": -542.7749633789062, "loss": 0.6809, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.228606179356575, "rewards/margins": 0.12083079665899277, "rewards/rejected": -0.3494369685649872, "step": 3190 }, { "epoch": 0.24, "learning_rate": 4.72345461608972e-06, "logits/chosen": -2.135293483734131, "logits/rejected": -1.8228296041488647, "logps/chosen": -492.6844787597656, "logps/rejected": -638.8058471679688, "loss": 0.6821, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2774745523929596, "rewards/margins": 0.17826326191425323, "rewards/rejected": -0.45573776960372925, "step": 3200 }, { "epoch": 0.24, "learning_rate": 4.720504402740615e-06, "logits/chosen": -2.1343488693237305, "logits/rejected": -1.8152217864990234, "logps/chosen": -468.915283203125, "logps/rejected": -613.5650634765625, "loss": 0.6807, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.29080140590667725, "rewards/margins": 0.1701863706111908, "rewards/rejected": -0.46098774671554565, "step": 3210 }, { "epoch": 0.24, "learning_rate": 4.717539467612648e-06, "logits/chosen": -2.088972568511963, "logits/rejected": -1.7515579462051392, "logps/chosen": -507.94818115234375, "logps/rejected": -706.5578002929688, "loss": 0.6794, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.31380099058151245, "rewards/margins": 0.22428607940673828, "rewards/rejected": -0.5380870699882507, "step": 3220 }, { "epoch": 0.24, "learning_rate": 4.71455983036312e-06, "logits/chosen": -1.927253007888794, "logits/rejected": -1.201525092124939, "logps/chosen": -558.7928466796875, "logps/rejected": -766.3048095703125, "loss": 0.6805, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.38876432180404663, "rewards/margins": 0.2425849884748459, "rewards/rejected": -0.6313492655754089, "step": 3230 }, { "epoch": 0.24, "learning_rate": 4.711565510746804e-06, "logits/chosen": -2.1861143112182617, "logits/rejected": -1.5567336082458496, "logps/chosen": -538.634765625, "logps/rejected": -642.6546630859375, "loss": 0.6794, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.34371134638786316, "rewards/margins": 0.18017305433750153, "rewards/rejected": -0.5238844156265259, "step": 3240 }, { "epoch": 0.24, "learning_rate": 4.708556528615815e-06, "logits/chosen": -2.0123813152313232, "logits/rejected": -1.4701013565063477, "logps/chosen": -482.340576171875, "logps/rejected": -643.2246704101562, "loss": 0.6818, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3397231698036194, "rewards/margins": 0.1917569637298584, "rewards/rejected": -0.5314801931381226, "step": 3250 }, { "epoch": 0.24, "learning_rate": 4.705532903919481e-06, "logits/chosen": -1.886785864830017, "logits/rejected": -1.471383810043335, "logps/chosen": -428.0773010253906, "logps/rejected": -620.510498046875, "loss": 0.6805, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.25911474227905273, "rewards/margins": 0.18472735583782196, "rewards/rejected": -0.4438420832157135, "step": 3260 }, { "epoch": 0.24, "learning_rate": 4.702494656704208e-06, "logits/chosen": -2.064593553543091, "logits/rejected": -1.5056880712509155, "logps/chosen": -563.7981567382812, "logps/rejected": -695.2303466796875, "loss": 0.6781, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.35683074593544006, "rewards/margins": 0.17803850769996643, "rewards/rejected": -0.5348693132400513, "step": 3270 }, { "epoch": 0.24, "learning_rate": 4.699441807113351e-06, "logits/chosen": -1.9533469676971436, "logits/rejected": -1.4917285442352295, "logps/chosen": -505.28240966796875, "logps/rejected": -679.7451782226562, "loss": 0.6769, "rewards/accuracies": 0.75, "rewards/chosen": -0.33737510442733765, "rewards/margins": 0.2135481834411621, "rewards/rejected": -0.550923228263855, "step": 3280 }, { "epoch": 0.24, "learning_rate": 4.696374375387072e-06, "logits/chosen": -2.1428096294403076, "logits/rejected": -1.6918548345565796, "logps/chosen": -395.3492736816406, "logps/rejected": -521.2198486328125, "loss": 0.677, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2557286024093628, "rewards/margins": 0.16260722279548645, "rewards/rejected": -0.41833582520484924, "step": 3290 }, { "epoch": 0.24, "learning_rate": 4.693292381862218e-06, "logits/chosen": -2.1802663803100586, "logits/rejected": -1.83206307888031, "logps/chosen": -472.1202087402344, "logps/rejected": -531.5265502929688, "loss": 0.6837, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2895938754081726, "rewards/margins": 0.09758220613002777, "rewards/rejected": -0.3871760666370392, "step": 3300 }, { "epoch": 0.24, "learning_rate": 4.690195846972176e-06, "logits/chosen": -1.9511611461639404, "logits/rejected": -1.6374499797821045, "logps/chosen": -547.6549682617188, "logps/rejected": -683.7071533203125, "loss": 0.6786, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.34670090675354004, "rewards/margins": 0.1694822460412979, "rewards/rejected": -0.5161831974983215, "step": 3310 }, { "epoch": 0.24, "learning_rate": 4.687084791246741e-06, "logits/chosen": -2.023902416229248, "logits/rejected": -1.5016896724700928, "logps/chosen": -516.6145629882812, "logps/rejected": -672.1461181640625, "loss": 0.681, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3610354959964752, "rewards/margins": 0.19902966916561127, "rewards/rejected": -0.5600651502609253, "step": 3320 }, { "epoch": 0.25, "learning_rate": 4.68395923531198e-06, "logits/chosen": -1.9997457265853882, "logits/rejected": -1.4996792078018188, "logps/chosen": -533.1341552734375, "logps/rejected": -666.4573364257812, "loss": 0.6835, "rewards/accuracies": 0.75, "rewards/chosen": -0.35035791993141174, "rewards/margins": 0.18962709605693817, "rewards/rejected": -0.539984941482544, "step": 3330 }, { "epoch": 0.25, "learning_rate": 4.680819199890098e-06, "logits/chosen": -2.163088321685791, "logits/rejected": -1.9058421850204468, "logps/chosen": -412.0467224121094, "logps/rejected": -493.94683837890625, "loss": 0.682, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.25381892919540405, "rewards/margins": 0.09846028685569763, "rewards/rejected": -0.3522792458534241, "step": 3340 }, { "epoch": 0.25, "learning_rate": 4.677664705799295e-06, "logits/chosen": -2.070801258087158, "logits/rejected": -1.455816388130188, "logps/chosen": -367.8810119628906, "logps/rejected": -485.77099609375, "loss": 0.6769, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1852436363697052, "rewards/margins": 0.17681248486042023, "rewards/rejected": -0.36205610632896423, "step": 3350 }, { "epoch": 0.25, "learning_rate": 4.674495773953633e-06, "logits/chosen": -2.0949184894561768, "logits/rejected": -1.747802734375, "logps/chosen": -432.77001953125, "logps/rejected": -609.9864501953125, "loss": 0.6771, "rewards/accuracies": 0.75, "rewards/chosen": -0.26822593808174133, "rewards/margins": 0.20896467566490173, "rewards/rejected": -0.4771905839443207, "step": 3360 }, { "epoch": 0.25, "learning_rate": 4.671312425362893e-06, "logits/chosen": -2.0722262859344482, "logits/rejected": -1.582524061203003, "logps/chosen": -330.3735046386719, "logps/rejected": -472.229736328125, "loss": 0.6827, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16254395246505737, "rewards/margins": 0.1817348599433899, "rewards/rejected": -0.34427881240844727, "step": 3370 }, { "epoch": 0.25, "learning_rate": 4.668114681132438e-06, "logits/chosen": -2.003589153289795, "logits/rejected": -1.5650556087493896, "logps/chosen": -443.00628662109375, "logps/rejected": -597.4044799804688, "loss": 0.6822, "rewards/accuracies": 0.75, "rewards/chosen": -0.2869635224342346, "rewards/margins": 0.18669132888317108, "rewards/rejected": -0.4736548364162445, "step": 3380 }, { "epoch": 0.25, "learning_rate": 4.664902562463078e-06, "logits/chosen": -2.109870195388794, "logits/rejected": -1.5292056798934937, "logps/chosen": -345.04815673828125, "logps/rejected": -445.378173828125, "loss": 0.684, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18587860465049744, "rewards/margins": 0.12748491764068604, "rewards/rejected": -0.31336352229118347, "step": 3390 }, { "epoch": 0.25, "learning_rate": 4.661676090650917e-06, "logits/chosen": -1.8539365530014038, "logits/rejected": -1.5071132183074951, "logps/chosen": -333.5733947753906, "logps/rejected": -506.46075439453125, "loss": 0.6816, "rewards/accuracies": 0.75, "rewards/chosen": -0.17049363255500793, "rewards/margins": 0.1716930866241455, "rewards/rejected": -0.34218674898147583, "step": 3400 }, { "epoch": 0.25, "learning_rate": 4.658435287087225e-06, "logits/chosen": -2.1377456188201904, "logits/rejected": -1.7369861602783203, "logps/chosen": -396.3992614746094, "logps/rejected": -493.83782958984375, "loss": 0.6814, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.25388047099113464, "rewards/margins": 0.1307210475206375, "rewards/rejected": -0.38460153341293335, "step": 3410 }, { "epoch": 0.25, "learning_rate": 4.6551801732582865e-06, "logits/chosen": -1.9498131275177002, "logits/rejected": -1.2881214618682861, "logps/chosen": -457.5160217285156, "logps/rejected": -635.4774169921875, "loss": 0.6737, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3017142713069916, "rewards/margins": 0.23417258262634277, "rewards/rejected": -0.5358868837356567, "step": 3420 }, { "epoch": 0.25, "learning_rate": 4.651910770745266e-06, "logits/chosen": -1.7100757360458374, "logits/rejected": -1.3130078315734863, "logps/chosen": -588.6864013671875, "logps/rejected": -744.5823364257812, "loss": 0.6782, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.44982266426086426, "rewards/margins": 0.15783119201660156, "rewards/rejected": -0.607653796672821, "step": 3430 }, { "epoch": 0.25, "learning_rate": 4.648627101224057e-06, "logits/chosen": -2.0501115322113037, "logits/rejected": -1.6502844095230103, "logps/chosen": -552.4427490234375, "logps/rejected": -739.8452758789062, "loss": 0.6781, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.38217681646347046, "rewards/margins": 0.20264342427253723, "rewards/rejected": -0.5848202109336853, "step": 3440 }, { "epoch": 0.25, "learning_rate": 4.645329186465144e-06, "logits/chosen": -1.9422318935394287, "logits/rejected": -1.5003360509872437, "logps/chosen": -502.4029235839844, "logps/rejected": -641.333251953125, "loss": 0.6784, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3157939314842224, "rewards/margins": 0.16932789981365204, "rewards/rejected": -0.48512181639671326, "step": 3450 }, { "epoch": 0.26, "learning_rate": 4.642017048333457e-06, "logits/chosen": -2.2475054264068604, "logits/rejected": -1.5270471572875977, "logps/chosen": -496.64422607421875, "logps/rejected": -635.1741943359375, "loss": 0.6756, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.29622453451156616, "rewards/margins": 0.21357819437980652, "rewards/rejected": -0.5098026990890503, "step": 3460 }, { "epoch": 0.26, "learning_rate": 4.638690708788226e-06, "logits/chosen": -2.181607484817505, "logits/rejected": -1.645473837852478, "logps/chosen": -589.1218872070312, "logps/rejected": -708.7056884765625, "loss": 0.6817, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.41418275237083435, "rewards/margins": 0.16225366294384003, "rewards/rejected": -0.5764364004135132, "step": 3470 }, { "epoch": 0.26, "learning_rate": 4.635350189882833e-06, "logits/chosen": -1.9373209476470947, "logits/rejected": -1.5137991905212402, "logps/chosen": -666.9593505859375, "logps/rejected": -795.01513671875, "loss": 0.6808, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.49166354537010193, "rewards/margins": 0.1509801596403122, "rewards/rejected": -0.6426436901092529, "step": 3480 }, { "epoch": 0.26, "learning_rate": 4.63199551376467e-06, "logits/chosen": -2.043956995010376, "logits/rejected": -1.4333372116088867, "logps/chosen": -635.4266967773438, "logps/rejected": -802.7305908203125, "loss": 0.6742, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4766560196876526, "rewards/margins": 0.20333382487297058, "rewards/rejected": -0.6799898743629456, "step": 3490 }, { "epoch": 0.26, "learning_rate": 4.62862670267499e-06, "logits/chosen": -2.083129405975342, "logits/rejected": -1.526521921157837, "logps/chosen": -515.5819702148438, "logps/rejected": -696.8502197265625, "loss": 0.6786, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.36724013090133667, "rewards/margins": 0.21278545260429382, "rewards/rejected": -0.5800256729125977, "step": 3500 }, { "epoch": 0.26, "learning_rate": 4.62524377894876e-06, "logits/chosen": -2.2358899116516113, "logits/rejected": -1.4217404127120972, "logps/chosen": -520.0675048828125, "logps/rejected": -670.1754150390625, "loss": 0.6766, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.34360355138778687, "rewards/margins": 0.19638684391975403, "rewards/rejected": -0.5399904251098633, "step": 3510 }, { "epoch": 0.26, "learning_rate": 4.621846765014513e-06, "logits/chosen": -1.7493391036987305, "logits/rejected": -1.544257640838623, "logps/chosen": -510.4440002441406, "logps/rejected": -682.933837890625, "loss": 0.676, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3624548316001892, "rewards/margins": 0.19307884573936462, "rewards/rejected": -0.5555336475372314, "step": 3520 }, { "epoch": 0.26, "learning_rate": 4.618435683394196e-06, "logits/chosen": -2.0856409072875977, "logits/rejected": -1.6226524114608765, "logps/chosen": -403.51934814453125, "logps/rejected": -585.3278198242188, "loss": 0.6733, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.28156137466430664, "rewards/margins": 0.21930691599845886, "rewards/rejected": -0.5008682012557983, "step": 3530 }, { "epoch": 0.26, "learning_rate": 4.615010556703029e-06, "logits/chosen": -1.9964889287948608, "logits/rejected": -1.2981853485107422, "logps/chosen": -420.223388671875, "logps/rejected": -599.1774291992188, "loss": 0.6767, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23366045951843262, "rewards/margins": 0.2244924008846283, "rewards/rejected": -0.4581528604030609, "step": 3540 }, { "epoch": 0.26, "learning_rate": 4.611571407649345e-06, "logits/chosen": -2.17317533493042, "logits/rejected": -1.8590233325958252, "logps/chosen": -384.25665283203125, "logps/rejected": -531.1565551757812, "loss": 0.6825, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20370960235595703, "rewards/margins": 0.15051259100437164, "rewards/rejected": -0.35422220826148987, "step": 3550 }, { "epoch": 0.26, "learning_rate": 4.608118259034447e-06, "logits/chosen": -2.020747184753418, "logits/rejected": -1.5471725463867188, "logps/chosen": -424.8536682128906, "logps/rejected": -595.6142578125, "loss": 0.6726, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2253534346818924, "rewards/margins": 0.21536946296691895, "rewards/rejected": -0.44072285294532776, "step": 3560 }, { "epoch": 0.26, "learning_rate": 4.604651133752454e-06, "logits/chosen": -1.7613757848739624, "logits/rejected": -1.4532428979873657, "logps/chosen": -516.093994140625, "logps/rejected": -623.9066162109375, "loss": 0.6802, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3755740821361542, "rewards/margins": 0.13360019028186798, "rewards/rejected": -0.5091742873191833, "step": 3570 }, { "epoch": 0.26, "learning_rate": 4.601170054790147e-06, "logits/chosen": -2.1541924476623535, "logits/rejected": -1.6179163455963135, "logps/chosen": -483.76361083984375, "logps/rejected": -684.8717041015625, "loss": 0.6748, "rewards/accuracies": 0.75, "rewards/chosen": -0.32307302951812744, "rewards/margins": 0.24360735714435577, "rewards/rejected": -0.5666804313659668, "step": 3580 }, { "epoch": 0.26, "learning_rate": 4.597675045226822e-06, "logits/chosen": -2.0487887859344482, "logits/rejected": -1.7267860174179077, "logps/chosen": -496.6344299316406, "logps/rejected": -574.860107421875, "loss": 0.6817, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.35725730657577515, "rewards/margins": 0.11896417289972305, "rewards/rejected": -0.47622150182724, "step": 3590 }, { "epoch": 0.27, "learning_rate": 4.594166128234129e-06, "logits/chosen": -2.1126949787139893, "logits/rejected": -1.6451336145401, "logps/chosen": -539.4654541015625, "logps/rejected": -698.4227905273438, "loss": 0.6799, "rewards/accuracies": 0.75, "rewards/chosen": -0.3757946789264679, "rewards/margins": 0.2124338150024414, "rewards/rejected": -0.5882285833358765, "step": 3600 }, { "epoch": 0.27, "learning_rate": 4.59064332707593e-06, "logits/chosen": -2.1157875061035156, "logits/rejected": -1.6640087366104126, "logps/chosen": -608.1643676757812, "logps/rejected": -768.2630004882812, "loss": 0.6751, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.43120208382606506, "rewards/margins": 0.20883961021900177, "rewards/rejected": -0.6400415897369385, "step": 3610 }, { "epoch": 0.27, "learning_rate": 4.587106665108132e-06, "logits/chosen": -2.037764072418213, "logits/rejected": -1.6153948307037354, "logps/chosen": -632.917236328125, "logps/rejected": -766.1554565429688, "loss": 0.6792, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.42730712890625, "rewards/margins": 0.18782958388328552, "rewards/rejected": -0.6151367425918579, "step": 3620 }, { "epoch": 0.27, "learning_rate": 4.5835561657785425e-06, "logits/chosen": -2.0975871086120605, "logits/rejected": -1.5173523426055908, "logps/chosen": -591.9334716796875, "logps/rejected": -800.4415283203125, "loss": 0.6733, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4080953598022461, "rewards/margins": 0.26435571908950806, "rewards/rejected": -0.6724510788917542, "step": 3630 }, { "epoch": 0.27, "learning_rate": 4.579991852626706e-06, "logits/chosen": -2.005526542663574, "logits/rejected": -1.4812642335891724, "logps/chosen": -612.5805053710938, "logps/rejected": -811.107421875, "loss": 0.6763, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4456785321235657, "rewards/margins": 0.21838092803955078, "rewards/rejected": -0.6640594601631165, "step": 3640 }, { "epoch": 0.27, "learning_rate": 4.576413749283753e-06, "logits/chosen": -2.2068512439727783, "logits/rejected": -1.8210744857788086, "logps/chosen": -584.8489990234375, "logps/rejected": -684.6730346679688, "loss": 0.6882, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3803740441799164, "rewards/margins": 0.13406264781951904, "rewards/rejected": -0.5144367218017578, "step": 3650 }, { "epoch": 0.27, "learning_rate": 4.572821879472245e-06, "logits/chosen": -2.061161518096924, "logits/rejected": -1.6803900003433228, "logps/chosen": -459.4137268066406, "logps/rejected": -599.5760498046875, "loss": 0.681, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.32265740633010864, "rewards/margins": 0.14901676774024963, "rewards/rejected": -0.4716741442680359, "step": 3660 }, { "epoch": 0.27, "learning_rate": 4.569216267006009e-06, "logits/chosen": -2.1531131267547607, "logits/rejected": -1.429368019104004, "logps/chosen": -444.91827392578125, "logps/rejected": -607.9827880859375, "loss": 0.6704, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2823893129825592, "rewards/margins": 0.2072608768939972, "rewards/rejected": -0.489650160074234, "step": 3670 }, { "epoch": 0.27, "learning_rate": 4.565596935789987e-06, "logits/chosen": -2.1514763832092285, "logits/rejected": -1.6253398656845093, "logps/chosen": -431.38427734375, "logps/rejected": -592.7208862304688, "loss": 0.6784, "rewards/accuracies": 0.625, "rewards/chosen": -0.30296745896339417, "rewards/margins": 0.1670651137828827, "rewards/rejected": -0.47003254294395447, "step": 3680 }, { "epoch": 0.27, "learning_rate": 4.561963909820078e-06, "logits/chosen": -2.1155827045440674, "logits/rejected": -1.485912799835205, "logps/chosen": -406.2842102050781, "logps/rejected": -529.9671630859375, "loss": 0.6851, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24619793891906738, "rewards/margins": 0.17929580807685852, "rewards/rejected": -0.4254938066005707, "step": 3690 }, { "epoch": 0.27, "learning_rate": 4.55831721318297e-06, "logits/chosen": -1.9890403747558594, "logits/rejected": -1.5642322301864624, "logps/chosen": -428.8604431152344, "logps/rejected": -645.4283447265625, "loss": 0.6778, "rewards/accuracies": 0.75, "rewards/chosen": -0.26548051834106445, "rewards/margins": 0.22513696551322937, "rewards/rejected": -0.49061745405197144, "step": 3700 }, { "epoch": 0.27, "learning_rate": 4.554656870055993e-06, "logits/chosen": -2.2015175819396973, "logits/rejected": -1.691781997680664, "logps/chosen": -457.389404296875, "logps/rejected": -619.5275268554688, "loss": 0.6804, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2954995036125183, "rewards/margins": 0.15671002864837646, "rewards/rejected": -0.4522095322608948, "step": 3710 }, { "epoch": 0.27, "learning_rate": 4.550982904706949e-06, "logits/chosen": -2.0103049278259277, "logits/rejected": -1.4545055627822876, "logps/chosen": -461.248046875, "logps/rejected": -610.4229736328125, "loss": 0.6747, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3066430985927582, "rewards/margins": 0.21357174217700958, "rewards/rejected": -0.5202149152755737, "step": 3720 }, { "epoch": 0.28, "learning_rate": 4.547295341493954e-06, "logits/chosen": -1.7959524393081665, "logits/rejected": -1.7828565835952759, "logps/chosen": -434.8436584472656, "logps/rejected": -535.3981323242188, "loss": 0.6888, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3301071524620056, "rewards/margins": 0.09084083139896393, "rewards/rejected": -0.4209480285644531, "step": 3730 }, { "epoch": 0.28, "learning_rate": 4.543594204865277e-06, "logits/chosen": -1.9916760921478271, "logits/rejected": -1.6552870273590088, "logps/chosen": -411.0361328125, "logps/rejected": -584.7454223632812, "loss": 0.6789, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2550368905067444, "rewards/margins": 0.16617730259895325, "rewards/rejected": -0.42121416330337524, "step": 3740 }, { "epoch": 0.28, "learning_rate": 4.5398795193591795e-06, "logits/chosen": -2.0119881629943848, "logits/rejected": -1.829275369644165, "logps/chosen": -379.0799865722656, "logps/rejected": -490.9358825683594, "loss": 0.6823, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21410255134105682, "rewards/margins": 0.13510611653327942, "rewards/rejected": -0.34920868277549744, "step": 3750 }, { "epoch": 0.28, "learning_rate": 4.53615130960375e-06, "logits/chosen": -2.2355105876922607, "logits/rejected": -1.5802521705627441, "logps/chosen": -481.89404296875, "logps/rejected": -614.5437622070312, "loss": 0.6811, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3069421350955963, "rewards/margins": 0.17923226952552795, "rewards/rejected": -0.48617440462112427, "step": 3760 }, { "epoch": 0.28, "learning_rate": 4.532409600316741e-06, "logits/chosen": -2.114014148712158, "logits/rejected": -1.5575037002563477, "logps/chosen": -517.7396240234375, "logps/rejected": -633.5247192382812, "loss": 0.6848, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3470626771450043, "rewards/margins": 0.1467229425907135, "rewards/rejected": -0.49378567934036255, "step": 3770 }, { "epoch": 0.28, "learning_rate": 4.5286544163054055e-06, "logits/chosen": -2.139425039291382, "logits/rejected": -1.6500381231307983, "logps/chosen": -406.1200256347656, "logps/rejected": -547.8240356445312, "loss": 0.6748, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2613753378391266, "rewards/margins": 0.18189918994903564, "rewards/rejected": -0.443274587392807, "step": 3780 }, { "epoch": 0.28, "learning_rate": 4.5248857824663365e-06, "logits/chosen": -2.1055569648742676, "logits/rejected": -1.5906869173049927, "logps/chosen": -407.316162109375, "logps/rejected": -604.1676635742188, "loss": 0.6779, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2710603177547455, "rewards/margins": 0.21081450581550598, "rewards/rejected": -0.48187485337257385, "step": 3790 }, { "epoch": 0.28, "learning_rate": 4.5211037237852926e-06, "logits/chosen": -2.3190269470214844, "logits/rejected": -1.7784054279327393, "logps/chosen": -474.517578125, "logps/rejected": -624.3898315429688, "loss": 0.6813, "rewards/accuracies": 0.75, "rewards/chosen": -0.32666462659835815, "rewards/margins": 0.17506316304206848, "rewards/rejected": -0.501727819442749, "step": 3800 }, { "epoch": 0.28, "learning_rate": 4.5173082653370435e-06, "logits/chosen": -2.181819438934326, "logits/rejected": -1.8536802530288696, "logps/chosen": -431.0770568847656, "logps/rejected": -556.99169921875, "loss": 0.6772, "rewards/accuracies": 0.75, "rewards/chosen": -0.22220559418201447, "rewards/margins": 0.16938142478466034, "rewards/rejected": -0.3915869891643524, "step": 3810 }, { "epoch": 0.28, "learning_rate": 4.513499432285196e-06, "logits/chosen": -2.1265206336975098, "logits/rejected": -1.6496025323867798, "logps/chosen": -531.7352905273438, "logps/rejected": -752.3884887695312, "loss": 0.6735, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3706919550895691, "rewards/margins": 0.21235740184783936, "rewards/rejected": -0.5830492973327637, "step": 3820 }, { "epoch": 0.28, "learning_rate": 4.5096772498820274e-06, "logits/chosen": -2.360023021697998, "logits/rejected": -1.9862079620361328, "logps/chosen": -437.3193359375, "logps/rejected": -564.4329223632812, "loss": 0.6812, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.27051061391830444, "rewards/margins": 0.13361206650733948, "rewards/rejected": -0.4041227400302887, "step": 3830 }, { "epoch": 0.28, "learning_rate": 4.505841743468326e-06, "logits/chosen": -2.2142796516418457, "logits/rejected": -1.61630117893219, "logps/chosen": -520.5887451171875, "logps/rejected": -655.4898071289062, "loss": 0.6832, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.33566048741340637, "rewards/margins": 0.17879366874694824, "rewards/rejected": -0.514454185962677, "step": 3840 }, { "epoch": 0.28, "learning_rate": 4.50199293847321e-06, "logits/chosen": -1.9514808654785156, "logits/rejected": -1.7296068668365479, "logps/chosen": -556.1450805664062, "logps/rejected": -729.8370361328125, "loss": 0.68, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3991331458091736, "rewards/margins": 0.18376648426055908, "rewards/rejected": -0.5828996896743774, "step": 3850 }, { "epoch": 0.28, "learning_rate": 4.498130860413973e-06, "logits/chosen": -2.1395680904388428, "logits/rejected": -1.611881971359253, "logps/chosen": -524.8179931640625, "logps/rejected": -620.5498657226562, "loss": 0.6789, "rewards/accuracies": 0.625, "rewards/chosen": -0.32426661252975464, "rewards/margins": 0.14992190897464752, "rewards/rejected": -0.47418856620788574, "step": 3860 }, { "epoch": 0.29, "learning_rate": 4.4942555348959035e-06, "logits/chosen": -2.363089084625244, "logits/rejected": -1.7650477886199951, "logps/chosen": -432.96539306640625, "logps/rejected": -652.7008056640625, "loss": 0.6724, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.29681429266929626, "rewards/margins": 0.24118804931640625, "rewards/rejected": -0.5380023121833801, "step": 3870 }, { "epoch": 0.29, "learning_rate": 4.490366987612121e-06, "logits/chosen": -2.002821445465088, "logits/rejected": -1.5300052165985107, "logps/chosen": -540.4796752929688, "logps/rejected": -674.7386474609375, "loss": 0.6765, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3577803671360016, "rewards/margins": 0.16833558678627014, "rewards/rejected": -0.5261159539222717, "step": 3880 }, { "epoch": 0.29, "learning_rate": 4.486465244343406e-06, "logits/chosen": -2.053421974182129, "logits/rejected": -1.3590614795684814, "logps/chosen": -541.4669189453125, "logps/rejected": -738.9738159179688, "loss": 0.6744, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.380657434463501, "rewards/margins": 0.24342267215251923, "rewards/rejected": -0.6240800619125366, "step": 3890 }, { "epoch": 0.29, "learning_rate": 4.482550330958024e-06, "logits/chosen": -2.160762071609497, "logits/rejected": -1.801643967628479, "logps/chosen": -500.6005859375, "logps/rejected": -615.0338134765625, "loss": 0.6814, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.27797746658325195, "rewards/margins": 0.1607418954372406, "rewards/rejected": -0.43871933221817017, "step": 3900 }, { "epoch": 0.29, "learning_rate": 4.478622273411559e-06, "logits/chosen": -1.965999960899353, "logits/rejected": -1.5790588855743408, "logps/chosen": -532.6522216796875, "logps/rejected": -704.6229248046875, "loss": 0.6778, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3726470470428467, "rewards/margins": 0.20000207424163818, "rewards/rejected": -0.5726490616798401, "step": 3910 }, { "epoch": 0.29, "learning_rate": 4.4746810977467435e-06, "logits/chosen": -2.29264235496521, "logits/rejected": -2.0020499229431152, "logps/chosen": -480.98760986328125, "logps/rejected": -630.8859252929688, "loss": 0.6809, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2974182665348053, "rewards/margins": 0.16592827439308167, "rewards/rejected": -0.46334654092788696, "step": 3920 }, { "epoch": 0.29, "learning_rate": 4.470726830093274e-06, "logits/chosen": -2.0071332454681396, "logits/rejected": -1.8308674097061157, "logps/chosen": -478.20086669921875, "logps/rejected": -622.0347900390625, "loss": 0.6797, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3126719295978546, "rewards/margins": 0.17708781361579895, "rewards/rejected": -0.4897596836090088, "step": 3930 }, { "epoch": 0.29, "learning_rate": 4.466759496667654e-06, "logits/chosen": -2.165135383605957, "logits/rejected": -1.9828903675079346, "logps/chosen": -375.06317138671875, "logps/rejected": -482.0126037597656, "loss": 0.6865, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22645564377307892, "rewards/margins": 0.1266649216413498, "rewards/rejected": -0.3531205952167511, "step": 3940 }, { "epoch": 0.29, "learning_rate": 4.462779123773008e-06, "logits/chosen": -2.022597074508667, "logits/rejected": -1.309683084487915, "logps/chosen": -471.60284423828125, "logps/rejected": -624.2610473632812, "loss": 0.6847, "rewards/accuracies": 0.75, "rewards/chosen": -0.29989129304885864, "rewards/margins": 0.20605841279029846, "rewards/rejected": -0.5059496760368347, "step": 3950 }, { "epoch": 0.29, "learning_rate": 4.458785737798911e-06, "logits/chosen": -2.111203670501709, "logits/rejected": -1.6290671825408936, "logps/chosen": -393.31573486328125, "logps/rejected": -522.4007568359375, "loss": 0.6827, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20891878008842468, "rewards/margins": 0.16929133236408234, "rewards/rejected": -0.3782101273536682, "step": 3960 }, { "epoch": 0.29, "learning_rate": 4.454779365221216e-06, "logits/chosen": -2.0934672355651855, "logits/rejected": -1.5571308135986328, "logps/chosen": -526.06005859375, "logps/rejected": -733.3610229492188, "loss": 0.6768, "rewards/accuracies": 0.75, "rewards/chosen": -0.3871205449104309, "rewards/margins": 0.2193876951932907, "rewards/rejected": -0.6065082550048828, "step": 3970 }, { "epoch": 0.29, "learning_rate": 4.450760032601873e-06, "logits/chosen": -2.069620370864868, "logits/rejected": -1.5331978797912598, "logps/chosen": -429.3226013183594, "logps/rejected": -576.5382690429688, "loss": 0.6789, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.29694658517837524, "rewards/margins": 0.17518749833106995, "rewards/rejected": -0.4721341133117676, "step": 3980 }, { "epoch": 0.29, "learning_rate": 4.44672776658876e-06, "logits/chosen": -2.318953275680542, "logits/rejected": -1.8482789993286133, "logps/chosen": -422.5732421875, "logps/rejected": -586.44775390625, "loss": 0.6766, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21459908783435822, "rewards/margins": 0.21023079752922058, "rewards/rejected": -0.4248298704624176, "step": 3990 }, { "epoch": 0.3, "learning_rate": 4.442682593915499e-06, "logits/chosen": -2.1159420013427734, "logits/rejected": -1.6756852865219116, "logps/chosen": -528.2681884765625, "logps/rejected": -657.8836669921875, "loss": 0.6834, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3859134316444397, "rewards/margins": 0.14690224826335907, "rewards/rejected": -0.5328156352043152, "step": 4000 }, { "epoch": 0.3, "learning_rate": 4.4386245414012845e-06, "logits/chosen": -2.1406102180480957, "logits/rejected": -1.8869701623916626, "logps/chosen": -490.47637939453125, "logps/rejected": -576.2750244140625, "loss": 0.6868, "rewards/accuracies": 0.625, "rewards/chosen": -0.3135947585105896, "rewards/margins": 0.11552349478006363, "rewards/rejected": -0.4291183054447174, "step": 4010 }, { "epoch": 0.3, "learning_rate": 4.4345536359507025e-06, "logits/chosen": -2.1632564067840576, "logits/rejected": -1.9004265069961548, "logps/chosen": -454.6832580566406, "logps/rejected": -541.2528686523438, "loss": 0.6829, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.31262174248695374, "rewards/margins": 0.10112510621547699, "rewards/rejected": -0.41374683380126953, "step": 4020 }, { "epoch": 0.3, "learning_rate": 4.430469904553552e-06, "logits/chosen": -1.9300132989883423, "logits/rejected": -1.546290636062622, "logps/chosen": -477.02191162109375, "logps/rejected": -607.6755981445312, "loss": 0.6773, "rewards/accuracies": 0.75, "rewards/chosen": -0.30692505836486816, "rewards/margins": 0.17997345328330994, "rewards/rejected": -0.4868985116481781, "step": 4030 }, { "epoch": 0.3, "learning_rate": 4.426373374284671e-06, "logits/chosen": -2.0140411853790283, "logits/rejected": -1.5477627515792847, "logps/chosen": -399.28570556640625, "logps/rejected": -606.4757690429688, "loss": 0.6737, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.26159024238586426, "rewards/margins": 0.24392464756965637, "rewards/rejected": -0.505514919757843, "step": 4040 }, { "epoch": 0.3, "learning_rate": 4.422264072303746e-06, "logits/chosen": -2.0416860580444336, "logits/rejected": -1.674541711807251, "logps/chosen": -549.6180419921875, "logps/rejected": -690.1470947265625, "loss": 0.686, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.430001437664032, "rewards/margins": 0.14625824987888336, "rewards/rejected": -0.5762597322463989, "step": 4050 }, { "epoch": 0.3, "learning_rate": 4.418142025855145e-06, "logits/chosen": -1.960615873336792, "logits/rejected": -1.5696974992752075, "logps/chosen": -578.367919921875, "logps/rejected": -750.600830078125, "loss": 0.6782, "rewards/accuracies": 0.75, "rewards/chosen": -0.42501527070999146, "rewards/margins": 0.1915598213672638, "rewards/rejected": -0.6165751218795776, "step": 4060 }, { "epoch": 0.3, "learning_rate": 4.41400726226773e-06, "logits/chosen": -2.2910966873168945, "logits/rejected": -1.5879672765731812, "logps/chosen": -544.4178466796875, "logps/rejected": -700.865478515625, "loss": 0.6762, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.30994802713394165, "rewards/margins": 0.21740677952766418, "rewards/rejected": -0.5273547172546387, "step": 4070 }, { "epoch": 0.3, "learning_rate": 4.409859808954675e-06, "logits/chosen": -2.0812692642211914, "logits/rejected": -1.347348690032959, "logps/chosen": -519.0238037109375, "logps/rejected": -692.6957397460938, "loss": 0.682, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.36263054609298706, "rewards/margins": 0.24401137232780457, "rewards/rejected": -0.6066418886184692, "step": 4080 }, { "epoch": 0.3, "learning_rate": 4.405699693413287e-06, "logits/chosen": -1.9704952239990234, "logits/rejected": -1.4813319444656372, "logps/chosen": -539.4690551757812, "logps/rejected": -681.2817993164062, "loss": 0.6804, "rewards/accuracies": 0.75, "rewards/chosen": -0.39297619462013245, "rewards/margins": 0.1737770289182663, "rewards/rejected": -0.5667532086372375, "step": 4090 }, { "epoch": 0.3, "learning_rate": 4.401526943224822e-06, "logits/chosen": -2.2112860679626465, "logits/rejected": -1.693476676940918, "logps/chosen": -483.75042724609375, "logps/rejected": -657.279052734375, "loss": 0.6794, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.34739580750465393, "rewards/margins": 0.1633853316307068, "rewards/rejected": -0.5107811689376831, "step": 4100 }, { "epoch": 0.3, "learning_rate": 4.3973415860543025e-06, "logits/chosen": -2.190434694290161, "logits/rejected": -1.7217071056365967, "logps/chosen": -576.5224609375, "logps/rejected": -738.0067749023438, "loss": 0.6809, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4110058844089508, "rewards/margins": 0.18543019890785217, "rewards/rejected": -0.596436083316803, "step": 4110 }, { "epoch": 0.3, "learning_rate": 4.393143649650336e-06, "logits/chosen": -2.3100712299346924, "logits/rejected": -1.7221057415008545, "logps/chosen": -678.009521484375, "logps/rejected": -784.9572143554688, "loss": 0.6809, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4802224636077881, "rewards/margins": 0.1785128116607666, "rewards/rejected": -0.6587352752685547, "step": 4120 }, { "epoch": 0.3, "learning_rate": 4.388933161844927e-06, "logits/chosen": -2.2142083644866943, "logits/rejected": -1.7171032428741455, "logps/chosen": -580.5217895507812, "logps/rejected": -696.1652221679688, "loss": 0.6798, "rewards/accuracies": 0.625, "rewards/chosen": -0.4046753942966461, "rewards/margins": 0.16198867559432983, "rewards/rejected": -0.5666640996932983, "step": 4130 }, { "epoch": 0.31, "learning_rate": 4.384710150553298e-06, "logits/chosen": -2.0585150718688965, "logits/rejected": -1.8954334259033203, "logps/chosen": -674.3916015625, "logps/rejected": -820.83642578125, "loss": 0.684, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4931870400905609, "rewards/margins": 0.15004967153072357, "rewards/rejected": -0.6432366967201233, "step": 4140 }, { "epoch": 0.31, "learning_rate": 4.380474643773698e-06, "logits/chosen": -2.0731453895568848, "logits/rejected": -1.6182715892791748, "logps/chosen": -586.5407104492188, "logps/rejected": -758.8681030273438, "loss": 0.6754, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.38900870084762573, "rewards/margins": 0.2123805731534958, "rewards/rejected": -0.6013892889022827, "step": 4150 }, { "epoch": 0.31, "learning_rate": 4.3762266695872215e-06, "logits/chosen": -2.248610019683838, "logits/rejected": -2.0282630920410156, "logps/chosen": -565.8055419921875, "logps/rejected": -644.5511474609375, "loss": 0.6862, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3903128504753113, "rewards/margins": 0.07532461732625961, "rewards/rejected": -0.4656375050544739, "step": 4160 }, { "epoch": 0.31, "learning_rate": 4.371966256157621e-06, "logits/chosen": -2.090864896774292, "logits/rejected": -1.590003490447998, "logps/chosen": -466.3128967285156, "logps/rejected": -639.5113525390625, "loss": 0.6717, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.32121387124061584, "rewards/margins": 0.20554928481578827, "rewards/rejected": -0.5267631411552429, "step": 4170 }, { "epoch": 0.31, "learning_rate": 4.367693431731122e-06, "logits/chosen": -2.1199820041656494, "logits/rejected": -1.6934248208999634, "logps/chosen": -462.59814453125, "logps/rejected": -619.623291015625, "loss": 0.6769, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2909795343875885, "rewards/margins": 0.2061474323272705, "rewards/rejected": -0.4971269965171814, "step": 4180 }, { "epoch": 0.31, "learning_rate": 4.363408224636231e-06, "logits/chosen": -2.22369647026062, "logits/rejected": -1.7559322118759155, "logps/chosen": -556.6814575195312, "logps/rejected": -676.3679809570312, "loss": 0.6781, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.33462610840797424, "rewards/margins": 0.19820965826511383, "rewards/rejected": -0.5328357815742493, "step": 4190 }, { "epoch": 0.31, "learning_rate": 4.359110663283551e-06, "logits/chosen": -2.1686158180236816, "logits/rejected": -1.8941185474395752, "logps/chosen": -483.4534606933594, "logps/rejected": -639.9644775390625, "loss": 0.6825, "rewards/accuracies": 0.75, "rewards/chosen": -0.25673794746398926, "rewards/margins": 0.20770478248596191, "rewards/rejected": -0.46444272994995117, "step": 4200 }, { "epoch": 0.31, "learning_rate": 4.354800776165596e-06, "logits/chosen": -1.9019454717636108, "logits/rejected": -1.4659960269927979, "logps/chosen": -558.9629516601562, "logps/rejected": -661.8509521484375, "loss": 0.6805, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.37244662642478943, "rewards/margins": 0.1526588648557663, "rewards/rejected": -0.5251055359840393, "step": 4210 }, { "epoch": 0.31, "learning_rate": 4.350478591856595e-06, "logits/chosen": -2.081845760345459, "logits/rejected": -1.6903438568115234, "logps/chosen": -594.0341186523438, "logps/rejected": -684.6719970703125, "loss": 0.6813, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.45362749695777893, "rewards/margins": 0.13786223530769348, "rewards/rejected": -0.5914896726608276, "step": 4220 }, { "epoch": 0.31, "learning_rate": 4.346144139012309e-06, "logits/chosen": -2.010509967803955, "logits/rejected": -1.4193147420883179, "logps/chosen": -500.9068298339844, "logps/rejected": -761.5528564453125, "loss": 0.6703, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3666777014732361, "rewards/margins": 0.26595842838287354, "rewards/rejected": -0.6326361298561096, "step": 4230 }, { "epoch": 0.31, "learning_rate": 4.341797446369837e-06, "logits/chosen": -2.2210865020751953, "logits/rejected": -1.5450299978256226, "logps/chosen": -437.05645751953125, "logps/rejected": -615.1804809570312, "loss": 0.6743, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2691074013710022, "rewards/margins": 0.23885194957256317, "rewards/rejected": -0.5079594254493713, "step": 4240 }, { "epoch": 0.31, "learning_rate": 4.337438542747429e-06, "logits/chosen": -2.2811319828033447, "logits/rejected": -1.8662903308868408, "logps/chosen": -479.2779235839844, "logps/rejected": -561.0595703125, "loss": 0.6839, "rewards/accuracies": 0.625, "rewards/chosen": -0.2565178871154785, "rewards/margins": 0.14309760928153992, "rewards/rejected": -0.39961546659469604, "step": 4250 }, { "epoch": 0.31, "learning_rate": 4.33306745704429e-06, "logits/chosen": -2.146693229675293, "logits/rejected": -1.5369186401367188, "logps/chosen": -514.2200927734375, "logps/rejected": -599.0819091796875, "loss": 0.6804, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3287217617034912, "rewards/margins": 0.1386398822069168, "rewards/rejected": -0.46736159920692444, "step": 4260 }, { "epoch": 0.31, "learning_rate": 4.328684218240393e-06, "logits/chosen": -1.9338302612304688, "logits/rejected": -1.6224333047866821, "logps/chosen": -384.7673034667969, "logps/rejected": -528.53076171875, "loss": 0.6811, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19226045906543732, "rewards/margins": 0.16576306521892548, "rewards/rejected": -0.3580234944820404, "step": 4270 }, { "epoch": 0.32, "learning_rate": 4.324288855396285e-06, "logits/chosen": -1.878766655921936, "logits/rejected": -1.4638521671295166, "logps/chosen": -469.6280822753906, "logps/rejected": -673.2039794921875, "loss": 0.6785, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3430597484111786, "rewards/margins": 0.22101983428001404, "rewards/rejected": -0.5640795826911926, "step": 4280 }, { "epoch": 0.32, "learning_rate": 4.319881397652896e-06, "logits/chosen": -2.028428316116333, "logits/rejected": -1.6282380819320679, "logps/chosen": -530.36767578125, "logps/rejected": -708.94384765625, "loss": 0.6766, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.35491567850112915, "rewards/margins": 0.20560172200202942, "rewards/rejected": -0.560517430305481, "step": 4290 }, { "epoch": 0.32, "learning_rate": 4.315461874231339e-06, "logits/chosen": -1.8839778900146484, "logits/rejected": -1.3865957260131836, "logps/chosen": -474.79541015625, "logps/rejected": -656.6475830078125, "loss": 0.6759, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3138883709907532, "rewards/margins": 0.20917406678199768, "rewards/rejected": -0.5230624675750732, "step": 4300 }, { "epoch": 0.32, "learning_rate": 4.311030314432728e-06, "logits/chosen": -2.0406928062438965, "logits/rejected": -1.6497993469238281, "logps/chosen": -466.04296875, "logps/rejected": -624.8379516601562, "loss": 0.6789, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3536514639854431, "rewards/margins": 0.16421549022197723, "rewards/rejected": -0.5178669691085815, "step": 4310 }, { "epoch": 0.32, "learning_rate": 4.306586747637974e-06, "logits/chosen": -2.0399813652038574, "logits/rejected": -1.4749126434326172, "logps/chosen": -348.8336486816406, "logps/rejected": -574.1886596679688, "loss": 0.6681, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20764441788196564, "rewards/margins": 0.24871332943439484, "rewards/rejected": -0.4563577175140381, "step": 4320 }, { "epoch": 0.32, "learning_rate": 4.302131203307595e-06, "logits/chosen": -1.9884841442108154, "logits/rejected": -1.5559688806533813, "logps/chosen": -467.17010498046875, "logps/rejected": -632.7347412109375, "loss": 0.6802, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.32456663250923157, "rewards/margins": 0.18377678096294403, "rewards/rejected": -0.5083433985710144, "step": 4330 }, { "epoch": 0.32, "learning_rate": 4.297663710981516e-06, "logits/chosen": -2.0517537593841553, "logits/rejected": -1.3356496095657349, "logps/chosen": -467.00616455078125, "logps/rejected": -667.6506958007812, "loss": 0.6768, "rewards/accuracies": 0.75, "rewards/chosen": -0.3243938386440277, "rewards/margins": 0.23374910652637482, "rewards/rejected": -0.558142900466919, "step": 4340 }, { "epoch": 0.32, "learning_rate": 4.29318430027888e-06, "logits/chosen": -1.9264461994171143, "logits/rejected": -1.3837589025497437, "logps/chosen": -566.6915283203125, "logps/rejected": -774.9522094726562, "loss": 0.6776, "rewards/accuracies": 0.75, "rewards/chosen": -0.41847294569015503, "rewards/margins": 0.23506765067577362, "rewards/rejected": -0.6535406708717346, "step": 4350 }, { "epoch": 0.32, "learning_rate": 4.288693000897846e-06, "logits/chosen": -2.107224464416504, "logits/rejected": -1.4211249351501465, "logps/chosen": -488.984619140625, "logps/rejected": -647.3514404296875, "loss": 0.6813, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.332313597202301, "rewards/margins": 0.1715950071811676, "rewards/rejected": -0.5039085149765015, "step": 4360 }, { "epoch": 0.32, "learning_rate": 4.284189842615395e-06, "logits/chosen": -2.0721049308776855, "logits/rejected": -1.5251384973526, "logps/chosen": -580.9405517578125, "logps/rejected": -669.4756469726562, "loss": 0.6825, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.36684659123420715, "rewards/margins": 0.16097334027290344, "rewards/rejected": -0.5278199315071106, "step": 4370 }, { "epoch": 0.32, "learning_rate": 4.27967485528713e-06, "logits/chosen": -2.0472476482391357, "logits/rejected": -1.2626854181289673, "logps/chosen": -479.93243408203125, "logps/rejected": -613.3817138671875, "loss": 0.675, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2847113609313965, "rewards/margins": 0.21465793251991272, "rewards/rejected": -0.4993693232536316, "step": 4380 }, { "epoch": 0.32, "learning_rate": 4.275148068847081e-06, "logits/chosen": -2.0124852657318115, "logits/rejected": -1.2215148210525513, "logps/chosen": -569.5925903320312, "logps/rejected": -724.6495361328125, "loss": 0.6729, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.36731985211372375, "rewards/margins": 0.22700457274913788, "rewards/rejected": -0.5943244099617004, "step": 4390 }, { "epoch": 0.32, "learning_rate": 4.270609513307506e-06, "logits/chosen": -2.0238211154937744, "logits/rejected": -1.7278627157211304, "logps/chosen": -554.8775634765625, "logps/rejected": -706.656982421875, "loss": 0.6795, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.373274028301239, "rewards/margins": 0.1549447476863861, "rewards/rejected": -0.5282188653945923, "step": 4400 }, { "epoch": 0.33, "learning_rate": 4.2660592187586906e-06, "logits/chosen": -2.1316182613372803, "logits/rejected": -1.4974901676177979, "logps/chosen": -510.2176818847656, "logps/rejected": -743.9851684570312, "loss": 0.6745, "rewards/accuracies": 0.875, "rewards/chosen": -0.3577421307563782, "rewards/margins": 0.25872674584388733, "rewards/rejected": -0.6164687871932983, "step": 4410 }, { "epoch": 0.33, "learning_rate": 4.261497215368747e-06, "logits/chosen": -1.8388111591339111, "logits/rejected": -1.6592826843261719, "logps/chosen": -483.6634216308594, "logps/rejected": -633.8040771484375, "loss": 0.6764, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3498237729072571, "rewards/margins": 0.16234351694583893, "rewards/rejected": -0.5121673345565796, "step": 4420 }, { "epoch": 0.33, "learning_rate": 4.256923533383419e-06, "logits/chosen": -1.789368987083435, "logits/rejected": -1.2012770175933838, "logps/chosen": -478.72021484375, "logps/rejected": -650.7435302734375, "loss": 0.6752, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.35261568427085876, "rewards/margins": 0.2086346596479416, "rewards/rejected": -0.5612503290176392, "step": 4430 }, { "epoch": 0.33, "learning_rate": 4.252338203125879e-06, "logits/chosen": -2.0442452430725098, "logits/rejected": -1.6583852767944336, "logps/chosen": -392.46820068359375, "logps/rejected": -553.8406982421875, "loss": 0.6766, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2198486626148224, "rewards/margins": 0.19674474000930786, "rewards/rejected": -0.41659340262413025, "step": 4440 }, { "epoch": 0.33, "learning_rate": 4.247741254996523e-06, "logits/chosen": -1.9325168132781982, "logits/rejected": -1.5590792894363403, "logps/chosen": -450.497314453125, "logps/rejected": -585.5242919921875, "loss": 0.6834, "rewards/accuracies": 0.75, "rewards/chosen": -0.2786654829978943, "rewards/margins": 0.16083991527557373, "rewards/rejected": -0.4395054876804352, "step": 4450 }, { "epoch": 0.33, "learning_rate": 4.2431327194727755e-06, "logits/chosen": -2.1585159301757812, "logits/rejected": -1.6504318714141846, "logps/chosen": -415.67706298828125, "logps/rejected": -501.166748046875, "loss": 0.6824, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23085565865039825, "rewards/margins": 0.14658120274543762, "rewards/rejected": -0.3774368464946747, "step": 4460 }, { "epoch": 0.33, "learning_rate": 4.238512627108885e-06, "logits/chosen": -2.229053497314453, "logits/rejected": -1.7136398553848267, "logps/chosen": -498.9519958496094, "logps/rejected": -616.4998168945312, "loss": 0.6773, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3400985598564148, "rewards/margins": 0.17671403288841248, "rewards/rejected": -0.5168126225471497, "step": 4470 }, { "epoch": 0.33, "learning_rate": 4.233881008535719e-06, "logits/chosen": -1.942826509475708, "logits/rejected": -1.640557885169983, "logps/chosen": -454.62158203125, "logps/rejected": -559.2020263671875, "loss": 0.6846, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2778385281562805, "rewards/margins": 0.14033696055412292, "rewards/rejected": -0.41817551851272583, "step": 4480 }, { "epoch": 0.33, "learning_rate": 4.229237894460563e-06, "logits/chosen": -2.058985710144043, "logits/rejected": -1.5888521671295166, "logps/chosen": -492.8211364746094, "logps/rejected": -615.7987060546875, "loss": 0.6836, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2936982214450836, "rewards/margins": 0.1643514335155487, "rewards/rejected": -0.4580496847629547, "step": 4490 }, { "epoch": 0.33, "learning_rate": 4.224583315666919e-06, "logits/chosen": -1.9964354038238525, "logits/rejected": -1.3914204835891724, "logps/chosen": -475.102294921875, "logps/rejected": -679.4697875976562, "loss": 0.6772, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3310096561908722, "rewards/margins": 0.22962316870689392, "rewards/rejected": -0.5606328845024109, "step": 4500 }, { "epoch": 0.33, "learning_rate": 4.219917303014297e-06, "logits/chosen": -2.295609951019287, "logits/rejected": -1.6072098016738892, "logps/chosen": -513.9281616210938, "logps/rejected": -699.7166137695312, "loss": 0.6775, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3696182370185852, "rewards/margins": 0.23817281424999237, "rewards/rejected": -0.6077910661697388, "step": 4510 }, { "epoch": 0.33, "learning_rate": 4.215239887438014e-06, "logits/chosen": -2.18269419670105, "logits/rejected": -1.436833143234253, "logps/chosen": -523.0841674804688, "logps/rejected": -669.661865234375, "loss": 0.6781, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.32237011194229126, "rewards/margins": 0.19304969906806946, "rewards/rejected": -0.5154197812080383, "step": 4520 }, { "epoch": 0.33, "learning_rate": 4.210551099948987e-06, "logits/chosen": -1.9216140508651733, "logits/rejected": -1.410408616065979, "logps/chosen": -451.6724548339844, "logps/rejected": -593.1229248046875, "loss": 0.6768, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.27360451221466064, "rewards/margins": 0.1849326640367508, "rewards/rejected": -0.45853719115257263, "step": 4530 }, { "epoch": 0.33, "learning_rate": 4.205850971633527e-06, "logits/chosen": -1.9477821588516235, "logits/rejected": -1.5593769550323486, "logps/chosen": -500.6622619628906, "logps/rejected": -681.7259521484375, "loss": 0.6766, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.35155823826789856, "rewards/margins": 0.1747664511203766, "rewards/rejected": -0.5263246893882751, "step": 4540 }, { "epoch": 0.34, "learning_rate": 4.201139533653136e-06, "logits/chosen": -2.1926050186157227, "logits/rejected": -1.5135444402694702, "logps/chosen": -531.3878173828125, "logps/rejected": -683.2811279296875, "loss": 0.6763, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.30369535088539124, "rewards/margins": 0.20663519203662872, "rewards/rejected": -0.5103305578231812, "step": 4550 }, { "epoch": 0.34, "learning_rate": 4.196416817244297e-06, "logits/chosen": -2.095155715942383, "logits/rejected": -1.5820648670196533, "logps/chosen": -573.8402099609375, "logps/rejected": -658.0858154296875, "loss": 0.6814, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3960362374782562, "rewards/margins": 0.15459051728248596, "rewards/rejected": -0.5506267547607422, "step": 4560 }, { "epoch": 0.34, "learning_rate": 4.1916828537182666e-06, "logits/chosen": -1.7823041677474976, "logits/rejected": -1.563079833984375, "logps/chosen": -606.4195556640625, "logps/rejected": -742.6245727539062, "loss": 0.6833, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4240674376487732, "rewards/margins": 0.15337036550045013, "rewards/rejected": -0.5774377584457397, "step": 4570 }, { "epoch": 0.34, "learning_rate": 4.186937674460871e-06, "logits/chosen": -1.9426825046539307, "logits/rejected": -1.3855278491973877, "logps/chosen": -616.4877319335938, "logps/rejected": -795.5431518554688, "loss": 0.6754, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.451413094997406, "rewards/margins": 0.2118844985961914, "rewards/rejected": -0.6632975339889526, "step": 4580 }, { "epoch": 0.34, "learning_rate": 4.1821813109322975e-06, "logits/chosen": -2.1910603046417236, "logits/rejected": -1.4368436336517334, "logps/chosen": -530.8137817382812, "logps/rejected": -768.9873657226562, "loss": 0.6725, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.39003464579582214, "rewards/margins": 0.2722468376159668, "rewards/rejected": -0.6622815728187561, "step": 4590 }, { "epoch": 0.34, "learning_rate": 4.17741379466688e-06, "logits/chosen": -2.027900457382202, "logits/rejected": -1.5203375816345215, "logps/chosen": -579.0759887695312, "logps/rejected": -741.79931640625, "loss": 0.6791, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3928004801273346, "rewards/margins": 0.2123395949602127, "rewards/rejected": -0.6051400899887085, "step": 4600 }, { "epoch": 0.34, "learning_rate": 4.172635157272897e-06, "logits/chosen": -2.269648790359497, "logits/rejected": -1.6367452144622803, "logps/chosen": -543.7764892578125, "logps/rejected": -650.6695556640625, "loss": 0.6814, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.36043864488601685, "rewards/margins": 0.15506286919116974, "rewards/rejected": -0.5155014991760254, "step": 4610 }, { "epoch": 0.34, "learning_rate": 4.167845430432359e-06, "logits/chosen": -2.084611177444458, "logits/rejected": -1.8284447193145752, "logps/chosen": -461.95367431640625, "logps/rejected": -537.4190673828125, "loss": 0.6831, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3130492568016052, "rewards/margins": 0.1178227886557579, "rewards/rejected": -0.43087202310562134, "step": 4620 }, { "epoch": 0.34, "learning_rate": 4.163044645900797e-06, "logits/chosen": -2.0064475536346436, "logits/rejected": -1.7649123668670654, "logps/chosen": -452.449462890625, "logps/rejected": -534.5064697265625, "loss": 0.6862, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.31121140718460083, "rewards/margins": 0.08229520916938782, "rewards/rejected": -0.39350658655166626, "step": 4630 }, { "epoch": 0.34, "learning_rate": 4.158232835507057e-06, "logits/chosen": -2.11445689201355, "logits/rejected": -1.600807547569275, "logps/chosen": -467.8783264160156, "logps/rejected": -616.8772583007812, "loss": 0.6779, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3426204323768616, "rewards/margins": 0.1663699746131897, "rewards/rejected": -0.5089904069900513, "step": 4640 }, { "epoch": 0.34, "learning_rate": 4.153410031153082e-06, "logits/chosen": -2.169929027557373, "logits/rejected": -1.6455320119857788, "logps/chosen": -435.44512939453125, "logps/rejected": -549.2145385742188, "loss": 0.6866, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.26545220613479614, "rewards/margins": 0.16350439190864563, "rewards/rejected": -0.42895665764808655, "step": 4650 }, { "epoch": 0.34, "learning_rate": 4.148576264813706e-06, "logits/chosen": -2.0811948776245117, "logits/rejected": -1.620600938796997, "logps/chosen": -456.61712646484375, "logps/rejected": -632.0189208984375, "loss": 0.6774, "rewards/accuracies": 0.75, "rewards/chosen": -0.29791897535324097, "rewards/margins": 0.20023036003112793, "rewards/rejected": -0.4981493055820465, "step": 4660 }, { "epoch": 0.34, "learning_rate": 4.143731568536441e-06, "logits/chosen": -2.2468442916870117, "logits/rejected": -1.7513973712921143, "logps/chosen": -516.3638916015625, "logps/rejected": -640.6619873046875, "loss": 0.6795, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3421229422092438, "rewards/margins": 0.1565026044845581, "rewards/rejected": -0.49862557649612427, "step": 4670 }, { "epoch": 0.35, "learning_rate": 4.138875974441261e-06, "logits/chosen": -2.211343288421631, "logits/rejected": -1.6492831707000732, "logps/chosen": -390.81353759765625, "logps/rejected": -543.56494140625, "loss": 0.6787, "rewards/accuracies": 0.75, "rewards/chosen": -0.24865290522575378, "rewards/margins": 0.20777325332164764, "rewards/rejected": -0.456426203250885, "step": 4680 }, { "epoch": 0.35, "learning_rate": 4.134009514720395e-06, "logits/chosen": -2.0125670433044434, "logits/rejected": -1.5957510471343994, "logps/chosen": -461.47796630859375, "logps/rejected": -630.5738525390625, "loss": 0.6791, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2863798439502716, "rewards/margins": 0.20995846390724182, "rewards/rejected": -0.4963383674621582, "step": 4690 }, { "epoch": 0.35, "learning_rate": 4.129132221638107e-06, "logits/chosen": -2.1151833534240723, "logits/rejected": -1.6265485286712646, "logps/chosen": -552.8087158203125, "logps/rejected": -683.3887939453125, "loss": 0.6783, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4105783998966217, "rewards/margins": 0.15718211233615875, "rewards/rejected": -0.5677604675292969, "step": 4700 }, { "epoch": 0.35, "learning_rate": 4.124244127530488e-06, "logits/chosen": -1.9220958948135376, "logits/rejected": -1.4431062936782837, "logps/chosen": -608.1555786132812, "logps/rejected": -775.0755615234375, "loss": 0.6767, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.44231024384498596, "rewards/margins": 0.2178175449371338, "rewards/rejected": -0.6601277589797974, "step": 4710 }, { "epoch": 0.35, "learning_rate": 4.119345264805238e-06, "logits/chosen": -2.0518100261688232, "logits/rejected": -1.635679841041565, "logps/chosen": -523.6256103515625, "logps/rejected": -685.609375, "loss": 0.6743, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3808341920375824, "rewards/margins": 0.18028615415096283, "rewards/rejected": -0.5611204504966736, "step": 4720 }, { "epoch": 0.35, "learning_rate": 4.114435665941452e-06, "logits/chosen": -2.035411834716797, "logits/rejected": -1.389089822769165, "logps/chosen": -510.9144592285156, "logps/rejected": -702.8983764648438, "loss": 0.6778, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3749825954437256, "rewards/margins": 0.24128004908561707, "rewards/rejected": -0.616262674331665, "step": 4730 }, { "epoch": 0.35, "learning_rate": 4.109515363489405e-06, "logits/chosen": -1.973564863204956, "logits/rejected": -1.5319654941558838, "logps/chosen": -545.0478515625, "logps/rejected": -704.545166015625, "loss": 0.6772, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.35753026604652405, "rewards/margins": 0.19567057490348816, "rewards/rejected": -0.5532008409500122, "step": 4740 }, { "epoch": 0.35, "learning_rate": 4.104584390070336e-06, "logits/chosen": -2.0638298988342285, "logits/rejected": -1.425800085067749, "logps/chosen": -483.1240234375, "logps/rejected": -575.5072021484375, "loss": 0.6799, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2862679064273834, "rewards/margins": 0.17576150596141815, "rewards/rejected": -0.462029367685318, "step": 4750 }, { "epoch": 0.35, "learning_rate": 4.0996427783762305e-06, "logits/chosen": -2.102324962615967, "logits/rejected": -1.4926786422729492, "logps/chosen": -497.1849060058594, "logps/rejected": -676.0487060546875, "loss": 0.6739, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.31490325927734375, "rewards/margins": 0.22103317081928253, "rewards/rejected": -0.5359364151954651, "step": 4760 }, { "epoch": 0.35, "learning_rate": 4.094690561169607e-06, "logits/chosen": -1.9400266408920288, "logits/rejected": -1.3373876810073853, "logps/chosen": -597.1064453125, "logps/rejected": -792.9906005859375, "loss": 0.678, "rewards/accuracies": 0.75, "rewards/chosen": -0.4210077226161957, "rewards/margins": 0.24839511513710022, "rewards/rejected": -0.6694028377532959, "step": 4770 }, { "epoch": 0.35, "learning_rate": 4.089727771283297e-06, "logits/chosen": -1.814639687538147, "logits/rejected": -1.3624284267425537, "logps/chosen": -592.31689453125, "logps/rejected": -707.7630615234375, "loss": 0.6772, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.43652716279029846, "rewards/margins": 0.16820871829986572, "rewards/rejected": -0.6047358512878418, "step": 4780 }, { "epoch": 0.35, "learning_rate": 4.0847544416202285e-06, "logits/chosen": -2.0010693073272705, "logits/rejected": -1.6501489877700806, "logps/chosen": -509.634765625, "logps/rejected": -635.7188720703125, "loss": 0.6813, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.33672475814819336, "rewards/margins": 0.176512211561203, "rewards/rejected": -0.513236939907074, "step": 4790 }, { "epoch": 0.35, "learning_rate": 4.079770605153206e-06, "logits/chosen": -1.6865450143814087, "logits/rejected": -1.058992862701416, "logps/chosen": -458.51824951171875, "logps/rejected": -652.0930786132812, "loss": 0.6737, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.27757638692855835, "rewards/margins": 0.23511190712451935, "rewards/rejected": -0.5126882791519165, "step": 4800 }, { "epoch": 0.35, "learning_rate": 4.074776294924693e-06, "logits/chosen": -2.121105194091797, "logits/rejected": -1.9510799646377563, "logps/chosen": -512.533447265625, "logps/rejected": -601.828369140625, "loss": 0.6837, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.34339597821235657, "rewards/margins": 0.10158060491085052, "rewards/rejected": -0.4449765086174011, "step": 4810 }, { "epoch": 0.36, "learning_rate": 4.0697715440465975e-06, "logits/chosen": -2.030217170715332, "logits/rejected": -1.5749047994613647, "logps/chosen": -500.4840393066406, "logps/rejected": -631.9931030273438, "loss": 0.6811, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3107936978340149, "rewards/margins": 0.16935130953788757, "rewards/rejected": -0.48014506697654724, "step": 4820 }, { "epoch": 0.36, "learning_rate": 4.064756385700042e-06, "logits/chosen": -2.149343967437744, "logits/rejected": -1.534906029701233, "logps/chosen": -397.771728515625, "logps/rejected": -578.2429809570312, "loss": 0.6753, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.25182002782821655, "rewards/margins": 0.22294776141643524, "rewards/rejected": -0.4747678339481354, "step": 4830 }, { "epoch": 0.36, "learning_rate": 4.059730853135155e-06, "logits/chosen": -1.7124006748199463, "logits/rejected": -1.2823957204818726, "logps/chosen": -529.0118408203125, "logps/rejected": -679.1007080078125, "loss": 0.6802, "rewards/accuracies": 0.75, "rewards/chosen": -0.3536754548549652, "rewards/margins": 0.20245811343193054, "rewards/rejected": -0.5561336278915405, "step": 4840 }, { "epoch": 0.36, "learning_rate": 4.054694979670843e-06, "logits/chosen": -2.0768208503723145, "logits/rejected": -1.533855676651001, "logps/chosen": -525.8298950195312, "logps/rejected": -620.4069213867188, "loss": 0.6774, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3728491961956024, "rewards/margins": 0.1517309993505478, "rewards/rejected": -0.524580180644989, "step": 4850 }, { "epoch": 0.36, "learning_rate": 4.0496487986945725e-06, "logits/chosen": -2.067941188812256, "logits/rejected": -1.548063039779663, "logps/chosen": -502.1722106933594, "logps/rejected": -614.9371337890625, "loss": 0.6814, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.34558480978012085, "rewards/margins": 0.15416575968265533, "rewards/rejected": -0.499750554561615, "step": 4860 }, { "epoch": 0.36, "learning_rate": 4.044592343662146e-06, "logits/chosen": -1.8209116458892822, "logits/rejected": -1.3669567108154297, "logps/chosen": -528.753662109375, "logps/rejected": -763.0119018554688, "loss": 0.6753, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.35965970158576965, "rewards/margins": 0.23949465155601501, "rewards/rejected": -0.5991543531417847, "step": 4870 }, { "epoch": 0.36, "learning_rate": 4.039525648097484e-06, "logits/chosen": -1.9816710948944092, "logits/rejected": -1.2347781658172607, "logps/chosen": -467.7369079589844, "logps/rejected": -663.6854858398438, "loss": 0.674, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3242044746875763, "rewards/margins": 0.24665260314941406, "rewards/rejected": -0.570857048034668, "step": 4880 }, { "epoch": 0.36, "learning_rate": 4.034448745592403e-06, "logits/chosen": -2.0187880992889404, "logits/rejected": -1.4850327968597412, "logps/chosen": -471.1129455566406, "logps/rejected": -677.1145629882812, "loss": 0.6768, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.29610925912857056, "rewards/margins": 0.23615007102489471, "rewards/rejected": -0.5322593450546265, "step": 4890 }, { "epoch": 0.36, "learning_rate": 4.029361669806386e-06, "logits/chosen": -2.112351894378662, "logits/rejected": -1.4720081090927124, "logps/chosen": -506.339111328125, "logps/rejected": -668.1380615234375, "loss": 0.6752, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3383534252643585, "rewards/margins": 0.21086983382701874, "rewards/rejected": -0.5492231845855713, "step": 4900 }, { "epoch": 0.36, "learning_rate": 4.024264454466369e-06, "logits/chosen": -1.8996219635009766, "logits/rejected": -1.3876419067382812, "logps/chosen": -616.63427734375, "logps/rejected": -805.2681884765625, "loss": 0.6749, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.41991597414016724, "rewards/margins": 0.21854552626609802, "rewards/rejected": -0.6384615302085876, "step": 4910 }, { "epoch": 0.36, "learning_rate": 4.019157133366509e-06, "logits/chosen": -1.57528817653656, "logits/rejected": -1.2250330448150635, "logps/chosen": -571.7840576171875, "logps/rejected": -758.1402587890625, "loss": 0.6772, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4568096995353699, "rewards/margins": 0.19477088749408722, "rewards/rejected": -0.6515806913375854, "step": 4920 }, { "epoch": 0.36, "learning_rate": 4.0140397403679644e-06, "logits/chosen": -1.839935302734375, "logits/rejected": -1.3248803615570068, "logps/chosen": -480.886962890625, "logps/rejected": -725.005615234375, "loss": 0.6728, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.33016037940979004, "rewards/margins": 0.2378970831632614, "rewards/rejected": -0.5680574178695679, "step": 4930 }, { "epoch": 0.36, "learning_rate": 4.00891230939867e-06, "logits/chosen": -1.9129787683486938, "logits/rejected": -1.5412992238998413, "logps/chosen": -533.988037109375, "logps/rejected": -656.42431640625, "loss": 0.6783, "rewards/accuracies": 0.75, "rewards/chosen": -0.42721542716026306, "rewards/margins": 0.13742132484912872, "rewards/rejected": -0.5646368265151978, "step": 4940 }, { "epoch": 0.37, "learning_rate": 4.003774874453112e-06, "logits/chosen": -1.988730788230896, "logits/rejected": -1.5233473777770996, "logps/chosen": -470.82867431640625, "logps/rejected": -614.6431884765625, "loss": 0.6794, "rewards/accuracies": 0.75, "rewards/chosen": -0.29716524481773376, "rewards/margins": 0.1785602569580078, "rewards/rejected": -0.4757255017757416, "step": 4950 }, { "epoch": 0.37, "learning_rate": 3.998627469592101e-06, "logits/chosen": -2.0494918823242188, "logits/rejected": -1.3190973997116089, "logps/chosen": -541.6898193359375, "logps/rejected": -657.6224365234375, "loss": 0.684, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3161621689796448, "rewards/margins": 0.19704173505306244, "rewards/rejected": -0.5132039189338684, "step": 4960 }, { "epoch": 0.37, "learning_rate": 3.993470128942548e-06, "logits/chosen": -1.9289041757583618, "logits/rejected": -1.3669551610946655, "logps/chosen": -512.722900390625, "logps/rejected": -741.9830932617188, "loss": 0.6736, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3445243239402771, "rewards/margins": 0.26967889070510864, "rewards/rejected": -0.6142032146453857, "step": 4970 }, { "epoch": 0.37, "learning_rate": 3.988302886697237e-06, "logits/chosen": -1.8434703350067139, "logits/rejected": -1.4704275131225586, "logps/chosen": -603.2755126953125, "logps/rejected": -750.6194458007812, "loss": 0.682, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.39918914437294006, "rewards/margins": 0.17875897884368896, "rewards/rejected": -0.5779481530189514, "step": 4980 }, { "epoch": 0.37, "learning_rate": 3.9831257771146e-06, "logits/chosen": -1.9151567220687866, "logits/rejected": -1.2869164943695068, "logps/chosen": -456.97589111328125, "logps/rejected": -663.5799560546875, "loss": 0.6702, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2991429567337036, "rewards/margins": 0.23827394843101501, "rewards/rejected": -0.5374168753623962, "step": 4990 }, { "epoch": 0.37, "learning_rate": 3.977938834518489e-06, "logits/chosen": -2.095998764038086, "logits/rejected": -1.6758161783218384, "logps/chosen": -348.2960205078125, "logps/rejected": -537.6554565429688, "loss": 0.6729, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.229129359126091, "rewards/margins": 0.21937792003154755, "rewards/rejected": -0.44850724935531616, "step": 5000 }, { "epoch": 0.37, "learning_rate": 3.972742093297946e-06, "logits/chosen": -1.897723913192749, "logits/rejected": -1.4024503231048584, "logps/chosen": -485.85150146484375, "logps/rejected": -659.1302490234375, "loss": 0.6761, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3235228657722473, "rewards/margins": 0.1881713718175888, "rewards/rejected": -0.5116941928863525, "step": 5010 }, { "epoch": 0.37, "learning_rate": 3.967535587906978e-06, "logits/chosen": -2.0150468349456787, "logits/rejected": -1.6181495189666748, "logps/chosen": -635.1000366210938, "logps/rejected": -793.458251953125, "loss": 0.6816, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4753701090812683, "rewards/margins": 0.16953836381435394, "rewards/rejected": -0.6449085474014282, "step": 5020 }, { "epoch": 0.37, "learning_rate": 3.962319352864328e-06, "logits/chosen": -1.995041847229004, "logits/rejected": -1.5308204889297485, "logps/chosen": -570.77978515625, "logps/rejected": -739.5093994140625, "loss": 0.6769, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.390498548746109, "rewards/margins": 0.22013859450817108, "rewards/rejected": -0.6106370687484741, "step": 5030 }, { "epoch": 0.37, "learning_rate": 3.957093422753248e-06, "logits/chosen": -1.9971033334732056, "logits/rejected": -1.3590974807739258, "logps/chosen": -679.1117553710938, "logps/rejected": -826.3522338867188, "loss": 0.6795, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.46068277955055237, "rewards/margins": 0.2214738130569458, "rewards/rejected": -0.682156503200531, "step": 5040 }, { "epoch": 0.37, "learning_rate": 3.9518578322212625e-06, "logits/chosen": -1.6963460445404053, "logits/rejected": -1.2052608728408813, "logps/chosen": -655.2673950195312, "logps/rejected": -784.6058349609375, "loss": 0.6801, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.49761563539505005, "rewards/margins": 0.18595071136951447, "rewards/rejected": -0.6835662722587585, "step": 5050 }, { "epoch": 0.37, "learning_rate": 3.94661261597995e-06, "logits/chosen": -1.765234351158142, "logits/rejected": -1.2367937564849854, "logps/chosen": -617.15283203125, "logps/rejected": -773.7288818359375, "loss": 0.6737, "rewards/accuracies": 0.75, "rewards/chosen": -0.43606123328208923, "rewards/margins": 0.2195170819759369, "rewards/rejected": -0.6555783152580261, "step": 5060 }, { "epoch": 0.37, "learning_rate": 3.941357808804701e-06, "logits/chosen": -1.9909751415252686, "logits/rejected": -1.7002605199813843, "logps/chosen": -803.8104858398438, "logps/rejected": -805.2496337890625, "loss": 0.6873, "rewards/accuracies": 0.625, "rewards/chosen": -0.5553185343742371, "rewards/margins": 0.0815151035785675, "rewards/rejected": -0.636833667755127, "step": 5070 }, { "epoch": 0.37, "learning_rate": 3.936093445534498e-06, "logits/chosen": -1.7278411388397217, "logits/rejected": -1.293587565422058, "logps/chosen": -653.553955078125, "logps/rejected": -775.5648193359375, "loss": 0.6791, "rewards/accuracies": 0.625, "rewards/chosen": -0.4742676615715027, "rewards/margins": 0.17079885303974152, "rewards/rejected": -0.6450665593147278, "step": 5080 }, { "epoch": 0.38, "learning_rate": 3.930819561071676e-06, "logits/chosen": -1.8814693689346313, "logits/rejected": -1.368492841720581, "logps/chosen": -649.2463989257812, "logps/rejected": -757.3802490234375, "loss": 0.6837, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.49071040749549866, "rewards/margins": 0.13578274846076965, "rewards/rejected": -0.6264931559562683, "step": 5090 }, { "epoch": 0.38, "learning_rate": 3.925536190381697e-06, "logits/chosen": -1.8298383951187134, "logits/rejected": -1.3574283123016357, "logps/chosen": -552.0990600585938, "logps/rejected": -715.2420043945312, "loss": 0.6795, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4386724829673767, "rewards/margins": 0.17943327128887177, "rewards/rejected": -0.6181057691574097, "step": 5100 }, { "epoch": 0.38, "learning_rate": 3.9202433684929155e-06, "logits/chosen": -1.8973544836044312, "logits/rejected": -1.3762882947921753, "logps/chosen": -536.1806640625, "logps/rejected": -644.5281372070312, "loss": 0.6774, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.39327841997146606, "rewards/margins": 0.15364022552967072, "rewards/rejected": -0.5469185709953308, "step": 5110 }, { "epoch": 0.38, "learning_rate": 3.9149411304963455e-06, "logits/chosen": -1.9467798471450806, "logits/rejected": -1.379688024520874, "logps/chosen": -499.286376953125, "logps/rejected": -697.4122924804688, "loss": 0.6762, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3789582848548889, "rewards/margins": 0.21642474830150604, "rewards/rejected": -0.5953829884529114, "step": 5120 }, { "epoch": 0.38, "learning_rate": 3.909629511545431e-06, "logits/chosen": -2.0218164920806885, "logits/rejected": -1.5280587673187256, "logps/chosen": -586.6898803710938, "logps/rejected": -772.5906982421875, "loss": 0.6807, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4047798216342926, "rewards/margins": 0.20194454491138458, "rewards/rejected": -0.6067243814468384, "step": 5130 }, { "epoch": 0.38, "learning_rate": 3.90430854685581e-06, "logits/chosen": -1.545405387878418, "logits/rejected": -1.3916435241699219, "logps/chosen": -624.6387939453125, "logps/rejected": -712.9276733398438, "loss": 0.6863, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4878202974796295, "rewards/margins": 0.08731056749820709, "rewards/rejected": -0.5751308798789978, "step": 5140 }, { "epoch": 0.38, "learning_rate": 3.898978271705083e-06, "logits/chosen": -1.94792902469635, "logits/rejected": -1.4921214580535889, "logps/chosen": -636.5615844726562, "logps/rejected": -774.0682373046875, "loss": 0.6781, "rewards/accuracies": 0.75, "rewards/chosen": -0.48876386880874634, "rewards/margins": 0.18174239993095398, "rewards/rejected": -0.6705062389373779, "step": 5150 }, { "epoch": 0.38, "learning_rate": 3.893638721432575e-06, "logits/chosen": -2.199303150177002, "logits/rejected": -1.7656714916229248, "logps/chosen": -582.6051635742188, "logps/rejected": -735.4972534179688, "loss": 0.6761, "rewards/accuracies": 0.75, "rewards/chosen": -0.3827211856842041, "rewards/margins": 0.1969446837902069, "rewards/rejected": -0.5796658396720886, "step": 5160 }, { "epoch": 0.38, "learning_rate": 3.888289931439109e-06, "logits/chosen": -1.9435408115386963, "logits/rejected": -1.351283311843872, "logps/chosen": -571.268310546875, "logps/rejected": -737.5789184570312, "loss": 0.6794, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4263758063316345, "rewards/margins": 0.20954544842243195, "rewards/rejected": -0.6359211802482605, "step": 5170 }, { "epoch": 0.38, "learning_rate": 3.882931937186765e-06, "logits/chosen": -1.908963918685913, "logits/rejected": -1.4402263164520264, "logps/chosen": -461.6094665527344, "logps/rejected": -623.0940551757812, "loss": 0.6754, "rewards/accuracies": 0.625, "rewards/chosen": -0.3544270098209381, "rewards/margins": 0.1885715276002884, "rewards/rejected": -0.5429984927177429, "step": 5180 }, { "epoch": 0.38, "learning_rate": 3.877564774198643e-06, "logits/chosen": -2.1711795330047607, "logits/rejected": -1.8117748498916626, "logps/chosen": -452.7587890625, "logps/rejected": -616.4591064453125, "loss": 0.6807, "rewards/accuracies": 0.75, "rewards/chosen": -0.2760298550128937, "rewards/margins": 0.18230316042900085, "rewards/rejected": -0.45833301544189453, "step": 5190 }, { "epoch": 0.38, "learning_rate": 3.872188478058636e-06, "logits/chosen": -1.6999309062957764, "logits/rejected": -1.148488163948059, "logps/chosen": -419.72686767578125, "logps/rejected": -626.77099609375, "loss": 0.6735, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.28981342911720276, "rewards/margins": 0.25281697511672974, "rewards/rejected": -0.5426303744316101, "step": 5200 }, { "epoch": 0.38, "learning_rate": 3.866803084411187e-06, "logits/chosen": -1.7029212713241577, "logits/rejected": -1.2781116962432861, "logps/chosen": -494.169677734375, "logps/rejected": -681.4754028320312, "loss": 0.6811, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3584573268890381, "rewards/margins": 0.22750861942768097, "rewards/rejected": -0.5859659910202026, "step": 5210 }, { "epoch": 0.39, "learning_rate": 3.861408628961055e-06, "logits/chosen": -1.9407007694244385, "logits/rejected": -1.3569246530532837, "logps/chosen": -570.0714111328125, "logps/rejected": -686.2063598632812, "loss": 0.6803, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.37952226400375366, "rewards/margins": 0.17616084218025208, "rewards/rejected": -0.5556830763816833, "step": 5220 }, { "epoch": 0.39, "learning_rate": 3.856005147473077e-06, "logits/chosen": -1.9142601490020752, "logits/rejected": -1.333569884300232, "logps/chosen": -572.1123046875, "logps/rejected": -668.0932006835938, "loss": 0.678, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.38509267568588257, "rewards/margins": 0.17245812714099884, "rewards/rejected": -0.5575507879257202, "step": 5230 }, { "epoch": 0.39, "learning_rate": 3.850592675771935e-06, "logits/chosen": -1.919061303138733, "logits/rejected": -1.5386958122253418, "logps/chosen": -561.583740234375, "logps/rejected": -699.3116455078125, "loss": 0.678, "rewards/accuracies": 0.75, "rewards/chosen": -0.39986008405685425, "rewards/margins": 0.17530061304569244, "rewards/rejected": -0.5751606822013855, "step": 5240 }, { "epoch": 0.39, "learning_rate": 3.8451712497419105e-06, "logits/chosen": -1.8138999938964844, "logits/rejected": -1.5926733016967773, "logps/chosen": -495.78460693359375, "logps/rejected": -655.5927734375, "loss": 0.681, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.36154884099960327, "rewards/margins": 0.15091244876384735, "rewards/rejected": -0.5124613046646118, "step": 5250 }, { "epoch": 0.39, "learning_rate": 3.839740905326657e-06, "logits/chosen": -1.9515451192855835, "logits/rejected": -1.374030351638794, "logps/chosen": -552.1556396484375, "logps/rejected": -717.283935546875, "loss": 0.6744, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3412502408027649, "rewards/margins": 0.22766384482383728, "rewards/rejected": -0.5689140558242798, "step": 5260 }, { "epoch": 0.39, "learning_rate": 3.834301678528952e-06, "logits/chosen": -1.8944038152694702, "logits/rejected": -1.5569617748260498, "logps/chosen": -511.8218688964844, "logps/rejected": -724.9166259765625, "loss": 0.6821, "rewards/accuracies": 0.75, "rewards/chosen": -0.3336299955844879, "rewards/margins": 0.23159928619861603, "rewards/rejected": -0.5652292370796204, "step": 5270 }, { "epoch": 0.39, "learning_rate": 3.8288536054104654e-06, "logits/chosen": -1.980128526687622, "logits/rejected": -1.4666037559509277, "logps/chosen": -559.1751098632812, "logps/rejected": -657.6528930664062, "loss": 0.6866, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3752138912677765, "rewards/margins": 0.17921356856822968, "rewards/rejected": -0.5544275045394897, "step": 5280 }, { "epoch": 0.39, "learning_rate": 3.8233967220915146e-06, "logits/chosen": -1.8810808658599854, "logits/rejected": -1.6108601093292236, "logps/chosen": -533.4832763671875, "logps/rejected": -658.1475830078125, "loss": 0.6822, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.33501407504081726, "rewards/margins": 0.1479581892490387, "rewards/rejected": -0.48297229409217834, "step": 5290 }, { "epoch": 0.39, "learning_rate": 3.8179310647508315e-06, "logits/chosen": -2.071546792984009, "logits/rejected": -1.5042400360107422, "logps/chosen": -466.07867431640625, "logps/rejected": -648.6842651367188, "loss": 0.6753, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3079812824726105, "rewards/margins": 0.1982308328151703, "rewards/rejected": -0.5062121152877808, "step": 5300 }, { "epoch": 0.39, "learning_rate": 3.812456669625317e-06, "logits/chosen": -1.7099567651748657, "logits/rejected": -1.2119382619857788, "logps/chosen": -637.4942626953125, "logps/rejected": -745.9791259765625, "loss": 0.6806, "rewards/accuracies": 0.75, "rewards/chosen": -0.48223447799682617, "rewards/margins": 0.16910608112812042, "rewards/rejected": -0.6513405442237854, "step": 5310 }, { "epoch": 0.39, "learning_rate": 3.806973573009802e-06, "logits/chosen": -2.0418596267700195, "logits/rejected": -1.4683291912078857, "logps/chosen": -524.8373413085938, "logps/rejected": -706.5059814453125, "loss": 0.6761, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35283151268959045, "rewards/margins": 0.1932392716407776, "rewards/rejected": -0.5460707545280457, "step": 5320 }, { "epoch": 0.39, "learning_rate": 3.8014818112568108e-06, "logits/chosen": -1.9605640172958374, "logits/rejected": -1.628109335899353, "logps/chosen": -478.4222717285156, "logps/rejected": -674.2198486328125, "loss": 0.6795, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3183402419090271, "rewards/margins": 0.2069808691740036, "rewards/rejected": -0.5253211259841919, "step": 5330 }, { "epoch": 0.39, "learning_rate": 3.7959814207763134e-06, "logits/chosen": -1.825298547744751, "logits/rejected": -1.4194700717926025, "logps/chosen": -509.215576171875, "logps/rejected": -717.880126953125, "loss": 0.6785, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3894636034965515, "rewards/margins": 0.20086956024169922, "rewards/rejected": -0.590333104133606, "step": 5340 }, { "epoch": 0.39, "learning_rate": 3.7904724380354883e-06, "logits/chosen": -1.8285179138183594, "logits/rejected": -1.5054690837860107, "logps/chosen": -575.2680053710938, "logps/rejected": -720.6822509765625, "loss": 0.6798, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4419225752353668, "rewards/margins": 0.16541841626167297, "rewards/rejected": -0.607340931892395, "step": 5350 }, { "epoch": 0.4, "learning_rate": 3.784954899558484e-06, "logits/chosen": -1.7046735286712646, "logits/rejected": -1.3077155351638794, "logps/chosen": -581.4420166015625, "logps/rejected": -766.2059326171875, "loss": 0.6746, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.44429105520248413, "rewards/margins": 0.22474519908428192, "rewards/rejected": -0.6690362691879272, "step": 5360 }, { "epoch": 0.4, "learning_rate": 3.779428841926166e-06, "logits/chosen": -1.7986637353897095, "logits/rejected": -1.3688108921051025, "logps/chosen": -673.7161865234375, "logps/rejected": -785.14501953125, "loss": 0.684, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5059278011322021, "rewards/margins": 0.17609229683876038, "rewards/rejected": -0.6820201873779297, "step": 5370 }, { "epoch": 0.4, "learning_rate": 3.773894301775889e-06, "logits/chosen": -1.745784044265747, "logits/rejected": -1.301134467124939, "logps/chosen": -594.2935791015625, "logps/rejected": -796.8236694335938, "loss": 0.6768, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4285285472869873, "rewards/margins": 0.21760055422782898, "rewards/rejected": -0.6461290121078491, "step": 5380 }, { "epoch": 0.4, "learning_rate": 3.7683513158012376e-06, "logits/chosen": -1.725097417831421, "logits/rejected": -1.1264840364456177, "logps/chosen": -585.9773559570312, "logps/rejected": -817.1766967773438, "loss": 0.6702, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.42210355401039124, "rewards/margins": 0.25937971472740173, "rewards/rejected": -0.6814833283424377, "step": 5390 }, { "epoch": 0.4, "learning_rate": 3.7627999207518005e-06, "logits/chosen": -1.7913734912872314, "logits/rejected": -1.3546490669250488, "logps/chosen": -581.4058837890625, "logps/rejected": -711.45654296875, "loss": 0.6818, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4115463197231293, "rewards/margins": 0.17609922587871552, "rewards/rejected": -0.5876455903053284, "step": 5400 }, { "epoch": 0.4, "learning_rate": 3.7572401534329106e-06, "logits/chosen": -1.719708800315857, "logits/rejected": -1.224473237991333, "logps/chosen": -592.3936767578125, "logps/rejected": -778.5916748046875, "loss": 0.6775, "rewards/accuracies": 0.75, "rewards/chosen": -0.4154103696346283, "rewards/margins": 0.25672197341918945, "rewards/rejected": -0.6721323728561401, "step": 5410 }, { "epoch": 0.4, "learning_rate": 3.751672050705412e-06, "logits/chosen": -1.713848352432251, "logits/rejected": -1.3757610321044922, "logps/chosen": -746.0662841796875, "logps/rejected": -922.0105590820312, "loss": 0.6822, "rewards/accuracies": 0.75, "rewards/chosen": -0.6204447746276855, "rewards/margins": 0.17174571752548218, "rewards/rejected": -0.7921904921531677, "step": 5420 }, { "epoch": 0.4, "learning_rate": 3.7460956494854124e-06, "logits/chosen": -1.7947765588760376, "logits/rejected": -1.2975655794143677, "logps/chosen": -619.537841796875, "logps/rejected": -779.485107421875, "loss": 0.6809, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.47192010283470154, "rewards/margins": 0.18998178839683533, "rewards/rejected": -0.6619018912315369, "step": 5430 }, { "epoch": 0.4, "learning_rate": 3.7405109867440344e-06, "logits/chosen": -1.7583087682724, "logits/rejected": -1.2717887163162231, "logps/chosen": -621.03955078125, "logps/rejected": -778.7965087890625, "loss": 0.6772, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4915132522583008, "rewards/margins": 0.18711350858211517, "rewards/rejected": -0.6786267161369324, "step": 5440 }, { "epoch": 0.4, "learning_rate": 3.734918099507179e-06, "logits/chosen": -1.5830639600753784, "logits/rejected": -1.2832969427108765, "logps/chosen": -580.9663696289062, "logps/rejected": -791.1033935546875, "loss": 0.6739, "rewards/accuracies": 0.75, "rewards/chosen": -0.44469746947288513, "rewards/margins": 0.2442280799150467, "rewards/rejected": -0.688925564289093, "step": 5450 }, { "epoch": 0.4, "learning_rate": 3.729317024855269e-06, "logits/chosen": -1.5733113288879395, "logits/rejected": -1.2088617086410522, "logps/chosen": -675.0768432617188, "logps/rejected": -788.5995483398438, "loss": 0.6823, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5117225050926208, "rewards/margins": 0.17262235283851624, "rewards/rejected": -0.6843448281288147, "step": 5460 }, { "epoch": 0.4, "learning_rate": 3.723707799923015e-06, "logits/chosen": -1.6148990392684937, "logits/rejected": -1.2346653938293457, "logps/chosen": -502.8179626464844, "logps/rejected": -696.62109375, "loss": 0.6737, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.36271798610687256, "rewards/margins": 0.21073195338249207, "rewards/rejected": -0.573449969291687, "step": 5470 }, { "epoch": 0.4, "learning_rate": 3.7180904618991586e-06, "logits/chosen": -2.2178988456726074, "logits/rejected": -1.6155487298965454, "logps/chosen": -408.31036376953125, "logps/rejected": -507.31134033203125, "loss": 0.685, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24168117344379425, "rewards/margins": 0.1609528362751007, "rewards/rejected": -0.40263399481773376, "step": 5480 }, { "epoch": 0.4, "learning_rate": 3.7124650480262346e-06, "logits/chosen": -1.79499089717865, "logits/rejected": -1.4585219621658325, "logps/chosen": -432.59033203125, "logps/rejected": -608.8306884765625, "loss": 0.6737, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2909473478794098, "rewards/margins": 0.18004021048545837, "rewards/rejected": -0.47098755836486816, "step": 5490 }, { "epoch": 0.41, "learning_rate": 3.706831595600317e-06, "logits/chosen": -1.7598508596420288, "logits/rejected": -1.2342584133148193, "logps/chosen": -457.2286682128906, "logps/rejected": -625.4468383789062, "loss": 0.6717, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.303249716758728, "rewards/margins": 0.22508123517036438, "rewards/rejected": -0.52833092212677, "step": 5500 }, { "epoch": 0.41, "learning_rate": 3.7011901419707764e-06, "logits/chosen": -1.929256796836853, "logits/rejected": -1.290540099143982, "logps/chosen": -545.3565673828125, "logps/rejected": -742.4136352539062, "loss": 0.6785, "rewards/accuracies": 0.75, "rewards/chosen": -0.34427350759506226, "rewards/margins": 0.2619650363922119, "rewards/rejected": -0.6062385439872742, "step": 5510 }, { "epoch": 0.41, "learning_rate": 3.6955407245400288e-06, "logits/chosen": -1.8804881572723389, "logits/rejected": -1.2537413835525513, "logps/chosen": -593.5989990234375, "logps/rejected": -814.696044921875, "loss": 0.6705, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4438878893852234, "rewards/margins": 0.26413658261299133, "rewards/rejected": -0.7080245018005371, "step": 5520 }, { "epoch": 0.41, "learning_rate": 3.6898833807632935e-06, "logits/chosen": -1.954289436340332, "logits/rejected": -1.4776830673217773, "logps/chosen": -535.5418701171875, "logps/rejected": -697.4637451171875, "loss": 0.6725, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.36493057012557983, "rewards/margins": 0.20617768168449402, "rewards/rejected": -0.571108341217041, "step": 5530 }, { "epoch": 0.41, "learning_rate": 3.684218148148337e-06, "logits/chosen": -1.8373931646347046, "logits/rejected": -1.3257968425750732, "logps/chosen": -480.42803955078125, "logps/rejected": -671.13525390625, "loss": 0.6757, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.33289265632629395, "rewards/margins": 0.23646894097328186, "rewards/rejected": -0.5693615674972534, "step": 5540 }, { "epoch": 0.41, "learning_rate": 3.6785450642552305e-06, "logits/chosen": -1.9077049493789673, "logits/rejected": -1.537473440170288, "logps/chosen": -454.708984375, "logps/rejected": -568.9837036132812, "loss": 0.6784, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.30643463134765625, "rewards/margins": 0.15584644675254822, "rewards/rejected": -0.4622810482978821, "step": 5550 }, { "epoch": 0.41, "learning_rate": 3.6728641666960974e-06, "logits/chosen": -1.8435380458831787, "logits/rejected": -1.6784700155258179, "logps/chosen": -560.2164306640625, "logps/rejected": -676.8204956054688, "loss": 0.6807, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.40253862738609314, "rewards/margins": 0.1185903400182724, "rewards/rejected": -0.5211289525032043, "step": 5560 }, { "epoch": 0.41, "learning_rate": 3.6671754931348647e-06, "logits/chosen": -1.9285767078399658, "logits/rejected": -1.5664182901382446, "logps/chosen": -497.45611572265625, "logps/rejected": -680.0555419921875, "loss": 0.675, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.32361000776290894, "rewards/margins": 0.19288155436515808, "rewards/rejected": -0.5164915323257446, "step": 5570 }, { "epoch": 0.41, "learning_rate": 3.6614790812870165e-06, "logits/chosen": -2.0306754112243652, "logits/rejected": -1.613465666770935, "logps/chosen": -503.21173095703125, "logps/rejected": -704.9832763671875, "loss": 0.6707, "rewards/accuracies": 0.875, "rewards/chosen": -0.3152351975440979, "rewards/margins": 0.22702518105506897, "rewards/rejected": -0.5422604084014893, "step": 5580 }, { "epoch": 0.41, "learning_rate": 3.655774968919338e-06, "logits/chosen": -1.8938430547714233, "logits/rejected": -1.575495958328247, "logps/chosen": -531.0654907226562, "logps/rejected": -667.6689453125, "loss": 0.6787, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.35942959785461426, "rewards/margins": 0.13956838846206665, "rewards/rejected": -0.4989979863166809, "step": 5590 }, { "epoch": 0.41, "learning_rate": 3.6500631938496695e-06, "logits/chosen": -1.9418296813964844, "logits/rejected": -1.3265209197998047, "logps/chosen": -578.5054931640625, "logps/rejected": -704.0382690429688, "loss": 0.6829, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.36412233114242554, "rewards/margins": 0.21158304810523987, "rewards/rejected": -0.5757054090499878, "step": 5600 }, { "epoch": 0.41, "learning_rate": 3.644343793946654e-06, "logits/chosen": -1.8872569799423218, "logits/rejected": -1.435028314590454, "logps/chosen": -410.61328125, "logps/rejected": -543.6466674804688, "loss": 0.6814, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2763649523258209, "rewards/margins": 0.18102385103702545, "rewards/rejected": -0.4573887884616852, "step": 5610 }, { "epoch": 0.41, "learning_rate": 3.638616807129488e-06, "logits/chosen": -1.9789842367172241, "logits/rejected": -1.5793168544769287, "logps/chosen": -543.41748046875, "logps/rejected": -615.7886962890625, "loss": 0.6797, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.34264183044433594, "rewards/margins": 0.12056410312652588, "rewards/rejected": -0.46320590376853943, "step": 5620 }, { "epoch": 0.42, "learning_rate": 3.632882271367668e-06, "logits/chosen": -1.9067745208740234, "logits/rejected": -1.5452373027801514, "logps/chosen": -565.1576538085938, "logps/rejected": -748.1326904296875, "loss": 0.6846, "rewards/accuracies": 0.75, "rewards/chosen": -0.4012826383113861, "rewards/margins": 0.1826046258211136, "rewards/rejected": -0.5838872194290161, "step": 5630 }, { "epoch": 0.42, "learning_rate": 3.627140224680738e-06, "logits/chosen": -2.0896670818328857, "logits/rejected": -1.7191846370697021, "logps/chosen": -512.2816772460938, "logps/rejected": -645.0487060546875, "loss": 0.6803, "rewards/accuracies": 0.625, "rewards/chosen": -0.35159462690353394, "rewards/margins": 0.15365605056285858, "rewards/rejected": -0.5052506923675537, "step": 5640 }, { "epoch": 0.42, "learning_rate": 3.6213907051380403e-06, "logits/chosen": -1.8001430034637451, "logits/rejected": -1.382861852645874, "logps/chosen": -711.9121704101562, "logps/rejected": -835.4837036132812, "loss": 0.68, "rewards/accuracies": 0.75, "rewards/chosen": -0.5299803614616394, "rewards/margins": 0.18132202327251434, "rewards/rejected": -0.7113023400306702, "step": 5650 }, { "epoch": 0.42, "learning_rate": 3.6156337508584615e-06, "logits/chosen": -1.8015989065170288, "logits/rejected": -1.2274807691574097, "logps/chosen": -714.375, "logps/rejected": -844.1629028320312, "loss": 0.6744, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5186136960983276, "rewards/margins": 0.225899338722229, "rewards/rejected": -0.7445129752159119, "step": 5660 }, { "epoch": 0.42, "learning_rate": 3.6098694000101795e-06, "logits/chosen": -1.7994234561920166, "logits/rejected": -1.504652738571167, "logps/chosen": -621.9471435546875, "logps/rejected": -803.0714111328125, "loss": 0.6828, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.464768648147583, "rewards/margins": 0.19093814492225647, "rewards/rejected": -0.6557067632675171, "step": 5670 }, { "epoch": 0.42, "learning_rate": 3.6040976908104126e-06, "logits/chosen": -1.7419275045394897, "logits/rejected": -1.2802011966705322, "logps/chosen": -577.388916015625, "logps/rejected": -831.1282958984375, "loss": 0.6657, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.43806084990501404, "rewards/margins": 0.2702586054801941, "rewards/rejected": -0.7083195447921753, "step": 5680 }, { "epoch": 0.42, "learning_rate": 3.5983186615251614e-06, "logits/chosen": -1.6155312061309814, "logits/rejected": -1.1119544506072998, "logps/chosen": -591.8400268554688, "logps/rejected": -760.8656005859375, "loss": 0.6766, "rewards/accuracies": 0.875, "rewards/chosen": -0.410536527633667, "rewards/margins": 0.22862020134925842, "rewards/rejected": -0.6391566395759583, "step": 5690 }, { "epoch": 0.42, "learning_rate": 3.5925323504689604e-06, "logits/chosen": -2.1118829250335693, "logits/rejected": -1.762668251991272, "logps/chosen": -488.0009765625, "logps/rejected": -709.5648803710938, "loss": 0.6755, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3371810019016266, "rewards/margins": 0.22501221299171448, "rewards/rejected": -0.5621932744979858, "step": 5700 }, { "epoch": 0.42, "learning_rate": 3.5867387960046206e-06, "logits/chosen": -1.8872162103652954, "logits/rejected": -1.6411778926849365, "logps/chosen": -517.1973876953125, "logps/rejected": -634.1405029296875, "loss": 0.6802, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.34225931763648987, "rewards/margins": 0.1368332803249359, "rewards/rejected": -0.479092538356781, "step": 5710 }, { "epoch": 0.42, "learning_rate": 3.580938036542978e-06, "logits/chosen": -1.93905508518219, "logits/rejected": -1.4156228303909302, "logps/chosen": -532.43896484375, "logps/rejected": -737.7254638671875, "loss": 0.6765, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35869932174682617, "rewards/margins": 0.22421202063560486, "rewards/rejected": -0.5829113721847534, "step": 5720 }, { "epoch": 0.42, "learning_rate": 3.5751301105426356e-06, "logits/chosen": -1.7301609516143799, "logits/rejected": -1.2754340171813965, "logps/chosen": -497.2810974121094, "logps/rejected": -618.6177978515625, "loss": 0.6811, "rewards/accuracies": 0.75, "rewards/chosen": -0.33188238739967346, "rewards/margins": 0.18541887402534485, "rewards/rejected": -0.5173012614250183, "step": 5730 }, { "epoch": 0.42, "learning_rate": 3.569315056509709e-06, "logits/chosen": -1.8971866369247437, "logits/rejected": -1.4330049753189087, "logps/chosen": -523.22021484375, "logps/rejected": -673.4618530273438, "loss": 0.6827, "rewards/accuracies": 0.625, "rewards/chosen": -0.378618448972702, "rewards/margins": 0.17687904834747314, "rewards/rejected": -0.5554975271224976, "step": 5740 }, { "epoch": 0.42, "learning_rate": 3.563492912997575e-06, "logits/chosen": -1.682903528213501, "logits/rejected": -1.3699532747268677, "logps/chosen": -688.6732177734375, "logps/rejected": -798.0615844726562, "loss": 0.6801, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.49444547295570374, "rewards/margins": 0.15280747413635254, "rewards/rejected": -0.6472529768943787, "step": 5750 }, { "epoch": 0.42, "learning_rate": 3.5576637186066104e-06, "logits/chosen": -1.6560713052749634, "logits/rejected": -1.260980248451233, "logps/chosen": -542.1229248046875, "logps/rejected": -741.4356689453125, "loss": 0.6793, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3922831118106842, "rewards/margins": 0.21939516067504883, "rewards/rejected": -0.6116782426834106, "step": 5760 }, { "epoch": 0.43, "learning_rate": 3.5518275119839406e-06, "logits/chosen": -1.7888033390045166, "logits/rejected": -1.285245418548584, "logps/chosen": -586.8167114257812, "logps/rejected": -743.943115234375, "loss": 0.6765, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4176756739616394, "rewards/margins": 0.21544280648231506, "rewards/rejected": -0.6331185102462769, "step": 5770 }, { "epoch": 0.43, "learning_rate": 3.545984331823182e-06, "logits/chosen": -1.8413890600204468, "logits/rejected": -1.5659377574920654, "logps/chosen": -654.1286010742188, "logps/rejected": -744.6405029296875, "loss": 0.6855, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4634554386138916, "rewards/margins": 0.1670142561197281, "rewards/rejected": -0.6304696798324585, "step": 5780 }, { "epoch": 0.43, "learning_rate": 3.5401342168641818e-06, "logits/chosen": -1.8928686380386353, "logits/rejected": -1.5777755975723267, "logps/chosen": -617.6550903320312, "logps/rejected": -780.82470703125, "loss": 0.6803, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.44212251901626587, "rewards/margins": 0.19342753291130066, "rewards/rejected": -0.6355500221252441, "step": 5790 }, { "epoch": 0.43, "learning_rate": 3.534277205892768e-06, "logits/chosen": -1.8842899799346924, "logits/rejected": -1.631584882736206, "logps/chosen": -580.9476928710938, "logps/rejected": -767.8767700195312, "loss": 0.6788, "rewards/accuracies": 0.75, "rewards/chosen": -0.4143300950527191, "rewards/margins": 0.1555207520723343, "rewards/rejected": -0.5698508024215698, "step": 5800 }, { "epoch": 0.43, "learning_rate": 3.528413337740487e-06, "logits/chosen": -1.9413467645645142, "logits/rejected": -1.4033992290496826, "logps/chosen": -451.65570068359375, "logps/rejected": -609.1845703125, "loss": 0.6756, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.29640060663223267, "rewards/margins": 0.20045793056488037, "rewards/rejected": -0.49685853719711304, "step": 5810 }, { "epoch": 0.43, "learning_rate": 3.5225426512843485e-06, "logits/chosen": -2.1490492820739746, "logits/rejected": -1.446860909461975, "logps/chosen": -427.23468017578125, "logps/rejected": -575.6762084960938, "loss": 0.6778, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.25033336877822876, "rewards/margins": 0.18017560243606567, "rewards/rejected": -0.43050894141197205, "step": 5820 }, { "epoch": 0.43, "learning_rate": 3.516665185446566e-06, "logits/chosen": -2.1974592208862305, "logits/rejected": -1.7829307317733765, "logps/chosen": -525.4083251953125, "logps/rejected": -690.450439453125, "loss": 0.6809, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3421039581298828, "rewards/margins": 0.16327646374702454, "rewards/rejected": -0.5053804516792297, "step": 5830 }, { "epoch": 0.43, "learning_rate": 3.5107809791943005e-06, "logits/chosen": -1.7347261905670166, "logits/rejected": -1.3823744058609009, "logps/chosen": -596.024169921875, "logps/rejected": -746.5048217773438, "loss": 0.6797, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.42345771193504333, "rewards/margins": 0.1852390319108963, "rewards/rejected": -0.6086967587471008, "step": 5840 }, { "epoch": 0.43, "learning_rate": 3.5048900715394018e-06, "logits/chosen": -1.7218672037124634, "logits/rejected": -1.3597314357757568, "logps/chosen": -616.0253295898438, "logps/rejected": -741.6423950195312, "loss": 0.6846, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4344436526298523, "rewards/margins": 0.1495330035686493, "rewards/rejected": -0.583976686000824, "step": 5850 }, { "epoch": 0.43, "learning_rate": 3.4989925015381477e-06, "logits/chosen": -1.8112834692001343, "logits/rejected": -1.513550043106079, "logps/chosen": -498.361083984375, "logps/rejected": -648.869140625, "loss": 0.6727, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.362516850233078, "rewards/margins": 0.19956015050411224, "rewards/rejected": -0.562076985836029, "step": 5860 }, { "epoch": 0.43, "learning_rate": 3.49308830829099e-06, "logits/chosen": -1.9715280532836914, "logits/rejected": -1.4662864208221436, "logps/chosen": -515.6634521484375, "logps/rejected": -694.3631591796875, "loss": 0.6743, "rewards/accuracies": 0.75, "rewards/chosen": -0.353728711605072, "rewards/margins": 0.2157224714756012, "rewards/rejected": -0.5694511532783508, "step": 5870 }, { "epoch": 0.43, "learning_rate": 3.487177530942289e-06, "logits/chosen": -2.0648131370544434, "logits/rejected": -1.5437633991241455, "logps/chosen": -461.19647216796875, "logps/rejected": -655.4368286132812, "loss": 0.6786, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.29834818840026855, "rewards/margins": 0.20662498474121094, "rewards/rejected": -0.5049732327461243, "step": 5880 }, { "epoch": 0.43, "learning_rate": 3.481260208680059e-06, "logits/chosen": -1.8418750762939453, "logits/rejected": -1.1828391551971436, "logps/chosen": -357.86102294921875, "logps/rejected": -561.5368041992188, "loss": 0.6751, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1999930441379547, "rewards/margins": 0.2623118758201599, "rewards/rejected": -0.46230489015579224, "step": 5890 }, { "epoch": 0.44, "learning_rate": 3.475336380735706e-06, "logits/chosen": -2.143359422683716, "logits/rejected": -1.4537363052368164, "logps/chosen": -414.5679626464844, "logps/rejected": -547.591064453125, "loss": 0.6798, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23728516697883606, "rewards/margins": 0.19855627417564392, "rewards/rejected": -0.43584147095680237, "step": 5900 }, { "epoch": 0.44, "learning_rate": 3.4694060863837682e-06, "logits/chosen": -2.1082870960235596, "logits/rejected": -1.5252577066421509, "logps/chosen": -352.1706848144531, "logps/rejected": -500.5538024902344, "loss": 0.6786, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21728770434856415, "rewards/margins": 0.19074209034442902, "rewards/rejected": -0.4080297350883484, "step": 5910 }, { "epoch": 0.44, "learning_rate": 3.463469364941655e-06, "logits/chosen": -2.043893337249756, "logits/rejected": -1.340504765510559, "logps/chosen": -448.32763671875, "logps/rejected": -547.8965454101562, "loss": 0.6807, "rewards/accuracies": 0.75, "rewards/chosen": -0.2399560511112213, "rewards/margins": 0.18385353684425354, "rewards/rejected": -0.42380958795547485, "step": 5920 }, { "epoch": 0.44, "learning_rate": 3.457526255769389e-06, "logits/chosen": -2.104614734649658, "logits/rejected": -1.574414610862732, "logps/chosen": -338.11083984375, "logps/rejected": -465.2049255371094, "loss": 0.6825, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17692983150482178, "rewards/margins": 0.18049179017543793, "rewards/rejected": -0.3574216365814209, "step": 5930 }, { "epoch": 0.44, "learning_rate": 3.451576798269339e-06, "logits/chosen": -2.2431740760803223, "logits/rejected": -2.024341583251953, "logps/chosen": -397.91729736328125, "logps/rejected": -425.70001220703125, "loss": 0.6864, "rewards/accuracies": 0.625, "rewards/chosen": -0.20039162039756775, "rewards/margins": 0.06320742517709732, "rewards/rejected": -0.26359909772872925, "step": 5940 }, { "epoch": 0.44, "learning_rate": 3.4456210318859688e-06, "logits/chosen": -2.178363084793091, "logits/rejected": -1.8274650573730469, "logps/chosen": -307.314208984375, "logps/rejected": -450.89361572265625, "loss": 0.6844, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16386358439922333, "rewards/margins": 0.11779455095529556, "rewards/rejected": -0.2816581428050995, "step": 5950 }, { "epoch": 0.44, "learning_rate": 3.4396589961055634e-06, "logits/chosen": -2.172407388687134, "logits/rejected": -1.6285667419433594, "logps/chosen": -247.1322784423828, "logps/rejected": -340.6658020019531, "loss": 0.6789, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.11887268722057343, "rewards/margins": 0.13714393973350525, "rewards/rejected": -0.2560166120529175, "step": 5960 }, { "epoch": 0.44, "learning_rate": 3.433690730455976e-06, "logits/chosen": -1.9339056015014648, "logits/rejected": -1.6449363231658936, "logps/chosen": -341.708984375, "logps/rejected": -478.9895935058594, "loss": 0.6787, "rewards/accuracies": 0.75, "rewards/chosen": -0.21085789799690247, "rewards/margins": 0.14248189330101013, "rewards/rejected": -0.353339821100235, "step": 5970 }, { "epoch": 0.44, "learning_rate": 3.4277162745063654e-06, "logits/chosen": -2.0447239875793457, "logits/rejected": -1.7199071645736694, "logps/chosen": -363.8100280761719, "logps/rejected": -481.35906982421875, "loss": 0.6838, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22310471534729004, "rewards/margins": 0.1352863907814026, "rewards/rejected": -0.358391135931015, "step": 5980 }, { "epoch": 0.44, "learning_rate": 3.4217356678669277e-06, "logits/chosen": -1.9908145666122437, "logits/rejected": -1.7329076528549194, "logps/chosen": -375.48419189453125, "logps/rejected": -474.8768005371094, "loss": 0.681, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21668967604637146, "rewards/margins": 0.11941239982843399, "rewards/rejected": -0.33610206842422485, "step": 5990 }, { "epoch": 0.44, "learning_rate": 3.415748950188641e-06, "logits/chosen": -1.9141336679458618, "logits/rejected": -1.5397241115570068, "logps/chosen": -386.39434814453125, "logps/rejected": -510.1167907714844, "loss": 0.6813, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24249887466430664, "rewards/margins": 0.1547732651233673, "rewards/rejected": -0.39727216958999634, "step": 6000 }, { "epoch": 0.44, "learning_rate": 3.4097561611629976e-06, "logits/chosen": -2.0348751544952393, "logits/rejected": -1.5871663093566895, "logps/chosen": -464.72955322265625, "logps/rejected": -630.9901123046875, "loss": 0.6806, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3269668519496918, "rewards/margins": 0.1913371980190277, "rewards/rejected": -0.5183040499687195, "step": 6010 }, { "epoch": 0.44, "learning_rate": 3.4037573405217416e-06, "logits/chosen": -1.805455207824707, "logits/rejected": -1.255211353302002, "logps/chosen": -520.4609375, "logps/rejected": -662.7445068359375, "loss": 0.6779, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3474521040916443, "rewards/margins": 0.18185222148895264, "rewards/rejected": -0.5293043851852417, "step": 6020 }, { "epoch": 0.44, "learning_rate": 3.3977525280366092e-06, "logits/chosen": -2.07625150680542, "logits/rejected": -1.4840152263641357, "logps/chosen": -446.2945251464844, "logps/rejected": -623.4526977539062, "loss": 0.6726, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.27349764108657837, "rewards/margins": 0.212972491979599, "rewards/rejected": -0.48647013306617737, "step": 6030 }, { "epoch": 0.45, "learning_rate": 3.3917417635190576e-06, "logits/chosen": -2.1341192722320557, "logits/rejected": -1.5425755977630615, "logps/chosen": -502.3935546875, "logps/rejected": -662.6549072265625, "loss": 0.6746, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3024045526981354, "rewards/margins": 0.24127304553985596, "rewards/rejected": -0.543677568435669, "step": 6040 }, { "epoch": 0.45, "learning_rate": 3.38572508682001e-06, "logits/chosen": -1.9003467559814453, "logits/rejected": -1.5702028274536133, "logps/chosen": -474.44757080078125, "logps/rejected": -586.7430419921875, "loss": 0.6818, "rewards/accuracies": 0.75, "rewards/chosen": -0.3087007403373718, "rewards/margins": 0.11544058471918106, "rewards/rejected": -0.4241413176059723, "step": 6050 }, { "epoch": 0.45, "learning_rate": 3.3797025378295826e-06, "logits/chosen": -1.8500703573226929, "logits/rejected": -1.3663884401321411, "logps/chosen": -621.2401123046875, "logps/rejected": -708.9661865234375, "loss": 0.6817, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4593849182128906, "rewards/margins": 0.14063045382499695, "rewards/rejected": -0.6000152826309204, "step": 6060 }, { "epoch": 0.45, "learning_rate": 3.3736741564768294e-06, "logits/chosen": -1.9363479614257812, "logits/rejected": -1.37538743019104, "logps/chosen": -593.5001831054688, "logps/rejected": -774.9886474609375, "loss": 0.6725, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.40647315979003906, "rewards/margins": 0.22704851627349854, "rewards/rejected": -0.6335216760635376, "step": 6070 }, { "epoch": 0.45, "learning_rate": 3.367639982729469e-06, "logits/chosen": -1.810555100440979, "logits/rejected": -1.408254623413086, "logps/chosen": -538.084228515625, "logps/rejected": -711.6095581054688, "loss": 0.6774, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.42527008056640625, "rewards/margins": 0.1726972460746765, "rewards/rejected": -0.597967267036438, "step": 6080 }, { "epoch": 0.45, "learning_rate": 3.3616000565936235e-06, "logits/chosen": -1.8381048440933228, "logits/rejected": -1.5678539276123047, "logps/chosen": -523.8984375, "logps/rejected": -654.6629638671875, "loss": 0.6777, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.397448867559433, "rewards/margins": 0.15347933769226074, "rewards/rejected": -0.5509282350540161, "step": 6090 }, { "epoch": 0.45, "learning_rate": 3.3555544181135542e-06, "logits/chosen": -2.0426197052001953, "logits/rejected": -1.5259358882904053, "logps/chosen": -472.02679443359375, "logps/rejected": -594.1971435546875, "loss": 0.6778, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2880071997642517, "rewards/margins": 0.1637764871120453, "rewards/rejected": -0.4517836570739746, "step": 6100 }, { "epoch": 0.45, "learning_rate": 3.3495031073713937e-06, "logits/chosen": -2.029232978820801, "logits/rejected": -1.475311279296875, "logps/chosen": -516.3600463867188, "logps/rejected": -636.9332275390625, "loss": 0.68, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3377743363380432, "rewards/margins": 0.16453923285007477, "rewards/rejected": -0.5023135542869568, "step": 6110 }, { "epoch": 0.45, "learning_rate": 3.343446164486881e-06, "logits/chosen": -1.868058443069458, "logits/rejected": -1.5182057619094849, "logps/chosen": -444.9867248535156, "logps/rejected": -711.74072265625, "loss": 0.6768, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.31136995553970337, "rewards/margins": 0.253864586353302, "rewards/rejected": -0.5652346014976501, "step": 6120 }, { "epoch": 0.45, "learning_rate": 3.337383629617098e-06, "logits/chosen": -1.9999878406524658, "logits/rejected": -1.3931248188018799, "logps/chosen": -562.7313232421875, "logps/rejected": -740.0481567382812, "loss": 0.6775, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35319969058036804, "rewards/margins": 0.257065087556839, "rewards/rejected": -0.6102647185325623, "step": 6130 }, { "epoch": 0.45, "learning_rate": 3.331315542956198e-06, "logits/chosen": -1.904035210609436, "logits/rejected": -1.432300329208374, "logps/chosen": -579.0758666992188, "logps/rejected": -698.5496826171875, "loss": 0.68, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4150694012641907, "rewards/margins": 0.1647872030735016, "rewards/rejected": -0.5798565745353699, "step": 6140 }, { "epoch": 0.45, "learning_rate": 3.3252419447351455e-06, "logits/chosen": -1.8445056676864624, "logits/rejected": -1.1112511157989502, "logps/chosen": -500.0738220214844, "logps/rejected": -734.1705932617188, "loss": 0.6722, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3637721538543701, "rewards/margins": 0.2656688988208771, "rewards/rejected": -0.6294410228729248, "step": 6150 }, { "epoch": 0.45, "learning_rate": 3.3191628752214424e-06, "logits/chosen": -1.9894071817398071, "logits/rejected": -1.3852981328964233, "logps/chosen": -582.7052001953125, "logps/rejected": -778.2720947265625, "loss": 0.6734, "rewards/accuracies": 0.75, "rewards/chosen": -0.4115975797176361, "rewards/margins": 0.22903457283973694, "rewards/rejected": -0.6406320929527283, "step": 6160 }, { "epoch": 0.46, "learning_rate": 3.313078374718868e-06, "logits/chosen": -1.9242064952850342, "logits/rejected": -1.3370333909988403, "logps/chosen": -569.1353759765625, "logps/rejected": -759.8958740234375, "loss": 0.6767, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3857324421405792, "rewards/margins": 0.2288162261247635, "rewards/rejected": -0.6145486235618591, "step": 6170 }, { "epoch": 0.46, "learning_rate": 3.3069884835672085e-06, "logits/chosen": -1.8815162181854248, "logits/rejected": -1.5398274660110474, "logps/chosen": -519.205322265625, "logps/rejected": -677.8521728515625, "loss": 0.6807, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3750057518482208, "rewards/margins": 0.17220966517925262, "rewards/rejected": -0.547215461730957, "step": 6180 }, { "epoch": 0.46, "learning_rate": 3.3008932421419863e-06, "logits/chosen": -1.7199426889419556, "logits/rejected": -1.300282597541809, "logps/chosen": -561.9642944335938, "logps/rejected": -702.239013671875, "loss": 0.6778, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3873324990272522, "rewards/margins": 0.2017141580581665, "rewards/rejected": -0.5890466570854187, "step": 6190 }, { "epoch": 0.46, "learning_rate": 3.2947926908542015e-06, "logits/chosen": -2.0027642250061035, "logits/rejected": -1.4616081714630127, "logps/chosen": -534.6630859375, "logps/rejected": -679.27392578125, "loss": 0.6811, "rewards/accuracies": 0.75, "rewards/chosen": -0.341122567653656, "rewards/margins": 0.1993860900402069, "rewards/rejected": -0.5405086874961853, "step": 6200 }, { "epoch": 0.46, "learning_rate": 3.2886868701500523e-06, "logits/chosen": -1.9795745611190796, "logits/rejected": -1.6756694316864014, "logps/chosen": -553.5291748046875, "logps/rejected": -674.4440307617188, "loss": 0.6812, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3971198499202728, "rewards/margins": 0.1646588146686554, "rewards/rejected": -0.5617786645889282, "step": 6210 }, { "epoch": 0.46, "learning_rate": 3.2825758205106763e-06, "logits/chosen": -1.9561436176300049, "logits/rejected": -1.660048484802246, "logps/chosen": -552.8775634765625, "logps/rejected": -685.9751586914062, "loss": 0.6796, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.37929385900497437, "rewards/margins": 0.17874227464199066, "rewards/rejected": -0.5580361485481262, "step": 6220 }, { "epoch": 0.46, "learning_rate": 3.276459582451878e-06, "logits/chosen": -1.7387717962265015, "logits/rejected": -1.2801430225372314, "logps/chosen": -589.7346801757812, "logps/rejected": -723.9328002929688, "loss": 0.6812, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.44899114966392517, "rewards/margins": 0.15639494359493256, "rewards/rejected": -0.6053860783576965, "step": 6230 }, { "epoch": 0.46, "learning_rate": 3.270338196523859e-06, "logits/chosen": -1.957258939743042, "logits/rejected": -1.6017682552337646, "logps/chosen": -570.5184936523438, "logps/rejected": -660.681884765625, "loss": 0.6832, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.44183021783828735, "rewards/margins": 0.11420176178216934, "rewards/rejected": -0.5560320019721985, "step": 6240 }, { "epoch": 0.46, "learning_rate": 3.2642117033109545e-06, "logits/chosen": -1.7162551879882812, "logits/rejected": -1.397513747215271, "logps/chosen": -620.0198974609375, "logps/rejected": -755.3873291015625, "loss": 0.6762, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.43943047523498535, "rewards/margins": 0.17023028433322906, "rewards/rejected": -0.6096607446670532, "step": 6250 }, { "epoch": 0.46, "learning_rate": 3.2580801434313556e-06, "logits/chosen": -1.8520504236221313, "logits/rejected": -1.6010191440582275, "logps/chosen": -515.5926513671875, "logps/rejected": -655.2487182617188, "loss": 0.6805, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.38644710183143616, "rewards/margins": 0.12604103982448578, "rewards/rejected": -0.5124881863594055, "step": 6260 }, { "epoch": 0.46, "learning_rate": 3.2519435575368504e-06, "logits/chosen": -2.021749973297119, "logits/rejected": -1.4339169263839722, "logps/chosen": -516.9894409179688, "logps/rejected": -736.5732421875, "loss": 0.6719, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3602767288684845, "rewards/margins": 0.25475579500198364, "rewards/rejected": -0.6150324940681458, "step": 6270 }, { "epoch": 0.46, "learning_rate": 3.2458019863125447e-06, "logits/chosen": -1.9513667821884155, "logits/rejected": -1.5122549533843994, "logps/chosen": -598.7296752929688, "logps/rejected": -692.9185791015625, "loss": 0.683, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.42278414964675903, "rewards/margins": 0.1513214111328125, "rewards/rejected": -0.5741055607795715, "step": 6280 }, { "epoch": 0.46, "learning_rate": 3.239655470476599e-06, "logits/chosen": -1.9152135848999023, "logits/rejected": -1.6262691020965576, "logps/chosen": -611.7595825195312, "logps/rejected": -731.10205078125, "loss": 0.68, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4393506646156311, "rewards/margins": 0.1542167067527771, "rewards/rejected": -0.5935673713684082, "step": 6290 }, { "epoch": 0.46, "learning_rate": 3.2335040507799557e-06, "logits/chosen": -1.9327417612075806, "logits/rejected": -1.4148021936416626, "logps/chosen": -602.9780883789062, "logps/rejected": -858.9059448242188, "loss": 0.6746, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.42690712213516235, "rewards/margins": 0.276440292596817, "rewards/rejected": -0.703347384929657, "step": 6300 }, { "epoch": 0.47, "learning_rate": 3.2273477680060695e-06, "logits/chosen": -1.72927987575531, "logits/rejected": -1.3323309421539307, "logps/chosen": -527.9720458984375, "logps/rejected": -668.5506591796875, "loss": 0.6794, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3976271152496338, "rewards/margins": 0.13870391249656677, "rewards/rejected": -0.5363309979438782, "step": 6310 }, { "epoch": 0.47, "learning_rate": 3.221186662970636e-06, "logits/chosen": -2.021775245666504, "logits/rejected": -1.5663917064666748, "logps/chosen": -542.3255004882812, "logps/rejected": -669.5946655273438, "loss": 0.681, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.37046247720718384, "rewards/margins": 0.14788684248924255, "rewards/rejected": -0.518349289894104, "step": 6320 }, { "epoch": 0.47, "learning_rate": 3.2150207765213243e-06, "logits/chosen": -1.8774802684783936, "logits/rejected": -1.3104320764541626, "logps/chosen": -665.9811401367188, "logps/rejected": -823.98974609375, "loss": 0.6751, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.49496036767959595, "rewards/margins": 0.20486266911029816, "rewards/rejected": -0.6998230218887329, "step": 6330 }, { "epoch": 0.47, "learning_rate": 3.2088501495375015e-06, "logits/chosen": -1.948754072189331, "logits/rejected": -1.3522086143493652, "logps/chosen": -630.30615234375, "logps/rejected": -833.8988037109375, "loss": 0.6712, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4587029814720154, "rewards/margins": 0.2540748715400696, "rewards/rejected": -0.712777853012085, "step": 6340 }, { "epoch": 0.47, "learning_rate": 3.2026748229299653e-06, "logits/chosen": -1.8668352365493774, "logits/rejected": -1.289344310760498, "logps/chosen": -551.1192626953125, "logps/rejected": -763.5194091796875, "loss": 0.6698, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.38995280861854553, "rewards/margins": 0.2837934195995331, "rewards/rejected": -0.6737462282180786, "step": 6350 }, { "epoch": 0.47, "learning_rate": 3.1964948376406708e-06, "logits/chosen": -1.9655011892318726, "logits/rejected": -1.4031397104263306, "logps/chosen": -594.9956665039062, "logps/rejected": -723.3606567382812, "loss": 0.6766, "rewards/accuracies": 0.75, "rewards/chosen": -0.40737253427505493, "rewards/margins": 0.18851818144321442, "rewards/rejected": -0.5958907604217529, "step": 6360 }, { "epoch": 0.47, "learning_rate": 3.1903102346424603e-06, "logits/chosen": -2.0515990257263184, "logits/rejected": -1.3382363319396973, "logps/chosen": -544.5847778320312, "logps/rejected": -742.0779418945312, "loss": 0.6788, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.39781516790390015, "rewards/margins": 0.25101011991500854, "rewards/rejected": -0.6488253474235535, "step": 6370 }, { "epoch": 0.47, "learning_rate": 3.1841210549387925e-06, "logits/chosen": -1.737266182899475, "logits/rejected": -1.301255702972412, "logps/chosen": -713.3052978515625, "logps/rejected": -835.8338623046875, "loss": 0.6822, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5369859933853149, "rewards/margins": 0.17261692881584167, "rewards/rejected": -0.709602952003479, "step": 6380 }, { "epoch": 0.47, "learning_rate": 3.1779273395634673e-06, "logits/chosen": -2.011296033859253, "logits/rejected": -1.6005446910858154, "logps/chosen": -537.9598999023438, "logps/rejected": -687.9765625, "loss": 0.6759, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.37186792492866516, "rewards/margins": 0.18007388710975647, "rewards/rejected": -0.5519417524337769, "step": 6390 }, { "epoch": 0.47, "learning_rate": 3.171729129580356e-06, "logits/chosen": -2.0133473873138428, "logits/rejected": -1.5651936531066895, "logps/chosen": -519.1151123046875, "logps/rejected": -686.28125, "loss": 0.677, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.32393136620521545, "rewards/margins": 0.19091899693012238, "rewards/rejected": -0.514850378036499, "step": 6400 }, { "epoch": 0.47, "learning_rate": 3.1655264660831304e-06, "logits/chosen": -1.7605316638946533, "logits/rejected": -1.4654605388641357, "logps/chosen": -624.3538208007812, "logps/rejected": -737.4174194335938, "loss": 0.6824, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4609278738498688, "rewards/margins": 0.1103988066315651, "rewards/rejected": -0.5713266730308533, "step": 6410 }, { "epoch": 0.47, "learning_rate": 3.159319390194986e-06, "logits/chosen": -2.0168747901916504, "logits/rejected": -1.422100305557251, "logps/chosen": -437.08856201171875, "logps/rejected": -539.4854736328125, "loss": 0.6814, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.29337233304977417, "rewards/margins": 0.14200623333454132, "rewards/rejected": -0.4353785514831543, "step": 6420 }, { "epoch": 0.47, "learning_rate": 3.1531079430683755e-06, "logits/chosen": -1.9542795419692993, "logits/rejected": -1.487581491470337, "logps/chosen": -478.72540283203125, "logps/rejected": -646.8760375976562, "loss": 0.6793, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.34407079219818115, "rewards/margins": 0.20961180329322815, "rewards/rejected": -0.5536825656890869, "step": 6430 }, { "epoch": 0.48, "learning_rate": 3.1468921658847295e-06, "logits/chosen": -2.0213074684143066, "logits/rejected": -1.7146384716033936, "logps/chosen": -459.3128967285156, "logps/rejected": -637.2595825195312, "loss": 0.6799, "rewards/accuracies": 0.75, "rewards/chosen": -0.3149290680885315, "rewards/margins": 0.185503751039505, "rewards/rejected": -0.5004327893257141, "step": 6440 }, { "epoch": 0.48, "learning_rate": 3.1406720998541882e-06, "logits/chosen": -2.00962233543396, "logits/rejected": -1.6899890899658203, "logps/chosen": -559.3458251953125, "logps/rejected": -725.7919921875, "loss": 0.6756, "rewards/accuracies": 0.75, "rewards/chosen": -0.41702499985694885, "rewards/margins": 0.17267832159996033, "rewards/rejected": -0.5897032618522644, "step": 6450 }, { "epoch": 0.48, "learning_rate": 3.134447786215327e-06, "logits/chosen": -2.083085536956787, "logits/rejected": -1.765974760055542, "logps/chosen": -560.1082763671875, "logps/rejected": -675.1223754882812, "loss": 0.6795, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4175054430961609, "rewards/margins": 0.1455855667591095, "rewards/rejected": -0.5630909204483032, "step": 6460 }, { "epoch": 0.48, "learning_rate": 3.128219266234882e-06, "logits/chosen": -2.138890504837036, "logits/rejected": -1.4240144491195679, "logps/chosen": -500.2314453125, "logps/rejected": -720.4080810546875, "loss": 0.6786, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3726792335510254, "rewards/margins": 0.2392541915178299, "rewards/rejected": -0.6119334101676941, "step": 6470 }, { "epoch": 0.48, "learning_rate": 3.1219865812074763e-06, "logits/chosen": -2.046323299407959, "logits/rejected": -1.5417730808258057, "logps/chosen": -486.86907958984375, "logps/rejected": -670.6871337890625, "loss": 0.6754, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3576766848564148, "rewards/margins": 0.22054007649421692, "rewards/rejected": -0.5782166719436646, "step": 6480 }, { "epoch": 0.48, "learning_rate": 3.115749772455347e-06, "logits/chosen": -2.2089903354644775, "logits/rejected": -1.4030003547668457, "logps/chosen": -474.4725646972656, "logps/rejected": -712.5400390625, "loss": 0.6766, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.29477551579475403, "rewards/margins": 0.2976991832256317, "rewards/rejected": -0.5924746990203857, "step": 6490 }, { "epoch": 0.48, "learning_rate": 3.1095088813280728e-06, "logits/chosen": -2.0462565422058105, "logits/rejected": -1.4906820058822632, "logps/chosen": -529.87353515625, "logps/rejected": -729.6755981445312, "loss": 0.6744, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.38403090834617615, "rewards/margins": 0.20272168517112732, "rewards/rejected": -0.5867525935173035, "step": 6500 }, { "epoch": 0.48, "learning_rate": 3.103263949202297e-06, "logits/chosen": -2.099402904510498, "logits/rejected": -1.746106505393982, "logps/chosen": -481.7413635253906, "logps/rejected": -643.9059448242188, "loss": 0.6786, "rewards/accuracies": 0.75, "rewards/chosen": -0.33212077617645264, "rewards/margins": 0.1862402856349945, "rewards/rejected": -0.5183610916137695, "step": 6510 }, { "epoch": 0.48, "learning_rate": 3.097015017481454e-06, "logits/chosen": -1.955195665359497, "logits/rejected": -1.6175222396850586, "logps/chosen": -524.7586059570312, "logps/rejected": -635.8948974609375, "loss": 0.6834, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3892713189125061, "rewards/margins": 0.14361132681369781, "rewards/rejected": -0.532882571220398, "step": 6520 }, { "epoch": 0.48, "learning_rate": 3.0907621275954973e-06, "logits/chosen": -2.11818265914917, "logits/rejected": -1.5418546199798584, "logps/chosen": -562.2062377929688, "logps/rejected": -745.2238159179688, "loss": 0.6769, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3769654631614685, "rewards/margins": 0.2211635410785675, "rewards/rejected": -0.5981289744377136, "step": 6530 }, { "epoch": 0.48, "learning_rate": 3.084505321000619e-06, "logits/chosen": -2.3265984058380127, "logits/rejected": -1.931199073791504, "logps/chosen": -577.4480590820312, "logps/rejected": -583.350341796875, "loss": 0.6874, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3977353274822235, "rewards/margins": 0.06996016204357147, "rewards/rejected": -0.46769553422927856, "step": 6540 }, { "epoch": 0.48, "learning_rate": 3.0782446391789834e-06, "logits/chosen": -2.178900718688965, "logits/rejected": -1.979134202003479, "logps/chosen": -445.25750732421875, "logps/rejected": -625.1329956054688, "loss": 0.6819, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3055296242237091, "rewards/margins": 0.1616038978099823, "rewards/rejected": -0.4671335220336914, "step": 6550 }, { "epoch": 0.48, "learning_rate": 3.0719801236384426e-06, "logits/chosen": -2.077944755554199, "logits/rejected": -1.6268384456634521, "logps/chosen": -611.8064575195312, "logps/rejected": -720.1578369140625, "loss": 0.6825, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.41180044412612915, "rewards/margins": 0.18477635085582733, "rewards/rejected": -0.5965768098831177, "step": 6560 }, { "epoch": 0.48, "learning_rate": 3.06571181591227e-06, "logits/chosen": -1.9684549570083618, "logits/rejected": -1.541473150253296, "logps/chosen": -607.3316650390625, "logps/rejected": -726.31787109375, "loss": 0.6818, "rewards/accuracies": 0.75, "rewards/chosen": -0.4089638590812683, "rewards/margins": 0.1693253219127655, "rewards/rejected": -0.5782891511917114, "step": 6570 }, { "epoch": 0.49, "learning_rate": 3.0594397575588787e-06, "logits/chosen": -2.015972852706909, "logits/rejected": -1.5080727338790894, "logps/chosen": -521.232177734375, "logps/rejected": -668.9642333984375, "loss": 0.6821, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3634886145591736, "rewards/margins": 0.175901398062706, "rewards/rejected": -0.5393900275230408, "step": 6580 }, { "epoch": 0.49, "learning_rate": 3.0531639901615473e-06, "logits/chosen": -2.021435260772705, "logits/rejected": -1.6530787944793701, "logps/chosen": -640.26953125, "logps/rejected": -797.0333862304688, "loss": 0.6793, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.49466824531555176, "rewards/margins": 0.16682788729667664, "rewards/rejected": -0.6614962220191956, "step": 6590 }, { "epoch": 0.49, "learning_rate": 3.0468845553281477e-06, "logits/chosen": -1.9210929870605469, "logits/rejected": -1.3341652154922485, "logps/chosen": -573.1805419921875, "logps/rejected": -784.2160034179688, "loss": 0.6735, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.41157984733581543, "rewards/margins": 0.26045146584510803, "rewards/rejected": -0.6720313429832458, "step": 6600 }, { "epoch": 0.49, "learning_rate": 3.0406014946908653e-06, "logits/chosen": -2.093360424041748, "logits/rejected": -1.5156382322311401, "logps/chosen": -638.4937744140625, "logps/rejected": -793.61376953125, "loss": 0.6745, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.42478102445602417, "rewards/margins": 0.21525979042053223, "rewards/rejected": -0.6400408148765564, "step": 6610 }, { "epoch": 0.49, "learning_rate": 3.0343148499059236e-06, "logits/chosen": -2.0836987495422363, "logits/rejected": -1.8176424503326416, "logps/chosen": -498.13409423828125, "logps/rejected": -656.3651733398438, "loss": 0.6792, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3418799340724945, "rewards/margins": 0.17461861670017242, "rewards/rejected": -0.5164986252784729, "step": 6620 }, { "epoch": 0.49, "learning_rate": 3.0280246626533105e-06, "logits/chosen": -2.1817145347595215, "logits/rejected": -1.6666322946548462, "logps/chosen": -500.62548828125, "logps/rejected": -678.0709838867188, "loss": 0.6776, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.31255078315734863, "rewards/margins": 0.22410793602466583, "rewards/rejected": -0.5366587042808533, "step": 6630 }, { "epoch": 0.49, "learning_rate": 3.0217309746364975e-06, "logits/chosen": -2.0185773372650146, "logits/rejected": -1.5038419961929321, "logps/chosen": -509.35888671875, "logps/rejected": -682.7445068359375, "loss": 0.6798, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.340786874294281, "rewards/margins": 0.22461406886577606, "rewards/rejected": -0.5654009580612183, "step": 6640 }, { "epoch": 0.49, "learning_rate": 3.01543382758217e-06, "logits/chosen": -2.191497325897217, "logits/rejected": -1.9312095642089844, "logps/chosen": -549.7363891601562, "logps/rejected": -610.630859375, "loss": 0.6858, "rewards/accuracies": 0.625, "rewards/chosen": -0.3978935182094574, "rewards/margins": 0.09043605625629425, "rewards/rejected": -0.48832958936691284, "step": 6650 }, { "epoch": 0.49, "learning_rate": 3.0091332632399413e-06, "logits/chosen": -2.2596595287323, "logits/rejected": -1.6197564601898193, "logps/chosen": -524.9192504882812, "logps/rejected": -679.7347412109375, "loss": 0.6792, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.31637540459632874, "rewards/margins": 0.20949716866016388, "rewards/rejected": -0.5258725881576538, "step": 6660 }, { "epoch": 0.49, "learning_rate": 3.0028293233820856e-06, "logits/chosen": -2.0894885063171387, "logits/rejected": -1.3554563522338867, "logps/chosen": -406.24908447265625, "logps/rejected": -703.4998779296875, "loss": 0.6741, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2672123312950134, "rewards/margins": 0.3281785547733307, "rewards/rejected": -0.5953909158706665, "step": 6670 }, { "epoch": 0.49, "learning_rate": 2.9965220498032543e-06, "logits/chosen": -2.3133010864257812, "logits/rejected": -1.765964150428772, "logps/chosen": -473.1839904785156, "logps/rejected": -610.7492065429688, "loss": 0.6728, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.27906304597854614, "rewards/margins": 0.20205560326576233, "rewards/rejected": -0.4811186194419861, "step": 6680 }, { "epoch": 0.49, "learning_rate": 2.9902114843202017e-06, "logits/chosen": -2.2570834159851074, "logits/rejected": -1.6558202505111694, "logps/chosen": -519.571533203125, "logps/rejected": -642.6387939453125, "loss": 0.6775, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3026890754699707, "rewards/margins": 0.20839710533618927, "rewards/rejected": -0.511086106300354, "step": 6690 }, { "epoch": 0.49, "learning_rate": 2.9838976687715076e-06, "logits/chosen": -2.159278392791748, "logits/rejected": -1.7351675033569336, "logps/chosen": -603.2738647460938, "logps/rejected": -753.0482177734375, "loss": 0.676, "rewards/accuracies": 0.75, "rewards/chosen": -0.39662590622901917, "rewards/margins": 0.20948393642902374, "rewards/rejected": -0.6061098575592041, "step": 6700 }, { "epoch": 0.49, "learning_rate": 2.977580645017298e-06, "logits/chosen": -2.0612852573394775, "logits/rejected": -1.7824900150299072, "logps/chosen": -569.5782470703125, "logps/rejected": -700.2277221679688, "loss": 0.681, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.41001325845718384, "rewards/margins": 0.15399791300296783, "rewards/rejected": -0.5640112161636353, "step": 6710 }, { "epoch": 0.5, "learning_rate": 2.9712604549389713e-06, "logits/chosen": -1.5957187414169312, "logits/rejected": -1.2469788789749146, "logps/chosen": -616.9733276367188, "logps/rejected": -727.3486328125, "loss": 0.6826, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.44034481048583984, "rewards/margins": 0.1512954980134964, "rewards/rejected": -0.5916402339935303, "step": 6720 }, { "epoch": 0.5, "learning_rate": 2.9649371404389153e-06, "logits/chosen": -1.990435242652893, "logits/rejected": -1.5826531648635864, "logps/chosen": -521.159912109375, "logps/rejected": -666.196533203125, "loss": 0.682, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.38774874806404114, "rewards/margins": 0.15421031415462494, "rewards/rejected": -0.5419590473175049, "step": 6730 }, { "epoch": 0.5, "learning_rate": 2.958610743440236e-06, "logits/chosen": -2.185781478881836, "logits/rejected": -1.5372633934020996, "logps/chosen": -532.966064453125, "logps/rejected": -713.6411743164062, "loss": 0.6794, "rewards/accuracies": 0.75, "rewards/chosen": -0.3574126660823822, "rewards/margins": 0.21017566323280334, "rewards/rejected": -0.5675883293151855, "step": 6740 }, { "epoch": 0.5, "learning_rate": 2.952281305886474e-06, "logits/chosen": -1.9424091577529907, "logits/rejected": -1.5165364742279053, "logps/chosen": -594.5410766601562, "logps/rejected": -706.408447265625, "loss": 0.6793, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4352007508277893, "rewards/margins": 0.1538695991039276, "rewards/rejected": -0.5890703201293945, "step": 6750 }, { "epoch": 0.5, "learning_rate": 2.945948869741329e-06, "logits/chosen": -2.196378707885742, "logits/rejected": -1.7798036336898804, "logps/chosen": -519.2769165039062, "logps/rejected": -685.791015625, "loss": 0.6768, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.36770525574684143, "rewards/margins": 0.20554545521736145, "rewards/rejected": -0.5732506513595581, "step": 6760 }, { "epoch": 0.5, "learning_rate": 2.9396134769883807e-06, "logits/chosen": -2.0456507205963135, "logits/rejected": -1.599278211593628, "logps/chosen": -452.8682556152344, "logps/rejected": -603.7195434570312, "loss": 0.6781, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.30505138635635376, "rewards/margins": 0.17764846980571747, "rewards/rejected": -0.4826998710632324, "step": 6770 }, { "epoch": 0.5, "learning_rate": 2.933275169630812e-06, "logits/chosen": -2.096311092376709, "logits/rejected": -1.7226508855819702, "logps/chosen": -501.349853515625, "logps/rejected": -613.598876953125, "loss": 0.6793, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2949826121330261, "rewards/margins": 0.17843762040138245, "rewards/rejected": -0.47342023253440857, "step": 6780 }, { "epoch": 0.5, "learning_rate": 2.9269339896911287e-06, "logits/chosen": -2.116137981414795, "logits/rejected": -1.8151710033416748, "logps/chosen": -473.00836181640625, "logps/rejected": -623.4537353515625, "loss": 0.6798, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.32733437418937683, "rewards/margins": 0.13131150603294373, "rewards/rejected": -0.45864591002464294, "step": 6790 }, { "epoch": 0.5, "learning_rate": 2.920589979210881e-06, "logits/chosen": -2.1532251834869385, "logits/rejected": -1.6394703388214111, "logps/chosen": -437.837890625, "logps/rejected": -569.8243408203125, "loss": 0.6837, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2904403805732727, "rewards/margins": 0.1571992039680481, "rewards/rejected": -0.4476395547389984, "step": 6800 }, { "epoch": 0.5, "learning_rate": 2.9142431802503867e-06, "logits/chosen": -2.0728814601898193, "logits/rejected": -1.6911239624023438, "logps/chosen": -573.6834716796875, "logps/rejected": -714.8901977539062, "loss": 0.6803, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.453084796667099, "rewards/margins": 0.16427884995937347, "rewards/rejected": -0.617363691329956, "step": 6810 }, { "epoch": 0.5, "learning_rate": 2.9078936348884496e-06, "logits/chosen": -2.2866759300231934, "logits/rejected": -1.7376935482025146, "logps/chosen": -619.0081787109375, "logps/rejected": -787.6439208984375, "loss": 0.6788, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4328792691230774, "rewards/margins": 0.18902845680713654, "rewards/rejected": -0.621907651424408, "step": 6820 }, { "epoch": 0.5, "learning_rate": 2.901541385222084e-06, "logits/chosen": -2.2238550186157227, "logits/rejected": -1.3741816282272339, "logps/chosen": -480.2894592285156, "logps/rejected": -622.2225341796875, "loss": 0.6779, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2750176787376404, "rewards/margins": 0.21948309242725372, "rewards/rejected": -0.4945007860660553, "step": 6830 }, { "epoch": 0.5, "learning_rate": 2.895186473366231e-06, "logits/chosen": -2.18540620803833, "logits/rejected": -1.6628131866455078, "logps/chosen": -478.08514404296875, "logps/rejected": -678.989013671875, "loss": 0.6782, "rewards/accuracies": 0.75, "rewards/chosen": -0.3133813738822937, "rewards/margins": 0.22190947830677032, "rewards/rejected": -0.5352908968925476, "step": 6840 }, { "epoch": 0.51, "learning_rate": 2.8888289414534837e-06, "logits/chosen": -1.9425108432769775, "logits/rejected": -1.702750563621521, "logps/chosen": -553.0935668945312, "logps/rejected": -675.4854125976562, "loss": 0.6808, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4037097990512848, "rewards/margins": 0.13028278946876526, "rewards/rejected": -0.5339925289154053, "step": 6850 }, { "epoch": 0.51, "learning_rate": 2.8824688316338047e-06, "logits/chosen": -2.0929312705993652, "logits/rejected": -1.5948318243026733, "logps/chosen": -520.4598388671875, "logps/rejected": -666.9553833007812, "loss": 0.6749, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.34291574358940125, "rewards/margins": 0.20522257685661316, "rewards/rejected": -0.5481383204460144, "step": 6860 }, { "epoch": 0.51, "learning_rate": 2.876106186074249e-06, "logits/chosen": -2.223475694656372, "logits/rejected": -1.7340816259384155, "logps/chosen": -500.9298400878906, "logps/rejected": -629.2818603515625, "loss": 0.6852, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.332428514957428, "rewards/margins": 0.15104129910469055, "rewards/rejected": -0.48346981406211853, "step": 6870 }, { "epoch": 0.51, "learning_rate": 2.869741046958684e-06, "logits/chosen": -2.2205636501312256, "logits/rejected": -1.6753215789794922, "logps/chosen": -470.62744140625, "logps/rejected": -626.9514770507812, "loss": 0.6804, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2862604856491089, "rewards/margins": 0.19629794359207153, "rewards/rejected": -0.48255839943885803, "step": 6880 }, { "epoch": 0.51, "learning_rate": 2.863373456487506e-06, "logits/chosen": -2.4157419204711914, "logits/rejected": -1.9249277114868164, "logps/chosen": -509.0318908691406, "logps/rejected": -620.6505126953125, "loss": 0.6768, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3065008521080017, "rewards/margins": 0.18964199721813202, "rewards/rejected": -0.49614280462265015, "step": 6890 }, { "epoch": 0.51, "learning_rate": 2.857003456877367e-06, "logits/chosen": -2.2208313941955566, "logits/rejected": -1.738355278968811, "logps/chosen": -504.58612060546875, "logps/rejected": -596.03955078125, "loss": 0.6826, "rewards/accuracies": 0.75, "rewards/chosen": -0.3054053783416748, "rewards/margins": 0.13856884837150574, "rewards/rejected": -0.44397425651550293, "step": 6900 }, { "epoch": 0.51, "learning_rate": 2.850631090360889e-06, "logits/chosen": -2.013110876083374, "logits/rejected": -1.6423394680023193, "logps/chosen": -464.2335510253906, "logps/rejected": -583.7534790039062, "loss": 0.6812, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.30328264832496643, "rewards/margins": 0.15856127440929413, "rewards/rejected": -0.46184396743774414, "step": 6910 }, { "epoch": 0.51, "learning_rate": 2.8442563991863895e-06, "logits/chosen": -2.1637885570526123, "logits/rejected": -1.5836892127990723, "logps/chosen": -513.9754638671875, "logps/rejected": -656.666259765625, "loss": 0.6769, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3502475619316101, "rewards/margins": 0.19424556195735931, "rewards/rejected": -0.5444931387901306, "step": 6920 }, { "epoch": 0.51, "learning_rate": 2.8378794256175956e-06, "logits/chosen": -1.927412748336792, "logits/rejected": -1.2363088130950928, "logps/chosen": -412.2019958496094, "logps/rejected": -648.8282470703125, "loss": 0.6747, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.25399842858314514, "rewards/margins": 0.3019489645957947, "rewards/rejected": -0.5559474229812622, "step": 6930 }, { "epoch": 0.51, "learning_rate": 2.8315002119333663e-06, "logits/chosen": -2.1121907234191895, "logits/rejected": -1.474560022354126, "logps/chosen": -529.808837890625, "logps/rejected": -697.932861328125, "loss": 0.6769, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3417404890060425, "rewards/margins": 0.21175608038902283, "rewards/rejected": -0.5534965395927429, "step": 6940 }, { "epoch": 0.51, "learning_rate": 2.825118800427413e-06, "logits/chosen": -2.0671093463897705, "logits/rejected": -1.5446498394012451, "logps/chosen": -513.3164672851562, "logps/rejected": -688.593994140625, "loss": 0.6757, "rewards/accuracies": 0.875, "rewards/chosen": -0.31935569643974304, "rewards/margins": 0.2431662380695343, "rewards/rejected": -0.5625218749046326, "step": 6950 }, { "epoch": 0.51, "learning_rate": 2.818735233408021e-06, "logits/chosen": -1.8712555170059204, "logits/rejected": -1.4727668762207031, "logps/chosen": -520.6153564453125, "logps/rejected": -659.8048706054688, "loss": 0.6767, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.393976628780365, "rewards/margins": 0.16341757774353027, "rewards/rejected": -0.5573942065238953, "step": 6960 }, { "epoch": 0.51, "learning_rate": 2.812349553197763e-06, "logits/chosen": -1.861668586730957, "logits/rejected": -1.4055752754211426, "logps/chosen": -544.3530883789062, "logps/rejected": -677.0242919921875, "loss": 0.6755, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.38595205545425415, "rewards/margins": 0.17246153950691223, "rewards/rejected": -0.558413565158844, "step": 6970 }, { "epoch": 0.51, "learning_rate": 2.805961802133224e-06, "logits/chosen": -1.9489473104476929, "logits/rejected": -1.4721723794937134, "logps/chosen": -624.7426147460938, "logps/rejected": -706.0419921875, "loss": 0.6782, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.42253008484840393, "rewards/margins": 0.154853954911232, "rewards/rejected": -0.5773840546607971, "step": 6980 }, { "epoch": 0.52, "learning_rate": 2.7995720225647162e-06, "logits/chosen": -2.010425090789795, "logits/rejected": -1.230618953704834, "logps/chosen": -592.3186645507812, "logps/rejected": -742.7161865234375, "loss": 0.6786, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.411445289850235, "rewards/margins": 0.19512946903705597, "rewards/rejected": -0.6065747141838074, "step": 6990 }, { "epoch": 0.52, "learning_rate": 2.7931802568560053e-06, "logits/chosen": -2.150203227996826, "logits/rejected": -1.5171254873275757, "logps/chosen": -553.044189453125, "logps/rejected": -835.3795166015625, "loss": 0.6749, "rewards/accuracies": 0.875, "rewards/chosen": -0.3037227988243103, "rewards/margins": 0.3260422348976135, "rewards/rejected": -0.6297651529312134, "step": 7000 }, { "epoch": 0.52, "learning_rate": 2.786786547384019e-06, "logits/chosen": -1.8907781839370728, "logits/rejected": -1.4769850969314575, "logps/chosen": -497.14410400390625, "logps/rejected": -660.35595703125, "loss": 0.6785, "rewards/accuracies": 0.625, "rewards/chosen": -0.3691060543060303, "rewards/margins": 0.16374292969703674, "rewards/rejected": -0.5328489542007446, "step": 7010 }, { "epoch": 0.52, "learning_rate": 2.780390936538577e-06, "logits/chosen": -1.9117997884750366, "logits/rejected": -1.5148385763168335, "logps/chosen": -483.10858154296875, "logps/rejected": -617.5054321289062, "loss": 0.6783, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3630430996417999, "rewards/margins": 0.15400874614715576, "rewards/rejected": -0.5170518159866333, "step": 7020 }, { "epoch": 0.52, "learning_rate": 2.773993466722103e-06, "logits/chosen": -1.896356225013733, "logits/rejected": -1.3693640232086182, "logps/chosen": -464.6968688964844, "logps/rejected": -701.1253662109375, "loss": 0.6718, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3169391453266144, "rewards/margins": 0.24849700927734375, "rewards/rejected": -0.565436065196991, "step": 7030 }, { "epoch": 0.52, "learning_rate": 2.7675941803493435e-06, "logits/chosen": -2.0376861095428467, "logits/rejected": -1.3135230541229248, "logps/chosen": -547.6932373046875, "logps/rejected": -720.8968505859375, "loss": 0.6766, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.36251765489578247, "rewards/margins": 0.2504257261753082, "rewards/rejected": -0.6129433512687683, "step": 7040 }, { "epoch": 0.52, "learning_rate": 2.7611931198470936e-06, "logits/chosen": -2.0115010738372803, "logits/rejected": -1.6732841730117798, "logps/chosen": -471.32305908203125, "logps/rejected": -645.8787231445312, "loss": 0.6798, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.31426921486854553, "rewards/margins": 0.17896990478038788, "rewards/rejected": -0.4932391047477722, "step": 7050 }, { "epoch": 0.52, "learning_rate": 2.754790327653906e-06, "logits/chosen": -2.039656400680542, "logits/rejected": -1.4036177396774292, "logps/chosen": -437.1903381347656, "logps/rejected": -633.2273559570312, "loss": 0.675, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.27474337816238403, "rewards/margins": 0.23354168236255646, "rewards/rejected": -0.5082851052284241, "step": 7060 }, { "epoch": 0.52, "learning_rate": 2.7483858462198164e-06, "logits/chosen": -2.030794620513916, "logits/rejected": -1.512404441833496, "logps/chosen": -406.4117736816406, "logps/rejected": -581.4220581054688, "loss": 0.6749, "rewards/accuracies": 0.75, "rewards/chosen": -0.2732481062412262, "rewards/margins": 0.21457383036613464, "rewards/rejected": -0.4878219664096832, "step": 7070 }, { "epoch": 0.52, "learning_rate": 2.74197971800606e-06, "logits/chosen": -1.9958480596542358, "logits/rejected": -1.5414738655090332, "logps/chosen": -508.07354736328125, "logps/rejected": -651.9653930664062, "loss": 0.6799, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.321913480758667, "rewards/margins": 0.18305228650569916, "rewards/rejected": -0.5049657225608826, "step": 7080 }, { "epoch": 0.52, "learning_rate": 2.7355719854847904e-06, "logits/chosen": -2.051257371902466, "logits/rejected": -1.5646947622299194, "logps/chosen": -449.8411560058594, "logps/rejected": -711.3646850585938, "loss": 0.674, "rewards/accuracies": 0.75, "rewards/chosen": -0.29832834005355835, "rewards/margins": 0.2584155201911926, "rewards/rejected": -0.5567438006401062, "step": 7090 }, { "epoch": 0.52, "learning_rate": 2.7291626911387977e-06, "logits/chosen": -2.0472412109375, "logits/rejected": -1.686049222946167, "logps/chosen": -459.6771545410156, "logps/rejected": -656.1570434570312, "loss": 0.6796, "rewards/accuracies": 0.75, "rewards/chosen": -0.297110378742218, "rewards/margins": 0.17285965383052826, "rewards/rejected": -0.46997007727622986, "step": 7100 }, { "epoch": 0.52, "learning_rate": 2.7227518774612237e-06, "logits/chosen": -2.161454677581787, "logits/rejected": -1.7246243953704834, "logps/chosen": -424.4747619628906, "logps/rejected": -572.0380859375, "loss": 0.6825, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.278461217880249, "rewards/margins": 0.18315397202968597, "rewards/rejected": -0.4616151750087738, "step": 7110 }, { "epoch": 0.53, "learning_rate": 2.716339586955288e-06, "logits/chosen": -2.0710055828094482, "logits/rejected": -1.4488961696624756, "logps/chosen": -628.2772827148438, "logps/rejected": -793.5380859375, "loss": 0.6786, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.43881160020828247, "rewards/margins": 0.19287928938865662, "rewards/rejected": -0.6316908597946167, "step": 7120 }, { "epoch": 0.53, "learning_rate": 2.709925862133998e-06, "logits/chosen": -2.1190025806427, "logits/rejected": -1.7256730794906616, "logps/chosen": -560.6673583984375, "logps/rejected": -682.0062255859375, "loss": 0.6754, "rewards/accuracies": 0.75, "rewards/chosen": -0.3578004539012909, "rewards/margins": 0.19308030605316162, "rewards/rejected": -0.5508807897567749, "step": 7130 }, { "epoch": 0.53, "learning_rate": 2.7035107455198728e-06, "logits/chosen": -1.7789504528045654, "logits/rejected": -1.37149977684021, "logps/chosen": -564.5098266601562, "logps/rejected": -715.2890625, "loss": 0.6777, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.40149784088134766, "rewards/margins": 0.16967014968395233, "rewards/rejected": -0.571168065071106, "step": 7140 }, { "epoch": 0.53, "learning_rate": 2.697094279644657e-06, "logits/chosen": -2.2071332931518555, "logits/rejected": -1.957985281944275, "logps/chosen": -478.3267517089844, "logps/rejected": -633.4055786132812, "loss": 0.6792, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.34753257036209106, "rewards/margins": 0.16290585696697235, "rewards/rejected": -0.5104383826255798, "step": 7150 }, { "epoch": 0.53, "learning_rate": 2.6906765070490427e-06, "logits/chosen": -1.868355393409729, "logits/rejected": -1.4550033807754517, "logps/chosen": -446.63348388671875, "logps/rejected": -641.289306640625, "loss": 0.6726, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.302083820104599, "rewards/margins": 0.21578708291053772, "rewards/rejected": -0.5178709030151367, "step": 7160 }, { "epoch": 0.53, "learning_rate": 2.684257470282382e-06, "logits/chosen": -2.0342049598693848, "logits/rejected": -1.581484079360962, "logps/chosen": -518.367919921875, "logps/rejected": -683.0747680664062, "loss": 0.6825, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3549051880836487, "rewards/margins": 0.1745852530002594, "rewards/rejected": -0.5294904708862305, "step": 7170 }, { "epoch": 0.53, "learning_rate": 2.6778372119024137e-06, "logits/chosen": -2.000892162322998, "logits/rejected": -1.6432902812957764, "logps/chosen": -528.3748168945312, "logps/rejected": -774.1339111328125, "loss": 0.6747, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.33558541536331177, "rewards/margins": 0.2673441767692566, "rewards/rejected": -0.6029295921325684, "step": 7180 }, { "epoch": 0.53, "learning_rate": 2.6714157744749714e-06, "logits/chosen": -1.9246819019317627, "logits/rejected": -1.5371758937835693, "logps/chosen": -635.324462890625, "logps/rejected": -721.1027221679688, "loss": 0.6813, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.46025219559669495, "rewards/margins": 0.13059785962104797, "rewards/rejected": -0.5908500552177429, "step": 7190 }, { "epoch": 0.53, "learning_rate": 2.6649932005737066e-06, "logits/chosen": -2.080742359161377, "logits/rejected": -1.6162431240081787, "logps/chosen": -511.440673828125, "logps/rejected": -688.5846557617188, "loss": 0.6751, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.31758973002433777, "rewards/margins": 0.17661263048648834, "rewards/rejected": -0.49420231580734253, "step": 7200 }, { "epoch": 0.53, "learning_rate": 2.6585695327798076e-06, "logits/chosen": -2.0073959827423096, "logits/rejected": -1.3116888999938965, "logps/chosen": -503.544189453125, "logps/rejected": -691.8606567382812, "loss": 0.6763, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3074280321598053, "rewards/margins": 0.25744861364364624, "rewards/rejected": -0.5648766756057739, "step": 7210 }, { "epoch": 0.53, "learning_rate": 2.6521448136817114e-06, "logits/chosen": -1.9028689861297607, "logits/rejected": -1.6531364917755127, "logps/chosen": -482.73455810546875, "logps/rejected": -672.7935791015625, "loss": 0.6779, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3803803026676178, "rewards/margins": 0.19202649593353271, "rewards/rejected": -0.5724067091941833, "step": 7220 }, { "epoch": 0.53, "learning_rate": 2.64571908587483e-06, "logits/chosen": -1.9589121341705322, "logits/rejected": -1.5587170124053955, "logps/chosen": -568.365478515625, "logps/rejected": -747.9005737304688, "loss": 0.6777, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.40402811765670776, "rewards/margins": 0.21808817982673645, "rewards/rejected": -0.6221163272857666, "step": 7230 }, { "epoch": 0.53, "learning_rate": 2.6392923919612578e-06, "logits/chosen": -1.922145128250122, "logits/rejected": -1.4520418643951416, "logps/chosen": -441.6034240722656, "logps/rejected": -596.5941162109375, "loss": 0.6788, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.27755069732666016, "rewards/margins": 0.21662339568138123, "rewards/rejected": -0.4941740930080414, "step": 7240 }, { "epoch": 0.53, "learning_rate": 2.6328647745494984e-06, "logits/chosen": -1.9629440307617188, "logits/rejected": -1.3277546167373657, "logps/chosen": -509.4051818847656, "logps/rejected": -674.8203735351562, "loss": 0.6745, "rewards/accuracies": 0.875, "rewards/chosen": -0.33670833706855774, "rewards/margins": 0.2258642166852951, "rewards/rejected": -0.5625725984573364, "step": 7250 }, { "epoch": 0.54, "learning_rate": 2.6264362762541762e-06, "logits/chosen": -2.0905683040618896, "logits/rejected": -1.4909977912902832, "logps/chosen": -462.2158203125, "logps/rejected": -685.1199951171875, "loss": 0.6766, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3404039144515991, "rewards/margins": 0.23597459495067596, "rewards/rejected": -0.5763784646987915, "step": 7260 }, { "epoch": 0.54, "learning_rate": 2.620006939695756e-06, "logits/chosen": -2.1842477321624756, "logits/rejected": -1.7238874435424805, "logps/chosen": -575.2880249023438, "logps/rejected": -754.3569946289062, "loss": 0.6774, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3930075764656067, "rewards/margins": 0.20077817142009735, "rewards/rejected": -0.5937857627868652, "step": 7270 }, { "epoch": 0.54, "learning_rate": 2.613576807500262e-06, "logits/chosen": -2.0424141883850098, "logits/rejected": -1.7451375722885132, "logps/chosen": -647.0250244140625, "logps/rejected": -782.05810546875, "loss": 0.6845, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.41068124771118164, "rewards/margins": 0.1573675274848938, "rewards/rejected": -0.5680487155914307, "step": 7280 }, { "epoch": 0.54, "learning_rate": 2.60714592229899e-06, "logits/chosen": -2.0143861770629883, "logits/rejected": -1.8481744527816772, "logps/chosen": -428.06256103515625, "logps/rejected": -597.5828857421875, "loss": 0.6814, "rewards/accuracies": 0.625, "rewards/chosen": -0.2865069806575775, "rewards/margins": 0.15629342198371887, "rewards/rejected": -0.4428004324436188, "step": 7290 }, { "epoch": 0.54, "learning_rate": 2.6007143267282302e-06, "logits/chosen": -2.2039132118225098, "logits/rejected": -1.8635151386260986, "logps/chosen": -505.64892578125, "logps/rejected": -610.9797973632812, "loss": 0.6807, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2971366047859192, "rewards/margins": 0.1749112904071808, "rewards/rejected": -0.4720478951931, "step": 7300 }, { "epoch": 0.54, "learning_rate": 2.5942820634289834e-06, "logits/chosen": -2.008291721343994, "logits/rejected": -1.5971412658691406, "logps/chosen": -538.5974731445312, "logps/rejected": -642.2754516601562, "loss": 0.6835, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.37370678782463074, "rewards/margins": 0.11338984966278076, "rewards/rejected": -0.4870966970920563, "step": 7310 }, { "epoch": 0.54, "learning_rate": 2.587849175046676e-06, "logits/chosen": -2.0032241344451904, "logits/rejected": -1.6637356281280518, "logps/chosen": -553.1674194335938, "logps/rejected": -722.0450439453125, "loss": 0.6822, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.36941006779670715, "rewards/margins": 0.16770043969154358, "rewards/rejected": -0.5371105074882507, "step": 7320 }, { "epoch": 0.54, "learning_rate": 2.5814157042308798e-06, "logits/chosen": -1.9492065906524658, "logits/rejected": -1.4061845541000366, "logps/chosen": -522.1117553710938, "logps/rejected": -730.2553100585938, "loss": 0.6779, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.34874600172042847, "rewards/margins": 0.25660252571105957, "rewards/rejected": -0.6053485870361328, "step": 7330 }, { "epoch": 0.54, "learning_rate": 2.574981693635025e-06, "logits/chosen": -2.102604389190674, "logits/rejected": -1.587804913520813, "logps/chosen": -512.7747802734375, "logps/rejected": -696.1346435546875, "loss": 0.6748, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.35349246859550476, "rewards/margins": 0.22715647518634796, "rewards/rejected": -0.5806488990783691, "step": 7340 }, { "epoch": 0.54, "learning_rate": 2.568547185916125e-06, "logits/chosen": -1.854331612586975, "logits/rejected": -1.5277180671691895, "logps/chosen": -448.32135009765625, "logps/rejected": -649.4514770507812, "loss": 0.673, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.31261447072029114, "rewards/margins": 0.23210671544075012, "rewards/rejected": -0.5447211265563965, "step": 7350 }, { "epoch": 0.54, "learning_rate": 2.562112223734485e-06, "logits/chosen": -2.1074652671813965, "logits/rejected": -1.4819667339324951, "logps/chosen": -518.4622802734375, "logps/rejected": -756.0079956054688, "loss": 0.6717, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.385943740606308, "rewards/margins": 0.27320075035095215, "rewards/rejected": -0.6591445803642273, "step": 7360 }, { "epoch": 0.54, "learning_rate": 2.5556768497534275e-06, "logits/chosen": -2.190544843673706, "logits/rejected": -1.744971513748169, "logps/chosen": -449.53265380859375, "logps/rejected": -577.8065185546875, "loss": 0.6823, "rewards/accuracies": 0.625, "rewards/chosen": -0.27300047874450684, "rewards/margins": 0.15606296062469482, "rewards/rejected": -0.42906346917152405, "step": 7370 }, { "epoch": 0.54, "learning_rate": 2.5492411066390016e-06, "logits/chosen": -2.1093478202819824, "logits/rejected": -1.4392139911651611, "logps/chosen": -392.85345458984375, "logps/rejected": -597.0903930664062, "loss": 0.6741, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21774744987487793, "rewards/margins": 0.24398139119148254, "rewards/rejected": -0.46172887086868286, "step": 7380 }, { "epoch": 0.55, "learning_rate": 2.542805037059704e-06, "logits/chosen": -2.0209832191467285, "logits/rejected": -1.5729252099990845, "logps/chosen": -480.5216369628906, "logps/rejected": -622.3345947265625, "loss": 0.6785, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3234332203865051, "rewards/margins": 0.17293553054332733, "rewards/rejected": -0.49636873602867126, "step": 7390 }, { "epoch": 0.55, "learning_rate": 2.5363686836861987e-06, "logits/chosen": -1.7399959564208984, "logits/rejected": -1.143331527709961, "logps/chosen": -486.7076721191406, "logps/rejected": -657.19775390625, "loss": 0.6731, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.34530574083328247, "rewards/margins": 0.21672403812408447, "rewards/rejected": -0.5620297789573669, "step": 7400 }, { "epoch": 0.55, "learning_rate": 2.529932089191028e-06, "logits/chosen": -2.1621038913726807, "logits/rejected": -1.4625927209854126, "logps/chosen": -442.86981201171875, "logps/rejected": -660.0877685546875, "loss": 0.6755, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.27535519003868103, "rewards/margins": 0.2880188822746277, "rewards/rejected": -0.5633741021156311, "step": 7410 }, { "epoch": 0.55, "learning_rate": 2.5234952962483343e-06, "logits/chosen": -2.110100507736206, "logits/rejected": -1.6618812084197998, "logps/chosen": -426.8536682128906, "logps/rejected": -561.9778442382812, "loss": 0.6833, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2675244212150574, "rewards/margins": 0.16930320858955383, "rewards/rejected": -0.4368276596069336, "step": 7420 }, { "epoch": 0.55, "learning_rate": 2.5170583475335768e-06, "logits/chosen": -2.0870962142944336, "logits/rejected": -1.620384931564331, "logps/chosen": -432.99249267578125, "logps/rejected": -628.709716796875, "loss": 0.6771, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3067866265773773, "rewards/margins": 0.21463093161582947, "rewards/rejected": -0.5214175581932068, "step": 7430 }, { "epoch": 0.55, "learning_rate": 2.5106212857232443e-06, "logits/chosen": -2.2182934284210205, "logits/rejected": -1.737728476524353, "logps/chosen": -475.4751892089844, "logps/rejected": -659.6278076171875, "loss": 0.6803, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2902349829673767, "rewards/margins": 0.18377593159675598, "rewards/rejected": -0.4740109443664551, "step": 7440 }, { "epoch": 0.55, "learning_rate": 2.504184153494579e-06, "logits/chosen": -2.038851261138916, "logits/rejected": -1.5938575267791748, "logps/chosen": -390.69122314453125, "logps/rejected": -662.6917724609375, "loss": 0.6711, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2313317060470581, "rewards/margins": 0.2880733609199524, "rewards/rejected": -0.5194050073623657, "step": 7450 }, { "epoch": 0.55, "learning_rate": 2.497746993525286e-06, "logits/chosen": -2.165269613265991, "logits/rejected": -1.7754472494125366, "logps/chosen": -427.68487548828125, "logps/rejected": -607.1161499023438, "loss": 0.679, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2539594769477844, "rewards/margins": 0.20189888775348663, "rewards/rejected": -0.4558583199977875, "step": 7460 }, { "epoch": 0.55, "learning_rate": 2.49130984849326e-06, "logits/chosen": -2.0303587913513184, "logits/rejected": -1.598319411277771, "logps/chosen": -515.0181884765625, "logps/rejected": -682.6281127929688, "loss": 0.6799, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3360186517238617, "rewards/margins": 0.188276469707489, "rewards/rejected": -0.5242950916290283, "step": 7470 }, { "epoch": 0.55, "learning_rate": 2.484872761076291e-06, "logits/chosen": -2.2209267616271973, "logits/rejected": -1.9001041650772095, "logps/chosen": -434.83917236328125, "logps/rejected": -600.1011962890625, "loss": 0.6769, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.282509982585907, "rewards/margins": 0.19113841652870178, "rewards/rejected": -0.47364839911460876, "step": 7480 }, { "epoch": 0.55, "learning_rate": 2.4784357739517897e-06, "logits/chosen": -2.057058095932007, "logits/rejected": -1.6008039712905884, "logps/chosen": -460.9122009277344, "logps/rejected": -606.9761962890625, "loss": 0.6833, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.30122846364974976, "rewards/margins": 0.17729897797107697, "rewards/rejected": -0.4785274565219879, "step": 7490 }, { "epoch": 0.55, "learning_rate": 2.4719989297965007e-06, "logits/chosen": -2.1880087852478027, "logits/rejected": -1.5513215065002441, "logps/chosen": -424.7400817871094, "logps/rejected": -578.949462890625, "loss": 0.6825, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.26308494806289673, "rewards/margins": 0.19034478068351746, "rewards/rejected": -0.4534296989440918, "step": 7500 }, { "epoch": 0.55, "learning_rate": 2.4655622712862236e-06, "logits/chosen": -2.239716053009033, "logits/rejected": -1.6814515590667725, "logps/chosen": -422.30218505859375, "logps/rejected": -625.7264404296875, "loss": 0.68, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.264881432056427, "rewards/margins": 0.24729108810424805, "rewards/rejected": -0.512172520160675, "step": 7510 }, { "epoch": 0.55, "learning_rate": 2.4591258410955232e-06, "logits/chosen": -2.1848855018615723, "logits/rejected": -1.8482431173324585, "logps/chosen": -416.05352783203125, "logps/rejected": -543.830810546875, "loss": 0.6752, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24782809615135193, "rewards/margins": 0.16546115279197693, "rewards/rejected": -0.41328924894332886, "step": 7520 }, { "epoch": 0.56, "learning_rate": 2.4526896818974534e-06, "logits/chosen": -1.9687957763671875, "logits/rejected": -1.4542386531829834, "logps/chosen": -510.9009704589844, "logps/rejected": -676.2137451171875, "loss": 0.6771, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.33839109539985657, "rewards/margins": 0.22968058288097382, "rewards/rejected": -0.568071722984314, "step": 7530 }, { "epoch": 0.56, "learning_rate": 2.4462538363632696e-06, "logits/chosen": -2.1587696075439453, "logits/rejected": -1.7123725414276123, "logps/chosen": -503.14764404296875, "logps/rejected": -659.0988159179688, "loss": 0.6779, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.29197198152542114, "rewards/margins": 0.17944850027561188, "rewards/rejected": -0.4714204668998718, "step": 7540 }, { "epoch": 0.56, "learning_rate": 2.4398183471621484e-06, "logits/chosen": -2.1726229190826416, "logits/rejected": -1.7985550165176392, "logps/chosen": -511.0303649902344, "logps/rejected": -591.9874267578125, "loss": 0.6805, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.28733617067337036, "rewards/margins": 0.15286603569984436, "rewards/rejected": -0.44020217657089233, "step": 7550 }, { "epoch": 0.56, "learning_rate": 2.433383256960905e-06, "logits/chosen": -2.254035472869873, "logits/rejected": -1.8797006607055664, "logps/chosen": -454.476806640625, "logps/rejected": -540.177734375, "loss": 0.6832, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.28132039308547974, "rewards/margins": 0.13120272755622864, "rewards/rejected": -0.4125231206417084, "step": 7560 }, { "epoch": 0.56, "learning_rate": 2.4269486084237085e-06, "logits/chosen": -2.1689560413360596, "logits/rejected": -1.6921398639678955, "logps/chosen": -529.3504638671875, "logps/rejected": -688.5715942382812, "loss": 0.6773, "rewards/accuracies": 0.75, "rewards/chosen": -0.3533709645271301, "rewards/margins": 0.17857898771762848, "rewards/rejected": -0.5319499373435974, "step": 7570 }, { "epoch": 0.56, "learning_rate": 2.420514444211799e-06, "logits/chosen": -1.9442402124404907, "logits/rejected": -1.5149413347244263, "logps/chosen": -446.07177734375, "logps/rejected": -571.2734375, "loss": 0.6831, "rewards/accuracies": 0.75, "rewards/chosen": -0.28415581583976746, "rewards/margins": 0.16047830879688263, "rewards/rejected": -0.4446341097354889, "step": 7580 }, { "epoch": 0.56, "learning_rate": 2.414080806983206e-06, "logits/chosen": -1.9990017414093018, "logits/rejected": -1.2321110963821411, "logps/chosen": -413.74468994140625, "logps/rejected": -656.4811401367188, "loss": 0.6711, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.25327855348587036, "rewards/margins": 0.2990204393863678, "rewards/rejected": -0.552298903465271, "step": 7590 }, { "epoch": 0.56, "learning_rate": 2.4076477393924656e-06, "logits/chosen": -2.2626166343688965, "logits/rejected": -1.8366540670394897, "logps/chosen": -472.3023986816406, "logps/rejected": -587.825927734375, "loss": 0.6783, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2653431296348572, "rewards/margins": 0.17129996418952942, "rewards/rejected": -0.436643123626709, "step": 7600 }, { "epoch": 0.56, "learning_rate": 2.4012152840903373e-06, "logits/chosen": -1.892343282699585, "logits/rejected": -1.6760470867156982, "logps/chosen": -417.14166259765625, "logps/rejected": -518.3829956054688, "loss": 0.6856, "rewards/accuracies": 0.625, "rewards/chosen": -0.28483954071998596, "rewards/margins": 0.13973651826381683, "rewards/rejected": -0.4245761036872864, "step": 7610 }, { "epoch": 0.56, "learning_rate": 2.3947834837235197e-06, "logits/chosen": -2.0921719074249268, "logits/rejected": -1.6278207302093506, "logps/chosen": -534.2725830078125, "logps/rejected": -726.3082275390625, "loss": 0.6766, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.33820784091949463, "rewards/margins": 0.24165387451648712, "rewards/rejected": -0.5798617601394653, "step": 7620 }, { "epoch": 0.56, "learning_rate": 2.3883523809343713e-06, "logits/chosen": -1.874607801437378, "logits/rejected": -1.5937249660491943, "logps/chosen": -513.0270385742188, "logps/rejected": -671.2501831054688, "loss": 0.6778, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.377322256565094, "rewards/margins": 0.18434211611747742, "rewards/rejected": -0.561664342880249, "step": 7630 }, { "epoch": 0.56, "learning_rate": 2.381922018360624e-06, "logits/chosen": -1.9249584674835205, "logits/rejected": -1.6446644067764282, "logps/chosen": -527.031494140625, "logps/rejected": -647.5135498046875, "loss": 0.6833, "rewards/accuracies": 0.625, "rewards/chosen": -0.3647117614746094, "rewards/margins": 0.13108450174331665, "rewards/rejected": -0.49579620361328125, "step": 7640 }, { "epoch": 0.56, "learning_rate": 2.3754924386351043e-06, "logits/chosen": -1.977705955505371, "logits/rejected": -1.6040191650390625, "logps/chosen": -469.61651611328125, "logps/rejected": -690.3351440429688, "loss": 0.6784, "rewards/accuracies": 0.75, "rewards/chosen": -0.3473663926124573, "rewards/margins": 0.20949387550354004, "rewards/rejected": -0.5568603277206421, "step": 7650 }, { "epoch": 0.57, "learning_rate": 2.369063684385445e-06, "logits/chosen": -2.1457715034484863, "logits/rejected": -1.6712385416030884, "logps/chosen": -439.31243896484375, "logps/rejected": -577.7984619140625, "loss": 0.676, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.28092795610427856, "rewards/margins": 0.17725345492362976, "rewards/rejected": -0.45818138122558594, "step": 7660 }, { "epoch": 0.57, "learning_rate": 2.3626357982338104e-06, "logits/chosen": -1.9485371112823486, "logits/rejected": -1.475673794746399, "logps/chosen": -435.11663818359375, "logps/rejected": -567.3678588867188, "loss": 0.676, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2722295820713043, "rewards/margins": 0.20005419850349426, "rewards/rejected": -0.4722837507724762, "step": 7670 }, { "epoch": 0.57, "learning_rate": 2.356208822796606e-06, "logits/chosen": -2.2723746299743652, "logits/rejected": -1.8798434734344482, "logps/chosen": -450.267333984375, "logps/rejected": -617.4276123046875, "loss": 0.6809, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2813589572906494, "rewards/margins": 0.17462356388568878, "rewards/rejected": -0.4559825360774994, "step": 7680 }, { "epoch": 0.57, "learning_rate": 2.3497828006841996e-06, "logits/chosen": -2.0665946006774902, "logits/rejected": -1.4528915882110596, "logps/chosen": -449.54986572265625, "logps/rejected": -667.0205078125, "loss": 0.6793, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.29334965348243713, "rewards/margins": 0.22289302945137024, "rewards/rejected": -0.5162426233291626, "step": 7690 }, { "epoch": 0.57, "learning_rate": 2.3433577745006403e-06, "logits/chosen": -1.9263999462127686, "logits/rejected": -1.4509209394454956, "logps/chosen": -466.7601623535156, "logps/rejected": -689.8322143554688, "loss": 0.6751, "rewards/accuracies": 0.75, "rewards/chosen": -0.28910601139068604, "rewards/margins": 0.23887066543102264, "rewards/rejected": -0.5279766321182251, "step": 7700 }, { "epoch": 0.57, "learning_rate": 2.3369337868433737e-06, "logits/chosen": -2.1502795219421387, "logits/rejected": -1.9676593542099, "logps/chosen": -590.5692138671875, "logps/rejected": -663.5794067382812, "loss": 0.6823, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.43590861558914185, "rewards/margins": 0.09052357077598572, "rewards/rejected": -0.5264321565628052, "step": 7710 }, { "epoch": 0.57, "learning_rate": 2.3305108803029587e-06, "logits/chosen": -1.965219497680664, "logits/rejected": -1.3955570459365845, "logps/chosen": -547.7435302734375, "logps/rejected": -792.3318481445312, "loss": 0.6748, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.37443357706069946, "rewards/margins": 0.2661077380180359, "rewards/rejected": -0.6405412554740906, "step": 7720 }, { "epoch": 0.57, "learning_rate": 2.324089097462787e-06, "logits/chosen": -2.0148909091949463, "logits/rejected": -1.7183008193969727, "logps/chosen": -410.742919921875, "logps/rejected": -529.8191528320312, "loss": 0.6819, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2934989333152771, "rewards/margins": 0.14522160589694977, "rewards/rejected": -0.4387205243110657, "step": 7730 }, { "epoch": 0.57, "learning_rate": 2.3176684808988026e-06, "logits/chosen": -2.102908134460449, "logits/rejected": -1.6098053455352783, "logps/chosen": -402.6458435058594, "logps/rejected": -635.7747192382812, "loss": 0.671, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2713346481323242, "rewards/margins": 0.25147488713264465, "rewards/rejected": -0.5228095650672913, "step": 7740 }, { "epoch": 0.57, "learning_rate": 2.3112490731792144e-06, "logits/chosen": -1.8601539134979248, "logits/rejected": -1.3782565593719482, "logps/chosen": -479.92559814453125, "logps/rejected": -638.2959594726562, "loss": 0.6771, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.33395010232925415, "rewards/margins": 0.20877328515052795, "rewards/rejected": -0.5427233576774597, "step": 7750 }, { "epoch": 0.57, "learning_rate": 2.304830916864217e-06, "logits/chosen": -2.3789150714874268, "logits/rejected": -1.7892115116119385, "logps/chosen": -530.3417358398438, "logps/rejected": -702.1229248046875, "loss": 0.6766, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3105645775794983, "rewards/margins": 0.23933200538158417, "rewards/rejected": -0.5498965978622437, "step": 7760 }, { "epoch": 0.57, "learning_rate": 2.298414054505709e-06, "logits/chosen": -1.9688758850097656, "logits/rejected": -1.4064290523529053, "logps/chosen": -547.957763671875, "logps/rejected": -753.4075317382812, "loss": 0.6738, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3985818922519684, "rewards/margins": 0.2431013137102127, "rewards/rejected": -0.6416832208633423, "step": 7770 }, { "epoch": 0.57, "learning_rate": 2.291998528647011e-06, "logits/chosen": -2.2289326190948486, "logits/rejected": -1.8484742641448975, "logps/chosen": -563.22998046875, "logps/rejected": -676.5137939453125, "loss": 0.6812, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3945203125476837, "rewards/margins": 0.15513072907924652, "rewards/rejected": -0.549651026725769, "step": 7780 }, { "epoch": 0.57, "learning_rate": 2.285584381822581e-06, "logits/chosen": -1.892163634300232, "logits/rejected": -1.5788098573684692, "logps/chosen": -495.5595703125, "logps/rejected": -655.3743896484375, "loss": 0.6792, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3091723322868347, "rewards/margins": 0.19917593896389008, "rewards/rejected": -0.508348286151886, "step": 7790 }, { "epoch": 0.58, "learning_rate": 2.279171656557735e-06, "logits/chosen": -2.240032434463501, "logits/rejected": -1.7236493825912476, "logps/chosen": -430.5721740722656, "logps/rejected": -671.6305541992188, "loss": 0.6795, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3006006181240082, "rewards/margins": 0.24138769507408142, "rewards/rejected": -0.5419883728027344, "step": 7800 }, { "epoch": 0.58, "learning_rate": 2.2727603953683646e-06, "logits/chosen": -2.1183719635009766, "logits/rejected": -1.562369704246521, "logps/chosen": -538.8944091796875, "logps/rejected": -687.8453369140625, "loss": 0.6806, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3631441295146942, "rewards/margins": 0.1845545768737793, "rewards/rejected": -0.5476986765861511, "step": 7810 }, { "epoch": 0.58, "learning_rate": 2.266350640760652e-06, "logits/chosen": -2.2111916542053223, "logits/rejected": -1.6609876155853271, "logps/chosen": -424.4498596191406, "logps/rejected": -648.7020263671875, "loss": 0.6806, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24201369285583496, "rewards/margins": 0.25483086705207825, "rewards/rejected": -0.4968445897102356, "step": 7820 }, { "epoch": 0.58, "learning_rate": 2.2599424352307958e-06, "logits/chosen": -2.1762137413024902, "logits/rejected": -1.7203487157821655, "logps/chosen": -519.88330078125, "logps/rejected": -653.2562255859375, "loss": 0.676, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3369320034980774, "rewards/margins": 0.17336204648017883, "rewards/rejected": -0.5102940797805786, "step": 7830 }, { "epoch": 0.58, "learning_rate": 2.25353582126472e-06, "logits/chosen": -2.0267105102539062, "logits/rejected": -1.5509897470474243, "logps/chosen": -499.29132080078125, "logps/rejected": -741.1796875, "loss": 0.6743, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3182995617389679, "rewards/margins": 0.24940040707588196, "rewards/rejected": -0.5677000284194946, "step": 7840 }, { "epoch": 0.58, "learning_rate": 2.2471308413377993e-06, "logits/chosen": -2.106626033782959, "logits/rejected": -1.341158151626587, "logps/chosen": -455.56610107421875, "logps/rejected": -672.9942626953125, "loss": 0.6745, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.290461003780365, "rewards/margins": 0.26254141330718994, "rewards/rejected": -0.5530023574829102, "step": 7850 }, { "epoch": 0.58, "learning_rate": 2.2407275379145722e-06, "logits/chosen": -2.0300238132476807, "logits/rejected": -1.4886293411254883, "logps/chosen": -522.2025146484375, "logps/rejected": -590.2479248046875, "loss": 0.6802, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.268623024225235, "rewards/margins": 0.171607106924057, "rewards/rejected": -0.44023019075393677, "step": 7860 }, { "epoch": 0.58, "learning_rate": 2.2343259534484645e-06, "logits/chosen": -2.1563830375671387, "logits/rejected": -1.6328656673431396, "logps/chosen": -451.8204040527344, "logps/rejected": -634.4393310546875, "loss": 0.6763, "rewards/accuracies": 0.75, "rewards/chosen": -0.2719835340976715, "rewards/margins": 0.20909950137138367, "rewards/rejected": -0.48108306527137756, "step": 7870 }, { "epoch": 0.58, "learning_rate": 2.2279261303815053e-06, "logits/chosen": -2.0048987865448, "logits/rejected": -1.3335803747177124, "logps/chosen": -461.057861328125, "logps/rejected": -682.39404296875, "loss": 0.668, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.30671852827072144, "rewards/margins": 0.25867098569869995, "rewards/rejected": -0.5653895139694214, "step": 7880 }, { "epoch": 0.58, "learning_rate": 2.2215281111440433e-06, "logits/chosen": -2.2879278659820557, "logits/rejected": -1.7321062088012695, "logps/chosen": -469.8922424316406, "logps/rejected": -572.0150756835938, "loss": 0.6847, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2747981548309326, "rewards/margins": 0.17217722535133362, "rewards/rejected": -0.44697538018226624, "step": 7890 }, { "epoch": 0.58, "learning_rate": 2.2151319381544706e-06, "logits/chosen": -2.1068711280822754, "logits/rejected": -1.5348026752471924, "logps/chosen": -555.7792358398438, "logps/rejected": -727.58056640625, "loss": 0.6745, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.39786940813064575, "rewards/margins": 0.19029931724071503, "rewards/rejected": -0.588168740272522, "step": 7900 }, { "epoch": 0.58, "learning_rate": 2.2087376538189375e-06, "logits/chosen": -2.2277214527130127, "logits/rejected": -1.8282978534698486, "logps/chosen": -483.4705505371094, "logps/rejected": -668.34521484375, "loss": 0.6794, "rewards/accuracies": 0.625, "rewards/chosen": -0.2981809377670288, "rewards/margins": 0.23100194334983826, "rewards/rejected": -0.5291829109191895, "step": 7910 }, { "epoch": 0.58, "learning_rate": 2.2023453005310736e-06, "logits/chosen": -2.080200672149658, "logits/rejected": -1.6236766576766968, "logps/chosen": -546.3409423828125, "logps/rejected": -708.7432861328125, "loss": 0.6814, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.34953397512435913, "rewards/margins": 0.17816361784934998, "rewards/rejected": -0.5276976227760315, "step": 7920 }, { "epoch": 0.58, "learning_rate": 2.195954920671706e-06, "logits/chosen": -1.869873046875, "logits/rejected": -1.2895960807800293, "logps/chosen": -521.0703125, "logps/rejected": -724.6827392578125, "loss": 0.6782, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.39181002974510193, "rewards/margins": 0.22316694259643555, "rewards/rejected": -0.6149770021438599, "step": 7930 }, { "epoch": 0.59, "learning_rate": 2.1895665566085762e-06, "logits/chosen": -2.1078152656555176, "logits/rejected": -1.6348276138305664, "logps/chosen": -631.7286376953125, "logps/rejected": -797.3660888671875, "loss": 0.6775, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4765382707118988, "rewards/margins": 0.16831856966018677, "rewards/rejected": -0.6448568105697632, "step": 7940 }, { "epoch": 0.59, "learning_rate": 2.183180250696062e-06, "logits/chosen": -1.7846415042877197, "logits/rejected": -1.240990400314331, "logps/chosen": -690.4173583984375, "logps/rejected": -845.6204223632812, "loss": 0.6791, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5348193645477295, "rewards/margins": 0.1724463552236557, "rewards/rejected": -0.7072657346725464, "step": 7950 }, { "epoch": 0.59, "learning_rate": 2.176796045274897e-06, "logits/chosen": -2.015679121017456, "logits/rejected": -1.5273412466049194, "logps/chosen": -622.2345581054688, "logps/rejected": -744.7322387695312, "loss": 0.683, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5083571672439575, "rewards/margins": 0.15073852241039276, "rewards/rejected": -0.6590956449508667, "step": 7960 }, { "epoch": 0.59, "learning_rate": 2.1704139826718874e-06, "logits/chosen": -1.849808931350708, "logits/rejected": -1.3269693851470947, "logps/chosen": -541.0936279296875, "logps/rejected": -777.6746215820312, "loss": 0.6725, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4091033935546875, "rewards/margins": 0.2589491903781891, "rewards/rejected": -0.6680525541305542, "step": 7970 }, { "epoch": 0.59, "learning_rate": 2.1640341051996333e-06, "logits/chosen": -2.2540132999420166, "logits/rejected": -1.7319358587265015, "logps/chosen": -575.1923828125, "logps/rejected": -717.103515625, "loss": 0.6774, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3421255946159363, "rewards/margins": 0.19650062918663025, "rewards/rejected": -0.5386263132095337, "step": 7980 }, { "epoch": 0.59, "learning_rate": 2.157656455156247e-06, "logits/chosen": -1.9750601053237915, "logits/rejected": -1.4771560430526733, "logps/chosen": -541.2092895507812, "logps/rejected": -663.3555297851562, "loss": 0.6836, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.40831026434898376, "rewards/margins": 0.16455496847629547, "rewards/rejected": -0.5728652477264404, "step": 7990 }, { "epoch": 0.59, "learning_rate": 2.1512810748250726e-06, "logits/chosen": -1.8454818725585938, "logits/rejected": -1.3079173564910889, "logps/chosen": -507.376953125, "logps/rejected": -728.03369140625, "loss": 0.6782, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3925742208957672, "rewards/margins": 0.2309766560792923, "rewards/rejected": -0.6235508918762207, "step": 8000 }, { "epoch": 0.59, "learning_rate": 2.1449080064744088e-06, "logits/chosen": -1.9378257989883423, "logits/rejected": -1.4639049768447876, "logps/chosen": -585.83642578125, "logps/rejected": -739.3861083984375, "loss": 0.6757, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.44874677062034607, "rewards/margins": 0.1672806441783905, "rewards/rejected": -0.6160274744033813, "step": 8010 }, { "epoch": 0.59, "learning_rate": 2.1385372923572236e-06, "logits/chosen": -1.90792715549469, "logits/rejected": -1.3686811923980713, "logps/chosen": -661.677001953125, "logps/rejected": -829.2757568359375, "loss": 0.6756, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5138466954231262, "rewards/margins": 0.20868274569511414, "rewards/rejected": -0.7225293517112732, "step": 8020 }, { "epoch": 0.59, "learning_rate": 2.132168974710877e-06, "logits/chosen": -2.0940182209014893, "logits/rejected": -1.4986727237701416, "logps/chosen": -572.5186157226562, "logps/rejected": -752.10595703125, "loss": 0.6792, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4013442397117615, "rewards/margins": 0.21362653374671936, "rewards/rejected": -0.6149707436561584, "step": 8030 }, { "epoch": 0.59, "learning_rate": 2.125803095756841e-06, "logits/chosen": -1.909919023513794, "logits/rejected": -1.218888521194458, "logps/chosen": -501.70263671875, "logps/rejected": -705.8619384765625, "loss": 0.6734, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.33785131573677063, "rewards/margins": 0.25428083539009094, "rewards/rejected": -0.5921322107315063, "step": 8040 }, { "epoch": 0.59, "learning_rate": 2.1194396977004186e-06, "logits/chosen": -2.1691782474517822, "logits/rejected": -1.552106261253357, "logps/chosen": -622.7073364257812, "logps/rejected": -759.9678344726562, "loss": 0.675, "rewards/accuracies": 0.75, "rewards/chosen": -0.401961088180542, "rewards/margins": 0.21225014328956604, "rewards/rejected": -0.6142112016677856, "step": 8050 }, { "epoch": 0.59, "learning_rate": 2.113078822730466e-06, "logits/chosen": -2.1061816215515137, "logits/rejected": -1.7511653900146484, "logps/chosen": -506.0799865722656, "logps/rejected": -632.4630737304688, "loss": 0.6829, "rewards/accuracies": 0.75, "rewards/chosen": -0.3587154150009155, "rewards/margins": 0.13756349682807922, "rewards/rejected": -0.4962790012359619, "step": 8060 }, { "epoch": 0.6, "learning_rate": 2.10672051301911e-06, "logits/chosen": -1.833169937133789, "logits/rejected": -1.358595609664917, "logps/chosen": -507.90130615234375, "logps/rejected": -735.8176879882812, "loss": 0.6785, "rewards/accuracies": 0.75, "rewards/chosen": -0.38867220282554626, "rewards/margins": 0.21962964534759521, "rewards/rejected": -0.6083018779754639, "step": 8070 }, { "epoch": 0.6, "learning_rate": 2.1003648107214703e-06, "logits/chosen": -2.085820198059082, "logits/rejected": -1.429323673248291, "logps/chosen": -499.8623046875, "logps/rejected": -678.4613037109375, "loss": 0.6759, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3138812184333801, "rewards/margins": 0.22299961745738983, "rewards/rejected": -0.5368808507919312, "step": 8080 }, { "epoch": 0.6, "learning_rate": 2.094011757975381e-06, "logits/chosen": -2.329697847366333, "logits/rejected": -1.5335979461669922, "logps/chosen": -514.0547485351562, "logps/rejected": -700.3912353515625, "loss": 0.6748, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3180443346500397, "rewards/margins": 0.26021474599838257, "rewards/rejected": -0.5782590508460999, "step": 8090 }, { "epoch": 0.6, "learning_rate": 2.087661396901108e-06, "logits/chosen": -2.174964189529419, "logits/rejected": -1.7118914127349854, "logps/chosen": -449.630859375, "logps/rejected": -596.5282592773438, "loss": 0.6802, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3339011073112488, "rewards/margins": 0.16550086438655853, "rewards/rejected": -0.4994019865989685, "step": 8100 }, { "epoch": 0.6, "learning_rate": 2.0813137696010725e-06, "logits/chosen": -1.9349448680877686, "logits/rejected": -1.394268274307251, "logps/chosen": -516.7147216796875, "logps/rejected": -685.6044311523438, "loss": 0.6853, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.35992860794067383, "rewards/margins": 0.1818605363368988, "rewards/rejected": -0.541789174079895, "step": 8110 }, { "epoch": 0.6, "learning_rate": 2.074968918159571e-06, "logits/chosen": -2.0005617141723633, "logits/rejected": -1.5502302646636963, "logps/chosen": -539.0485229492188, "logps/rejected": -692.4366455078125, "loss": 0.6792, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3937462270259857, "rewards/margins": 0.18345214426517487, "rewards/rejected": -0.5771983861923218, "step": 8120 }, { "epoch": 0.6, "learning_rate": 2.0686268846424966e-06, "logits/chosen": -1.7994954586029053, "logits/rejected": -1.3098781108856201, "logps/chosen": -607.9790649414062, "logps/rejected": -786.0548095703125, "loss": 0.6752, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.43818044662475586, "rewards/margins": 0.21330749988555908, "rewards/rejected": -0.6514879465103149, "step": 8130 }, { "epoch": 0.6, "learning_rate": 2.0622877110970583e-06, "logits/chosen": -2.028615951538086, "logits/rejected": -1.5296269655227661, "logps/chosen": -508.47576904296875, "logps/rejected": -674.515380859375, "loss": 0.6756, "rewards/accuracies": 0.75, "rewards/chosen": -0.3677090108394623, "rewards/margins": 0.1973150223493576, "rewards/rejected": -0.5650240778923035, "step": 8140 }, { "epoch": 0.6, "learning_rate": 2.0559514395515053e-06, "logits/chosen": -1.731978178024292, "logits/rejected": -1.4275281429290771, "logps/chosen": -463.19818115234375, "logps/rejected": -675.4525146484375, "loss": 0.6751, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.34338125586509705, "rewards/margins": 0.23216405510902405, "rewards/rejected": -0.5755452513694763, "step": 8150 }, { "epoch": 0.6, "learning_rate": 2.0496181120148463e-06, "logits/chosen": -2.1701736450195312, "logits/rejected": -1.7458528280258179, "logps/chosen": -415.2158203125, "logps/rejected": -582.6925659179688, "loss": 0.6782, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.284740149974823, "rewards/margins": 0.17542767524719238, "rewards/rejected": -0.46016788482666016, "step": 8160 }, { "epoch": 0.6, "learning_rate": 2.04328777047657e-06, "logits/chosen": -1.9613335132598877, "logits/rejected": -1.5481268167495728, "logps/chosen": -490.860595703125, "logps/rejected": -673.3078002929688, "loss": 0.6826, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.30566927790641785, "rewards/margins": 0.21540410816669464, "rewards/rejected": -0.5210734009742737, "step": 8170 }, { "epoch": 0.6, "learning_rate": 2.0369604569063717e-06, "logits/chosen": -1.954423189163208, "logits/rejected": -1.5023324489593506, "logps/chosen": -483.6360778808594, "logps/rejected": -640.9954833984375, "loss": 0.6804, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.35478639602661133, "rewards/margins": 0.19283735752105713, "rewards/rejected": -0.5476237535476685, "step": 8180 }, { "epoch": 0.6, "learning_rate": 2.0306362132538677e-06, "logits/chosen": -2.188856363296509, "logits/rejected": -1.6829099655151367, "logps/chosen": -563.6106567382812, "logps/rejected": -635.5179443359375, "loss": 0.6785, "rewards/accuracies": 0.625, "rewards/chosen": -0.31449928879737854, "rewards/margins": 0.1477324217557907, "rewards/rejected": -0.46223172545433044, "step": 8190 }, { "epoch": 0.6, "learning_rate": 2.0243150814483237e-06, "logits/chosen": -2.215224504470825, "logits/rejected": -1.4651850461959839, "logps/chosen": -441.3622131347656, "logps/rejected": -622.0687255859375, "loss": 0.6788, "rewards/accuracies": 0.625, "rewards/chosen": -0.2982582449913025, "rewards/margins": 0.2189064472913742, "rewards/rejected": -0.5171645879745483, "step": 8200 }, { "epoch": 0.61, "learning_rate": 2.0179971033983716e-06, "logits/chosen": -2.005624532699585, "logits/rejected": -1.6110576391220093, "logps/chosen": -624.331298828125, "logps/rejected": -703.737548828125, "loss": 0.6792, "rewards/accuracies": 0.625, "rewards/chosen": -0.44104576110839844, "rewards/margins": 0.14136536419391632, "rewards/rejected": -0.5824111104011536, "step": 8210 }, { "epoch": 0.61, "learning_rate": 2.0116823209917362e-06, "logits/chosen": -2.1745638847351074, "logits/rejected": -1.702660322189331, "logps/chosen": -439.1360778808594, "logps/rejected": -594.128662109375, "loss": 0.6749, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.27793020009994507, "rewards/margins": 0.20333930850028992, "rewards/rejected": -0.4812694489955902, "step": 8220 }, { "epoch": 0.61, "learning_rate": 2.0053707760949547e-06, "logits/chosen": -1.9972078800201416, "logits/rejected": -1.5870620012283325, "logps/chosen": -597.4382934570312, "logps/rejected": -768.1552124023438, "loss": 0.6785, "rewards/accuracies": 0.75, "rewards/chosen": -0.4471694827079773, "rewards/margins": 0.18795013427734375, "rewards/rejected": -0.635119616985321, "step": 8230 }, { "epoch": 0.61, "learning_rate": 1.9990625105530996e-06, "logits/chosen": -1.785820722579956, "logits/rejected": -1.5524976253509521, "logps/chosen": -534.7361450195312, "logps/rejected": -706.7614135742188, "loss": 0.6776, "rewards/accuracies": 0.625, "rewards/chosen": -0.3989560008049011, "rewards/margins": 0.17877987027168274, "rewards/rejected": -0.5777358412742615, "step": 8240 }, { "epoch": 0.61, "learning_rate": 1.992757566189502e-06, "logits/chosen": -2.060936450958252, "logits/rejected": -1.56722092628479, "logps/chosen": -466.3275451660156, "logps/rejected": -671.6610107421875, "loss": 0.6757, "rewards/accuracies": 0.75, "rewards/chosen": -0.3279922306537628, "rewards/margins": 0.2292335331439972, "rewards/rejected": -0.55722576379776, "step": 8250 }, { "epoch": 0.61, "learning_rate": 1.986455984805473e-06, "logits/chosen": -1.8592472076416016, "logits/rejected": -1.518502950668335, "logps/chosen": -487.41693115234375, "logps/rejected": -645.44140625, "loss": 0.6805, "rewards/accuracies": 0.625, "rewards/chosen": -0.34162309765815735, "rewards/margins": 0.19449517130851746, "rewards/rejected": -0.5361182689666748, "step": 8260 }, { "epoch": 0.61, "learning_rate": 1.9801578081800283e-06, "logits/chosen": -2.140925168991089, "logits/rejected": -1.6329892873764038, "logps/chosen": -511.2369079589844, "logps/rejected": -655.4305419921875, "loss": 0.6769, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3502499461174011, "rewards/margins": 0.18955886363983154, "rewards/rejected": -0.5398088097572327, "step": 8270 }, { "epoch": 0.61, "learning_rate": 1.973863078069611e-06, "logits/chosen": -2.0312740802764893, "logits/rejected": -1.574578046798706, "logps/chosen": -357.6878662109375, "logps/rejected": -520.335205078125, "loss": 0.6769, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17532813549041748, "rewards/margins": 0.21763649582862854, "rewards/rejected": -0.39296460151672363, "step": 8280 }, { "epoch": 0.61, "learning_rate": 1.9675718362078114e-06, "logits/chosen": -2.105140447616577, "logits/rejected": -1.691868782043457, "logps/chosen": -414.1922912597656, "logps/rejected": -634.86181640625, "loss": 0.6739, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2745841443538666, "rewards/margins": 0.24204353988170624, "rewards/rejected": -0.5166277289390564, "step": 8290 }, { "epoch": 0.61, "learning_rate": 1.9612841243050955e-06, "logits/chosen": -1.885363221168518, "logits/rejected": -1.314954161643982, "logps/chosen": -541.5125732421875, "logps/rejected": -606.4385986328125, "loss": 0.6789, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3317708373069763, "rewards/margins": 0.16661317646503448, "rewards/rejected": -0.4983840584754944, "step": 8300 }, { "epoch": 0.61, "learning_rate": 1.9549999840485255e-06, "logits/chosen": -1.9550626277923584, "logits/rejected": -1.5534899234771729, "logps/chosen": -629.6906127929688, "logps/rejected": -738.1923828125, "loss": 0.683, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4464048445224762, "rewards/margins": 0.15508344769477844, "rewards/rejected": -0.6014882326126099, "step": 8310 }, { "epoch": 0.61, "learning_rate": 1.948719457101483e-06, "logits/chosen": -2.0684292316436768, "logits/rejected": -1.3685914278030396, "logps/chosen": -441.43756103515625, "logps/rejected": -661.0225830078125, "loss": 0.6724, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2838292419910431, "rewards/margins": 0.23715916275978088, "rewards/rejected": -0.520988404750824, "step": 8320 }, { "epoch": 0.61, "learning_rate": 1.9424425851033935e-06, "logits/chosen": -2.427685260772705, "logits/rejected": -1.7533296346664429, "logps/chosen": -462.6610412597656, "logps/rejected": -661.0872192382812, "loss": 0.6697, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2887974679470062, "rewards/margins": 0.24754473567008972, "rewards/rejected": -0.536342203617096, "step": 8330 }, { "epoch": 0.62, "learning_rate": 1.9361694096694513e-06, "logits/chosen": -2.144118070602417, "logits/rejected": -1.6589282751083374, "logps/chosen": -476.35418701171875, "logps/rejected": -594.5903930664062, "loss": 0.6778, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3137776553630829, "rewards/margins": 0.14993500709533691, "rewards/rejected": -0.4637126922607422, "step": 8340 }, { "epoch": 0.62, "learning_rate": 1.9298999723903423e-06, "logits/chosen": -2.0943989753723145, "logits/rejected": -1.5682957172393799, "logps/chosen": -541.3721923828125, "logps/rejected": -703.0413818359375, "loss": 0.6742, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.39887529611587524, "rewards/margins": 0.20586642622947693, "rewards/rejected": -0.6047416925430298, "step": 8350 }, { "epoch": 0.62, "learning_rate": 1.923634314831969e-06, "logits/chosen": -1.9454536437988281, "logits/rejected": -1.4637258052825928, "logps/chosen": -601.1502685546875, "logps/rejected": -772.9757080078125, "loss": 0.6787, "rewards/accuracies": 0.75, "rewards/chosen": -0.4042777419090271, "rewards/margins": 0.19957086443901062, "rewards/rejected": -0.6038486361503601, "step": 8360 }, { "epoch": 0.62, "learning_rate": 1.9173724785351753e-06, "logits/chosen": -2.022599458694458, "logits/rejected": -1.4935886859893799, "logps/chosen": -562.4918212890625, "logps/rejected": -729.6209106445312, "loss": 0.6825, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.41400986909866333, "rewards/margins": 0.1915901154279709, "rewards/rejected": -0.6055999994277954, "step": 8370 }, { "epoch": 0.62, "learning_rate": 1.911114505015469e-06, "logits/chosen": -2.120302438735962, "logits/rejected": -1.545533299446106, "logps/chosen": -554.5655517578125, "logps/rejected": -794.4661865234375, "loss": 0.6762, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3831387162208557, "rewards/margins": 0.2650979459285736, "rewards/rejected": -0.6482366323471069, "step": 8380 }, { "epoch": 0.62, "learning_rate": 1.9048604357627482e-06, "logits/chosen": -1.9771219491958618, "logits/rejected": -1.4530855417251587, "logps/chosen": -526.910400390625, "logps/rejected": -719.98974609375, "loss": 0.68, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.38542062044143677, "rewards/margins": 0.22485867142677307, "rewards/rejected": -0.6102792024612427, "step": 8390 }, { "epoch": 0.62, "learning_rate": 1.8986103122410269e-06, "logits/chosen": -2.1152100563049316, "logits/rejected": -1.6334564685821533, "logps/chosen": -453.5707092285156, "logps/rejected": -602.698974609375, "loss": 0.6755, "rewards/accuracies": 0.75, "rewards/chosen": -0.3008975386619568, "rewards/margins": 0.21015533804893494, "rewards/rejected": -0.5110529065132141, "step": 8400 }, { "epoch": 0.62, "learning_rate": 1.892364175888159e-06, "logits/chosen": -2.116184711456299, "logits/rejected": -1.5065926313400269, "logps/chosen": -442.2398376464844, "logps/rejected": -640.2100830078125, "loss": 0.6764, "rewards/accuracies": 0.75, "rewards/chosen": -0.27739399671554565, "rewards/margins": 0.2572004795074463, "rewards/rejected": -0.5345944762229919, "step": 8410 }, { "epoch": 0.62, "learning_rate": 1.886122068115562e-06, "logits/chosen": -2.0967636108398438, "logits/rejected": -1.727539300918579, "logps/chosen": -490.9716796875, "logps/rejected": -655.4788818359375, "loss": 0.6796, "rewards/accuracies": 0.75, "rewards/chosen": -0.33933085203170776, "rewards/margins": 0.18752822279930115, "rewards/rejected": -0.5268591046333313, "step": 8420 }, { "epoch": 0.62, "learning_rate": 1.879884030307947e-06, "logits/chosen": -2.1316399574279785, "logits/rejected": -1.639212965965271, "logps/chosen": -490.1263122558594, "logps/rejected": -682.0147094726562, "loss": 0.6834, "rewards/accuracies": 0.875, "rewards/chosen": -0.2999844253063202, "rewards/margins": 0.21972206234931946, "rewards/rejected": -0.5197064876556396, "step": 8430 }, { "epoch": 0.62, "learning_rate": 1.8736501038230392e-06, "logits/chosen": -2.088714361190796, "logits/rejected": -1.504397988319397, "logps/chosen": -556.993408203125, "logps/rejected": -676.5147705078125, "loss": 0.6867, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.40350279211997986, "rewards/margins": 0.16872739791870117, "rewards/rejected": -0.5722301602363586, "step": 8440 }, { "epoch": 0.62, "learning_rate": 1.867420329991309e-06, "logits/chosen": -1.880455732345581, "logits/rejected": -1.4935274124145508, "logps/chosen": -475.76324462890625, "logps/rejected": -629.3888549804688, "loss": 0.6818, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.36769789457321167, "rewards/margins": 0.16110575199127197, "rewards/rejected": -0.5288037061691284, "step": 8450 }, { "epoch": 0.62, "learning_rate": 1.861194750115691e-06, "logits/chosen": -2.209850311279297, "logits/rejected": -1.5805315971374512, "logps/chosen": -498.922607421875, "logps/rejected": -666.587158203125, "loss": 0.6758, "rewards/accuracies": 0.75, "rewards/chosen": -0.325927197933197, "rewards/margins": 0.23289790749549866, "rewards/rejected": -0.5588251352310181, "step": 8460 }, { "epoch": 0.62, "learning_rate": 1.854973405471317e-06, "logits/chosen": -1.7422832250595093, "logits/rejected": -1.5167686939239502, "logps/chosen": -450.53692626953125, "logps/rejected": -598.3053588867188, "loss": 0.681, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.30303308367729187, "rewards/margins": 0.16516169905662537, "rewards/rejected": -0.46819472312927246, "step": 8470 }, { "epoch": 0.63, "learning_rate": 1.8487563373052394e-06, "logits/chosen": -2.149418830871582, "logits/rejected": -1.6946147680282593, "logps/chosen": -419.5205993652344, "logps/rejected": -584.3961181640625, "loss": 0.6777, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.27597224712371826, "rewards/margins": 0.16537509858608246, "rewards/rejected": -0.4413473606109619, "step": 8480 }, { "epoch": 0.63, "learning_rate": 1.8425435868361563e-06, "logits/chosen": -2.098719835281372, "logits/rejected": -1.4108848571777344, "logps/chosen": -438.07421875, "logps/rejected": -611.8868408203125, "loss": 0.681, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2762983441352844, "rewards/margins": 0.21423335373401642, "rewards/rejected": -0.49053168296813965, "step": 8490 }, { "epoch": 0.63, "learning_rate": 1.8363351952541408e-06, "logits/chosen": -1.9387365579605103, "logits/rejected": -1.551558256149292, "logps/chosen": -423.07867431640625, "logps/rejected": -629.2806396484375, "loss": 0.6781, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.28026315569877625, "rewards/margins": 0.22550883889198303, "rewards/rejected": -0.5057719945907593, "step": 8500 }, { "epoch": 0.63, "learning_rate": 1.8301312037203677e-06, "logits/chosen": -2.0163416862487793, "logits/rejected": -1.6435441970825195, "logps/chosen": -508.3749084472656, "logps/rejected": -676.56396484375, "loss": 0.677, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3093162775039673, "rewards/margins": 0.19943758845329285, "rewards/rejected": -0.5087538957595825, "step": 8510 }, { "epoch": 0.63, "learning_rate": 1.8239316533668376e-06, "logits/chosen": -1.9578794240951538, "logits/rejected": -1.4708865880966187, "logps/chosen": -575.608154296875, "logps/rejected": -727.2564697265625, "loss": 0.6766, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.40931954979896545, "rewards/margins": 0.17676793038845062, "rewards/rejected": -0.5860875248908997, "step": 8520 }, { "epoch": 0.63, "learning_rate": 1.8177365852961081e-06, "logits/chosen": -1.9296290874481201, "logits/rejected": -1.657997488975525, "logps/chosen": -569.5535888671875, "logps/rejected": -745.8170166015625, "loss": 0.6817, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.40855541825294495, "rewards/margins": 0.18225230276584625, "rewards/rejected": -0.5908077359199524, "step": 8530 }, { "epoch": 0.63, "learning_rate": 1.8115460405810206e-06, "logits/chosen": -2.1463589668273926, "logits/rejected": -1.5353436470031738, "logps/chosen": -494.91314697265625, "logps/rejected": -618.0924072265625, "loss": 0.6764, "rewards/accuracies": 0.75, "rewards/chosen": -0.3204557001590729, "rewards/margins": 0.17707788944244385, "rewards/rejected": -0.4975336492061615, "step": 8540 }, { "epoch": 0.63, "learning_rate": 1.805360060264425e-06, "logits/chosen": -2.07724928855896, "logits/rejected": -1.6148828268051147, "logps/chosen": -503.95367431640625, "logps/rejected": -665.2237548828125, "loss": 0.6805, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.38587385416030884, "rewards/margins": 0.17106831073760986, "rewards/rejected": -0.5569421648979187, "step": 8550 }, { "epoch": 0.63, "learning_rate": 1.7991786853589092e-06, "logits/chosen": -2.1940884590148926, "logits/rejected": -1.8014024496078491, "logps/chosen": -547.8887939453125, "logps/rejected": -660.459716796875, "loss": 0.6848, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3625122010707855, "rewards/margins": 0.16752037405967712, "rewards/rejected": -0.5300325751304626, "step": 8560 }, { "epoch": 0.63, "learning_rate": 1.7930019568465293e-06, "logits/chosen": -1.8840316534042358, "logits/rejected": -1.3649595975875854, "logps/chosen": -448.1148376464844, "logps/rejected": -602.4068603515625, "loss": 0.6747, "rewards/accuracies": 0.75, "rewards/chosen": -0.2927365005016327, "rewards/margins": 0.1894773691892624, "rewards/rejected": -0.4822138249874115, "step": 8570 }, { "epoch": 0.63, "learning_rate": 1.7868299156785363e-06, "logits/chosen": -2.1355044841766357, "logits/rejected": -1.4113394021987915, "logps/chosen": -649.2208862304688, "logps/rejected": -748.7550659179688, "loss": 0.6775, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.41571131348609924, "rewards/margins": 0.1775016486644745, "rewards/rejected": -0.5932129621505737, "step": 8580 }, { "epoch": 0.63, "learning_rate": 1.7806626027751017e-06, "logits/chosen": -1.9978697299957275, "logits/rejected": -1.349441647529602, "logps/chosen": -626.9761962890625, "logps/rejected": -801.2274780273438, "loss": 0.6779, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4614279866218567, "rewards/margins": 0.17961326241493225, "rewards/rejected": -0.6410412192344666, "step": 8590 }, { "epoch": 0.63, "learning_rate": 1.774500059025052e-06, "logits/chosen": -2.1448261737823486, "logits/rejected": -1.7636711597442627, "logps/chosen": -586.5802001953125, "logps/rejected": -766.14208984375, "loss": 0.6785, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4324590265750885, "rewards/margins": 0.21198682487010956, "rewards/rejected": -0.6444458365440369, "step": 8600 }, { "epoch": 0.64, "learning_rate": 1.768342325285594e-06, "logits/chosen": -1.9005582332611084, "logits/rejected": -1.185455560684204, "logps/chosen": -612.1224365234375, "logps/rejected": -755.6607666015625, "loss": 0.6771, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4352850019931793, "rewards/margins": 0.22075386345386505, "rewards/rejected": -0.6560388803482056, "step": 8610 }, { "epoch": 0.64, "learning_rate": 1.7621894423820418e-06, "logits/chosen": -1.8445221185684204, "logits/rejected": -1.2243995666503906, "logps/chosen": -549.3577880859375, "logps/rejected": -773.5787353515625, "loss": 0.6743, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3826529085636139, "rewards/margins": 0.26308125257492065, "rewards/rejected": -0.6457341909408569, "step": 8620 }, { "epoch": 0.64, "learning_rate": 1.7560414511075538e-06, "logits/chosen": -2.130394220352173, "logits/rejected": -1.5174437761306763, "logps/chosen": -608.2115478515625, "logps/rejected": -803.0562744140625, "loss": 0.6767, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.43715134263038635, "rewards/margins": 0.2306191623210907, "rewards/rejected": -0.667770504951477, "step": 8630 }, { "epoch": 0.64, "learning_rate": 1.7498983922228523e-06, "logits/chosen": -1.9664039611816406, "logits/rejected": -1.1735508441925049, "logps/chosen": -595.1292114257812, "logps/rejected": -792.7169799804688, "loss": 0.6743, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.42510348558425903, "rewards/margins": 0.27466100454330444, "rewards/rejected": -0.6997643709182739, "step": 8640 }, { "epoch": 0.64, "learning_rate": 1.7437603064559612e-06, "logits/chosen": -2.036196708679199, "logits/rejected": -1.3586111068725586, "logps/chosen": -588.1028442382812, "logps/rejected": -706.1736450195312, "loss": 0.6768, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.38932889699935913, "rewards/margins": 0.19651982188224792, "rewards/rejected": -0.5858487486839294, "step": 8650 }, { "epoch": 0.64, "learning_rate": 1.7376272345019325e-06, "logits/chosen": -1.874070167541504, "logits/rejected": -1.420116662979126, "logps/chosen": -565.9878540039062, "logps/rejected": -634.5250244140625, "loss": 0.6772, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35846856236457825, "rewards/margins": 0.14578008651733398, "rewards/rejected": -0.5042486190795898, "step": 8660 }, { "epoch": 0.64, "learning_rate": 1.7314992170225752e-06, "logits/chosen": -2.121922016143799, "logits/rejected": -1.6356796026229858, "logps/chosen": -479.06689453125, "logps/rejected": -655.7440185546875, "loss": 0.6762, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3327873945236206, "rewards/margins": 0.19322095811367035, "rewards/rejected": -0.5260083675384521, "step": 8670 }, { "epoch": 0.64, "learning_rate": 1.7253762946461904e-06, "logits/chosen": -1.8930097818374634, "logits/rejected": -1.5234086513519287, "logps/chosen": -424.0302734375, "logps/rejected": -548.6192626953125, "loss": 0.6778, "rewards/accuracies": 0.75, "rewards/chosen": -0.2761727571487427, "rewards/margins": 0.1505815088748932, "rewards/rejected": -0.4267542362213135, "step": 8680 }, { "epoch": 0.64, "learning_rate": 1.7192585079672963e-06, "logits/chosen": -2.103783369064331, "logits/rejected": -1.8319917917251587, "logps/chosen": -442.9908142089844, "logps/rejected": -605.2137451171875, "loss": 0.6781, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2928987443447113, "rewards/margins": 0.1683454066514969, "rewards/rejected": -0.4612441658973694, "step": 8690 }, { "epoch": 0.64, "learning_rate": 1.7131458975463639e-06, "logits/chosen": -2.174776077270508, "logits/rejected": -1.6102584600448608, "logps/chosen": -462.5901794433594, "logps/rejected": -630.7236328125, "loss": 0.6724, "rewards/accuracies": 0.75, "rewards/chosen": -0.2839105725288391, "rewards/margins": 0.22833219170570374, "rewards/rejected": -0.5122427940368652, "step": 8700 }, { "epoch": 0.64, "learning_rate": 1.7070385039095441e-06, "logits/chosen": -2.1334924697875977, "logits/rejected": -1.6529505252838135, "logps/chosen": -482.56976318359375, "logps/rejected": -638.6309814453125, "loss": 0.6768, "rewards/accuracies": 0.75, "rewards/chosen": -0.3127947151660919, "rewards/margins": 0.2017628252506256, "rewards/rejected": -0.5145575404167175, "step": 8710 }, { "epoch": 0.64, "learning_rate": 1.7009363675484036e-06, "logits/chosen": -2.0715725421905518, "logits/rejected": -1.4959946870803833, "logps/chosen": -418.65228271484375, "logps/rejected": -619.63720703125, "loss": 0.6753, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.25542038679122925, "rewards/margins": 0.25517016649246216, "rewards/rejected": -0.5105905532836914, "step": 8720 }, { "epoch": 0.64, "learning_rate": 1.6948395289196517e-06, "logits/chosen": -1.9344584941864014, "logits/rejected": -1.7945448160171509, "logps/chosen": -400.1717529296875, "logps/rejected": -515.4010620117188, "loss": 0.6878, "rewards/accuracies": 0.5, "rewards/chosen": -0.27098479866981506, "rewards/margins": 0.12975767254829407, "rewards/rejected": -0.40074247121810913, "step": 8730 }, { "epoch": 0.64, "learning_rate": 1.688748028444874e-06, "logits/chosen": -2.225827932357788, "logits/rejected": -1.7595809698104858, "logps/chosen": -427.8551330566406, "logps/rejected": -593.6170654296875, "loss": 0.6759, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.25104764103889465, "rewards/margins": 0.18975161015987396, "rewards/rejected": -0.4407992362976074, "step": 8740 }, { "epoch": 0.65, "learning_rate": 1.682661906510265e-06, "logits/chosen": -1.84476637840271, "logits/rejected": -1.3462964296340942, "logps/chosen": -435.44189453125, "logps/rejected": -592.9511108398438, "loss": 0.6783, "rewards/accuracies": 0.75, "rewards/chosen": -0.28735217452049255, "rewards/margins": 0.2099519520998001, "rewards/rejected": -0.49730411171913147, "step": 8750 }, { "epoch": 0.65, "learning_rate": 1.6765812034663612e-06, "logits/chosen": -1.9706588983535767, "logits/rejected": -1.5706796646118164, "logps/chosen": -472.97137451171875, "logps/rejected": -623.9747314453125, "loss": 0.6778, "rewards/accuracies": 0.75, "rewards/chosen": -0.3249821662902832, "rewards/margins": 0.18374818563461304, "rewards/rejected": -0.5087303519248962, "step": 8760 }, { "epoch": 0.65, "learning_rate": 1.6705059596277694e-06, "logits/chosen": -2.138390064239502, "logits/rejected": -1.7047516107559204, "logps/chosen": -536.7015991210938, "logps/rejected": -634.7200927734375, "loss": 0.6815, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3780193328857422, "rewards/margins": 0.13693565130233765, "rewards/rejected": -0.5149549245834351, "step": 8770 }, { "epoch": 0.65, "learning_rate": 1.664436215272905e-06, "logits/chosen": -2.079014301300049, "logits/rejected": -1.5482687950134277, "logps/chosen": -421.34832763671875, "logps/rejected": -552.0116577148438, "loss": 0.6822, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.265597403049469, "rewards/margins": 0.170469731092453, "rewards/rejected": -0.4360671639442444, "step": 8780 }, { "epoch": 0.65, "learning_rate": 1.6583720106437207e-06, "logits/chosen": -2.0764644145965576, "logits/rejected": -1.7906367778778076, "logps/chosen": -520.6783447265625, "logps/rejected": -686.7657470703125, "loss": 0.677, "rewards/accuracies": 0.625, "rewards/chosen": -0.34930306673049927, "rewards/margins": 0.17884141206741333, "rewards/rejected": -0.5281445384025574, "step": 8790 }, { "epoch": 0.65, "learning_rate": 1.6523133859454412e-06, "logits/chosen": -2.0131919384002686, "logits/rejected": -1.2904114723205566, "logps/chosen": -516.8017578125, "logps/rejected": -761.5862426757812, "loss": 0.6816, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.36818814277648926, "rewards/margins": 0.28872913122177124, "rewards/rejected": -0.6569172739982605, "step": 8800 }, { "epoch": 0.65, "learning_rate": 1.6462603813462982e-06, "logits/chosen": -1.9735462665557861, "logits/rejected": -1.4063172340393066, "logps/chosen": -606.9288330078125, "logps/rejected": -735.3271484375, "loss": 0.6732, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.3511353135108948, "rewards/margins": 0.2278100550174713, "rewards/rejected": -0.5789453387260437, "step": 8810 }, { "epoch": 0.65, "learning_rate": 1.6402130369772611e-06, "logits/chosen": -2.171685218811035, "logits/rejected": -1.813742995262146, "logps/chosen": -425.8299865722656, "logps/rejected": -591.4786376953125, "loss": 0.6758, "rewards/accuracies": 0.75, "rewards/chosen": -0.2819589078426361, "rewards/margins": 0.1842467188835144, "rewards/rejected": -0.4662056565284729, "step": 8820 }, { "epoch": 0.65, "learning_rate": 1.6341713929317732e-06, "logits/chosen": -1.9888454675674438, "logits/rejected": -1.5735652446746826, "logps/chosen": -537.7574462890625, "logps/rejected": -747.2926635742188, "loss": 0.683, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.35082387924194336, "rewards/margins": 0.23012332618236542, "rewards/rejected": -0.58094722032547, "step": 8830 }, { "epoch": 0.65, "learning_rate": 1.6281354892654839e-06, "logits/chosen": -1.9177162647247314, "logits/rejected": -1.7782385349273682, "logps/chosen": -463.67034912109375, "logps/rejected": -626.275634765625, "loss": 0.6763, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.323830783367157, "rewards/margins": 0.16459031403064728, "rewards/rejected": -0.48842111229896545, "step": 8840 }, { "epoch": 0.65, "learning_rate": 1.6221053659959852e-06, "logits/chosen": -2.1896636486053467, "logits/rejected": -1.9233083724975586, "logps/chosen": -442.8995056152344, "logps/rejected": -650.7384033203125, "loss": 0.6741, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.27410146594047546, "rewards/margins": 0.23827092349529266, "rewards/rejected": -0.5123723745346069, "step": 8850 }, { "epoch": 0.65, "learning_rate": 1.616081063102546e-06, "logits/chosen": -2.064551591873169, "logits/rejected": -1.5471117496490479, "logps/chosen": -490.7765197753906, "logps/rejected": -719.8929443359375, "loss": 0.6683, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3518783152103424, "rewards/margins": 0.2625523805618286, "rewards/rejected": -0.6144307851791382, "step": 8860 }, { "epoch": 0.65, "learning_rate": 1.6100626205258452e-06, "logits/chosen": -2.192876100540161, "logits/rejected": -1.5223476886749268, "logps/chosen": -482.208740234375, "logps/rejected": -660.8082275390625, "loss": 0.6798, "rewards/accuracies": 0.75, "rewards/chosen": -0.2816104292869568, "rewards/margins": 0.21958193182945251, "rewards/rejected": -0.5011923909187317, "step": 8870 }, { "epoch": 0.66, "learning_rate": 1.6040500781677095e-06, "logits/chosen": -2.228968858718872, "logits/rejected": -1.6020208597183228, "logps/chosen": -516.0985107421875, "logps/rejected": -715.4259033203125, "loss": 0.6745, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3498924970626831, "rewards/margins": 0.2246187925338745, "rewards/rejected": -0.5745112895965576, "step": 8880 }, { "epoch": 0.66, "learning_rate": 1.5980434758908464e-06, "logits/chosen": -2.1611475944519043, "logits/rejected": -1.7109302282333374, "logps/chosen": -395.7415771484375, "logps/rejected": -492.335693359375, "loss": 0.6826, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2596690058708191, "rewards/margins": 0.1428290158510208, "rewards/rejected": -0.4024980664253235, "step": 8890 }, { "epoch": 0.66, "learning_rate": 1.5920428535185833e-06, "logits/chosen": -2.092712879180908, "logits/rejected": -1.5427972078323364, "logps/chosen": -514.9134521484375, "logps/rejected": -705.2669677734375, "loss": 0.6769, "rewards/accuracies": 0.75, "rewards/chosen": -0.31557637453079224, "rewards/margins": 0.23860903084278107, "rewards/rejected": -0.5541853904724121, "step": 8900 }, { "epoch": 0.66, "learning_rate": 1.5860482508345981e-06, "logits/chosen": -2.0519022941589355, "logits/rejected": -1.6292016506195068, "logps/chosen": -485.0816955566406, "logps/rejected": -637.870361328125, "loss": 0.6802, "rewards/accuracies": 0.75, "rewards/chosen": -0.3471487760543823, "rewards/margins": 0.17455092072486877, "rewards/rejected": -0.5216997265815735, "step": 8910 }, { "epoch": 0.66, "learning_rate": 1.580059707582661e-06, "logits/chosen": -2.1019370555877686, "logits/rejected": -1.7320783138275146, "logps/chosen": -447.163818359375, "logps/rejected": -616.7916870117188, "loss": 0.677, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.28250008821487427, "rewards/margins": 0.21093647181987762, "rewards/rejected": -0.4934365153312683, "step": 8920 }, { "epoch": 0.66, "learning_rate": 1.5740772634663682e-06, "logits/chosen": -2.047079563140869, "logits/rejected": -1.6572802066802979, "logps/chosen": -448.39349365234375, "logps/rejected": -622.62060546875, "loss": 0.6817, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2753896713256836, "rewards/margins": 0.19489280879497528, "rewards/rejected": -0.4702824652194977, "step": 8930 }, { "epoch": 0.66, "learning_rate": 1.5681009581488783e-06, "logits/chosen": -2.0642316341400146, "logits/rejected": -1.6665751934051514, "logps/chosen": -488.059814453125, "logps/rejected": -617.3678588867188, "loss": 0.68, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3092575967311859, "rewards/margins": 0.155753955245018, "rewards/rejected": -0.4650115966796875, "step": 8940 }, { "epoch": 0.66, "learning_rate": 1.56213083125265e-06, "logits/chosen": -2.041703939437866, "logits/rejected": -1.475791096687317, "logps/chosen": -401.22760009765625, "logps/rejected": -598.7857055664062, "loss": 0.6766, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22231772541999817, "rewards/margins": 0.2432185858488083, "rewards/rejected": -0.46553635597229004, "step": 8950 }, { "epoch": 0.66, "learning_rate": 1.5561669223591812e-06, "logits/chosen": -1.9956443309783936, "logits/rejected": -1.5372989177703857, "logps/chosen": -513.5095825195312, "logps/rejected": -681.9465942382812, "loss": 0.6772, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35072728991508484, "rewards/margins": 0.20686344802379608, "rewards/rejected": -0.5575907826423645, "step": 8960 }, { "epoch": 0.66, "learning_rate": 1.5502092710087436e-06, "logits/chosen": -2.1606547832489014, "logits/rejected": -1.6489343643188477, "logps/chosen": -510.75567626953125, "logps/rejected": -668.4631958007812, "loss": 0.674, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3479406535625458, "rewards/margins": 0.21452315151691437, "rewards/rejected": -0.5624638795852661, "step": 8970 }, { "epoch": 0.66, "learning_rate": 1.544257916700121e-06, "logits/chosen": -1.9099938869476318, "logits/rejected": -1.5467380285263062, "logps/chosen": -412.5633850097656, "logps/rejected": -615.3507690429688, "loss": 0.6747, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24318797886371613, "rewards/margins": 0.2276521474123001, "rewards/rejected": -0.47084012627601624, "step": 8980 }, { "epoch": 0.66, "learning_rate": 1.5383128988903504e-06, "logits/chosen": -2.188281774520874, "logits/rejected": -1.6521766185760498, "logps/chosen": -542.1370239257812, "logps/rejected": -658.1896362304688, "loss": 0.684, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3423099219799042, "rewards/margins": 0.16575834155082703, "rewards/rejected": -0.5080682635307312, "step": 8990 }, { "epoch": 0.66, "learning_rate": 1.5323742569944573e-06, "logits/chosen": -2.191091775894165, "logits/rejected": -1.8808568716049194, "logps/chosen": -525.5406494140625, "logps/rejected": -644.0554809570312, "loss": 0.6814, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3299676477909088, "rewards/margins": 0.1451220065355301, "rewards/rejected": -0.4750896394252777, "step": 9000 }, { "epoch": 0.66, "learning_rate": 1.5264420303851951e-06, "logits/chosen": -1.9756219387054443, "logits/rejected": -1.5637612342834473, "logps/chosen": -433.0272521972656, "logps/rejected": -580.4953002929688, "loss": 0.6816, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2777188718318939, "rewards/margins": 0.1834227293729782, "rewards/rejected": -0.46114158630371094, "step": 9010 }, { "epoch": 0.67, "learning_rate": 1.5205162583927846e-06, "logits/chosen": -1.8833532333374023, "logits/rejected": -1.6010726690292358, "logps/chosen": -477.4322814941406, "logps/rejected": -661.3372802734375, "loss": 0.6765, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3182468116283417, "rewards/margins": 0.19452981650829315, "rewards/rejected": -0.5127766728401184, "step": 9020 }, { "epoch": 0.67, "learning_rate": 1.514596980304653e-06, "logits/chosen": -2.235199451446533, "logits/rejected": -1.3740906715393066, "logps/chosen": -529.3816528320312, "logps/rejected": -687.5687255859375, "loss": 0.677, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3277267813682556, "rewards/margins": 0.2551687955856323, "rewards/rejected": -0.5828955769538879, "step": 9030 }, { "epoch": 0.67, "learning_rate": 1.5086842353651721e-06, "logits/chosen": -1.952840805053711, "logits/rejected": -1.5699284076690674, "logps/chosen": -470.126953125, "logps/rejected": -653.86865234375, "loss": 0.6768, "rewards/accuracies": 0.875, "rewards/chosen": -0.3019630014896393, "rewards/margins": 0.2008558213710785, "rewards/rejected": -0.5028188824653625, "step": 9040 }, { "epoch": 0.67, "learning_rate": 1.5027780627754022e-06, "logits/chosen": -2.07087779045105, "logits/rejected": -1.5742106437683105, "logps/chosen": -462.83282470703125, "logps/rejected": -624.7615966796875, "loss": 0.6839, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.29168540239334106, "rewards/margins": 0.18432722985744476, "rewards/rejected": -0.476012647151947, "step": 9050 }, { "epoch": 0.67, "learning_rate": 1.4968785016928264e-06, "logits/chosen": -2.2354376316070557, "logits/rejected": -1.7966896295547485, "logps/chosen": -483.34320068359375, "logps/rejected": -673.8905029296875, "loss": 0.676, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.31671446561813354, "rewards/margins": 0.21100303530693054, "rewards/rejected": -0.5277174711227417, "step": 9060 }, { "epoch": 0.67, "learning_rate": 1.4909855912310966e-06, "logits/chosen": -2.27778959274292, "logits/rejected": -1.645996332168579, "logps/chosen": -475.1844787597656, "logps/rejected": -611.1456909179688, "loss": 0.6767, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2476363182067871, "rewards/margins": 0.22869984805583954, "rewards/rejected": -0.47633615136146545, "step": 9070 }, { "epoch": 0.67, "learning_rate": 1.4850993704597708e-06, "logits/chosen": -2.1701409816741943, "logits/rejected": -1.728795051574707, "logps/chosen": -447.87872314453125, "logps/rejected": -578.5816040039062, "loss": 0.6812, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.27756038308143616, "rewards/margins": 0.18329483270645142, "rewards/rejected": -0.46085524559020996, "step": 9080 }, { "epoch": 0.67, "learning_rate": 1.479219878404053e-06, "logits/chosen": -2.0640575885772705, "logits/rejected": -1.9213602542877197, "logps/chosen": -446.28717041015625, "logps/rejected": -560.2164306640625, "loss": 0.6816, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3113669157028198, "rewards/margins": 0.11748553812503815, "rewards/rejected": -0.42885246872901917, "step": 9090 }, { "epoch": 0.67, "learning_rate": 1.4733471540445386e-06, "logits/chosen": -1.998722791671753, "logits/rejected": -1.6774791479110718, "logps/chosen": -472.3682556152344, "logps/rejected": -626.2091674804688, "loss": 0.6812, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3132663667201996, "rewards/margins": 0.14028164744377136, "rewards/rejected": -0.45354804396629333, "step": 9100 }, { "epoch": 0.67, "learning_rate": 1.467481236316953e-06, "logits/chosen": -2.117865800857544, "logits/rejected": -1.4745333194732666, "logps/chosen": -447.3590393066406, "logps/rejected": -612.25439453125, "loss": 0.6809, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24554744362831116, "rewards/margins": 0.23378269374370575, "rewards/rejected": -0.4793301522731781, "step": 9110 }, { "epoch": 0.67, "learning_rate": 1.4616221641118933e-06, "logits/chosen": -2.2270758152008057, "logits/rejected": -1.8189105987548828, "logps/chosen": -430.14666748046875, "logps/rejected": -536.2477416992188, "loss": 0.6792, "rewards/accuracies": 0.75, "rewards/chosen": -0.26074275374412537, "rewards/margins": 0.15792356431484222, "rewards/rejected": -0.4186663031578064, "step": 9120 }, { "epoch": 0.67, "learning_rate": 1.4557699762745725e-06, "logits/chosen": -2.137803792953491, "logits/rejected": -1.6589038372039795, "logps/chosen": -491.357177734375, "logps/rejected": -647.9700927734375, "loss": 0.6799, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.35298648476600647, "rewards/margins": 0.19299353659152985, "rewards/rejected": -0.5459800362586975, "step": 9130 }, { "epoch": 0.67, "learning_rate": 1.4499247116045594e-06, "logits/chosen": -2.051572322845459, "logits/rejected": -1.4219774007797241, "logps/chosen": -401.8594970703125, "logps/rejected": -606.1187744140625, "loss": 0.6753, "rewards/accuracies": 0.875, "rewards/chosen": -0.2262977808713913, "rewards/margins": 0.2515820562839508, "rewards/rejected": -0.4778798222541809, "step": 9140 }, { "epoch": 0.67, "learning_rate": 1.444086408855524e-06, "logits/chosen": -1.7918446063995361, "logits/rejected": -1.270943522453308, "logps/chosen": -533.8773803710938, "logps/rejected": -632.9041137695312, "loss": 0.6802, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3649519383907318, "rewards/margins": 0.1604725867509842, "rewards/rejected": -0.5254245400428772, "step": 9150 }, { "epoch": 0.68, "learning_rate": 1.4382551067349803e-06, "logits/chosen": -2.1210856437683105, "logits/rejected": -1.6333305835723877, "logps/chosen": -560.3944091796875, "logps/rejected": -737.7425537109375, "loss": 0.68, "rewards/accuracies": 0.75, "rewards/chosen": -0.37847059965133667, "rewards/margins": 0.2060006856918335, "rewards/rejected": -0.5844712853431702, "step": 9160 }, { "epoch": 0.68, "learning_rate": 1.4324308439040258e-06, "logits/chosen": -2.087249755859375, "logits/rejected": -1.8928747177124023, "logps/chosen": -489.85394287109375, "logps/rejected": -639.9654541015625, "loss": 0.6861, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.34598779678344727, "rewards/margins": 0.1242472380399704, "rewards/rejected": -0.4702349603176117, "step": 9170 }, { "epoch": 0.68, "learning_rate": 1.4266136589770885e-06, "logits/chosen": -2.10383677482605, "logits/rejected": -1.4558980464935303, "logps/chosen": -482.5450744628906, "logps/rejected": -597.5216674804688, "loss": 0.6818, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2981413006782532, "rewards/margins": 0.17452549934387207, "rewards/rejected": -0.47266674041748047, "step": 9180 }, { "epoch": 0.68, "learning_rate": 1.4208035905216735e-06, "logits/chosen": -2.2047104835510254, "logits/rejected": -1.9690030813217163, "logps/chosen": -542.6670532226562, "logps/rejected": -616.6893310546875, "loss": 0.6825, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.39041417837142944, "rewards/margins": 0.10251070559024811, "rewards/rejected": -0.49292492866516113, "step": 9190 }, { "epoch": 0.68, "learning_rate": 1.415000677058101e-06, "logits/chosen": -2.2410831451416016, "logits/rejected": -1.6083862781524658, "logps/chosen": -544.0029296875, "logps/rejected": -713.998046875, "loss": 0.675, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.35114750266075134, "rewards/margins": 0.2367331087589264, "rewards/rejected": -0.5878806114196777, "step": 9200 }, { "epoch": 0.68, "learning_rate": 1.4092049570592543e-06, "logits/chosen": -2.0717644691467285, "logits/rejected": -1.2469213008880615, "logps/chosen": -462.5146484375, "logps/rejected": -672.9820556640625, "loss": 0.6765, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.28502291440963745, "rewards/margins": 0.2622469961643219, "rewards/rejected": -0.5472699403762817, "step": 9210 }, { "epoch": 0.68, "learning_rate": 1.403416468950327e-06, "logits/chosen": -2.001168727874756, "logits/rejected": -1.4426634311676025, "logps/chosen": -465.3375549316406, "logps/rejected": -695.4229736328125, "loss": 0.6788, "rewards/accuracies": 0.75, "rewards/chosen": -0.29471126198768616, "rewards/margins": 0.24159963428974152, "rewards/rejected": -0.5363108515739441, "step": 9220 }, { "epoch": 0.68, "learning_rate": 1.3976352511085636e-06, "logits/chosen": -1.9769260883331299, "logits/rejected": -1.690763235092163, "logps/chosen": -486.06640625, "logps/rejected": -656.1779174804688, "loss": 0.6782, "rewards/accuracies": 0.875, "rewards/chosen": -0.32458367943763733, "rewards/margins": 0.18941891193389893, "rewards/rejected": -0.5140026211738586, "step": 9230 }, { "epoch": 0.68, "learning_rate": 1.3918613418630072e-06, "logits/chosen": -2.043018102645874, "logits/rejected": -1.4716241359710693, "logps/chosen": -521.8854370117188, "logps/rejected": -739.763671875, "loss": 0.6668, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3615649342536926, "rewards/margins": 0.25867190957069397, "rewards/rejected": -0.620236873626709, "step": 9240 }, { "epoch": 0.68, "learning_rate": 1.3860947794942467e-06, "logits/chosen": -2.1168019771575928, "logits/rejected": -1.440124273300171, "logps/chosen": -500.53076171875, "logps/rejected": -687.705078125, "loss": 0.6777, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3627357482910156, "rewards/margins": 0.22694554924964905, "rewards/rejected": -0.5896812677383423, "step": 9250 }, { "epoch": 0.68, "learning_rate": 1.3803356022341626e-06, "logits/chosen": -2.0701162815093994, "logits/rejected": -1.3513742685317993, "logps/chosen": -457.45501708984375, "logps/rejected": -671.3934326171875, "loss": 0.6756, "rewards/accuracies": 0.75, "rewards/chosen": -0.3144286274909973, "rewards/margins": 0.25306791067123413, "rewards/rejected": -0.5674965381622314, "step": 9260 }, { "epoch": 0.68, "learning_rate": 1.3745838482656704e-06, "logits/chosen": -2.1459431648254395, "logits/rejected": -1.4643604755401611, "logps/chosen": -455.33721923828125, "logps/rejected": -652.1072998046875, "loss": 0.6729, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.27591150999069214, "rewards/margins": 0.2432100474834442, "rewards/rejected": -0.519121527671814, "step": 9270 }, { "epoch": 0.68, "learning_rate": 1.3688395557224693e-06, "logits/chosen": -2.090160608291626, "logits/rejected": -1.4894068241119385, "logps/chosen": -457.63397216796875, "logps/rejected": -633.8932495117188, "loss": 0.6771, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3132493495941162, "rewards/margins": 0.2187175452709198, "rewards/rejected": -0.5319668650627136, "step": 9280 }, { "epoch": 0.69, "learning_rate": 1.3631027626887932e-06, "logits/chosen": -2.221074104309082, "logits/rejected": -1.5080559253692627, "logps/chosen": -497.598388671875, "logps/rejected": -657.2376098632812, "loss": 0.6753, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.30686628818511963, "rewards/margins": 0.23408448696136475, "rewards/rejected": -0.5409508347511292, "step": 9290 }, { "epoch": 0.69, "learning_rate": 1.3573735071991523e-06, "logits/chosen": -2.254422903060913, "logits/rejected": -1.7845354080200195, "logps/chosen": -478.5389709472656, "logps/rejected": -556.6605224609375, "loss": 0.6803, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3001326024532318, "rewards/margins": 0.13173003494739532, "rewards/rejected": -0.4318626821041107, "step": 9300 }, { "epoch": 0.69, "learning_rate": 1.3516518272380825e-06, "logits/chosen": -2.0524439811706543, "logits/rejected": -1.8714326620101929, "logps/chosen": -482.1742248535156, "logps/rejected": -630.7334594726562, "loss": 0.6841, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3482135534286499, "rewards/margins": 0.1325848400592804, "rewards/rejected": -0.4807983338832855, "step": 9310 }, { "epoch": 0.69, "learning_rate": 1.345937760739898e-06, "logits/chosen": -2.27714467048645, "logits/rejected": -1.6010007858276367, "logps/chosen": -538.09521484375, "logps/rejected": -735.5863647460938, "loss": 0.676, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.38067328929901123, "rewards/margins": 0.22776886820793152, "rewards/rejected": -0.6084421873092651, "step": 9320 }, { "epoch": 0.69, "learning_rate": 1.3402313455884319e-06, "logits/chosen": -2.020120143890381, "logits/rejected": -1.3856867551803589, "logps/chosen": -509.50372314453125, "logps/rejected": -754.3427734375, "loss": 0.671, "rewards/accuracies": 0.875, "rewards/chosen": -0.37177982926368713, "rewards/margins": 0.26039212942123413, "rewards/rejected": -0.6321719884872437, "step": 9330 }, { "epoch": 0.69, "learning_rate": 1.3345326196167945e-06, "logits/chosen": -2.1008167266845703, "logits/rejected": -1.81035578250885, "logps/chosen": -555.4140625, "logps/rejected": -666.2100830078125, "loss": 0.6849, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3888186812400818, "rewards/margins": 0.12408530712127686, "rewards/rejected": -0.5129040479660034, "step": 9340 }, { "epoch": 0.69, "learning_rate": 1.328841620607112e-06, "logits/chosen": -2.0274226665496826, "logits/rejected": -1.3443734645843506, "logps/chosen": -525.239501953125, "logps/rejected": -720.9281005859375, "loss": 0.6755, "rewards/accuracies": 0.75, "rewards/chosen": -0.3831888735294342, "rewards/margins": 0.21782679855823517, "rewards/rejected": -0.6010157465934753, "step": 9350 }, { "epoch": 0.69, "learning_rate": 1.3231583862902866e-06, "logits/chosen": -2.195366382598877, "logits/rejected": -1.7487256526947021, "logps/chosen": -593.5494384765625, "logps/rejected": -754.0130615234375, "loss": 0.6767, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4129478335380554, "rewards/margins": 0.20418763160705566, "rewards/rejected": -0.6171355247497559, "step": 9360 }, { "epoch": 0.69, "learning_rate": 1.3174829543457373e-06, "logits/chosen": -1.978672742843628, "logits/rejected": -1.6043916940689087, "logps/chosen": -622.1676025390625, "logps/rejected": -789.3187255859375, "loss": 0.6814, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4908216893672943, "rewards/margins": 0.17633536458015442, "rewards/rejected": -0.667156994342804, "step": 9370 }, { "epoch": 0.69, "learning_rate": 1.3118153624011547e-06, "logits/chosen": -1.8363597393035889, "logits/rejected": -1.3821502923965454, "logps/chosen": -578.3713989257812, "logps/rejected": -730.609130859375, "loss": 0.6803, "rewards/accuracies": 0.75, "rewards/chosen": -0.4166502058506012, "rewards/margins": 0.17441783845424652, "rewards/rejected": -0.5910680890083313, "step": 9380 }, { "epoch": 0.69, "learning_rate": 1.3061556480322528e-06, "logits/chosen": -2.087754726409912, "logits/rejected": -1.701856255531311, "logps/chosen": -567.1987915039062, "logps/rejected": -726.6749267578125, "loss": 0.6758, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4139467775821686, "rewards/margins": 0.18273089826107025, "rewards/rejected": -0.5966777205467224, "step": 9390 }, { "epoch": 0.69, "learning_rate": 1.3005038487625165e-06, "logits/chosen": -2.1943271160125732, "logits/rejected": -1.626125693321228, "logps/chosen": -544.0086669921875, "logps/rejected": -679.3651733398438, "loss": 0.6812, "rewards/accuracies": 0.75, "rewards/chosen": -0.35311660170555115, "rewards/margins": 0.199880450963974, "rewards/rejected": -0.5529969930648804, "step": 9400 }, { "epoch": 0.69, "learning_rate": 1.294860002062952e-06, "logits/chosen": -1.9200360774993896, "logits/rejected": -1.519977331161499, "logps/chosen": -501.421142578125, "logps/rejected": -687.9895629882812, "loss": 0.68, "rewards/accuracies": 0.75, "rewards/chosen": -0.33394190669059753, "rewards/margins": 0.2313246726989746, "rewards/rejected": -0.5652665495872498, "step": 9410 }, { "epoch": 0.69, "learning_rate": 1.2892241453518445e-06, "logits/chosen": -2.2578482627868652, "logits/rejected": -1.9409822225570679, "logps/chosen": -417.4517517089844, "logps/rejected": -586.1034545898438, "loss": 0.6823, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2733461260795593, "rewards/margins": 0.18121466040611267, "rewards/rejected": -0.4545608162879944, "step": 9420 }, { "epoch": 0.7, "learning_rate": 1.283596315994505e-06, "logits/chosen": -2.195186138153076, "logits/rejected": -1.674660325050354, "logps/chosen": -472.0448303222656, "logps/rejected": -661.9442138671875, "loss": 0.6785, "rewards/accuracies": 0.75, "rewards/chosen": -0.2765853703022003, "rewards/margins": 0.1867508441209793, "rewards/rejected": -0.46333616971969604, "step": 9430 }, { "epoch": 0.7, "learning_rate": 1.2779765513030223e-06, "logits/chosen": -2.3554372787475586, "logits/rejected": -1.5123693943023682, "logps/chosen": -492.6715393066406, "logps/rejected": -613.7860107421875, "loss": 0.6769, "rewards/accuracies": 0.75, "rewards/chosen": -0.25063157081604004, "rewards/margins": 0.21715489029884338, "rewards/rejected": -0.4677864909172058, "step": 9440 }, { "epoch": 0.7, "learning_rate": 1.2723648885360166e-06, "logits/chosen": -1.9975776672363281, "logits/rejected": -1.4950400590896606, "logps/chosen": -465.91107177734375, "logps/rejected": -617.6917724609375, "loss": 0.6786, "rewards/accuracies": 0.75, "rewards/chosen": -0.33537572622299194, "rewards/margins": 0.17440809309482574, "rewards/rejected": -0.5097838640213013, "step": 9450 }, { "epoch": 0.7, "learning_rate": 1.2667613648983965e-06, "logits/chosen": -2.0018906593322754, "logits/rejected": -1.5405635833740234, "logps/chosen": -459.969970703125, "logps/rejected": -717.269287109375, "loss": 0.6733, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3082197606563568, "rewards/margins": 0.26599282026290894, "rewards/rejected": -0.5742125511169434, "step": 9460 }, { "epoch": 0.7, "learning_rate": 1.2611660175411054e-06, "logits/chosen": -2.128361225128174, "logits/rejected": -1.6057138442993164, "logps/chosen": -449.60693359375, "logps/rejected": -614.9383544921875, "loss": 0.6785, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2963494658470154, "rewards/margins": 0.20758244395256042, "rewards/rejected": -0.5039318799972534, "step": 9470 }, { "epoch": 0.7, "learning_rate": 1.2555788835608788e-06, "logits/chosen": -1.9005718231201172, "logits/rejected": -1.3691926002502441, "logps/chosen": -483.894287109375, "logps/rejected": -667.6126098632812, "loss": 0.6733, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.33199360966682434, "rewards/margins": 0.24192607402801514, "rewards/rejected": -0.5739196538925171, "step": 9480 }, { "epoch": 0.7, "learning_rate": 1.2500000000000007e-06, "logits/chosen": -2.1971023082733154, "logits/rejected": -1.8914270401000977, "logps/chosen": -461.159423828125, "logps/rejected": -576.7247314453125, "loss": 0.6772, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2957250475883484, "rewards/margins": 0.16593213379383087, "rewards/rejected": -0.46165719628334045, "step": 9490 }, { "epoch": 0.7, "learning_rate": 1.2444294038460529e-06, "logits/chosen": -2.319936752319336, "logits/rejected": -1.7985763549804688, "logps/chosen": -520.8458251953125, "logps/rejected": -674.3958129882812, "loss": 0.6749, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.32660946249961853, "rewards/margins": 0.22301606833934784, "rewards/rejected": -0.5496255159378052, "step": 9500 }, { "epoch": 0.7, "learning_rate": 1.238867132031671e-06, "logits/chosen": -1.9134324789047241, "logits/rejected": -1.694840431213379, "logps/chosen": -456.7511291503906, "logps/rejected": -579.8649291992188, "loss": 0.6786, "rewards/accuracies": 0.625, "rewards/chosen": -0.29658272862434387, "rewards/margins": 0.15619486570358276, "rewards/rejected": -0.45277756452560425, "step": 9510 }, { "epoch": 0.7, "learning_rate": 1.2333132214343078e-06, "logits/chosen": -1.946877121925354, "logits/rejected": -1.464711308479309, "logps/chosen": -502.477783203125, "logps/rejected": -666.6303100585938, "loss": 0.677, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3336712419986725, "rewards/margins": 0.206000417470932, "rewards/rejected": -0.5396717190742493, "step": 9520 }, { "epoch": 0.7, "learning_rate": 1.2277677088759738e-06, "logits/chosen": -2.1944260597229004, "logits/rejected": -1.2070103883743286, "logps/chosen": -536.9886474609375, "logps/rejected": -662.64892578125, "loss": 0.6743, "rewards/accuracies": 0.625, "rewards/chosen": -0.3519594669342041, "rewards/margins": 0.21187694370746613, "rewards/rejected": -0.5638364553451538, "step": 9530 }, { "epoch": 0.7, "learning_rate": 1.222230631123005e-06, "logits/chosen": -2.3436036109924316, "logits/rejected": -1.8585926294326782, "logps/chosen": -502.0, "logps/rejected": -645.69677734375, "loss": 0.6819, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.32101503014564514, "rewards/margins": 0.19283540546894073, "rewards/rejected": -0.5138503909111023, "step": 9540 }, { "epoch": 0.7, "learning_rate": 1.2167020248858136e-06, "logits/chosen": -2.126638889312744, "logits/rejected": -1.5425583124160767, "logps/chosen": -472.7806091308594, "logps/rejected": -698.2958984375, "loss": 0.6749, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3259023129940033, "rewards/margins": 0.27059563994407654, "rewards/rejected": -0.5964979529380798, "step": 9550 }, { "epoch": 0.71, "learning_rate": 1.2111819268186495e-06, "logits/chosen": -2.1736347675323486, "logits/rejected": -1.5541110038757324, "logps/chosen": -425.2115783691406, "logps/rejected": -652.8338623046875, "loss": 0.6775, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2315690517425537, "rewards/margins": 0.25714364647865295, "rewards/rejected": -0.48871269822120667, "step": 9560 }, { "epoch": 0.71, "learning_rate": 1.2056703735193504e-06, "logits/chosen": -2.1905815601348877, "logits/rejected": -1.7812836170196533, "logps/chosen": -478.8548889160156, "logps/rejected": -641.1213989257812, "loss": 0.6828, "rewards/accuracies": 0.75, "rewards/chosen": -0.27600783109664917, "rewards/margins": 0.20359542965888977, "rewards/rejected": -0.47960323095321655, "step": 9570 }, { "epoch": 0.71, "learning_rate": 1.2001674015291038e-06, "logits/chosen": -2.449310779571533, "logits/rejected": -1.591649055480957, "logps/chosen": -395.16351318359375, "logps/rejected": -595.4158935546875, "loss": 0.6726, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2280634343624115, "rewards/margins": 0.23700174689292908, "rewards/rejected": -0.4650651812553406, "step": 9580 }, { "epoch": 0.71, "learning_rate": 1.194673047332206e-06, "logits/chosen": -1.937496542930603, "logits/rejected": -1.3853023052215576, "logps/chosen": -453.29193115234375, "logps/rejected": -695.1828002929688, "loss": 0.6761, "rewards/accuracies": 0.875, "rewards/chosen": -0.2722678780555725, "rewards/margins": 0.24583609402179718, "rewards/rejected": -0.5181039571762085, "step": 9590 }, { "epoch": 0.71, "learning_rate": 1.1891873473558141e-06, "logits/chosen": -2.2849669456481934, "logits/rejected": -1.8247950077056885, "logps/chosen": -480.58984375, "logps/rejected": -612.6344604492188, "loss": 0.6815, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.33015018701553345, "rewards/margins": 0.17183634638786316, "rewards/rejected": -0.501986563205719, "step": 9600 }, { "epoch": 0.71, "learning_rate": 1.183710337969712e-06, "logits/chosen": -2.0360960960388184, "logits/rejected": -1.4671202898025513, "logps/chosen": -464.375, "logps/rejected": -604.3684692382812, "loss": 0.6791, "rewards/accuracies": 0.75, "rewards/chosen": -0.2952384054660797, "rewards/margins": 0.17422381043434143, "rewards/rejected": -0.46946224570274353, "step": 9610 }, { "epoch": 0.71, "learning_rate": 1.1782420554860645e-06, "logits/chosen": -2.0310566425323486, "logits/rejected": -1.568773865699768, "logps/chosen": -418.31549072265625, "logps/rejected": -533.8113403320312, "loss": 0.6796, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2843910753726959, "rewards/margins": 0.15975423157215118, "rewards/rejected": -0.4441452920436859, "step": 9620 }, { "epoch": 0.71, "learning_rate": 1.172782536159177e-06, "logits/chosen": -2.2280349731445312, "logits/rejected": -1.6585153341293335, "logps/chosen": -422.79400634765625, "logps/rejected": -642.6764526367188, "loss": 0.6803, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2517324388027191, "rewards/margins": 0.23565343022346497, "rewards/rejected": -0.4873858392238617, "step": 9630 }, { "epoch": 0.71, "learning_rate": 1.1673318161852546e-06, "logits/chosen": -1.9653888940811157, "logits/rejected": -1.7075611352920532, "logps/chosen": -493.49310302734375, "logps/rejected": -635.1326904296875, "loss": 0.6805, "rewards/accuracies": 0.75, "rewards/chosen": -0.3619787395000458, "rewards/margins": 0.1455869972705841, "rewards/rejected": -0.5075656771659851, "step": 9640 }, { "epoch": 0.71, "learning_rate": 1.161889931702167e-06, "logits/chosen": -2.1938672065734863, "logits/rejected": -1.6882559061050415, "logps/chosen": -420.2350158691406, "logps/rejected": -603.9027709960938, "loss": 0.679, "rewards/accuracies": 0.75, "rewards/chosen": -0.23822858929634094, "rewards/margins": 0.21967744827270508, "rewards/rejected": -0.4579060971736908, "step": 9650 }, { "epoch": 0.71, "learning_rate": 1.1564569187892025e-06, "logits/chosen": -2.0744526386260986, "logits/rejected": -1.5862928628921509, "logps/chosen": -493.36151123046875, "logps/rejected": -643.1481323242188, "loss": 0.6795, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3071487247943878, "rewards/margins": 0.18680980801582336, "rewards/rejected": -0.4939584732055664, "step": 9660 }, { "epoch": 0.71, "learning_rate": 1.1510328134668316e-06, "logits/chosen": -2.028777837753296, "logits/rejected": -1.6467475891113281, "logps/chosen": -458.439453125, "logps/rejected": -610.4956665039062, "loss": 0.6788, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.327889084815979, "rewards/margins": 0.17015516757965088, "rewards/rejected": -0.4980442523956299, "step": 9670 }, { "epoch": 0.71, "learning_rate": 1.1456176516964682e-06, "logits/chosen": -2.0318403244018555, "logits/rejected": -1.5387427806854248, "logps/chosen": -431.7394104003906, "logps/rejected": -604.2093505859375, "loss": 0.6769, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3022443652153015, "rewards/margins": 0.17277714610099792, "rewards/rejected": -0.4750215411186218, "step": 9680 }, { "epoch": 0.71, "learning_rate": 1.1402114693802323e-06, "logits/chosen": -2.1902101039886475, "logits/rejected": -1.6152045726776123, "logps/chosen": -443.7882385253906, "logps/rejected": -585.7214965820312, "loss": 0.6759, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.28721562027931213, "rewards/margins": 0.2034604847431183, "rewards/rejected": -0.4906761050224304, "step": 9690 }, { "epoch": 0.72, "learning_rate": 1.1348143023607117e-06, "logits/chosen": -1.9517024755477905, "logits/rejected": -1.5325175523757935, "logps/chosen": -550.8614501953125, "logps/rejected": -674.6437377929688, "loss": 0.6727, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.37725111842155457, "rewards/margins": 0.19830362498760223, "rewards/rejected": -0.5755547285079956, "step": 9700 }, { "epoch": 0.72, "learning_rate": 1.1294261864207192e-06, "logits/chosen": -2.168915271759033, "logits/rejected": -1.6004235744476318, "logps/chosen": -503.26116943359375, "logps/rejected": -675.685546875, "loss": 0.6743, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.36662641167640686, "rewards/margins": 0.19438298046588898, "rewards/rejected": -0.561009407043457, "step": 9710 }, { "epoch": 0.72, "learning_rate": 1.1240471572830649e-06, "logits/chosen": -2.0944597721099854, "logits/rejected": -1.6743457317352295, "logps/chosen": -483.522705078125, "logps/rejected": -636.3590698242188, "loss": 0.6783, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3151502311229706, "rewards/margins": 0.15717802941799164, "rewards/rejected": -0.4723282754421234, "step": 9720 }, { "epoch": 0.72, "learning_rate": 1.11867725061031e-06, "logits/chosen": -2.1751503944396973, "logits/rejected": -1.7700939178466797, "logps/chosen": -463.1065368652344, "logps/rejected": -654.1510009765625, "loss": 0.6783, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.28837424516677856, "rewards/margins": 0.21945638954639435, "rewards/rejected": -0.5078305602073669, "step": 9730 }, { "epoch": 0.72, "learning_rate": 1.1133165020045354e-06, "logits/chosen": -2.1405019760131836, "logits/rejected": -1.721184492111206, "logps/chosen": -537.4796142578125, "logps/rejected": -723.4417724609375, "loss": 0.6809, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.33919578790664673, "rewards/margins": 0.21739844977855682, "rewards/rejected": -0.55659419298172, "step": 9740 }, { "epoch": 0.72, "learning_rate": 1.1079649470071066e-06, "logits/chosen": -1.943273901939392, "logits/rejected": -1.5166857242584229, "logps/chosen": -513.671875, "logps/rejected": -633.4223022460938, "loss": 0.6832, "rewards/accuracies": 0.625, "rewards/chosen": -0.34975969791412354, "rewards/margins": 0.15630680322647095, "rewards/rejected": -0.5060665011405945, "step": 9750 }, { "epoch": 0.72, "learning_rate": 1.1026226210984342e-06, "logits/chosen": -2.1769795417785645, "logits/rejected": -1.407486915588379, "logps/chosen": -502.85638427734375, "logps/rejected": -702.5418701171875, "loss": 0.6797, "rewards/accuracies": 0.75, "rewards/chosen": -0.328991562128067, "rewards/margins": 0.24244090914726257, "rewards/rejected": -0.5714325308799744, "step": 9760 }, { "epoch": 0.72, "learning_rate": 1.0972895596977404e-06, "logits/chosen": -2.2282614707946777, "logits/rejected": -1.5694293975830078, "logps/chosen": -467.8055114746094, "logps/rejected": -632.4810791015625, "loss": 0.6747, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2795693278312683, "rewards/margins": 0.23960928618907928, "rewards/rejected": -0.5191786885261536, "step": 9770 }, { "epoch": 0.72, "learning_rate": 1.0919657981628257e-06, "logits/chosen": -1.9854530096054077, "logits/rejected": -1.6416412591934204, "logps/chosen": -516.6514892578125, "logps/rejected": -628.7706909179688, "loss": 0.6752, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.30685752630233765, "rewards/margins": 0.1779886931180954, "rewards/rejected": -0.48484620451927185, "step": 9780 }, { "epoch": 0.72, "learning_rate": 1.0866513717898339e-06, "logits/chosen": -2.27215838432312, "logits/rejected": -2.0004055500030518, "logps/chosen": -490.93585205078125, "logps/rejected": -649.8984375, "loss": 0.6797, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3238914906978607, "rewards/margins": 0.15162546932697296, "rewards/rejected": -0.4755169749259949, "step": 9790 }, { "epoch": 0.72, "learning_rate": 1.0813463158130152e-06, "logits/chosen": -1.9868415594100952, "logits/rejected": -1.3868099451065063, "logps/chosen": -516.5528564453125, "logps/rejected": -692.7035522460938, "loss": 0.6749, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3654528856277466, "rewards/margins": 0.20662899315357208, "rewards/rejected": -0.5720818042755127, "step": 9800 }, { "epoch": 0.72, "learning_rate": 1.076050665404495e-06, "logits/chosen": -1.9859046936035156, "logits/rejected": -1.5955336093902588, "logps/chosen": -498.20501708984375, "logps/rejected": -668.6904907226562, "loss": 0.6765, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3444667458534241, "rewards/margins": 0.1918603479862213, "rewards/rejected": -0.536327064037323, "step": 9810 }, { "epoch": 0.72, "learning_rate": 1.0707644556740427e-06, "logits/chosen": -2.1245362758636475, "logits/rejected": -1.440810203552246, "logps/chosen": -475.0955505371094, "logps/rejected": -641.7293701171875, "loss": 0.6748, "rewards/accuracies": 0.75, "rewards/chosen": -0.27690187096595764, "rewards/margins": 0.23327037692070007, "rewards/rejected": -0.5101722478866577, "step": 9820 }, { "epoch": 0.73, "learning_rate": 1.0654877216688344e-06, "logits/chosen": -2.0019280910491943, "logits/rejected": -1.5298067331314087, "logps/chosen": -510.3150939941406, "logps/rejected": -698.8375244140625, "loss": 0.6783, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3210746943950653, "rewards/margins": 0.2208009660243988, "rewards/rejected": -0.5418756604194641, "step": 9830 }, { "epoch": 0.73, "learning_rate": 1.060220498373223e-06, "logits/chosen": -2.2014145851135254, "logits/rejected": -1.5351946353912354, "logps/chosen": -506.5331115722656, "logps/rejected": -678.2590942382812, "loss": 0.6782, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.35456401109695435, "rewards/margins": 0.22744254767894745, "rewards/rejected": -0.582006573677063, "step": 9840 }, { "epoch": 0.73, "learning_rate": 1.0549628207085086e-06, "logits/chosen": -1.9199187755584717, "logits/rejected": -1.5168988704681396, "logps/chosen": -447.25408935546875, "logps/rejected": -586.1771850585938, "loss": 0.6807, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2728957235813141, "rewards/margins": 0.1769450455904007, "rewards/rejected": -0.449840784072876, "step": 9850 }, { "epoch": 0.73, "learning_rate": 1.0497147235327012e-06, "logits/chosen": -2.0310263633728027, "logits/rejected": -1.4027396440505981, "logps/chosen": -473.4383239746094, "logps/rejected": -653.5820922851562, "loss": 0.6745, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.298270046710968, "rewards/margins": 0.24387094378471375, "rewards/rejected": -0.5421410202980042, "step": 9860 }, { "epoch": 0.73, "learning_rate": 1.044476241640294e-06, "logits/chosen": -2.1483652591705322, "logits/rejected": -1.594416618347168, "logps/chosen": -511.98681640625, "logps/rejected": -676.8222045898438, "loss": 0.6752, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3037962317466736, "rewards/margins": 0.20562143623828888, "rewards/rejected": -0.5094176530838013, "step": 9870 }, { "epoch": 0.73, "learning_rate": 1.0392474097620326e-06, "logits/chosen": -2.0096027851104736, "logits/rejected": -1.4715690612792969, "logps/chosen": -504.80560302734375, "logps/rejected": -690.4794921875, "loss": 0.6697, "rewards/accuracies": 0.875, "rewards/chosen": -0.35185638070106506, "rewards/margins": 0.22734661400318146, "rewards/rejected": -0.5792030096054077, "step": 9880 }, { "epoch": 0.73, "learning_rate": 1.0340282625646838e-06, "logits/chosen": -1.9600883722305298, "logits/rejected": -1.1736111640930176, "logps/chosen": -495.4485778808594, "logps/rejected": -689.5892333984375, "loss": 0.6775, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.31907275319099426, "rewards/margins": 0.2669069170951843, "rewards/rejected": -0.585979700088501, "step": 9890 }, { "epoch": 0.73, "learning_rate": 1.0288188346508043e-06, "logits/chosen": -2.165348529815674, "logits/rejected": -1.5086603164672852, "logps/chosen": -464.784912109375, "logps/rejected": -629.7717895507812, "loss": 0.6765, "rewards/accuracies": 0.75, "rewards/chosen": -0.2915901243686676, "rewards/margins": 0.21797578036785126, "rewards/rejected": -0.5095659494400024, "step": 9900 }, { "epoch": 0.73, "learning_rate": 1.0236191605585124e-06, "logits/chosen": -2.203737258911133, "logits/rejected": -1.8750213384628296, "logps/chosen": -450.96661376953125, "logps/rejected": -618.3648681640625, "loss": 0.6762, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.28424859046936035, "rewards/margins": 0.1816238909959793, "rewards/rejected": -0.46587246656417847, "step": 9910 }, { "epoch": 0.73, "learning_rate": 1.0184292747612617e-06, "logits/chosen": -2.2975900173187256, "logits/rejected": -1.5579675436019897, "logps/chosen": -452.84637451171875, "logps/rejected": -600.5335693359375, "loss": 0.6795, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2815398573875427, "rewards/margins": 0.1981794834136963, "rewards/rejected": -0.4797193109989166, "step": 9920 }, { "epoch": 0.73, "learning_rate": 1.0132492116676079e-06, "logits/chosen": -2.024618625640869, "logits/rejected": -1.4086413383483887, "logps/chosen": -460.3633728027344, "logps/rejected": -676.0171508789062, "loss": 0.6784, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.30948811769485474, "rewards/margins": 0.2521976828575134, "rewards/rejected": -0.5616858601570129, "step": 9930 }, { "epoch": 0.73, "learning_rate": 1.0080790056209826e-06, "logits/chosen": -1.9677470922470093, "logits/rejected": -1.570657730102539, "logps/chosen": -496.4248962402344, "logps/rejected": -697.0764770507812, "loss": 0.6734, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3285077214241028, "rewards/margins": 0.23284277319908142, "rewards/rejected": -0.5613504648208618, "step": 9940 }, { "epoch": 0.73, "learning_rate": 1.002918690899469e-06, "logits/chosen": -2.296189785003662, "logits/rejected": -1.7544498443603516, "logps/chosen": -461.7142639160156, "logps/rejected": -621.6342163085938, "loss": 0.6759, "rewards/accuracies": 0.75, "rewards/chosen": -0.2758150100708008, "rewards/margins": 0.2133830338716507, "rewards/rejected": -0.4891980290412903, "step": 9950 }, { "epoch": 0.73, "learning_rate": 9.977683017155674e-07, "logits/chosen": -2.123593807220459, "logits/rejected": -1.5396398305892944, "logps/chosen": -522.75830078125, "logps/rejected": -710.5360107421875, "loss": 0.6745, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.35130995512008667, "rewards/margins": 0.2087458074092865, "rewards/rejected": -0.5600557327270508, "step": 9960 }, { "epoch": 0.74, "learning_rate": 9.926278722159766e-07, "logits/chosen": -1.9391288757324219, "logits/rejected": -1.6372779607772827, "logps/chosen": -505.9022521972656, "logps/rejected": -635.9070434570312, "loss": 0.681, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3656207323074341, "rewards/margins": 0.14316783845424652, "rewards/rejected": -0.5087885856628418, "step": 9970 }, { "epoch": 0.74, "learning_rate": 9.874974364813594e-07, "logits/chosen": -2.2025036811828613, "logits/rejected": -1.601226806640625, "logps/chosen": -503.858642578125, "logps/rejected": -707.1383666992188, "loss": 0.676, "rewards/accuracies": 0.875, "rewards/chosen": -0.3315080404281616, "rewards/margins": 0.24758438766002655, "rewards/rejected": -0.5790924429893494, "step": 9980 }, { "epoch": 0.74, "learning_rate": 9.82377028526125e-07, "logits/chosen": -2.130795955657959, "logits/rejected": -1.6339502334594727, "logps/chosen": -440.772705078125, "logps/rejected": -632.6983032226562, "loss": 0.6735, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.30733758211135864, "rewards/margins": 0.2242458611726761, "rewards/rejected": -0.5315834283828735, "step": 9990 }, { "epoch": 0.74, "learning_rate": 9.772666822981954e-07, "logits/chosen": -2.0717217922210693, "logits/rejected": -1.7363048791885376, "logps/chosen": -518.200439453125, "logps/rejected": -704.6754150390625, "loss": 0.6737, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.36196985840797424, "rewards/margins": 0.18813160061836243, "rewards/rejected": -0.5501014590263367, "step": 10000 }, { "epoch": 0.74, "learning_rate": 9.721664316787854e-07, "logits/chosen": -2.1039302349090576, "logits/rejected": -1.4508471488952637, "logps/chosen": -455.5409240722656, "logps/rejected": -671.15380859375, "loss": 0.677, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.28868886828422546, "rewards/margins": 0.27234750986099243, "rewards/rejected": -0.5610364079475403, "step": 10010 }, { "epoch": 0.74, "learning_rate": 9.670763104821778e-07, "logits/chosen": -1.8893873691558838, "logits/rejected": -1.5077002048492432, "logps/chosen": -494.78106689453125, "logps/rejected": -652.6716918945312, "loss": 0.6769, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2869865298271179, "rewards/margins": 0.19636228680610657, "rewards/rejected": -0.4833487868309021, "step": 10020 }, { "epoch": 0.74, "learning_rate": 9.61996352455497e-07, "logits/chosen": -2.118373394012451, "logits/rejected": -1.4700742959976196, "logps/chosen": -599.7098388671875, "logps/rejected": -786.4064331054688, "loss": 0.6767, "rewards/accuracies": 0.75, "rewards/chosen": -0.40865272283554077, "rewards/margins": 0.2510501444339752, "rewards/rejected": -0.6597028970718384, "step": 10030 }, { "epoch": 0.74, "learning_rate": 9.569265912784855e-07, "logits/chosen": -2.0295186042785645, "logits/rejected": -1.4700676202774048, "logps/chosen": -435.60284423828125, "logps/rejected": -617.7648315429688, "loss": 0.6789, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2584506869316101, "rewards/margins": 0.23569950461387634, "rewards/rejected": -0.49415022134780884, "step": 10040 }, { "epoch": 0.74, "learning_rate": 9.518670605632829e-07, "logits/chosen": -1.8923717737197876, "logits/rejected": -1.3680839538574219, "logps/chosen": -487.46337890625, "logps/rejected": -659.8492431640625, "loss": 0.6799, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.34842944145202637, "rewards/margins": 0.2110081911087036, "rewards/rejected": -0.55943763256073, "step": 10050 }, { "epoch": 0.74, "learning_rate": 9.468177938542023e-07, "logits/chosen": -2.2418034076690674, "logits/rejected": -1.5909608602523804, "logps/chosen": -574.9125366210938, "logps/rejected": -758.3059692382812, "loss": 0.6757, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3717232644557953, "rewards/margins": 0.2254868447780609, "rewards/rejected": -0.5972102284431458, "step": 10060 }, { "epoch": 0.74, "learning_rate": 9.417788246275051e-07, "logits/chosen": -1.9779733419418335, "logits/rejected": -1.5637738704681396, "logps/chosen": -550.5265502929688, "logps/rejected": -729.551025390625, "loss": 0.6742, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.35652559995651245, "rewards/margins": 0.21362201869487762, "rewards/rejected": -0.5701476335525513, "step": 10070 }, { "epoch": 0.74, "learning_rate": 9.367501862911812e-07, "logits/chosen": -2.0663363933563232, "logits/rejected": -1.6880519390106201, "logps/chosen": -533.3814697265625, "logps/rejected": -621.5474853515625, "loss": 0.6835, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3372114300727844, "rewards/margins": 0.14661863446235657, "rewards/rejected": -0.483830064535141, "step": 10080 }, { "epoch": 0.74, "learning_rate": 9.317319121847301e-07, "logits/chosen": -2.2238376140594482, "logits/rejected": -1.7308686971664429, "logps/chosen": -554.133544921875, "logps/rejected": -731.7711181640625, "loss": 0.6795, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.38974472880363464, "rewards/margins": 0.21067500114440918, "rewards/rejected": -0.6004197597503662, "step": 10090 }, { "epoch": 0.75, "learning_rate": 9.267240355789347e-07, "logits/chosen": -1.9329675436019897, "logits/rejected": -1.5906981229782104, "logps/chosen": -534.7722778320312, "logps/rejected": -735.968017578125, "loss": 0.6775, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3699116110801697, "rewards/margins": 0.20721833407878876, "rewards/rejected": -0.5771300196647644, "step": 10100 }, { "epoch": 0.75, "learning_rate": 9.21726589675643e-07, "logits/chosen": -2.3005259037017822, "logits/rejected": -1.5307104587554932, "logps/chosen": -503.2723693847656, "logps/rejected": -707.9110107421875, "loss": 0.6765, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3377617299556732, "rewards/margins": 0.25421056151390076, "rewards/rejected": -0.591972291469574, "step": 10110 }, { "epoch": 0.75, "learning_rate": 9.16739607607551e-07, "logits/chosen": -2.124446392059326, "logits/rejected": -1.6150457859039307, "logps/chosen": -455.0406799316406, "logps/rejected": -670.0589599609375, "loss": 0.6713, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3168886601924896, "rewards/margins": 0.21924015879631042, "rewards/rejected": -0.5361288189888, "step": 10120 }, { "epoch": 0.75, "learning_rate": 9.117631224379772e-07, "logits/chosen": -2.1705520153045654, "logits/rejected": -1.468056321144104, "logps/chosen": -481.6073303222656, "logps/rejected": -620.9921875, "loss": 0.6814, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2927219271659851, "rewards/margins": 0.17957019805908203, "rewards/rejected": -0.47229212522506714, "step": 10130 }, { "epoch": 0.75, "learning_rate": 9.067971671606487e-07, "logits/chosen": -2.132093906402588, "logits/rejected": -1.703565001487732, "logps/chosen": -539.2412109375, "logps/rejected": -699.5081787109375, "loss": 0.6797, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.37128663063049316, "rewards/margins": 0.16325204074382782, "rewards/rejected": -0.5345386862754822, "step": 10140 }, { "epoch": 0.75, "learning_rate": 9.018417746994784e-07, "logits/chosen": -2.1238558292388916, "logits/rejected": -1.5699470043182373, "logps/chosen": -476.8199157714844, "logps/rejected": -690.6183471679688, "loss": 0.677, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3270709216594696, "rewards/margins": 0.2720441520214081, "rewards/rejected": -0.5991150736808777, "step": 10150 }, { "epoch": 0.75, "learning_rate": 8.968969779083503e-07, "logits/chosen": -2.0633292198181152, "logits/rejected": -1.7231098413467407, "logps/chosen": -480.430419921875, "logps/rejected": -681.65478515625, "loss": 0.6735, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.28867703676223755, "rewards/margins": 0.24234966933727264, "rewards/rejected": -0.5310267210006714, "step": 10160 }, { "epoch": 0.75, "learning_rate": 8.919628095708985e-07, "logits/chosen": -2.283214807510376, "logits/rejected": -1.5296978950500488, "logps/chosen": -410.012451171875, "logps/rejected": -629.7935791015625, "loss": 0.6747, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24320495128631592, "rewards/margins": 0.24381664395332336, "rewards/rejected": -0.4870215356349945, "step": 10170 }, { "epoch": 0.75, "learning_rate": 8.87039302400291e-07, "logits/chosen": -2.013411045074463, "logits/rejected": -1.5313403606414795, "logps/chosen": -620.913818359375, "logps/rejected": -770.2735595703125, "loss": 0.6794, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.48701587319374084, "rewards/margins": 0.185529887676239, "rewards/rejected": -0.6725457310676575, "step": 10180 }, { "epoch": 0.75, "learning_rate": 8.821264890390149e-07, "logits/chosen": -2.2220911979675293, "logits/rejected": -1.632267951965332, "logps/chosen": -467.25726318359375, "logps/rejected": -669.9965209960938, "loss": 0.675, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.33775681257247925, "rewards/margins": 0.23464882373809814, "rewards/rejected": -0.5724056363105774, "step": 10190 }, { "epoch": 0.75, "learning_rate": 8.772244020586568e-07, "logits/chosen": -1.933566689491272, "logits/rejected": -1.6964616775512695, "logps/chosen": -583.8692016601562, "logps/rejected": -701.6781005859375, "loss": 0.6873, "rewards/accuracies": 0.75, "rewards/chosen": -0.43451985716819763, "rewards/margins": 0.13007453083992004, "rewards/rejected": -0.5645943880081177, "step": 10200 }, { "epoch": 0.75, "learning_rate": 8.72333073959687e-07, "logits/chosen": -2.202260732650757, "logits/rejected": -1.5709943771362305, "logps/chosen": -536.2445068359375, "logps/rejected": -692.7323608398438, "loss": 0.6764, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3516356348991394, "rewards/margins": 0.22662141919136047, "rewards/rejected": -0.5782569646835327, "step": 10210 }, { "epoch": 0.75, "learning_rate": 8.674525371712478e-07, "logits/chosen": -2.285273790359497, "logits/rejected": -1.8695347309112549, "logps/chosen": -549.2257690429688, "logps/rejected": -681.0372314453125, "loss": 0.6823, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3532102108001709, "rewards/margins": 0.15622815489768982, "rewards/rejected": -0.5094383955001831, "step": 10220 }, { "epoch": 0.75, "learning_rate": 8.625828240509351e-07, "logits/chosen": -2.3199729919433594, "logits/rejected": -1.6534223556518555, "logps/chosen": -485.77655029296875, "logps/rejected": -636.2962036132812, "loss": 0.6738, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2901304364204407, "rewards/margins": 0.1961476057767868, "rewards/rejected": -0.4862779676914215, "step": 10230 }, { "epoch": 0.76, "learning_rate": 8.577239668845843e-07, "logits/chosen": -2.239691972732544, "logits/rejected": -1.775019645690918, "logps/chosen": -463.2638244628906, "logps/rejected": -610.0720825195312, "loss": 0.6732, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.30191344022750854, "rewards/margins": 0.20565417408943176, "rewards/rejected": -0.5075676441192627, "step": 10240 }, { "epoch": 0.76, "learning_rate": 8.528759978860551e-07, "logits/chosen": -2.247238874435425, "logits/rejected": -1.6750879287719727, "logps/chosen": -485.390380859375, "logps/rejected": -621.9509887695312, "loss": 0.6824, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.284163236618042, "rewards/margins": 0.2057182788848877, "rewards/rejected": -0.4898815155029297, "step": 10250 }, { "epoch": 0.76, "learning_rate": 8.480389491970228e-07, "logits/chosen": -2.3460631370544434, "logits/rejected": -1.6503727436065674, "logps/chosen": -511.74908447265625, "logps/rejected": -766.7935791015625, "loss": 0.6745, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3222716748714447, "rewards/margins": 0.2748538851737976, "rewards/rejected": -0.5971255302429199, "step": 10260 }, { "epoch": 0.76, "learning_rate": 8.432128528867595e-07, "logits/chosen": -2.14178204536438, "logits/rejected": -1.6926014423370361, "logps/chosen": -504.4384765625, "logps/rejected": -717.1395263671875, "loss": 0.6775, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3559182584285736, "rewards/margins": 0.19138272106647491, "rewards/rejected": -0.5473009943962097, "step": 10270 }, { "epoch": 0.76, "learning_rate": 8.383977409519234e-07, "logits/chosen": -2.0021047592163086, "logits/rejected": -1.3700109720230103, "logps/chosen": -510.72088623046875, "logps/rejected": -693.6507568359375, "loss": 0.6743, "rewards/accuracies": 0.75, "rewards/chosen": -0.3527684211730957, "rewards/margins": 0.2227535992860794, "rewards/rejected": -0.5755220055580139, "step": 10280 }, { "epoch": 0.76, "learning_rate": 8.335936453163498e-07, "logits/chosen": -2.0931873321533203, "logits/rejected": -1.5953510999679565, "logps/chosen": -538.0341796875, "logps/rejected": -666.8701171875, "loss": 0.6777, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.37745970487594604, "rewards/margins": 0.18061693012714386, "rewards/rejected": -0.5580765604972839, "step": 10290 }, { "epoch": 0.76, "learning_rate": 8.288005978308341e-07, "logits/chosen": -2.1243772506713867, "logits/rejected": -1.6157270669937134, "logps/chosen": -543.1414794921875, "logps/rejected": -740.5015258789062, "loss": 0.6788, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3754534125328064, "rewards/margins": 0.19746917486190796, "rewards/rejected": -0.5729225873947144, "step": 10300 }, { "epoch": 0.76, "learning_rate": 8.240186302729228e-07, "logits/chosen": -1.9787235260009766, "logits/rejected": -1.4454927444458008, "logps/chosen": -543.25390625, "logps/rejected": -709.697265625, "loss": 0.6809, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.36787158250808716, "rewards/margins": 0.21584220230579376, "rewards/rejected": -0.583713710308075, "step": 10310 }, { "epoch": 0.76, "learning_rate": 8.192477743467078e-07, "logits/chosen": -1.963918685913086, "logits/rejected": -1.3494656085968018, "logps/chosen": -563.5178833007812, "logps/rejected": -796.209228515625, "loss": 0.6687, "rewards/accuracies": 0.875, "rewards/chosen": -0.3690522015094757, "rewards/margins": 0.2818796634674072, "rewards/rejected": -0.6509319543838501, "step": 10320 }, { "epoch": 0.76, "learning_rate": 8.144880616826075e-07, "logits/chosen": -2.1680619716644287, "logits/rejected": -1.7342450618743896, "logps/chosen": -607.0447387695312, "logps/rejected": -747.3870849609375, "loss": 0.6819, "rewards/accuracies": 0.75, "rewards/chosen": -0.43493008613586426, "rewards/margins": 0.17203286290168762, "rewards/rejected": -0.6069629788398743, "step": 10330 }, { "epoch": 0.76, "learning_rate": 8.097395238371619e-07, "logits/chosen": -1.8396650552749634, "logits/rejected": -1.5018970966339111, "logps/chosen": -508.913818359375, "logps/rejected": -689.7279052734375, "loss": 0.6799, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.37605321407318115, "rewards/margins": 0.1630445271730423, "rewards/rejected": -0.5390976667404175, "step": 10340 }, { "epoch": 0.76, "learning_rate": 8.05002192292823e-07, "logits/chosen": -2.095289945602417, "logits/rejected": -1.7318403720855713, "logps/chosen": -498.1513671875, "logps/rejected": -680.5941162109375, "loss": 0.6764, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3559712767601013, "rewards/margins": 0.1848718822002411, "rewards/rejected": -0.5408431887626648, "step": 10350 }, { "epoch": 0.76, "learning_rate": 8.002760984577479e-07, "logits/chosen": -2.0322444438934326, "logits/rejected": -1.6234338283538818, "logps/chosen": -594.2619018554688, "logps/rejected": -718.9171142578125, "loss": 0.6817, "rewards/accuracies": 0.75, "rewards/chosen": -0.3711128234863281, "rewards/margins": 0.16343705356121063, "rewards/rejected": -0.5345498919487, "step": 10360 }, { "epoch": 0.76, "learning_rate": 7.955612736655854e-07, "logits/chosen": -2.12717866897583, "logits/rejected": -1.8333896398544312, "logps/chosen": -462.38128662109375, "logps/rejected": -603.0007934570312, "loss": 0.6825, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3120769262313843, "rewards/margins": 0.16003912687301636, "rewards/rejected": -0.472116082906723, "step": 10370 }, { "epoch": 0.77, "learning_rate": 7.908577491752726e-07, "logits/chosen": -2.2322306632995605, "logits/rejected": -1.7839767932891846, "logps/chosen": -487.2809143066406, "logps/rejected": -616.3549194335938, "loss": 0.6842, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3307134509086609, "rewards/margins": 0.1458134949207306, "rewards/rejected": -0.4765269160270691, "step": 10380 }, { "epoch": 0.77, "learning_rate": 7.861655561708281e-07, "logits/chosen": -2.0827882289886475, "logits/rejected": -1.550077199935913, "logps/chosen": -546.5426025390625, "logps/rejected": -735.1231079101562, "loss": 0.6796, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.38280683755874634, "rewards/margins": 0.18711796402931213, "rewards/rejected": -0.5699248313903809, "step": 10390 }, { "epoch": 0.77, "learning_rate": 7.814847257611404e-07, "logits/chosen": -2.293727159500122, "logits/rejected": -1.706627607345581, "logps/chosen": -552.8975219726562, "logps/rejected": -691.0783081054688, "loss": 0.6785, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3492180407047272, "rewards/margins": 0.1909371018409729, "rewards/rejected": -0.5401551127433777, "step": 10400 }, { "epoch": 0.77, "learning_rate": 7.768152889797678e-07, "logits/chosen": -2.173081398010254, "logits/rejected": -1.6440322399139404, "logps/chosen": -483.9349670410156, "logps/rejected": -670.7567138671875, "loss": 0.6781, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35754281282424927, "rewards/margins": 0.21302318572998047, "rewards/rejected": -0.5705659985542297, "step": 10410 }, { "epoch": 0.77, "learning_rate": 7.72157276784729e-07, "logits/chosen": -2.081317186355591, "logits/rejected": -1.7357501983642578, "logps/chosen": -491.40185546875, "logps/rejected": -666.9627075195312, "loss": 0.6768, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3048505485057831, "rewards/margins": 0.21928587555885315, "rewards/rejected": -0.5241364240646362, "step": 10420 }, { "epoch": 0.77, "learning_rate": 7.675107200582973e-07, "logits/chosen": -2.2176342010498047, "logits/rejected": -1.8962520360946655, "logps/chosen": -457.74169921875, "logps/rejected": -614.2047119140625, "loss": 0.6806, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.30585217475891113, "rewards/margins": 0.1633278727531433, "rewards/rejected": -0.46918004751205444, "step": 10430 }, { "epoch": 0.77, "learning_rate": 7.628756496067968e-07, "logits/chosen": -2.0073611736297607, "logits/rejected": -1.5957012176513672, "logps/chosen": -473.5005798339844, "logps/rejected": -637.9631958007812, "loss": 0.6738, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3140585124492645, "rewards/margins": 0.20575948059558868, "rewards/rejected": -0.5198179483413696, "step": 10440 }, { "epoch": 0.77, "learning_rate": 7.582520961604014e-07, "logits/chosen": -2.223867893218994, "logits/rejected": -1.5683656930923462, "logps/chosen": -495.99969482421875, "logps/rejected": -676.0767822265625, "loss": 0.6805, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3033469617366791, "rewards/margins": 0.24538779258728027, "rewards/rejected": -0.5487347841262817, "step": 10450 }, { "epoch": 0.77, "learning_rate": 7.536400903729254e-07, "logits/chosen": -2.0299694538116455, "logits/rejected": -1.6584131717681885, "logps/chosen": -393.17626953125, "logps/rejected": -634.564453125, "loss": 0.6728, "rewards/accuracies": 0.75, "rewards/chosen": -0.2712675929069519, "rewards/margins": 0.24683265388011932, "rewards/rejected": -0.5181002616882324, "step": 10460 }, { "epoch": 0.77, "learning_rate": 7.490396628216237e-07, "logits/chosen": -2.129298210144043, "logits/rejected": -1.6416898965835571, "logps/chosen": -576.46533203125, "logps/rejected": -718.6011962890625, "loss": 0.6755, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.38648372888565063, "rewards/margins": 0.18817496299743652, "rewards/rejected": -0.5746586918830872, "step": 10470 }, { "epoch": 0.77, "learning_rate": 7.444508440069878e-07, "logits/chosen": -2.1478819847106934, "logits/rejected": -1.7941261529922485, "logps/chosen": -607.56005859375, "logps/rejected": -744.7571411132812, "loss": 0.6796, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4114052653312683, "rewards/margins": 0.15853102505207062, "rewards/rejected": -0.5699363350868225, "step": 10480 }, { "epoch": 0.77, "learning_rate": 7.39873664352545e-07, "logits/chosen": -2.0840506553649902, "logits/rejected": -1.5761439800262451, "logps/chosen": -429.5283203125, "logps/rejected": -660.0428466796875, "loss": 0.6772, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2839290499687195, "rewards/margins": 0.25456756353378296, "rewards/rejected": -0.5384966135025024, "step": 10490 }, { "epoch": 0.77, "learning_rate": 7.353081542046573e-07, "logits/chosen": -2.2137935161590576, "logits/rejected": -1.6224193572998047, "logps/chosen": -468.75738525390625, "logps/rejected": -639.6424560546875, "loss": 0.6802, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3299352526664734, "rewards/margins": 0.22001305222511292, "rewards/rejected": -0.5499483346939087, "step": 10500 }, { "epoch": 0.78, "learning_rate": 7.307543438323145e-07, "logits/chosen": -2.11226224899292, "logits/rejected": -1.673348069190979, "logps/chosen": -464.3143005371094, "logps/rejected": -643.0367431640625, "loss": 0.6804, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.31322482228279114, "rewards/margins": 0.21714580059051514, "rewards/rejected": -0.5303705930709839, "step": 10510 }, { "epoch": 0.78, "learning_rate": 7.262122634269426e-07, "logits/chosen": -2.335361957550049, "logits/rejected": -2.0480849742889404, "logps/chosen": -493.2566833496094, "logps/rejected": -651.0194091796875, "loss": 0.6816, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.32200655341148376, "rewards/margins": 0.19539830088615417, "rewards/rejected": -0.5174048542976379, "step": 10520 }, { "epoch": 0.78, "learning_rate": 7.216819431021954e-07, "logits/chosen": -2.136803150177002, "logits/rejected": -1.6267551183700562, "logps/chosen": -559.646728515625, "logps/rejected": -741.89013671875, "loss": 0.6716, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4095853269100189, "rewards/margins": 0.23721332848072052, "rewards/rejected": -0.6467987298965454, "step": 10530 }, { "epoch": 0.78, "learning_rate": 7.171634128937588e-07, "logits/chosen": -2.1732850074768066, "logits/rejected": -1.477952241897583, "logps/chosen": -605.5438232421875, "logps/rejected": -758.229248046875, "loss": 0.6779, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3889063894748688, "rewards/margins": 0.23894551396369934, "rewards/rejected": -0.6278518438339233, "step": 10540 }, { "epoch": 0.78, "learning_rate": 7.12656702759153e-07, "logits/chosen": -2.3135266304016113, "logits/rejected": -1.9719765186309814, "logps/chosen": -436.92718505859375, "logps/rejected": -587.4166259765625, "loss": 0.6805, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.27768781781196594, "rewards/margins": 0.16697007417678833, "rewards/rejected": -0.4446578919887543, "step": 10550 }, { "epoch": 0.78, "learning_rate": 7.081618425775296e-07, "logits/chosen": -2.0069708824157715, "logits/rejected": -1.70595383644104, "logps/chosen": -520.0142822265625, "logps/rejected": -698.7132568359375, "loss": 0.6772, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.354240357875824, "rewards/margins": 0.19485656917095184, "rewards/rejected": -0.549096941947937, "step": 10560 }, { "epoch": 0.78, "learning_rate": 7.03678862149477e-07, "logits/chosen": -2.196704387664795, "logits/rejected": -1.8024059534072876, "logps/chosen": -538.8157958984375, "logps/rejected": -708.4097290039062, "loss": 0.6744, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3629389703273773, "rewards/margins": 0.2258622944355011, "rewards/rejected": -0.5888012647628784, "step": 10570 }, { "epoch": 0.78, "learning_rate": 6.992077911968215e-07, "logits/chosen": -2.0354130268096924, "logits/rejected": -1.5422694683074951, "logps/chosen": -488.69598388671875, "logps/rejected": -713.7655029296875, "loss": 0.6692, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3376120328903198, "rewards/margins": 0.24084456264972687, "rewards/rejected": -0.5784566402435303, "step": 10580 }, { "epoch": 0.78, "learning_rate": 6.94748659362432e-07, "logits/chosen": -1.9729732275009155, "logits/rejected": -1.5894806385040283, "logps/chosen": -439.6116638183594, "logps/rejected": -625.995849609375, "loss": 0.6763, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3148488402366638, "rewards/margins": 0.21760153770446777, "rewards/rejected": -0.5324503779411316, "step": 10590 }, { "epoch": 0.78, "learning_rate": 6.903014962100196e-07, "logits/chosen": -2.1928913593292236, "logits/rejected": -1.4442336559295654, "logps/chosen": -458.33544921875, "logps/rejected": -647.8917236328125, "loss": 0.6782, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2990182042121887, "rewards/margins": 0.23659662902355194, "rewards/rejected": -0.5356148481369019, "step": 10600 }, { "epoch": 0.78, "learning_rate": 6.858663312239439e-07, "logits/chosen": -2.2239551544189453, "logits/rejected": -1.7644134759902954, "logps/chosen": -504.9798889160156, "logps/rejected": -692.3482666015625, "loss": 0.6728, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3300975263118744, "rewards/margins": 0.24519240856170654, "rewards/rejected": -0.5752899646759033, "step": 10610 }, { "epoch": 0.78, "learning_rate": 6.814431938090205e-07, "logits/chosen": -2.0547568798065186, "logits/rejected": -1.8019723892211914, "logps/chosen": -431.4345703125, "logps/rejected": -600.4028930664062, "loss": 0.6867, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2919122874736786, "rewards/margins": 0.14608553051948547, "rewards/rejected": -0.4379977583885193, "step": 10620 }, { "epoch": 0.78, "learning_rate": 6.770321132903199e-07, "logits/chosen": -1.9212839603424072, "logits/rejected": -1.5412118434906006, "logps/chosen": -460.58489990234375, "logps/rejected": -670.6295166015625, "loss": 0.6768, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.322861909866333, "rewards/margins": 0.20820729434490204, "rewards/rejected": -0.5310691595077515, "step": 10630 }, { "epoch": 0.78, "learning_rate": 6.726331189129773e-07, "logits/chosen": -2.191659450531006, "logits/rejected": -1.6888549327850342, "logps/chosen": -465.75250244140625, "logps/rejected": -646.6168212890625, "loss": 0.6776, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24948053061962128, "rewards/margins": 0.24488942325115204, "rewards/rejected": -0.4943699240684509, "step": 10640 }, { "epoch": 0.79, "learning_rate": 6.682462398419986e-07, "logits/chosen": -2.3844292163848877, "logits/rejected": -1.7358745336532593, "logps/chosen": -532.256103515625, "logps/rejected": -633.6298828125, "loss": 0.6789, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3522736430168152, "rewards/margins": 0.1818903386592865, "rewards/rejected": -0.5341639518737793, "step": 10650 }, { "epoch": 0.79, "learning_rate": 6.638715051620657e-07, "logits/chosen": -2.107304573059082, "logits/rejected": -1.8080666065216064, "logps/chosen": -509.128662109375, "logps/rejected": -651.2139282226562, "loss": 0.6793, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3440130650997162, "rewards/margins": 0.174733966588974, "rewards/rejected": -0.5187470316886902, "step": 10660 }, { "epoch": 0.79, "learning_rate": 6.595089438773423e-07, "logits/chosen": -2.092844009399414, "logits/rejected": -1.6611655950546265, "logps/chosen": -574.4708862304688, "logps/rejected": -704.6522216796875, "loss": 0.6785, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3944827616214752, "rewards/margins": 0.15879106521606445, "rewards/rejected": -0.5532738566398621, "step": 10670 }, { "epoch": 0.79, "learning_rate": 6.551585849112857e-07, "logits/chosen": -2.047351360321045, "logits/rejected": -1.433118462562561, "logps/chosen": -398.7933044433594, "logps/rejected": -616.2645874023438, "loss": 0.6715, "rewards/accuracies": 0.75, "rewards/chosen": -0.29081764817237854, "rewards/margins": 0.25022947788238525, "rewards/rejected": -0.5410471558570862, "step": 10680 }, { "epoch": 0.79, "learning_rate": 6.508204571064531e-07, "logits/chosen": -2.2057063579559326, "logits/rejected": -1.5227476358413696, "logps/chosen": -417.32635498046875, "logps/rejected": -659.7145385742188, "loss": 0.6733, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.23548190295696259, "rewards/margins": 0.2823042571544647, "rewards/rejected": -0.5177860856056213, "step": 10690 }, { "epoch": 0.79, "learning_rate": 6.464945892243074e-07, "logits/chosen": -2.0862948894500732, "logits/rejected": -1.6180864572525024, "logps/chosen": -469.9803771972656, "logps/rejected": -619.9669799804688, "loss": 0.6802, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2782118618488312, "rewards/margins": 0.212301567196846, "rewards/rejected": -0.49051347374916077, "step": 10700 }, { "epoch": 0.79, "learning_rate": 6.421810099450306e-07, "logits/chosen": -2.1243906021118164, "logits/rejected": -1.5795905590057373, "logps/chosen": -442.5704650878906, "logps/rejected": -655.1180419921875, "loss": 0.6735, "rewards/accuracies": 0.75, "rewards/chosen": -0.2789689898490906, "rewards/margins": 0.23074118793010712, "rewards/rejected": -0.5097101926803589, "step": 10710 }, { "epoch": 0.79, "learning_rate": 6.378797478673332e-07, "logits/chosen": -2.160186767578125, "logits/rejected": -1.723447561264038, "logps/chosen": -444.0782775878906, "logps/rejected": -602.89404296875, "loss": 0.6772, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2851659655570984, "rewards/margins": 0.17764528095722198, "rewards/rejected": -0.4628112316131592, "step": 10720 }, { "epoch": 0.79, "learning_rate": 6.33590831508262e-07, "logits/chosen": -2.3076741695404053, "logits/rejected": -1.6398544311523438, "logps/chosen": -509.24468994140625, "logps/rejected": -689.7819213867188, "loss": 0.6775, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.30953359603881836, "rewards/margins": 0.24248521029949188, "rewards/rejected": -0.5520188212394714, "step": 10730 }, { "epoch": 0.79, "learning_rate": 6.293142893030122e-07, "logits/chosen": -1.9122966527938843, "logits/rejected": -1.4256138801574707, "logps/chosen": -488.16986083984375, "logps/rejected": -744.6445922851562, "loss": 0.6721, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3417944014072418, "rewards/margins": 0.2601712644100189, "rewards/rejected": -0.6019656658172607, "step": 10740 }, { "epoch": 0.79, "learning_rate": 6.250501496047415e-07, "logits/chosen": -2.1060848236083984, "logits/rejected": -1.696516752243042, "logps/chosen": -517.7864990234375, "logps/rejected": -591.96923828125, "loss": 0.6806, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3320714235305786, "rewards/margins": 0.14746016263961792, "rewards/rejected": -0.47953158617019653, "step": 10750 }, { "epoch": 0.79, "learning_rate": 6.207984406843775e-07, "logits/chosen": -2.2273125648498535, "logits/rejected": -1.4826862812042236, "logps/chosen": -544.3919067382812, "logps/rejected": -679.6768798828125, "loss": 0.6771, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3563745617866516, "rewards/margins": 0.212527796626091, "rewards/rejected": -0.5689023733139038, "step": 10760 }, { "epoch": 0.79, "learning_rate": 6.165591907304355e-07, "logits/chosen": -2.2795653343200684, "logits/rejected": -1.8602781295776367, "logps/chosen": -507.35064697265625, "logps/rejected": -618.6175537109375, "loss": 0.6787, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.35342225432395935, "rewards/margins": 0.1522485315799713, "rewards/rejected": -0.5056707859039307, "step": 10770 }, { "epoch": 0.8, "learning_rate": 6.123324278488249e-07, "logits/chosen": -2.295316219329834, "logits/rejected": -1.6200621128082275, "logps/chosen": -483.7051696777344, "logps/rejected": -640.88232421875, "loss": 0.678, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3418608605861664, "rewards/margins": 0.18382051587104797, "rewards/rejected": -0.5256813764572144, "step": 10780 }, { "epoch": 0.8, "learning_rate": 6.081181800626706e-07, "logits/chosen": -2.026620388031006, "logits/rejected": -1.548638105392456, "logps/chosen": -524.4339599609375, "logps/rejected": -655.52392578125, "loss": 0.6777, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3303012549877167, "rewards/margins": 0.20959484577178955, "rewards/rejected": -0.5398961305618286, "step": 10790 }, { "epoch": 0.8, "learning_rate": 6.039164753121213e-07, "logits/chosen": -2.0915515422821045, "logits/rejected": -1.3634885549545288, "logps/chosen": -446.96173095703125, "logps/rejected": -635.18798828125, "loss": 0.6747, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.290677011013031, "rewards/margins": 0.25277942419052124, "rewards/rejected": -0.5434564352035522, "step": 10800 }, { "epoch": 0.8, "learning_rate": 5.997273414541654e-07, "logits/chosen": -2.0989415645599365, "logits/rejected": -1.789210557937622, "logps/chosen": -524.10107421875, "logps/rejected": -721.1797485351562, "loss": 0.6746, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3501608967781067, "rewards/margins": 0.20281445980072021, "rewards/rejected": -0.5529752969741821, "step": 10810 }, { "epoch": 0.8, "learning_rate": 5.955508062624501e-07, "logits/chosen": -1.8890607357025146, "logits/rejected": -1.4278301000595093, "logps/chosen": -504.56402587890625, "logps/rejected": -693.722900390625, "loss": 0.676, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.38536572456359863, "rewards/margins": 0.20909743010997772, "rewards/rejected": -0.5944632291793823, "step": 10820 }, { "epoch": 0.8, "learning_rate": 5.913868974270923e-07, "logits/chosen": -2.149566650390625, "logits/rejected": -1.767926573753357, "logps/chosen": -491.282470703125, "logps/rejected": -621.3731689453125, "loss": 0.6777, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.33178552985191345, "rewards/margins": 0.19340650737285614, "rewards/rejected": -0.5251920223236084, "step": 10830 }, { "epoch": 0.8, "learning_rate": 5.872356425544968e-07, "logits/chosen": -1.9499695301055908, "logits/rejected": -1.5834046602249146, "logps/chosen": -485.12701416015625, "logps/rejected": -636.4332275390625, "loss": 0.6774, "rewards/accuracies": 0.625, "rewards/chosen": -0.3430967926979065, "rewards/margins": 0.17392148077487946, "rewards/rejected": -0.5170183181762695, "step": 10840 }, { "epoch": 0.8, "learning_rate": 5.830970691671745e-07, "logits/chosen": -2.2106523513793945, "logits/rejected": -1.8573147058486938, "logps/chosen": -549.6510009765625, "logps/rejected": -679.2759399414062, "loss": 0.681, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.38454127311706543, "rewards/margins": 0.1454908400774002, "rewards/rejected": -0.5300320386886597, "step": 10850 }, { "epoch": 0.8, "learning_rate": 5.789712047035598e-07, "logits/chosen": -2.124194622039795, "logits/rejected": -1.8096001148223877, "logps/chosen": -448.3265075683594, "logps/rejected": -588.5467529296875, "loss": 0.6826, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2726054787635803, "rewards/margins": 0.1620398461818695, "rewards/rejected": -0.4346453547477722, "step": 10860 }, { "epoch": 0.8, "learning_rate": 5.748580765178266e-07, "logits/chosen": -2.024498462677002, "logits/rejected": -1.5052427053451538, "logps/chosen": -431.35931396484375, "logps/rejected": -616.9334106445312, "loss": 0.6781, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.32565975189208984, "rewards/margins": 0.20263054966926575, "rewards/rejected": -0.528290331363678, "step": 10870 }, { "epoch": 0.8, "learning_rate": 5.707577118797073e-07, "logits/chosen": -2.3108887672424316, "logits/rejected": -1.727084755897522, "logps/chosen": -434.0311584472656, "logps/rejected": -621.3746337890625, "loss": 0.6767, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2916490435600281, "rewards/margins": 0.22340039908885956, "rewards/rejected": -0.5150494575500488, "step": 10880 }, { "epoch": 0.8, "learning_rate": 5.666701379743158e-07, "logits/chosen": -2.0472869873046875, "logits/rejected": -1.568629264831543, "logps/chosen": -579.1497802734375, "logps/rejected": -704.1159057617188, "loss": 0.6814, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3562849462032318, "rewards/margins": 0.20228631794452667, "rewards/rejected": -0.5585712790489197, "step": 10890 }, { "epoch": 0.8, "learning_rate": 5.62595381901962e-07, "logits/chosen": -2.185189723968506, "logits/rejected": -1.716295838356018, "logps/chosen": -510.60076904296875, "logps/rejected": -692.7018432617188, "loss": 0.6759, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3397950828075409, "rewards/margins": 0.22368700802326202, "rewards/rejected": -0.5634820461273193, "step": 10900 }, { "epoch": 0.8, "learning_rate": 5.585334706779746e-07, "logits/chosen": -2.127044677734375, "logits/rejected": -1.6272176504135132, "logps/chosen": -509.9183044433594, "logps/rejected": -745.8045043945312, "loss": 0.6759, "rewards/accuracies": 0.75, "rewards/chosen": -0.3046424388885498, "rewards/margins": 0.266828715801239, "rewards/rejected": -0.5714711546897888, "step": 10910 }, { "epoch": 0.81, "learning_rate": 5.54484431232524e-07, "logits/chosen": -2.1345696449279785, "logits/rejected": -1.5464451313018799, "logps/chosen": -495.1114807128906, "logps/rejected": -671.9736328125, "loss": 0.6769, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3460465371608734, "rewards/margins": 0.21888604760169983, "rewards/rejected": -0.564932644367218, "step": 10920 }, { "epoch": 0.81, "learning_rate": 5.504482904104386e-07, "logits/chosen": -2.0937647819519043, "logits/rejected": -1.7752711772918701, "logps/chosen": -529.858154296875, "logps/rejected": -714.5242309570312, "loss": 0.6762, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.33673641085624695, "rewards/margins": 0.2041545808315277, "rewards/rejected": -0.5408909916877747, "step": 10930 }, { "epoch": 0.81, "learning_rate": 5.464250749710335e-07, "logits/chosen": -2.1945977210998535, "logits/rejected": -1.7298648357391357, "logps/chosen": -471.56256103515625, "logps/rejected": -544.286376953125, "loss": 0.6842, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.27935874462127686, "rewards/margins": 0.13188524544239044, "rewards/rejected": -0.4112439751625061, "step": 10940 }, { "epoch": 0.81, "learning_rate": 5.424148115879255e-07, "logits/chosen": -2.0996742248535156, "logits/rejected": -1.5729163885116577, "logps/chosen": -519.5400390625, "logps/rejected": -659.5472412109375, "loss": 0.6806, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.37379613518714905, "rewards/margins": 0.18908146023750305, "rewards/rejected": -0.5628775954246521, "step": 10950 }, { "epoch": 0.81, "learning_rate": 5.38417526848864e-07, "logits/chosen": -2.2246243953704834, "logits/rejected": -1.5825047492980957, "logps/chosen": -548.1424560546875, "logps/rejected": -694.7077026367188, "loss": 0.6801, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3827727437019348, "rewards/margins": 0.16928884387016296, "rewards/rejected": -0.5520616173744202, "step": 10960 }, { "epoch": 0.81, "learning_rate": 5.344332472555483e-07, "logits/chosen": -2.2554116249084473, "logits/rejected": -1.591826319694519, "logps/chosen": -442.4122009277344, "logps/rejected": -633.5257568359375, "loss": 0.6755, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.25977903604507446, "rewards/margins": 0.24519169330596924, "rewards/rejected": -0.5049706697463989, "step": 10970 }, { "epoch": 0.81, "learning_rate": 5.304619992234548e-07, "logits/chosen": -2.1731274127960205, "logits/rejected": -1.6487624645233154, "logps/chosen": -499.79840087890625, "logps/rejected": -718.3174438476562, "loss": 0.6767, "rewards/accuracies": 0.75, "rewards/chosen": -0.3155311644077301, "rewards/margins": 0.2441098392009735, "rewards/rejected": -0.5596410036087036, "step": 10980 }, { "epoch": 0.81, "learning_rate": 5.265038090816637e-07, "logits/chosen": -2.0957937240600586, "logits/rejected": -1.5934902429580688, "logps/chosen": -514.5609130859375, "logps/rejected": -671.3630981445312, "loss": 0.6729, "rewards/accuracies": 0.75, "rewards/chosen": -0.30915403366088867, "rewards/margins": 0.20196768641471863, "rewards/rejected": -0.5111217498779297, "step": 10990 }, { "epoch": 0.81, "learning_rate": 5.225587030726803e-07, "logits/chosen": -2.219785690307617, "logits/rejected": -1.6170600652694702, "logps/chosen": -478.31146240234375, "logps/rejected": -730.4891357421875, "loss": 0.6694, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.30597952008247375, "rewards/margins": 0.27003732323646545, "rewards/rejected": -0.5760167837142944, "step": 11000 }, { "epoch": 0.81, "learning_rate": 5.18626707352263e-07, "logits/chosen": -1.9076716899871826, "logits/rejected": -1.6002380847930908, "logps/chosen": -403.7596130371094, "logps/rejected": -574.900146484375, "loss": 0.6796, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24498076736927032, "rewards/margins": 0.19579803943634033, "rewards/rejected": -0.44077882170677185, "step": 11010 }, { "epoch": 0.81, "learning_rate": 5.147078479892515e-07, "logits/chosen": -2.1356780529022217, "logits/rejected": -1.445279836654663, "logps/chosen": -413.2644958496094, "logps/rejected": -620.4447021484375, "loss": 0.6752, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.27561911940574646, "rewards/margins": 0.23757870495319366, "rewards/rejected": -0.5131978988647461, "step": 11020 }, { "epoch": 0.81, "learning_rate": 5.108021509653918e-07, "logits/chosen": -1.9985374212265015, "logits/rejected": -1.5774873495101929, "logps/chosen": -438.302490234375, "logps/rejected": -641.7302856445312, "loss": 0.6824, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.28405022621154785, "rewards/margins": 0.19557705521583557, "rewards/rejected": -0.47962722182273865, "step": 11030 }, { "epoch": 0.81, "learning_rate": 5.06909642175164e-07, "logits/chosen": -2.1556999683380127, "logits/rejected": -1.7028907537460327, "logps/chosen": -449.3116760253906, "logps/rejected": -598.0280151367188, "loss": 0.677, "rewards/accuracies": 0.75, "rewards/chosen": -0.2849337160587311, "rewards/margins": 0.2016363888978958, "rewards/rejected": -0.4865701198577881, "step": 11040 }, { "epoch": 0.82, "learning_rate": 5.030303474256113e-07, "logits/chosen": -2.2377123832702637, "logits/rejected": -1.7960236072540283, "logps/chosen": -500.4176330566406, "logps/rejected": -687.8071899414062, "loss": 0.6793, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.32132312655448914, "rewards/margins": 0.18983298540115356, "rewards/rejected": -0.5111561417579651, "step": 11050 }, { "epoch": 0.82, "learning_rate": 4.991642924361694e-07, "logits/chosen": -2.189652919769287, "logits/rejected": -1.544108271598816, "logps/chosen": -411.10552978515625, "logps/rejected": -594.7785034179688, "loss": 0.6761, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.271109938621521, "rewards/margins": 0.22368447482585907, "rewards/rejected": -0.49479445815086365, "step": 11060 }, { "epoch": 0.82, "learning_rate": 4.953115028384952e-07, "logits/chosen": -2.041257858276367, "logits/rejected": -1.5810027122497559, "logps/chosen": -505.93646240234375, "logps/rejected": -695.7078857421875, "loss": 0.6762, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.33257144689559937, "rewards/margins": 0.2440727949142456, "rewards/rejected": -0.5766441822052002, "step": 11070 }, { "epoch": 0.82, "learning_rate": 4.914720041762958e-07, "logits/chosen": -2.155654191970825, "logits/rejected": -1.499110221862793, "logps/chosen": -419.1410217285156, "logps/rejected": -605.8590698242188, "loss": 0.6756, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2689779996871948, "rewards/margins": 0.22357602417469025, "rewards/rejected": -0.4925540089607239, "step": 11080 }, { "epoch": 0.82, "learning_rate": 4.87645821905163e-07, "logits/chosen": -2.087209701538086, "logits/rejected": -1.6854327917099, "logps/chosen": -429.673828125, "logps/rejected": -617.4861450195312, "loss": 0.6786, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.27435997128486633, "rewards/margins": 0.20408925414085388, "rewards/rejected": -0.4784491956233978, "step": 11090 }, { "epoch": 0.82, "learning_rate": 4.838329813923998e-07, "logits/chosen": -2.058884859085083, "logits/rejected": -1.605133056640625, "logps/chosen": -450.6304626464844, "logps/rejected": -610.0919189453125, "loss": 0.6787, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.26683229207992554, "rewards/margins": 0.18453416228294373, "rewards/rejected": -0.4513664245605469, "step": 11100 }, { "epoch": 0.82, "learning_rate": 4.800335079168531e-07, "logits/chosen": -2.2067248821258545, "logits/rejected": -1.7790050506591797, "logps/chosen": -420.9895935058594, "logps/rejected": -670.1575317382812, "loss": 0.6729, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.26688870787620544, "rewards/margins": 0.25743168592453003, "rewards/rejected": -0.5243204832077026, "step": 11110 }, { "epoch": 0.82, "learning_rate": 4.7624742666875223e-07, "logits/chosen": -2.064008951187134, "logits/rejected": -1.397564172744751, "logps/chosen": -405.51336669921875, "logps/rejected": -593.7281494140625, "loss": 0.6741, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.25921836495399475, "rewards/margins": 0.2343340814113617, "rewards/rejected": -0.49355244636535645, "step": 11120 }, { "epoch": 0.82, "learning_rate": 4.724747627495327e-07, "logits/chosen": -2.2410154342651367, "logits/rejected": -1.8365246057510376, "logps/chosen": -517.0072021484375, "logps/rejected": -665.9581909179688, "loss": 0.6753, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3531070351600647, "rewards/margins": 0.1935461014509201, "rewards/rejected": -0.546653151512146, "step": 11130 }, { "epoch": 0.82, "learning_rate": 4.6871554117167426e-07, "logits/chosen": -2.3499083518981934, "logits/rejected": -1.719670057296753, "logps/chosen": -490.13018798828125, "logps/rejected": -643.7586669921875, "loss": 0.6756, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.33966344594955444, "rewards/margins": 0.20983967185020447, "rewards/rejected": -0.5495030879974365, "step": 11140 }, { "epoch": 0.82, "learning_rate": 4.649697868585379e-07, "logits/chosen": -2.2560536861419678, "logits/rejected": -1.8698015213012695, "logps/chosen": -494.3216247558594, "logps/rejected": -656.3463745117188, "loss": 0.6775, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.37035831809043884, "rewards/margins": 0.17590732872486115, "rewards/rejected": -0.5462656021118164, "step": 11150 }, { "epoch": 0.82, "learning_rate": 4.6123752464419496e-07, "logits/chosen": -2.0887255668640137, "logits/rejected": -1.7620292901992798, "logps/chosen": -477.06298828125, "logps/rejected": -650.06005859375, "loss": 0.6763, "rewards/accuracies": 0.75, "rewards/chosen": -0.30684980750083923, "rewards/margins": 0.18179050087928772, "rewards/rejected": -0.4886403977870941, "step": 11160 }, { "epoch": 0.82, "learning_rate": 4.5751877927326606e-07, "logits/chosen": -2.160163640975952, "logits/rejected": -1.6718511581420898, "logps/chosen": -494.04217529296875, "logps/rejected": -728.4462280273438, "loss": 0.6722, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3679516911506653, "rewards/margins": 0.25780385732650757, "rewards/rejected": -0.6257556080818176, "step": 11170 }, { "epoch": 0.82, "learning_rate": 4.538135754007553e-07, "logits/chosen": -2.058032512664795, "logits/rejected": -1.420099139213562, "logps/chosen": -486.9244689941406, "logps/rejected": -668.7811279296875, "loss": 0.6755, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3181012272834778, "rewards/margins": 0.21412229537963867, "rewards/rejected": -0.5322235822677612, "step": 11180 }, { "epoch": 0.83, "learning_rate": 4.5012193759189e-07, "logits/chosen": -2.180756092071533, "logits/rejected": -1.5842279195785522, "logps/chosen": -517.3098754882812, "logps/rejected": -696.0062255859375, "loss": 0.6731, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3230036199092865, "rewards/margins": 0.2485073357820511, "rewards/rejected": -0.571510910987854, "step": 11190 }, { "epoch": 0.83, "learning_rate": 4.4644389032195256e-07, "logits/chosen": -2.2453360557556152, "logits/rejected": -1.7932755947113037, "logps/chosen": -414.33038330078125, "logps/rejected": -614.6048583984375, "loss": 0.6789, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.26915788650512695, "rewards/margins": 0.19958487153053284, "rewards/rejected": -0.468742698431015, "step": 11200 }, { "epoch": 0.83, "learning_rate": 4.4277945797612324e-07, "logits/chosen": -2.285569190979004, "logits/rejected": -1.6237156391143799, "logps/chosen": -379.6518249511719, "logps/rejected": -600.5975341796875, "loss": 0.6757, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19783422350883484, "rewards/margins": 0.2630308270454407, "rewards/rejected": -0.4608650803565979, "step": 11210 }, { "epoch": 0.83, "learning_rate": 4.391286648493165e-07, "logits/chosen": -2.1642675399780273, "logits/rejected": -1.5092395544052124, "logps/chosen": -414.2881774902344, "logps/rejected": -631.5753173828125, "loss": 0.6792, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2849503755569458, "rewards/margins": 0.23269963264465332, "rewards/rejected": -0.5176500082015991, "step": 11220 }, { "epoch": 0.83, "learning_rate": 4.354915351460179e-07, "logits/chosen": -1.9389598369598389, "logits/rejected": -1.7257490158081055, "logps/chosen": -487.755859375, "logps/rejected": -684.9449462890625, "loss": 0.6734, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.31951120495796204, "rewards/margins": 0.19847612082958221, "rewards/rejected": -0.5179873704910278, "step": 11230 }, { "epoch": 0.83, "learning_rate": 4.318680929801261e-07, "logits/chosen": -2.0507493019104004, "logits/rejected": -1.5834128856658936, "logps/chosen": -477.0264587402344, "logps/rejected": -618.6024169921875, "loss": 0.678, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2924167811870575, "rewards/margins": 0.17362874746322632, "rewards/rejected": -0.4660455286502838, "step": 11240 }, { "epoch": 0.83, "learning_rate": 4.2825836237479433e-07, "logits/chosen": -1.938289999961853, "logits/rejected": -1.5198588371276855, "logps/chosen": -480.84063720703125, "logps/rejected": -651.6014404296875, "loss": 0.6785, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.29306405782699585, "rewards/margins": 0.20830193161964417, "rewards/rejected": -0.5013659596443176, "step": 11250 }, { "epoch": 0.83, "learning_rate": 4.246623672622668e-07, "logits/chosen": -2.241762161254883, "logits/rejected": -1.9158923625946045, "logps/chosen": -512.5458984375, "logps/rejected": -562.4452514648438, "loss": 0.6858, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.32688528299331665, "rewards/margins": 0.10371939837932587, "rewards/rejected": -0.43060463666915894, "step": 11260 }, { "epoch": 0.83, "learning_rate": 4.210801314837229e-07, "logits/chosen": -2.1401681900024414, "logits/rejected": -1.6087779998779297, "logps/chosen": -379.76947021484375, "logps/rejected": -545.8226318359375, "loss": 0.6788, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2155829221010208, "rewards/margins": 0.18763995170593262, "rewards/rejected": -0.40322285890579224, "step": 11270 }, { "epoch": 0.83, "learning_rate": 4.1751167878912034e-07, "logits/chosen": -2.117888927459717, "logits/rejected": -1.9225002527236938, "logps/chosen": -476.80450439453125, "logps/rejected": -636.0892944335938, "loss": 0.6755, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3233182430267334, "rewards/margins": 0.1551293432712555, "rewards/rejected": -0.4784475862979889, "step": 11280 }, { "epoch": 0.83, "learning_rate": 4.139570328370335e-07, "logits/chosen": -2.1264469623565674, "logits/rejected": -1.7717773914337158, "logps/chosen": -418.7152404785156, "logps/rejected": -623.0782470703125, "loss": 0.6774, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.29869234561920166, "rewards/margins": 0.20722730457782745, "rewards/rejected": -0.5059196352958679, "step": 11290 }, { "epoch": 0.83, "learning_rate": 4.104162171945017e-07, "logits/chosen": -2.2053301334381104, "logits/rejected": -1.657958745956421, "logps/chosen": -490.5335998535156, "logps/rejected": -707.7520751953125, "loss": 0.6691, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.33579307794570923, "rewards/margins": 0.260396271944046, "rewards/rejected": -0.5961893200874329, "step": 11300 }, { "epoch": 0.83, "learning_rate": 4.068892553368678e-07, "logits/chosen": -2.2042319774627686, "logits/rejected": -1.6038322448730469, "logps/chosen": -540.7276611328125, "logps/rejected": -722.6468505859375, "loss": 0.6759, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3834443986415863, "rewards/margins": 0.21666888892650604, "rewards/rejected": -0.6001132726669312, "step": 11310 }, { "epoch": 0.83, "learning_rate": 4.0337617064762745e-07, "logits/chosen": -2.064817190170288, "logits/rejected": -1.664696455001831, "logps/chosen": -491.5882873535156, "logps/rejected": -689.8379516601562, "loss": 0.6799, "rewards/accuracies": 0.75, "rewards/chosen": -0.3123231530189514, "rewards/margins": 0.2246474325656891, "rewards/rejected": -0.5369704961776733, "step": 11320 }, { "epoch": 0.84, "learning_rate": 3.9987698641827e-07, "logits/chosen": -2.003248929977417, "logits/rejected": -1.3471390008926392, "logps/chosen": -419.8658142089844, "logps/rejected": -627.6174926757812, "loss": 0.668, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2507097125053406, "rewards/margins": 0.25609010457992554, "rewards/rejected": -0.5067998170852661, "step": 11330 }, { "epoch": 0.84, "learning_rate": 3.9639172584812565e-07, "logits/chosen": -2.178476095199585, "logits/rejected": -1.7251958847045898, "logps/chosen": -419.93463134765625, "logps/rejected": -582.7022094726562, "loss": 0.6787, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2696533799171448, "rewards/margins": 0.1866074502468109, "rewards/rejected": -0.4562608301639557, "step": 11340 }, { "epoch": 0.84, "learning_rate": 3.92920412044214e-07, "logits/chosen": -2.2321574687957764, "logits/rejected": -1.7458738088607788, "logps/chosen": -422.13397216796875, "logps/rejected": -556.880615234375, "loss": 0.673, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.25415319204330444, "rewards/margins": 0.18912944197654724, "rewards/rejected": -0.4432826638221741, "step": 11350 }, { "epoch": 0.84, "learning_rate": 3.894630680210862e-07, "logits/chosen": -2.1685619354248047, "logits/rejected": -1.646047830581665, "logps/chosen": -454.64739990234375, "logps/rejected": -663.7337646484375, "loss": 0.6756, "rewards/accuracies": 0.75, "rewards/chosen": -0.2742902636528015, "rewards/margins": 0.24126151204109192, "rewards/rejected": -0.5155518651008606, "step": 11360 }, { "epoch": 0.84, "learning_rate": 3.8601971670067517e-07, "logits/chosen": -2.0154871940612793, "logits/rejected": -1.617200493812561, "logps/chosen": -521.3031005859375, "logps/rejected": -711.879150390625, "loss": 0.6772, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3587930202484131, "rewards/margins": 0.23231396079063416, "rewards/rejected": -0.5911070108413696, "step": 11370 }, { "epoch": 0.84, "learning_rate": 3.825903809121445e-07, "logits/chosen": -2.0214409828186035, "logits/rejected": -1.6607402563095093, "logps/chosen": -422.8866271972656, "logps/rejected": -632.7454833984375, "loss": 0.6734, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.259110689163208, "rewards/margins": 0.2233767956495285, "rewards/rejected": -0.4824874997138977, "step": 11380 }, { "epoch": 0.84, "learning_rate": 3.791750833917357e-07, "logits/chosen": -1.9538962841033936, "logits/rejected": -1.8652164936065674, "logps/chosen": -512.67529296875, "logps/rejected": -644.7906494140625, "loss": 0.682, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3720704913139343, "rewards/margins": 0.15399692952632904, "rewards/rejected": -0.5260673761367798, "step": 11390 }, { "epoch": 0.84, "learning_rate": 3.7577384678261637e-07, "logits/chosen": -1.9202601909637451, "logits/rejected": -1.4306604862213135, "logps/chosen": -422.1800231933594, "logps/rejected": -642.9761962890625, "loss": 0.6798, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.27318811416625977, "rewards/margins": 0.26081573963165283, "rewards/rejected": -0.5340038537979126, "step": 11400 }, { "epoch": 0.84, "learning_rate": 3.723866936347315e-07, "logits/chosen": -1.8910945653915405, "logits/rejected": -1.4702961444854736, "logps/chosen": -431.59393310546875, "logps/rejected": -663.0261840820312, "loss": 0.6705, "rewards/accuracies": 0.75, "rewards/chosen": -0.29108184576034546, "rewards/margins": 0.24947376549243927, "rewards/rejected": -0.5405555963516235, "step": 11410 }, { "epoch": 0.84, "learning_rate": 3.690136464046551e-07, "logits/chosen": -2.116760015487671, "logits/rejected": -1.608129858970642, "logps/chosen": -419.30572509765625, "logps/rejected": -593.0499267578125, "loss": 0.6795, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.265267938375473, "rewards/margins": 0.2035629004240036, "rewards/rejected": -0.4688308835029602, "step": 11420 }, { "epoch": 0.84, "learning_rate": 3.6565472745543923e-07, "logits/chosen": -2.1577634811401367, "logits/rejected": -1.683428168296814, "logps/chosen": -508.83740234375, "logps/rejected": -631.5335083007812, "loss": 0.6832, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3160319924354553, "rewards/margins": 0.14015093445777893, "rewards/rejected": -0.45618295669555664, "step": 11430 }, { "epoch": 0.84, "learning_rate": 3.623099590564652e-07, "logits/chosen": -2.179245710372925, "logits/rejected": -1.5835354328155518, "logps/chosen": -528.1015014648438, "logps/rejected": -679.2579956054688, "loss": 0.6777, "rewards/accuracies": 0.75, "rewards/chosen": -0.3397906422615051, "rewards/margins": 0.21354146301746368, "rewards/rejected": -0.5533320903778076, "step": 11440 }, { "epoch": 0.84, "learning_rate": 3.5897936338329995e-07, "logits/chosen": -2.2100770473480225, "logits/rejected": -1.7638019323349, "logps/chosen": -533.6240234375, "logps/rejected": -681.646484375, "loss": 0.6795, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.35595640540122986, "rewards/margins": 0.17914113402366638, "rewards/rejected": -0.5350974798202515, "step": 11450 }, { "epoch": 0.85, "learning_rate": 3.556629625175442e-07, "logits/chosen": -2.220949649810791, "logits/rejected": -1.5858508348464966, "logps/chosen": -556.9031372070312, "logps/rejected": -733.9974365234375, "loss": 0.6812, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3632674813270569, "rewards/margins": 0.22544661164283752, "rewards/rejected": -0.5887141227722168, "step": 11460 }, { "epoch": 0.85, "learning_rate": 3.5236077844668815e-07, "logits/chosen": -2.4292213916778564, "logits/rejected": -1.7792192697525024, "logps/chosen": -517.9530029296875, "logps/rejected": -674.9904174804688, "loss": 0.6748, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3315976560115814, "rewards/margins": 0.233358234167099, "rewards/rejected": -0.5649558305740356, "step": 11470 }, { "epoch": 0.85, "learning_rate": 3.490728330639667e-07, "logits/chosen": -2.256197929382324, "logits/rejected": -1.787806749343872, "logps/chosen": -507.97271728515625, "logps/rejected": -672.91943359375, "loss": 0.6752, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3638913035392761, "rewards/margins": 0.18816225230693817, "rewards/rejected": -0.5520535707473755, "step": 11480 }, { "epoch": 0.85, "learning_rate": 3.457991481682141e-07, "logits/chosen": -2.192922830581665, "logits/rejected": -1.7576732635498047, "logps/chosen": -510.6812438964844, "logps/rejected": -639.3165283203125, "loss": 0.6802, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3512476086616516, "rewards/margins": 0.15930262207984924, "rewards/rejected": -0.5105502009391785, "step": 11490 }, { "epoch": 0.85, "learning_rate": 3.425397454637161e-07, "logits/chosen": -2.205972909927368, "logits/rejected": -1.4593846797943115, "logps/chosen": -509.5646057128906, "logps/rejected": -735.0399780273438, "loss": 0.6699, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3219185173511505, "rewards/margins": 0.3016934096813202, "rewards/rejected": -0.6236120462417603, "step": 11500 }, { "epoch": 0.85, "learning_rate": 3.392946465600699e-07, "logits/chosen": -2.0271406173706055, "logits/rejected": -1.5553752183914185, "logps/chosen": -534.8543090820312, "logps/rejected": -665.530029296875, "loss": 0.6736, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.35646623373031616, "rewards/margins": 0.17970018088817596, "rewards/rejected": -0.5361663103103638, "step": 11510 }, { "epoch": 0.85, "learning_rate": 3.3606387297204085e-07, "logits/chosen": -2.00836181640625, "logits/rejected": -1.8532464504241943, "logps/chosen": -591.5239868164062, "logps/rejected": -711.9527587890625, "loss": 0.6831, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.45443210005760193, "rewards/margins": 0.11555938422679901, "rewards/rejected": -0.5699915289878845, "step": 11520 }, { "epoch": 0.85, "learning_rate": 3.328474461194164e-07, "logits/chosen": -2.237130880355835, "logits/rejected": -1.8356088399887085, "logps/chosen": -456.6768493652344, "logps/rejected": -673.2439575195312, "loss": 0.6789, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2927168905735016, "rewards/margins": 0.23015174269676208, "rewards/rejected": -0.5228685736656189, "step": 11530 }, { "epoch": 0.85, "learning_rate": 3.296453873268665e-07, "logits/chosen": -2.1427905559539795, "logits/rejected": -1.7721540927886963, "logps/chosen": -519.6375122070312, "logps/rejected": -638.3157958984375, "loss": 0.6779, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2919256091117859, "rewards/margins": 0.17656049132347107, "rewards/rejected": -0.46848607063293457, "step": 11540 }, { "epoch": 0.85, "learning_rate": 3.26457717823804e-07, "logits/chosen": -2.167227268218994, "logits/rejected": -1.6636489629745483, "logps/chosen": -491.2501525878906, "logps/rejected": -627.521240234375, "loss": 0.6781, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3354409337043762, "rewards/margins": 0.19265520572662354, "rewards/rejected": -0.5280961990356445, "step": 11550 }, { "epoch": 0.85, "learning_rate": 3.232844587442391e-07, "logits/chosen": -2.0213546752929688, "logits/rejected": -1.6059238910675049, "logps/chosen": -521.2102661132812, "logps/rejected": -675.9132080078125, "loss": 0.6813, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35168904066085815, "rewards/margins": 0.21029198169708252, "rewards/rejected": -0.5619810819625854, "step": 11560 }, { "epoch": 0.85, "learning_rate": 3.20125631126644e-07, "logits/chosen": -2.253649950027466, "logits/rejected": -1.5699660778045654, "logps/chosen": -531.99560546875, "logps/rejected": -713.8012084960938, "loss": 0.6789, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3431040048599243, "rewards/margins": 0.23506584763526917, "rewards/rejected": -0.5781698226928711, "step": 11570 }, { "epoch": 0.85, "learning_rate": 3.1698125591381e-07, "logits/chosen": -2.0836338996887207, "logits/rejected": -1.635046362876892, "logps/chosen": -518.0444946289062, "logps/rejected": -683.6883544921875, "loss": 0.6766, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3558501601219177, "rewards/margins": 0.1992758810520172, "rewards/rejected": -0.5551260113716125, "step": 11580 }, { "epoch": 0.85, "learning_rate": 3.138513539527116e-07, "logits/chosen": -2.145026445388794, "logits/rejected": -1.5616735219955444, "logps/chosen": -565.3060302734375, "logps/rejected": -731.1030883789062, "loss": 0.678, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.38231611251831055, "rewards/margins": 0.22278228402137756, "rewards/rejected": -0.6050983667373657, "step": 11590 }, { "epoch": 0.86, "learning_rate": 3.1073594599436485e-07, "logits/chosen": -1.9327256679534912, "logits/rejected": -1.4507391452789307, "logps/chosen": -498.1647033691406, "logps/rejected": -692.8912963867188, "loss": 0.6761, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3596828579902649, "rewards/margins": 0.23680540919303894, "rewards/rejected": -0.5964882969856262, "step": 11600 }, { "epoch": 0.86, "learning_rate": 3.0763505269369247e-07, "logits/chosen": -2.187727928161621, "logits/rejected": -1.7763586044311523, "logps/chosen": -449.74835205078125, "logps/rejected": -671.2135009765625, "loss": 0.6776, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.27369779348373413, "rewards/margins": 0.22028645873069763, "rewards/rejected": -0.49398428201675415, "step": 11610 }, { "epoch": 0.86, "learning_rate": 3.045486946093873e-07, "logits/chosen": -2.1413416862487793, "logits/rejected": -1.3869844675064087, "logps/chosen": -430.64642333984375, "logps/rejected": -674.2606811523438, "loss": 0.6702, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2567034363746643, "rewards/margins": 0.30305570363998413, "rewards/rejected": -0.5597590804100037, "step": 11620 }, { "epoch": 0.86, "learning_rate": 3.014768922037728e-07, "logits/chosen": -2.2416787147521973, "logits/rejected": -1.8535083532333374, "logps/chosen": -455.3478088378906, "logps/rejected": -618.4862060546875, "loss": 0.6767, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.29529720544815063, "rewards/margins": 0.20304617285728455, "rewards/rejected": -0.4983433187007904, "step": 11630 }, { "epoch": 0.86, "learning_rate": 2.9841966584266967e-07, "logits/chosen": -2.291377544403076, "logits/rejected": -1.5707287788391113, "logps/chosen": -501.26226806640625, "logps/rejected": -647.4762573242188, "loss": 0.6796, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.29639172554016113, "rewards/margins": 0.2030278742313385, "rewards/rejected": -0.49941959977149963, "step": 11640 }, { "epoch": 0.86, "learning_rate": 2.953770357952618e-07, "logits/chosen": -2.2160801887512207, "logits/rejected": -1.4277667999267578, "logps/chosen": -490.3465881347656, "logps/rejected": -733.4608154296875, "loss": 0.6675, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3304901421070099, "rewards/margins": 0.2996303141117096, "rewards/rejected": -0.6301204562187195, "step": 11650 }, { "epoch": 0.86, "learning_rate": 2.923490222339601e-07, "logits/chosen": -2.3919918537139893, "logits/rejected": -1.6684818267822266, "logps/chosen": -500.06964111328125, "logps/rejected": -640.033935546875, "loss": 0.6743, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.33236920833587646, "rewards/margins": 0.20267538726329803, "rewards/rejected": -0.5350446105003357, "step": 11660 }, { "epoch": 0.86, "learning_rate": 2.893356452342688e-07, "logits/chosen": -2.165426731109619, "logits/rejected": -2.0034661293029785, "logps/chosen": -499.7435607910156, "logps/rejected": -659.79443359375, "loss": 0.6833, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.31031495332717896, "rewards/margins": 0.14567294716835022, "rewards/rejected": -0.4559878408908844, "step": 11670 }, { "epoch": 0.86, "learning_rate": 2.8633692477465226e-07, "logits/chosen": -2.04156494140625, "logits/rejected": -1.5417125225067139, "logps/chosen": -453.80462646484375, "logps/rejected": -610.0916748046875, "loss": 0.6757, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2970024049282074, "rewards/margins": 0.22125527262687683, "rewards/rejected": -0.5182576775550842, "step": 11680 }, { "epoch": 0.86, "learning_rate": 2.833528807364053e-07, "logits/chosen": -2.1854054927825928, "logits/rejected": -1.620578408241272, "logps/chosen": -473.3126525878906, "logps/rejected": -653.05322265625, "loss": 0.6734, "rewards/accuracies": 0.75, "rewards/chosen": -0.2966765761375427, "rewards/margins": 0.2223368138074875, "rewards/rejected": -0.5190134048461914, "step": 11690 }, { "epoch": 0.86, "learning_rate": 2.803835329035165e-07, "logits/chosen": -2.1376399993896484, "logits/rejected": -1.7811199426651, "logps/chosen": -431.7406311035156, "logps/rejected": -651.4739990234375, "loss": 0.6734, "rewards/accuracies": 0.75, "rewards/chosen": -0.30105528235435486, "rewards/margins": 0.20797376334667206, "rewards/rejected": -0.5090290307998657, "step": 11700 }, { "epoch": 0.86, "learning_rate": 2.774289009625411e-07, "logits/chosen": -2.1087450981140137, "logits/rejected": -1.5984749794006348, "logps/chosen": -491.6719665527344, "logps/rejected": -659.2962646484375, "loss": 0.6761, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2954481840133667, "rewards/margins": 0.23134055733680725, "rewards/rejected": -0.5267887711524963, "step": 11710 }, { "epoch": 0.86, "learning_rate": 2.744890045024698e-07, "logits/chosen": -2.097973346710205, "logits/rejected": -1.3421880006790161, "logps/chosen": -439.12847900390625, "logps/rejected": -637.4471435546875, "loss": 0.673, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.26415345072746277, "rewards/margins": 0.2592000663280487, "rewards/rejected": -0.5233535766601562, "step": 11720 }, { "epoch": 0.87, "learning_rate": 2.715638630145964e-07, "logits/chosen": -1.9380031824111938, "logits/rejected": -1.7103936672210693, "logps/chosen": -446.4017028808594, "logps/rejected": -591.6092529296875, "loss": 0.6818, "rewards/accuracies": 0.625, "rewards/chosen": -0.3014918863773346, "rewards/margins": 0.16322940587997437, "rewards/rejected": -0.46472129225730896, "step": 11730 }, { "epoch": 0.87, "learning_rate": 2.686534958923906e-07, "logits/chosen": -2.114142894744873, "logits/rejected": -1.50821852684021, "logps/chosen": -480.54376220703125, "logps/rejected": -667.6434326171875, "loss": 0.6711, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3093274235725403, "rewards/margins": 0.2523811459541321, "rewards/rejected": -0.5617085695266724, "step": 11740 }, { "epoch": 0.87, "learning_rate": 2.6575792243137085e-07, "logits/chosen": -2.1178667545318604, "logits/rejected": -1.5729731321334839, "logps/chosen": -527.5065307617188, "logps/rejected": -681.3619995117188, "loss": 0.6755, "rewards/accuracies": 0.75, "rewards/chosen": -0.33909478783607483, "rewards/margins": 0.19606658816337585, "rewards/rejected": -0.5351613163948059, "step": 11750 }, { "epoch": 0.87, "learning_rate": 2.628771618289735e-07, "logits/chosen": -2.1318306922912598, "logits/rejected": -1.5502299070358276, "logps/chosen": -587.4588623046875, "logps/rejected": -773.1563720703125, "loss": 0.674, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.38956695795059204, "rewards/margins": 0.2555129826068878, "rewards/rejected": -0.6450799703598022, "step": 11760 }, { "epoch": 0.87, "learning_rate": 2.600112331844265e-07, "logits/chosen": -2.2342400550842285, "logits/rejected": -1.613340139389038, "logps/chosen": -577.593017578125, "logps/rejected": -813.2101440429688, "loss": 0.6726, "rewards/accuracies": 0.75, "rewards/chosen": -0.35767143964767456, "rewards/margins": 0.28449660539627075, "rewards/rejected": -0.6421680450439453, "step": 11770 }, { "epoch": 0.87, "learning_rate": 2.5716015549862303e-07, "logits/chosen": -2.1582324504852295, "logits/rejected": -1.5867431163787842, "logps/chosen": -532.7200927734375, "logps/rejected": -757.3294677734375, "loss": 0.6736, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.387021541595459, "rewards/margins": 0.2973688244819641, "rewards/rejected": -0.6843903660774231, "step": 11780 }, { "epoch": 0.87, "learning_rate": 2.543239476739967e-07, "logits/chosen": -2.0830090045928955, "logits/rejected": -1.413089394569397, "logps/chosen": -449.784423828125, "logps/rejected": -588.8201904296875, "loss": 0.6759, "rewards/accuracies": 0.75, "rewards/chosen": -0.2903238534927368, "rewards/margins": 0.18816378712654114, "rewards/rejected": -0.4784875512123108, "step": 11790 }, { "epoch": 0.87, "learning_rate": 2.51502628514394e-07, "logits/chosen": -2.3956291675567627, "logits/rejected": -1.7806386947631836, "logps/chosen": -520.2537841796875, "logps/rejected": -653.0791015625, "loss": 0.6801, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3171404302120209, "rewards/margins": 0.21608078479766846, "rewards/rejected": -0.5332211852073669, "step": 11800 }, { "epoch": 0.87, "learning_rate": 2.486962167249501e-07, "logits/chosen": -2.121614456176758, "logits/rejected": -1.498131275177002, "logps/chosen": -384.8736572265625, "logps/rejected": -594.6304931640625, "loss": 0.6729, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24924734234809875, "rewards/margins": 0.24664130806922913, "rewards/rejected": -0.4958886504173279, "step": 11810 }, { "epoch": 0.87, "learning_rate": 2.459047309119672e-07, "logits/chosen": -2.170654773712158, "logits/rejected": -1.657245397567749, "logps/chosen": -472.12908935546875, "logps/rejected": -677.0152587890625, "loss": 0.6718, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.28543829917907715, "rewards/margins": 0.26142674684524536, "rewards/rejected": -0.5468650460243225, "step": 11820 }, { "epoch": 0.87, "learning_rate": 2.431281895827875e-07, "logits/chosen": -1.9854590892791748, "logits/rejected": -1.6743278503417969, "logps/chosen": -379.7472229003906, "logps/rejected": -566.3404541015625, "loss": 0.6798, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24730320274829865, "rewards/margins": 0.20422224700450897, "rewards/rejected": -0.4515254497528076, "step": 11830 }, { "epoch": 0.87, "learning_rate": 2.403666111456743e-07, "logits/chosen": -1.9855289459228516, "logits/rejected": -1.4351197481155396, "logps/chosen": -446.69317626953125, "logps/rejected": -719.0587158203125, "loss": 0.6746, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.30869635939598083, "rewards/margins": 0.3051069676876068, "rewards/rejected": -0.6138033270835876, "step": 11840 }, { "epoch": 0.87, "learning_rate": 2.3762001390968625e-07, "logits/chosen": -2.0995306968688965, "logits/rejected": -1.5040309429168701, "logps/chosen": -601.6218872070312, "logps/rejected": -760.4983520507812, "loss": 0.6735, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.45319685339927673, "rewards/margins": 0.20611241459846497, "rewards/rejected": -0.6593092679977417, "step": 11850 }, { "epoch": 0.87, "learning_rate": 2.348884160845588e-07, "logits/chosen": -2.0843520164489746, "logits/rejected": -1.850628137588501, "logps/chosen": -440.28912353515625, "logps/rejected": -579.2227172851562, "loss": 0.6791, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.27222418785095215, "rewards/margins": 0.17143099009990692, "rewards/rejected": -0.4436551630496979, "step": 11860 }, { "epoch": 0.88, "learning_rate": 2.3217183578058245e-07, "logits/chosen": -2.189271926879883, "logits/rejected": -1.680910348892212, "logps/chosen": -456.1978454589844, "logps/rejected": -594.7379760742188, "loss": 0.6814, "rewards/accuracies": 0.75, "rewards/chosen": -0.28806376457214355, "rewards/margins": 0.17133675515651703, "rewards/rejected": -0.459400475025177, "step": 11870 }, { "epoch": 0.88, "learning_rate": 2.2947029100848145e-07, "logits/chosen": -2.0582330226898193, "logits/rejected": -1.742398977279663, "logps/chosen": -445.5935974121094, "logps/rejected": -599.065673828125, "loss": 0.68, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.3026937246322632, "rewards/margins": 0.16790415346622467, "rewards/rejected": -0.4705978333950043, "step": 11880 }, { "epoch": 0.88, "learning_rate": 2.2678379967929765e-07, "logits/chosen": -2.096417188644409, "logits/rejected": -1.6395238637924194, "logps/chosen": -517.4729614257812, "logps/rejected": -736.9747924804688, "loss": 0.6762, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3260856866836548, "rewards/margins": 0.2413916140794754, "rewards/rejected": -0.567477285861969, "step": 11890 }, { "epoch": 0.88, "learning_rate": 2.2411237960426797e-07, "logits/chosen": -2.300126075744629, "logits/rejected": -1.6678603887557983, "logps/chosen": -421.0668029785156, "logps/rejected": -624.8994140625, "loss": 0.6752, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.23411062359809875, "rewards/margins": 0.25395137071609497, "rewards/rejected": -0.4880619943141937, "step": 11900 }, { "epoch": 0.88, "learning_rate": 2.214560484947076e-07, "logits/chosen": -2.239010810852051, "logits/rejected": -2.01639986038208, "logps/chosen": -500.4898986816406, "logps/rejected": -672.1444702148438, "loss": 0.6833, "rewards/accuracies": 0.625, "rewards/chosen": -0.3665786385536194, "rewards/margins": 0.1604468822479248, "rewards/rejected": -0.527025580406189, "step": 11910 }, { "epoch": 0.88, "learning_rate": 2.188148239618959e-07, "logits/chosen": -2.085510492324829, "logits/rejected": -1.6414371728897095, "logps/chosen": -539.2939453125, "logps/rejected": -670.4768676757812, "loss": 0.6796, "rewards/accuracies": 0.75, "rewards/chosen": -0.36829906702041626, "rewards/margins": 0.18653348088264465, "rewards/rejected": -0.5548325777053833, "step": 11920 }, { "epoch": 0.88, "learning_rate": 2.161887235169538e-07, "logits/chosen": -2.0786986351013184, "logits/rejected": -1.4639780521392822, "logps/chosen": -532.525634765625, "logps/rejected": -654.8619995117188, "loss": 0.6777, "rewards/accuracies": 0.625, "rewards/chosen": -0.376718133687973, "rewards/margins": 0.1862507164478302, "rewards/rejected": -0.5629688501358032, "step": 11930 }, { "epoch": 0.88, "learning_rate": 2.135777645707318e-07, "logits/chosen": -1.9859380722045898, "logits/rejected": -1.594564437866211, "logps/chosen": -478.65411376953125, "logps/rejected": -680.6450805664062, "loss": 0.676, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3420429825782776, "rewards/margins": 0.2257934808731079, "rewards/rejected": -0.5678364038467407, "step": 11940 }, { "epoch": 0.88, "learning_rate": 2.1098196443369355e-07, "logits/chosen": -2.034562587738037, "logits/rejected": -1.5462099313735962, "logps/chosen": -508.8946228027344, "logps/rejected": -645.8365478515625, "loss": 0.6754, "rewards/accuracies": 0.75, "rewards/chosen": -0.3254268169403076, "rewards/margins": 0.18916495144367218, "rewards/rejected": -0.5145917534828186, "step": 11950 }, { "epoch": 0.88, "learning_rate": 2.0840134031580083e-07, "logits/chosen": -1.9585685729980469, "logits/rejected": -1.3877129554748535, "logps/chosen": -439.3160705566406, "logps/rejected": -639.3881225585938, "loss": 0.6775, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3071732223033905, "rewards/margins": 0.2152344286441803, "rewards/rejected": -0.5224076509475708, "step": 11960 }, { "epoch": 0.88, "learning_rate": 2.0583590932639896e-07, "logits/chosen": -2.143796920776367, "logits/rejected": -1.3935296535491943, "logps/chosen": -568.0697021484375, "logps/rejected": -778.9912109375, "loss": 0.6764, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.39132726192474365, "rewards/margins": 0.27364692091941833, "rewards/rejected": -0.6649742126464844, "step": 11970 }, { "epoch": 0.88, "learning_rate": 2.0328568847410413e-07, "logits/chosen": -2.2069287300109863, "logits/rejected": -1.6461378335952759, "logps/chosen": -539.747314453125, "logps/rejected": -691.1760864257812, "loss": 0.6795, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.40635594725608826, "rewards/margins": 0.18719151616096497, "rewards/rejected": -0.5935474634170532, "step": 11980 }, { "epoch": 0.88, "learning_rate": 2.007506946666915e-07, "logits/chosen": -2.0925941467285156, "logits/rejected": -1.7955362796783447, "logps/chosen": -493.159423828125, "logps/rejected": -625.7376098632812, "loss": 0.6817, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3419080376625061, "rewards/margins": 0.17344407737255096, "rewards/rejected": -0.5153521299362183, "step": 11990 }, { "epoch": 0.89, "learning_rate": 1.9823094471098038e-07, "logits/chosen": -2.1348137855529785, "logits/rejected": -1.7344545125961304, "logps/chosen": -567.8165283203125, "logps/rejected": -653.2806396484375, "loss": 0.683, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4114084243774414, "rewards/margins": 0.12877100706100464, "rewards/rejected": -0.540179431438446, "step": 12000 }, { "epoch": 0.89, "learning_rate": 1.957264553127261e-07, "logits/chosen": -2.1595802307128906, "logits/rejected": -1.7724567651748657, "logps/chosen": -477.90582275390625, "logps/rejected": -649.97021484375, "loss": 0.6801, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3317161202430725, "rewards/margins": 0.20590455830097198, "rewards/rejected": -0.5376206636428833, "step": 12010 }, { "epoch": 0.89, "learning_rate": 1.93237243076507e-07, "logits/chosen": -2.014846086502075, "logits/rejected": -1.4031996726989746, "logps/chosen": -473.9551696777344, "logps/rejected": -599.4190673828125, "loss": 0.6769, "rewards/accuracies": 0.75, "rewards/chosen": -0.3242209851741791, "rewards/margins": 0.19171825051307678, "rewards/rejected": -0.5159391760826111, "step": 12020 }, { "epoch": 0.89, "learning_rate": 1.9076332450561452e-07, "logits/chosen": -2.0429158210754395, "logits/rejected": -1.6908773183822632, "logps/chosen": -545.648681640625, "logps/rejected": -658.3882446289062, "loss": 0.6794, "rewards/accuracies": 0.75, "rewards/chosen": -0.3932749629020691, "rewards/margins": 0.15860401093959808, "rewards/rejected": -0.5518790483474731, "step": 12030 }, { "epoch": 0.89, "learning_rate": 1.8830471600194438e-07, "logits/chosen": -2.200688600540161, "logits/rejected": -1.5945208072662354, "logps/chosen": -478.5877990722656, "logps/rejected": -635.7564697265625, "loss": 0.6793, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.32195204496383667, "rewards/margins": 0.1931217759847641, "rewards/rejected": -0.5150738954544067, "step": 12040 }, { "epoch": 0.89, "learning_rate": 1.8586143386588846e-07, "logits/chosen": -1.9894599914550781, "logits/rejected": -1.498291254043579, "logps/chosen": -525.3455810546875, "logps/rejected": -710.8302001953125, "loss": 0.6792, "rewards/accuracies": 0.75, "rewards/chosen": -0.41273990273475647, "rewards/margins": 0.20264887809753418, "rewards/rejected": -0.6153887510299683, "step": 12050 }, { "epoch": 0.89, "learning_rate": 1.8343349429622515e-07, "logits/chosen": -2.0848288536071777, "logits/rejected": -1.5081634521484375, "logps/chosen": -512.0576171875, "logps/rejected": -692.9437866210938, "loss": 0.6723, "rewards/accuracies": 0.875, "rewards/chosen": -0.3404020667076111, "rewards/margins": 0.22450673580169678, "rewards/rejected": -0.5649088621139526, "step": 12060 }, { "epoch": 0.89, "learning_rate": 1.8102091339001232e-07, "logits/chosen": -2.25319504737854, "logits/rejected": -1.6254732608795166, "logps/chosen": -461.99578857421875, "logps/rejected": -703.13623046875, "loss": 0.6673, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3133341372013092, "rewards/margins": 0.2822135090827942, "rewards/rejected": -0.5955475568771362, "step": 12070 }, { "epoch": 0.89, "learning_rate": 1.786237071424829e-07, "logits/chosen": -2.1125941276550293, "logits/rejected": -1.6478955745697021, "logps/chosen": -506.8475646972656, "logps/rejected": -659.111328125, "loss": 0.677, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3726117014884949, "rewards/margins": 0.16968205571174622, "rewards/rejected": -0.5422937870025635, "step": 12080 }, { "epoch": 0.89, "learning_rate": 1.7624189144693492e-07, "logits/chosen": -2.1996254920959473, "logits/rejected": -1.637750267982483, "logps/chosen": -523.2218017578125, "logps/rejected": -685.9841918945312, "loss": 0.678, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.35013651847839355, "rewards/margins": 0.19795601069927216, "rewards/rejected": -0.5480926036834717, "step": 12090 }, { "epoch": 0.89, "learning_rate": 1.7387548209463006e-07, "logits/chosen": -2.1066713333129883, "logits/rejected": -1.6700814962387085, "logps/chosen": -432.80322265625, "logps/rejected": -606.7900390625, "loss": 0.6802, "rewards/accuracies": 0.75, "rewards/chosen": -0.2886849045753479, "rewards/margins": 0.21016578376293182, "rewards/rejected": -0.49885067343711853, "step": 12100 }, { "epoch": 0.89, "learning_rate": 1.7152449477468496e-07, "logits/chosen": -2.091416358947754, "logits/rejected": -1.460940957069397, "logps/chosen": -480.32568359375, "logps/rejected": -625.239013671875, "loss": 0.6736, "rewards/accuracies": 0.75, "rewards/chosen": -0.3050392270088196, "rewards/margins": 0.21094754338264465, "rewards/rejected": -0.5159868001937866, "step": 12110 }, { "epoch": 0.89, "learning_rate": 1.6918894507397145e-07, "logits/chosen": -2.152365207672119, "logits/rejected": -1.6188738346099854, "logps/chosen": -490.80413818359375, "logps/rejected": -698.7080078125, "loss": 0.6781, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3525245189666748, "rewards/margins": 0.22463306784629822, "rewards/rejected": -0.5771576166152954, "step": 12120 }, { "epoch": 0.89, "learning_rate": 1.6686884847701006e-07, "logits/chosen": -2.040642261505127, "logits/rejected": -1.5792055130004883, "logps/chosen": -524.0513916015625, "logps/rejected": -701.6610107421875, "loss": 0.6764, "rewards/accuracies": 0.75, "rewards/chosen": -0.3863694369792938, "rewards/margins": 0.19495384395122528, "rewards/rejected": -0.5813232660293579, "step": 12130 }, { "epoch": 0.9, "learning_rate": 1.6456422036586762e-07, "logits/chosen": -2.054377794265747, "logits/rejected": -1.7981659173965454, "logps/chosen": -487.75860595703125, "logps/rejected": -707.260986328125, "loss": 0.6773, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3346715569496155, "rewards/margins": 0.23494741320610046, "rewards/rejected": -0.5696189403533936, "step": 12140 }, { "epoch": 0.9, "learning_rate": 1.622750760200581e-07, "logits/chosen": -2.192474126815796, "logits/rejected": -1.6106598377227783, "logps/chosen": -457.26336669921875, "logps/rejected": -659.9684448242188, "loss": 0.6759, "rewards/accuracies": 0.75, "rewards/chosen": -0.30341553688049316, "rewards/margins": 0.239525705575943, "rewards/rejected": -0.5429412126541138, "step": 12150 }, { "epoch": 0.9, "learning_rate": 1.6000143061643741e-07, "logits/chosen": -2.2125775814056396, "logits/rejected": -1.6655919551849365, "logps/chosen": -421.448974609375, "logps/rejected": -678.2706298828125, "loss": 0.6717, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2716054320335388, "rewards/margins": 0.2711058259010315, "rewards/rejected": -0.5427111983299255, "step": 12160 }, { "epoch": 0.9, "learning_rate": 1.5774329922910474e-07, "logits/chosen": -1.7548434734344482, "logits/rejected": -1.3696155548095703, "logps/chosen": -584.982421875, "logps/rejected": -749.907958984375, "loss": 0.6809, "rewards/accuracies": 0.625, "rewards/chosen": -0.4315562844276428, "rewards/margins": 0.19571569561958313, "rewards/rejected": -0.6272720098495483, "step": 12170 }, { "epoch": 0.9, "learning_rate": 1.5550069682930406e-07, "logits/chosen": -2.270050048828125, "logits/rejected": -1.672848105430603, "logps/chosen": -509.9019470214844, "logps/rejected": -726.0106201171875, "loss": 0.6761, "rewards/accuracies": 0.875, "rewards/chosen": -0.33900725841522217, "rewards/margins": 0.25238674879074097, "rewards/rejected": -0.5913940072059631, "step": 12180 }, { "epoch": 0.9, "learning_rate": 1.5327363828532226e-07, "logits/chosen": -2.3608498573303223, "logits/rejected": -1.9883949756622314, "logps/chosen": -487.1329040527344, "logps/rejected": -616.1986083984375, "loss": 0.6807, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.33579540252685547, "rewards/margins": 0.14730015397071838, "rewards/rejected": -0.48309555649757385, "step": 12190 }, { "epoch": 0.9, "learning_rate": 1.5106213836239143e-07, "logits/chosen": -2.0216197967529297, "logits/rejected": -1.623793363571167, "logps/chosen": -492.9466857910156, "logps/rejected": -643.3934326171875, "loss": 0.6796, "rewards/accuracies": 0.75, "rewards/chosen": -0.3215225338935852, "rewards/margins": 0.17943766713142395, "rewards/rejected": -0.5009602308273315, "step": 12200 }, { "epoch": 0.9, "learning_rate": 1.4886621172259057e-07, "logits/chosen": -2.0532655715942383, "logits/rejected": -1.6775970458984375, "logps/chosen": -404.18145751953125, "logps/rejected": -600.5253295898438, "loss": 0.6799, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2592475414276123, "rewards/margins": 0.2235616147518158, "rewards/rejected": -0.4828091561794281, "step": 12210 }, { "epoch": 0.9, "learning_rate": 1.466858729247514e-07, "logits/chosen": -2.3478660583496094, "logits/rejected": -1.8313697576522827, "logps/chosen": -425.53265380859375, "logps/rejected": -605.9966430664062, "loss": 0.6776, "rewards/accuracies": 0.75, "rewards/chosen": -0.2668193280696869, "rewards/margins": 0.22519788146018982, "rewards/rejected": -0.4920172691345215, "step": 12220 }, { "epoch": 0.9, "learning_rate": 1.4452113642435677e-07, "logits/chosen": -1.9143339395523071, "logits/rejected": -1.4536736011505127, "logps/chosen": -394.32196044921875, "logps/rejected": -603.6613159179688, "loss": 0.6723, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.27741217613220215, "rewards/margins": 0.22135615348815918, "rewards/rejected": -0.49876824021339417, "step": 12230 }, { "epoch": 0.9, "learning_rate": 1.4237201657344824e-07, "logits/chosen": -2.126436710357666, "logits/rejected": -1.7942956686019897, "logps/chosen": -465.685546875, "logps/rejected": -618.6702270507812, "loss": 0.6788, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.338019996881485, "rewards/margins": 0.15990781784057617, "rewards/rejected": -0.4979277551174164, "step": 12240 }, { "epoch": 0.9, "learning_rate": 1.402385276205312e-07, "logits/chosen": -2.074099540710449, "logits/rejected": -1.4839638471603394, "logps/chosen": -537.3040161132812, "logps/rejected": -719.1774291992188, "loss": 0.6754, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.392131507396698, "rewards/margins": 0.18922826647758484, "rewards/rejected": -0.58135986328125, "step": 12250 }, { "epoch": 0.9, "learning_rate": 1.3812068371047844e-07, "logits/chosen": -1.8641332387924194, "logits/rejected": -1.5055952072143555, "logps/chosen": -468.4931640625, "logps/rejected": -597.0126342773438, "loss": 0.6887, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.33312687277793884, "rewards/margins": 0.14262095093727112, "rewards/rejected": -0.4757477641105652, "step": 12260 }, { "epoch": 0.91, "learning_rate": 1.360184988844368e-07, "logits/chosen": -2.1726996898651123, "logits/rejected": -1.582537055015564, "logps/chosen": -541.1795654296875, "logps/rejected": -735.6083374023438, "loss": 0.6775, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3694896996021271, "rewards/margins": 0.21513190865516663, "rewards/rejected": -0.5846216082572937, "step": 12270 }, { "epoch": 0.91, "learning_rate": 1.3393198707973536e-07, "logits/chosen": -2.0849616527557373, "logits/rejected": -1.7607250213623047, "logps/chosen": -517.9638061523438, "logps/rejected": -648.5909423828125, "loss": 0.6833, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3607760965824127, "rewards/margins": 0.18299849331378937, "rewards/rejected": -0.5437746047973633, "step": 12280 }, { "epoch": 0.91, "learning_rate": 1.318611621297927e-07, "logits/chosen": -2.1540427207946777, "logits/rejected": -1.4956104755401611, "logps/chosen": -508.576171875, "logps/rejected": -696.139404296875, "loss": 0.6722, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.32666081190109253, "rewards/margins": 0.23628397285938263, "rewards/rejected": -0.562944769859314, "step": 12290 }, { "epoch": 0.91, "learning_rate": 1.298060377640234e-07, "logits/chosen": -2.136949062347412, "logits/rejected": -1.5032439231872559, "logps/chosen": -445.4178771972656, "logps/rejected": -686.5231323242188, "loss": 0.6699, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2845747470855713, "rewards/margins": 0.2865470349788666, "rewards/rejected": -0.5711218118667603, "step": 12300 }, { "epoch": 0.91, "learning_rate": 1.277666276077491e-07, "logits/chosen": -2.1787149906158447, "logits/rejected": -1.9098033905029297, "logps/chosen": -532.1215209960938, "logps/rejected": -690.8379516601562, "loss": 0.6784, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3701249957084656, "rewards/margins": 0.17608217895030975, "rewards/rejected": -0.5462071299552917, "step": 12310 }, { "epoch": 0.91, "learning_rate": 1.257429451821074e-07, "logits/chosen": -2.1765084266662598, "logits/rejected": -1.8443025350570679, "logps/chosen": -495.49053955078125, "logps/rejected": -637.0192260742188, "loss": 0.6773, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.32345762848854065, "rewards/margins": 0.1655813753604889, "rewards/rejected": -0.48903900384902954, "step": 12320 }, { "epoch": 0.91, "learning_rate": 1.2373500390396227e-07, "logits/chosen": -1.962634801864624, "logits/rejected": -1.355678677558899, "logps/chosen": -478.2196350097656, "logps/rejected": -661.3304443359375, "loss": 0.6737, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.27927955985069275, "rewards/margins": 0.25013941526412964, "rewards/rejected": -0.5294189453125, "step": 12330 }, { "epoch": 0.91, "learning_rate": 1.2174281708581426e-07, "logits/chosen": -1.8963098526000977, "logits/rejected": -1.496100664138794, "logps/chosen": -572.2470703125, "logps/rejected": -735.81103515625, "loss": 0.6813, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.39485257863998413, "rewards/margins": 0.21406404674053192, "rewards/rejected": -0.608916699886322, "step": 12340 }, { "epoch": 0.91, "learning_rate": 1.1976639793571415e-07, "logits/chosen": -2.1424198150634766, "logits/rejected": -1.5939812660217285, "logps/chosen": -470.3157653808594, "logps/rejected": -624.9602661132812, "loss": 0.6773, "rewards/accuracies": 0.75, "rewards/chosen": -0.29389673471450806, "rewards/margins": 0.19294674694538116, "rewards/rejected": -0.4868434965610504, "step": 12350 }, { "epoch": 0.91, "learning_rate": 1.1780575955717355e-07, "logits/chosen": -2.2684998512268066, "logits/rejected": -1.8706061840057373, "logps/chosen": -548.8470458984375, "logps/rejected": -660.318603515625, "loss": 0.6812, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3463972806930542, "rewards/margins": 0.13985338807106018, "rewards/rejected": -0.48625069856643677, "step": 12360 }, { "epoch": 0.91, "learning_rate": 1.1586091494907941e-07, "logits/chosen": -2.0804831981658936, "logits/rejected": -1.8059895038604736, "logps/chosen": -516.5568237304688, "logps/rejected": -666.9929809570312, "loss": 0.6787, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.33799779415130615, "rewards/margins": 0.17738130688667297, "rewards/rejected": -0.5153791308403015, "step": 12370 }, { "epoch": 0.91, "learning_rate": 1.1393187700560582e-07, "logits/chosen": -1.91241455078125, "logits/rejected": -1.5198886394500732, "logps/chosen": -558.9293823242188, "logps/rejected": -723.130126953125, "loss": 0.6771, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3749842345714569, "rewards/margins": 0.19852067530155182, "rewards/rejected": -0.5735049843788147, "step": 12380 }, { "epoch": 0.91, "learning_rate": 1.1201865851613148e-07, "logits/chosen": -2.1314287185668945, "logits/rejected": -1.706146240234375, "logps/chosen": -604.000732421875, "logps/rejected": -750.9144897460938, "loss": 0.6805, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.417469322681427, "rewards/margins": 0.18822164833545685, "rewards/rejected": -0.6056910157203674, "step": 12390 }, { "epoch": 0.91, "learning_rate": 1.1012127216515233e-07, "logits/chosen": -2.174743175506592, "logits/rejected": -1.6166400909423828, "logps/chosen": -509.8779296875, "logps/rejected": -709.6063232421875, "loss": 0.6783, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3648774027824402, "rewards/margins": 0.22579646110534668, "rewards/rejected": -0.5906738042831421, "step": 12400 }, { "epoch": 0.92, "learning_rate": 1.0823973053219833e-07, "logits/chosen": -2.038316249847412, "logits/rejected": -1.629507064819336, "logps/chosen": -524.3645629882812, "logps/rejected": -662.1558227539062, "loss": 0.6798, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.36808156967163086, "rewards/margins": 0.17073896527290344, "rewards/rejected": -0.5388205647468567, "step": 12410 }, { "epoch": 0.92, "learning_rate": 1.0637404609175061e-07, "logits/chosen": -2.1070444583892822, "logits/rejected": -1.5186903476715088, "logps/chosen": -461.98931884765625, "logps/rejected": -615.2582397460938, "loss": 0.6772, "rewards/accuracies": 0.75, "rewards/chosen": -0.34282368421554565, "rewards/margins": 0.19936838746070862, "rewards/rejected": -0.5421921014785767, "step": 12420 }, { "epoch": 0.92, "learning_rate": 1.0452423121315835e-07, "logits/chosen": -2.04268217086792, "logits/rejected": -1.683473825454712, "logps/chosen": -547.9979248046875, "logps/rejected": -716.2355346679688, "loss": 0.6768, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4192432761192322, "rewards/margins": 0.18812403082847595, "rewards/rejected": -0.6073673367500305, "step": 12430 }, { "epoch": 0.92, "learning_rate": 1.026902981605557e-07, "logits/chosen": -2.0978572368621826, "logits/rejected": -1.682222604751587, "logps/chosen": -486.3165588378906, "logps/rejected": -700.2891845703125, "loss": 0.6791, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.37371325492858887, "rewards/margins": 0.2183757722377777, "rewards/rejected": -0.592089056968689, "step": 12440 }, { "epoch": 0.92, "learning_rate": 1.008722590927827e-07, "logits/chosen": -2.255584239959717, "logits/rejected": -1.6890909671783447, "logps/chosen": -525.306884765625, "logps/rejected": -775.19189453125, "loss": 0.6795, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.29720062017440796, "rewards/margins": 0.28349021077156067, "rewards/rejected": -0.580690860748291, "step": 12450 }, { "epoch": 0.92, "learning_rate": 9.907012606330369e-08, "logits/chosen": -2.232588529586792, "logits/rejected": -1.6436887979507446, "logps/chosen": -409.4840393066406, "logps/rejected": -648.5809326171875, "loss": 0.6774, "rewards/accuracies": 0.75, "rewards/chosen": -0.22649088501930237, "rewards/margins": 0.2743070423603058, "rewards/rejected": -0.5007978677749634, "step": 12460 }, { "epoch": 0.92, "learning_rate": 9.72839110201254e-08, "logits/chosen": -2.1511611938476562, "logits/rejected": -1.5245404243469238, "logps/chosen": -545.3085327148438, "logps/rejected": -705.765869140625, "loss": 0.6762, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.37648192048072815, "rewards/margins": 0.21327409148216248, "rewards/rejected": -0.5897560119628906, "step": 12470 }, { "epoch": 0.92, "learning_rate": 9.551362580572099e-08, "logits/chosen": -2.2956528663635254, "logits/rejected": -1.8444740772247314, "logps/chosen": -493.7486267089844, "logps/rejected": -635.140380859375, "loss": 0.6833, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.32734206318855286, "rewards/margins": 0.16510429978370667, "rewards/rejected": -0.49244633316993713, "step": 12480 }, { "epoch": 0.92, "learning_rate": 9.375928215694995e-08, "logits/chosen": -2.20131778717041, "logits/rejected": -1.6756337881088257, "logps/chosen": -541.5386962890625, "logps/rejected": -775.628662109375, "loss": 0.6797, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4200936257839203, "rewards/margins": 0.25533679127693176, "rewards/rejected": -0.675430417060852, "step": 12490 }, { "epoch": 0.92, "learning_rate": 9.202089170497974e-08, "logits/chosen": -2.106100559234619, "logits/rejected": -1.8178637027740479, "logps/chosen": -476.1302795410156, "logps/rejected": -689.4881591796875, "loss": 0.6741, "rewards/accuracies": 0.75, "rewards/chosen": -0.3347732424736023, "rewards/margins": 0.2125118225812912, "rewards/rejected": -0.5472850799560547, "step": 12500 }, { "epoch": 0.92, "learning_rate": 9.02984659752093e-08, "logits/chosen": -2.0193092823028564, "logits/rejected": -1.7335166931152344, "logps/chosen": -489.7693786621094, "logps/rejected": -678.3631591796875, "loss": 0.6789, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3404328227043152, "rewards/margins": 0.21002478897571564, "rewards/rejected": -0.5504575967788696, "step": 12510 }, { "epoch": 0.92, "learning_rate": 8.859201638719367e-08, "logits/chosen": -2.1343116760253906, "logits/rejected": -1.5239624977111816, "logps/chosen": -512.484375, "logps/rejected": -765.1834716796875, "loss": 0.6812, "rewards/accuracies": 0.75, "rewards/chosen": -0.3249683380126953, "rewards/margins": 0.2764081358909607, "rewards/rejected": -0.601376473903656, "step": 12520 }, { "epoch": 0.92, "learning_rate": 8.69015542545662e-08, "logits/chosen": -1.965043306350708, "logits/rejected": -1.6658912897109985, "logps/chosen": -517.3463134765625, "logps/rejected": -651.0448608398438, "loss": 0.6778, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.36904194951057434, "rewards/margins": 0.178606316447258, "rewards/rejected": -0.5476483106613159, "step": 12530 }, { "epoch": 0.92, "learning_rate": 8.52270907849645e-08, "logits/chosen": -1.9521411657333374, "logits/rejected": -1.4500691890716553, "logps/chosen": -529.83056640625, "logps/rejected": -749.7096557617188, "loss": 0.6814, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.40766558051109314, "rewards/margins": 0.2130332738161087, "rewards/rejected": -0.6206988096237183, "step": 12540 }, { "epoch": 0.93, "learning_rate": 8.356863707995739e-08, "logits/chosen": -1.9924697875976562, "logits/rejected": -1.3096204996109009, "logps/chosen": -552.4850463867188, "logps/rejected": -707.0377807617188, "loss": 0.6774, "rewards/accuracies": 0.75, "rewards/chosen": -0.38361459970474243, "rewards/margins": 0.21443454921245575, "rewards/rejected": -0.5980491638183594, "step": 12550 }, { "epoch": 0.93, "learning_rate": 8.192620413496943e-08, "logits/chosen": -1.9608217477798462, "logits/rejected": -1.4455105066299438, "logps/chosen": -513.3397827148438, "logps/rejected": -696.0731201171875, "loss": 0.6801, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3299045264720917, "rewards/margins": 0.2185518443584442, "rewards/rejected": -0.5484563112258911, "step": 12560 }, { "epoch": 0.93, "learning_rate": 8.029980283920851e-08, "logits/chosen": -2.1587576866149902, "logits/rejected": -1.6649141311645508, "logps/chosen": -444.51531982421875, "logps/rejected": -615.9049682617188, "loss": 0.6756, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3085770010948181, "rewards/margins": 0.19371385872364044, "rewards/rejected": -0.5022908449172974, "step": 12570 }, { "epoch": 0.93, "learning_rate": 7.868944397559392e-08, "logits/chosen": -2.0284018516540527, "logits/rejected": -1.7061134576797485, "logps/chosen": -404.7261657714844, "logps/rejected": -522.7496948242188, "loss": 0.6822, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2537665367126465, "rewards/margins": 0.1504553109407425, "rewards/rejected": -0.40422186255455017, "step": 12580 }, { "epoch": 0.93, "learning_rate": 7.709513822068554e-08, "logits/chosen": -2.1294925212860107, "logits/rejected": -1.625431776046753, "logps/chosen": -547.6978759765625, "logps/rejected": -720.6404418945312, "loss": 0.6728, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3959907591342926, "rewards/margins": 0.1986527293920517, "rewards/rejected": -0.5946434736251831, "step": 12590 }, { "epoch": 0.93, "learning_rate": 7.55168961446115e-08, "logits/chosen": -2.1917057037353516, "logits/rejected": -1.6513198614120483, "logps/chosen": -553.3343505859375, "logps/rejected": -727.6388549804688, "loss": 0.6812, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3642772138118744, "rewards/margins": 0.21628446877002716, "rewards/rejected": -0.580561637878418, "step": 12600 }, { "epoch": 0.93, "learning_rate": 7.395472821099897e-08, "logits/chosen": -1.9908517599105835, "logits/rejected": -1.63107430934906, "logps/chosen": -408.73126220703125, "logps/rejected": -610.4859008789062, "loss": 0.675, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.27691856026649475, "rewards/margins": 0.23260512948036194, "rewards/rejected": -0.5095236897468567, "step": 12610 }, { "epoch": 0.93, "learning_rate": 7.240864477690595e-08, "logits/chosen": -2.2255005836486816, "logits/rejected": -1.8979705572128296, "logps/chosen": -469.7120666503906, "logps/rejected": -658.60302734375, "loss": 0.674, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.32999974489212036, "rewards/margins": 0.21176037192344666, "rewards/rejected": -0.5417601466178894, "step": 12620 }, { "epoch": 0.93, "learning_rate": 7.087865609274936e-08, "logits/chosen": -1.8681995868682861, "logits/rejected": -1.5242364406585693, "logps/chosen": -557.416748046875, "logps/rejected": -714.1712646484375, "loss": 0.6806, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.352780818939209, "rewards/margins": 0.21786069869995117, "rewards/rejected": -0.5706414580345154, "step": 12630 }, { "epoch": 0.93, "learning_rate": 6.936477230224087e-08, "logits/chosen": -2.2789344787597656, "logits/rejected": -1.7582995891571045, "logps/chosen": -520.72412109375, "logps/rejected": -624.6638793945312, "loss": 0.6835, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.35910558700561523, "rewards/margins": 0.1458774209022522, "rewards/rejected": -0.5049830079078674, "step": 12640 }, { "epoch": 0.93, "learning_rate": 6.786700344231762e-08, "logits/chosen": -2.0200693607330322, "logits/rejected": -1.7008678913116455, "logps/chosen": -462.11517333984375, "logps/rejected": -699.5196533203125, "loss": 0.6756, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.28830546140670776, "rewards/margins": 0.2173430472612381, "rewards/rejected": -0.5056485533714294, "step": 12650 }, { "epoch": 0.93, "learning_rate": 6.638535944307522e-08, "logits/chosen": -2.2289674282073975, "logits/rejected": -1.7153441905975342, "logps/chosen": -554.6151123046875, "logps/rejected": -686.9397583007812, "loss": 0.6804, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.39302998781204224, "rewards/margins": 0.1832413226366043, "rewards/rejected": -0.5762713551521301, "step": 12660 }, { "epoch": 0.93, "learning_rate": 6.491985012770319e-08, "logits/chosen": -2.2531189918518066, "logits/rejected": -1.5317580699920654, "logps/chosen": -509.7763671875, "logps/rejected": -709.8782958984375, "loss": 0.6708, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.29545339941978455, "rewards/margins": 0.2762383222579956, "rewards/rejected": -0.5716916918754578, "step": 12670 }, { "epoch": 0.94, "learning_rate": 6.347048521241877e-08, "logits/chosen": -2.173322916030884, "logits/rejected": -1.6505330801010132, "logps/chosen": -499.2825622558594, "logps/rejected": -719.4072265625, "loss": 0.665, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.34817036986351013, "rewards/margins": 0.2476455420255661, "rewards/rejected": -0.595815896987915, "step": 12680 }, { "epoch": 0.94, "learning_rate": 6.203727430640377e-08, "logits/chosen": -2.02827787399292, "logits/rejected": -1.6757946014404297, "logps/chosen": -469.9471740722656, "logps/rejected": -607.8318481445312, "loss": 0.681, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3080894649028778, "rewards/margins": 0.17185945808887482, "rewards/rejected": -0.4799489378929138, "step": 12690 }, { "epoch": 0.94, "learning_rate": 6.062022691174008e-08, "logits/chosen": -2.1792654991149902, "logits/rejected": -1.4882527589797974, "logps/chosen": -499.6676330566406, "logps/rejected": -692.9419555664062, "loss": 0.6821, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3299294114112854, "rewards/margins": 0.24393709003925323, "rewards/rejected": -0.5738664865493774, "step": 12700 }, { "epoch": 0.94, "learning_rate": 5.9219352423345865e-08, "logits/chosen": -2.0874757766723633, "logits/rejected": -1.6305296421051025, "logps/chosen": -499.93798828125, "logps/rejected": -663.995361328125, "loss": 0.6756, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3765753507614136, "rewards/margins": 0.18986323475837708, "rewards/rejected": -0.5664385557174683, "step": 12710 }, { "epoch": 0.94, "learning_rate": 5.783466012891481e-08, "logits/chosen": -2.2308082580566406, "logits/rejected": -1.532222032546997, "logps/chosen": -510.47216796875, "logps/rejected": -678.0946044921875, "loss": 0.6733, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.31287485361099243, "rewards/margins": 0.22817668318748474, "rewards/rejected": -0.5410515069961548, "step": 12720 }, { "epoch": 0.94, "learning_rate": 5.6466159208854176e-08, "logits/chosen": -2.4516963958740234, "logits/rejected": -2.003739356994629, "logps/chosen": -518.5861206054688, "logps/rejected": -747.2549438476562, "loss": 0.6735, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3403279781341553, "rewards/margins": 0.24239540100097656, "rewards/rejected": -0.5827234387397766, "step": 12730 }, { "epoch": 0.94, "learning_rate": 5.5113858736222384e-08, "logits/chosen": -2.264930486679077, "logits/rejected": -2.017120122909546, "logps/chosen": -500.3267517089844, "logps/rejected": -611.7821044921875, "loss": 0.6812, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3175230324268341, "rewards/margins": 0.15837731957435608, "rewards/rejected": -0.4759003520011902, "step": 12740 }, { "epoch": 0.94, "learning_rate": 5.377776767667181e-08, "logits/chosen": -2.2760722637176514, "logits/rejected": -1.5986002683639526, "logps/chosen": -447.68170166015625, "logps/rejected": -669.1585693359375, "loss": 0.6729, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.28611525893211365, "rewards/margins": 0.2513144314289093, "rewards/rejected": -0.537429690361023, "step": 12750 }, { "epoch": 0.94, "learning_rate": 5.245789488838609e-08, "logits/chosen": -2.155893087387085, "logits/rejected": -1.7329505681991577, "logps/chosen": -467.204833984375, "logps/rejected": -619.0189208984375, "loss": 0.6835, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3014693856239319, "rewards/margins": 0.17419251799583435, "rewards/rejected": -0.47566184401512146, "step": 12760 }, { "epoch": 0.94, "learning_rate": 5.115424912202349e-08, "logits/chosen": -2.1001620292663574, "logits/rejected": -1.4307503700256348, "logps/chosen": -520.5621948242188, "logps/rejected": -688.2155151367188, "loss": 0.6789, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3288336396217346, "rewards/margins": 0.2111232727766037, "rewards/rejected": -0.5399569272994995, "step": 12770 }, { "epoch": 0.94, "learning_rate": 4.9866839020658866e-08, "logits/chosen": -2.2150330543518066, "logits/rejected": -1.580196499824524, "logps/chosen": -576.223876953125, "logps/rejected": -739.0624389648438, "loss": 0.6772, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3190934658050537, "rewards/margins": 0.2545134127140045, "rewards/rejected": -0.5736068487167358, "step": 12780 }, { "epoch": 0.94, "learning_rate": 4.859567311972513e-08, "logits/chosen": -1.980756163597107, "logits/rejected": -1.4338099956512451, "logps/chosen": -561.0158081054688, "logps/rejected": -697.2262573242188, "loss": 0.6756, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.42496609687805176, "rewards/margins": 0.15980008244514465, "rewards/rejected": -0.5847662091255188, "step": 12790 }, { "epoch": 0.94, "learning_rate": 4.7340759846957184e-08, "logits/chosen": -2.1977999210357666, "logits/rejected": -1.839522361755371, "logps/chosen": -447.82879638671875, "logps/rejected": -599.2345581054688, "loss": 0.6789, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.30951207876205444, "rewards/margins": 0.17535224556922913, "rewards/rejected": -0.48486438393592834, "step": 12800 }, { "epoch": 0.94, "learning_rate": 4.61021075223364e-08, "logits/chosen": -2.2377076148986816, "logits/rejected": -1.5712592601776123, "logps/chosen": -511.265869140625, "logps/rejected": -741.8905639648438, "loss": 0.6709, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3227751553058624, "rewards/margins": 0.23621277511119843, "rewards/rejected": -0.5589879155158997, "step": 12810 }, { "epoch": 0.95, "learning_rate": 4.4879724358036506e-08, "logits/chosen": -1.819135069847107, "logits/rejected": -1.3941258192062378, "logps/chosen": -430.1790466308594, "logps/rejected": -614.5762939453125, "loss": 0.6758, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2838742136955261, "rewards/margins": 0.21227355301380157, "rewards/rejected": -0.4961478114128113, "step": 12820 }, { "epoch": 0.95, "learning_rate": 4.3673618458366376e-08, "logits/chosen": -2.081714630126953, "logits/rejected": -1.7655397653579712, "logps/chosen": -511.0841369628906, "logps/rejected": -660.4319458007812, "loss": 0.6771, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3359869420528412, "rewards/margins": 0.18515540659427643, "rewards/rejected": -0.5211423635482788, "step": 12830 }, { "epoch": 0.95, "learning_rate": 4.24837978197179e-08, "logits/chosen": -2.070183277130127, "logits/rejected": -1.5613805055618286, "logps/chosen": -544.7687377929688, "logps/rejected": -712.4725341796875, "loss": 0.6828, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3667714297771454, "rewards/margins": 0.19356803596019745, "rewards/rejected": -0.5603395104408264, "step": 12840 }, { "epoch": 0.95, "learning_rate": 4.131027033051405e-08, "logits/chosen": -2.2811598777770996, "logits/rejected": -1.6724061965942383, "logps/chosen": -484.611572265625, "logps/rejected": -598.7926025390625, "loss": 0.6849, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.30138421058654785, "rewards/margins": 0.18351496756076813, "rewards/rejected": -0.4848991334438324, "step": 12850 }, { "epoch": 0.95, "learning_rate": 4.01530437711542e-08, "logits/chosen": -2.257429599761963, "logits/rejected": -1.6546452045440674, "logps/chosen": -500.04736328125, "logps/rejected": -683.2864990234375, "loss": 0.6798, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3383658528327942, "rewards/margins": 0.19624535739421844, "rewards/rejected": -0.534611165523529, "step": 12860 }, { "epoch": 0.95, "learning_rate": 3.901212581396419e-08, "logits/chosen": -2.0794663429260254, "logits/rejected": -1.5297162532806396, "logps/chosen": -554.9591064453125, "logps/rejected": -715.4069213867188, "loss": 0.6801, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4102260172367096, "rewards/margins": 0.212889164686203, "rewards/rejected": -0.6231151819229126, "step": 12870 }, { "epoch": 0.95, "learning_rate": 3.7887524023145495e-08, "logits/chosen": -2.028671979904175, "logits/rejected": -1.6623729467391968, "logps/chosen": -509.7330017089844, "logps/rejected": -667.0423583984375, "loss": 0.6762, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.31411051750183105, "rewards/margins": 0.20937368273735046, "rewards/rejected": -0.5234841704368591, "step": 12880 }, { "epoch": 0.95, "learning_rate": 3.6779245854723924e-08, "logits/chosen": -2.1780922412872314, "logits/rejected": -1.4003506898880005, "logps/chosen": -563.6246948242188, "logps/rejected": -672.9862060546875, "loss": 0.6752, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.39223262667655945, "rewards/margins": 0.15758594870567322, "rewards/rejected": -0.5498186349868774, "step": 12890 }, { "epoch": 0.95, "learning_rate": 3.568729865650156e-08, "logits/chosen": -2.0523457527160645, "logits/rejected": -1.5435668230056763, "logps/chosen": -543.4645385742188, "logps/rejected": -739.8508911132812, "loss": 0.6762, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35088831186294556, "rewards/margins": 0.2590486705303192, "rewards/rejected": -0.6099368929862976, "step": 12900 }, { "epoch": 0.95, "learning_rate": 3.461168966800682e-08, "logits/chosen": -2.1445236206054688, "logits/rejected": -1.69472336769104, "logps/chosen": -556.8291015625, "logps/rejected": -686.7012939453125, "loss": 0.6786, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3555348813533783, "rewards/margins": 0.17221534252166748, "rewards/rejected": -0.5277502536773682, "step": 12910 }, { "epoch": 0.95, "learning_rate": 3.355242602044728e-08, "logits/chosen": -2.0393624305725098, "logits/rejected": -1.8367588520050049, "logps/chosen": -599.5696411132812, "logps/rejected": -747.4046630859375, "loss": 0.6834, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4299301207065582, "rewards/margins": 0.13879582285881042, "rewards/rejected": -0.5687259435653687, "step": 12920 }, { "epoch": 0.95, "learning_rate": 3.2509514736661916e-08, "logits/chosen": -2.3010573387145996, "logits/rejected": -1.9412412643432617, "logps/chosen": -445.8348693847656, "logps/rejected": -628.55908203125, "loss": 0.6781, "rewards/accuracies": 0.625, "rewards/chosen": -0.32043740153312683, "rewards/margins": 0.18692679703235626, "rewards/rejected": -0.5073641538619995, "step": 12930 }, { "epoch": 0.95, "learning_rate": 3.148296273107504e-08, "logits/chosen": -2.088263511657715, "logits/rejected": -1.4514672756195068, "logps/chosen": -435.6410217285156, "logps/rejected": -664.3048095703125, "loss": 0.6701, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2882080078125, "rewards/margins": 0.24878863990306854, "rewards/rejected": -0.5369966626167297, "step": 12940 }, { "epoch": 0.96, "learning_rate": 3.047277680964994e-08, "logits/chosen": -2.095994710922241, "logits/rejected": -1.679168701171875, "logps/chosen": -506.3177185058594, "logps/rejected": -635.7426147460938, "loss": 0.681, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3427339196205139, "rewards/margins": 0.1587332785129547, "rewards/rejected": -0.5014671683311462, "step": 12950 }, { "epoch": 0.96, "learning_rate": 2.9478963669844206e-08, "logits/chosen": -2.221343517303467, "logits/rejected": -1.7979027032852173, "logps/chosen": -538.8377685546875, "logps/rejected": -618.6538696289062, "loss": 0.6844, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.36964863538742065, "rewards/margins": 0.12641626596450806, "rewards/rejected": -0.49606484174728394, "step": 12960 }, { "epoch": 0.96, "learning_rate": 2.8501529900564763e-08, "logits/chosen": -2.083627223968506, "logits/rejected": -1.7667655944824219, "logps/chosen": -450.45770263671875, "logps/rejected": -619.0853881835938, "loss": 0.6764, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3272958993911743, "rewards/margins": 0.18711771070957184, "rewards/rejected": -0.514413595199585, "step": 12970 }, { "epoch": 0.96, "learning_rate": 2.7540481982124556e-08, "logits/chosen": -2.097313642501831, "logits/rejected": -1.6797325611114502, "logps/chosen": -481.0269470214844, "logps/rejected": -629.5370483398438, "loss": 0.6789, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.30753764510154724, "rewards/margins": 0.19031976163387299, "rewards/rejected": -0.49785739183425903, "step": 12980 }, { "epoch": 0.96, "learning_rate": 2.659582628620039e-08, "logits/chosen": -2.1006758213043213, "logits/rejected": -1.6357545852661133, "logps/chosen": -479.580078125, "logps/rejected": -669.9364624023438, "loss": 0.6769, "rewards/accuracies": 0.75, "rewards/chosen": -0.34853699803352356, "rewards/margins": 0.19559845328330994, "rewards/rejected": -0.5441353917121887, "step": 12990 }, { "epoch": 0.96, "learning_rate": 2.566756907578849e-08, "logits/chosen": -2.130469560623169, "logits/rejected": -1.8688198328018188, "logps/chosen": -554.951416015625, "logps/rejected": -617.212646484375, "loss": 0.6837, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3740580081939697, "rewards/margins": 0.11796226352453232, "rewards/rejected": -0.49202027916908264, "step": 13000 }, { "epoch": 0.96, "learning_rate": 2.4755716505165396e-08, "logits/chosen": -2.233001232147217, "logits/rejected": -1.4151325225830078, "logps/chosen": -557.0718994140625, "logps/rejected": -750.64306640625, "loss": 0.6756, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3704867959022522, "rewards/margins": 0.2668144702911377, "rewards/rejected": -0.6373013257980347, "step": 13010 }, { "epoch": 0.96, "learning_rate": 2.386027461984575e-08, "logits/chosen": -2.15925931930542, "logits/rejected": -1.6611855030059814, "logps/chosen": -513.4318237304688, "logps/rejected": -689.4993286132812, "loss": 0.677, "rewards/accuracies": 0.75, "rewards/chosen": -0.34821367263793945, "rewards/margins": 0.21051856875419617, "rewards/rejected": -0.5587322115898132, "step": 13020 }, { "epoch": 0.96, "learning_rate": 2.2981249356542613e-08, "logits/chosen": -2.3480141162872314, "logits/rejected": -1.7237787246704102, "logps/chosen": -435.6160583496094, "logps/rejected": -638.805908203125, "loss": 0.674, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24736516177654266, "rewards/margins": 0.2442963570356369, "rewards/rejected": -0.49166154861450195, "step": 13030 }, { "epoch": 0.96, "learning_rate": 2.2118646543127787e-08, "logits/chosen": -2.2762229442596436, "logits/rejected": -1.7449747323989868, "logps/chosen": -530.4911499023438, "logps/rejected": -679.2596435546875, "loss": 0.6741, "rewards/accuracies": 0.75, "rewards/chosen": -0.3678920865058899, "rewards/margins": 0.1879151165485382, "rewards/rejected": -0.5558072328567505, "step": 13040 }, { "epoch": 0.96, "learning_rate": 2.1272471898594048e-08, "logits/chosen": -2.1587331295013428, "logits/rejected": -1.5705537796020508, "logps/chosen": -502.10943603515625, "logps/rejected": -665.5823974609375, "loss": 0.6809, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.30859336256980896, "rewards/margins": 0.21327099204063416, "rewards/rejected": -0.5218642950057983, "step": 13050 }, { "epoch": 0.96, "learning_rate": 2.0442731033016028e-08, "logits/chosen": -2.142001152038574, "logits/rejected": -1.6200952529907227, "logps/chosen": -443.989501953125, "logps/rejected": -636.673583984375, "loss": 0.676, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.28371793031692505, "rewards/margins": 0.22288815677165985, "rewards/rejected": -0.5066061019897461, "step": 13060 }, { "epoch": 0.96, "learning_rate": 1.9629429447513838e-08, "logits/chosen": -1.9350045919418335, "logits/rejected": -1.6426713466644287, "logps/chosen": -490.3277893066406, "logps/rejected": -726.4720458984375, "loss": 0.6764, "rewards/accuracies": 0.75, "rewards/chosen": -0.34233832359313965, "rewards/margins": 0.25576573610305786, "rewards/rejected": -0.5981041193008423, "step": 13070 }, { "epoch": 0.96, "learning_rate": 1.883257253421672e-08, "logits/chosen": -2.0295212268829346, "logits/rejected": -1.8594310283660889, "logps/chosen": -527.4175415039062, "logps/rejected": -703.8955688476562, "loss": 0.6794, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.39009684324264526, "rewards/margins": 0.16917237639427185, "rewards/rejected": -0.5592691898345947, "step": 13080 }, { "epoch": 0.97, "learning_rate": 1.805216557622641e-08, "logits/chosen": -2.1977498531341553, "logits/rejected": -1.4193518161773682, "logps/chosen": -537.6456298828125, "logps/rejected": -726.0626220703125, "loss": 0.6745, "rewards/accuracies": 0.75, "rewards/chosen": -0.34622102975845337, "rewards/margins": 0.2525244355201721, "rewards/rejected": -0.5987454652786255, "step": 13090 }, { "epoch": 0.97, "learning_rate": 1.728821374758327e-08, "logits/chosen": -2.115020275115967, "logits/rejected": -1.4226484298706055, "logps/chosen": -563.5181884765625, "logps/rejected": -745.5760498046875, "loss": 0.6705, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4280196726322174, "rewards/margins": 0.24068407714366913, "rewards/rejected": -0.6687037944793701, "step": 13100 }, { "epoch": 0.97, "learning_rate": 1.654072211323049e-08, "logits/chosen": -1.9813369512557983, "logits/rejected": -1.6112480163574219, "logps/chosen": -501.24273681640625, "logps/rejected": -704.4609375, "loss": 0.6715, "rewards/accuracies": 0.75, "rewards/chosen": -0.3541114926338196, "rewards/margins": 0.2156691551208496, "rewards/rejected": -0.569780707359314, "step": 13110 }, { "epoch": 0.97, "learning_rate": 1.5809695628982436e-08, "logits/chosen": -2.06260085105896, "logits/rejected": -1.6569324731826782, "logps/chosen": -415.3033752441406, "logps/rejected": -600.2222900390625, "loss": 0.6742, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2815003991127014, "rewards/margins": 0.204965278506279, "rewards/rejected": -0.4864656925201416, "step": 13120 }, { "epoch": 0.97, "learning_rate": 1.5095139141489967e-08, "logits/chosen": -2.2019333839416504, "logits/rejected": -1.4184856414794922, "logps/chosen": -557.0421142578125, "logps/rejected": -653.9327392578125, "loss": 0.6807, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.38121289014816284, "rewards/margins": 0.15846183896064758, "rewards/rejected": -0.5396747589111328, "step": 13130 }, { "epoch": 0.97, "learning_rate": 1.4397057388209624e-08, "logits/chosen": -2.188225269317627, "logits/rejected": -1.4371635913848877, "logps/chosen": -466.34356689453125, "logps/rejected": -643.6205444335938, "loss": 0.6799, "rewards/accuracies": 0.75, "rewards/chosen": -0.3401549458503723, "rewards/margins": 0.22503869235515594, "rewards/rejected": -0.5651935935020447, "step": 13140 }, { "epoch": 0.97, "learning_rate": 1.3715454997371425e-08, "logits/chosen": -2.1964378356933594, "logits/rejected": -1.779140830039978, "logps/chosen": -504.13446044921875, "logps/rejected": -647.8589477539062, "loss": 0.6785, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.36439064145088196, "rewards/margins": 0.16630928218364716, "rewards/rejected": -0.5306998491287231, "step": 13150 }, { "epoch": 0.97, "learning_rate": 1.3050336487948345e-08, "logits/chosen": -2.416680097579956, "logits/rejected": -1.7515029907226562, "logps/chosen": -477.11065673828125, "logps/rejected": -653.2784423828125, "loss": 0.6723, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.26142990589141846, "rewards/margins": 0.25491592288017273, "rewards/rejected": -0.5163458585739136, "step": 13160 }, { "epoch": 0.97, "learning_rate": 1.2401706269626613e-08, "logits/chosen": -1.9738667011260986, "logits/rejected": -1.5085911750793457, "logps/chosen": -614.4501953125, "logps/rejected": -801.5596923828125, "loss": 0.6823, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4316886365413666, "rewards/margins": 0.24475224316120148, "rewards/rejected": -0.6764408946037292, "step": 13170 }, { "epoch": 0.97, "learning_rate": 1.1769568642776564e-08, "logits/chosen": -2.1701290607452393, "logits/rejected": -1.7823759317398071, "logps/chosen": -441.39837646484375, "logps/rejected": -598.5101318359375, "loss": 0.6774, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.280053973197937, "rewards/margins": 0.19000685214996338, "rewards/rejected": -0.4700608253479004, "step": 13180 }, { "epoch": 0.97, "learning_rate": 1.1153927798422947e-08, "logits/chosen": -2.30700945854187, "logits/rejected": -1.6759897470474243, "logps/chosen": -423.4248962402344, "logps/rejected": -577.5445556640625, "loss": 0.6759, "rewards/accuracies": 0.75, "rewards/chosen": -0.25268661975860596, "rewards/margins": 0.2047305554151535, "rewards/rejected": -0.45741719007492065, "step": 13190 }, { "epoch": 0.97, "learning_rate": 1.0554787818219392e-08, "logits/chosen": -1.9693443775177002, "logits/rejected": -1.5144970417022705, "logps/chosen": -564.2411499023438, "logps/rejected": -778.2573852539062, "loss": 0.6776, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3845518231391907, "rewards/margins": 0.2363523244857788, "rewards/rejected": -0.6209041476249695, "step": 13200 }, { "epoch": 0.97, "learning_rate": 9.972152674418979e-09, "logits/chosen": -2.2641122341156006, "logits/rejected": -1.7866334915161133, "logps/chosen": -429.98577880859375, "logps/rejected": -576.9481201171875, "loss": 0.6804, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2517644464969635, "rewards/margins": 0.1944137066602707, "rewards/rejected": -0.446178138256073, "step": 13210 }, { "epoch": 0.98, "learning_rate": 9.40602622984954e-09, "logits/chosen": -2.0195271968841553, "logits/rejected": -1.7596170902252197, "logps/chosen": -512.6561279296875, "logps/rejected": -632.3330078125, "loss": 0.6831, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3950675129890442, "rewards/margins": 0.1380140632390976, "rewards/rejected": -0.5330815315246582, "step": 13220 }, { "epoch": 0.98, "learning_rate": 8.856412237887301e-09, "logits/chosen": -2.0663018226623535, "logits/rejected": -1.3500030040740967, "logps/chosen": -509.46142578125, "logps/rejected": -717.6740112304688, "loss": 0.6713, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.356789767742157, "rewards/margins": 0.2694633901119232, "rewards/rejected": -0.626253068447113, "step": 13230 }, { "epoch": 0.98, "learning_rate": 8.32331434243161e-09, "logits/chosen": -1.8588111400604248, "logits/rejected": -1.597657561302185, "logps/chosen": -466.10845947265625, "logps/rejected": -655.0759887695312, "loss": 0.6789, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3215774893760681, "rewards/margins": 0.19036811590194702, "rewards/rejected": -0.5119456052780151, "step": 13240 }, { "epoch": 0.98, "learning_rate": 7.806736077882182e-09, "logits/chosen": -1.9217771291732788, "logits/rejected": -1.5171880722045898, "logps/chosen": -568.1885986328125, "logps/rejected": -749.308349609375, "loss": 0.68, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.4489242434501648, "rewards/margins": 0.19380399584770203, "rewards/rejected": -0.6427282691001892, "step": 13250 }, { "epoch": 0.98, "learning_rate": 7.3066808691146795e-09, "logits/chosen": -2.0910661220550537, "logits/rejected": -1.5778052806854248, "logps/chosen": -568.7537841796875, "logps/rejected": -707.5242309570312, "loss": 0.678, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.36010006070137024, "rewards/margins": 0.19047895073890686, "rewards/rejected": -0.5505790710449219, "step": 13260 }, { "epoch": 0.98, "learning_rate": 6.823152031457669e-09, "logits/chosen": -1.9582475423812866, "logits/rejected": -1.693516492843628, "logps/chosen": -571.99169921875, "logps/rejected": -737.3331909179688, "loss": 0.6738, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4029804766178131, "rewards/margins": 0.18231156468391418, "rewards/rejected": -0.5852919816970825, "step": 13270 }, { "epoch": 0.98, "learning_rate": 6.356152770671253e-09, "logits/chosen": -1.978082299232483, "logits/rejected": -1.4187763929367065, "logps/chosen": -520.1726684570312, "logps/rejected": -674.650390625, "loss": 0.6778, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3697240948677063, "rewards/margins": 0.18395251035690308, "rewards/rejected": -0.5536766052246094, "step": 13280 }, { "epoch": 0.98, "learning_rate": 5.905686182925696e-09, "logits/chosen": -1.8301141262054443, "logits/rejected": -1.2987146377563477, "logps/chosen": -597.242431640625, "logps/rejected": -775.1890258789062, "loss": 0.6739, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4644309878349304, "rewards/margins": 0.19521036744117737, "rewards/rejected": -0.6596413850784302, "step": 13290 }, { "epoch": 0.98, "learning_rate": 5.471755254781163e-09, "logits/chosen": -1.8936046361923218, "logits/rejected": -1.574065089225769, "logps/chosen": -606.49658203125, "logps/rejected": -745.2042236328125, "loss": 0.6813, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4652292728424072, "rewards/margins": 0.14023764431476593, "rewards/rejected": -0.605466902256012, "step": 13300 }, { "epoch": 0.98, "learning_rate": 5.054362863167461e-09, "logits/chosen": -2.0544211864471436, "logits/rejected": -1.4035484790802002, "logps/chosen": -507.0970764160156, "logps/rejected": -639.04638671875, "loss": 0.6756, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3094492554664612, "rewards/margins": 0.18608149886131287, "rewards/rejected": -0.49553075432777405, "step": 13310 }, { "epoch": 0.98, "learning_rate": 4.653511775364883e-09, "logits/chosen": -2.1841256618499756, "logits/rejected": -1.86862051486969, "logps/chosen": -493.783935546875, "logps/rejected": -671.0776977539062, "loss": 0.6795, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.30472439527511597, "rewards/margins": 0.21225805580615997, "rewards/rejected": -0.5169824361801147, "step": 13320 }, { "epoch": 0.98, "learning_rate": 4.269204648986169e-09, "logits/chosen": -2.1216492652893066, "logits/rejected": -1.589004397392273, "logps/chosen": -487.860595703125, "logps/rejected": -664.3765258789062, "loss": 0.6755, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3390292823314667, "rewards/margins": 0.20134341716766357, "rewards/rejected": -0.5403726696968079, "step": 13330 }, { "epoch": 0.98, "learning_rate": 3.901444031959301e-09, "logits/chosen": -1.9732964038848877, "logits/rejected": -1.4374659061431885, "logps/chosen": -461.24664306640625, "logps/rejected": -641.8704223632812, "loss": 0.6744, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3345456123352051, "rewards/margins": 0.2051454335451126, "rewards/rejected": -0.5396910905838013, "step": 13340 }, { "epoch": 0.98, "learning_rate": 3.5502323625094536e-09, "logits/chosen": -2.2270588874816895, "logits/rejected": -1.7107908725738525, "logps/chosen": -485.97265625, "logps/rejected": -630.3170776367188, "loss": 0.6796, "rewards/accuracies": 0.75, "rewards/chosen": -0.29421865940093994, "rewards/margins": 0.18948009610176086, "rewards/rejected": -0.4836987555027008, "step": 13350 }, { "epoch": 0.99, "learning_rate": 3.215571969144016e-09, "logits/chosen": -2.181978464126587, "logits/rejected": -1.5270510911941528, "logps/chosen": -509.2396545410156, "logps/rejected": -683.7105102539062, "loss": 0.6781, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3201007843017578, "rewards/margins": 0.22625453770160675, "rewards/rejected": -0.5463553071022034, "step": 13360 }, { "epoch": 0.99, "learning_rate": 2.897465070636485e-09, "logits/chosen": -2.0794177055358887, "logits/rejected": -1.4219924211502075, "logps/chosen": -459.6905212402344, "logps/rejected": -748.3280029296875, "loss": 0.6743, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2582501471042633, "rewards/margins": 0.3288925588130951, "rewards/rejected": -0.5871427059173584, "step": 13370 }, { "epoch": 0.99, "learning_rate": 2.595913776012038e-09, "logits/chosen": -2.1510820388793945, "logits/rejected": -1.9786052703857422, "logps/chosen": -524.8404541015625, "logps/rejected": -737.4847412109375, "loss": 0.6801, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3358801007270813, "rewards/margins": 0.17422989010810852, "rewards/rejected": -0.5101100206375122, "step": 13380 }, { "epoch": 0.99, "learning_rate": 2.310920084533097e-09, "logits/chosen": -2.116701602935791, "logits/rejected": -1.2527574300765991, "logps/chosen": -496.93878173828125, "logps/rejected": -712.5230712890625, "loss": 0.6719, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.34854358434677124, "rewards/margins": 0.2819930911064148, "rewards/rejected": -0.630536675453186, "step": 13390 }, { "epoch": 0.99, "learning_rate": 2.0424858856865626e-09, "logits/chosen": -1.8671824932098389, "logits/rejected": -1.5543551445007324, "logps/chosen": -542.1238403320312, "logps/rejected": -697.3936767578125, "loss": 0.6785, "rewards/accuracies": 0.75, "rewards/chosen": -0.37245121598243713, "rewards/margins": 0.1863829791545868, "rewards/rejected": -0.5588341951370239, "step": 13400 }, { "epoch": 0.99, "learning_rate": 1.7906129591713228e-09, "logits/chosen": -1.8405139446258545, "logits/rejected": -1.4381911754608154, "logps/chosen": -569.6453857421875, "logps/rejected": -696.1138305664062, "loss": 0.679, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3609507381916046, "rewards/margins": 0.20545437932014465, "rewards/rejected": -0.5664051175117493, "step": 13410 }, { "epoch": 0.99, "learning_rate": 1.555302974886319e-09, "logits/chosen": -2.2322278022766113, "logits/rejected": -1.8894851207733154, "logps/chosen": -470.84271240234375, "logps/rejected": -588.6470947265625, "loss": 0.6805, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2815457284450531, "rewards/margins": 0.19355514645576477, "rewards/rejected": -0.4751008450984955, "step": 13420 }, { "epoch": 0.99, "learning_rate": 1.3365574929188885e-09, "logits/chosen": -2.0310654640197754, "logits/rejected": -1.5700113773345947, "logps/chosen": -541.9745483398438, "logps/rejected": -712.5228881835938, "loss": 0.6734, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3496272563934326, "rewards/margins": 0.22145159542560577, "rewards/rejected": -0.571078896522522, "step": 13430 }, { "epoch": 0.99, "learning_rate": 1.134377963535327e-09, "logits/chosen": -2.0398566722869873, "logits/rejected": -1.5807676315307617, "logps/chosen": -480.38555908203125, "logps/rejected": -688.5001220703125, "loss": 0.6735, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.32789021730422974, "rewards/margins": 0.23099784553050995, "rewards/rejected": -0.5588880777359009, "step": 13440 }, { "epoch": 0.99, "learning_rate": 9.487657271708972e-10, "logits/chosen": -2.163543224334717, "logits/rejected": -1.7217289209365845, "logps/chosen": -525.933837890625, "logps/rejected": -709.1027221679688, "loss": 0.6699, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.40710359811782837, "rewards/margins": 0.19294238090515137, "rewards/rejected": -0.600045919418335, "step": 13450 }, { "epoch": 0.99, "learning_rate": 7.797220144212248e-10, "logits/chosen": -1.9574949741363525, "logits/rejected": -1.522156000137329, "logps/chosen": -528.7313232421875, "logps/rejected": -746.55029296875, "loss": 0.6674, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.35904935002326965, "rewards/margins": 0.2636280655860901, "rewards/rejected": -0.6226774454116821, "step": 13460 }, { "epoch": 0.99, "learning_rate": 6.272479460331382e-10, "logits/chosen": -2.0719189643859863, "logits/rejected": -1.656266450881958, "logps/chosen": -537.4873657226562, "logps/rejected": -650.6677856445312, "loss": 0.6811, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.37548959255218506, "rewards/margins": 0.17830708622932434, "rewards/rejected": -0.553796648979187, "step": 13470 }, { "epoch": 0.99, "learning_rate": 4.913445328982858e-10, "logits/chosen": -2.0133447647094727, "logits/rejected": -1.658456802368164, "logps/chosen": -532.3829345703125, "logps/rejected": -645.3474731445312, "loss": 0.6779, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4054533541202545, "rewards/margins": 0.13806596398353577, "rewards/rejected": -0.5435193181037903, "step": 13480 }, { "epoch": 1.0, "learning_rate": 3.720126760467513e-10, "logits/chosen": -1.9276317358016968, "logits/rejected": -1.5334335565567017, "logps/chosen": -484.605224609375, "logps/rejected": -581.132080078125, "loss": 0.6743, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.304240882396698, "rewards/margins": 0.16634109616279602, "rewards/rejected": -0.47058194875717163, "step": 13490 }, { "epoch": 1.0, "learning_rate": 2.692531666392828e-10, "logits/chosen": -2.2621121406555176, "logits/rejected": -1.946166753768921, "logps/chosen": -508.70263671875, "logps/rejected": -610.4168701171875, "loss": 0.6797, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3451114892959595, "rewards/margins": 0.12800869345664978, "rewards/rejected": -0.47312015295028687, "step": 13500 }, { "epoch": 1.0, "learning_rate": 1.8306668596396182e-10, "logits/chosen": -2.085541009902954, "logits/rejected": -1.5001853704452515, "logps/chosen": -493.76800537109375, "logps/rejected": -686.9650268554688, "loss": 0.6818, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3542770445346832, "rewards/margins": 0.21675463020801544, "rewards/rejected": -0.5710316896438599, "step": 13510 }, { "epoch": 1.0, "learning_rate": 1.1345380543092977e-10, "logits/chosen": -2.341403007507324, "logits/rejected": -1.882266640663147, "logps/chosen": -550.5994873046875, "logps/rejected": -688.9549560546875, "loss": 0.6792, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.37705662846565247, "rewards/margins": 0.15848544239997864, "rewards/rejected": -0.5355420708656311, "step": 13520 }, { "epoch": 1.0, "learning_rate": 6.041498656794709e-11, "logits/chosen": -1.9687385559082031, "logits/rejected": -1.5566177368164062, "logps/chosen": -506.3179626464844, "logps/rejected": -712.0162353515625, "loss": 0.678, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3783538341522217, "rewards/margins": 0.2086760699748993, "rewards/rejected": -0.5870299339294434, "step": 13530 }, { "epoch": 1.0, "learning_rate": 2.3950581019005494e-11, "logits/chosen": -2.013645648956299, "logits/rejected": -1.4460623264312744, "logps/chosen": -600.9168090820312, "logps/rejected": -780.1202392578125, "loss": 0.676, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3903467059135437, "rewards/margins": 0.22069242596626282, "rewards/rejected": -0.6110392212867737, "step": 13540 }, { "epoch": 1.0, "learning_rate": 4.060830540164595e-12, "logits/chosen": -1.993157982826233, "logits/rejected": -1.2512309551239014, "logps/chosen": -555.664794921875, "logps/rejected": -782.3320922851562, "loss": 0.6718, "rewards/accuracies": 0.875, "rewards/chosen": -0.4094657003879547, "rewards/margins": 0.27978119254112244, "rewards/rejected": -0.6892468929290771, "step": 13550 }, { "epoch": 1.0, "step": 13557, "total_flos": 0.0, "train_loss": 0.6789484387030773, "train_runtime": 57278.5622, "train_samples_per_second": 0.947, "train_steps_per_second": 0.237 } ], "logging_steps": 10, "max_steps": 13557, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }