{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 15284, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.270111183780249e-09, "logits/chosen": -2.634561777114868, "logits/rejected": -2.673060417175293, "logps/chosen": -207.5323944091797, "logps/rejected": -286.9266052246094, "loss": 0.0999, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 3.270111183780249e-08, "logits/chosen": -2.217526912689209, "logits/rejected": -1.9651696681976318, "logps/chosen": -185.97515869140625, "logps/rejected": -165.29652404785156, "loss": 0.0679, "rewards/accuracies": 0.25, "rewards/chosen": -5.4748885304434225e-05, "rewards/margins": -0.0001916840556077659, "rewards/rejected": 0.00013693516666535288, "step": 10 }, { "epoch": 0.0, "learning_rate": 6.540222367560497e-08, "logits/chosen": -2.4312758445739746, "logits/rejected": -2.222425937652588, "logps/chosen": -232.4267120361328, "logps/rejected": -231.45645141601562, "loss": 0.0519, "rewards/accuracies": 0.5, "rewards/chosen": 2.1421179553726688e-05, "rewards/margins": 5.48317220818717e-05, "rewards/rejected": -3.341053889016621e-05, "step": 20 }, { "epoch": 0.0, "learning_rate": 9.810333551340746e-08, "logits/chosen": -2.260446310043335, "logits/rejected": -2.1646246910095215, "logps/chosen": -197.43643188476562, "logps/rejected": -219.1787109375, "loss": 0.0568, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -1.2294274711166508e-05, "rewards/margins": 7.500645733671263e-05, "rewards/rejected": -8.73007156769745e-05, "step": 30 }, { "epoch": 0.0, "learning_rate": 1.3080444735120995e-07, "logits/chosen": -2.211392641067505, "logits/rejected": -2.250948429107666, "logps/chosen": -276.02606201171875, "logps/rejected": -265.74017333984375, "loss": 0.0472, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -3.8421439967351034e-05, "rewards/margins": -5.768024129793048e-05, "rewards/rejected": 1.9258804968558252e-05, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.6350555918901243e-07, "logits/chosen": -2.3482930660247803, "logits/rejected": -2.1414954662323, "logps/chosen": -204.8660125732422, "logps/rejected": -184.7296142578125, "loss": 0.0762, "rewards/accuracies": 0.375, "rewards/chosen": -6.044892506906763e-05, "rewards/margins": -2.5872495825751685e-05, "rewards/rejected": -3.457641651039012e-05, "step": 50 }, { "epoch": 0.0, "learning_rate": 1.9620667102681492e-07, "logits/chosen": -2.3076107501983643, "logits/rejected": -2.068195343017578, "logps/chosen": -209.79153442382812, "logps/rejected": -185.903564453125, "loss": 0.0889, "rewards/accuracies": 0.32499998807907104, "rewards/chosen": 1.4363817172124982e-05, "rewards/margins": -6.558441964443773e-05, "rewards/rejected": 7.994823681656271e-05, "step": 60 }, { "epoch": 0.0, "learning_rate": 2.289077828646174e-07, "logits/chosen": -2.270057201385498, "logits/rejected": -2.157529354095459, "logps/chosen": -218.04248046875, "logps/rejected": -207.9213409423828, "loss": 0.0433, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 6.704496627207845e-05, "rewards/margins": -5.229568341746926e-05, "rewards/rejected": 0.00011934064968954772, "step": 70 }, { "epoch": 0.01, "learning_rate": 2.616088947024199e-07, "logits/chosen": -2.509779214859009, "logits/rejected": -2.2283122539520264, "logps/chosen": -258.8435974121094, "logps/rejected": -213.65139770507812, "loss": 0.0631, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 0.00010680671402951702, "rewards/margins": -2.5027853553183377e-05, "rewards/rejected": 0.0001318345603067428, "step": 80 }, { "epoch": 0.01, "learning_rate": 2.943100065402224e-07, "logits/chosen": -2.2587084770202637, "logits/rejected": -2.1733267307281494, "logps/chosen": -184.70590209960938, "logps/rejected": -165.45596313476562, "loss": 0.0224, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": 7.888329128036276e-05, "rewards/margins": -2.1445059246616438e-05, "rewards/rejected": 0.000100328354164958, "step": 90 }, { "epoch": 0.01, "learning_rate": 3.2701111837802487e-07, "logits/chosen": -2.431579351425171, "logits/rejected": -2.42628812789917, "logps/chosen": -168.76763916015625, "logps/rejected": -183.9268035888672, "loss": 0.0705, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 0.0001672560756560415, "rewards/margins": 4.957863893650938e-06, "rewards/rejected": 0.00016229819448199123, "step": 100 }, { "epoch": 0.01, "eval_logits/chosen": -2.347216844558716, "eval_logits/rejected": -2.15889048576355, "eval_logps/chosen": -231.76748657226562, "eval_logps/rejected": -211.46539306640625, "eval_loss": 0.05369621142745018, "eval_rewards/accuracies": 0.5070000290870667, "eval_rewards/chosen": 0.00023747148225083947, "eval_rewards/margins": 9.095761924982071e-05, "eval_rewards/rejected": 0.00014651386300101876, "eval_runtime": 712.6723, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 100 }, { "epoch": 0.01, "learning_rate": 3.5971223021582736e-07, "logits/chosen": -2.348576784133911, "logits/rejected": -2.0004684925079346, "logps/chosen": -222.38973999023438, "logps/rejected": -167.09312438964844, "loss": 0.0747, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0004057864716742188, "rewards/margins": 0.0002643073967192322, "rewards/rejected": 0.00014147911861073226, "step": 110 }, { "epoch": 0.01, "learning_rate": 3.9241334205362984e-07, "logits/chosen": -2.3409206867218018, "logits/rejected": -2.2450687885284424, "logps/chosen": -223.9632568359375, "logps/rejected": -234.099853515625, "loss": 0.0407, "rewards/accuracies": 0.375, "rewards/chosen": 0.00032885983819141984, "rewards/margins": 5.7481003750581294e-05, "rewards/rejected": 0.0002713788126129657, "step": 120 }, { "epoch": 0.01, "learning_rate": 4.251144538914324e-07, "logits/chosen": -2.2624969482421875, "logits/rejected": -2.218531370162964, "logps/chosen": -149.4534454345703, "logps/rejected": -148.3506622314453, "loss": 0.0445, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0001924282405525446, "rewards/margins": 0.00012479283032007515, "rewards/rejected": 6.763542478438467e-05, "step": 130 }, { "epoch": 0.01, "learning_rate": 4.578155657292348e-07, "logits/chosen": -2.3214268684387207, "logits/rejected": -2.222231388092041, "logps/chosen": -225.5745086669922, "logps/rejected": -159.42672729492188, "loss": 0.0516, "rewards/accuracies": 0.5, "rewards/chosen": 0.0003844438761007041, "rewards/margins": 0.00012006834003841504, "rewards/rejected": 0.00026437555789016187, "step": 140 }, { "epoch": 0.01, "learning_rate": 4.905166775670374e-07, "logits/chosen": -2.3672947883605957, "logits/rejected": -2.1587512493133545, "logps/chosen": -231.07369995117188, "logps/rejected": -229.1747589111328, "loss": 0.0976, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.00028193488833494484, "rewards/margins": 0.0002989462809637189, "rewards/rejected": -1.7011407180689275e-05, "step": 150 }, { "epoch": 0.01, "learning_rate": 5.232177894048398e-07, "logits/chosen": -2.215609073638916, "logits/rejected": -2.228020191192627, "logps/chosen": -260.3157653808594, "logps/rejected": -224.8573455810547, "loss": 0.0647, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0005527162575162947, "rewards/margins": 0.00041056034388020635, "rewards/rejected": 0.00014215594273991883, "step": 160 }, { "epoch": 0.01, "learning_rate": 5.559189012426422e-07, "logits/chosen": -2.315016269683838, "logits/rejected": -2.0377418994903564, "logps/chosen": -180.51803588867188, "logps/rejected": -156.79299926757812, "loss": 0.0538, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": 0.00033455403172411025, "rewards/margins": 0.00023702275939285755, "rewards/rejected": 9.753130143508315e-05, "step": 170 }, { "epoch": 0.01, "learning_rate": 5.886200130804448e-07, "logits/chosen": -2.398703098297119, "logits/rejected": -2.3381752967834473, "logps/chosen": -217.62881469726562, "logps/rejected": -198.7718963623047, "loss": 0.0492, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0005997681291773915, "rewards/margins": 0.0005672207335010171, "rewards/rejected": 3.254740659031086e-05, "step": 180 }, { "epoch": 0.01, "learning_rate": 6.213211249182473e-07, "logits/chosen": -2.0695509910583496, "logits/rejected": -2.17470383644104, "logps/chosen": -191.025390625, "logps/rejected": -208.61642456054688, "loss": 0.0433, "rewards/accuracies": 0.5, "rewards/chosen": 0.00047088024439290166, "rewards/margins": 0.0004346524947322905, "rewards/rejected": 3.622781878220849e-05, "step": 190 }, { "epoch": 0.01, "learning_rate": 6.540222367560497e-07, "logits/chosen": -2.277104139328003, "logits/rejected": -2.2413220405578613, "logps/chosen": -146.78689575195312, "logps/rejected": -177.64993286132812, "loss": 0.0633, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.0004251459613442421, "rewards/margins": 0.0005465105059556663, "rewards/rejected": -0.00012136458099121228, "step": 200 }, { "epoch": 0.01, "eval_logits/chosen": -2.3490700721740723, "eval_logits/rejected": -2.1606407165527344, "eval_logps/chosen": -231.36859130859375, "eval_logps/rejected": -211.58856201171875, "eval_loss": 0.0534430630505085, "eval_rewards/accuracies": 0.5789999961853027, "eval_rewards/chosen": 0.0006363485590554774, "eval_rewards/margins": 0.0006129963439889252, "eval_rewards/rejected": 2.3352227799477987e-05, "eval_runtime": 715.2446, "eval_samples_per_second": 2.796, "eval_steps_per_second": 1.398, "step": 200 }, { "epoch": 0.01, "learning_rate": 6.867233485938523e-07, "logits/chosen": -2.4267737865448, "logits/rejected": -2.203217029571533, "logps/chosen": -218.5438232421875, "logps/rejected": -188.379150390625, "loss": 0.0459, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0009896651608869433, "rewards/margins": 0.0012029169593006372, "rewards/rejected": -0.00021325172565411776, "step": 210 }, { "epoch": 0.01, "learning_rate": 7.194244604316547e-07, "logits/chosen": -2.2128663063049316, "logits/rejected": -2.0407023429870605, "logps/chosen": -182.70306396484375, "logps/rejected": -174.86032104492188, "loss": 0.0294, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0006333122146315873, "rewards/margins": 0.0005503271822817624, "rewards/rejected": 8.298495959024876e-05, "step": 220 }, { "epoch": 0.02, "learning_rate": 7.521255722694571e-07, "logits/chosen": -2.420090436935425, "logits/rejected": -2.0248827934265137, "logps/chosen": -278.8360290527344, "logps/rejected": -184.03358459472656, "loss": 0.0592, "rewards/accuracies": 0.5, "rewards/chosen": 0.0009433095110580325, "rewards/margins": 0.0006528933881781995, "rewards/rejected": 0.0002904160355683416, "step": 230 }, { "epoch": 0.02, "learning_rate": 7.848266841072597e-07, "logits/chosen": -2.200209140777588, "logits/rejected": -2.165177822113037, "logps/chosen": -214.45608520507812, "logps/rejected": -206.3268585205078, "loss": 0.0678, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0013947974657639861, "rewards/margins": 0.0014381732326000929, "rewards/rejected": -4.337554128142074e-05, "step": 240 }, { "epoch": 0.02, "learning_rate": 8.175277959450622e-07, "logits/chosen": -2.1683127880096436, "logits/rejected": -2.327029228210449, "logps/chosen": -217.941162109375, "logps/rejected": -220.6362762451172, "loss": 0.0235, "rewards/accuracies": 0.625, "rewards/chosen": 0.002484980970621109, "rewards/margins": 0.0011551815550774336, "rewards/rejected": 0.0013297994155436754, "step": 250 }, { "epoch": 0.02, "learning_rate": 8.502289077828648e-07, "logits/chosen": -2.506381034851074, "logits/rejected": -2.1447534561157227, "logps/chosen": -254.0188751220703, "logps/rejected": -188.9557647705078, "loss": 0.0549, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0031361293513327837, "rewards/margins": 0.001295455382205546, "rewards/rejected": 0.0018406739691272378, "step": 260 }, { "epoch": 0.02, "learning_rate": 8.829300196206672e-07, "logits/chosen": -2.423065662384033, "logits/rejected": -2.1277382373809814, "logps/chosen": -246.0767059326172, "logps/rejected": -230.6406707763672, "loss": 0.0907, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0027341675013303757, "rewards/margins": 0.0017934661591425538, "rewards/rejected": 0.0009407016332261264, "step": 270 }, { "epoch": 0.02, "learning_rate": 9.156311314584696e-07, "logits/chosen": -2.307023286819458, "logits/rejected": -2.192938804626465, "logps/chosen": -159.63018798828125, "logps/rejected": -146.35768127441406, "loss": 0.0412, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.0025582576636224985, "rewards/margins": 0.0011967580066993833, "rewards/rejected": 0.001361499889753759, "step": 280 }, { "epoch": 0.02, "learning_rate": 9.483322432962722e-07, "logits/chosen": -2.5546927452087402, "logits/rejected": -2.160230875015259, "logps/chosen": -281.8050842285156, "logps/rejected": -225.5108642578125, "loss": 0.0281, "rewards/accuracies": 0.625, "rewards/chosen": 0.003373731393367052, "rewards/margins": 0.0011994063388556242, "rewards/rejected": 0.0021743248216807842, "step": 290 }, { "epoch": 0.02, "learning_rate": 9.810333551340747e-07, "logits/chosen": -2.341611385345459, "logits/rejected": -2.1967883110046387, "logps/chosen": -264.48260498046875, "logps/rejected": -238.79946899414062, "loss": 0.0555, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.004177080001682043, "rewards/margins": 0.002966256346553564, "rewards/rejected": 0.0012108234222978354, "step": 300 }, { "epoch": 0.02, "eval_logits/chosen": -2.3529860973358154, "eval_logits/rejected": -2.164405584335327, "eval_logps/chosen": -228.16043090820312, "eval_logps/rejected": -209.732666015625, "eval_loss": 0.05282563716173172, "eval_rewards/accuracies": 0.5864999890327454, "eval_rewards/chosen": 0.0038445049431174994, "eval_rewards/margins": 0.0019652668852359056, "eval_rewards/rejected": 0.00187923782505095, "eval_runtime": 716.7972, "eval_samples_per_second": 2.79, "eval_steps_per_second": 1.395, "step": 300 }, { "epoch": 0.02, "learning_rate": 1.0137344669718771e-06, "logits/chosen": -2.349937915802002, "logits/rejected": -2.36421537399292, "logps/chosen": -166.72706604003906, "logps/rejected": -155.67208862304688, "loss": 0.0368, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0027632643468677998, "rewards/margins": 0.0005956062814220786, "rewards/rejected": 0.002167657483369112, "step": 310 }, { "epoch": 0.02, "learning_rate": 1.0464355788096796e-06, "logits/chosen": -2.456150531768799, "logits/rejected": -2.0612757205963135, "logps/chosen": -220.5830078125, "logps/rejected": -191.7897491455078, "loss": 0.053, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.004582090303301811, "rewards/margins": 0.0019576200284063816, "rewards/rejected": 0.0026244705077260733, "step": 320 }, { "epoch": 0.02, "learning_rate": 1.079136690647482e-06, "logits/chosen": -2.437403440475464, "logits/rejected": -2.2046260833740234, "logps/chosen": -202.77822875976562, "logps/rejected": -175.48638916015625, "loss": 0.0913, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.005633027758449316, "rewards/margins": 0.0028553404845297337, "rewards/rejected": 0.002777687506750226, "step": 330 }, { "epoch": 0.02, "learning_rate": 1.1118378024852844e-06, "logits/chosen": -2.186697244644165, "logits/rejected": -2.3530421257019043, "logps/chosen": -150.18624877929688, "logps/rejected": -177.141357421875, "loss": 0.0374, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 0.003528169821947813, "rewards/margins": -0.00048769908607937396, "rewards/rejected": 0.004015869460999966, "step": 340 }, { "epoch": 0.02, "learning_rate": 1.144538914323087e-06, "logits/chosen": -2.4262590408325195, "logits/rejected": -1.9881395101547241, "logps/chosen": -317.04644775390625, "logps/rejected": -248.05477905273438, "loss": 0.0576, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.005064266733825207, "rewards/margins": 0.003854922251775861, "rewards/rejected": 0.001209344482049346, "step": 350 }, { "epoch": 0.02, "learning_rate": 1.1772400261608895e-06, "logits/chosen": -2.4901607036590576, "logits/rejected": -2.196319580078125, "logps/chosen": -218.91259765625, "logps/rejected": -192.38406372070312, "loss": 0.0478, "rewards/accuracies": 0.625, "rewards/chosen": 0.006567418575286865, "rewards/margins": 0.0036359038203954697, "rewards/rejected": 0.002931515220552683, "step": 360 }, { "epoch": 0.02, "learning_rate": 1.2099411379986922e-06, "logits/chosen": -2.1698031425476074, "logits/rejected": -2.2461369037628174, "logps/chosen": -191.3446807861328, "logps/rejected": -205.6558837890625, "loss": 0.0577, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": 0.006152043584734201, "rewards/margins": 0.001543499412946403, "rewards/rejected": 0.004608544055372477, "step": 370 }, { "epoch": 0.02, "learning_rate": 1.2426422498364946e-06, "logits/chosen": -2.3303966522216797, "logits/rejected": -2.04219126701355, "logps/chosen": -215.49923706054688, "logps/rejected": -162.1612548828125, "loss": 0.0469, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.006148985121399164, "rewards/margins": 0.004239398054778576, "rewards/rejected": 0.001909587299451232, "step": 380 }, { "epoch": 0.03, "learning_rate": 1.2753433616742968e-06, "logits/chosen": -2.3285553455352783, "logits/rejected": -2.248424530029297, "logps/chosen": -181.14744567871094, "logps/rejected": -245.36453247070312, "loss": 0.0819, "rewards/accuracies": 0.625, "rewards/chosen": 0.004153563175350428, "rewards/margins": 0.0035349163226783276, "rewards/rejected": 0.0006186462705954909, "step": 390 }, { "epoch": 0.03, "learning_rate": 1.3080444735120995e-06, "logits/chosen": -2.487774133682251, "logits/rejected": -2.118887186050415, "logps/chosen": -219.4935302734375, "logps/rejected": -181.0131378173828, "loss": 0.0716, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.005999167449772358, "rewards/margins": 0.006769840605556965, "rewards/rejected": -0.0007706738542765379, "step": 400 }, { "epoch": 0.03, "eval_logits/chosen": -2.354426622390747, "eval_logits/rejected": -2.1657989025115967, "eval_logps/chosen": -227.47122192382812, "eval_logps/rejected": -211.32394409179688, "eval_loss": 0.051733482629060745, "eval_rewards/accuracies": 0.5985000133514404, "eval_rewards/chosen": 0.004533740691840649, "eval_rewards/margins": 0.004245795775204897, "eval_rewards/rejected": 0.00028794523677788675, "eval_runtime": 713.7438, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 400 }, { "epoch": 0.03, "learning_rate": 1.3407455853499021e-06, "logits/chosen": -2.473179578781128, "logits/rejected": -2.3229384422302246, "logps/chosen": -256.70166015625, "logps/rejected": -225.2674102783203, "loss": 0.0422, "rewards/accuracies": 0.625, "rewards/chosen": 0.0056515904143452644, "rewards/margins": 0.004962144885212183, "rewards/rejected": 0.0006894455291330814, "step": 410 }, { "epoch": 0.03, "learning_rate": 1.3734466971877046e-06, "logits/chosen": -2.3012423515319824, "logits/rejected": -2.2007696628570557, "logps/chosen": -176.78819274902344, "logps/rejected": -175.4483184814453, "loss": 0.0476, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.005131448619067669, "rewards/margins": 0.0061288634315133095, "rewards/rejected": -0.0009974155109375715, "step": 420 }, { "epoch": 0.03, "learning_rate": 1.406147809025507e-06, "logits/chosen": -2.2929539680480957, "logits/rejected": -2.1048693656921387, "logps/chosen": -210.1273193359375, "logps/rejected": -184.78053283691406, "loss": 0.068, "rewards/accuracies": 0.625, "rewards/chosen": 0.005019956734031439, "rewards/margins": 0.007152647711336613, "rewards/rejected": -0.0021326900459825993, "step": 430 }, { "epoch": 0.03, "learning_rate": 1.4388489208633094e-06, "logits/chosen": -2.389460802078247, "logits/rejected": -2.103985548019409, "logps/chosen": -252.7582550048828, "logps/rejected": -226.56533813476562, "loss": 0.0473, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.001738998107612133, "rewards/margins": 0.003718787804245949, "rewards/rejected": -0.0019797896966338158, "step": 440 }, { "epoch": 0.03, "learning_rate": 1.471550032701112e-06, "logits/chosen": -2.40895676612854, "logits/rejected": -2.206270217895508, "logps/chosen": -188.23260498046875, "logps/rejected": -203.98873901367188, "loss": 0.0905, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0014152564108371735, "rewards/margins": 0.009954740293323994, "rewards/rejected": -0.008539484813809395, "step": 450 }, { "epoch": 0.03, "learning_rate": 1.5042511445389143e-06, "logits/chosen": -2.1512463092803955, "logits/rejected": -2.198969602584839, "logps/chosen": -149.03451538085938, "logps/rejected": -216.7880096435547, "loss": 0.1097, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0019054677104577422, "rewards/margins": 0.00432746484875679, "rewards/rejected": -0.002421997021883726, "step": 460 }, { "epoch": 0.03, "learning_rate": 1.536952256376717e-06, "logits/chosen": -2.0389649868011475, "logits/rejected": -2.1013293266296387, "logps/chosen": -201.8627166748047, "logps/rejected": -261.3829345703125, "loss": 0.0698, "rewards/accuracies": 0.625, "rewards/chosen": -0.0009320884710177779, "rewards/margins": 0.01395617425441742, "rewards/rejected": -0.014888262376189232, "step": 470 }, { "epoch": 0.03, "learning_rate": 1.5696533682145194e-06, "logits/chosen": -2.432595729827881, "logits/rejected": -2.2013134956359863, "logps/chosen": -182.6690673828125, "logps/rejected": -164.38609313964844, "loss": 0.0445, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0015776865184307098, "rewards/margins": 0.01277574896812439, "rewards/rejected": -0.011198061518371105, "step": 480 }, { "epoch": 0.03, "learning_rate": 1.602354480052322e-06, "logits/chosen": -2.405876874923706, "logits/rejected": -2.3170888423919678, "logps/chosen": -291.15350341796875, "logps/rejected": -243.79837036132812, "loss": 0.0562, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.024513717740774155, "rewards/margins": 0.007596570998430252, "rewards/rejected": -0.032110292464494705, "step": 490 }, { "epoch": 0.03, "learning_rate": 1.6350555918901245e-06, "logits/chosen": -2.194425344467163, "logits/rejected": -1.8669335842132568, "logps/chosen": -249.9720916748047, "logps/rejected": -244.41177368164062, "loss": 0.0532, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03190293908119202, "rewards/margins": 0.008983219973742962, "rewards/rejected": -0.04088615998625755, "step": 500 }, { "epoch": 0.03, "eval_logits/chosen": -2.3355960845947266, "eval_logits/rejected": -2.1478939056396484, "eval_logps/chosen": -265.49725341796875, "eval_logps/rejected": -252.65875244140625, "eval_loss": 0.050587136298418045, "eval_rewards/accuracies": 0.6050000190734863, "eval_rewards/chosen": -0.033492300659418106, "eval_rewards/margins": 0.007554535288363695, "eval_rewards/rejected": -0.041046835482120514, "eval_runtime": 716.6624, "eval_samples_per_second": 2.791, "eval_steps_per_second": 1.395, "step": 500 }, { "epoch": 0.03, "learning_rate": 1.6677567037279269e-06, "logits/chosen": -2.4753518104553223, "logits/rejected": -2.1261656284332275, "logps/chosen": -328.3849792480469, "logps/rejected": -299.64111328125, "loss": 0.0149, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03212134540081024, "rewards/margins": 0.011641132645308971, "rewards/rejected": -0.04376247525215149, "step": 510 }, { "epoch": 0.03, "learning_rate": 1.7004578155657295e-06, "logits/chosen": -2.284310817718506, "logits/rejected": -2.3136582374572754, "logps/chosen": -210.1264190673828, "logps/rejected": -204.51266479492188, "loss": 0.043, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.003407080424949527, "rewards/margins": 0.014806896448135376, "rewards/rejected": -0.018213976174592972, "step": 520 }, { "epoch": 0.03, "learning_rate": 1.7331589274035318e-06, "logits/chosen": -2.1771652698516846, "logits/rejected": -2.0566582679748535, "logps/chosen": -184.3964080810547, "logps/rejected": -194.8976287841797, "loss": 0.0529, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.00046985363587737083, "rewards/margins": 0.011055449023842812, "rewards/rejected": -0.01152530312538147, "step": 530 }, { "epoch": 0.04, "learning_rate": 1.7658600392413344e-06, "logits/chosen": -2.4068570137023926, "logits/rejected": -2.246674060821533, "logps/chosen": -199.81617736816406, "logps/rejected": -194.9036102294922, "loss": 0.044, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0030392543412745, "rewards/margins": 0.012457060627639294, "rewards/rejected": -0.009417807683348656, "step": 540 }, { "epoch": 0.04, "learning_rate": 1.7985611510791368e-06, "logits/chosen": -2.409641742706299, "logits/rejected": -1.9456228017807007, "logps/chosen": -260.1620178222656, "logps/rejected": -261.0027770996094, "loss": 0.0346, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0033876735251396894, "rewards/margins": 0.017229225486516953, "rewards/rejected": -0.01384155172854662, "step": 550 }, { "epoch": 0.04, "learning_rate": 1.8312622629169393e-06, "logits/chosen": -2.3325753211975098, "logits/rejected": -2.1921544075012207, "logps/chosen": -272.4880065917969, "logps/rejected": -246.2232666015625, "loss": 0.0429, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.009033584035933018, "rewards/margins": 0.008255882188677788, "rewards/rejected": -0.01728946715593338, "step": 560 }, { "epoch": 0.04, "learning_rate": 1.8639633747547417e-06, "logits/chosen": -2.4114229679107666, "logits/rejected": -2.2572600841522217, "logps/chosen": -233.09017944335938, "logps/rejected": -206.3046112060547, "loss": 0.0726, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.0039450787007808685, "rewards/margins": 0.00885126180946827, "rewards/rejected": -0.012796339578926563, "step": 570 }, { "epoch": 0.04, "learning_rate": 1.8966644865925443e-06, "logits/chosen": -2.2610888481140137, "logits/rejected": -2.2897298336029053, "logps/chosen": -240.7530059814453, "logps/rejected": -259.40338134765625, "loss": 0.0683, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.001544198370538652, "rewards/margins": 0.007255956530570984, "rewards/rejected": -0.00880015455186367, "step": 580 }, { "epoch": 0.04, "learning_rate": 1.9293655984303466e-06, "logits/chosen": -2.670855760574341, "logits/rejected": -2.255932331085205, "logps/chosen": -286.9781188964844, "logps/rejected": -213.85791015625, "loss": 0.0681, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.004011885728687048, "rewards/margins": 0.009884463623166084, "rewards/rejected": -0.013896350748836994, "step": 590 }, { "epoch": 0.04, "learning_rate": 1.9620667102681494e-06, "logits/chosen": -2.4130911827087402, "logits/rejected": -2.421830654144287, "logps/chosen": -193.62570190429688, "logps/rejected": -185.83499145507812, "loss": 0.0353, "rewards/accuracies": 0.5, "rewards/chosen": -0.004373248666524887, "rewards/margins": 0.006529881618916988, "rewards/rejected": -0.010903130285441875, "step": 600 }, { "epoch": 0.04, "eval_logits/chosen": -2.3732903003692627, "eval_logits/rejected": -2.183703660964966, "eval_logps/chosen": -237.06134033203125, "eval_logps/rejected": -229.23268127441406, "eval_loss": 0.04820794239640236, "eval_rewards/accuracies": 0.6039999723434448, "eval_rewards/chosen": -0.0050563993863761425, "eval_rewards/margins": 0.012564396485686302, "eval_rewards/rejected": -0.017620796337723732, "eval_runtime": 715.3828, "eval_samples_per_second": 2.796, "eval_steps_per_second": 1.398, "step": 600 }, { "epoch": 0.04, "learning_rate": 1.994767822105952e-06, "logits/chosen": -2.329235553741455, "logits/rejected": -2.2202258110046387, "logps/chosen": -184.5780792236328, "logps/rejected": -207.1245880126953, "loss": 0.0897, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.009374430403113365, "rewards/margins": 0.01611148752272129, "rewards/rejected": -0.025485917925834656, "step": 610 }, { "epoch": 0.04, "learning_rate": 2.0274689339437543e-06, "logits/chosen": -2.229123830795288, "logits/rejected": -2.021873712539673, "logps/chosen": -285.5155029296875, "logps/rejected": -264.87847900390625, "loss": 0.0318, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.00682572927325964, "rewards/margins": 0.01471722312271595, "rewards/rejected": -0.021542951464653015, "step": 620 }, { "epoch": 0.04, "learning_rate": 2.0601700457815567e-06, "logits/chosen": -2.3667750358581543, "logits/rejected": -2.003872871398926, "logps/chosen": -283.3835144042969, "logps/rejected": -247.8993682861328, "loss": 0.0417, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.024028822779655457, "rewards/margins": 0.008029013872146606, "rewards/rejected": -0.03205784037709236, "step": 630 }, { "epoch": 0.04, "learning_rate": 2.092871157619359e-06, "logits/chosen": -2.4169487953186035, "logits/rejected": -2.2340145111083984, "logps/chosen": -189.7291717529297, "logps/rejected": -219.2557830810547, "loss": 0.0359, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.013066994957625866, "rewards/margins": 0.015815146267414093, "rewards/rejected": -0.028882140293717384, "step": 640 }, { "epoch": 0.04, "learning_rate": 2.1255722694571616e-06, "logits/chosen": -2.4433088302612305, "logits/rejected": -2.0391018390655518, "logps/chosen": -289.28326416015625, "logps/rejected": -221.1160125732422, "loss": 0.0474, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.010388649068772793, "rewards/margins": 0.02480030432343483, "rewards/rejected": -0.035188958048820496, "step": 650 }, { "epoch": 0.04, "learning_rate": 2.158273381294964e-06, "logits/chosen": -2.368497371673584, "logits/rejected": -2.26300311088562, "logps/chosen": -229.018798828125, "logps/rejected": -243.78524780273438, "loss": 0.0372, "rewards/accuracies": 0.5, "rewards/chosen": -0.016591787338256836, "rewards/margins": 0.009254980832338333, "rewards/rejected": -0.02584676817059517, "step": 660 }, { "epoch": 0.04, "learning_rate": 2.190974493132767e-06, "logits/chosen": -2.3458566665649414, "logits/rejected": -2.031294345855713, "logps/chosen": -254.14852905273438, "logps/rejected": -220.53634643554688, "loss": 0.0554, "rewards/accuracies": 0.625, "rewards/chosen": -0.01646944135427475, "rewards/margins": 0.022644639015197754, "rewards/rejected": -0.039114080369472504, "step": 670 }, { "epoch": 0.04, "learning_rate": 2.223675604970569e-06, "logits/chosen": -2.39654541015625, "logits/rejected": -2.1964592933654785, "logps/chosen": -230.85525512695312, "logps/rejected": -232.8872833251953, "loss": 0.0391, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.020120983943343163, "rewards/margins": 0.019362308084964752, "rewards/rejected": -0.03948329761624336, "step": 680 }, { "epoch": 0.05, "learning_rate": 2.2563767168083718e-06, "logits/chosen": -2.4727702140808105, "logits/rejected": -1.977368712425232, "logps/chosen": -275.8236389160156, "logps/rejected": -237.82534790039062, "loss": 0.0638, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.018571864813566208, "rewards/margins": 0.025034001097083092, "rewards/rejected": -0.04360586777329445, "step": 690 }, { "epoch": 0.05, "learning_rate": 2.289077828646174e-06, "logits/chosen": -2.2945492267608643, "logits/rejected": -2.0689382553100586, "logps/chosen": -278.28839111328125, "logps/rejected": -259.5457458496094, "loss": 0.0607, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.03922739624977112, "rewards/margins": 0.005962969269603491, "rewards/rejected": -0.04519036412239075, "step": 700 }, { "epoch": 0.05, "eval_logits/chosen": -2.338188886642456, "eval_logits/rejected": -2.149794340133667, "eval_logps/chosen": -249.42825317382812, "eval_logps/rejected": -251.0581512451172, "eval_loss": 0.044174712151288986, "eval_rewards/accuracies": 0.6154999732971191, "eval_rewards/chosen": -0.017423292621970177, "eval_rewards/margins": 0.022022953256964684, "eval_rewards/rejected": -0.03944624215364456, "eval_runtime": 715.318, "eval_samples_per_second": 2.796, "eval_steps_per_second": 1.398, "step": 700 }, { "epoch": 0.05, "learning_rate": 2.3217789404839766e-06, "logits/chosen": -2.203415632247925, "logits/rejected": -2.2807836532592773, "logps/chosen": -177.1423797607422, "logps/rejected": -233.90933227539062, "loss": 0.037, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.020311249420046806, "rewards/margins": 0.009824239648878574, "rewards/rejected": -0.030135491862893105, "step": 710 }, { "epoch": 0.05, "learning_rate": 2.354480052321779e-06, "logits/chosen": -2.5478432178497314, "logits/rejected": -2.1129870414733887, "logps/chosen": -272.4012451171875, "logps/rejected": -239.3954620361328, "loss": 0.0632, "rewards/accuracies": 0.625, "rewards/chosen": -0.01401180773973465, "rewards/margins": 0.026635561138391495, "rewards/rejected": -0.040647368878126144, "step": 720 }, { "epoch": 0.05, "learning_rate": 2.3871811641595815e-06, "logits/chosen": -2.3780322074890137, "logits/rejected": -2.1917457580566406, "logps/chosen": -264.0068054199219, "logps/rejected": -223.34097290039062, "loss": 0.0259, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.017214341089129448, "rewards/margins": 0.017990056425333023, "rewards/rejected": -0.03520439565181732, "step": 730 }, { "epoch": 0.05, "learning_rate": 2.4198822759973843e-06, "logits/chosen": -2.219491481781006, "logits/rejected": -2.2347145080566406, "logps/chosen": -210.78140258789062, "logps/rejected": -248.0644073486328, "loss": 0.069, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.02017531357705593, "rewards/margins": 0.023350484669208527, "rewards/rejected": -0.043525803834199905, "step": 740 }, { "epoch": 0.05, "learning_rate": 2.4525833878351864e-06, "logits/chosen": -2.493913412094116, "logits/rejected": -2.2906222343444824, "logps/chosen": -267.8315734863281, "logps/rejected": -221.9871368408203, "loss": 0.038, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.015030624344944954, "rewards/margins": 0.02043559029698372, "rewards/rejected": -0.03546621650457382, "step": 750 }, { "epoch": 0.05, "learning_rate": 2.4852844996729892e-06, "logits/chosen": -2.2203714847564697, "logits/rejected": -2.123723030090332, "logps/chosen": -264.26409912109375, "logps/rejected": -307.8015441894531, "loss": 0.0663, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.02513979747891426, "rewards/margins": 0.04207928106188774, "rewards/rejected": -0.067219078540802, "step": 760 }, { "epoch": 0.05, "learning_rate": 2.5179856115107916e-06, "logits/chosen": -2.3337619304656982, "logits/rejected": -2.006981372833252, "logps/chosen": -311.7841796875, "logps/rejected": -294.84442138671875, "loss": 0.0578, "rewards/accuracies": 0.625, "rewards/chosen": -0.0382879376411438, "rewards/margins": 0.03700857609510422, "rewards/rejected": -0.07529651373624802, "step": 770 }, { "epoch": 0.05, "learning_rate": 2.5506867233485937e-06, "logits/chosen": -2.4037652015686035, "logits/rejected": -1.9791380167007446, "logps/chosen": -307.6341247558594, "logps/rejected": -317.4861755371094, "loss": 0.052, "rewards/accuracies": 0.75, "rewards/chosen": -0.041938185691833496, "rewards/margins": 0.06735068559646606, "rewards/rejected": -0.10928885638713837, "step": 780 }, { "epoch": 0.05, "learning_rate": 2.5833878351863965e-06, "logits/chosen": -2.4779062271118164, "logits/rejected": -2.4091298580169678, "logps/chosen": -310.5218505859375, "logps/rejected": -347.1013488769531, "loss": 0.0353, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05852390080690384, "rewards/margins": 0.03919988125562668, "rewards/rejected": -0.09772378951311111, "step": 790 }, { "epoch": 0.05, "learning_rate": 2.616088947024199e-06, "logits/chosen": -2.1538503170013428, "logits/rejected": -2.3353419303894043, "logps/chosen": -280.5336608886719, "logps/rejected": -338.0328674316406, "loss": 0.0373, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.08404421806335449, "rewards/margins": 0.032972823828458786, "rewards/rejected": -0.11701703071594238, "step": 800 }, { "epoch": 0.05, "eval_logits/chosen": -2.352494716644287, "eval_logits/rejected": -2.1610326766967773, "eval_logps/chosen": -330.5710754394531, "eval_logps/rejected": -346.9683532714844, "eval_loss": 0.044998861849308014, "eval_rewards/accuracies": 0.5899999737739563, "eval_rewards/chosen": -0.09856612235307693, "eval_rewards/margins": 0.036790307611227036, "eval_rewards/rejected": -0.13535642623901367, "eval_runtime": 714.8384, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 800 }, { "epoch": 0.05, "learning_rate": 2.6487900588620014e-06, "logits/chosen": -2.1477859020233154, "logits/rejected": -1.8915302753448486, "logps/chosen": -284.8598327636719, "logps/rejected": -264.51470947265625, "loss": 0.0527, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08474426716566086, "rewards/margins": 0.021906714886426926, "rewards/rejected": -0.10665098577737808, "step": 810 }, { "epoch": 0.05, "learning_rate": 2.6814911706998042e-06, "logits/chosen": -2.3841605186462402, "logits/rejected": -2.1940865516662598, "logps/chosen": -273.18402099609375, "logps/rejected": -254.6923065185547, "loss": 0.0295, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0478447861969471, "rewards/margins": 0.012262302450835705, "rewards/rejected": -0.06010708957910538, "step": 820 }, { "epoch": 0.05, "learning_rate": 2.7141922825376067e-06, "logits/chosen": -2.266127109527588, "logits/rejected": -2.2607052326202393, "logps/chosen": -291.67962646484375, "logps/rejected": -340.18487548828125, "loss": 0.0306, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.03375549986958504, "rewards/margins": 0.038574304431676865, "rewards/rejected": -0.0723298043012619, "step": 830 }, { "epoch": 0.05, "learning_rate": 2.746893394375409e-06, "logits/chosen": -2.391619920730591, "logits/rejected": -2.279348611831665, "logps/chosen": -268.6223449707031, "logps/rejected": -278.1440124511719, "loss": 0.0344, "rewards/accuracies": 0.625, "rewards/chosen": -0.034227922558784485, "rewards/margins": 0.011302990838885307, "rewards/rejected": -0.04553091898560524, "step": 840 }, { "epoch": 0.06, "learning_rate": 2.779594506213211e-06, "logits/chosen": -2.2835421562194824, "logits/rejected": -2.0344347953796387, "logps/chosen": -221.12557983398438, "logps/rejected": -229.5432586669922, "loss": 0.0525, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.0312667116522789, "rewards/margins": 0.008730956353247166, "rewards/rejected": -0.03999767452478409, "step": 850 }, { "epoch": 0.06, "learning_rate": 2.812295618051014e-06, "logits/chosen": -2.460794687271118, "logits/rejected": -2.2996268272399902, "logps/chosen": -301.256591796875, "logps/rejected": -256.00347900390625, "loss": 0.0307, "rewards/accuracies": 0.75, "rewards/chosen": -0.020295431837439537, "rewards/margins": 0.016474032774567604, "rewards/rejected": -0.03676946088671684, "step": 860 }, { "epoch": 0.06, "learning_rate": 2.8449967298888164e-06, "logits/chosen": -2.3527684211730957, "logits/rejected": -2.1350655555725098, "logps/chosen": -210.0928192138672, "logps/rejected": -188.89759826660156, "loss": 0.04, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.033268801867961884, "rewards/margins": 0.0068631707690656185, "rewards/rejected": -0.04013197496533394, "step": 870 }, { "epoch": 0.06, "learning_rate": 2.877697841726619e-06, "logits/chosen": -2.404083251953125, "logits/rejected": -2.303309917449951, "logps/chosen": -238.62203979492188, "logps/rejected": -231.1587371826172, "loss": 0.0513, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.019786234945058823, "rewards/margins": 0.00935150496661663, "rewards/rejected": -0.029137736186385155, "step": 880 }, { "epoch": 0.06, "learning_rate": 2.9103989535644217e-06, "logits/chosen": -2.3009395599365234, "logits/rejected": -2.428868293762207, "logps/chosen": -237.9799346923828, "logps/rejected": -284.10015869140625, "loss": 0.0362, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.022496307268738747, "rewards/margins": 0.015589781105518341, "rewards/rejected": -0.03808609023690224, "step": 890 }, { "epoch": 0.06, "learning_rate": 2.943100065402224e-06, "logits/chosen": -2.334961414337158, "logits/rejected": -2.1831624507904053, "logps/chosen": -320.94158935546875, "logps/rejected": -319.6193542480469, "loss": 0.0333, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.028298426419496536, "rewards/margins": 0.013994457200169563, "rewards/rejected": -0.04229288175702095, "step": 900 }, { "epoch": 0.06, "eval_logits/chosen": -2.297419786453247, "eval_logits/rejected": -2.113001823425293, "eval_logps/chosen": -255.12811279296875, "eval_logps/rejected": -253.471923828125, "eval_loss": 0.04525408521294594, "eval_rewards/accuracies": 0.6065000295639038, "eval_rewards/chosen": -0.023123158141970634, "eval_rewards/margins": 0.018736863508820534, "eval_rewards/rejected": -0.04186002165079117, "eval_runtime": 717.7801, "eval_samples_per_second": 2.786, "eval_steps_per_second": 1.393, "step": 900 }, { "epoch": 0.06, "learning_rate": 2.9758011772400266e-06, "logits/chosen": -2.217207193374634, "logits/rejected": -2.228949785232544, "logps/chosen": -285.60479736328125, "logps/rejected": -311.3917236328125, "loss": 0.0222, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.02829563245177269, "rewards/margins": 0.01841229386627674, "rewards/rejected": -0.04670792445540428, "step": 910 }, { "epoch": 0.06, "learning_rate": 3.0085022890778286e-06, "logits/chosen": -2.255218029022217, "logits/rejected": -2.0112428665161133, "logps/chosen": -199.38589477539062, "logps/rejected": -193.07144165039062, "loss": 0.0459, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.023646341636776924, "rewards/margins": 0.020609021186828613, "rewards/rejected": -0.04425536096096039, "step": 920 }, { "epoch": 0.06, "learning_rate": 3.0412034009156314e-06, "logits/chosen": -2.2252793312072754, "logits/rejected": -2.3937149047851562, "logps/chosen": -265.0218811035156, "logps/rejected": -286.0140686035156, "loss": 0.033, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.02933562360703945, "rewards/margins": 0.02525383234024048, "rewards/rejected": -0.05458945780992508, "step": 930 }, { "epoch": 0.06, "learning_rate": 3.073904512753434e-06, "logits/chosen": -2.394636392593384, "logits/rejected": -2.0322272777557373, "logps/chosen": -263.18890380859375, "logps/rejected": -262.29046630859375, "loss": 0.0399, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.033540278673172, "rewards/margins": 0.034029845148324966, "rewards/rejected": -0.06757012009620667, "step": 940 }, { "epoch": 0.06, "learning_rate": 3.1066056245912363e-06, "logits/chosen": -2.2938437461853027, "logits/rejected": -2.3504161834716797, "logps/chosen": -282.0276184082031, "logps/rejected": -293.95733642578125, "loss": 0.0371, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05104362219572067, "rewards/margins": 0.04388875514268875, "rewards/rejected": -0.09493237733840942, "step": 950 }, { "epoch": 0.06, "learning_rate": 3.1393067364290387e-06, "logits/chosen": -2.2875752449035645, "logits/rejected": -2.063643217086792, "logps/chosen": -284.5735168457031, "logps/rejected": -292.62811279296875, "loss": 0.0535, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05944675952196121, "rewards/margins": 0.04664020612835884, "rewards/rejected": -0.10608696937561035, "step": 960 }, { "epoch": 0.06, "learning_rate": 3.1720078482668416e-06, "logits/chosen": -2.2977986335754395, "logits/rejected": -2.065509557723999, "logps/chosen": -272.6510314941406, "logps/rejected": -260.10699462890625, "loss": 0.0323, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06108039617538452, "rewards/margins": 0.0345628447830677, "rewards/rejected": -0.09564323723316193, "step": 970 }, { "epoch": 0.06, "learning_rate": 3.204708960104644e-06, "logits/chosen": -2.363454818725586, "logits/rejected": -2.023385524749756, "logps/chosen": -265.1027526855469, "logps/rejected": -270.31304931640625, "loss": 0.0438, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.047700025141239166, "rewards/margins": 0.03779798373579979, "rewards/rejected": -0.08549802005290985, "step": 980 }, { "epoch": 0.06, "learning_rate": 3.237410071942446e-06, "logits/chosen": -2.3416037559509277, "logits/rejected": -2.0702126026153564, "logps/chosen": -265.82952880859375, "logps/rejected": -239.28909301757812, "loss": 0.0274, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.05946846678853035, "rewards/margins": 0.007023364305496216, "rewards/rejected": -0.06649182736873627, "step": 990 }, { "epoch": 0.07, "learning_rate": 3.270111183780249e-06, "logits/chosen": -2.1836626529693604, "logits/rejected": -1.975669503211975, "logps/chosen": -268.7564392089844, "logps/rejected": -282.3274841308594, "loss": 0.0469, "rewards/accuracies": 0.625, "rewards/chosen": -0.0687328651547432, "rewards/margins": 0.0315740592777729, "rewards/rejected": -0.1003069281578064, "step": 1000 }, { "epoch": 0.07, "eval_logits/chosen": -2.190696954727173, "eval_logits/rejected": -2.0107743740081787, "eval_logps/chosen": -298.4167785644531, "eval_logps/rejected": -311.0526428222656, "eval_loss": 0.04081423953175545, "eval_rewards/accuracies": 0.6019999980926514, "eval_rewards/chosen": -0.06641184538602829, "eval_rewards/margins": 0.03302890807390213, "eval_rewards/rejected": -0.09944074600934982, "eval_runtime": 715.0654, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.398, "step": 1000 }, { "epoch": 0.07, "learning_rate": 3.3028122956180513e-06, "logits/chosen": -2.033742904663086, "logits/rejected": -2.0922160148620605, "logps/chosen": -274.0719299316406, "logps/rejected": -337.2372741699219, "loss": 0.0385, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.06197401136159897, "rewards/margins": 0.035696420818567276, "rewards/rejected": -0.09767042100429535, "step": 1010 }, { "epoch": 0.07, "learning_rate": 3.3355134074558538e-06, "logits/chosen": -2.043740749359131, "logits/rejected": -1.933270812034607, "logps/chosen": -279.52349853515625, "logps/rejected": -292.7263488769531, "loss": 0.0394, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05879075080156326, "rewards/margins": 0.03879036381840706, "rewards/rejected": -0.09758111089468002, "step": 1020 }, { "epoch": 0.07, "learning_rate": 3.368214519293656e-06, "logits/chosen": -1.9517303705215454, "logits/rejected": -1.7242132425308228, "logps/chosen": -249.89022827148438, "logps/rejected": -258.21319580078125, "loss": 0.0403, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.050919920206069946, "rewards/margins": 0.02922142669558525, "rewards/rejected": -0.0801413506269455, "step": 1030 }, { "epoch": 0.07, "learning_rate": 3.400915631131459e-06, "logits/chosen": -1.8200397491455078, "logits/rejected": -1.8474419116973877, "logps/chosen": -274.6683044433594, "logps/rejected": -358.3338317871094, "loss": 0.0592, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07669584453105927, "rewards/margins": 0.05501421540975571, "rewards/rejected": -0.13171006739139557, "step": 1040 }, { "epoch": 0.07, "learning_rate": 3.4336167429692615e-06, "logits/chosen": -1.9922349452972412, "logits/rejected": -1.7748531103134155, "logps/chosen": -305.8026428222656, "logps/rejected": -334.864501953125, "loss": 0.0222, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09125224500894547, "rewards/margins": 0.04531567543745041, "rewards/rejected": -0.13656790554523468, "step": 1050 }, { "epoch": 0.07, "learning_rate": 3.4663178548070635e-06, "logits/chosen": -1.7797714471817017, "logits/rejected": -1.8289556503295898, "logps/chosen": -332.19036865234375, "logps/rejected": -364.2179870605469, "loss": 0.0437, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1172364130616188, "rewards/margins": 0.04846926033496857, "rewards/rejected": -0.16570568084716797, "step": 1060 }, { "epoch": 0.07, "learning_rate": 3.499018966644866e-06, "logits/chosen": -1.902804970741272, "logits/rejected": -1.7526041269302368, "logps/chosen": -330.8368835449219, "logps/rejected": -360.529296875, "loss": 0.0494, "rewards/accuracies": 0.625, "rewards/chosen": -0.13901188969612122, "rewards/margins": 0.036504752933979034, "rewards/rejected": -0.17551663517951965, "step": 1070 }, { "epoch": 0.07, "learning_rate": 3.531720078482669e-06, "logits/chosen": -1.804525375366211, "logits/rejected": -1.4779537916183472, "logps/chosen": -339.5526123046875, "logps/rejected": -377.75567626953125, "loss": 0.0708, "rewards/accuracies": 0.5, "rewards/chosen": -0.11368291079998016, "rewards/margins": 0.04943431168794632, "rewards/rejected": -0.1631172001361847, "step": 1080 }, { "epoch": 0.07, "learning_rate": 3.5644211903204712e-06, "logits/chosen": -1.6263682842254639, "logits/rejected": -1.628077745437622, "logps/chosen": -374.14093017578125, "logps/rejected": -415.0753479003906, "loss": 0.052, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17525847256183624, "rewards/margins": 0.03812093287706375, "rewards/rejected": -0.2133794128894806, "step": 1090 }, { "epoch": 0.07, "learning_rate": 3.5971223021582737e-06, "logits/chosen": -1.7365009784698486, "logits/rejected": -1.5972657203674316, "logps/chosen": -444.3887634277344, "logps/rejected": -403.251220703125, "loss": 0.0387, "rewards/accuracies": 0.625, "rewards/chosen": -0.1885879784822464, "rewards/margins": 0.015977730974555016, "rewards/rejected": -0.20456571877002716, "step": 1100 }, { "epoch": 0.07, "eval_logits/chosen": -1.5583889484405518, "eval_logits/rejected": -1.4114694595336914, "eval_logps/chosen": -422.05035400390625, "eval_logps/rejected": -435.65924072265625, "eval_loss": 0.041607096791267395, "eval_rewards/accuracies": 0.6029999852180481, "eval_rewards/chosen": -0.19004540145397186, "eval_rewards/margins": 0.034001998603343964, "eval_rewards/rejected": -0.22404739260673523, "eval_runtime": 716.6865, "eval_samples_per_second": 2.791, "eval_steps_per_second": 1.395, "step": 1100 }, { "epoch": 0.07, "learning_rate": 3.6298234139960765e-06, "logits/chosen": -1.6406099796295166, "logits/rejected": -1.3539705276489258, "logps/chosen": -393.76348876953125, "logps/rejected": -371.8834228515625, "loss": 0.0653, "rewards/accuracies": 0.5, "rewards/chosen": -0.17599442601203918, "rewards/margins": 0.027791261672973633, "rewards/rejected": -0.203785702586174, "step": 1110 }, { "epoch": 0.07, "learning_rate": 3.6625245258338785e-06, "logits/chosen": -1.4124555587768555, "logits/rejected": -1.3012864589691162, "logps/chosen": -413.30194091796875, "logps/rejected": -545.9480590820312, "loss": 0.0598, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.17452356219291687, "rewards/margins": 0.05618544667959213, "rewards/rejected": -0.2307090312242508, "step": 1120 }, { "epoch": 0.07, "learning_rate": 3.695225637671681e-06, "logits/chosen": -1.602260947227478, "logits/rejected": -1.4630420207977295, "logps/chosen": -338.76654052734375, "logps/rejected": -343.47607421875, "loss": 0.0454, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.13148997724056244, "rewards/margins": 0.03778548166155815, "rewards/rejected": -0.1692754626274109, "step": 1130 }, { "epoch": 0.07, "learning_rate": 3.7279267495094834e-06, "logits/chosen": -1.611236810684204, "logits/rejected": -1.4784390926361084, "logps/chosen": -281.90228271484375, "logps/rejected": -367.1201171875, "loss": 0.0379, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.12147539854049683, "rewards/margins": 0.07509257644414902, "rewards/rejected": -0.19656798243522644, "step": 1140 }, { "epoch": 0.08, "learning_rate": 3.7606278613472863e-06, "logits/chosen": -1.4988186359405518, "logits/rejected": -1.2309846878051758, "logps/chosen": -469.00244140625, "logps/rejected": -422.4840393066406, "loss": 0.0602, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18673118948936462, "rewards/margins": 0.031049732118844986, "rewards/rejected": -0.2177809178829193, "step": 1150 }, { "epoch": 0.08, "learning_rate": 3.7933289731850887e-06, "logits/chosen": -1.3085658550262451, "logits/rejected": -1.1417553424835205, "logps/chosen": -366.54217529296875, "logps/rejected": -388.99591064453125, "loss": 0.0296, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.137981578707695, "rewards/margins": 0.05071201175451279, "rewards/rejected": -0.1886935979127884, "step": 1160 }, { "epoch": 0.08, "learning_rate": 3.826030085022891e-06, "logits/chosen": -1.5232809782028198, "logits/rejected": -1.3220559358596802, "logps/chosen": -368.5690002441406, "logps/rejected": -374.45074462890625, "loss": 0.0401, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.12091922760009766, "rewards/margins": 0.042033832520246506, "rewards/rejected": -0.16295306384563446, "step": 1170 }, { "epoch": 0.08, "learning_rate": 3.858731196860693e-06, "logits/chosen": -1.68634033203125, "logits/rejected": -1.3274638652801514, "logps/chosen": -342.38568115234375, "logps/rejected": -318.9209899902344, "loss": 0.0412, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09155227988958359, "rewards/margins": 0.018770882859826088, "rewards/rejected": -0.11032316833734512, "step": 1180 }, { "epoch": 0.08, "learning_rate": 3.891432308698496e-06, "logits/chosen": -1.4322830438613892, "logits/rejected": -1.401185393333435, "logps/chosen": -277.618408203125, "logps/rejected": -326.53717041015625, "loss": 0.0674, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0823880210518837, "rewards/margins": 0.023673560470342636, "rewards/rejected": -0.10606157779693604, "step": 1190 }, { "epoch": 0.08, "learning_rate": 3.924133420536299e-06, "logits/chosen": -1.4980944395065308, "logits/rejected": -1.308347463607788, "logps/chosen": -275.05926513671875, "logps/rejected": -267.4244384765625, "loss": 0.0377, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.09291459619998932, "rewards/margins": 0.025767818093299866, "rewards/rejected": -0.11868239939212799, "step": 1200 }, { "epoch": 0.08, "eval_logits/chosen": -1.4831069707870483, "eval_logits/rejected": -1.3324590921401978, "eval_logps/chosen": -339.6366271972656, "eval_logps/rejected": -362.94146728515625, "eval_loss": 0.04094835743308067, "eval_rewards/accuracies": 0.6110000014305115, "eval_rewards/chosen": -0.10763171315193176, "eval_rewards/margins": 0.04369783028960228, "eval_rewards/rejected": -0.15132954716682434, "eval_runtime": 715.2397, "eval_samples_per_second": 2.796, "eval_steps_per_second": 1.398, "step": 1200 }, { "epoch": 0.08, "learning_rate": 3.956834532374101e-06, "logits/chosen": -1.4202970266342163, "logits/rejected": -1.0601954460144043, "logps/chosen": -346.89642333984375, "logps/rejected": -379.48968505859375, "loss": 0.0403, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1416337490081787, "rewards/margins": 0.07654460519552231, "rewards/rejected": -0.21817834675312042, "step": 1210 }, { "epoch": 0.08, "learning_rate": 3.989535644211904e-06, "logits/chosen": -1.0082813501358032, "logits/rejected": -0.7807806134223938, "logps/chosen": -431.451904296875, "logps/rejected": -483.2403259277344, "loss": 0.0397, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21432462334632874, "rewards/margins": 0.07440946251153946, "rewards/rejected": -0.28873410820961, "step": 1220 }, { "epoch": 0.08, "learning_rate": 4.022236756049706e-06, "logits/chosen": -1.1275080442428589, "logits/rejected": -0.9325186014175415, "logps/chosen": -482.34100341796875, "logps/rejected": -496.7520446777344, "loss": 0.0278, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2052038013935089, "rewards/margins": 0.04688655957579613, "rewards/rejected": -0.2520903944969177, "step": 1230 }, { "epoch": 0.08, "learning_rate": 4.054937867887509e-06, "logits/chosen": -1.0770269632339478, "logits/rejected": -0.8429039716720581, "logps/chosen": -410.4444885253906, "logps/rejected": -466.2318420410156, "loss": 0.021, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18617768585681915, "rewards/margins": 0.06657584011554718, "rewards/rejected": -0.2527535557746887, "step": 1240 }, { "epoch": 0.08, "learning_rate": 4.087638979725311e-06, "logits/chosen": -1.1457629203796387, "logits/rejected": -1.2210872173309326, "logps/chosen": -386.664794921875, "logps/rejected": -405.68023681640625, "loss": 0.0345, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15882499516010284, "rewards/margins": 0.01756046526134014, "rewards/rejected": -0.17638546228408813, "step": 1250 }, { "epoch": 0.08, "learning_rate": 4.1203400915631135e-06, "logits/chosen": -1.1347625255584717, "logits/rejected": -1.0045520067214966, "logps/chosen": -396.68096923828125, "logps/rejected": -395.95477294921875, "loss": 0.0719, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1593499481678009, "rewards/margins": 0.027554649859666824, "rewards/rejected": -0.18690460920333862, "step": 1260 }, { "epoch": 0.08, "learning_rate": 4.153041203400916e-06, "logits/chosen": -1.586891770362854, "logits/rejected": -1.4639778137207031, "logps/chosen": -381.02789306640625, "logps/rejected": -379.8446044921875, "loss": 0.0089, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.11660557985305786, "rewards/margins": 0.023451363667845726, "rewards/rejected": -0.14005693793296814, "step": 1270 }, { "epoch": 0.08, "learning_rate": 4.185742315238718e-06, "logits/chosen": -1.4819332361221313, "logits/rejected": -1.2232481241226196, "logps/chosen": -297.2691650390625, "logps/rejected": -337.279052734375, "loss": 0.0314, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09916757047176361, "rewards/margins": 0.051876842975616455, "rewards/rejected": -0.15104439854621887, "step": 1280 }, { "epoch": 0.08, "learning_rate": 4.218443427076521e-06, "logits/chosen": -1.3263565301895142, "logits/rejected": -1.3113234043121338, "logps/chosen": -322.3730773925781, "logps/rejected": -345.38043212890625, "loss": 0.0538, "rewards/accuracies": 0.5, "rewards/chosen": -0.13961449265480042, "rewards/margins": 0.021536339074373245, "rewards/rejected": -0.16115082800388336, "step": 1290 }, { "epoch": 0.09, "learning_rate": 4.251144538914323e-06, "logits/chosen": -1.3436750173568726, "logits/rejected": -1.2451797723770142, "logps/chosen": -405.1773986816406, "logps/rejected": -432.4930725097656, "loss": 0.0414, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18458710610866547, "rewards/margins": 0.040511369705200195, "rewards/rejected": -0.22509846091270447, "step": 1300 }, { "epoch": 0.09, "eval_logits/chosen": -1.342952847480774, "eval_logits/rejected": -1.202413558959961, "eval_logps/chosen": -424.3460693359375, "eval_logps/rejected": -453.0328063964844, "eval_loss": 0.03527890145778656, "eval_rewards/accuracies": 0.6159999966621399, "eval_rewards/chosen": -0.19234108924865723, "eval_rewards/margins": 0.049079835414886475, "eval_rewards/rejected": -0.2414209097623825, "eval_runtime": 717.3185, "eval_samples_per_second": 2.788, "eval_steps_per_second": 1.394, "step": 1300 }, { "epoch": 0.09, "learning_rate": 4.283845650752126e-06, "logits/chosen": -1.3643449544906616, "logits/rejected": -1.1608686447143555, "logps/chosen": -485.47137451171875, "logps/rejected": -476.75323486328125, "loss": 0.0327, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19925940036773682, "rewards/margins": 0.05547332018613815, "rewards/rejected": -0.25473272800445557, "step": 1310 }, { "epoch": 0.09, "learning_rate": 4.316546762589928e-06, "logits/chosen": -1.4823538064956665, "logits/rejected": -1.4008772373199463, "logps/chosen": -365.8683166503906, "logps/rejected": -384.3430480957031, "loss": 0.0507, "rewards/accuracies": 0.5, "rewards/chosen": -0.1579407900571823, "rewards/margins": 0.05277292802929878, "rewards/rejected": -0.21071374416351318, "step": 1320 }, { "epoch": 0.09, "learning_rate": 4.349247874427731e-06, "logits/chosen": -1.411668062210083, "logits/rejected": -1.158512830734253, "logps/chosen": -412.04705810546875, "logps/rejected": -526.25048828125, "loss": 0.0265, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18551301956176758, "rewards/margins": 0.0741208866238594, "rewards/rejected": -0.2596338987350464, "step": 1330 }, { "epoch": 0.09, "learning_rate": 4.381948986265534e-06, "logits/chosen": -1.5577880144119263, "logits/rejected": -1.3671444654464722, "logps/chosen": -476.67529296875, "logps/rejected": -523.2205200195312, "loss": 0.0324, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21383056044578552, "rewards/margins": 0.05955230072140694, "rewards/rejected": -0.27338287234306335, "step": 1340 }, { "epoch": 0.09, "learning_rate": 4.414650098103336e-06, "logits/chosen": -1.754683494567871, "logits/rejected": -1.5099554061889648, "logps/chosen": -491.84014892578125, "logps/rejected": -532.5946044921875, "loss": 0.0316, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22988763451576233, "rewards/margins": 0.0631786435842514, "rewards/rejected": -0.29306623339653015, "step": 1350 }, { "epoch": 0.09, "learning_rate": 4.447351209941138e-06, "logits/chosen": -1.314060926437378, "logits/rejected": -1.199831247329712, "logps/chosen": -517.6043090820312, "logps/rejected": -528.053955078125, "loss": 0.0449, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.30793967843055725, "rewards/margins": 0.028591204434633255, "rewards/rejected": -0.336530864238739, "step": 1360 }, { "epoch": 0.09, "learning_rate": 4.480052321778941e-06, "logits/chosen": -1.1885846853256226, "logits/rejected": -1.1603362560272217, "logps/chosen": -529.353759765625, "logps/rejected": -545.3722534179688, "loss": 0.0491, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3024435043334961, "rewards/margins": 0.03671448305249214, "rewards/rejected": -0.33915799856185913, "step": 1370 }, { "epoch": 0.09, "learning_rate": 4.5127534336167435e-06, "logits/chosen": -1.2368078231811523, "logits/rejected": -1.0605965852737427, "logps/chosen": -527.9391479492188, "logps/rejected": -527.363525390625, "loss": 0.054, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.28076520562171936, "rewards/margins": 0.04308442026376724, "rewards/rejected": -0.3238496482372284, "step": 1380 }, { "epoch": 0.09, "learning_rate": 4.5454545454545455e-06, "logits/chosen": -1.3435264825820923, "logits/rejected": -1.3093140125274658, "logps/chosen": -386.8943786621094, "logps/rejected": -409.5850830078125, "loss": 0.0645, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21743564307689667, "rewards/margins": 0.03289975970983505, "rewards/rejected": -0.2503353953361511, "step": 1390 }, { "epoch": 0.09, "learning_rate": 4.578155657292348e-06, "logits/chosen": -1.5934984683990479, "logits/rejected": -1.5155658721923828, "logps/chosen": -394.59716796875, "logps/rejected": -465.7230529785156, "loss": 0.0363, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13919150829315186, "rewards/margins": 0.06220396235585213, "rewards/rejected": -0.20139548182487488, "step": 1400 }, { "epoch": 0.09, "eval_logits/chosen": -1.4962059259414673, "eval_logits/rejected": -1.3508222103118896, "eval_logps/chosen": -376.28082275390625, "eval_logps/rejected": -395.2075500488281, "eval_loss": 0.03516998887062073, "eval_rewards/accuracies": 0.625, "eval_rewards/chosen": -0.14427584409713745, "eval_rewards/margins": 0.03931979089975357, "eval_rewards/rejected": -0.18359561264514923, "eval_runtime": 713.4356, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.402, "step": 1400 }, { "epoch": 0.09, "learning_rate": 4.610856769130151e-06, "logits/chosen": -1.62082040309906, "logits/rejected": -1.5904090404510498, "logps/chosen": -379.0682067871094, "logps/rejected": -386.9085388183594, "loss": 0.0119, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.13992397487163544, "rewards/margins": 0.0373418964445591, "rewards/rejected": -0.17726585268974304, "step": 1410 }, { "epoch": 0.09, "learning_rate": 4.643557880967953e-06, "logits/chosen": -1.5567963123321533, "logits/rejected": -1.3895817995071411, "logps/chosen": -327.0523986816406, "logps/rejected": -361.86126708984375, "loss": 0.0348, "rewards/accuracies": 0.5, "rewards/chosen": -0.12825357913970947, "rewards/margins": 0.03730865195393562, "rewards/rejected": -0.165562242269516, "step": 1420 }, { "epoch": 0.09, "learning_rate": 4.676258992805755e-06, "logits/chosen": -1.3972928524017334, "logits/rejected": -1.1884459257125854, "logps/chosen": -437.8765563964844, "logps/rejected": -475.9781799316406, "loss": 0.0177, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16825659573078156, "rewards/margins": 0.08025003969669342, "rewards/rejected": -0.24850663542747498, "step": 1430 }, { "epoch": 0.09, "learning_rate": 4.708960104643558e-06, "logits/chosen": -1.3751070499420166, "logits/rejected": -1.3199541568756104, "logps/chosen": -509.1207580566406, "logps/rejected": -538.4917602539062, "loss": 0.0263, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20291098952293396, "rewards/margins": 0.06613118201494217, "rewards/rejected": -0.26904216408729553, "step": 1440 }, { "epoch": 0.09, "learning_rate": 4.741661216481361e-06, "logits/chosen": -1.216504454612732, "logits/rejected": -1.2115882635116577, "logps/chosen": -498.08953857421875, "logps/rejected": -585.3047485351562, "loss": 0.0234, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.26174408197402954, "rewards/margins": 0.07596887648105621, "rewards/rejected": -0.33771294355392456, "step": 1450 }, { "epoch": 0.1, "learning_rate": 4.774362328319163e-06, "logits/chosen": -1.096388339996338, "logits/rejected": -0.9131072759628296, "logps/chosen": -491.79296875, "logps/rejected": -504.07257080078125, "loss": 0.0174, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2760029137134552, "rewards/margins": 0.04222895950078964, "rewards/rejected": -0.31823188066482544, "step": 1460 }, { "epoch": 0.1, "learning_rate": 4.807063440156966e-06, "logits/chosen": -1.2352993488311768, "logits/rejected": -1.0059850215911865, "logps/chosen": -485.50787353515625, "logps/rejected": -508.014404296875, "loss": 0.0441, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22219841182231903, "rewards/margins": 0.08387693762779236, "rewards/rejected": -0.3060753643512726, "step": 1470 }, { "epoch": 0.1, "learning_rate": 4.839764551994769e-06, "logits/chosen": -1.300349235534668, "logits/rejected": -1.0231695175170898, "logps/chosen": -492.228759765625, "logps/rejected": -544.5114135742188, "loss": 0.0337, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24352028965950012, "rewards/margins": 0.09001625329256058, "rewards/rejected": -0.3335365355014801, "step": 1480 }, { "epoch": 0.1, "learning_rate": 4.872465663832571e-06, "logits/chosen": -1.2918527126312256, "logits/rejected": -1.2170072793960571, "logps/chosen": -323.4441223144531, "logps/rejected": -359.42620849609375, "loss": 0.0427, "rewards/accuracies": 0.625, "rewards/chosen": -0.1156856045126915, "rewards/margins": 0.0564422681927681, "rewards/rejected": -0.1721278876066208, "step": 1490 }, { "epoch": 0.1, "learning_rate": 4.905166775670373e-06, "logits/chosen": -1.2882636785507202, "logits/rejected": -1.1160205602645874, "logps/chosen": -329.97088623046875, "logps/rejected": -333.76605224609375, "loss": 0.0741, "rewards/accuracies": 0.625, "rewards/chosen": -0.10821093618869781, "rewards/margins": 0.031188705936074257, "rewards/rejected": -0.13939963281154633, "step": 1500 }, { "epoch": 0.1, "eval_logits/chosen": -1.1484423875808716, "eval_logits/rejected": -1.0220106840133667, "eval_logps/chosen": -368.3272705078125, "eval_logps/rejected": -393.9024963378906, "eval_loss": 0.03501873090863228, "eval_rewards/accuracies": 0.6234999895095825, "eval_rewards/chosen": -0.13632231950759888, "eval_rewards/margins": 0.04596830531954765, "eval_rewards/rejected": -0.18229064345359802, "eval_runtime": 713.5347, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.401, "step": 1500 }, { "epoch": 0.1, "learning_rate": 4.9378678875081756e-06, "logits/chosen": -1.2758615016937256, "logits/rejected": -1.0804967880249023, "logps/chosen": -355.92767333984375, "logps/rejected": -388.3727722167969, "loss": 0.0378, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1411840170621872, "rewards/margins": 0.06345327198505402, "rewards/rejected": -0.2046372890472412, "step": 1510 }, { "epoch": 0.1, "learning_rate": 4.9705689993459784e-06, "logits/chosen": -0.9290014505386353, "logits/rejected": -0.7179617881774902, "logps/chosen": -439.88232421875, "logps/rejected": -465.21142578125, "loss": 0.0498, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23495697975158691, "rewards/margins": 0.0830421969294548, "rewards/rejected": -0.3179991841316223, "step": 1520 }, { "epoch": 0.1, "learning_rate": 4.999999934793849e-06, "logits/chosen": -1.226815938949585, "logits/rejected": -1.2101671695709229, "logps/chosen": -447.41943359375, "logps/rejected": -455.0298767089844, "loss": 0.0328, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19946616888046265, "rewards/margins": 0.03702683374285698, "rewards/rejected": -0.23649299144744873, "step": 1530 }, { "epoch": 0.1, "learning_rate": 4.999992110059814e-06, "logits/chosen": -1.1888110637664795, "logits/rejected": -1.1527180671691895, "logps/chosen": -458.2586364746094, "logps/rejected": -489.00286865234375, "loss": 0.0262, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18057116866111755, "rewards/margins": 0.050857581198215485, "rewards/rejected": -0.23142877221107483, "step": 1540 }, { "epoch": 0.1, "learning_rate": 4.999971244142299e-06, "logits/chosen": -1.342946171760559, "logits/rejected": -1.0540968179702759, "logps/chosen": -479.8531188964844, "logps/rejected": -483.3973693847656, "loss": 0.0292, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.206400066614151, "rewards/margins": 0.04279043525457382, "rewards/rejected": -0.2491905242204666, "step": 1550 }, { "epoch": 0.1, "learning_rate": 4.999937337150149e-06, "logits/chosen": -1.07552170753479, "logits/rejected": -0.8667101860046387, "logps/chosen": -441.98089599609375, "logps/rejected": -470.1893005371094, "loss": 0.0486, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20746605098247528, "rewards/margins": 0.04358913376927376, "rewards/rejected": -0.25105518102645874, "step": 1560 }, { "epoch": 0.1, "learning_rate": 4.99989038926024e-06, "logits/chosen": -0.9632455110549927, "logits/rejected": -0.9508832097053528, "logps/chosen": -448.7342834472656, "logps/rejected": -492.5423889160156, "loss": 0.0351, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.25030627846717834, "rewards/margins": 0.041606105864048004, "rewards/rejected": -0.29191237688064575, "step": 1570 }, { "epoch": 0.1, "learning_rate": 4.999830400717476e-06, "logits/chosen": -1.179004430770874, "logits/rejected": -1.1287713050842285, "logps/chosen": -498.1841735839844, "logps/rejected": -517.2852783203125, "loss": 0.0254, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21079714596271515, "rewards/margins": 0.03537663072347641, "rewards/rejected": -0.24617376923561096, "step": 1580 }, { "epoch": 0.1, "learning_rate": 4.999757371834787e-06, "logits/chosen": -1.0717048645019531, "logits/rejected": -1.1543245315551758, "logps/chosen": -454.44940185546875, "logps/rejected": -510.19586181640625, "loss": 0.0282, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21463057398796082, "rewards/margins": 0.07126188278198242, "rewards/rejected": -0.28589242696762085, "step": 1590 }, { "epoch": 0.1, "learning_rate": 4.999671302993125e-06, "logits/chosen": -0.8613811731338501, "logits/rejected": -0.8527324795722961, "logps/chosen": -512.34033203125, "logps/rejected": -586.0049438476562, "loss": 0.0348, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.26375988125801086, "rewards/margins": 0.05827655643224716, "rewards/rejected": -0.3220364451408386, "step": 1600 }, { "epoch": 0.1, "eval_logits/chosen": -0.980300784111023, "eval_logits/rejected": -0.8524603843688965, "eval_logps/chosen": -505.1402587890625, "eval_logps/rejected": -562.7496948242188, "eval_loss": 0.03340306878089905, "eval_rewards/accuracies": 0.6274999976158142, "eval_rewards/chosen": -0.273135244846344, "eval_rewards/margins": 0.07800257205963135, "eval_rewards/rejected": -0.35113781690597534, "eval_runtime": 714.4101, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 1600 }, { "epoch": 0.11, "learning_rate": 4.999572194641471e-06, "logits/chosen": -1.0529712438583374, "logits/rejected": -0.7722855806350708, "logps/chosen": -581.5318603515625, "logps/rejected": -644.9974365234375, "loss": 0.0339, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3060135245323181, "rewards/margins": 0.11593560129404068, "rewards/rejected": -0.4219491481781006, "step": 1610 }, { "epoch": 0.11, "learning_rate": 4.999460047296819e-06, "logits/chosen": -0.8009408712387085, "logits/rejected": -0.6767481565475464, "logps/chosen": -622.4341430664062, "logps/rejected": -665.9566650390625, "loss": 0.0231, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4068300127983093, "rewards/margins": 0.06443161517381668, "rewards/rejected": -0.4712616503238678, "step": 1620 }, { "epoch": 0.11, "learning_rate": 4.999334861544186e-06, "logits/chosen": -1.1299209594726562, "logits/rejected": -0.9722960591316223, "logps/chosen": -533.5238037109375, "logps/rejected": -540.88916015625, "loss": 0.0371, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3040706217288971, "rewards/margins": 0.06336863338947296, "rewards/rejected": -0.36743927001953125, "step": 1630 }, { "epoch": 0.11, "learning_rate": 4.999196638036604e-06, "logits/chosen": -1.4078329801559448, "logits/rejected": -1.230120062828064, "logps/chosen": -517.1390991210938, "logps/rejected": -507.3863220214844, "loss": 0.0068, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.22972404956817627, "rewards/margins": 0.025483619421720505, "rewards/rejected": -0.2552076578140259, "step": 1640 }, { "epoch": 0.11, "learning_rate": 4.999045377495111e-06, "logits/chosen": -1.065272569656372, "logits/rejected": -1.2840797901153564, "logps/chosen": -397.42071533203125, "logps/rejected": -561.1363525390625, "loss": 0.0416, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22308608889579773, "rewards/margins": 0.07593308389186859, "rewards/rejected": -0.2990191578865051, "step": 1650 }, { "epoch": 0.11, "learning_rate": 4.998881080708759e-06, "logits/chosen": -0.9700733423233032, "logits/rejected": -0.9583691358566284, "logps/chosen": -523.7947998046875, "logps/rejected": -521.7470703125, "loss": 0.036, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.2834732234477997, "rewards/margins": 0.028897196054458618, "rewards/rejected": -0.3123704195022583, "step": 1660 }, { "epoch": 0.11, "learning_rate": 4.998703748534599e-06, "logits/chosen": -0.7423811554908752, "logits/rejected": -0.6233193874359131, "logps/chosen": -509.57037353515625, "logps/rejected": -504.13592529296875, "loss": 0.0666, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.26894059777259827, "rewards/margins": 0.04525615647435188, "rewards/rejected": -0.31419676542282104, "step": 1670 }, { "epoch": 0.11, "learning_rate": 4.998513381897683e-06, "logits/chosen": -1.318880319595337, "logits/rejected": -1.0461028814315796, "logps/chosen": -432.2079162597656, "logps/rejected": -405.9598083496094, "loss": 0.034, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.19296419620513916, "rewards/margins": 0.0307462178170681, "rewards/rejected": -0.22371041774749756, "step": 1680 }, { "epoch": 0.11, "learning_rate": 4.9983099817910565e-06, "logits/chosen": -1.2512848377227783, "logits/rejected": -1.184593915939331, "logps/chosen": -477.58843994140625, "logps/rejected": -525.1726684570312, "loss": 0.0339, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23232655227184296, "rewards/margins": 0.05206217244267464, "rewards/rejected": -0.2843887507915497, "step": 1690 }, { "epoch": 0.11, "learning_rate": 4.998093549275754e-06, "logits/chosen": -1.3204123973846436, "logits/rejected": -1.3649002313613892, "logps/chosen": -476.7478942871094, "logps/rejected": -576.8233032226562, "loss": 0.0251, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2164139300584793, "rewards/margins": 0.078282430768013, "rewards/rejected": -0.2946963608264923, "step": 1700 }, { "epoch": 0.11, "eval_logits/chosen": -1.2961406707763672, "eval_logits/rejected": -1.1494770050048828, "eval_logps/chosen": -489.1553649902344, "eval_logps/rejected": -541.3787841796875, "eval_loss": 0.03176088258624077, "eval_rewards/accuracies": 0.640999972820282, "eval_rewards/chosen": -0.25715047121047974, "eval_rewards/margins": 0.07261642813682556, "eval_rewards/rejected": -0.3297668695449829, "eval_runtime": 714.3968, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 1700 }, { "epoch": 0.11, "learning_rate": 4.997864085480794e-06, "logits/chosen": -1.4710466861724854, "logits/rejected": -1.2900278568267822, "logps/chosen": -550.7463989257812, "logps/rejected": -602.5755615234375, "loss": 0.0138, "rewards/accuracies": 0.625, "rewards/chosen": -0.28440022468566895, "rewards/margins": 0.07036607712507248, "rewards/rejected": -0.35476627945899963, "step": 1710 }, { "epoch": 0.11, "learning_rate": 4.997621591603171e-06, "logits/chosen": -1.4619470834732056, "logits/rejected": -1.3374793529510498, "logps/chosen": -363.58538818359375, "logps/rejected": -433.81121826171875, "loss": 0.051, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2055526077747345, "rewards/margins": 0.07091064751148224, "rewards/rejected": -0.27646321058273315, "step": 1720 }, { "epoch": 0.11, "learning_rate": 4.997366068907853e-06, "logits/chosen": -1.5577261447906494, "logits/rejected": -1.4925411939620972, "logps/chosen": -432.44036865234375, "logps/rejected": -451.07769775390625, "loss": 0.039, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1740306317806244, "rewards/margins": 0.03851239010691643, "rewards/rejected": -0.2125430405139923, "step": 1730 }, { "epoch": 0.11, "learning_rate": 4.997097518727771e-06, "logits/chosen": -1.6026408672332764, "logits/rejected": -1.2885913848876953, "logps/chosen": -424.702392578125, "logps/rejected": -454.5870666503906, "loss": 0.0377, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20174598693847656, "rewards/margins": 0.06300705671310425, "rewards/rejected": -0.2647530734539032, "step": 1740 }, { "epoch": 0.11, "learning_rate": 4.9968159424638155e-06, "logits/chosen": -1.4218108654022217, "logits/rejected": -1.515400767326355, "logps/chosen": -495.2119140625, "logps/rejected": -599.5548095703125, "loss": 0.0289, "rewards/accuracies": 0.625, "rewards/chosen": -0.2776055932044983, "rewards/margins": 0.05249776691198349, "rewards/rejected": -0.3301033079624176, "step": 1750 }, { "epoch": 0.12, "learning_rate": 4.9965213415848235e-06, "logits/chosen": -1.301551342010498, "logits/rejected": -1.0326621532440186, "logps/chosen": -536.6153564453125, "logps/rejected": -569.9191284179688, "loss": 0.0305, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3082108497619629, "rewards/margins": 0.07016370445489883, "rewards/rejected": -0.3783746361732483, "step": 1760 }, { "epoch": 0.12, "learning_rate": 4.9962137176275805e-06, "logits/chosen": -1.5210860967636108, "logits/rejected": -1.3378417491912842, "logps/chosen": -452.2109375, "logps/rejected": -488.10101318359375, "loss": 0.0146, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21791556477546692, "rewards/margins": 0.03644804656505585, "rewards/rejected": -0.25436362624168396, "step": 1770 }, { "epoch": 0.12, "learning_rate": 4.9958930721968015e-06, "logits/chosen": -1.5874128341674805, "logits/rejected": -1.7135591506958008, "logps/chosen": -407.8630065917969, "logps/rejected": -451.82659912109375, "loss": 0.0314, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19582784175872803, "rewards/margins": 0.03246188163757324, "rewards/rejected": -0.22828975319862366, "step": 1780 }, { "epoch": 0.12, "learning_rate": 4.995559406965132e-06, "logits/chosen": -1.8727906942367554, "logits/rejected": -1.60868239402771, "logps/chosen": -378.7754821777344, "logps/rejected": -401.07318115234375, "loss": 0.0269, "rewards/accuracies": 0.625, "rewards/chosen": -0.14686015248298645, "rewards/margins": 0.051913417875766754, "rewards/rejected": -0.1987735480070114, "step": 1790 }, { "epoch": 0.12, "learning_rate": 4.995212723673131e-06, "logits/chosen": -1.8203155994415283, "logits/rejected": -1.5829551219940186, "logps/chosen": -357.2432861328125, "logps/rejected": -365.09930419921875, "loss": 0.036, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.12704019248485565, "rewards/margins": 0.057838957756757736, "rewards/rejected": -0.1848791539669037, "step": 1800 }, { "epoch": 0.12, "eval_logits/chosen": -1.7516365051269531, "eval_logits/rejected": -1.5867112874984741, "eval_logps/chosen": -382.77081298828125, "eval_logps/rejected": -407.4576416015625, "eval_loss": 0.03248048946261406, "eval_rewards/accuracies": 0.6205000281333923, "eval_rewards/chosen": -0.15076588094234467, "eval_rewards/margins": 0.04507984593510628, "eval_rewards/rejected": -0.19584573805332184, "eval_runtime": 714.4464, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.4, "step": 1800 }, { "epoch": 0.12, "learning_rate": 4.99485302412927e-06, "logits/chosen": -1.5064879655838013, "logits/rejected": -1.5007044076919556, "logps/chosen": -370.4170837402344, "logps/rejected": -427.7005310058594, "loss": 0.0478, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1624060571193695, "rewards/margins": 0.05659068375825882, "rewards/rejected": -0.21899676322937012, "step": 1810 }, { "epoch": 0.12, "learning_rate": 4.994480310209918e-06, "logits/chosen": -1.8923534154891968, "logits/rejected": -2.0139334201812744, "logps/chosen": -376.0551452636719, "logps/rejected": -441.14886474609375, "loss": 0.0226, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13407215476036072, "rewards/margins": 0.05133948475122452, "rewards/rejected": -0.18541164696216583, "step": 1820 }, { "epoch": 0.12, "learning_rate": 4.994094583859332e-06, "logits/chosen": -1.8364965915679932, "logits/rejected": -1.6396185159683228, "logps/chosen": -309.92181396484375, "logps/rejected": -395.416259765625, "loss": 0.0438, "rewards/accuracies": 0.625, "rewards/chosen": -0.1450091153383255, "rewards/margins": 0.053464919328689575, "rewards/rejected": -0.19847401976585388, "step": 1830 }, { "epoch": 0.12, "learning_rate": 4.9936958470896525e-06, "logits/chosen": -1.6799719333648682, "logits/rejected": -1.4506856203079224, "logps/chosen": -396.2935485839844, "logps/rejected": -442.51544189453125, "loss": 0.0576, "rewards/accuracies": 0.75, "rewards/chosen": -0.17254841327667236, "rewards/margins": 0.08912496268749237, "rewards/rejected": -0.26167336106300354, "step": 1840 }, { "epoch": 0.12, "learning_rate": 4.993284101980883e-06, "logits/chosen": -1.68587327003479, "logits/rejected": -1.518768548965454, "logps/chosen": -371.5691223144531, "logps/rejected": -433.38739013671875, "loss": 0.0393, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12907375395298004, "rewards/margins": 0.10145126283168793, "rewards/rejected": -0.23052498698234558, "step": 1850 }, { "epoch": 0.12, "learning_rate": 4.9928593506808885e-06, "logits/chosen": -1.6925147771835327, "logits/rejected": -1.4949332475662231, "logps/chosen": -446.60498046875, "logps/rejected": -458.5255432128906, "loss": 0.054, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.18979063630104065, "rewards/margins": 0.04889776185154915, "rewards/rejected": -0.2386883944272995, "step": 1860 }, { "epoch": 0.12, "learning_rate": 4.992421595405381e-06, "logits/chosen": -1.566605806350708, "logits/rejected": -1.2843401432037354, "logps/chosen": -454.0870056152344, "logps/rejected": -444.9664001464844, "loss": 0.0566, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.22589460015296936, "rewards/margins": 0.06172628328204155, "rewards/rejected": -0.2876208424568176, "step": 1870 }, { "epoch": 0.12, "learning_rate": 4.991970838437905e-06, "logits/chosen": -1.4696509838104248, "logits/rejected": -1.4194304943084717, "logps/chosen": -496.87091064453125, "logps/rejected": -586.5699462890625, "loss": 0.0537, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2715413272380829, "rewards/margins": 0.06159435585141182, "rewards/rejected": -0.3331356942653656, "step": 1880 }, { "epoch": 0.12, "learning_rate": 4.9915070821298294e-06, "logits/chosen": -1.4839327335357666, "logits/rejected": -1.2858867645263672, "logps/chosen": -382.99334716796875, "logps/rejected": -414.9761657714844, "loss": 0.0225, "rewards/accuracies": 0.5, "rewards/chosen": -0.2160235345363617, "rewards/margins": 0.04107125476002693, "rewards/rejected": -0.2570948004722595, "step": 1890 }, { "epoch": 0.12, "learning_rate": 4.991030328900336e-06, "logits/chosen": -1.3819067478179932, "logits/rejected": -1.119638204574585, "logps/chosen": -512.6195678710938, "logps/rejected": -523.8358154296875, "loss": 0.0142, "rewards/accuracies": 0.75, "rewards/chosen": -0.2280503213405609, "rewards/margins": 0.08125253766775131, "rewards/rejected": -0.3093028664588928, "step": 1900 }, { "epoch": 0.12, "eval_logits/chosen": -1.369167447090149, "eval_logits/rejected": -1.2253180742263794, "eval_logps/chosen": -489.46966552734375, "eval_logps/rejected": -526.0775756835938, "eval_loss": 0.031181877478957176, "eval_rewards/accuracies": 0.6334999799728394, "eval_rewards/chosen": -0.2574646770954132, "eval_rewards/margins": 0.057000961154699326, "eval_rewards/rejected": -0.31446564197540283, "eval_runtime": 713.0213, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.402, "step": 1900 }, { "epoch": 0.12, "learning_rate": 4.9905405812364014e-06, "logits/chosen": -1.3631902933120728, "logits/rejected": -1.3384044170379639, "logps/chosen": -470.17816162109375, "logps/rejected": -541.5172729492188, "loss": 0.0303, "rewards/accuracies": 0.75, "rewards/chosen": -0.26788195967674255, "rewards/margins": 0.06887385249137878, "rewards/rejected": -0.3367558419704437, "step": 1910 }, { "epoch": 0.13, "learning_rate": 4.990037841692791e-06, "logits/chosen": -1.2530277967453003, "logits/rejected": -1.158358097076416, "logps/chosen": -541.6800537109375, "logps/rejected": -564.888916015625, "loss": 0.0556, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.33563923835754395, "rewards/margins": 0.0664934441447258, "rewards/rejected": -0.4021326005458832, "step": 1920 }, { "epoch": 0.13, "learning_rate": 4.989522112892039e-06, "logits/chosen": -1.1690332889556885, "logits/rejected": -1.1562532186508179, "logps/chosen": -639.4035034179688, "logps/rejected": -683.9676513671875, "loss": 0.0544, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.4417742192745209, "rewards/margins": 0.04545414075255394, "rewards/rejected": -0.4872283339500427, "step": 1930 }, { "epoch": 0.13, "learning_rate": 4.98899339752444e-06, "logits/chosen": -1.3420687913894653, "logits/rejected": -1.0928928852081299, "logps/chosen": -502.9352111816406, "logps/rejected": -543.9446411132812, "loss": 0.0565, "rewards/accuracies": 0.625, "rewards/chosen": -0.2733329236507416, "rewards/margins": 0.06995277851819992, "rewards/rejected": -0.3432857096195221, "step": 1940 }, { "epoch": 0.13, "learning_rate": 4.988451698348033e-06, "logits/chosen": -1.3847826719284058, "logits/rejected": -1.4857834577560425, "logps/chosen": -360.64886474609375, "logps/rejected": -417.3042907714844, "loss": 0.0343, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.18232128024101257, "rewards/margins": 0.03638608008623123, "rewards/rejected": -0.2187073677778244, "step": 1950 }, { "epoch": 0.13, "learning_rate": 4.987897018188585e-06, "logits/chosen": -1.4893696308135986, "logits/rejected": -1.2397159337997437, "logps/chosen": -396.39996337890625, "logps/rejected": -388.6782531738281, "loss": 0.026, "rewards/accuracies": 0.625, "rewards/chosen": -0.1737806499004364, "rewards/margins": 0.04895434528589249, "rewards/rejected": -0.2227349728345871, "step": 1960 }, { "epoch": 0.13, "learning_rate": 4.9873293599395814e-06, "logits/chosen": -1.6546907424926758, "logits/rejected": -1.524069905281067, "logps/chosen": -346.62530517578125, "logps/rejected": -412.6383361816406, "loss": 0.0482, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15319356322288513, "rewards/margins": 0.06999649107456207, "rewards/rejected": -0.2231900691986084, "step": 1970 }, { "epoch": 0.13, "learning_rate": 4.986748726562203e-06, "logits/chosen": -1.7246673107147217, "logits/rejected": -1.5887459516525269, "logps/chosen": -352.74212646484375, "logps/rejected": -384.5918273925781, "loss": 0.0216, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13688769936561584, "rewards/margins": 0.05674203485250473, "rewards/rejected": -0.19362972676753998, "step": 1980 }, { "epoch": 0.13, "learning_rate": 4.98615512108532e-06, "logits/chosen": -1.8296085596084595, "logits/rejected": -1.7612769603729248, "logps/chosen": -356.28009033203125, "logps/rejected": -412.1853942871094, "loss": 0.0474, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13476939499378204, "rewards/margins": 0.054121870547533035, "rewards/rejected": -0.18889126181602478, "step": 1990 }, { "epoch": 0.13, "learning_rate": 4.985548546605469e-06, "logits/chosen": -1.548813819885254, "logits/rejected": -1.6893627643585205, "logps/chosen": -422.84814453125, "logps/rejected": -489.7745666503906, "loss": 0.0176, "rewards/accuracies": 0.625, "rewards/chosen": -0.20974235236644745, "rewards/margins": 0.04928591102361679, "rewards/rejected": -0.25902825593948364, "step": 2000 }, { "epoch": 0.13, "eval_logits/chosen": -1.7095062732696533, "eval_logits/rejected": -1.539565920829773, "eval_logps/chosen": -417.6275634765625, "eval_logps/rejected": -484.5845031738281, "eval_loss": 0.028233658522367477, "eval_rewards/accuracies": 0.6460000276565552, "eval_rewards/chosen": -0.18562263250350952, "eval_rewards/margins": 0.08734998852014542, "eval_rewards/rejected": -0.27297258377075195, "eval_runtime": 715.4639, "eval_samples_per_second": 2.795, "eval_steps_per_second": 1.398, "step": 2000 }, { "epoch": 0.13, "learning_rate": 4.984929006286838e-06, "logits/chosen": -1.5441169738769531, "logits/rejected": -1.4621695280075073, "logps/chosen": -372.61663818359375, "logps/rejected": -400.42059326171875, "loss": 0.061, "rewards/accuracies": 0.5, "rewards/chosen": -0.16330263018608093, "rewards/margins": 0.02434770204126835, "rewards/rejected": -0.18765032291412354, "step": 2010 }, { "epoch": 0.13, "learning_rate": 4.984296503361256e-06, "logits/chosen": -1.85922110080719, "logits/rejected": -1.5930954217910767, "logps/chosen": -297.72613525390625, "logps/rejected": -300.393798828125, "loss": 0.0205, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09580622613430023, "rewards/margins": 0.04467153549194336, "rewards/rejected": -0.1404777616262436, "step": 2020 }, { "epoch": 0.13, "learning_rate": 4.9836510411281645e-06, "logits/chosen": -1.7481101751327515, "logits/rejected": -1.6376245021820068, "logps/chosen": -358.24365234375, "logps/rejected": -413.8905334472656, "loss": 0.0363, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07971179485321045, "rewards/margins": 0.09504932165145874, "rewards/rejected": -0.1747611165046692, "step": 2030 }, { "epoch": 0.13, "learning_rate": 4.982992622954613e-06, "logits/chosen": -1.8572734594345093, "logits/rejected": -1.5880228281021118, "logps/chosen": -385.03997802734375, "logps/rejected": -347.3924865722656, "loss": 0.0446, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10151179879903793, "rewards/margins": 0.07652105391025543, "rewards/rejected": -0.17803284525871277, "step": 2040 }, { "epoch": 0.13, "learning_rate": 4.9823212522752325e-06, "logits/chosen": -1.9195706844329834, "logits/rejected": -1.7052314281463623, "logps/chosen": -418.2029724121094, "logps/rejected": -495.0660705566406, "loss": 0.0236, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.14038924872875214, "rewards/margins": 0.11089911311864853, "rewards/rejected": -0.25128835439682007, "step": 2050 }, { "epoch": 0.13, "learning_rate": 4.981636932592222e-06, "logits/chosen": -1.7846180200576782, "logits/rejected": -1.6883538961410522, "logps/chosen": -289.4696350097656, "logps/rejected": -360.0778503417969, "loss": 0.0136, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.07655663788318634, "rewards/margins": 0.07462915778160095, "rewards/rejected": -0.1511858105659485, "step": 2060 }, { "epoch": 0.14, "learning_rate": 4.980939667475328e-06, "logits/chosen": -2.003810167312622, "logits/rejected": -1.678297758102417, "logps/chosen": -373.8094482421875, "logps/rejected": -378.05242919921875, "loss": 0.0185, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09963427484035492, "rewards/margins": 0.0648350790143013, "rewards/rejected": -0.16446934640407562, "step": 2070 }, { "epoch": 0.14, "learning_rate": 4.980229460561826e-06, "logits/chosen": -1.8227941989898682, "logits/rejected": -1.7019898891448975, "logps/chosen": -346.2652587890625, "logps/rejected": -447.071044921875, "loss": 0.0158, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12818391621112823, "rewards/margins": 0.11611220985651016, "rewards/rejected": -0.2442961484193802, "step": 2080 }, { "epoch": 0.14, "learning_rate": 4.979506315556503e-06, "logits/chosen": -1.776974081993103, "logits/rejected": -1.4806630611419678, "logps/chosen": -408.65679931640625, "logps/rejected": -443.72698974609375, "loss": 0.0188, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12216466665267944, "rewards/margins": 0.08605816960334778, "rewards/rejected": -0.20822283625602722, "step": 2090 }, { "epoch": 0.14, "learning_rate": 4.9787702362316395e-06, "logits/chosen": -1.8671884536743164, "logits/rejected": -2.02780818939209, "logps/chosen": -289.94903564453125, "logps/rejected": -375.09820556640625, "loss": 0.0176, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09912185370922089, "rewards/margins": 0.05963516980409622, "rewards/rejected": -0.15875700116157532, "step": 2100 }, { "epoch": 0.14, "eval_logits/chosen": -1.7265076637268066, "eval_logits/rejected": -1.558682918548584, "eval_logps/chosen": -364.7261962890625, "eval_logps/rejected": -419.39422607421875, "eval_loss": 0.02750113047659397, "eval_rewards/accuracies": 0.6504999995231628, "eval_rewards/chosen": -0.13272124528884888, "eval_rewards/margins": 0.07506108283996582, "eval_rewards/rejected": -0.2077823132276535, "eval_runtime": 714.299, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 2100 }, { "epoch": 0.14, "learning_rate": 4.9780212264269835e-06, "logits/chosen": -1.6017316579818726, "logits/rejected": -1.3658827543258667, "logps/chosen": -352.3345947265625, "logps/rejected": -391.2010192871094, "loss": 0.0175, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.16751551628112793, "rewards/margins": 0.05600215867161751, "rewards/rejected": -0.22351768612861633, "step": 2110 }, { "epoch": 0.14, "learning_rate": 4.977259290049739e-06, "logits/chosen": -1.942870855331421, "logits/rejected": -1.4383772611618042, "logps/chosen": -431.13690185546875, "logps/rejected": -475.76678466796875, "loss": 0.0271, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14419448375701904, "rewards/margins": 0.1141483411192894, "rewards/rejected": -0.25834283232688904, "step": 2120 }, { "epoch": 0.14, "learning_rate": 4.976484431074538e-06, "logits/chosen": -1.8016068935394287, "logits/rejected": -1.7851436138153076, "logps/chosen": -270.5657043457031, "logps/rejected": -304.38629150390625, "loss": 0.0485, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.07676021009683609, "rewards/margins": 0.051422975957393646, "rewards/rejected": -0.12818318605422974, "step": 2130 }, { "epoch": 0.14, "learning_rate": 4.975696653543425e-06, "logits/chosen": -1.9399751424789429, "logits/rejected": -1.7240018844604492, "logps/chosen": -338.858642578125, "logps/rejected": -398.15252685546875, "loss": 0.0292, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08920981734991074, "rewards/margins": 0.07186315953731537, "rewards/rejected": -0.1610729694366455, "step": 2140 }, { "epoch": 0.14, "learning_rate": 4.974895961565835e-06, "logits/chosen": -1.7444274425506592, "logits/rejected": -1.4782602787017822, "logps/chosen": -303.783935546875, "logps/rejected": -390.6364440917969, "loss": 0.0281, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1285329908132553, "rewards/margins": 0.08162455260753632, "rewards/rejected": -0.21015754342079163, "step": 2150 }, { "epoch": 0.14, "learning_rate": 4.974082359318566e-06, "logits/chosen": -1.6045331954956055, "logits/rejected": -1.4653266668319702, "logps/chosen": -444.4554748535156, "logps/rejected": -489.509521484375, "loss": 0.0198, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19080546498298645, "rewards/margins": 0.09053321182727814, "rewards/rejected": -0.2813386917114258, "step": 2160 }, { "epoch": 0.14, "learning_rate": 4.973255851045769e-06, "logits/chosen": -1.654924750328064, "logits/rejected": -1.6399033069610596, "logps/chosen": -375.9240417480469, "logps/rejected": -423.3075256347656, "loss": 0.0459, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.15856584906578064, "rewards/margins": 0.08413095772266388, "rewards/rejected": -0.24269680678844452, "step": 2170 }, { "epoch": 0.14, "learning_rate": 4.972416441058915e-06, "logits/chosen": -1.533675193786621, "logits/rejected": -1.3963432312011719, "logps/chosen": -415.08636474609375, "logps/rejected": -500.5286560058594, "loss": 0.0378, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1853305846452713, "rewards/margins": 0.11208416521549225, "rewards/rejected": -0.29741474986076355, "step": 2180 }, { "epoch": 0.14, "learning_rate": 4.971564133736777e-06, "logits/chosen": -1.5535693168640137, "logits/rejected": -1.3675225973129272, "logps/chosen": -294.2303771972656, "logps/rejected": -394.2517395019531, "loss": 0.0497, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11622971296310425, "rewards/margins": 0.09504024684429169, "rewards/rejected": -0.21126994490623474, "step": 2190 }, { "epoch": 0.14, "learning_rate": 4.970698933525409e-06, "logits/chosen": -1.9314426183700562, "logits/rejected": -1.6413524150848389, "logps/chosen": -424.8863830566406, "logps/rejected": -438.1034240722656, "loss": 0.0387, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.14553944766521454, "rewards/margins": 0.052888669073581696, "rewards/rejected": -0.19842810928821564, "step": 2200 }, { "epoch": 0.14, "eval_logits/chosen": -1.8005229234695435, "eval_logits/rejected": -1.6316050291061401, "eval_logps/chosen": -336.18560791015625, "eval_logps/rejected": -382.42401123046875, "eval_loss": 0.02767937071621418, "eval_rewards/accuracies": 0.6384999752044678, "eval_rewards/chosen": -0.10418065637350082, "eval_rewards/margins": 0.06663144379854202, "eval_rewards/rejected": -0.17081207036972046, "eval_runtime": 716.5337, "eval_samples_per_second": 2.791, "eval_steps_per_second": 1.396, "step": 2200 }, { "epoch": 0.14, "learning_rate": 4.969820844938118e-06, "logits/chosen": -1.8991174697875977, "logits/rejected": -1.6042051315307617, "logps/chosen": -333.9481506347656, "logps/rejected": -344.0919189453125, "loss": 0.0266, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11134489625692368, "rewards/margins": 0.06404642015695572, "rewards/rejected": -0.1753913313150406, "step": 2210 }, { "epoch": 0.15, "learning_rate": 4.968929872555444e-06, "logits/chosen": -1.3960466384887695, "logits/rejected": -1.4402097463607788, "logps/chosen": -415.6363830566406, "logps/rejected": -513.0457763671875, "loss": 0.0373, "rewards/accuracies": 0.625, "rewards/chosen": -0.19850197434425354, "rewards/margins": 0.061746351420879364, "rewards/rejected": -0.2602483332157135, "step": 2220 }, { "epoch": 0.15, "learning_rate": 4.968026021025137e-06, "logits/chosen": -1.7851839065551758, "logits/rejected": -1.57456636428833, "logps/chosen": -364.8609313964844, "logps/rejected": -410.62939453125, "loss": 0.0243, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16329601407051086, "rewards/margins": 0.0858338475227356, "rewards/rejected": -0.24912986159324646, "step": 2230 }, { "epoch": 0.15, "learning_rate": 4.967109295062128e-06, "logits/chosen": -1.5389163494110107, "logits/rejected": -1.394202709197998, "logps/chosen": -441.41131591796875, "logps/rejected": -538.32568359375, "loss": 0.018, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21640989184379578, "rewards/margins": 0.0759933739900589, "rewards/rejected": -0.29240328073501587, "step": 2240 }, { "epoch": 0.15, "learning_rate": 4.966179699448509e-06, "logits/chosen": -1.431494116783142, "logits/rejected": -1.2572250366210938, "logps/chosen": -422.01544189453125, "logps/rejected": -448.91326904296875, "loss": 0.0342, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2312605082988739, "rewards/margins": 0.04472396522760391, "rewards/rejected": -0.2759844660758972, "step": 2250 }, { "epoch": 0.15, "learning_rate": 4.965237239033506e-06, "logits/chosen": -1.7000207901000977, "logits/rejected": -1.4988982677459717, "logps/chosen": -544.0518188476562, "logps/rejected": -639.6204223632812, "loss": 0.0313, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.254393070936203, "rewards/margins": 0.1403505653142929, "rewards/rejected": -0.3947436213493347, "step": 2260 }, { "epoch": 0.15, "learning_rate": 4.964281918733453e-06, "logits/chosen": -1.657395601272583, "logits/rejected": -1.4783971309661865, "logps/chosen": -386.2625732421875, "logps/rejected": -494.24163818359375, "loss": 0.0367, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20314812660217285, "rewards/margins": 0.11243589222431183, "rewards/rejected": -0.3155840039253235, "step": 2270 }, { "epoch": 0.15, "learning_rate": 4.9633137435317715e-06, "logits/chosen": -1.675856351852417, "logits/rejected": -1.171844720840454, "logps/chosen": -489.5335998535156, "logps/rejected": -532.4830322265625, "loss": 0.0286, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2666385769844055, "rewards/margins": 0.11073458194732666, "rewards/rejected": -0.3773731589317322, "step": 2280 }, { "epoch": 0.15, "learning_rate": 4.9623327184789355e-06, "logits/chosen": -1.8558242321014404, "logits/rejected": -1.7875862121582031, "logps/chosen": -464.0330505371094, "logps/rejected": -500.5697326660156, "loss": 0.0245, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.24629256129264832, "rewards/margins": 0.041363026946783066, "rewards/rejected": -0.2876555919647217, "step": 2290 }, { "epoch": 0.15, "learning_rate": 4.9613388486924525e-06, "logits/chosen": -1.5698120594024658, "logits/rejected": -1.7118695974349976, "logps/chosen": -376.6183166503906, "logps/rejected": -450.8517150878906, "loss": 0.0284, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19648431241512299, "rewards/margins": 0.056853294372558594, "rewards/rejected": -0.2533376216888428, "step": 2300 }, { "epoch": 0.15, "eval_logits/chosen": -1.9373195171356201, "eval_logits/rejected": -1.757978916168213, "eval_logps/chosen": -413.41485595703125, "eval_logps/rejected": -458.088623046875, "eval_loss": 0.027522247284650803, "eval_rewards/accuracies": 0.6345000267028809, "eval_rewards/chosen": -0.18140989542007446, "eval_rewards/margins": 0.06506682187318802, "eval_rewards/rejected": -0.24647673964500427, "eval_runtime": 716.6036, "eval_samples_per_second": 2.791, "eval_steps_per_second": 1.395, "step": 2300 }, { "epoch": 0.15, "learning_rate": 4.960332139356834e-06, "logits/chosen": -1.9039875268936157, "logits/rejected": -1.7031854391098022, "logps/chosen": -392.3863220214844, "logps/rejected": -449.6255798339844, "loss": 0.0449, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18356947600841522, "rewards/margins": 0.08367917686700821, "rewards/rejected": -0.26724863052368164, "step": 2310 }, { "epoch": 0.15, "learning_rate": 4.95931259572357e-06, "logits/chosen": -1.818516492843628, "logits/rejected": -1.5259634256362915, "logps/chosen": -489.5165100097656, "logps/rejected": -591.6426391601562, "loss": 0.0405, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.25527650117874146, "rewards/margins": 0.0764525830745697, "rewards/rejected": -0.33172911405563354, "step": 2320 }, { "epoch": 0.15, "learning_rate": 4.9582802231111e-06, "logits/chosen": -1.7926862239837646, "logits/rejected": -1.8711141347885132, "logps/chosen": -401.56390380859375, "logps/rejected": -442.72235107421875, "loss": 0.0313, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18968981504440308, "rewards/margins": 0.06803693622350693, "rewards/rejected": -0.2577267587184906, "step": 2330 }, { "epoch": 0.15, "learning_rate": 4.957235026904782e-06, "logits/chosen": -2.020831346511841, "logits/rejected": -1.7182672023773193, "logps/chosen": -416.442626953125, "logps/rejected": -417.2877502441406, "loss": 0.0192, "rewards/accuracies": 0.625, "rewards/chosen": -0.15429018437862396, "rewards/margins": 0.05196036025881767, "rewards/rejected": -0.20625057816505432, "step": 2340 }, { "epoch": 0.15, "learning_rate": 4.956177012556875e-06, "logits/chosen": -2.0280418395996094, "logits/rejected": -1.8081321716308594, "logps/chosen": -464.5531311035156, "logps/rejected": -458.5823669433594, "loss": 0.0338, "rewards/accuracies": 0.625, "rewards/chosen": -0.22036032378673553, "rewards/margins": 0.0655972808599472, "rewards/rejected": -0.2859576344490051, "step": 2350 }, { "epoch": 0.15, "learning_rate": 4.9551061855864976e-06, "logits/chosen": -1.4685240983963013, "logits/rejected": -1.5254625082015991, "logps/chosen": -517.7073364257812, "logps/rejected": -580.3933715820312, "loss": 0.0328, "rewards/accuracies": 0.625, "rewards/chosen": -0.3248792290687561, "rewards/margins": 0.05847542732954025, "rewards/rejected": -0.38335466384887695, "step": 2360 }, { "epoch": 0.16, "learning_rate": 4.95402255157961e-06, "logits/chosen": -1.5514215230941772, "logits/rejected": -1.5585323572158813, "logps/chosen": -454.39764404296875, "logps/rejected": -622.1051025390625, "loss": 0.0354, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.269341379404068, "rewards/margins": 0.10316131263971329, "rewards/rejected": -0.3725026845932007, "step": 2370 }, { "epoch": 0.16, "learning_rate": 4.952926116188977e-06, "logits/chosen": -2.013460874557495, "logits/rejected": -1.9613279104232788, "logps/chosen": -350.21368408203125, "logps/rejected": -418.2904357910156, "loss": 0.0546, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.16683633625507355, "rewards/margins": 0.028872232884168625, "rewards/rejected": -0.19570858776569366, "step": 2380 }, { "epoch": 0.16, "learning_rate": 4.951816885134143e-06, "logits/chosen": -1.9584863185882568, "logits/rejected": -1.9664205312728882, "logps/chosen": -334.9800109863281, "logps/rejected": -378.2928466796875, "loss": 0.0315, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13342705368995667, "rewards/margins": 0.04417356103658676, "rewards/rejected": -0.17760059237480164, "step": 2390 }, { "epoch": 0.16, "learning_rate": 4.950694864201399e-06, "logits/chosen": -1.9186756610870361, "logits/rejected": -1.8166652917861938, "logps/chosen": -373.1692199707031, "logps/rejected": -437.63055419921875, "loss": 0.0351, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13448725640773773, "rewards/margins": 0.0623321533203125, "rewards/rejected": -0.19681939482688904, "step": 2400 }, { "epoch": 0.16, "eval_logits/chosen": -1.8704036474227905, "eval_logits/rejected": -1.6925649642944336, "eval_logps/chosen": -379.8789978027344, "eval_logps/rejected": -420.34344482421875, "eval_loss": 0.029636772349476814, "eval_rewards/accuracies": 0.640500009059906, "eval_rewards/chosen": -0.14787408709526062, "eval_rewards/margins": 0.060857485979795456, "eval_rewards/rejected": -0.20873157680034637, "eval_runtime": 715.3573, "eval_samples_per_second": 2.796, "eval_steps_per_second": 1.398, "step": 2400 }, { "epoch": 0.16, "learning_rate": 4.9495600592437575e-06, "logits/chosen": -1.9204187393188477, "logits/rejected": -1.821022391319275, "logps/chosen": -427.71490478515625, "logps/rejected": -469.191162109375, "loss": 0.0374, "rewards/accuracies": 0.625, "rewards/chosen": -0.2008204162120819, "rewards/margins": 0.0382947213947773, "rewards/rejected": -0.2391151487827301, "step": 2410 }, { "epoch": 0.16, "learning_rate": 4.948412476180917e-06, "logits/chosen": -1.7727140188217163, "logits/rejected": -1.5341867208480835, "logps/chosen": -320.97674560546875, "logps/rejected": -372.8519287109375, "loss": 0.0218, "rewards/accuracies": 0.625, "rewards/chosen": -0.1369008719921112, "rewards/margins": 0.07461190968751907, "rewards/rejected": -0.21151280403137207, "step": 2420 }, { "epoch": 0.16, "learning_rate": 4.947252120999232e-06, "logits/chosen": -1.9367573261260986, "logits/rejected": -1.6594650745391846, "logps/chosen": -372.4012756347656, "logps/rejected": -340.89056396484375, "loss": 0.0376, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.10529645532369614, "rewards/margins": 0.027263093739748, "rewards/rejected": -0.13255955278873444, "step": 2430 }, { "epoch": 0.16, "learning_rate": 4.946078999751683e-06, "logits/chosen": -1.9032375812530518, "logits/rejected": -1.796146035194397, "logps/chosen": -246.6707000732422, "logps/rejected": -259.6900329589844, "loss": 0.0382, "rewards/accuracies": 0.625, "rewards/chosen": -0.06704357266426086, "rewards/margins": 0.03457552194595337, "rewards/rejected": -0.10161910206079483, "step": 2440 }, { "epoch": 0.16, "learning_rate": 4.944893118557847e-06, "logits/chosen": -1.787763237953186, "logits/rejected": -1.7241474390029907, "logps/chosen": -290.6931457519531, "logps/rejected": -296.21343994140625, "loss": 0.0409, "rewards/accuracies": 0.75, "rewards/chosen": -0.08420853316783905, "rewards/margins": 0.059210728853940964, "rewards/rejected": -0.1434192657470703, "step": 2450 }, { "epoch": 0.16, "learning_rate": 4.943694483603861e-06, "logits/chosen": -2.0436148643493652, "logits/rejected": -1.6360610723495483, "logps/chosen": -302.03106689453125, "logps/rejected": -322.39056396484375, "loss": 0.024, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.076027050614357, "rewards/margins": 0.0713384598493576, "rewards/rejected": -0.1473655104637146, "step": 2460 }, { "epoch": 0.16, "learning_rate": 4.9424831011423914e-06, "logits/chosen": -2.028540849685669, "logits/rejected": -1.9434324502944946, "logps/chosen": -364.19927978515625, "logps/rejected": -351.674072265625, "loss": 0.0357, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0757727399468422, "rewards/margins": 0.02448941580951214, "rewards/rejected": -0.10026215016841888, "step": 2470 }, { "epoch": 0.16, "learning_rate": 4.9412589774926015e-06, "logits/chosen": -2.066434860229492, "logits/rejected": -1.7686599493026733, "logps/chosen": -350.6575622558594, "logps/rejected": -354.8769226074219, "loss": 0.0587, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0764678493142128, "rewards/margins": 0.05484286695718765, "rewards/rejected": -0.13131068646907806, "step": 2480 }, { "epoch": 0.16, "learning_rate": 4.940022119040121e-06, "logits/chosen": -1.9770406484603882, "logits/rejected": -1.7137130498886108, "logps/chosen": -384.12261962890625, "logps/rejected": -391.70477294921875, "loss": 0.0242, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.092206671833992, "rewards/margins": 0.03540036454796791, "rewards/rejected": -0.12760701775550842, "step": 2490 }, { "epoch": 0.16, "learning_rate": 4.93877253223701e-06, "logits/chosen": -1.7713171243667603, "logits/rejected": -1.6282857656478882, "logps/chosen": -440.8924255371094, "logps/rejected": -458.51123046875, "loss": 0.0143, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14862024784088135, "rewards/margins": 0.04919145628809929, "rewards/rejected": -0.19781169295310974, "step": 2500 }, { "epoch": 0.16, "eval_logits/chosen": -1.6983387470245361, "eval_logits/rejected": -1.5350089073181152, "eval_logps/chosen": -391.6553649902344, "eval_logps/rejected": -430.9314270019531, "eval_loss": 0.02853885106742382, "eval_rewards/accuracies": 0.6545000076293945, "eval_rewards/chosen": -0.15965043008327484, "eval_rewards/margins": 0.05966914817690849, "eval_rewards/rejected": -0.21931956708431244, "eval_runtime": 713.0375, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.402, "step": 2500 }, { "epoch": 0.16, "learning_rate": 4.937510223601725e-06, "logits/chosen": -1.9430396556854248, "logits/rejected": -1.8364536762237549, "logps/chosen": -403.91546630859375, "logps/rejected": -386.3529968261719, "loss": 0.031, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14138951897621155, "rewards/margins": 0.024648915976285934, "rewards/rejected": -0.16603845357894897, "step": 2510 }, { "epoch": 0.16, "learning_rate": 4.936235199719085e-06, "logits/chosen": -1.6583465337753296, "logits/rejected": -1.5381600856781006, "logps/chosen": -356.3477783203125, "logps/rejected": -397.0802001953125, "loss": 0.0273, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18404577672481537, "rewards/margins": 0.07091157883405685, "rewards/rejected": -0.2549573481082916, "step": 2520 }, { "epoch": 0.17, "learning_rate": 4.93494746724024e-06, "logits/chosen": -1.684322714805603, "logits/rejected": -1.5036375522613525, "logps/chosen": -432.98016357421875, "logps/rejected": -518.3250732421875, "loss": 0.0273, "rewards/accuracies": 0.75, "rewards/chosen": -0.2091757357120514, "rewards/margins": 0.06788496673107147, "rewards/rejected": -0.27706068754196167, "step": 2530 }, { "epoch": 0.17, "learning_rate": 4.933647032882635e-06, "logits/chosen": -1.7667783498764038, "logits/rejected": -1.5246508121490479, "logps/chosen": -465.6817932128906, "logps/rejected": -488.5626525878906, "loss": 0.0255, "rewards/accuracies": 0.75, "rewards/chosen": -0.22081997990608215, "rewards/margins": 0.06730033457279205, "rewards/rejected": -0.288120299577713, "step": 2540 }, { "epoch": 0.17, "learning_rate": 4.932333903429969e-06, "logits/chosen": -1.3849174976348877, "logits/rejected": -1.238287091255188, "logps/chosen": -407.41973876953125, "logps/rejected": -386.4485778808594, "loss": 0.029, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.2143373042345047, "rewards/margins": 0.004834827966988087, "rewards/rejected": -0.21917212009429932, "step": 2550 }, { "epoch": 0.17, "learning_rate": 4.931008085732172e-06, "logits/chosen": -1.5365387201309204, "logits/rejected": -1.115487813949585, "logps/chosen": -423.08465576171875, "logps/rejected": -442.81640625, "loss": 0.021, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22026176750659943, "rewards/margins": 0.06973310559988022, "rewards/rejected": -0.28999486565589905, "step": 2560 }, { "epoch": 0.17, "learning_rate": 4.9296695867053565e-06, "logits/chosen": -1.4606945514678955, "logits/rejected": -1.1867111921310425, "logps/chosen": -577.0966796875, "logps/rejected": -586.1174926757812, "loss": 0.0159, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2764951288700104, "rewards/margins": 0.07498078793287277, "rewards/rejected": -0.35147592425346375, "step": 2570 }, { "epoch": 0.17, "learning_rate": 4.928318413331791e-06, "logits/chosen": -1.4180063009262085, "logits/rejected": -1.322865605354309, "logps/chosen": -497.649169921875, "logps/rejected": -543.3568115234375, "loss": 0.0374, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.28642207384109497, "rewards/margins": 0.061692975461483, "rewards/rejected": -0.34811505675315857, "step": 2580 }, { "epoch": 0.17, "learning_rate": 4.926954572659855e-06, "logits/chosen": -1.3115441799163818, "logits/rejected": -1.241210699081421, "logps/chosen": -487.13421630859375, "logps/rejected": -587.6708984375, "loss": 0.0399, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24493856728076935, "rewards/margins": 0.0833507850766182, "rewards/rejected": -0.32828935980796814, "step": 2590 }, { "epoch": 0.17, "learning_rate": 4.925578071804013e-06, "logits/chosen": -1.2650806903839111, "logits/rejected": -1.2482917308807373, "logps/chosen": -472.12432861328125, "logps/rejected": -597.6649169921875, "loss": 0.0224, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24241861701011658, "rewards/margins": 0.06812867522239685, "rewards/rejected": -0.3105472922325134, "step": 2600 }, { "epoch": 0.17, "eval_logits/chosen": -1.4685617685317993, "eval_logits/rejected": -1.3151581287384033, "eval_logps/chosen": -438.5660400390625, "eval_logps/rejected": -488.74310302734375, "eval_loss": 0.026503968983888626, "eval_rewards/accuracies": 0.6545000076293945, "eval_rewards/chosen": -0.20656105875968933, "eval_rewards/margins": 0.07057015597820282, "eval_rewards/rejected": -0.27713119983673096, "eval_runtime": 715.1584, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.398, "step": 2600 }, { "epoch": 0.17, "learning_rate": 4.924188917944763e-06, "logits/chosen": -1.5949804782867432, "logits/rejected": -1.3649790287017822, "logps/chosen": -411.91290283203125, "logps/rejected": -482.4175720214844, "loss": 0.0278, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19190777838230133, "rewards/margins": 0.10402512550354004, "rewards/rejected": -0.29593291878700256, "step": 2610 }, { "epoch": 0.17, "learning_rate": 4.922787118328617e-06, "logits/chosen": -1.5534934997558594, "logits/rejected": -1.1872944831848145, "logps/chosen": -477.26605224609375, "logps/rejected": -435.3160095214844, "loss": 0.0352, "rewards/accuracies": 0.5, "rewards/chosen": -0.24724838137626648, "rewards/margins": 0.03436286002397537, "rewards/rejected": -0.28161126375198364, "step": 2620 }, { "epoch": 0.17, "learning_rate": 4.921372680268045e-06, "logits/chosen": -1.3817861080169678, "logits/rejected": -1.2594407796859741, "logps/chosen": -542.6174926757812, "logps/rejected": -546.5518798828125, "loss": 0.0415, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.3126128613948822, "rewards/margins": 0.03430309146642685, "rewards/rejected": -0.34691593050956726, "step": 2630 }, { "epoch": 0.17, "learning_rate": 4.919945611141451e-06, "logits/chosen": -1.57100248336792, "logits/rejected": -1.2293686866760254, "logps/chosen": -483.7747497558594, "logps/rejected": -493.34051513671875, "loss": 0.0286, "rewards/accuracies": 0.625, "rewards/chosen": -0.2585400938987732, "rewards/margins": 0.06808514893054962, "rewards/rejected": -0.3266252279281616, "step": 2640 }, { "epoch": 0.17, "learning_rate": 4.918505918393125e-06, "logits/chosen": -1.3956433534622192, "logits/rejected": -1.2775672674179077, "logps/chosen": -406.34210205078125, "logps/rejected": -508.9244689941406, "loss": 0.0371, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.237311452627182, "rewards/margins": 0.07920899242162704, "rewards/rejected": -0.31652045249938965, "step": 2650 }, { "epoch": 0.17, "learning_rate": 4.91705360953321e-06, "logits/chosen": -1.4009259939193726, "logits/rejected": -1.2891381978988647, "logps/chosen": -547.2296752929688, "logps/rejected": -610.4060668945312, "loss": 0.029, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3017420470714569, "rewards/margins": 0.0958317220211029, "rewards/rejected": -0.3975737690925598, "step": 2660 }, { "epoch": 0.17, "learning_rate": 4.9155886921376615e-06, "logits/chosen": -1.437931776046753, "logits/rejected": -1.4025211334228516, "logps/chosen": -476.03460693359375, "logps/rejected": -545.0010375976562, "loss": 0.0498, "rewards/accuracies": 0.625, "rewards/chosen": -0.27006620168685913, "rewards/margins": 0.04729805886745453, "rewards/rejected": -0.31736427545547485, "step": 2670 }, { "epoch": 0.18, "learning_rate": 4.914111173848205e-06, "logits/chosen": -1.6449569463729858, "logits/rejected": -1.5904994010925293, "logps/chosen": -455.48870849609375, "logps/rejected": -480.3805236816406, "loss": 0.0214, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2224571257829666, "rewards/margins": 0.04030979797244072, "rewards/rejected": -0.26276689767837524, "step": 2680 }, { "epoch": 0.18, "learning_rate": 4.9126210623723e-06, "logits/chosen": -1.4655731916427612, "logits/rejected": -1.5623242855072021, "logps/chosen": -387.4043273925781, "logps/rejected": -503.0823669433594, "loss": 0.0243, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18703094124794006, "rewards/margins": 0.08190608024597168, "rewards/rejected": -0.26893705129623413, "step": 2690 }, { "epoch": 0.18, "learning_rate": 4.911118365483098e-06, "logits/chosen": -1.6317088603973389, "logits/rejected": -1.6305482387542725, "logps/chosen": -404.6954040527344, "logps/rejected": -485.8017578125, "loss": 0.0331, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2029411792755127, "rewards/margins": 0.07168016582727432, "rewards/rejected": -0.2746213376522064, "step": 2700 }, { "epoch": 0.18, "eval_logits/chosen": -1.687991976737976, "eval_logits/rejected": -1.522782564163208, "eval_logps/chosen": -405.9103088378906, "eval_logps/rejected": -460.362060546875, "eval_loss": 0.026842227205634117, "eval_rewards/accuracies": 0.6514999866485596, "eval_rewards/chosen": -0.1739053577184677, "eval_rewards/margins": 0.07484481483697891, "eval_rewards/rejected": -0.24875016510486603, "eval_runtime": 715.4872, "eval_samples_per_second": 2.795, "eval_steps_per_second": 1.398, "step": 2700 }, { "epoch": 0.18, "learning_rate": 4.909603091019403e-06, "logits/chosen": -1.8760337829589844, "logits/rejected": -1.5929535627365112, "logps/chosen": -365.8999938964844, "logps/rejected": -398.37774658203125, "loss": 0.0125, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1295226365327835, "rewards/margins": 0.06909807026386261, "rewards/rejected": -0.1986207216978073, "step": 2710 }, { "epoch": 0.18, "learning_rate": 4.908075246885626e-06, "logits/chosen": -1.6483469009399414, "logits/rejected": -1.5950958728790283, "logps/chosen": -284.9224548339844, "logps/rejected": -305.2195129394531, "loss": 0.0545, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1341949701309204, "rewards/margins": 0.04768837243318558, "rewards/rejected": -0.1818833351135254, "step": 2720 }, { "epoch": 0.18, "learning_rate": 4.906534841051755e-06, "logits/chosen": -1.5291738510131836, "logits/rejected": -1.550286054611206, "logps/chosen": -355.7880859375, "logps/rejected": -419.233642578125, "loss": 0.0208, "rewards/accuracies": 0.5, "rewards/chosen": -0.11238463968038559, "rewards/margins": 0.05240996554493904, "rewards/rejected": -0.16479462385177612, "step": 2730 }, { "epoch": 0.18, "learning_rate": 4.904981881553297e-06, "logits/chosen": -1.7509527206420898, "logits/rejected": -1.4745022058486938, "logps/chosen": -352.93408203125, "logps/rejected": -337.7349548339844, "loss": 0.0254, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.12873777747154236, "rewards/margins": 0.04891907051205635, "rewards/rejected": -0.1776568591594696, "step": 2740 }, { "epoch": 0.18, "learning_rate": 4.903416376491252e-06, "logits/chosen": -1.7483268976211548, "logits/rejected": -1.4771935939788818, "logps/chosen": -409.47113037109375, "logps/rejected": -481.341796875, "loss": 0.0155, "rewards/accuracies": 0.75, "rewards/chosen": -0.13117894530296326, "rewards/margins": 0.11246274411678314, "rewards/rejected": -0.2436417043209076, "step": 2750 }, { "epoch": 0.18, "learning_rate": 4.90183833403206e-06, "logits/chosen": -1.8782541751861572, "logits/rejected": -1.7671693563461304, "logps/chosen": -376.3662414550781, "logps/rejected": -423.91949462890625, "loss": 0.023, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11215315014123917, "rewards/margins": 0.08380623161792755, "rewards/rejected": -0.19595938920974731, "step": 2760 }, { "epoch": 0.18, "learning_rate": 4.900247762407564e-06, "logits/chosen": -1.4917551279067993, "logits/rejected": -1.4164373874664307, "logps/chosen": -300.008544921875, "logps/rejected": -406.7856140136719, "loss": 0.0246, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1277165710926056, "rewards/margins": 0.08473442494869232, "rewards/rejected": -0.2124510109424591, "step": 2770 }, { "epoch": 0.18, "learning_rate": 4.898644669914965e-06, "logits/chosen": -1.4445323944091797, "logits/rejected": -1.3582546710968018, "logps/chosen": -403.2223815917969, "logps/rejected": -472.18359375, "loss": 0.0295, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17915001511573792, "rewards/margins": 0.0827871710062027, "rewards/rejected": -0.2619372010231018, "step": 2780 }, { "epoch": 0.18, "learning_rate": 4.897029064916778e-06, "logits/chosen": -1.1894563436508179, "logits/rejected": -1.114213466644287, "logps/chosen": -351.00579833984375, "logps/rejected": -378.63201904296875, "loss": 0.0312, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14285585284233093, "rewards/margins": 0.04001506790518761, "rewards/rejected": -0.18287089467048645, "step": 2790 }, { "epoch": 0.18, "learning_rate": 4.895400955840791e-06, "logits/chosen": -1.6722192764282227, "logits/rejected": -1.0825845003128052, "logps/chosen": -303.4746398925781, "logps/rejected": -325.2283630371094, "loss": 0.0387, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08312331140041351, "rewards/margins": 0.06345156580209732, "rewards/rejected": -0.14657486975193024, "step": 2800 }, { "epoch": 0.18, "eval_logits/chosen": -1.5555263757705688, "eval_logits/rejected": -1.4048151969909668, "eval_logps/chosen": -308.4064636230469, "eval_logps/rejected": -348.3399658203125, "eval_loss": 0.02764984779059887, "eval_rewards/accuracies": 0.6510000228881836, "eval_rewards/chosen": -0.07640152424573898, "eval_rewards/margins": 0.060326579958200455, "eval_rewards/rejected": -0.13672809302806854, "eval_runtime": 715.0042, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.399, "step": 2800 }, { "epoch": 0.18, "learning_rate": 4.893760351180018e-06, "logits/chosen": -1.5064618587493896, "logits/rejected": -1.5149997472763062, "logps/chosen": -284.88531494140625, "logps/rejected": -341.89605712890625, "loss": 0.021, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08634404838085175, "rewards/margins": 0.051909804344177246, "rewards/rejected": -0.1382538378238678, "step": 2810 }, { "epoch": 0.18, "learning_rate": 4.892107259492657e-06, "logits/chosen": -1.4476242065429688, "logits/rejected": -1.2867496013641357, "logps/chosen": -325.77655029296875, "logps/rejected": -374.5207214355469, "loss": 0.0199, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.08591722697019577, "rewards/margins": 0.04272528737783432, "rewards/rejected": -0.1286425143480301, "step": 2820 }, { "epoch": 0.19, "learning_rate": 4.890441689402042e-06, "logits/chosen": -1.706372618675232, "logits/rejected": -1.497586965560913, "logps/chosen": -442.75762939453125, "logps/rejected": -484.16278076171875, "loss": 0.0191, "rewards/accuracies": 0.75, "rewards/chosen": -0.10725273936986923, "rewards/margins": 0.08940508216619492, "rewards/rejected": -0.19665783643722534, "step": 2830 }, { "epoch": 0.19, "learning_rate": 4.888763649596606e-06, "logits/chosen": -1.5949089527130127, "logits/rejected": -1.4612414836883545, "logps/chosen": -348.3107604980469, "logps/rejected": -399.00946044921875, "loss": 0.0603, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.13814905285835266, "rewards/margins": 0.05950012058019638, "rewards/rejected": -0.19764918088912964, "step": 2840 }, { "epoch": 0.19, "learning_rate": 4.887073148829824e-06, "logits/chosen": -1.4307711124420166, "logits/rejected": -1.3445477485656738, "logps/chosen": -435.87969970703125, "logps/rejected": -503.87677001953125, "loss": 0.0211, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1689760982990265, "rewards/margins": 0.09093265235424042, "rewards/rejected": -0.2599087357521057, "step": 2850 }, { "epoch": 0.19, "learning_rate": 4.885370195920177e-06, "logits/chosen": -1.224533200263977, "logits/rejected": -1.1664388179779053, "logps/chosen": -406.84783935546875, "logps/rejected": -463.352783203125, "loss": 0.0347, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.22152414917945862, "rewards/margins": 0.06507807970046997, "rewards/rejected": -0.2866022288799286, "step": 2860 }, { "epoch": 0.19, "learning_rate": 4.883654799751101e-06, "logits/chosen": -1.219029188156128, "logits/rejected": -1.2090712785720825, "logps/chosen": -372.4510192871094, "logps/rejected": -467.0826721191406, "loss": 0.0429, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1567418873310089, "rewards/margins": 0.0598914735019207, "rewards/rejected": -0.2166333645582199, "step": 2870 }, { "epoch": 0.19, "learning_rate": 4.8819269692709435e-06, "logits/chosen": -1.3949222564697266, "logits/rejected": -1.14644193649292, "logps/chosen": -426.17144775390625, "logps/rejected": -431.9847717285156, "loss": 0.0167, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15912523865699768, "rewards/margins": 0.07817539572715759, "rewards/rejected": -0.23730063438415527, "step": 2880 }, { "epoch": 0.19, "learning_rate": 4.880186713492915e-06, "logits/chosen": -1.2567390203475952, "logits/rejected": -1.0953929424285889, "logps/chosen": -440.506103515625, "logps/rejected": -439.71923828125, "loss": 0.0192, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21278877556324005, "rewards/margins": 0.052835047245025635, "rewards/rejected": -0.2656238079071045, "step": 2890 }, { "epoch": 0.19, "learning_rate": 4.878434041495041e-06, "logits/chosen": -1.262115240097046, "logits/rejected": -1.3884295225143433, "logps/chosen": -467.01251220703125, "logps/rejected": -549.6758422851562, "loss": 0.0343, "rewards/accuracies": 0.75, "rewards/chosen": -0.2298467606306076, "rewards/margins": 0.08080112934112549, "rewards/rejected": -0.3106479048728943, "step": 2900 }, { "epoch": 0.19, "eval_logits/chosen": -1.1547576189041138, "eval_logits/rejected": -1.0215644836425781, "eval_logps/chosen": -461.8814392089844, "eval_logps/rejected": -521.826416015625, "eval_loss": 0.026416657492518425, "eval_rewards/accuracies": 0.6535000205039978, "eval_rewards/chosen": -0.2298765331506729, "eval_rewards/margins": 0.0803379938006401, "eval_rewards/rejected": -0.3102145493030548, "eval_runtime": 716.1006, "eval_samples_per_second": 2.793, "eval_steps_per_second": 1.396, "step": 2900 }, { "epoch": 0.19, "learning_rate": 4.876668962420117e-06, "logits/chosen": -1.1848151683807373, "logits/rejected": -1.0112261772155762, "logps/chosen": -491.0107421875, "logps/rejected": -500.01226806640625, "loss": 0.0404, "rewards/accuracies": 0.625, "rewards/chosen": -0.20135970413684845, "rewards/margins": 0.06861764192581177, "rewards/rejected": -0.269977331161499, "step": 2910 }, { "epoch": 0.19, "learning_rate": 4.87489148547566e-06, "logits/chosen": -1.2572965621948242, "logits/rejected": -1.1404519081115723, "logps/chosen": -506.4109802246094, "logps/rejected": -526.6151123046875, "loss": 0.0423, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2465524971485138, "rewards/margins": 0.05337701365351677, "rewards/rejected": -0.29992952942848206, "step": 2920 }, { "epoch": 0.19, "learning_rate": 4.873101619933862e-06, "logits/chosen": -1.3553876876831055, "logits/rejected": -1.0165131092071533, "logps/chosen": -468.7685546875, "logps/rejected": -514.31396484375, "loss": 0.0276, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20424136519432068, "rewards/margins": 0.09486101567745209, "rewards/rejected": -0.2991023659706116, "step": 2930 }, { "epoch": 0.19, "learning_rate": 4.8712993751315385e-06, "logits/chosen": -1.1872071027755737, "logits/rejected": -1.1014295816421509, "logps/chosen": -289.3373107910156, "logps/rejected": -345.5552673339844, "loss": 0.0459, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1686793565750122, "rewards/margins": 0.05231611803174019, "rewards/rejected": -0.2209954708814621, "step": 2940 }, { "epoch": 0.19, "learning_rate": 4.869484760470079e-06, "logits/chosen": -1.2565022706985474, "logits/rejected": -1.0420525074005127, "logps/chosen": -374.64190673828125, "logps/rejected": -409.6905822753906, "loss": 0.018, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18598109483718872, "rewards/margins": 0.07134781777858734, "rewards/rejected": -0.25732892751693726, "step": 2950 }, { "epoch": 0.19, "learning_rate": 4.867657785415404e-06, "logits/chosen": -1.2335708141326904, "logits/rejected": -1.0096508264541626, "logps/chosen": -444.1263732910156, "logps/rejected": -474.22296142578125, "loss": 0.0318, "rewards/accuracies": 0.625, "rewards/chosen": -0.19502733647823334, "rewards/margins": 0.07184124737977982, "rewards/rejected": -0.26686859130859375, "step": 2960 }, { "epoch": 0.19, "learning_rate": 4.865818459497911e-06, "logits/chosen": -1.3747684955596924, "logits/rejected": -1.2176918983459473, "logps/chosen": -467.1026306152344, "logps/rejected": -441.41851806640625, "loss": 0.0275, "rewards/accuracies": 0.625, "rewards/chosen": -0.18292400240898132, "rewards/margins": 0.05911308526992798, "rewards/rejected": -0.2420370876789093, "step": 2970 }, { "epoch": 0.19, "learning_rate": 4.863966792312423e-06, "logits/chosen": -1.4048041105270386, "logits/rejected": -1.0040220022201538, "logps/chosen": -449.4647521972656, "logps/rejected": -510.4853515625, "loss": 0.0209, "rewards/accuracies": 0.625, "rewards/chosen": -0.20800583064556122, "rewards/margins": 0.10207201540470123, "rewards/rejected": -0.31007784605026245, "step": 2980 }, { "epoch": 0.2, "learning_rate": 4.862102793518145e-06, "logits/chosen": -1.224578619003296, "logits/rejected": -1.305317759513855, "logps/chosen": -443.22686767578125, "logps/rejected": -541.0364379882812, "loss": 0.0365, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24894499778747559, "rewards/margins": 0.09088452905416489, "rewards/rejected": -0.33982953429222107, "step": 2990 }, { "epoch": 0.2, "learning_rate": 4.8602264728386075e-06, "logits/chosen": -1.306498646736145, "logits/rejected": -1.1680400371551514, "logps/chosen": -502.73681640625, "logps/rejected": -588.67724609375, "loss": 0.0267, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.25023728609085083, "rewards/margins": 0.0893591046333313, "rewards/rejected": -0.3395964205265045, "step": 3000 }, { "epoch": 0.2, "eval_logits/chosen": -1.208787202835083, "eval_logits/rejected": -1.0687692165374756, "eval_logps/chosen": -479.3534851074219, "eval_logps/rejected": -547.255859375, "eval_loss": 0.02753547392785549, "eval_rewards/accuracies": 0.6520000100135803, "eval_rewards/chosen": -0.2473485767841339, "eval_rewards/margins": 0.08829541504383087, "eval_rewards/rejected": -0.3356439769268036, "eval_runtime": 716.0743, "eval_samples_per_second": 2.793, "eval_steps_per_second": 1.397, "step": 3000 }, { "epoch": 0.2, "learning_rate": 4.858337840061616e-06, "logits/chosen": -1.111453652381897, "logits/rejected": -1.1533695459365845, "logps/chosen": -404.9215087890625, "logps/rejected": -525.7424926757812, "loss": 0.0235, "rewards/accuracies": 0.625, "rewards/chosen": -0.22184114158153534, "rewards/margins": 0.06717177480459213, "rewards/rejected": -0.2890129089355469, "step": 3010 }, { "epoch": 0.2, "learning_rate": 4.856436905039208e-06, "logits/chosen": -1.2779773473739624, "logits/rejected": -1.074496865272522, "logps/chosen": -406.3516845703125, "logps/rejected": -460.1309509277344, "loss": 0.0254, "rewards/accuracies": 0.75, "rewards/chosen": -0.19514666497707367, "rewards/margins": 0.08816659450531006, "rewards/rejected": -0.28331324458122253, "step": 3020 }, { "epoch": 0.2, "learning_rate": 4.854523677687588e-06, "logits/chosen": -1.100765585899353, "logits/rejected": -1.2895265817642212, "logps/chosen": -375.33392333984375, "logps/rejected": -459.338134765625, "loss": 0.0339, "rewards/accuracies": 0.625, "rewards/chosen": -0.19408836960792542, "rewards/margins": 0.06560763716697693, "rewards/rejected": -0.25969600677490234, "step": 3030 }, { "epoch": 0.2, "learning_rate": 4.85259816798709e-06, "logits/chosen": -1.4775636196136475, "logits/rejected": -1.0940361022949219, "logps/chosen": -430.52276611328125, "logps/rejected": -444.9402770996094, "loss": 0.021, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1427607387304306, "rewards/margins": 0.09497347474098206, "rewards/rejected": -0.23773419857025146, "step": 3040 }, { "epoch": 0.2, "learning_rate": 4.850660385982114e-06, "logits/chosen": -1.3967735767364502, "logits/rejected": -1.2292277812957764, "logps/chosen": -388.9773254394531, "logps/rejected": -404.4996032714844, "loss": 0.0449, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14125820994377136, "rewards/margins": 0.07080356776714325, "rewards/rejected": -0.2120617926120758, "step": 3050 }, { "epoch": 0.2, "learning_rate": 4.848710341781081e-06, "logits/chosen": -0.8814219236373901, "logits/rejected": -0.9714235067367554, "logps/chosen": -604.72802734375, "logps/rejected": -645.174072265625, "loss": 0.0377, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.4308265745639801, "rewards/margins": 0.04617274925112724, "rewards/rejected": -0.47699934244155884, "step": 3060 }, { "epoch": 0.2, "learning_rate": 4.846748045556377e-06, "logits/chosen": -0.669106125831604, "logits/rejected": -0.5257667303085327, "logps/chosen": -728.2044677734375, "logps/rejected": -723.8983764648438, "loss": 0.0437, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4887174665927887, "rewards/margins": 0.05620478466153145, "rewards/rejected": -0.5449221730232239, "step": 3070 }, { "epoch": 0.2, "learning_rate": 4.8447735075442995e-06, "logits/chosen": -0.8036796450614929, "logits/rejected": -0.6234291791915894, "logps/chosen": -700.3363647460938, "logps/rejected": -778.0164794921875, "loss": 0.038, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5027741193771362, "rewards/margins": 0.08108066022396088, "rewards/rejected": -0.5838547945022583, "step": 3080 }, { "epoch": 0.2, "learning_rate": 4.8427867380450075e-06, "logits/chosen": -1.237807035446167, "logits/rejected": -0.8516899943351746, "logps/chosen": -569.9614868164062, "logps/rejected": -595.0052490234375, "loss": 0.0284, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3396221399307251, "rewards/margins": 0.07129369676113129, "rewards/rejected": -0.4109157919883728, "step": 3090 }, { "epoch": 0.2, "learning_rate": 4.840787747422462e-06, "logits/chosen": -1.4270131587982178, "logits/rejected": -1.2692177295684814, "logps/chosen": -436.2783203125, "logps/rejected": -467.39129638671875, "loss": 0.0355, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.24062223732471466, "rewards/margins": 0.061370570212602615, "rewards/rejected": -0.301992803812027, "step": 3100 }, { "epoch": 0.2, "eval_logits/chosen": -1.4359893798828125, "eval_logits/rejected": -1.2857165336608887, "eval_logps/chosen": -459.7388610839844, "eval_logps/rejected": -509.36956787109375, "eval_loss": 0.02801002562046051, "eval_rewards/accuracies": 0.6414999961853027, "eval_rewards/chosen": -0.22773395478725433, "eval_rewards/margins": 0.07002376765012741, "eval_rewards/rejected": -0.29775771498680115, "eval_runtime": 715.4853, "eval_samples_per_second": 2.795, "eval_steps_per_second": 1.398, "step": 3100 }, { "epoch": 0.2, "learning_rate": 4.838776546104378e-06, "logits/chosen": -1.5482267141342163, "logits/rejected": -1.408126711845398, "logps/chosen": -501.5435485839844, "logps/rejected": -532.2131958007812, "loss": 0.0214, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22059035301208496, "rewards/margins": 0.07107152789831161, "rewards/rejected": -0.2916618883609772, "step": 3110 }, { "epoch": 0.2, "learning_rate": 4.836753144582168e-06, "logits/chosen": -1.4709421396255493, "logits/rejected": -1.1053333282470703, "logps/chosen": -448.6698303222656, "logps/rejected": -527.6964111328125, "loss": 0.0362, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20643825829029083, "rewards/margins": 0.10313912481069565, "rewards/rejected": -0.30957740545272827, "step": 3120 }, { "epoch": 0.2, "learning_rate": 4.834717553410884e-06, "logits/chosen": -1.5624732971191406, "logits/rejected": -1.4535454511642456, "logps/chosen": -352.4505920410156, "logps/rejected": -463.25164794921875, "loss": 0.02, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16317462921142578, "rewards/margins": 0.09713192284107208, "rewards/rejected": -0.26030653715133667, "step": 3130 }, { "epoch": 0.21, "learning_rate": 4.832669783209167e-06, "logits/chosen": -1.3832824230194092, "logits/rejected": -1.5235271453857422, "logps/chosen": -389.8204040527344, "logps/rejected": -417.5896911621094, "loss": 0.0181, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14539441466331482, "rewards/margins": 0.027220387011766434, "rewards/rejected": -0.17261478304862976, "step": 3140 }, { "epoch": 0.21, "learning_rate": 4.8306098446591895e-06, "logits/chosen": -1.0622313022613525, "logits/rejected": -1.1479713916778564, "logps/chosen": -305.59869384765625, "logps/rejected": -387.69500732421875, "loss": 0.0335, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1293298304080963, "rewards/margins": 0.05716506391763687, "rewards/rejected": -0.18649490177631378, "step": 3150 }, { "epoch": 0.21, "learning_rate": 4.828537748506601e-06, "logits/chosen": -1.7271381616592407, "logits/rejected": -1.4797282218933105, "logps/chosen": -408.99945068359375, "logps/rejected": -389.69342041015625, "loss": 0.0155, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1356011927127838, "rewards/margins": 0.036940205842256546, "rewards/rejected": -0.17254140973091125, "step": 3160 }, { "epoch": 0.21, "learning_rate": 4.826453505560469e-06, "logits/chosen": -1.257057547569275, "logits/rejected": -1.2101637125015259, "logps/chosen": -348.58990478515625, "logps/rejected": -395.6633605957031, "loss": 0.0272, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.15761083364486694, "rewards/margins": 0.06076999753713608, "rewards/rejected": -0.21838083863258362, "step": 3170 }, { "epoch": 0.21, "learning_rate": 4.824357126693226e-06, "logits/chosen": -1.206905722618103, "logits/rejected": -1.0954079627990723, "logps/chosen": -440.3356018066406, "logps/rejected": -450.68426513671875, "loss": 0.0253, "rewards/accuracies": 0.625, "rewards/chosen": -0.18159246444702148, "rewards/margins": 0.05907604843378067, "rewards/rejected": -0.24066850543022156, "step": 3180 }, { "epoch": 0.21, "learning_rate": 4.8222486228406105e-06, "logits/chosen": -1.585700273513794, "logits/rejected": -1.3248627185821533, "logps/chosen": -361.4765625, "logps/rejected": -396.8673095703125, "loss": 0.018, "rewards/accuracies": 0.75, "rewards/chosen": -0.15023620426654816, "rewards/margins": 0.07007183134555817, "rewards/rejected": -0.22030803561210632, "step": 3190 }, { "epoch": 0.21, "learning_rate": 4.820128005001612e-06, "logits/chosen": -1.3011162281036377, "logits/rejected": -1.1820589303970337, "logps/chosen": -330.17327880859375, "logps/rejected": -426.6387634277344, "loss": 0.0291, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11117974668741226, "rewards/margins": 0.11800827085971832, "rewards/rejected": -0.22918801009655, "step": 3200 }, { "epoch": 0.21, "eval_logits/chosen": -1.5017451047897339, "eval_logits/rejected": -1.3483959436416626, "eval_logps/chosen": -383.9444274902344, "eval_logps/rejected": -439.85009765625, "eval_loss": 0.0258590467274189, "eval_rewards/accuracies": 0.6635000109672546, "eval_rewards/chosen": -0.1519394963979721, "eval_rewards/margins": 0.07629870623350143, "eval_rewards/rejected": -0.22823821008205414, "eval_runtime": 718.5201, "eval_samples_per_second": 2.783, "eval_steps_per_second": 1.392, "step": 3200 }, { "epoch": 0.21, "learning_rate": 4.817995284238412e-06, "logits/chosen": -1.3580515384674072, "logits/rejected": -1.40303635597229, "logps/chosen": -380.62884521484375, "logps/rejected": -494.95751953125, "loss": 0.0191, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1841045618057251, "rewards/margins": 0.08684380352497101, "rewards/rejected": -0.2709483504295349, "step": 3210 }, { "epoch": 0.21, "learning_rate": 4.815850471676327e-06, "logits/chosen": -1.6468623876571655, "logits/rejected": -1.3511347770690918, "logps/chosen": -375.91961669921875, "logps/rejected": -481.4476623535156, "loss": 0.0258, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13750562071800232, "rewards/margins": 0.1097753494977951, "rewards/rejected": -0.24728095531463623, "step": 3220 }, { "epoch": 0.21, "learning_rate": 4.813693578503751e-06, "logits/chosen": -1.5668243169784546, "logits/rejected": -1.3791062831878662, "logps/chosen": -434.0006408691406, "logps/rejected": -457.78350830078125, "loss": 0.0143, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13749387860298157, "rewards/margins": 0.07924045622348785, "rewards/rejected": -0.21673433482646942, "step": 3230 }, { "epoch": 0.21, "learning_rate": 4.811524615972093e-06, "logits/chosen": -1.539772868156433, "logits/rejected": -1.5294477939605713, "logps/chosen": -377.7463073730469, "logps/rejected": -465.9175720214844, "loss": 0.0467, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.14781072735786438, "rewards/margins": 0.0799689069390297, "rewards/rejected": -0.22777962684631348, "step": 3240 }, { "epoch": 0.21, "learning_rate": 4.809343595395724e-06, "logits/chosen": -2.0052363872528076, "logits/rejected": -1.7364158630371094, "logps/chosen": -335.25421142578125, "logps/rejected": -355.6961669921875, "loss": 0.044, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.14619852602481842, "rewards/margins": 0.0510307252407074, "rewards/rejected": -0.19722925126552582, "step": 3250 }, { "epoch": 0.21, "learning_rate": 4.807150528151918e-06, "logits/chosen": -1.5962978601455688, "logits/rejected": -1.4829938411712646, "logps/chosen": -341.68096923828125, "logps/rejected": -452.62078857421875, "loss": 0.0183, "rewards/accuracies": 0.625, "rewards/chosen": -0.17498841881752014, "rewards/margins": 0.09307295083999634, "rewards/rejected": -0.2680613398551941, "step": 3260 }, { "epoch": 0.21, "learning_rate": 4.804945425680787e-06, "logits/chosen": -1.601499319076538, "logits/rejected": -1.5964961051940918, "logps/chosen": -416.5970153808594, "logps/rejected": -444.89837646484375, "loss": 0.0322, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22815386950969696, "rewards/margins": 0.04837958887219429, "rewards/rejected": -0.27653342485427856, "step": 3270 }, { "epoch": 0.21, "learning_rate": 4.802728299485225e-06, "logits/chosen": -1.4276443719863892, "logits/rejected": -1.391061782836914, "logps/chosen": -368.29736328125, "logps/rejected": -430.25555419921875, "loss": 0.0337, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.21769294142723083, "rewards/margins": 0.04042655974626541, "rewards/rejected": -0.25811952352523804, "step": 3280 }, { "epoch": 0.22, "learning_rate": 4.8004991611308495e-06, "logits/chosen": -1.7569210529327393, "logits/rejected": -1.5368220806121826, "logps/chosen": -374.6806335449219, "logps/rejected": -434.1981506347656, "loss": 0.0127, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.133126899600029, "rewards/margins": 0.07556769251823425, "rewards/rejected": -0.20869460701942444, "step": 3290 }, { "epoch": 0.22, "learning_rate": 4.798258022245937e-06, "logits/chosen": -1.731288194656372, "logits/rejected": -1.439117193222046, "logps/chosen": -349.6397705078125, "logps/rejected": -379.30352783203125, "loss": 0.035, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13203327357769012, "rewards/margins": 0.06812886893749237, "rewards/rejected": -0.2001621276140213, "step": 3300 }, { "epoch": 0.22, "eval_logits/chosen": -1.6883187294006348, "eval_logits/rejected": -1.526490330696106, "eval_logps/chosen": -353.01788330078125, "eval_logps/rejected": -412.4005126953125, "eval_loss": 0.0257407259196043, "eval_rewards/accuracies": 0.6554999947547913, "eval_rewards/chosen": -0.12101297080516815, "eval_rewards/margins": 0.07977566123008728, "eval_rewards/rejected": -0.20078861713409424, "eval_runtime": 718.9454, "eval_samples_per_second": 2.782, "eval_steps_per_second": 1.391, "step": 3300 }, { "epoch": 0.22, "learning_rate": 4.796004894521365e-06, "logits/chosen": -1.7550973892211914, "logits/rejected": -1.485835313796997, "logps/chosen": -339.69219970703125, "logps/rejected": -450.79931640625, "loss": 0.0393, "rewards/accuracies": 0.5, "rewards/chosen": -0.11112241446971893, "rewards/margins": 0.08985067903995514, "rewards/rejected": -0.20097307860851288, "step": 3310 }, { "epoch": 0.22, "learning_rate": 4.7937397897105545e-06, "logits/chosen": -1.6924505233764648, "logits/rejected": -1.598737359046936, "logps/chosen": -313.4986267089844, "logps/rejected": -320.2013854980469, "loss": 0.0174, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.10826580226421356, "rewards/margins": 0.03191622346639633, "rewards/rejected": -0.1401820331811905, "step": 3320 }, { "epoch": 0.22, "learning_rate": 4.791462719629399e-06, "logits/chosen": -1.6165030002593994, "logits/rejected": -1.51338791847229, "logps/chosen": -283.0477600097656, "logps/rejected": -355.3376159667969, "loss": 0.0151, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09732256829738617, "rewards/margins": 0.09470139443874359, "rewards/rejected": -0.19202396273612976, "step": 3330 }, { "epoch": 0.22, "learning_rate": 4.789173696156212e-06, "logits/chosen": -1.676601767539978, "logits/rejected": -1.3383705615997314, "logps/chosen": -413.7373046875, "logps/rejected": -552.4971923828125, "loss": 0.0162, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13729673624038696, "rewards/margins": 0.15740932524204254, "rewards/rejected": -0.2947060465812683, "step": 3340 }, { "epoch": 0.22, "learning_rate": 4.786872731231662e-06, "logits/chosen": -1.7850911617279053, "logits/rejected": -1.6605621576309204, "logps/chosen": -364.562744140625, "logps/rejected": -422.9776916503906, "loss": 0.032, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.14767178893089294, "rewards/margins": 0.06978397071361542, "rewards/rejected": -0.21745578944683075, "step": 3350 }, { "epoch": 0.22, "learning_rate": 4.784559836858709e-06, "logits/chosen": -1.7245270013809204, "logits/rejected": -1.3981090784072876, "logps/chosen": -322.774169921875, "logps/rejected": -365.6064147949219, "loss": 0.0156, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08885706216096878, "rewards/margins": 0.07325319945812225, "rewards/rejected": -0.16211025416851044, "step": 3360 }, { "epoch": 0.22, "learning_rate": 4.782235025102542e-06, "logits/chosen": -1.783856987953186, "logits/rejected": -1.7142223119735718, "logps/chosen": -325.214599609375, "logps/rejected": -368.7933349609375, "loss": 0.0323, "rewards/accuracies": 0.625, "rewards/chosen": -0.09889528900384903, "rewards/margins": 0.061059266328811646, "rewards/rejected": -0.15995456278324127, "step": 3370 }, { "epoch": 0.22, "learning_rate": 4.779898308090519e-06, "logits/chosen": -1.7646636962890625, "logits/rejected": -1.5503871440887451, "logps/chosen": -356.7688903808594, "logps/rejected": -392.7975158691406, "loss": 0.0348, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08209162950515747, "rewards/margins": 0.07285936176776886, "rewards/rejected": -0.15495099127292633, "step": 3380 }, { "epoch": 0.22, "learning_rate": 4.777549698012101e-06, "logits/chosen": -1.5743420124053955, "logits/rejected": -1.469313383102417, "logps/chosen": -375.27203369140625, "logps/rejected": -453.04217529296875, "loss": 0.0197, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.132055401802063, "rewards/margins": 0.09692979604005814, "rewards/rejected": -0.22898522019386292, "step": 3390 }, { "epoch": 0.22, "learning_rate": 4.775189207118787e-06, "logits/chosen": -1.5084998607635498, "logits/rejected": -1.37344229221344, "logps/chosen": -395.0802307128906, "logps/rejected": -464.81494140625, "loss": 0.0319, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12326253950595856, "rewards/margins": 0.09207239747047424, "rewards/rejected": -0.2153349369764328, "step": 3400 }, { "epoch": 0.22, "eval_logits/chosen": -1.5692068338394165, "eval_logits/rejected": -1.4125865697860718, "eval_logps/chosen": -369.19439697265625, "eval_logps/rejected": -426.3359680175781, "eval_loss": 0.026329396292567253, "eval_rewards/accuracies": 0.6514999866485596, "eval_rewards/chosen": -0.13718947768211365, "eval_rewards/margins": 0.07753460109233856, "eval_rewards/rejected": -0.2147240936756134, "eval_runtime": 715.6206, "eval_samples_per_second": 2.795, "eval_steps_per_second": 1.397, "step": 3400 }, { "epoch": 0.22, "learning_rate": 4.772816847724054e-06, "logits/chosen": -1.6219704151153564, "logits/rejected": -1.5611522197723389, "logps/chosen": -352.13653564453125, "logps/rejected": -406.34002685546875, "loss": 0.0494, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.12958194315433502, "rewards/margins": 0.054705776274204254, "rewards/rejected": -0.18428772687911987, "step": 3410 }, { "epoch": 0.22, "learning_rate": 4.770432632203294e-06, "logits/chosen": -1.3276927471160889, "logits/rejected": -1.2615364789962769, "logps/chosen": -363.6788635253906, "logps/rejected": -354.28448486328125, "loss": 0.0213, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.12003171443939209, "rewards/margins": 0.040923960506916046, "rewards/rejected": -0.16095566749572754, "step": 3420 }, { "epoch": 0.22, "learning_rate": 4.768036572993738e-06, "logits/chosen": -1.5845472812652588, "logits/rejected": -1.5167104005813599, "logps/chosen": -398.7275695800781, "logps/rejected": -445.26666259765625, "loss": 0.0194, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11870861053466797, "rewards/margins": 0.062138963490724564, "rewards/rejected": -0.18084758520126343, "step": 3430 }, { "epoch": 0.23, "learning_rate": 4.765628682594409e-06, "logits/chosen": -1.651971459388733, "logits/rejected": -1.511574625968933, "logps/chosen": -346.2164611816406, "logps/rejected": -390.0993347167969, "loss": 0.026, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10245941579341888, "rewards/margins": 0.0677853375673294, "rewards/rejected": -0.17024476826190948, "step": 3440 }, { "epoch": 0.23, "learning_rate": 4.763208973566041e-06, "logits/chosen": -1.5378526449203491, "logits/rejected": -1.4347686767578125, "logps/chosen": -319.76324462890625, "logps/rejected": -418.73834228515625, "loss": 0.0114, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.13563355803489685, "rewards/margins": 0.08025987446308136, "rewards/rejected": -0.21589341759681702, "step": 3450 }, { "epoch": 0.23, "learning_rate": 4.76077745853102e-06, "logits/chosen": -1.738347053527832, "logits/rejected": -1.6669028997421265, "logps/chosen": -408.7169494628906, "logps/rejected": -488.80877685546875, "loss": 0.021, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15866276621818542, "rewards/margins": 0.07896674424409866, "rewards/rejected": -0.2376295030117035, "step": 3460 }, { "epoch": 0.23, "learning_rate": 4.758334150173322e-06, "logits/chosen": -1.5776073932647705, "logits/rejected": -1.4525458812713623, "logps/chosen": -377.888671875, "logps/rejected": -415.124267578125, "loss": 0.0222, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11480472981929779, "rewards/margins": 0.06228917837142944, "rewards/rejected": -0.17709390819072723, "step": 3470 }, { "epoch": 0.23, "learning_rate": 4.755879061238439e-06, "logits/chosen": -1.7227039337158203, "logits/rejected": -1.5206025838851929, "logps/chosen": -384.97589111328125, "logps/rejected": -417.37799072265625, "loss": 0.0204, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.12880250811576843, "rewards/margins": 0.04503921791911125, "rewards/rejected": -0.17384172976016998, "step": 3480 }, { "epoch": 0.23, "learning_rate": 4.753412204533317e-06, "logits/chosen": -1.8658301830291748, "logits/rejected": -1.3638795614242554, "logps/chosen": -395.65032958984375, "logps/rejected": -444.8770446777344, "loss": 0.0143, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13279227912425995, "rewards/margins": 0.09461811184883118, "rewards/rejected": -0.22741039097309113, "step": 3490 }, { "epoch": 0.23, "learning_rate": 4.750933592926292e-06, "logits/chosen": -1.7066158056259155, "logits/rejected": -1.3803119659423828, "logps/chosen": -388.97735595703125, "logps/rejected": -450.00494384765625, "loss": 0.0257, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17024967074394226, "rewards/margins": 0.08824402093887329, "rewards/rejected": -0.25849369168281555, "step": 3500 }, { "epoch": 0.23, "eval_logits/chosen": -1.5721629858016968, "eval_logits/rejected": -1.416274905204773, "eval_logps/chosen": -398.126220703125, "eval_logps/rejected": -454.50531005859375, "eval_loss": 0.025550194084644318, "eval_rewards/accuracies": 0.6549999713897705, "eval_rewards/chosen": -0.166121244430542, "eval_rewards/margins": 0.07677214592695236, "eval_rewards/rejected": -0.24289338290691376, "eval_runtime": 718.9843, "eval_samples_per_second": 2.782, "eval_steps_per_second": 1.391, "step": 3500 }, { "epoch": 0.23, "learning_rate": 4.7484432393470124e-06, "logits/chosen": -1.7730098962783813, "logits/rejected": -1.2713485956192017, "logps/chosen": -367.51861572265625, "logps/rejected": -437.786865234375, "loss": 0.0293, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16543671488761902, "rewards/margins": 0.125665083527565, "rewards/rejected": -0.2911017835140228, "step": 3510 }, { "epoch": 0.23, "learning_rate": 4.745941156786385e-06, "logits/chosen": -1.3074201345443726, "logits/rejected": -1.3363656997680664, "logps/chosen": -350.04833984375, "logps/rejected": -536.9381713867188, "loss": 0.0478, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19700928032398224, "rewards/margins": 0.15604856610298157, "rewards/rejected": -0.3530578315258026, "step": 3520 }, { "epoch": 0.23, "learning_rate": 4.743427358296497e-06, "logits/chosen": -1.5538303852081299, "logits/rejected": -1.356993317604065, "logps/chosen": -389.4168395996094, "logps/rejected": -584.497314453125, "loss": 0.0371, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19970861077308655, "rewards/margins": 0.18377310037612915, "rewards/rejected": -0.3834817111492157, "step": 3530 }, { "epoch": 0.23, "learning_rate": 4.740901856990553e-06, "logits/chosen": -1.5904241800308228, "logits/rejected": -1.4152660369873047, "logps/chosen": -367.8504638671875, "logps/rejected": -392.91839599609375, "loss": 0.0421, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11115854978561401, "rewards/margins": 0.06914053857326508, "rewards/rejected": -0.1802990883588791, "step": 3540 }, { "epoch": 0.23, "learning_rate": 4.738364666042804e-06, "logits/chosen": -1.881345510482788, "logits/rejected": -1.502852201461792, "logps/chosen": -355.54150390625, "logps/rejected": -366.6114196777344, "loss": 0.0242, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0637965202331543, "rewards/margins": 0.07470977306365967, "rewards/rejected": -0.13850629329681396, "step": 3550 }, { "epoch": 0.23, "learning_rate": 4.735815798688483e-06, "logits/chosen": -1.7680435180664062, "logits/rejected": -1.5473442077636719, "logps/chosen": -290.61358642578125, "logps/rejected": -402.99066162109375, "loss": 0.0173, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09228159487247467, "rewards/margins": 0.09223800152540207, "rewards/rejected": -0.18451958894729614, "step": 3560 }, { "epoch": 0.23, "learning_rate": 4.7332552682237285e-06, "logits/chosen": -1.6272271871566772, "logits/rejected": -1.2562711238861084, "logps/chosen": -295.3713684082031, "logps/rejected": -391.9466857910156, "loss": 0.021, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.12161143869161606, "rewards/margins": 0.11234778165817261, "rewards/rejected": -0.23395919799804688, "step": 3570 }, { "epoch": 0.23, "learning_rate": 4.7306830880055234e-06, "logits/chosen": -1.5470902919769287, "logits/rejected": -1.4845939874649048, "logps/chosen": -412.5762634277344, "logps/rejected": -512.0144653320312, "loss": 0.0157, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2218424379825592, "rewards/margins": 0.09110113978385925, "rewards/rejected": -0.31294357776641846, "step": 3580 }, { "epoch": 0.23, "learning_rate": 4.728099271451619e-06, "logits/chosen": -1.5043888092041016, "logits/rejected": -1.4955155849456787, "logps/chosen": -415.22161865234375, "logps/rejected": -506.2706604003906, "loss": 0.0296, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.224213644862175, "rewards/margins": 0.09800152480602264, "rewards/rejected": -0.32221516966819763, "step": 3590 }, { "epoch": 0.24, "learning_rate": 4.725503832040466e-06, "logits/chosen": -1.363813877105713, "logits/rejected": -1.2473770380020142, "logps/chosen": -348.1240234375, "logps/rejected": -457.43377685546875, "loss": 0.0275, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1989661306142807, "rewards/margins": 0.0832224115729332, "rewards/rejected": -0.2821885645389557, "step": 3600 }, { "epoch": 0.24, "eval_logits/chosen": -1.5261356830596924, "eval_logits/rejected": -1.3716883659362793, "eval_logps/chosen": -403.8748779296875, "eval_logps/rejected": -474.4635009765625, "eval_loss": 0.026187097653746605, "eval_rewards/accuracies": 0.6575000286102295, "eval_rewards/chosen": -0.17186996340751648, "eval_rewards/margins": 0.09098166227340698, "eval_rewards/rejected": -0.26285162568092346, "eval_runtime": 714.3713, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 3600 }, { "epoch": 0.24, "learning_rate": 4.722896783311152e-06, "logits/chosen": -1.5962718725204468, "logits/rejected": -1.4725377559661865, "logps/chosen": -421.6626892089844, "logps/rejected": -536.9539794921875, "loss": 0.0381, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.16261371970176697, "rewards/margins": 0.06415730714797974, "rewards/rejected": -0.2267710417509079, "step": 3610 }, { "epoch": 0.24, "learning_rate": 4.720278138863318e-06, "logits/chosen": -1.778754472732544, "logits/rejected": -1.6733324527740479, "logps/chosen": -297.1551818847656, "logps/rejected": -315.61602783203125, "loss": 0.0366, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.10703345388174057, "rewards/margins": 0.05026799440383911, "rewards/rejected": -0.15730145573616028, "step": 3620 }, { "epoch": 0.24, "learning_rate": 4.717647912357095e-06, "logits/chosen": -1.8943226337432861, "logits/rejected": -1.9697908163070679, "logps/chosen": -366.98248291015625, "logps/rejected": -383.81658935546875, "loss": 0.0233, "rewards/accuracies": 0.5, "rewards/chosen": -0.09450535476207733, "rewards/margins": 0.0039016217924654484, "rewards/rejected": -0.09840697795152664, "step": 3630 }, { "epoch": 0.24, "learning_rate": 4.715006117513035e-06, "logits/chosen": -2.034503221511841, "logits/rejected": -1.8429596424102783, "logps/chosen": -365.08599853515625, "logps/rejected": -365.51361083984375, "loss": 0.0333, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.040960345417261124, "rewards/margins": 0.05560765415430069, "rewards/rejected": -0.09656800329685211, "step": 3640 }, { "epoch": 0.24, "learning_rate": 4.7123527681120326e-06, "logits/chosen": -1.8327289819717407, "logits/rejected": -1.678963303565979, "logps/chosen": -299.1856994628906, "logps/rejected": -331.3011779785156, "loss": 0.0373, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0520145408809185, "rewards/margins": 0.06026715785264969, "rewards/rejected": -0.11228171736001968, "step": 3650 }, { "epoch": 0.24, "learning_rate": 4.7096878779952594e-06, "logits/chosen": -1.8556129932403564, "logits/rejected": -1.8009693622589111, "logps/chosen": -381.96612548828125, "logps/rejected": -435.67205810546875, "loss": 0.0176, "rewards/accuracies": 0.75, "rewards/chosen": -0.10655276477336884, "rewards/margins": 0.05628864839673042, "rewards/rejected": -0.16284143924713135, "step": 3660 }, { "epoch": 0.24, "learning_rate": 4.707011461064086e-06, "logits/chosen": -1.5580644607543945, "logits/rejected": -1.3014835119247437, "logps/chosen": -417.1654357910156, "logps/rejected": -453.55938720703125, "loss": 0.0329, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1083063930273056, "rewards/margins": 0.08058464527130127, "rewards/rejected": -0.18889105319976807, "step": 3670 }, { "epoch": 0.24, "learning_rate": 4.704323531280016e-06, "logits/chosen": -1.4654561281204224, "logits/rejected": -1.358193278312683, "logps/chosen": -445.32745361328125, "logps/rejected": -434.67059326171875, "loss": 0.0156, "rewards/accuracies": 0.75, "rewards/chosen": -0.11811591684818268, "rewards/margins": 0.07253879308700562, "rewards/rejected": -0.1906547248363495, "step": 3680 }, { "epoch": 0.24, "learning_rate": 4.701624102664606e-06, "logits/chosen": -1.68216872215271, "logits/rejected": -1.4499469995498657, "logps/chosen": -452.471923828125, "logps/rejected": -483.283447265625, "loss": 0.0231, "rewards/accuracies": 0.625, "rewards/chosen": -0.19135603308677673, "rewards/margins": 0.08491180092096329, "rewards/rejected": -0.2762678265571594, "step": 3690 }, { "epoch": 0.24, "learning_rate": 4.698913189299399e-06, "logits/chosen": -1.565720796585083, "logits/rejected": -1.5686825513839722, "logps/chosen": -379.4205017089844, "logps/rejected": -467.1686096191406, "loss": 0.0367, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19326870143413544, "rewards/margins": 0.05524136498570442, "rewards/rejected": -0.24851007759571075, "step": 3700 }, { "epoch": 0.24, "eval_logits/chosen": -1.5758051872253418, "eval_logits/rejected": -1.4203293323516846, "eval_logps/chosen": -404.56427001953125, "eval_logps/rejected": -463.46734619140625, "eval_loss": 0.026558561250567436, "eval_rewards/accuracies": 0.6575000286102295, "eval_rewards/chosen": -0.17255933582782745, "eval_rewards/margins": 0.07929609715938568, "eval_rewards/rejected": -0.25185543298721313, "eval_runtime": 715.8551, "eval_samples_per_second": 2.794, "eval_steps_per_second": 1.397, "step": 3700 }, { "epoch": 0.24, "learning_rate": 4.696190805325847e-06, "logits/chosen": -1.6376221179962158, "logits/rejected": -1.5006821155548096, "logps/chosen": -351.1948547363281, "logps/rejected": -410.2684631347656, "loss": 0.015, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.14481601119041443, "rewards/margins": 0.0861922949552536, "rewards/rejected": -0.23100832104682922, "step": 3710 }, { "epoch": 0.24, "learning_rate": 4.693456964945239e-06, "logits/chosen": -1.7421514987945557, "logits/rejected": -1.338126540184021, "logps/chosen": -419.68304443359375, "logps/rejected": -391.30072021484375, "loss": 0.0463, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.12157756090164185, "rewards/margins": 0.07270988076925278, "rewards/rejected": -0.19428744912147522, "step": 3720 }, { "epoch": 0.24, "learning_rate": 4.6907116824186245e-06, "logits/chosen": -1.6877094507217407, "logits/rejected": -1.6332886219024658, "logps/chosen": -347.43511962890625, "logps/rejected": -398.9467468261719, "loss": 0.0246, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.12010757625102997, "rewards/margins": 0.05170787125825882, "rewards/rejected": -0.1718154400587082, "step": 3730 }, { "epoch": 0.24, "learning_rate": 4.687954972066742e-06, "logits/chosen": -1.5253454446792603, "logits/rejected": -1.363525629043579, "logps/chosen": -418.96868896484375, "logps/rejected": -525.2415161132812, "loss": 0.0346, "rewards/accuracies": 0.75, "rewards/chosen": -0.18915246427059174, "rewards/margins": 0.1271437704563141, "rewards/rejected": -0.316296249628067, "step": 3740 }, { "epoch": 0.25, "learning_rate": 4.685186848269944e-06, "logits/chosen": -1.4734838008880615, "logits/rejected": -1.311118245124817, "logps/chosen": -431.68994140625, "logps/rejected": -473.01397705078125, "loss": 0.0462, "rewards/accuracies": 0.625, "rewards/chosen": -0.2198176383972168, "rewards/margins": 0.0787028968334198, "rewards/rejected": -0.2985205352306366, "step": 3750 }, { "epoch": 0.25, "learning_rate": 4.682407325468119e-06, "logits/chosen": -1.6313402652740479, "logits/rejected": -1.3151494264602661, "logps/chosen": -361.9095458984375, "logps/rejected": -433.63739013671875, "loss": 0.0178, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1444905400276184, "rewards/margins": 0.10535760223865509, "rewards/rejected": -0.2498481273651123, "step": 3760 }, { "epoch": 0.25, "learning_rate": 4.67961641816062e-06, "logits/chosen": -1.7059803009033203, "logits/rejected": -1.5351479053497314, "logps/chosen": -368.4649963378906, "logps/rejected": -390.50994873046875, "loss": 0.0271, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09266825020313263, "rewards/margins": 0.0633750781416893, "rewards/rejected": -0.15604332089424133, "step": 3770 }, { "epoch": 0.25, "learning_rate": 4.676814140906188e-06, "logits/chosen": -1.6113401651382446, "logits/rejected": -1.506805658340454, "logps/chosen": -332.47088623046875, "logps/rejected": -354.0028381347656, "loss": 0.0304, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09293471276760101, "rewards/margins": 0.052137620747089386, "rewards/rejected": -0.145072340965271, "step": 3780 }, { "epoch": 0.25, "learning_rate": 4.674000508322872e-06, "logits/chosen": -1.4703335762023926, "logits/rejected": -1.566943883895874, "logps/chosen": -284.104736328125, "logps/rejected": -353.2925720214844, "loss": 0.0217, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06656982749700546, "rewards/margins": 0.06000424176454544, "rewards/rejected": -0.1265740692615509, "step": 3790 }, { "epoch": 0.25, "learning_rate": 4.671175535087959e-06, "logits/chosen": -1.6811034679412842, "logits/rejected": -1.6779899597167969, "logps/chosen": -369.7184753417969, "logps/rejected": -457.66693115234375, "loss": 0.0357, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07839083671569824, "rewards/margins": 0.09118005633354187, "rewards/rejected": -0.1695708930492401, "step": 3800 }, { "epoch": 0.25, "eval_logits/chosen": -1.6888610124588013, "eval_logits/rejected": -1.5307316780090332, "eval_logps/chosen": -302.4371337890625, "eval_logps/rejected": -350.2819519042969, "eval_loss": 0.025977861136198044, "eval_rewards/accuracies": 0.6575000286102295, "eval_rewards/chosen": -0.0704321637749672, "eval_rewards/margins": 0.06823787838220596, "eval_rewards/rejected": -0.13867002725601196, "eval_runtime": 714.195, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 3800 }, { "epoch": 0.25, "learning_rate": 4.6683392359378924e-06, "logits/chosen": -1.5835708379745483, "logits/rejected": -1.424617052078247, "logps/chosen": -301.4721984863281, "logps/rejected": -340.4933166503906, "loss": 0.0119, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0663231760263443, "rewards/margins": 0.06453759968280792, "rewards/rejected": -0.13086077570915222, "step": 3810 }, { "epoch": 0.25, "learning_rate": 4.665491625668198e-06, "logits/chosen": -1.401033639907837, "logits/rejected": -1.4427587985992432, "logps/chosen": -260.38079833984375, "logps/rejected": -370.03289794921875, "loss": 0.028, "rewards/accuracies": 0.625, "rewards/chosen": -0.10413353145122528, "rewards/margins": 0.08609706163406372, "rewards/rejected": -0.1902305781841278, "step": 3820 }, { "epoch": 0.25, "learning_rate": 4.662632719133407e-06, "logits/chosen": -1.6172155141830444, "logits/rejected": -1.3668307065963745, "logps/chosen": -310.74346923828125, "logps/rejected": -306.8648681640625, "loss": 0.023, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.07994594424962997, "rewards/margins": 0.06312881410121918, "rewards/rejected": -0.14307476580142975, "step": 3830 }, { "epoch": 0.25, "learning_rate": 4.659762531246974e-06, "logits/chosen": -1.4214946031570435, "logits/rejected": -1.3595560789108276, "logps/chosen": -355.81390380859375, "logps/rejected": -370.12353515625, "loss": 0.0274, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.1412351429462433, "rewards/margins": 0.04826633259654045, "rewards/rejected": -0.18950147926807404, "step": 3840 }, { "epoch": 0.25, "learning_rate": 4.656881076981207e-06, "logits/chosen": -1.5625451803207397, "logits/rejected": -1.4436548948287964, "logps/chosen": -340.0423889160156, "logps/rejected": -372.79901123046875, "loss": 0.0338, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.12685899436473846, "rewards/margins": 0.05065537244081497, "rewards/rejected": -0.17751434445381165, "step": 3850 }, { "epoch": 0.25, "learning_rate": 4.653988371367183e-06, "logits/chosen": -1.48854660987854, "logits/rejected": -1.2074941396713257, "logps/chosen": -355.6903076171875, "logps/rejected": -338.88482666015625, "loss": 0.0357, "rewards/accuracies": 0.625, "rewards/chosen": -0.11488630622625351, "rewards/margins": 0.0444025881588459, "rewards/rejected": -0.15928888320922852, "step": 3860 }, { "epoch": 0.25, "learning_rate": 4.651084429494671e-06, "logits/chosen": -1.4786748886108398, "logits/rejected": -1.2028788328170776, "logps/chosen": -407.66796875, "logps/rejected": -382.55609130859375, "loss": 0.0146, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13231699168682098, "rewards/margins": 0.05441192537546158, "rewards/rejected": -0.18672892451286316, "step": 3870 }, { "epoch": 0.25, "learning_rate": 4.648169266512053e-06, "logits/chosen": -1.5586563348770142, "logits/rejected": -1.286772608757019, "logps/chosen": -355.00738525390625, "logps/rejected": -397.573486328125, "loss": 0.0151, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12967927753925323, "rewards/margins": 0.08752022683620453, "rewards/rejected": -0.21719948947429657, "step": 3880 }, { "epoch": 0.25, "learning_rate": 4.6452428976262505e-06, "logits/chosen": -1.2812411785125732, "logits/rejected": -1.0025954246520996, "logps/chosen": -341.2714538574219, "logps/rejected": -426.65106201171875, "loss": 0.0251, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1374800056219101, "rewards/margins": 0.12970241904258728, "rewards/rejected": -0.26718243956565857, "step": 3890 }, { "epoch": 0.26, "learning_rate": 4.642305338102633e-06, "logits/chosen": -1.1453635692596436, "logits/rejected": -1.3280820846557617, "logps/chosen": -337.386962890625, "logps/rejected": -429.5384826660156, "loss": 0.0249, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1756374090909958, "rewards/margins": 0.07400581985712051, "rewards/rejected": -0.2496432363986969, "step": 3900 }, { "epoch": 0.26, "eval_logits/chosen": -1.2148537635803223, "eval_logits/rejected": -1.0766971111297607, "eval_logps/chosen": -432.31280517578125, "eval_logps/rejected": -502.6758117675781, "eval_loss": 0.025551505386829376, "eval_rewards/accuracies": 0.6635000109672546, "eval_rewards/chosen": -0.20030786097049713, "eval_rewards/margins": 0.09075606614351273, "eval_rewards/rejected": -0.29106393456459045, "eval_runtime": 714.9626, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.399, "step": 3900 }, { "epoch": 0.26, "learning_rate": 4.639356603264953e-06, "logits/chosen": -1.2749978303909302, "logits/rejected": -1.2009389400482178, "logps/chosen": -438.8550720214844, "logps/rejected": -469.20782470703125, "loss": 0.0147, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19702328741550446, "rewards/margins": 0.0560835599899292, "rewards/rejected": -0.25310683250427246, "step": 3910 }, { "epoch": 0.26, "learning_rate": 4.636396708495255e-06, "logits/chosen": -1.0566312074661255, "logits/rejected": -1.0134824514389038, "logps/chosen": -403.2691955566406, "logps/rejected": -456.4483337402344, "loss": 0.0154, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17407411336898804, "rewards/margins": 0.07802309840917587, "rewards/rejected": -0.2520971894264221, "step": 3920 }, { "epoch": 0.26, "learning_rate": 4.633425669233799e-06, "logits/chosen": -1.4404993057250977, "logits/rejected": -1.367196798324585, "logps/chosen": -403.69573974609375, "logps/rejected": -469.69354248046875, "loss": 0.0235, "rewards/accuracies": 0.625, "rewards/chosen": -0.16886918246746063, "rewards/margins": 0.07044507563114166, "rewards/rejected": -0.2393142729997635, "step": 3930 }, { "epoch": 0.26, "learning_rate": 4.6304435009789825e-06, "logits/chosen": -1.446575403213501, "logits/rejected": -1.1691230535507202, "logps/chosen": -377.27392578125, "logps/rejected": -399.2811279296875, "loss": 0.022, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.13976678252220154, "rewards/margins": 0.09291701018810272, "rewards/rejected": -0.23268374800682068, "step": 3940 }, { "epoch": 0.26, "learning_rate": 4.627450219287256e-06, "logits/chosen": -1.4396696090698242, "logits/rejected": -1.3591365814208984, "logps/chosen": -316.14276123046875, "logps/rejected": -379.31890869140625, "loss": 0.0259, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13458362221717834, "rewards/margins": 0.08568727225065231, "rewards/rejected": -0.22027090191841125, "step": 3950 }, { "epoch": 0.26, "learning_rate": 4.624445839773042e-06, "logits/chosen": -1.2288976907730103, "logits/rejected": -1.2009155750274658, "logps/chosen": -338.10137939453125, "logps/rejected": -372.7433166503906, "loss": 0.0557, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1680900752544403, "rewards/margins": 0.03570578247308731, "rewards/rejected": -0.20379586517810822, "step": 3960 }, { "epoch": 0.26, "learning_rate": 4.621430378108656e-06, "logits/chosen": -1.3681949377059937, "logits/rejected": -1.1702930927276611, "logps/chosen": -469.4034118652344, "logps/rejected": -559.2224731445312, "loss": 0.0146, "rewards/accuracies": 0.75, "rewards/chosen": -0.21164508163928986, "rewards/margins": 0.09658339619636536, "rewards/rejected": -0.30822843313217163, "step": 3970 }, { "epoch": 0.26, "learning_rate": 4.618403850024223e-06, "logits/chosen": -1.2393152713775635, "logits/rejected": -1.1041834354400635, "logps/chosen": -416.4683532714844, "logps/rejected": -428.821533203125, "loss": 0.0277, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.16097818315029144, "rewards/margins": 0.05901552364230156, "rewards/rejected": -0.2199936807155609, "step": 3980 }, { "epoch": 0.26, "learning_rate": 4.615366271307598e-06, "logits/chosen": -1.4692407846450806, "logits/rejected": -1.3024734258651733, "logps/chosen": -362.8622131347656, "logps/rejected": -416.260498046875, "loss": 0.0212, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17091922461986542, "rewards/margins": 0.0652906745672226, "rewards/rejected": -0.236209899187088, "step": 3990 }, { "epoch": 0.26, "learning_rate": 4.612317657804277e-06, "logits/chosen": -1.403191328048706, "logits/rejected": -1.4055851697921753, "logps/chosen": -344.8889465332031, "logps/rejected": -478.85528564453125, "loss": 0.0496, "rewards/accuracies": 0.5, "rewards/chosen": -0.19765588641166687, "rewards/margins": 0.08261851221323013, "rewards/rejected": -0.2802744209766388, "step": 4000 }, { "epoch": 0.26, "eval_logits/chosen": -1.4356458187103271, "eval_logits/rejected": -1.287010669708252, "eval_logps/chosen": -402.0155944824219, "eval_logps/rejected": -466.59539794921875, "eval_loss": 0.024634743109345436, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -0.17001065611839294, "eval_rewards/margins": 0.08497288823127747, "eval_rewards/rejected": -0.2549835443496704, "eval_runtime": 715.5856, "eval_samples_per_second": 2.795, "eval_steps_per_second": 1.397, "step": 4000 }, { "epoch": 0.26, "learning_rate": 4.6092580254173236e-06, "logits/chosen": -1.2963931560516357, "logits/rejected": -1.1828718185424805, "logps/chosen": -440.81201171875, "logps/rejected": -524.130126953125, "loss": 0.0299, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18348929286003113, "rewards/margins": 0.10198618471622467, "rewards/rejected": -0.2854754626750946, "step": 4010 }, { "epoch": 0.26, "learning_rate": 4.606187390107277e-06, "logits/chosen": -1.3276742696762085, "logits/rejected": -1.2177103757858276, "logps/chosen": -402.9412841796875, "logps/rejected": -442.60870361328125, "loss": 0.0356, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.17806589603424072, "rewards/margins": 0.07866012305021286, "rewards/rejected": -0.25672605633735657, "step": 4020 }, { "epoch": 0.26, "learning_rate": 4.603105767892077e-06, "logits/chosen": -1.569495439529419, "logits/rejected": -1.4920079708099365, "logps/chosen": -323.8802795410156, "logps/rejected": -413.13104248046875, "loss": 0.016, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.12919025123119354, "rewards/margins": 0.0712975263595581, "rewards/rejected": -0.20048777759075165, "step": 4030 }, { "epoch": 0.26, "learning_rate": 4.6000131748469725e-06, "logits/chosen": -1.575714349746704, "logits/rejected": -1.3403669595718384, "logps/chosen": -356.54840087890625, "logps/rejected": -347.47711181640625, "loss": 0.0487, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10640816390514374, "rewards/margins": 0.06524702161550522, "rewards/rejected": -0.17165519297122955, "step": 4040 }, { "epoch": 0.26, "learning_rate": 4.596909627104445e-06, "logits/chosen": -1.7491534948349, "logits/rejected": -1.6122124195098877, "logps/chosen": -403.44500732421875, "logps/rejected": -447.87286376953125, "loss": 0.0138, "rewards/accuracies": 0.625, "rewards/chosen": -0.15470674633979797, "rewards/margins": 0.07706204801797867, "rewards/rejected": -0.23176880180835724, "step": 4050 }, { "epoch": 0.27, "learning_rate": 4.5937951408541215e-06, "logits/chosen": -1.6188724040985107, "logits/rejected": -1.1481155157089233, "logps/chosen": -429.2108459472656, "logps/rejected": -500.86962890625, "loss": 0.0288, "rewards/accuracies": 0.75, "rewards/chosen": -0.17657236754894257, "rewards/margins": 0.1170358881354332, "rewards/rejected": -0.29360824823379517, "step": 4060 }, { "epoch": 0.27, "learning_rate": 4.590669732342685e-06, "logits/chosen": -1.2865307331085205, "logits/rejected": -1.1235215663909912, "logps/chosen": -386.5255126953125, "logps/rejected": -471.8382873535156, "loss": 0.0534, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1743779331445694, "rewards/margins": 0.08342008292675018, "rewards/rejected": -0.2577980160713196, "step": 4070 }, { "epoch": 0.27, "learning_rate": 4.587533417873799e-06, "logits/chosen": -1.3697569370269775, "logits/rejected": -1.3195903301239014, "logps/chosen": -416.93292236328125, "logps/rejected": -551.0460205078125, "loss": 0.0132, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2237689048051834, "rewards/margins": 0.07440945506095886, "rewards/rejected": -0.29817837476730347, "step": 4080 }, { "epoch": 0.27, "learning_rate": 4.584386213808016e-06, "logits/chosen": -1.248326063156128, "logits/rejected": -1.086198091506958, "logps/chosen": -407.97381591796875, "logps/rejected": -403.442626953125, "loss": 0.0531, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18629486858844757, "rewards/margins": 0.041244909167289734, "rewards/rejected": -0.2275397777557373, "step": 4090 }, { "epoch": 0.27, "learning_rate": 4.581228136562693e-06, "logits/chosen": -1.3161365985870361, "logits/rejected": -1.285868525505066, "logps/chosen": -402.3750305175781, "logps/rejected": -405.1878967285156, "loss": 0.0166, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.16419456899166107, "rewards/margins": 0.028497496619820595, "rewards/rejected": -0.19269205629825592, "step": 4100 }, { "epoch": 0.27, "eval_logits/chosen": -1.3467671871185303, "eval_logits/rejected": -1.205845594406128, "eval_logps/chosen": -415.33544921875, "eval_logps/rejected": -457.4212646484375, "eval_loss": 0.027326496317982674, "eval_rewards/accuracies": 0.6600000262260437, "eval_rewards/chosen": -0.1833304762840271, "eval_rewards/margins": 0.06247887760400772, "eval_rewards/rejected": -0.24580936133861542, "eval_runtime": 715.1078, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.398, "step": 4100 }, { "epoch": 0.27, "learning_rate": 4.578059202611909e-06, "logits/chosen": -1.3168730735778809, "logits/rejected": -1.234607458114624, "logps/chosen": -429.9736328125, "logps/rejected": -452.296875, "loss": 0.0232, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1728147268295288, "rewards/margins": 0.03707920387387276, "rewards/rejected": -0.20989394187927246, "step": 4110 }, { "epoch": 0.27, "learning_rate": 4.574879428486376e-06, "logits/chosen": -1.296732783317566, "logits/rejected": -1.3045345544815063, "logps/chosen": -418.42547607421875, "logps/rejected": -473.14697265625, "loss": 0.0133, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2055089771747589, "rewards/margins": 0.05764361470937729, "rewards/rejected": -0.2631525993347168, "step": 4120 }, { "epoch": 0.27, "learning_rate": 4.571688830773352e-06, "logits/chosen": -1.430281400680542, "logits/rejected": -1.3226784467697144, "logps/chosen": -389.53857421875, "logps/rejected": -405.88995361328125, "loss": 0.0291, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.16747038066387177, "rewards/margins": 0.0362304151058197, "rewards/rejected": -0.20370078086853027, "step": 4130 }, { "epoch": 0.27, "learning_rate": 4.568487426116559e-06, "logits/chosen": -1.2957525253295898, "logits/rejected": -1.2123627662658691, "logps/chosen": -325.9900207519531, "logps/rejected": -360.6960754394531, "loss": 0.0385, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.15401820838451385, "rewards/margins": 0.042239442467689514, "rewards/rejected": -0.19625765085220337, "step": 4140 }, { "epoch": 0.27, "learning_rate": 4.565275231216092e-06, "logits/chosen": -1.1298608779907227, "logits/rejected": -1.1379327774047852, "logps/chosen": -278.19586181640625, "logps/rejected": -372.4629821777344, "loss": 0.022, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1268523782491684, "rewards/margins": 0.04827722907066345, "rewards/rejected": -0.17512962222099304, "step": 4150 }, { "epoch": 0.27, "learning_rate": 4.562052262828331e-06, "logits/chosen": -1.405649185180664, "logits/rejected": -1.3019465208053589, "logps/chosen": -350.8763732910156, "logps/rejected": -423.3707580566406, "loss": 0.0384, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15206533670425415, "rewards/margins": 0.07777031511068344, "rewards/rejected": -0.2298356294631958, "step": 4160 }, { "epoch": 0.27, "learning_rate": 4.558818537765861e-06, "logits/chosen": -1.6493561267852783, "logits/rejected": -1.225176453590393, "logps/chosen": -393.669677734375, "logps/rejected": -436.1175231933594, "loss": 0.0394, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15765532851219177, "rewards/margins": 0.07731587439775467, "rewards/rejected": -0.23497121036052704, "step": 4170 }, { "epoch": 0.27, "learning_rate": 4.555574072897374e-06, "logits/chosen": -1.4597662687301636, "logits/rejected": -1.5101134777069092, "logps/chosen": -348.5613098144531, "logps/rejected": -432.7642517089844, "loss": 0.0268, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.14660003781318665, "rewards/margins": 0.08647465705871582, "rewards/rejected": -0.23307469487190247, "step": 4180 }, { "epoch": 0.27, "learning_rate": 4.552318885147589e-06, "logits/chosen": -1.701123595237732, "logits/rejected": -1.3631237745285034, "logps/chosen": -377.8849182128906, "logps/rejected": -410.96295166015625, "loss": 0.0276, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.136967271566391, "rewards/margins": 0.09246762096881866, "rewards/rejected": -0.22943489253520966, "step": 4190 }, { "epoch": 0.27, "learning_rate": 4.549052991497159e-06, "logits/chosen": -1.5439984798431396, "logits/rejected": -1.4903287887573242, "logps/chosen": -350.50823974609375, "logps/rejected": -410.6221618652344, "loss": 0.0257, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.17034107446670532, "rewards/margins": 0.059695053845644, "rewards/rejected": -0.23003613948822021, "step": 4200 }, { "epoch": 0.27, "eval_logits/chosen": -1.543513298034668, "eval_logits/rejected": -1.388250470161438, "eval_logps/chosen": -387.0569152832031, "eval_logps/rejected": -440.8662414550781, "eval_loss": 0.027481814846396446, "eval_rewards/accuracies": 0.6504999995231628, "eval_rewards/chosen": -0.15505196154117584, "eval_rewards/margins": 0.07420240342617035, "eval_rewards/rejected": -0.2292543649673462, "eval_runtime": 713.8237, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 4200 }, { "epoch": 0.28, "learning_rate": 4.545776408982585e-06, "logits/chosen": -1.5604270696640015, "logits/rejected": -1.5197086334228516, "logps/chosen": -374.8223876953125, "logps/rejected": -445.38470458984375, "loss": 0.0319, "rewards/accuracies": 0.625, "rewards/chosen": -0.14307831227779388, "rewards/margins": 0.07957559078931808, "rewards/rejected": -0.22265391051769257, "step": 4210 }, { "epoch": 0.28, "learning_rate": 4.542489154696128e-06, "logits/chosen": -1.7489147186279297, "logits/rejected": -1.4021837711334229, "logps/chosen": -391.36309814453125, "logps/rejected": -396.5628356933594, "loss": 0.0146, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.12364277988672256, "rewards/margins": 0.06645040214061737, "rewards/rejected": -0.19009318947792053, "step": 4220 }, { "epoch": 0.28, "learning_rate": 4.5391912457857145e-06, "logits/chosen": -1.700891137123108, "logits/rejected": -1.491917610168457, "logps/chosen": -382.04071044921875, "logps/rejected": -397.81976318359375, "loss": 0.0256, "rewards/accuracies": 0.625, "rewards/chosen": -0.11707104742527008, "rewards/margins": 0.06288883090019226, "rewards/rejected": -0.17995986342430115, "step": 4230 }, { "epoch": 0.28, "learning_rate": 4.535882699454854e-06, "logits/chosen": -1.7376819849014282, "logits/rejected": -1.6242554187774658, "logps/chosen": -384.94793701171875, "logps/rejected": -471.80120849609375, "loss": 0.0293, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11389689147472382, "rewards/margins": 0.06519055366516113, "rewards/rejected": -0.17908743023872375, "step": 4240 }, { "epoch": 0.28, "learning_rate": 4.532563532962546e-06, "logits/chosen": -1.8700637817382812, "logits/rejected": -1.9039455652236938, "logps/chosen": -322.6061706542969, "logps/rejected": -408.3436584472656, "loss": 0.0258, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.13271191716194153, "rewards/margins": 0.06414255499839783, "rewards/rejected": -0.19685448706150055, "step": 4250 }, { "epoch": 0.28, "learning_rate": 4.529233763623187e-06, "logits/chosen": -1.7179536819458008, "logits/rejected": -1.4310009479522705, "logps/chosen": -324.5379943847656, "logps/rejected": -354.0625, "loss": 0.0288, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.12243751436471939, "rewards/margins": 0.0774841457605362, "rewards/rejected": -0.19992166757583618, "step": 4260 }, { "epoch": 0.28, "learning_rate": 4.5258934088064854e-06, "logits/chosen": -1.5239393711090088, "logits/rejected": -1.1681437492370605, "logps/chosen": -413.602294921875, "logps/rejected": -469.358642578125, "loss": 0.0253, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19362159073352814, "rewards/margins": 0.11045382171869278, "rewards/rejected": -0.3040754199028015, "step": 4270 }, { "epoch": 0.28, "learning_rate": 4.522542485937369e-06, "logits/chosen": -1.5334413051605225, "logits/rejected": -1.3172454833984375, "logps/chosen": -504.45306396484375, "logps/rejected": -519.475341796875, "loss": 0.0096, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2138005793094635, "rewards/margins": 0.1116631031036377, "rewards/rejected": -0.3254636824131012, "step": 4280 }, { "epoch": 0.28, "learning_rate": 4.519181012495892e-06, "logits/chosen": -1.72525155544281, "logits/rejected": -1.517737627029419, "logps/chosen": -404.04571533203125, "logps/rejected": -447.5331115722656, "loss": 0.0355, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16182644665241241, "rewards/margins": 0.07256150245666504, "rewards/rejected": -0.23438794910907745, "step": 4290 }, { "epoch": 0.28, "learning_rate": 4.515809006017147e-06, "logits/chosen": -1.6283308267593384, "logits/rejected": -1.4045281410217285, "logps/chosen": -340.22052001953125, "logps/rejected": -377.0026550292969, "loss": 0.0381, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10681094229221344, "rewards/margins": 0.07229141891002655, "rewards/rejected": -0.1791023463010788, "step": 4300 }, { "epoch": 0.28, "eval_logits/chosen": -1.6789805889129639, "eval_logits/rejected": -1.5157997608184814, "eval_logps/chosen": -341.5804138183594, "eval_logps/rejected": -398.1020812988281, "eval_loss": 0.02558263950049877, "eval_rewards/accuracies": 0.6629999876022339, "eval_rewards/chosen": -0.10957543551921844, "eval_rewards/margins": 0.07691475749015808, "eval_rewards/rejected": -0.18649020791053772, "eval_runtime": 714.8314, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 4300 }, { "epoch": 0.28, "learning_rate": 4.512426484091171e-06, "logits/chosen": -1.8729350566864014, "logits/rejected": -1.5964362621307373, "logps/chosen": -374.73223876953125, "logps/rejected": -388.2032165527344, "loss": 0.0353, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09715107083320618, "rewards/margins": 0.04976404458284378, "rewards/rejected": -0.14691512286663055, "step": 4310 }, { "epoch": 0.28, "learning_rate": 4.509033464362858e-06, "logits/chosen": -1.5874884128570557, "logits/rejected": -1.6020339727401733, "logps/chosen": -325.8070983886719, "logps/rejected": -403.746826171875, "loss": 0.0173, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0845412164926529, "rewards/margins": 0.0637088194489479, "rewards/rejected": -0.1482500284910202, "step": 4320 }, { "epoch": 0.28, "learning_rate": 4.505629964531857e-06, "logits/chosen": -1.713687539100647, "logits/rejected": -1.5335952043533325, "logps/chosen": -332.15411376953125, "logps/rejected": -389.15130615234375, "loss": 0.024, "rewards/accuracies": 0.625, "rewards/chosen": -0.10894972085952759, "rewards/margins": 0.08781043440103531, "rewards/rejected": -0.1967601478099823, "step": 4330 }, { "epoch": 0.28, "learning_rate": 4.502216002352492e-06, "logits/chosen": -1.554183840751648, "logits/rejected": -1.33809494972229, "logps/chosen": -321.9082946777344, "logps/rejected": -361.6943359375, "loss": 0.0516, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.15886741876602173, "rewards/margins": 0.05920020490884781, "rewards/rejected": -0.21806764602661133, "step": 4340 }, { "epoch": 0.28, "learning_rate": 4.498791595633663e-06, "logits/chosen": -1.44529390335083, "logits/rejected": -1.1645525693893433, "logps/chosen": -344.6398620605469, "logps/rejected": -312.952392578125, "loss": 0.03, "rewards/accuracies": 0.625, "rewards/chosen": -0.08225558698177338, "rewards/margins": 0.05551298335194588, "rewards/rejected": -0.13776858150959015, "step": 4350 }, { "epoch": 0.29, "learning_rate": 4.495356762238751e-06, "logits/chosen": -1.890144944190979, "logits/rejected": -1.4477609395980835, "logps/chosen": -336.624267578125, "logps/rejected": -306.9760437011719, "loss": 0.017, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05342636629939079, "rewards/margins": 0.0678037703037262, "rewards/rejected": -0.12123014777898788, "step": 4360 }, { "epoch": 0.29, "learning_rate": 4.491911520085532e-06, "logits/chosen": -1.431183099746704, "logits/rejected": -1.3622066974639893, "logps/chosen": -247.00927734375, "logps/rejected": -325.14453125, "loss": 0.026, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04660152271389961, "rewards/margins": 0.0673627033829689, "rewards/rejected": -0.11396422237157822, "step": 4370 }, { "epoch": 0.29, "learning_rate": 4.488455887146075e-06, "logits/chosen": -1.5854401588439941, "logits/rejected": -1.5128647089004517, "logps/chosen": -232.297607421875, "logps/rejected": -336.1612548828125, "loss": 0.0279, "rewards/accuracies": 0.625, "rewards/chosen": -0.06218549609184265, "rewards/margins": 0.09069998562335968, "rewards/rejected": -0.15288548171520233, "step": 4380 }, { "epoch": 0.29, "learning_rate": 4.484989881446654e-06, "logits/chosen": -1.6342254877090454, "logits/rejected": -1.528773307800293, "logps/chosen": -279.3342590332031, "logps/rejected": -297.1978454589844, "loss": 0.0416, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.07685824483633041, "rewards/margins": 0.03580832481384277, "rewards/rejected": -0.11266656965017319, "step": 4390 }, { "epoch": 0.29, "learning_rate": 4.481513521067654e-06, "logits/chosen": -1.4173234701156616, "logits/rejected": -1.2449252605438232, "logps/chosen": -360.70404052734375, "logps/rejected": -415.06634521484375, "loss": 0.0142, "rewards/accuracies": 0.75, "rewards/chosen": -0.13592949509620667, "rewards/margins": 0.08977806568145752, "rewards/rejected": -0.22570756077766418, "step": 4400 }, { "epoch": 0.29, "eval_logits/chosen": -1.2624623775482178, "eval_logits/rejected": -1.1203445196151733, "eval_logps/chosen": -374.83502197265625, "eval_logps/rejected": -441.24371337890625, "eval_loss": 0.02555946260690689, "eval_rewards/accuracies": 0.6604999899864197, "eval_rewards/chosen": -0.14283010363578796, "eval_rewards/margins": 0.08680171519517899, "eval_rewards/rejected": -0.22963182628154755, "eval_runtime": 714.8406, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 4400 }, { "epoch": 0.29, "learning_rate": 4.478026824143473e-06, "logits/chosen": -1.348807692527771, "logits/rejected": -1.2761566638946533, "logps/chosen": -434.299560546875, "logps/rejected": -488.93072509765625, "loss": 0.018, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16823793947696686, "rewards/margins": 0.11119858920574188, "rewards/rejected": -0.27943652868270874, "step": 4410 }, { "epoch": 0.29, "learning_rate": 4.474529808862429e-06, "logits/chosen": -1.207189917564392, "logits/rejected": -1.2272939682006836, "logps/chosen": -341.6673889160156, "logps/rejected": -455.37847900390625, "loss": 0.0426, "rewards/accuracies": 0.625, "rewards/chosen": -0.1528470814228058, "rewards/margins": 0.09608684480190277, "rewards/rejected": -0.24893391132354736, "step": 4420 }, { "epoch": 0.29, "learning_rate": 4.471022493466669e-06, "logits/chosen": -1.3031737804412842, "logits/rejected": -1.0330963134765625, "logps/chosen": -459.22930908203125, "logps/rejected": -452.4422302246094, "loss": 0.0237, "rewards/accuracies": 0.625, "rewards/chosen": -0.1572902500629425, "rewards/margins": 0.07022421061992645, "rewards/rejected": -0.22751443088054657, "step": 4430 }, { "epoch": 0.29, "learning_rate": 4.467504896252066e-06, "logits/chosen": -1.363821268081665, "logits/rejected": -1.3267080783843994, "logps/chosen": -433.4723205566406, "logps/rejected": -500.95086669921875, "loss": 0.0197, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18377093970775604, "rewards/margins": 0.09622083604335785, "rewards/rejected": -0.2799917757511139, "step": 4440 }, { "epoch": 0.29, "learning_rate": 4.463977035568132e-06, "logits/chosen": -1.1484137773513794, "logits/rejected": -1.2523006200790405, "logps/chosen": -362.26849365234375, "logps/rejected": -461.9901428222656, "loss": 0.021, "rewards/accuracies": 0.5, "rewards/chosen": -0.14988572895526886, "rewards/margins": 0.04801609739661217, "rewards/rejected": -0.19790181517601013, "step": 4450 }, { "epoch": 0.29, "learning_rate": 4.460438929817914e-06, "logits/chosen": -1.2076404094696045, "logits/rejected": -0.9844943881034851, "logps/chosen": -352.26177978515625, "logps/rejected": -420.0936584472656, "loss": 0.0316, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1458420753479004, "rewards/margins": 0.07212045043706894, "rewards/rejected": -0.21796250343322754, "step": 4460 }, { "epoch": 0.29, "learning_rate": 4.456890597457907e-06, "logits/chosen": -1.1379069089889526, "logits/rejected": -1.1137340068817139, "logps/chosen": -379.97442626953125, "logps/rejected": -479.16851806640625, "loss": 0.0212, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16783876717090607, "rewards/margins": 0.08098968863487244, "rewards/rejected": -0.2488284558057785, "step": 4470 }, { "epoch": 0.29, "learning_rate": 4.453332056997951e-06, "logits/chosen": -1.1332893371582031, "logits/rejected": -1.0970113277435303, "logps/chosen": -315.16229248046875, "logps/rejected": -415.5948791503906, "loss": 0.0205, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13520283997058868, "rewards/margins": 0.10475758463144302, "rewards/rejected": -0.2399604618549347, "step": 4480 }, { "epoch": 0.29, "learning_rate": 4.449763327001134e-06, "logits/chosen": -1.3053241968154907, "logits/rejected": -1.3183414936065674, "logps/chosen": -313.153076171875, "logps/rejected": -403.06732177734375, "loss": 0.0255, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1243908628821373, "rewards/margins": 0.061498843133449554, "rewards/rejected": -0.18588969111442566, "step": 4490 }, { "epoch": 0.29, "learning_rate": 4.446184426083702e-06, "logits/chosen": -1.4142491817474365, "logits/rejected": -1.2123726606369019, "logps/chosen": -326.9809875488281, "logps/rejected": -450.9129333496094, "loss": 0.0161, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13341663777828217, "rewards/margins": 0.11556436121463776, "rewards/rejected": -0.2489810287952423, "step": 4500 }, { "epoch": 0.29, "eval_logits/chosen": -1.3863813877105713, "eval_logits/rejected": -1.2417441606521606, "eval_logps/chosen": -361.1861877441406, "eval_logps/rejected": -412.9791259765625, "eval_loss": 0.025348447263240814, "eval_rewards/accuracies": 0.6585000157356262, "eval_rewards/chosen": -0.12918120622634888, "eval_rewards/margins": 0.07218600809574127, "eval_rewards/rejected": -0.20136722922325134, "eval_runtime": 715.7929, "eval_samples_per_second": 2.794, "eval_steps_per_second": 1.397, "step": 4500 }, { "epoch": 0.3, "learning_rate": 4.442595372914954e-06, "logits/chosen": -1.4278943538665771, "logits/rejected": -1.3390228748321533, "logps/chosen": -340.3716735839844, "logps/rejected": -335.7162170410156, "loss": 0.0151, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1033913642168045, "rewards/margins": 0.08061876147985458, "rewards/rejected": -0.18401013314723969, "step": 4510 }, { "epoch": 0.3, "learning_rate": 4.43899618621715e-06, "logits/chosen": -1.4802106618881226, "logits/rejected": -1.236612319946289, "logps/chosen": -400.3420104980469, "logps/rejected": -505.5450134277344, "loss": 0.0319, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.14928816258907318, "rewards/margins": 0.10200627148151398, "rewards/rejected": -0.25129443407058716, "step": 4520 }, { "epoch": 0.3, "learning_rate": 4.4353868847654105e-06, "logits/chosen": -1.650275468826294, "logits/rejected": -1.3741801977157593, "logps/chosen": -383.4633483886719, "logps/rejected": -432.15478515625, "loss": 0.0256, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.13760510087013245, "rewards/margins": 0.07875902950763702, "rewards/rejected": -0.21636414527893066, "step": 4530 }, { "epoch": 0.3, "learning_rate": 4.43176748738762e-06, "logits/chosen": -1.36962890625, "logits/rejected": -1.2580490112304688, "logps/chosen": -440.6005859375, "logps/rejected": -559.8140869140625, "loss": 0.0163, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20865269005298615, "rewards/margins": 0.11482508480548859, "rewards/rejected": -0.32347774505615234, "step": 4540 }, { "epoch": 0.3, "learning_rate": 4.4281380129643295e-06, "logits/chosen": -1.4430954456329346, "logits/rejected": -1.2729356288909912, "logps/chosen": -374.62274169921875, "logps/rejected": -467.5389709472656, "loss": 0.0239, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.14487263560295105, "rewards/margins": 0.10339025408029556, "rewards/rejected": -0.24826285243034363, "step": 4550 }, { "epoch": 0.3, "learning_rate": 4.424498480428654e-06, "logits/chosen": -1.6050965785980225, "logits/rejected": -1.4748914241790771, "logps/chosen": -373.5273742675781, "logps/rejected": -351.7547302246094, "loss": 0.0366, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.12625543773174286, "rewards/margins": 0.018252816051244736, "rewards/rejected": -0.1445082724094391, "step": 4560 }, { "epoch": 0.3, "learning_rate": 4.420848908766178e-06, "logits/chosen": -1.7298862934112549, "logits/rejected": -1.6421781778335571, "logps/chosen": -300.76483154296875, "logps/rejected": -378.29168701171875, "loss": 0.0249, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.09353800117969513, "rewards/margins": 0.06864660978317261, "rewards/rejected": -0.16218461096286774, "step": 4570 }, { "epoch": 0.3, "learning_rate": 4.417189317014855e-06, "logits/chosen": -1.5955970287322998, "logits/rejected": -1.7729259729385376, "logps/chosen": -299.55535888671875, "logps/rejected": -373.0260925292969, "loss": 0.0298, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0987594872713089, "rewards/margins": 0.04277385398745537, "rewards/rejected": -0.14153333008289337, "step": 4580 }, { "epoch": 0.3, "learning_rate": 4.41351972426491e-06, "logits/chosen": -1.5033591985702515, "logits/rejected": -1.5283015966415405, "logps/chosen": -345.3142395019531, "logps/rejected": -462.55804443359375, "loss": 0.017, "rewards/accuracies": 0.625, "rewards/chosen": -0.09857489168643951, "rewards/margins": 0.06319073587656021, "rewards/rejected": -0.16176562011241913, "step": 4590 }, { "epoch": 0.3, "learning_rate": 4.409840149658735e-06, "logits/chosen": -1.5775784254074097, "logits/rejected": -1.3997588157653809, "logps/chosen": -358.9991455078125, "logps/rejected": -367.135986328125, "loss": 0.0252, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07448185980319977, "rewards/margins": 0.05828867107629776, "rewards/rejected": -0.13277053833007812, "step": 4600 }, { "epoch": 0.3, "eval_logits/chosen": -1.6068298816680908, "eval_logits/rejected": -1.4505927562713623, "eval_logps/chosen": -321.5078125, "eval_logps/rejected": -365.614501953125, "eval_loss": 0.025990908965468407, "eval_rewards/accuracies": 0.6614999771118164, "eval_rewards/chosen": -0.08950285613536835, "eval_rewards/margins": 0.06449978053569794, "eval_rewards/rejected": -0.1540026217699051, "eval_runtime": 712.7453, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 4600 }, { "epoch": 0.3, "learning_rate": 4.4061506123907925e-06, "logits/chosen": -1.5166257619857788, "logits/rejected": -1.3623923063278198, "logps/chosen": -377.86004638671875, "logps/rejected": -396.5621032714844, "loss": 0.0279, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11376826465129852, "rewards/margins": 0.059008192270994186, "rewards/rejected": -0.172776460647583, "step": 4610 }, { "epoch": 0.3, "learning_rate": 4.402451131707519e-06, "logits/chosen": -1.6833021640777588, "logits/rejected": -1.2976399660110474, "logps/chosen": -327.23748779296875, "logps/rejected": -338.04986572265625, "loss": 0.0161, "rewards/accuracies": 0.625, "rewards/chosen": -0.11777210235595703, "rewards/margins": 0.0862937793135643, "rewards/rejected": -0.20406587421894073, "step": 4620 }, { "epoch": 0.3, "learning_rate": 4.398741726907215e-06, "logits/chosen": -1.7456413507461548, "logits/rejected": -1.4316861629486084, "logps/chosen": -394.04150390625, "logps/rejected": -432.5865783691406, "loss": 0.0285, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1147841066122055, "rewards/margins": 0.07887180149555206, "rewards/rejected": -0.19365592300891876, "step": 4630 }, { "epoch": 0.3, "learning_rate": 4.395022417339955e-06, "logits/chosen": -1.243901014328003, "logits/rejected": -1.18853759765625, "logps/chosen": -421.06427001953125, "logps/rejected": -499.82037353515625, "loss": 0.0416, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2144392728805542, "rewards/margins": 0.0702669769525528, "rewards/rejected": -0.2847062647342682, "step": 4640 }, { "epoch": 0.3, "learning_rate": 4.391293222407479e-06, "logits/chosen": -1.133734107017517, "logits/rejected": -1.1859863996505737, "logps/chosen": -305.1255187988281, "logps/rejected": -382.8733825683594, "loss": 0.0191, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.16856837272644043, "rewards/margins": 0.059517838060855865, "rewards/rejected": -0.2280861884355545, "step": 4650 }, { "epoch": 0.3, "learning_rate": 4.387554161563094e-06, "logits/chosen": -1.2345072031021118, "logits/rejected": -1.1850998401641846, "logps/chosen": -429.11724853515625, "logps/rejected": -517.4794921875, "loss": 0.0253, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23025307059288025, "rewards/margins": 0.10018191486597061, "rewards/rejected": -0.33043500781059265, "step": 4660 }, { "epoch": 0.31, "learning_rate": 4.383805254311575e-06, "logits/chosen": -1.3093342781066895, "logits/rejected": -0.9698618054389954, "logps/chosen": -511.65606689453125, "logps/rejected": -532.4832153320312, "loss": 0.0428, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.25551503896713257, "rewards/margins": 0.06639346480369568, "rewards/rejected": -0.32190844416618347, "step": 4670 }, { "epoch": 0.31, "learning_rate": 4.380046520209056e-06, "logits/chosen": -1.156526803970337, "logits/rejected": -0.9005554914474487, "logps/chosen": -417.3006896972656, "logps/rejected": -478.34014892578125, "loss": 0.0136, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21703433990478516, "rewards/margins": 0.0848786011338234, "rewards/rejected": -0.30191293358802795, "step": 4680 }, { "epoch": 0.31, "learning_rate": 4.376277978862936e-06, "logits/chosen": -0.9123376607894897, "logits/rejected": -0.8319652676582336, "logps/chosen": -435.146240234375, "logps/rejected": -450.79302978515625, "loss": 0.0258, "rewards/accuracies": 0.625, "rewards/chosen": -0.20915763080120087, "rewards/margins": 0.05617832392454147, "rewards/rejected": -0.26533591747283936, "step": 4690 }, { "epoch": 0.31, "learning_rate": 4.372499649931774e-06, "logits/chosen": -1.1358721256256104, "logits/rejected": -1.0301427841186523, "logps/chosen": -451.2008361816406, "logps/rejected": -599.9830322265625, "loss": 0.0265, "rewards/accuracies": 0.75, "rewards/chosen": -0.2421092987060547, "rewards/margins": 0.1408257633447647, "rewards/rejected": -0.3829350769519806, "step": 4700 }, { "epoch": 0.31, "eval_logits/chosen": -1.084369421005249, "eval_logits/rejected": -0.9480556845664978, "eval_logps/chosen": -474.7586975097656, "eval_logps/rejected": -548.102294921875, "eval_loss": 0.02621944434940815, "eval_rewards/accuracies": 0.656499981880188, "eval_rewards/chosen": -0.2427537888288498, "eval_rewards/margins": 0.09373660385608673, "eval_rewards/rejected": -0.3364903926849365, "eval_runtime": 715.7724, "eval_samples_per_second": 2.794, "eval_steps_per_second": 1.397, "step": 4700 }, { "epoch": 0.31, "learning_rate": 4.368711553125185e-06, "logits/chosen": -1.25480055809021, "logits/rejected": -1.1933152675628662, "logps/chosen": -497.8189392089844, "logps/rejected": -521.3856201171875, "loss": 0.0264, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22062890231609344, "rewards/margins": 0.08201020956039429, "rewards/rejected": -0.3026391267776489, "step": 4710 }, { "epoch": 0.31, "learning_rate": 4.364913708203734e-06, "logits/chosen": -1.207310438156128, "logits/rejected": -1.0174314975738525, "logps/chosen": -498.9319763183594, "logps/rejected": -503.42401123046875, "loss": 0.0191, "rewards/accuracies": 0.75, "rewards/chosen": -0.21403571963310242, "rewards/margins": 0.07990926504135132, "rewards/rejected": -0.2939450144767761, "step": 4720 }, { "epoch": 0.31, "learning_rate": 4.361106134978844e-06, "logits/chosen": -1.069122552871704, "logits/rejected": -0.8567806482315063, "logps/chosen": -443.868408203125, "logps/rejected": -493.8467712402344, "loss": 0.0307, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1734379678964615, "rewards/margins": 0.06292911618947983, "rewards/rejected": -0.23636707663536072, "step": 4730 }, { "epoch": 0.31, "learning_rate": 4.357288853312681e-06, "logits/chosen": -1.0762145519256592, "logits/rejected": -1.0776039361953735, "logps/chosen": -442.0189514160156, "logps/rejected": -477.848388671875, "loss": 0.0163, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.15854120254516602, "rewards/margins": 0.04056631401181221, "rewards/rejected": -0.19910749793052673, "step": 4740 }, { "epoch": 0.31, "learning_rate": 4.353461883118056e-06, "logits/chosen": -0.9976078867912292, "logits/rejected": -0.8613501787185669, "logps/chosen": -416.0547790527344, "logps/rejected": -433.9742126464844, "loss": 0.0347, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1867642104625702, "rewards/margins": 0.0390704981982708, "rewards/rejected": -0.2258346974849701, "step": 4750 }, { "epoch": 0.31, "learning_rate": 4.34962524435832e-06, "logits/chosen": -0.8916527032852173, "logits/rejected": -0.8016008138656616, "logps/chosen": -383.7411193847656, "logps/rejected": -426.06524658203125, "loss": 0.033, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.16406071186065674, "rewards/margins": 0.0791575089097023, "rewards/rejected": -0.24321822822093964, "step": 4760 }, { "epoch": 0.31, "learning_rate": 4.34577895704726e-06, "logits/chosen": -1.2843925952911377, "logits/rejected": -1.0757373571395874, "logps/chosen": -406.8370056152344, "logps/rejected": -438.86895751953125, "loss": 0.0295, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1452229917049408, "rewards/margins": 0.0577559694647789, "rewards/rejected": -0.2029789686203003, "step": 4770 }, { "epoch": 0.31, "learning_rate": 4.3419230412489954e-06, "logits/chosen": -1.2315599918365479, "logits/rejected": -0.9553321003913879, "logps/chosen": -424.24884033203125, "logps/rejected": -392.9151306152344, "loss": 0.0437, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13488097488880157, "rewards/margins": 0.04473022744059563, "rewards/rejected": -0.1796111762523651, "step": 4780 }, { "epoch": 0.31, "learning_rate": 4.338057517077872e-06, "logits/chosen": -1.076871395111084, "logits/rejected": -0.9164541363716125, "logps/chosen": -322.08929443359375, "logps/rejected": -447.45098876953125, "loss": 0.0368, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1280481219291687, "rewards/margins": 0.16871391236782074, "rewards/rejected": -0.29676204919815063, "step": 4790 }, { "epoch": 0.31, "learning_rate": 4.334182404698356e-06, "logits/chosen": -1.0682333707809448, "logits/rejected": -0.8314278721809387, "logps/chosen": -399.42425537109375, "logps/rejected": -379.38226318359375, "loss": 0.0428, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.16931729018688202, "rewards/margins": 0.058357782661914825, "rewards/rejected": -0.22767508029937744, "step": 4800 }, { "epoch": 0.31, "eval_logits/chosen": -0.9170395135879517, "eval_logits/rejected": -0.7928075790405273, "eval_logps/chosen": -408.2502746582031, "eval_logps/rejected": -470.0755310058594, "eval_loss": 0.025109512731432915, "eval_rewards/accuracies": 0.6589999794960022, "eval_rewards/chosen": -0.17624534666538239, "eval_rewards/margins": 0.08221827447414398, "eval_rewards/rejected": -0.25846362113952637, "eval_runtime": 715.1063, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.398, "step": 4800 }, { "epoch": 0.31, "learning_rate": 4.330297724324933e-06, "logits/chosen": -1.2837412357330322, "logits/rejected": -0.797059953212738, "logps/chosen": -484.1185607910156, "logps/rejected": -463.5137634277344, "loss": 0.0143, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.175707146525383, "rewards/margins": 0.08059539645910263, "rewards/rejected": -0.2563025653362274, "step": 4810 }, { "epoch": 0.32, "learning_rate": 4.326403496221999e-06, "logits/chosen": -0.9811674356460571, "logits/rejected": -0.9895191192626953, "logps/chosen": -317.13555908203125, "logps/rejected": -354.28594970703125, "loss": 0.0328, "rewards/accuracies": 0.5, "rewards/chosen": -0.15570653975009918, "rewards/margins": 0.06393031775951385, "rewards/rejected": -0.21963687241077423, "step": 4820 }, { "epoch": 0.32, "learning_rate": 4.322499740703755e-06, "logits/chosen": -1.0477482080459595, "logits/rejected": -1.1232573986053467, "logps/chosen": -303.56512451171875, "logps/rejected": -377.63385009765625, "loss": 0.0247, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11146591603755951, "rewards/margins": 0.049545418471097946, "rewards/rejected": -0.16101132333278656, "step": 4830 }, { "epoch": 0.32, "learning_rate": 4.318586478134101e-06, "logits/chosen": -1.1854190826416016, "logits/rejected": -0.7953944206237793, "logps/chosen": -301.4418640136719, "logps/rejected": -329.42840576171875, "loss": 0.0312, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10877438634634018, "rewards/margins": 0.06850701570510864, "rewards/rejected": -0.17728139460086823, "step": 4840 }, { "epoch": 0.32, "learning_rate": 4.314663728926534e-06, "logits/chosen": -1.3397547006607056, "logits/rejected": -0.9475423097610474, "logps/chosen": -425.7220764160156, "logps/rejected": -475.20849609375, "loss": 0.0298, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.16884633898735046, "rewards/margins": 0.060064781457185745, "rewards/rejected": -0.2289111167192459, "step": 4850 }, { "epoch": 0.32, "learning_rate": 4.310731513544033e-06, "logits/chosen": -1.0397546291351318, "logits/rejected": -0.7292269468307495, "logps/chosen": -419.3829040527344, "logps/rejected": -454.90948486328125, "loss": 0.0288, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17652516067028046, "rewards/margins": 0.08160610496997833, "rewards/rejected": -0.2581312656402588, "step": 4860 }, { "epoch": 0.32, "learning_rate": 4.30678985249896e-06, "logits/chosen": -1.041822075843811, "logits/rejected": -1.0448477268218994, "logps/chosen": -324.1391906738281, "logps/rejected": -450.3477478027344, "loss": 0.0567, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16698917746543884, "rewards/margins": 0.1079450398683548, "rewards/rejected": -0.27493420243263245, "step": 4870 }, { "epoch": 0.32, "learning_rate": 4.302838766352952e-06, "logits/chosen": -0.9377396702766418, "logits/rejected": -0.7817686796188354, "logps/chosen": -420.3194885253906, "logps/rejected": -478.0335388183594, "loss": 0.0262, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16472652554512024, "rewards/margins": 0.09350559860467911, "rewards/rejected": -0.25823211669921875, "step": 4880 }, { "epoch": 0.32, "learning_rate": 4.298878275716806e-06, "logits/chosen": -0.8833919763565063, "logits/rejected": -0.821493923664093, "logps/chosen": -366.5534973144531, "logps/rejected": -474.9911193847656, "loss": 0.033, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17467406392097473, "rewards/margins": 0.1112346202135086, "rewards/rejected": -0.28590866923332214, "step": 4890 }, { "epoch": 0.32, "learning_rate": 4.294908401250386e-06, "logits/chosen": -0.9654667973518372, "logits/rejected": -0.771117627620697, "logps/chosen": -389.1456298828125, "logps/rejected": -451.322021484375, "loss": 0.0331, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17440257966518402, "rewards/margins": 0.10670874267816544, "rewards/rejected": -0.28111132979393005, "step": 4900 }, { "epoch": 0.32, "eval_logits/chosen": -0.9422993063926697, "eval_logits/rejected": -0.8175697922706604, "eval_logps/chosen": -395.7015075683594, "eval_logps/rejected": -459.6623229980469, "eval_loss": 0.02566472440958023, "eval_rewards/accuracies": 0.6585000157356262, "eval_rewards/chosen": -0.1636965572834015, "eval_rewards/margins": 0.08435382694005966, "eval_rewards/rejected": -0.24805039167404175, "eval_runtime": 715.2945, "eval_samples_per_second": 2.796, "eval_steps_per_second": 1.398, "step": 4900 }, { "epoch": 0.32, "learning_rate": 4.290929163662498e-06, "logits/chosen": -0.6387882232666016, "logits/rejected": -0.6571437120437622, "logps/chosen": -408.33935546875, "logps/rejected": -430.31463623046875, "loss": 0.0282, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1386040896177292, "rewards/margins": 0.07969176769256592, "rewards/rejected": -0.2182958573102951, "step": 4910 }, { "epoch": 0.32, "learning_rate": 4.286940583710796e-06, "logits/chosen": -1.213193655014038, "logits/rejected": -0.9925897717475891, "logps/chosen": -467.76287841796875, "logps/rejected": -497.6581115722656, "loss": 0.0163, "rewards/accuracies": 0.75, "rewards/chosen": -0.17501424252986908, "rewards/margins": 0.086525097489357, "rewards/rejected": -0.2615393102169037, "step": 4920 }, { "epoch": 0.32, "learning_rate": 4.282942682201667e-06, "logits/chosen": -1.0573886632919312, "logits/rejected": -0.852222740650177, "logps/chosen": -396.466064453125, "logps/rejected": -426.05328369140625, "loss": 0.0317, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.14435826241970062, "rewards/margins": 0.06917376816272736, "rewards/rejected": -0.21353201568126678, "step": 4930 }, { "epoch": 0.32, "learning_rate": 4.278935479990123e-06, "logits/chosen": -1.346343994140625, "logits/rejected": -0.9595292806625366, "logps/chosen": -317.3451232910156, "logps/rejected": -340.243408203125, "loss": 0.024, "rewards/accuracies": 0.5, "rewards/chosen": -0.1126817837357521, "rewards/margins": 0.06586971879005432, "rewards/rejected": -0.17855150997638702, "step": 4940 }, { "epoch": 0.32, "learning_rate": 4.274918997979695e-06, "logits/chosen": -1.1766690015792847, "logits/rejected": -1.2016173601150513, "logps/chosen": -314.9263000488281, "logps/rejected": -378.2809143066406, "loss": 0.0351, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.12129127979278564, "rewards/margins": 0.061619244515895844, "rewards/rejected": -0.18291053175926208, "step": 4950 }, { "epoch": 0.32, "learning_rate": 4.270893257122319e-06, "logits/chosen": -0.932578444480896, "logits/rejected": -0.8031288385391235, "logps/chosen": -351.32537841796875, "logps/rejected": -490.75836181640625, "loss": 0.0253, "rewards/accuracies": 0.75, "rewards/chosen": -0.1226034164428711, "rewards/margins": 0.10992362350225449, "rewards/rejected": -0.23252706229686737, "step": 4960 }, { "epoch": 0.33, "learning_rate": 4.266858278418232e-06, "logits/chosen": -0.571777880191803, "logits/rejected": -0.6533526182174683, "logps/chosen": -383.21282958984375, "logps/rejected": -429.4039611816406, "loss": 0.0275, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1466086208820343, "rewards/margins": 0.06654229760169983, "rewards/rejected": -0.21315094828605652, "step": 4970 }, { "epoch": 0.33, "learning_rate": 4.26281408291586e-06, "logits/chosen": -0.9399505853652954, "logits/rejected": -0.6909580826759338, "logps/chosen": -404.631103515625, "logps/rejected": -488.45184326171875, "loss": 0.0371, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16229382157325745, "rewards/margins": 0.11335308849811554, "rewards/rejected": -0.2756468951702118, "step": 4980 }, { "epoch": 0.33, "learning_rate": 4.258760691711706e-06, "logits/chosen": -0.9400049448013306, "logits/rejected": -0.7925345301628113, "logps/chosen": -355.00579833984375, "logps/rejected": -430.6436462402344, "loss": 0.0173, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.15754422545433044, "rewards/margins": 0.08023068308830261, "rewards/rejected": -0.23777492344379425, "step": 4990 }, { "epoch": 0.33, "learning_rate": 4.254698125950247e-06, "logits/chosen": -1.1554138660430908, "logits/rejected": -0.927202582359314, "logps/chosen": -424.57122802734375, "logps/rejected": -435.2237243652344, "loss": 0.0206, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12355668842792511, "rewards/margins": 0.05731816962361336, "rewards/rejected": -0.18087486922740936, "step": 5000 }, { "epoch": 0.33, "eval_logits/chosen": -0.823323130607605, "eval_logits/rejected": -0.7097766995429993, "eval_logps/chosen": -376.7930603027344, "eval_logps/rejected": -430.96429443359375, "eval_loss": 0.02630794607102871, "eval_rewards/accuracies": 0.6635000109672546, "eval_rewards/chosen": -0.14478811621665955, "eval_rewards/margins": 0.07456426322460175, "eval_rewards/rejected": -0.2193523645401001, "eval_runtime": 717.2208, "eval_samples_per_second": 2.789, "eval_steps_per_second": 1.394, "step": 5000 }, { "epoch": 0.33, "learning_rate": 4.250626406823815e-06, "logits/chosen": -0.9734691381454468, "logits/rejected": -0.7217172384262085, "logps/chosen": -378.42437744140625, "logps/rejected": -529.853271484375, "loss": 0.0456, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16156169772148132, "rewards/margins": 0.13525086641311646, "rewards/rejected": -0.2968125343322754, "step": 5010 }, { "epoch": 0.33, "learning_rate": 4.246545555572489e-06, "logits/chosen": -0.6029716730117798, "logits/rejected": -0.706284761428833, "logps/chosen": -327.2740478515625, "logps/rejected": -444.4313049316406, "loss": 0.0277, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17307792603969574, "rewards/margins": 0.09220514446496964, "rewards/rejected": -0.26528307795524597, "step": 5020 }, { "epoch": 0.33, "learning_rate": 4.242455593483992e-06, "logits/chosen": -0.658368706703186, "logits/rejected": -0.4860460162162781, "logps/chosen": -438.3896484375, "logps/rejected": -436.488525390625, "loss": 0.0272, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21923580765724182, "rewards/margins": 0.04780827462673187, "rewards/rejected": -0.2670440673828125, "step": 5030 }, { "epoch": 0.33, "learning_rate": 4.238356541893567e-06, "logits/chosen": -0.5742266774177551, "logits/rejected": -0.606495201587677, "logps/chosen": -427.51617431640625, "logps/rejected": -507.86712646484375, "loss": 0.0141, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23738297820091248, "rewards/margins": 0.09589354693889618, "rewards/rejected": -0.33327654004096985, "step": 5040 }, { "epoch": 0.33, "learning_rate": 4.234248422183876e-06, "logits/chosen": -0.8235737085342407, "logits/rejected": -0.9690597653388977, "logps/chosen": -438.0294494628906, "logps/rejected": -498.9979553222656, "loss": 0.04, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1950947493314743, "rewards/margins": 0.06552483141422272, "rewards/rejected": -0.260619580745697, "step": 5050 }, { "epoch": 0.33, "learning_rate": 4.230131255784884e-06, "logits/chosen": -1.21338951587677, "logits/rejected": -0.9423225522041321, "logps/chosen": -452.3050842285156, "logps/rejected": -506.7218322753906, "loss": 0.0226, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19442197680473328, "rewards/margins": 0.07321318238973618, "rewards/rejected": -0.26763516664505005, "step": 5060 }, { "epoch": 0.33, "learning_rate": 4.226005064173748e-06, "logits/chosen": -0.9354829788208008, "logits/rejected": -0.8446968793869019, "logps/chosen": -482.5591735839844, "logps/rejected": -558.6563110351562, "loss": 0.0176, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21791496872901917, "rewards/margins": 0.05764005333185196, "rewards/rejected": -0.27555498480796814, "step": 5070 }, { "epoch": 0.33, "learning_rate": 4.2218698688747035e-06, "logits/chosen": -0.5974547266960144, "logits/rejected": -0.44372043013572693, "logps/chosen": -512.1973266601562, "logps/rejected": -544.0070190429688, "loss": 0.0219, "rewards/accuracies": 0.75, "rewards/chosen": -0.2794879972934723, "rewards/margins": 0.07693992555141449, "rewards/rejected": -0.3564279079437256, "step": 5080 }, { "epoch": 0.33, "learning_rate": 4.217725691458957e-06, "logits/chosen": -1.1167364120483398, "logits/rejected": -0.9290026426315308, "logps/chosen": -449.79888916015625, "logps/rejected": -575.9512939453125, "loss": 0.0244, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.26557475328445435, "rewards/margins": 0.09920646250247955, "rewards/rejected": -0.3647812306880951, "step": 5090 }, { "epoch": 0.33, "learning_rate": 4.213572553544565e-06, "logits/chosen": -0.7339587211608887, "logits/rejected": -0.753326952457428, "logps/chosen": -545.8994140625, "logps/rejected": -627.5664672851562, "loss": 0.0158, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3049275279045105, "rewards/margins": 0.08903156965970993, "rewards/rejected": -0.393959105014801, "step": 5100 }, { "epoch": 0.33, "eval_logits/chosen": -0.8615201711654663, "eval_logits/rejected": -0.7416000366210938, "eval_logps/chosen": -510.8705139160156, "eval_logps/rejected": -573.3056030273438, "eval_loss": 0.025622623041272163, "eval_rewards/accuracies": 0.6554999947547913, "eval_rewards/chosen": -0.2788655459880829, "eval_rewards/margins": 0.08282823115587234, "eval_rewards/rejected": -0.361693799495697, "eval_runtime": 713.326, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 5100 }, { "epoch": 0.33, "learning_rate": 4.209410476796331e-06, "logits/chosen": -0.7524689435958862, "logits/rejected": -0.7590414881706238, "logps/chosen": -469.38006591796875, "logps/rejected": -535.579345703125, "loss": 0.0396, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.29828861355781555, "rewards/margins": 0.07618852704763412, "rewards/rejected": -0.3744771480560303, "step": 5110 }, { "epoch": 0.33, "learning_rate": 4.205239482925686e-06, "logits/chosen": -0.8803011178970337, "logits/rejected": -0.8016735911369324, "logps/chosen": -430.9256896972656, "logps/rejected": -513.93798828125, "loss": 0.032, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24311034381389618, "rewards/margins": 0.060885947197675705, "rewards/rejected": -0.3039962947368622, "step": 5120 }, { "epoch": 0.34, "learning_rate": 4.201059593690577e-06, "logits/chosen": -1.2470813989639282, "logits/rejected": -1.1861910820007324, "logps/chosen": -470.6795959472656, "logps/rejected": -504.35833740234375, "loss": 0.0094, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24509187042713165, "rewards/margins": 0.05910780280828476, "rewards/rejected": -0.3041996657848358, "step": 5130 }, { "epoch": 0.34, "learning_rate": 4.196870830895354e-06, "logits/chosen": -0.9839805364608765, "logits/rejected": -0.89997398853302, "logps/chosen": -488.5289611816406, "logps/rejected": -592.5863647460938, "loss": 0.0171, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23078136146068573, "rewards/margins": 0.054550062865018845, "rewards/rejected": -0.2853314280509949, "step": 5140 }, { "epoch": 0.34, "learning_rate": 4.192673216390657e-06, "logits/chosen": -1.0446064472198486, "logits/rejected": -0.8286064267158508, "logps/chosen": -470.79376220703125, "logps/rejected": -499.12506103515625, "loss": 0.0447, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23782086372375488, "rewards/margins": 0.0666588619351387, "rewards/rejected": -0.304479718208313, "step": 5150 }, { "epoch": 0.34, "learning_rate": 4.188466772073296e-06, "logits/chosen": -1.0337674617767334, "logits/rejected": -0.9799618721008301, "logps/chosen": -497.9425354003906, "logps/rejected": -518.5938720703125, "loss": 0.0181, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2774180471897125, "rewards/margins": 0.041412971913814545, "rewards/rejected": -0.31883102655410767, "step": 5160 }, { "epoch": 0.34, "learning_rate": 4.184251519886148e-06, "logits/chosen": -0.9038311243057251, "logits/rejected": -0.7815853953361511, "logps/chosen": -546.5521850585938, "logps/rejected": -661.8203735351562, "loss": 0.0263, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.35044464468955994, "rewards/margins": 0.08661321550607681, "rewards/rejected": -0.43705788254737854, "step": 5170 }, { "epoch": 0.34, "learning_rate": 4.180027481818033e-06, "logits/chosen": -1.0261890888214111, "logits/rejected": -1.122697353363037, "logps/chosen": -585.0550537109375, "logps/rejected": -593.5311279296875, "loss": 0.0347, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3183569312095642, "rewards/margins": 0.04630974680185318, "rewards/rejected": -0.3646667003631592, "step": 5180 }, { "epoch": 0.34, "learning_rate": 4.175794679903602e-06, "logits/chosen": -1.01822829246521, "logits/rejected": -0.8567032814025879, "logps/chosen": -463.68328857421875, "logps/rejected": -475.3485412597656, "loss": 0.0406, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23604245483875275, "rewards/margins": 0.08402223885059357, "rewards/rejected": -0.3200646936893463, "step": 5190 }, { "epoch": 0.34, "learning_rate": 4.171553136223222e-06, "logits/chosen": -1.286424160003662, "logits/rejected": -1.1344553232192993, "logps/chosen": -479.45196533203125, "logps/rejected": -591.9468994140625, "loss": 0.0145, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21213114261627197, "rewards/margins": 0.10178504139184952, "rewards/rejected": -0.3139161765575409, "step": 5200 }, { "epoch": 0.34, "eval_logits/chosen": -1.0756512880325317, "eval_logits/rejected": -0.9478035569190979, "eval_logps/chosen": -429.8432312011719, "eval_logps/rejected": -480.5621643066406, "eval_loss": 0.02596084587275982, "eval_rewards/accuracies": 0.6660000085830688, "eval_rewards/chosen": -0.19783827662467957, "eval_rewards/margins": 0.0711119994521141, "eval_rewards/rejected": -0.26895028352737427, "eval_runtime": 714.6942, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 5200 }, { "epoch": 0.34, "learning_rate": 4.167302872902865e-06, "logits/chosen": -1.1648603677749634, "logits/rejected": -0.8383561968803406, "logps/chosen": -456.4186096191406, "logps/rejected": -538.2722778320312, "loss": 0.0313, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2049504816532135, "rewards/margins": 0.09379171580076218, "rewards/rejected": -0.2987422049045563, "step": 5210 }, { "epoch": 0.34, "learning_rate": 4.163043912113985e-06, "logits/chosen": -1.1259856224060059, "logits/rejected": -0.9301275014877319, "logps/chosen": -452.885986328125, "logps/rejected": -465.56707763671875, "loss": 0.0322, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19384589791297913, "rewards/margins": 0.046675633639097214, "rewards/rejected": -0.24052152037620544, "step": 5220 }, { "epoch": 0.34, "learning_rate": 4.15877627607341e-06, "logits/chosen": -0.9052518010139465, "logits/rejected": -0.6302906274795532, "logps/chosen": -426.9661560058594, "logps/rejected": -465.74871826171875, "loss": 0.0188, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21037133038043976, "rewards/margins": 0.06300492584705353, "rewards/rejected": -0.2733762860298157, "step": 5230 }, { "epoch": 0.34, "learning_rate": 4.154499987043217e-06, "logits/chosen": -1.086658239364624, "logits/rejected": -0.9023993611335754, "logps/chosen": -439.8319396972656, "logps/rejected": -528.7637939453125, "loss": 0.0123, "rewards/accuracies": 0.75, "rewards/chosen": -0.21562838554382324, "rewards/margins": 0.11111153662204742, "rewards/rejected": -0.3267399072647095, "step": 5240 }, { "epoch": 0.34, "learning_rate": 4.150215067330625e-06, "logits/chosen": -0.9510372281074524, "logits/rejected": -0.6933058500289917, "logps/chosen": -434.48297119140625, "logps/rejected": -530.3092651367188, "loss": 0.0389, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22543415427207947, "rewards/margins": 0.0817890539765358, "rewards/rejected": -0.30722326040267944, "step": 5250 }, { "epoch": 0.34, "learning_rate": 4.145921539287876e-06, "logits/chosen": -0.8996394276618958, "logits/rejected": -0.8662531971931458, "logps/chosen": -412.24090576171875, "logps/rejected": -503.5206604003906, "loss": 0.0233, "rewards/accuracies": 0.75, "rewards/chosen": -0.22164829075336456, "rewards/margins": 0.1112830638885498, "rewards/rejected": -0.3329313397407532, "step": 5260 }, { "epoch": 0.34, "learning_rate": 4.141619425312115e-06, "logits/chosen": -1.0143764019012451, "logits/rejected": -0.7150412797927856, "logps/chosen": -437.21337890625, "logps/rejected": -454.25213623046875, "loss": 0.0232, "rewards/accuracies": 0.5, "rewards/chosen": -0.2285621166229248, "rewards/margins": 0.041480645537376404, "rewards/rejected": -0.2700427770614624, "step": 5270 }, { "epoch": 0.35, "learning_rate": 4.1373087478452735e-06, "logits/chosen": -1.0359195470809937, "logits/rejected": -0.9358541369438171, "logps/chosen": -407.6039733886719, "logps/rejected": -488.2931213378906, "loss": 0.0392, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18236342072486877, "rewards/margins": 0.1353030651807785, "rewards/rejected": -0.3176664710044861, "step": 5280 }, { "epoch": 0.35, "learning_rate": 4.132989529373959e-06, "logits/chosen": -1.1896417140960693, "logits/rejected": -0.9655329585075378, "logps/chosen": -468.32562255859375, "logps/rejected": -458.0716857910156, "loss": 0.0265, "rewards/accuracies": 0.625, "rewards/chosen": -0.21241283416748047, "rewards/margins": 0.06874451041221619, "rewards/rejected": -0.28115734457969666, "step": 5290 }, { "epoch": 0.35, "learning_rate": 4.128661792429331e-06, "logits/chosen": -1.2529733180999756, "logits/rejected": -1.1396033763885498, "logps/chosen": -419.14642333984375, "logps/rejected": -491.6564025878906, "loss": 0.0209, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16537588834762573, "rewards/margins": 0.06761355698108673, "rewards/rejected": -0.23298940062522888, "step": 5300 }, { "epoch": 0.35, "eval_logits/chosen": -1.3869491815567017, "eval_logits/rejected": -1.239190936088562, "eval_logps/chosen": -384.1584167480469, "eval_logps/rejected": -440.3551940917969, "eval_loss": 0.02548842690885067, "eval_rewards/accuracies": 0.6585000157356262, "eval_rewards/chosen": -0.15215347707271576, "eval_rewards/margins": 0.07658979296684265, "eval_rewards/rejected": -0.2287432700395584, "eval_runtime": 714.8467, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 5300 }, { "epoch": 0.35, "learning_rate": 4.124325559586985e-06, "logits/chosen": -1.3616712093353271, "logits/rejected": -1.277834415435791, "logps/chosen": -354.333740234375, "logps/rejected": -375.1416931152344, "loss": 0.0508, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.16627705097198486, "rewards/margins": 0.007883940823376179, "rewards/rejected": -0.17416098713874817, "step": 5310 }, { "epoch": 0.35, "learning_rate": 4.119980853466835e-06, "logits/chosen": -1.3799018859863281, "logits/rejected": -1.002909541130066, "logps/chosen": -344.64617919921875, "logps/rejected": -396.9372253417969, "loss": 0.0425, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1354917734861374, "rewards/margins": 0.07940524816513062, "rewards/rejected": -0.21489699184894562, "step": 5320 }, { "epoch": 0.35, "learning_rate": 4.115627696732997e-06, "logits/chosen": -1.3049077987670898, "logits/rejected": -1.1814768314361572, "logps/chosen": -308.7670593261719, "logps/rejected": -357.3789367675781, "loss": 0.02, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11735282838344574, "rewards/margins": 0.06793279945850372, "rewards/rejected": -0.18528564274311066, "step": 5330 }, { "epoch": 0.35, "learning_rate": 4.111266112093668e-06, "logits/chosen": -1.4339227676391602, "logits/rejected": -1.277630090713501, "logps/chosen": -378.3221740722656, "logps/rejected": -488.48309326171875, "loss": 0.027, "rewards/accuracies": 0.75, "rewards/chosen": -0.17485448718070984, "rewards/margins": 0.09176313877105713, "rewards/rejected": -0.2666175961494446, "step": 5340 }, { "epoch": 0.35, "learning_rate": 4.1068961223010115e-06, "logits/chosen": -1.4751958847045898, "logits/rejected": -1.0743850469589233, "logps/chosen": -478.87066650390625, "logps/rejected": -544.0775146484375, "loss": 0.0291, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20817002654075623, "rewards/margins": 0.08788318932056427, "rewards/rejected": -0.2960532307624817, "step": 5350 }, { "epoch": 0.35, "learning_rate": 4.102517750151034e-06, "logits/chosen": -1.464954137802124, "logits/rejected": -1.224535346031189, "logps/chosen": -468.95465087890625, "logps/rejected": -444.91387939453125, "loss": 0.0319, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17390979826450348, "rewards/margins": 0.047272175550460815, "rewards/rejected": -0.22118201851844788, "step": 5360 }, { "epoch": 0.35, "learning_rate": 4.09813101848347e-06, "logits/chosen": -1.472651481628418, "logits/rejected": -1.328861117362976, "logps/chosen": -373.92303466796875, "logps/rejected": -463.69451904296875, "loss": 0.0191, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15493713319301605, "rewards/margins": 0.0726172924041748, "rewards/rejected": -0.22755444049835205, "step": 5370 }, { "epoch": 0.35, "learning_rate": 4.093735950181659e-06, "logits/chosen": -1.3882478475570679, "logits/rejected": -1.2646280527114868, "logps/chosen": -348.1236267089844, "logps/rejected": -475.47259521484375, "loss": 0.0163, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12265026569366455, "rewards/margins": 0.1028057187795639, "rewards/rejected": -0.22545596957206726, "step": 5380 }, { "epoch": 0.35, "learning_rate": 4.0893325681724326e-06, "logits/chosen": -1.580491542816162, "logits/rejected": -1.504421591758728, "logps/chosen": -426.92333984375, "logps/rejected": -496.18017578125, "loss": 0.0361, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16850170493125916, "rewards/margins": 0.08153624087572098, "rewards/rejected": -0.25003793835639954, "step": 5390 }, { "epoch": 0.35, "learning_rate": 4.084920895425988e-06, "logits/chosen": -1.4114059209823608, "logits/rejected": -1.2922006845474243, "logps/chosen": -452.6734313964844, "logps/rejected": -533.3858032226562, "loss": 0.0292, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21530881524085999, "rewards/margins": 0.06706748157739639, "rewards/rejected": -0.2823762595653534, "step": 5400 }, { "epoch": 0.35, "eval_logits/chosen": -1.3683128356933594, "eval_logits/rejected": -1.222110390663147, "eval_logps/chosen": -405.9722900390625, "eval_logps/rejected": -457.4742431640625, "eval_loss": 0.025775643065571785, "eval_rewards/accuracies": 0.6604999899864197, "eval_rewards/chosen": -0.17396733164787292, "eval_rewards/margins": 0.07189502567052841, "eval_rewards/rejected": -0.24586234986782074, "eval_runtime": 715.2877, "eval_samples_per_second": 2.796, "eval_steps_per_second": 1.398, "step": 5400 }, { "epoch": 0.35, "learning_rate": 4.080500954955769e-06, "logits/chosen": -1.267074465751648, "logits/rejected": -1.1021515130996704, "logps/chosen": -456.5711364746094, "logps/rejected": -515.9779052734375, "loss": 0.0323, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20069925487041473, "rewards/margins": 0.0776929035782814, "rewards/rejected": -0.27839216589927673, "step": 5410 }, { "epoch": 0.35, "learning_rate": 4.076072769818354e-06, "logits/chosen": -1.5735347270965576, "logits/rejected": -1.2642533779144287, "logps/chosen": -423.47454833984375, "logps/rejected": -429.453369140625, "loss": 0.0146, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.17975224554538727, "rewards/margins": 0.0679444968700409, "rewards/rejected": -0.2476966828107834, "step": 5420 }, { "epoch": 0.36, "learning_rate": 4.071636363113323e-06, "logits/chosen": -1.096242904663086, "logits/rejected": -1.0024423599243164, "logps/chosen": -450.58636474609375, "logps/rejected": -453.54425048828125, "loss": 0.0264, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.18245628476142883, "rewards/margins": 0.057000450789928436, "rewards/rejected": -0.23945674300193787, "step": 5430 }, { "epoch": 0.36, "learning_rate": 4.067191757983146e-06, "logits/chosen": -1.1468369960784912, "logits/rejected": -1.0676357746124268, "logps/chosen": -425.11993408203125, "logps/rejected": -528.5230102539062, "loss": 0.0223, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1955442726612091, "rewards/margins": 0.10661707073450089, "rewards/rejected": -0.302161306142807, "step": 5440 }, { "epoch": 0.36, "learning_rate": 4.062738977613063e-06, "logits/chosen": -1.1041396856307983, "logits/rejected": -1.1108615398406982, "logps/chosen": -417.5469665527344, "logps/rejected": -438.81964111328125, "loss": 0.0304, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.18307065963745117, "rewards/margins": 0.06966587156057358, "rewards/rejected": -0.25273653864860535, "step": 5450 }, { "epoch": 0.36, "learning_rate": 4.058278045230957e-06, "logits/chosen": -1.4018709659576416, "logits/rejected": -1.390424132347107, "logps/chosen": -437.18035888671875, "logps/rejected": -488.399169921875, "loss": 0.0311, "rewards/accuracies": 0.625, "rewards/chosen": -0.21881353855133057, "rewards/margins": 0.04963109642267227, "rewards/rejected": -0.26844462752342224, "step": 5460 }, { "epoch": 0.36, "learning_rate": 4.053808984107235e-06, "logits/chosen": -1.4311949014663696, "logits/rejected": -1.2379485368728638, "logps/chosen": -400.61529541015625, "logps/rejected": -389.9832763671875, "loss": 0.0516, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.16774697601795197, "rewards/margins": 0.0296611450612545, "rewards/rejected": -0.19740812480449677, "step": 5470 }, { "epoch": 0.36, "learning_rate": 4.04933181755471e-06, "logits/chosen": -1.2934504747390747, "logits/rejected": -1.2656474113464355, "logps/chosen": -420.289794921875, "logps/rejected": -491.38519287109375, "loss": 0.0385, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21370916068553925, "rewards/margins": 0.07830795645713806, "rewards/rejected": -0.2920171320438385, "step": 5480 }, { "epoch": 0.36, "learning_rate": 4.044846568928477e-06, "logits/chosen": -1.44814133644104, "logits/rejected": -1.388803482055664, "logps/chosen": -441.94305419921875, "logps/rejected": -503.94146728515625, "loss": 0.0337, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1804189234972, "rewards/margins": 0.07085631787776947, "rewards/rejected": -0.2512752413749695, "step": 5490 }, { "epoch": 0.36, "learning_rate": 4.040353261625788e-06, "logits/chosen": -1.554697871208191, "logits/rejected": -1.0627509355545044, "logps/chosen": -433.16357421875, "logps/rejected": -493.19854736328125, "loss": 0.0104, "rewards/accuracies": 0.625, "rewards/chosen": -0.15661834180355072, "rewards/margins": 0.09866548329591751, "rewards/rejected": -0.25528383255004883, "step": 5500 }, { "epoch": 0.36, "eval_logits/chosen": -1.317143201828003, "eval_logits/rejected": -1.1723862886428833, "eval_logps/chosen": -394.80975341796875, "eval_logps/rejected": -453.00579833984375, "eval_loss": 0.02578054554760456, "eval_rewards/accuracies": 0.6585000157356262, "eval_rewards/chosen": -0.16280482709407806, "eval_rewards/margins": 0.07858908921480179, "eval_rewards/rejected": -0.24139392375946045, "eval_runtime": 714.0165, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.401, "step": 5500 }, { "epoch": 0.36, "learning_rate": 4.035851919085936e-06, "logits/chosen": -1.391754388809204, "logits/rejected": -1.1405208110809326, "logps/chosen": -457.9352111816406, "logps/rejected": -464.3085021972656, "loss": 0.0136, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1905411034822464, "rewards/margins": 0.08508516848087311, "rewards/rejected": -0.2756262719631195, "step": 5510 }, { "epoch": 0.36, "learning_rate": 4.031342564790128e-06, "logits/chosen": -1.399954080581665, "logits/rejected": -1.1710107326507568, "logps/chosen": -338.07080078125, "logps/rejected": -447.4768981933594, "loss": 0.0369, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13209910690784454, "rewards/margins": 0.1141788512468338, "rewards/rejected": -0.24627797305583954, "step": 5520 }, { "epoch": 0.36, "learning_rate": 4.026825222261367e-06, "logits/chosen": -1.1558170318603516, "logits/rejected": -0.9577473402023315, "logps/chosen": -462.66326904296875, "logps/rejected": -522.847412109375, "loss": 0.0482, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.28958821296691895, "rewards/margins": 0.06732732802629471, "rewards/rejected": -0.35691553354263306, "step": 5530 }, { "epoch": 0.36, "learning_rate": 4.022299915064321e-06, "logits/chosen": -1.3923536539077759, "logits/rejected": -1.1838186979293823, "logps/chosen": -562.73681640625, "logps/rejected": -573.6365356445312, "loss": 0.0454, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.25688880681991577, "rewards/margins": 0.06400532275438309, "rewards/rejected": -0.32089412212371826, "step": 5540 }, { "epoch": 0.36, "learning_rate": 4.017766666805213e-06, "logits/chosen": -1.3105462789535522, "logits/rejected": -1.1304515600204468, "logps/chosen": -443.61907958984375, "logps/rejected": -478.7421875, "loss": 0.034, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.230098694562912, "rewards/margins": 0.07048474997282028, "rewards/rejected": -0.3005834221839905, "step": 5550 }, { "epoch": 0.36, "learning_rate": 4.013225501131684e-06, "logits/chosen": -1.4791831970214844, "logits/rejected": -1.2157981395721436, "logps/chosen": -405.5892333984375, "logps/rejected": -440.9805603027344, "loss": 0.0238, "rewards/accuracies": 0.5, "rewards/chosen": -0.18821164965629578, "rewards/margins": 0.062364112585783005, "rewards/rejected": -0.2505757808685303, "step": 5560 }, { "epoch": 0.36, "learning_rate": 4.008676441732679e-06, "logits/chosen": -1.2817049026489258, "logits/rejected": -1.0414891242980957, "logps/chosen": -414.7164611816406, "logps/rejected": -427.27740478515625, "loss": 0.0411, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.20842210948467255, "rewards/margins": 0.06056179478764534, "rewards/rejected": -0.2689839005470276, "step": 5570 }, { "epoch": 0.37, "learning_rate": 4.00411951233832e-06, "logits/chosen": -1.4686822891235352, "logits/rejected": -1.2353346347808838, "logps/chosen": -447.0987243652344, "logps/rejected": -493.9344787597656, "loss": 0.0397, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22737355530261993, "rewards/margins": 0.09150932729244232, "rewards/rejected": -0.31888288259506226, "step": 5580 }, { "epoch": 0.37, "learning_rate": 3.999554736719785e-06, "logits/chosen": -1.233842372894287, "logits/rejected": -1.1729977130889893, "logps/chosen": -547.4593505859375, "logps/rejected": -577.8104248046875, "loss": 0.0264, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2554824948310852, "rewards/margins": 0.07386898249387741, "rewards/rejected": -0.3293514847755432, "step": 5590 }, { "epoch": 0.37, "learning_rate": 3.994982138689177e-06, "logits/chosen": -1.618848443031311, "logits/rejected": -1.3463637828826904, "logps/chosen": -530.5050048828125, "logps/rejected": -586.9093017578125, "loss": 0.0201, "rewards/accuracies": 0.625, "rewards/chosen": -0.2931048274040222, "rewards/margins": 0.058255456387996674, "rewards/rejected": -0.3513602614402771, "step": 5600 }, { "epoch": 0.37, "eval_logits/chosen": -1.2817002534866333, "eval_logits/rejected": -1.134204626083374, "eval_logps/chosen": -532.1311645507812, "eval_logps/rejected": -595.0032958984375, "eval_loss": 0.026721389964222908, "eval_rewards/accuracies": 0.659500002861023, "eval_rewards/chosen": -0.30012622475624084, "eval_rewards/margins": 0.08326515555381775, "eval_rewards/rejected": -0.38339143991470337, "eval_runtime": 715.3427, "eval_samples_per_second": 2.796, "eval_steps_per_second": 1.398, "step": 5600 }, { "epoch": 0.37, "learning_rate": 3.990401742099408e-06, "logits/chosen": -1.1029692888259888, "logits/rejected": -1.1162054538726807, "logps/chosen": -424.474853515625, "logps/rejected": -474.2005310058594, "loss": 0.0304, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.2456117570400238, "rewards/margins": 0.05707447975873947, "rewards/rejected": -0.30268624424934387, "step": 5610 }, { "epoch": 0.37, "learning_rate": 3.985813570844072e-06, "logits/chosen": -1.3899219036102295, "logits/rejected": -1.2691123485565186, "logps/chosen": -546.1656494140625, "logps/rejected": -602.9925537109375, "loss": 0.0374, "rewards/accuracies": 0.75, "rewards/chosen": -0.25177890062332153, "rewards/margins": 0.08437846601009369, "rewards/rejected": -0.33615735173225403, "step": 5620 }, { "epoch": 0.37, "learning_rate": 3.981217648857316e-06, "logits/chosen": -1.3798067569732666, "logits/rejected": -1.2693253755569458, "logps/chosen": -339.47283935546875, "logps/rejected": -426.76153564453125, "loss": 0.0139, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16693241894245148, "rewards/margins": 0.08550689369440079, "rewards/rejected": -0.25243932008743286, "step": 5630 }, { "epoch": 0.37, "learning_rate": 3.97661400011372e-06, "logits/chosen": -1.3402612209320068, "logits/rejected": -1.4176042079925537, "logps/chosen": -398.94158935546875, "logps/rejected": -436.2691345214844, "loss": 0.0377, "rewards/accuracies": 0.625, "rewards/chosen": -0.155914306640625, "rewards/margins": 0.048750389367341995, "rewards/rejected": -0.2046646773815155, "step": 5640 }, { "epoch": 0.37, "learning_rate": 3.972002648628174e-06, "logits/chosen": -1.3796780109405518, "logits/rejected": -1.1762449741363525, "logps/chosen": -421.19232177734375, "logps/rejected": -420.214111328125, "loss": 0.0252, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.14416152238845825, "rewards/margins": 0.04671357944607735, "rewards/rejected": -0.1908750832080841, "step": 5650 }, { "epoch": 0.37, "learning_rate": 3.967383618455743e-06, "logits/chosen": -1.348460078239441, "logits/rejected": -1.3641364574432373, "logps/chosen": -408.205322265625, "logps/rejected": -487.0420837402344, "loss": 0.0467, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18108566105365753, "rewards/margins": 0.06064629554748535, "rewards/rejected": -0.24173195660114288, "step": 5660 }, { "epoch": 0.37, "learning_rate": 3.9627569336915515e-06, "logits/chosen": -1.6144300699234009, "logits/rejected": -1.3587367534637451, "logps/chosen": -402.56182861328125, "logps/rejected": -448.1279296875, "loss": 0.0279, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1550990492105484, "rewards/margins": 0.10224269330501556, "rewards/rejected": -0.25734174251556396, "step": 5670 }, { "epoch": 0.37, "learning_rate": 3.9581226184706555e-06, "logits/chosen": -1.440455675125122, "logits/rejected": -1.6092504262924194, "logps/chosen": -376.7449951171875, "logps/rejected": -520.094482421875, "loss": 0.0141, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18207959830760956, "rewards/margins": 0.07265181839466095, "rewards/rejected": -0.2547314167022705, "step": 5680 }, { "epoch": 0.37, "learning_rate": 3.953480696967912e-06, "logits/chosen": -0.9862279891967773, "logits/rejected": -1.1157066822052002, "logps/chosen": -477.7144470214844, "logps/rejected": -590.9965209960938, "loss": 0.028, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.26834288239479065, "rewards/margins": 0.07683631032705307, "rewards/rejected": -0.3451792001724243, "step": 5690 }, { "epoch": 0.37, "learning_rate": 3.948831193397857e-06, "logits/chosen": -0.9048309326171875, "logits/rejected": -0.9119782447814941, "logps/chosen": -485.4453125, "logps/rejected": -544.5375366210938, "loss": 0.0258, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.3194299638271332, "rewards/margins": 0.053266387432813644, "rewards/rejected": -0.3726963400840759, "step": 5700 }, { "epoch": 0.37, "eval_logits/chosen": -1.034959316253662, "eval_logits/rejected": -0.902513861656189, "eval_logps/chosen": -553.446044921875, "eval_logps/rejected": -615.78759765625, "eval_loss": 0.0263617355376482, "eval_rewards/accuracies": 0.6495000123977661, "eval_rewards/chosen": -0.321441113948822, "eval_rewards/margins": 0.0827346220612526, "eval_rewards/rejected": -0.404175728559494, "eval_runtime": 714.6238, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.399, "step": 5700 }, { "epoch": 0.37, "learning_rate": 3.94417413201458e-06, "logits/chosen": -1.21490478515625, "logits/rejected": -0.9374274015426636, "logps/chosen": -479.5146484375, "logps/rejected": -522.0642700195312, "loss": 0.0741, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2679431438446045, "rewards/margins": 0.060316406190395355, "rewards/rejected": -0.32825952768325806, "step": 5710 }, { "epoch": 0.37, "learning_rate": 3.9395095371115935e-06, "logits/chosen": -1.2432184219360352, "logits/rejected": -1.1395750045776367, "logps/chosen": -456.03497314453125, "logps/rejected": -524.8805541992188, "loss": 0.0392, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2433778941631317, "rewards/margins": 0.07945677638053894, "rewards/rejected": -0.32283467054367065, "step": 5720 }, { "epoch": 0.37, "learning_rate": 3.93483743302171e-06, "logits/chosen": -1.3308954238891602, "logits/rejected": -1.0365536212921143, "logps/chosen": -431.49005126953125, "logps/rejected": -485.2618713378906, "loss": 0.0306, "rewards/accuracies": 0.625, "rewards/chosen": -0.2186984270811081, "rewards/margins": 0.07750718295574188, "rewards/rejected": -0.29620561003685, "step": 5730 }, { "epoch": 0.38, "learning_rate": 3.930157844116913e-06, "logits/chosen": -1.3236615657806396, "logits/rejected": -1.1010358333587646, "logps/chosen": -379.8088073730469, "logps/rejected": -435.0101623535156, "loss": 0.021, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.17005157470703125, "rewards/margins": 0.06992004811763763, "rewards/rejected": -0.2399716079235077, "step": 5740 }, { "epoch": 0.38, "learning_rate": 3.925470794808229e-06, "logits/chosen": -1.35048246383667, "logits/rejected": -1.1555402278900146, "logps/chosen": -432.5293884277344, "logps/rejected": -479.93524169921875, "loss": 0.0321, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19354164600372314, "rewards/margins": 0.08022323250770569, "rewards/rejected": -0.2737649083137512, "step": 5750 }, { "epoch": 0.38, "learning_rate": 3.920776309545606e-06, "logits/chosen": -1.650655746459961, "logits/rejected": -1.452972412109375, "logps/chosen": -250.83334350585938, "logps/rejected": -307.3514709472656, "loss": 0.0299, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.09812633693218231, "rewards/margins": 0.0633506029844284, "rewards/rejected": -0.1614769548177719, "step": 5760 }, { "epoch": 0.38, "learning_rate": 3.916074412817778e-06, "logits/chosen": -1.5202057361602783, "logits/rejected": -1.1989761590957642, "logps/chosen": -368.3289489746094, "logps/rejected": -448.4273986816406, "loss": 0.0388, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12499499320983887, "rewards/margins": 0.07957610487937927, "rewards/rejected": -0.20457109808921814, "step": 5770 }, { "epoch": 0.38, "learning_rate": 3.911365129152139e-06, "logits/chosen": -1.544917345046997, "logits/rejected": -1.425197958946228, "logps/chosen": -356.8941345214844, "logps/rejected": -425.58294677734375, "loss": 0.0223, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12446029484272003, "rewards/margins": 0.07422862201929092, "rewards/rejected": -0.19868893921375275, "step": 5780 }, { "epoch": 0.38, "learning_rate": 3.906648483114623e-06, "logits/chosen": -1.4581537246704102, "logits/rejected": -1.2918035984039307, "logps/chosen": -322.9499816894531, "logps/rejected": -377.5986633300781, "loss": 0.0318, "rewards/accuracies": 0.625, "rewards/chosen": -0.1256263554096222, "rewards/margins": 0.08723373711109161, "rewards/rejected": -0.212860107421875, "step": 5790 }, { "epoch": 0.38, "learning_rate": 3.901924499309564e-06, "logits/chosen": -1.327897548675537, "logits/rejected": -1.1938635110855103, "logps/chosen": -372.50543212890625, "logps/rejected": -429.55029296875, "loss": 0.0254, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1438005119562149, "rewards/margins": 0.08965082466602325, "rewards/rejected": -0.23345136642456055, "step": 5800 }, { "epoch": 0.38, "eval_logits/chosen": -1.3844434022903442, "eval_logits/rejected": -1.2336195707321167, "eval_logps/chosen": -413.2734069824219, "eval_logps/rejected": -481.4164123535156, "eval_loss": 0.02475276030600071, "eval_rewards/accuracies": 0.656000018119812, "eval_rewards/chosen": -0.18126845359802246, "eval_rewards/margins": 0.08853607624769211, "eval_rewards/rejected": -0.26980453729629517, "eval_runtime": 713.2729, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 5800 }, { "epoch": 0.38, "learning_rate": 3.897193202379575e-06, "logits/chosen": -1.4435081481933594, "logits/rejected": -1.2470020055770874, "logps/chosen": -365.19989013671875, "logps/rejected": -437.09588623046875, "loss": 0.0221, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16472217440605164, "rewards/margins": 0.08886884152889252, "rewards/rejected": -0.25359100103378296, "step": 5810 }, { "epoch": 0.38, "learning_rate": 3.8924546170054215e-06, "logits/chosen": -1.4334276914596558, "logits/rejected": -1.2906148433685303, "logps/chosen": -362.6976013183594, "logps/rejected": -414.1009826660156, "loss": 0.0245, "rewards/accuracies": 0.625, "rewards/chosen": -0.14678999781608582, "rewards/margins": 0.07087678462266922, "rewards/rejected": -0.21766678988933563, "step": 5820 }, { "epoch": 0.38, "learning_rate": 3.887708767905883e-06, "logits/chosen": -1.7028868198394775, "logits/rejected": -1.396533727645874, "logps/chosen": -374.7129211425781, "logps/rejected": -366.75030517578125, "loss": 0.0206, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.13094571232795715, "rewards/margins": 0.05788036435842514, "rewards/rejected": -0.18882611393928528, "step": 5830 }, { "epoch": 0.38, "learning_rate": 3.882955679837636e-06, "logits/chosen": -1.4821518659591675, "logits/rejected": -1.4822427034378052, "logps/chosen": -358.50177001953125, "logps/rejected": -411.06005859375, "loss": 0.0388, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11912477016448975, "rewards/margins": 0.05018296092748642, "rewards/rejected": -0.16930773854255676, "step": 5840 }, { "epoch": 0.38, "learning_rate": 3.878195377595113e-06, "logits/chosen": -1.4833205938339233, "logits/rejected": -1.3330621719360352, "logps/chosen": -335.9757995605469, "logps/rejected": -425.16485595703125, "loss": 0.0323, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.100070059299469, "rewards/margins": 0.09475259482860565, "rewards/rejected": -0.19482265412807465, "step": 5850 }, { "epoch": 0.38, "learning_rate": 3.873427886010384e-06, "logits/chosen": -1.5438989400863647, "logits/rejected": -1.2941625118255615, "logps/chosen": -275.01947021484375, "logps/rejected": -345.59893798828125, "loss": 0.0201, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.084567591547966, "rewards/margins": 0.08966846764087677, "rewards/rejected": -0.17423605918884277, "step": 5860 }, { "epoch": 0.38, "learning_rate": 3.868653229953021e-06, "logits/chosen": -1.5654484033584595, "logits/rejected": -1.3439537286758423, "logps/chosen": -330.6562194824219, "logps/rejected": -433.37640380859375, "loss": 0.0128, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.09344138950109482, "rewards/margins": 0.10478410869836807, "rewards/rejected": -0.1982254981994629, "step": 5870 }, { "epoch": 0.38, "learning_rate": 3.8638714343299675e-06, "logits/chosen": -1.5149357318878174, "logits/rejected": -1.3934400081634521, "logps/chosen": -327.4629821777344, "logps/rejected": -422.2232360839844, "loss": 0.014, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10912027209997177, "rewards/margins": 0.07964450865983963, "rewards/rejected": -0.1887647807598114, "step": 5880 }, { "epoch": 0.39, "learning_rate": 3.859082524085414e-06, "logits/chosen": -1.4048163890838623, "logits/rejected": -1.1734538078308105, "logps/chosen": -394.7745361328125, "logps/rejected": -397.6026611328125, "loss": 0.0258, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.12243137508630753, "rewards/margins": 0.0630672425031662, "rewards/rejected": -0.18549862504005432, "step": 5890 }, { "epoch": 0.39, "learning_rate": 3.854286524200659e-06, "logits/chosen": -1.6986172199249268, "logits/rejected": -1.3177049160003662, "logps/chosen": -390.75067138671875, "logps/rejected": -394.251953125, "loss": 0.0237, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.11433323472738266, "rewards/margins": 0.0541377067565918, "rewards/rejected": -0.16847094893455505, "step": 5900 }, { "epoch": 0.39, "eval_logits/chosen": -1.3840978145599365, "eval_logits/rejected": -1.236135721206665, "eval_logps/chosen": -367.7495422363281, "eval_logps/rejected": -428.4645080566406, "eval_loss": 0.024681083858013153, "eval_rewards/accuracies": 0.6604999899864197, "eval_rewards/chosen": -0.13574457168579102, "eval_rewards/margins": 0.08110800385475159, "eval_rewards/rejected": -0.2168525904417038, "eval_runtime": 712.2717, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 5900 }, { "epoch": 0.39, "learning_rate": 3.849483459693991e-06, "logits/chosen": -1.4808847904205322, "logits/rejected": -1.2697575092315674, "logps/chosen": -355.9921875, "logps/rejected": -433.46124267578125, "loss": 0.0142, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14631548523902893, "rewards/margins": 0.11613816022872925, "rewards/rejected": -0.2624536454677582, "step": 5910 }, { "epoch": 0.39, "learning_rate": 3.844673355620544e-06, "logits/chosen": -1.4002643823623657, "logits/rejected": -1.160140872001648, "logps/chosen": -446.7584533691406, "logps/rejected": -500.3331604003906, "loss": 0.0209, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20018558204174042, "rewards/margins": 0.09186019748449326, "rewards/rejected": -0.2920457720756531, "step": 5920 }, { "epoch": 0.39, "learning_rate": 3.839856237072178e-06, "logits/chosen": -1.216516375541687, "logits/rejected": -1.1570258140563965, "logps/chosen": -389.444091796875, "logps/rejected": -533.6334838867188, "loss": 0.031, "rewards/accuracies": 0.625, "rewards/chosen": -0.20686998963356018, "rewards/margins": 0.12699730694293976, "rewards/rejected": -0.33386728167533875, "step": 5930 }, { "epoch": 0.39, "learning_rate": 3.8350321291773455e-06, "logits/chosen": -1.3091750144958496, "logits/rejected": -1.1703035831451416, "logps/chosen": -320.9457092285156, "logps/rejected": -360.7986755371094, "loss": 0.0236, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.11842372268438339, "rewards/margins": 0.08404932916164398, "rewards/rejected": -0.20247304439544678, "step": 5940 }, { "epoch": 0.39, "learning_rate": 3.830201057100953e-06, "logits/chosen": -1.6962482929229736, "logits/rejected": -1.634911298751831, "logps/chosen": -300.6828308105469, "logps/rejected": -407.61383056640625, "loss": 0.0161, "rewards/accuracies": 0.75, "rewards/chosen": -0.10890986025333405, "rewards/margins": 0.09143822640180588, "rewards/rejected": -0.20034806430339813, "step": 5950 }, { "epoch": 0.39, "learning_rate": 3.82536304604424e-06, "logits/chosen": -1.4389673471450806, "logits/rejected": -1.3319076299667358, "logps/chosen": -304.05865478515625, "logps/rejected": -347.17950439453125, "loss": 0.0414, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.06789440661668777, "rewards/margins": 0.07485699653625488, "rewards/rejected": -0.14275141060352325, "step": 5960 }, { "epoch": 0.39, "learning_rate": 3.8205181212446435e-06, "logits/chosen": -1.7190014123916626, "logits/rejected": -1.5242640972137451, "logps/chosen": -352.6591491699219, "logps/rejected": -372.52044677734375, "loss": 0.0134, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08210022747516632, "rewards/margins": 0.06026480346918106, "rewards/rejected": -0.14236502349376678, "step": 5970 }, { "epoch": 0.39, "learning_rate": 3.815666307975664e-06, "logits/chosen": -1.536247968673706, "logits/rejected": -1.494377851486206, "logps/chosen": -338.80792236328125, "logps/rejected": -367.85357666015625, "loss": 0.0212, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1014900803565979, "rewards/margins": 0.048227667808532715, "rewards/rejected": -0.14971774816513062, "step": 5980 }, { "epoch": 0.39, "learning_rate": 3.8108076315467346e-06, "logits/chosen": -1.7070388793945312, "logits/rejected": -1.58781898021698, "logps/chosen": -357.9298400878906, "logps/rejected": -353.7469787597656, "loss": 0.0256, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10064709186553955, "rewards/margins": 0.07054366171360016, "rewards/rejected": -0.1711907684803009, "step": 5990 }, { "epoch": 0.39, "learning_rate": 3.805942117303093e-06, "logits/chosen": -1.7836854457855225, "logits/rejected": -1.5686861276626587, "logps/chosen": -402.962646484375, "logps/rejected": -414.76806640625, "loss": 0.025, "rewards/accuracies": 0.75, "rewards/chosen": -0.08692711591720581, "rewards/margins": 0.05832895636558533, "rewards/rejected": -0.14525607228279114, "step": 6000 }, { "epoch": 0.39, "eval_logits/chosen": -1.4747037887573242, "eval_logits/rejected": -1.3252232074737549, "eval_logps/chosen": -325.64068603515625, "eval_logps/rejected": -375.6243896484375, "eval_loss": 0.025045854970812798, "eval_rewards/accuracies": 0.6604999899864197, "eval_rewards/chosen": -0.09363573789596558, "eval_rewards/margins": 0.07037677615880966, "eval_rewards/rejected": -0.16401249170303345, "eval_runtime": 717.8154, "eval_samples_per_second": 2.786, "eval_steps_per_second": 1.393, "step": 6000 }, { "epoch": 0.39, "learning_rate": 3.8010697906256446e-06, "logits/chosen": -1.5050716400146484, "logits/rejected": -1.3207429647445679, "logps/chosen": -345.32421875, "logps/rejected": -376.1249084472656, "loss": 0.0271, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1386277675628662, "rewards/margins": 0.05863092094659805, "rewards/rejected": -0.19725868105888367, "step": 6010 }, { "epoch": 0.39, "learning_rate": 3.7961906769308323e-06, "logits/chosen": -1.2243303060531616, "logits/rejected": -1.1939160823822021, "logps/chosen": -326.7681884765625, "logps/rejected": -390.2221984863281, "loss": 0.0206, "rewards/accuracies": 0.625, "rewards/chosen": -0.12053300440311432, "rewards/margins": 0.053379058837890625, "rewards/rejected": -0.17391204833984375, "step": 6020 }, { "epoch": 0.39, "learning_rate": 3.7913048016705028e-06, "logits/chosen": -1.4837299585342407, "logits/rejected": -1.3631340265274048, "logps/chosen": -349.81494140625, "logps/rejected": -402.7240295410156, "loss": 0.0127, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08987536281347275, "rewards/margins": 0.05495712161064148, "rewards/rejected": -0.14483249187469482, "step": 6030 }, { "epoch": 0.4, "learning_rate": 3.786412190331775e-06, "logits/chosen": -1.4572464227676392, "logits/rejected": -1.007939100265503, "logps/chosen": -305.79888916015625, "logps/rejected": -344.55291748046875, "loss": 0.0261, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10293839126825333, "rewards/margins": 0.0758533626794815, "rewards/rejected": -0.17879177629947662, "step": 6040 }, { "epoch": 0.4, "learning_rate": 3.781512868436906e-06, "logits/chosen": -1.3904000520706177, "logits/rejected": -1.3835663795471191, "logps/chosen": -227.48593139648438, "logps/rejected": -283.7209167480469, "loss": 0.0163, "rewards/accuracies": 0.625, "rewards/chosen": -0.09487520158290863, "rewards/margins": 0.048445992171764374, "rewards/rejected": -0.1433212012052536, "step": 6050 }, { "epoch": 0.4, "learning_rate": 3.7766068615431605e-06, "logits/chosen": -1.2308261394500732, "logits/rejected": -1.127966284751892, "logps/chosen": -376.37896728515625, "logps/rejected": -389.527587890625, "loss": 0.0323, "rewards/accuracies": 0.625, "rewards/chosen": -0.11929190158843994, "rewards/margins": 0.061783622950315475, "rewards/rejected": -0.18107552826404572, "step": 6060 }, { "epoch": 0.4, "learning_rate": 3.771694195242671e-06, "logits/chosen": -1.5557541847229004, "logits/rejected": -0.9996698498725891, "logps/chosen": -417.5899353027344, "logps/rejected": -378.42071533203125, "loss": 0.034, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11841116845607758, "rewards/margins": 0.07256177067756653, "rewards/rejected": -0.1909729540348053, "step": 6070 }, { "epoch": 0.4, "learning_rate": 3.766774895162314e-06, "logits/chosen": -1.273895502090454, "logits/rejected": -1.2442222833633423, "logps/chosen": -387.3916015625, "logps/rejected": -370.888427734375, "loss": 0.0174, "rewards/accuracies": 0.625, "rewards/chosen": -0.14085188508033752, "rewards/margins": 0.03963253274559975, "rewards/rejected": -0.18048441410064697, "step": 6080 }, { "epoch": 0.4, "learning_rate": 3.7618489869635666e-06, "logits/chosen": -1.277278184890747, "logits/rejected": -1.1270201206207275, "logps/chosen": -397.9208984375, "logps/rejected": -430.5558166503906, "loss": 0.0341, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.15089401602745056, "rewards/margins": 0.05237780138850212, "rewards/rejected": -0.2032717913389206, "step": 6090 }, { "epoch": 0.4, "learning_rate": 3.756916496342379e-06, "logits/chosen": -1.4025242328643799, "logits/rejected": -1.4383857250213623, "logps/chosen": -296.19927978515625, "logps/rejected": -378.6092834472656, "loss": 0.0267, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10932741314172745, "rewards/margins": 0.07395712286233902, "rewards/rejected": -0.18328453600406647, "step": 6100 }, { "epoch": 0.4, "eval_logits/chosen": -1.3186856508255005, "eval_logits/rejected": -1.1770720481872559, "eval_logps/chosen": -339.8831481933594, "eval_logps/rejected": -396.3333740234375, "eval_loss": 0.02451777271926403, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -0.10787823051214218, "eval_rewards/margins": 0.07684329152107239, "eval_rewards/rejected": -0.18472151458263397, "eval_runtime": 716.0453, "eval_samples_per_second": 2.793, "eval_steps_per_second": 1.397, "step": 6100 }, { "epoch": 0.4, "learning_rate": 3.751977449029039e-06, "logits/chosen": -1.0779231786727905, "logits/rejected": -1.0133717060089111, "logps/chosen": -380.71527099609375, "logps/rejected": -446.07135009765625, "loss": 0.0217, "rewards/accuracies": 0.75, "rewards/chosen": -0.12063882499933243, "rewards/margins": 0.10275165736675262, "rewards/rejected": -0.22339048981666565, "step": 6110 }, { "epoch": 0.4, "learning_rate": 3.747031870788037e-06, "logits/chosen": -1.4621073007583618, "logits/rejected": -1.2584234476089478, "logps/chosen": -408.7204895019531, "logps/rejected": -416.100341796875, "loss": 0.0212, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09196718782186508, "rewards/margins": 0.08979923278093338, "rewards/rejected": -0.18176642060279846, "step": 6120 }, { "epoch": 0.4, "learning_rate": 3.7420797874179326e-06, "logits/chosen": -1.1391491889953613, "logits/rejected": -1.0356509685516357, "logps/chosen": -354.79217529296875, "logps/rejected": -379.3558349609375, "loss": 0.024, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.11768273264169693, "rewards/margins": 0.08345271646976471, "rewards/rejected": -0.20113544166088104, "step": 6130 }, { "epoch": 0.4, "learning_rate": 3.7371212247512167e-06, "logits/chosen": -1.5690444707870483, "logits/rejected": -1.2695146799087524, "logps/chosen": -418.77069091796875, "logps/rejected": -434.9921875, "loss": 0.0313, "rewards/accuracies": 0.625, "rewards/chosen": -0.09237619489431381, "rewards/margins": 0.08132969588041306, "rewards/rejected": -0.17370590567588806, "step": 6140 }, { "epoch": 0.4, "learning_rate": 3.7321562086541817e-06, "logits/chosen": -1.3362683057785034, "logits/rejected": -1.2703540325164795, "logps/chosen": -359.4862365722656, "logps/rejected": -429.68841552734375, "loss": 0.0152, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1065061092376709, "rewards/margins": 0.06941194832324982, "rewards/rejected": -0.17591805756092072, "step": 6150 }, { "epoch": 0.4, "learning_rate": 3.7271847650267834e-06, "logits/chosen": -1.2099182605743408, "logits/rejected": -1.0662115812301636, "logps/chosen": -315.2735595703125, "logps/rejected": -363.6462097167969, "loss": 0.0446, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.11533892154693604, "rewards/margins": 0.04926292970776558, "rewards/rejected": -0.16460183262825012, "step": 6160 }, { "epoch": 0.4, "learning_rate": 3.7222069198025086e-06, "logits/chosen": -1.1777377128601074, "logits/rejected": -1.0358705520629883, "logps/chosen": -341.04632568359375, "logps/rejected": -394.2268981933594, "loss": 0.0271, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13250383734703064, "rewards/margins": 0.0737728625535965, "rewards/rejected": -0.20627668499946594, "step": 6170 }, { "epoch": 0.4, "learning_rate": 3.7172226989482353e-06, "logits/chosen": -1.181279182434082, "logits/rejected": -1.0866568088531494, "logps/chosen": -338.822998046875, "logps/rejected": -399.9607238769531, "loss": 0.0214, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.12885017693042755, "rewards/margins": 0.06328286230564117, "rewards/rejected": -0.1921330690383911, "step": 6180 }, { "epoch": 0.4, "learning_rate": 3.7122321284641007e-06, "logits/chosen": -1.447378396987915, "logits/rejected": -1.2162498235702515, "logps/chosen": -512.1373901367188, "logps/rejected": -513.296875, "loss": 0.0171, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14935074746608734, "rewards/margins": 0.11164933443069458, "rewards/rejected": -0.2610000967979431, "step": 6190 }, { "epoch": 0.41, "learning_rate": 3.707235234383365e-06, "logits/chosen": -1.2193877696990967, "logits/rejected": -1.1026809215545654, "logps/chosen": -363.25262451171875, "logps/rejected": -334.28631591796875, "loss": 0.0157, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10199868679046631, "rewards/margins": 0.057812582701444626, "rewards/rejected": -0.15981127321720123, "step": 6200 }, { "epoch": 0.41, "eval_logits/chosen": -1.3533542156219482, "eval_logits/rejected": -1.2099162340164185, "eval_logps/chosen": -352.04486083984375, "eval_logps/rejected": -408.590576171875, "eval_loss": 0.02436411939561367, "eval_rewards/accuracies": 0.6600000262260437, "eval_rewards/chosen": -0.12003994733095169, "eval_rewards/margins": 0.07693877816200256, "eval_rewards/rejected": -0.19697873294353485, "eval_runtime": 715.1501, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.398, "step": 6200 }, { "epoch": 0.41, "learning_rate": 3.702232042772277e-06, "logits/chosen": -1.3497097492218018, "logits/rejected": -1.2977203130722046, "logps/chosen": -356.9574279785156, "logps/rejected": -424.80084228515625, "loss": 0.0248, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15441124141216278, "rewards/margins": 0.0901980772614479, "rewards/rejected": -0.2446093112230301, "step": 6210 }, { "epoch": 0.41, "learning_rate": 3.6972225797299325e-06, "logits/chosen": -1.327143907546997, "logits/rejected": -1.3463459014892578, "logps/chosen": -420.46795654296875, "logps/rejected": -504.77911376953125, "loss": 0.0265, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16995935142040253, "rewards/margins": 0.10024671256542206, "rewards/rejected": -0.2702060341835022, "step": 6220 }, { "epoch": 0.41, "learning_rate": 3.692206871388147e-06, "logits/chosen": -1.3235714435577393, "logits/rejected": -0.9383972883224487, "logps/chosen": -410.0464782714844, "logps/rejected": -482.36419677734375, "loss": 0.018, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17765776813030243, "rewards/margins": 0.11354689300060272, "rewards/rejected": -0.29120463132858276, "step": 6230 }, { "epoch": 0.41, "learning_rate": 3.6871849439113115e-06, "logits/chosen": -0.7814895510673523, "logits/rejected": -0.9545269012451172, "logps/chosen": -384.17742919921875, "logps/rejected": -453.65118408203125, "loss": 0.025, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1542665660381317, "rewards/margins": 0.07902156561613083, "rewards/rejected": -0.23328813910484314, "step": 6240 }, { "epoch": 0.41, "learning_rate": 3.682156823496259e-06, "logits/chosen": -1.1929423809051514, "logits/rejected": -0.8324581980705261, "logps/chosen": -367.5755920410156, "logps/rejected": -435.47772216796875, "loss": 0.0175, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15366163849830627, "rewards/margins": 0.10026772320270538, "rewards/rejected": -0.25392937660217285, "step": 6250 }, { "epoch": 0.41, "learning_rate": 3.67712253637213e-06, "logits/chosen": -1.1682441234588623, "logits/rejected": -0.9871164560317993, "logps/chosen": -442.9244079589844, "logps/rejected": -427.46636962890625, "loss": 0.0159, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1506299525499344, "rewards/margins": 0.07445450127124786, "rewards/rejected": -0.22508445382118225, "step": 6260 }, { "epoch": 0.41, "learning_rate": 3.672082108800231e-06, "logits/chosen": -0.9721360206604004, "logits/rejected": -0.990211009979248, "logps/chosen": -425.170166015625, "logps/rejected": -484.49151611328125, "loss": 0.0311, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2113543450832367, "rewards/margins": 0.09336082637310028, "rewards/rejected": -0.3047151267528534, "step": 6270 }, { "epoch": 0.41, "learning_rate": 3.6670355670739012e-06, "logits/chosen": -1.086909294128418, "logits/rejected": -0.9387370347976685, "logps/chosen": -321.0994567871094, "logps/rejected": -417.4297790527344, "loss": 0.0121, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1591460406780243, "rewards/margins": 0.09896901994943619, "rewards/rejected": -0.2581150531768799, "step": 6280 }, { "epoch": 0.41, "learning_rate": 3.6619829375183745e-06, "logits/chosen": -1.2397186756134033, "logits/rejected": -1.114363670349121, "logps/chosen": -401.2189025878906, "logps/rejected": -499.301025390625, "loss": 0.0297, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17521636188030243, "rewards/margins": 0.11415299028158188, "rewards/rejected": -0.2893693745136261, "step": 6290 }, { "epoch": 0.41, "learning_rate": 3.6569242464906427e-06, "logits/chosen": -1.234266996383667, "logits/rejected": -1.1459366083145142, "logps/chosen": -307.16473388671875, "logps/rejected": -409.5794372558594, "loss": 0.0339, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09943567216396332, "rewards/margins": 0.08136560022830963, "rewards/rejected": -0.18080130219459534, "step": 6300 }, { "epoch": 0.41, "eval_logits/chosen": -1.3300995826721191, "eval_logits/rejected": -1.1887269020080566, "eval_logps/chosen": -346.13214111328125, "eval_logps/rejected": -402.73675537109375, "eval_loss": 0.025024227797985077, "eval_rewards/accuracies": 0.6644999980926514, "eval_rewards/chosen": -0.11412721127271652, "eval_rewards/margins": 0.07699765264987946, "eval_rewards/rejected": -0.19112485647201538, "eval_runtime": 717.7847, "eval_samples_per_second": 2.786, "eval_steps_per_second": 1.393, "step": 6300 }, { "epoch": 0.41, "learning_rate": 3.6518595203793156e-06, "logits/chosen": -1.25554621219635, "logits/rejected": -1.1617560386657715, "logps/chosen": -357.56634521484375, "logps/rejected": -478.54986572265625, "loss": 0.0131, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09885212779045105, "rewards/margins": 0.10750870406627655, "rewards/rejected": -0.2063608467578888, "step": 6310 }, { "epoch": 0.41, "learning_rate": 3.646788785604485e-06, "logits/chosen": -1.3969213962554932, "logits/rejected": -1.3904623985290527, "logps/chosen": -285.71783447265625, "logps/rejected": -338.4125671386719, "loss": 0.0166, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0800594687461853, "rewards/margins": 0.05493972823023796, "rewards/rejected": -0.13499920070171356, "step": 6320 }, { "epoch": 0.41, "learning_rate": 3.641712068617588e-06, "logits/chosen": -1.264022946357727, "logits/rejected": -1.1449713706970215, "logps/chosen": -364.6275939941406, "logps/rejected": -363.40521240234375, "loss": 0.0255, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10326752811670303, "rewards/margins": 0.0523221418261528, "rewards/rejected": -0.15558966994285583, "step": 6330 }, { "epoch": 0.41, "learning_rate": 3.6366293959012673e-06, "logits/chosen": -1.0860211849212646, "logits/rejected": -0.9140866994857788, "logps/chosen": -268.46905517578125, "logps/rejected": -333.28863525390625, "loss": 0.0367, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08726704120635986, "rewards/margins": 0.08434279263019562, "rewards/rejected": -0.17160983383655548, "step": 6340 }, { "epoch": 0.42, "learning_rate": 3.631540793969233e-06, "logits/chosen": -1.5471423864364624, "logits/rejected": -1.3722846508026123, "logps/chosen": -271.39581298828125, "logps/rejected": -322.4822692871094, "loss": 0.03, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.08277805894613266, "rewards/margins": 0.0494081974029541, "rewards/rejected": -0.13218626379966736, "step": 6350 }, { "epoch": 0.42, "learning_rate": 3.626446289366127e-06, "logits/chosen": -1.448638677597046, "logits/rejected": -1.2076566219329834, "logps/chosen": -330.4332580566406, "logps/rejected": -292.58099365234375, "loss": 0.0238, "rewards/accuracies": 0.625, "rewards/chosen": -0.11028406769037247, "rewards/margins": 0.03366657346487045, "rewards/rejected": -0.14395064115524292, "step": 6360 }, { "epoch": 0.42, "learning_rate": 3.6213459086673786e-06, "logits/chosen": -1.3209408521652222, "logits/rejected": -1.3818042278289795, "logps/chosen": -285.59423828125, "logps/rejected": -370.0176086425781, "loss": 0.0321, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11465664207935333, "rewards/margins": 0.07505010068416595, "rewards/rejected": -0.18970675766468048, "step": 6370 }, { "epoch": 0.42, "learning_rate": 3.6162396784790737e-06, "logits/chosen": -1.1178932189941406, "logits/rejected": -1.0045349597930908, "logps/chosen": -336.37774658203125, "logps/rejected": -382.5118103027344, "loss": 0.0305, "rewards/accuracies": 0.625, "rewards/chosen": -0.11337069422006607, "rewards/margins": 0.04629364609718323, "rewards/rejected": -0.1596643477678299, "step": 6380 }, { "epoch": 0.42, "learning_rate": 3.6111276254378095e-06, "logits/chosen": -1.4404252767562866, "logits/rejected": -1.344377875328064, "logps/chosen": -310.81597900390625, "logps/rejected": -392.5060729980469, "loss": 0.0156, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0828590840101242, "rewards/margins": 0.086103655397892, "rewards/rejected": -0.1689627468585968, "step": 6390 }, { "epoch": 0.42, "learning_rate": 3.606009776210559e-06, "logits/chosen": -1.3774988651275635, "logits/rejected": -1.3316395282745361, "logps/chosen": -365.7055969238281, "logps/rejected": -410.6561584472656, "loss": 0.0239, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12508191168308258, "rewards/margins": 0.08860930055379868, "rewards/rejected": -0.21369118988513947, "step": 6400 }, { "epoch": 0.42, "eval_logits/chosen": -1.3053640127182007, "eval_logits/rejected": -1.1653472185134888, "eval_logps/chosen": -341.53546142578125, "eval_logps/rejected": -400.2938232421875, "eval_loss": 0.02557436376810074, "eval_rewards/accuracies": 0.6545000076293945, "eval_rewards/chosen": -0.109530508518219, "eval_rewards/margins": 0.07915138453245163, "eval_rewards/rejected": -0.18868190050125122, "eval_runtime": 715.0603, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.398, "step": 6400 }, { "epoch": 0.42, "learning_rate": 3.600886157494531e-06, "logits/chosen": -1.5063680410385132, "logits/rejected": -1.396681785583496, "logps/chosen": -380.1568603515625, "logps/rejected": -447.0098571777344, "loss": 0.0169, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11730033159255981, "rewards/margins": 0.07863511890172958, "rewards/rejected": -0.1959354430437088, "step": 6410 }, { "epoch": 0.42, "learning_rate": 3.5957567960170304e-06, "logits/chosen": -1.4328868389129639, "logits/rejected": -0.952072024345398, "logps/chosen": -404.99005126953125, "logps/rejected": -383.29718017578125, "loss": 0.0275, "rewards/accuracies": 0.75, "rewards/chosen": -0.11380495131015778, "rewards/margins": 0.09696482867002487, "rewards/rejected": -0.21076980233192444, "step": 6420 }, { "epoch": 0.42, "learning_rate": 3.590621718535319e-06, "logits/chosen": -0.982082188129425, "logits/rejected": -0.9484511613845825, "logps/chosen": -346.18194580078125, "logps/rejected": -458.25750732421875, "loss": 0.0331, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1494595855474472, "rewards/margins": 0.11296814680099487, "rewards/rejected": -0.26242774724960327, "step": 6430 }, { "epoch": 0.42, "learning_rate": 3.5854809518364775e-06, "logits/chosen": -1.4372667074203491, "logits/rejected": -1.294067144393921, "logps/chosen": -332.11663818359375, "logps/rejected": -388.09613037109375, "loss": 0.0238, "rewards/accuracies": 0.625, "rewards/chosen": -0.08596986532211304, "rewards/margins": 0.10049222409725189, "rewards/rejected": -0.18646208941936493, "step": 6440 }, { "epoch": 0.42, "learning_rate": 3.580334522737262e-06, "logits/chosen": -1.2732690572738647, "logits/rejected": -1.184430480003357, "logps/chosen": -289.6878662109375, "logps/rejected": -334.49371337890625, "loss": 0.0146, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08774345368146896, "rewards/margins": 0.07845543324947357, "rewards/rejected": -0.16619887948036194, "step": 6450 }, { "epoch": 0.42, "learning_rate": 3.575182458083968e-06, "logits/chosen": -1.2820252180099487, "logits/rejected": -1.2376700639724731, "logps/chosen": -342.3106689453125, "logps/rejected": -426.3113708496094, "loss": 0.0187, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10150563716888428, "rewards/margins": 0.11200573295354843, "rewards/rejected": -0.21351134777069092, "step": 6460 }, { "epoch": 0.42, "learning_rate": 3.5700247847522883e-06, "logits/chosen": -1.5064538717269897, "logits/rejected": -1.4124815464019775, "logps/chosen": -274.50555419921875, "logps/rejected": -356.1802062988281, "loss": 0.0419, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0717075765132904, "rewards/margins": 0.08169287443161011, "rewards/rejected": -0.1534004509449005, "step": 6470 }, { "epoch": 0.42, "learning_rate": 3.5648615296471743e-06, "logits/chosen": -1.1872482299804688, "logits/rejected": -1.1185665130615234, "logps/chosen": -302.097900390625, "logps/rejected": -438.69598388671875, "loss": 0.022, "rewards/accuracies": 0.75, "rewards/chosen": -0.10473509877920151, "rewards/margins": 0.09852424263954163, "rewards/rejected": -0.20325934886932373, "step": 6480 }, { "epoch": 0.42, "learning_rate": 3.559692719702693e-06, "logits/chosen": -0.7233016490936279, "logits/rejected": -0.5935336351394653, "logps/chosen": -434.8587951660156, "logps/rejected": -483.56268310546875, "loss": 0.033, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1487136036157608, "rewards/margins": 0.10543341934680939, "rewards/rejected": -0.2541469931602478, "step": 6490 }, { "epoch": 0.43, "learning_rate": 3.55451838188189e-06, "logits/chosen": -1.013187289237976, "logits/rejected": -1.1005239486694336, "logps/chosen": -387.13250732421875, "logps/rejected": -501.197021484375, "loss": 0.0609, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.12584570050239563, "rewards/margins": 0.09213140606880188, "rewards/rejected": -0.21797709167003632, "step": 6500 }, { "epoch": 0.43, "eval_logits/chosen": -0.8671056032180786, "eval_logits/rejected": -0.7519211173057556, "eval_logps/chosen": -411.0542907714844, "eval_logps/rejected": -475.3233642578125, "eval_loss": 0.02578655816614628, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -0.1790493130683899, "eval_rewards/margins": 0.08466215431690216, "eval_rewards/rejected": -0.26371145248413086, "eval_runtime": 716.7315, "eval_samples_per_second": 2.79, "eval_steps_per_second": 1.395, "step": 6500 }, { "epoch": 0.43, "learning_rate": 3.549338543176645e-06, "logits/chosen": -0.9332177042961121, "logits/rejected": -0.8590467572212219, "logps/chosen": -489.1602478027344, "logps/rejected": -529.0696411132812, "loss": 0.0302, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1826716661453247, "rewards/margins": 0.07226262986660004, "rewards/rejected": -0.25493428111076355, "step": 6510 }, { "epoch": 0.43, "learning_rate": 3.5441532306075342e-06, "logits/chosen": -1.0239486694335938, "logits/rejected": -0.9157370328903198, "logps/chosen": -446.84478759765625, "logps/rejected": -538.4420166015625, "loss": 0.0244, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2154255211353302, "rewards/margins": 0.04503200948238373, "rewards/rejected": -0.2604575455188751, "step": 6520 }, { "epoch": 0.43, "learning_rate": 3.5389624712236894e-06, "logits/chosen": -0.7710838317871094, "logits/rejected": -0.6246452331542969, "logps/chosen": -410.34710693359375, "logps/rejected": -418.6532287597656, "loss": 0.0278, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.1963113248348236, "rewards/margins": 0.03523395210504532, "rewards/rejected": -0.23154528439044952, "step": 6530 }, { "epoch": 0.43, "learning_rate": 3.533766292102653e-06, "logits/chosen": -0.5735124349594116, "logits/rejected": -0.7767961621284485, "logps/chosen": -421.01885986328125, "logps/rejected": -465.1748962402344, "loss": 0.0609, "rewards/accuracies": 0.625, "rewards/chosen": -0.21117432415485382, "rewards/margins": 0.05584477260708809, "rewards/rejected": -0.2670190930366516, "step": 6540 }, { "epoch": 0.43, "learning_rate": 3.5285647203502404e-06, "logits/chosen": -1.231539011001587, "logits/rejected": -0.9936496615409851, "logps/chosen": -435.5107421875, "logps/rejected": -455.29022216796875, "loss": 0.0194, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17823976278305054, "rewards/margins": 0.057545531541109085, "rewards/rejected": -0.2357853204011917, "step": 6550 }, { "epoch": 0.43, "learning_rate": 3.5233577831003983e-06, "logits/chosen": -0.9878465533256531, "logits/rejected": -0.8150946497917175, "logps/chosen": -429.75640869140625, "logps/rejected": -486.04180908203125, "loss": 0.0181, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.17758052051067352, "rewards/margins": 0.07995395362377167, "rewards/rejected": -0.2575344741344452, "step": 6560 }, { "epoch": 0.43, "learning_rate": 3.5181455075150628e-06, "logits/chosen": -0.9305365681648254, "logits/rejected": -0.7193669080734253, "logps/chosen": -330.2605895996094, "logps/rejected": -353.6876220703125, "loss": 0.0255, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1483144462108612, "rewards/margins": 0.07004992663860321, "rewards/rejected": -0.21836435794830322, "step": 6570 }, { "epoch": 0.43, "learning_rate": 3.512927920784016e-06, "logits/chosen": -1.1423308849334717, "logits/rejected": -0.9897313117980957, "logps/chosen": -351.7737731933594, "logps/rejected": -460.94232177734375, "loss": 0.0222, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.12609969079494476, "rewards/margins": 0.12914861738681793, "rewards/rejected": -0.2552483081817627, "step": 6580 }, { "epoch": 0.43, "learning_rate": 3.5077050501247457e-06, "logits/chosen": -1.3219091892242432, "logits/rejected": -0.8576357960700989, "logps/chosen": -373.5453186035156, "logps/rejected": -398.2494812011719, "loss": 0.0253, "rewards/accuracies": 0.625, "rewards/chosen": -0.09253041446208954, "rewards/margins": 0.09209271520376205, "rewards/rejected": -0.1846231371164322, "step": 6590 }, { "epoch": 0.43, "learning_rate": 3.5024769227823042e-06, "logits/chosen": -1.3254988193511963, "logits/rejected": -1.094796061515808, "logps/chosen": -298.9532775878906, "logps/rejected": -338.76800537109375, "loss": 0.0274, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1360819786787033, "rewards/margins": 0.08061237633228302, "rewards/rejected": -0.21669435501098633, "step": 6600 }, { "epoch": 0.43, "eval_logits/chosen": -1.2477281093597412, "eval_logits/rejected": -1.1116926670074463, "eval_logps/chosen": -355.33404541015625, "eval_logps/rejected": -411.8316345214844, "eval_loss": 0.02515600621700287, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -0.12332908064126968, "eval_rewards/margins": 0.07689066976308823, "eval_rewards/rejected": -0.20021973550319672, "eval_runtime": 716.421, "eval_samples_per_second": 2.792, "eval_steps_per_second": 1.396, "step": 6600 }, { "epoch": 0.43, "learning_rate": 3.4972435660291646e-06, "logits/chosen": -1.4781440496444702, "logits/rejected": -1.4482628107070923, "logps/chosen": -374.1960754394531, "logps/rejected": -428.7872009277344, "loss": 0.0198, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13105180859565735, "rewards/margins": 0.07953085750341415, "rewards/rejected": -0.2105826437473297, "step": 6610 }, { "epoch": 0.43, "learning_rate": 3.492005007165079e-06, "logits/chosen": -1.2019245624542236, "logits/rejected": -1.138061285018921, "logps/chosen": -325.6165466308594, "logps/rejected": -396.22381591796875, "loss": 0.0333, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09607954323291779, "rewards/margins": 0.0644506961107254, "rewards/rejected": -0.1605302393436432, "step": 6620 }, { "epoch": 0.43, "learning_rate": 3.4867612735169377e-06, "logits/chosen": -1.5788242816925049, "logits/rejected": -1.1769440174102783, "logps/chosen": -324.53857421875, "logps/rejected": -332.48065185546875, "loss": 0.0282, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09950422495603561, "rewards/margins": 0.08617033064365387, "rewards/rejected": -0.1856745481491089, "step": 6630 }, { "epoch": 0.43, "learning_rate": 3.4815123924386226e-06, "logits/chosen": -1.658078908920288, "logits/rejected": -1.3251959085464478, "logps/chosen": -398.3742980957031, "logps/rejected": -390.64849853515625, "loss": 0.0172, "rewards/accuracies": 0.625, "rewards/chosen": -0.09183292090892792, "rewards/margins": 0.061946701258420944, "rewards/rejected": -0.15377964079380035, "step": 6640 }, { "epoch": 0.44, "learning_rate": 3.4762583913108696e-06, "logits/chosen": -0.9634467959403992, "logits/rejected": -0.850784420967102, "logps/chosen": -439.59918212890625, "logps/rejected": -468.9859313964844, "loss": 0.0258, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16984084248542786, "rewards/margins": 0.07030192017555237, "rewards/rejected": -0.24014274775981903, "step": 6650 }, { "epoch": 0.44, "learning_rate": 3.4709992975411217e-06, "logits/chosen": -1.1142640113830566, "logits/rejected": -0.8459417223930359, "logps/chosen": -418.287353515625, "logps/rejected": -456.25799560546875, "loss": 0.0343, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16332793235778809, "rewards/margins": 0.09023764729499817, "rewards/rejected": -0.25356557965278625, "step": 6660 }, { "epoch": 0.44, "learning_rate": 3.4657351385633886e-06, "logits/chosen": -1.1483465433120728, "logits/rejected": -1.0727392435073853, "logps/chosen": -352.75689697265625, "logps/rejected": -452.12774658203125, "loss": 0.0256, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1674662083387375, "rewards/margins": 0.11157108843326569, "rewards/rejected": -0.2790372669696808, "step": 6670 }, { "epoch": 0.44, "learning_rate": 3.4604659418381024e-06, "logits/chosen": -1.237518072128296, "logits/rejected": -0.8883558511734009, "logps/chosen": -452.83917236328125, "logps/rejected": -537.6838989257812, "loss": 0.0258, "rewards/accuracies": 0.75, "rewards/chosen": -0.23478484153747559, "rewards/margins": 0.11437537521123886, "rewards/rejected": -0.34916022419929504, "step": 6680 }, { "epoch": 0.44, "learning_rate": 3.4551917348519744e-06, "logits/chosen": -1.134535551071167, "logits/rejected": -0.9374223947525024, "logps/chosen": -455.59259033203125, "logps/rejected": -491.38690185546875, "loss": 0.031, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17409385740756989, "rewards/margins": 0.08299489319324493, "rewards/rejected": -0.2570887506008148, "step": 6690 }, { "epoch": 0.44, "learning_rate": 3.4499125451178505e-06, "logits/chosen": -0.6436673402786255, "logits/rejected": -0.687527060508728, "logps/chosen": -413.9833068847656, "logps/rejected": -498.94012451171875, "loss": 0.0308, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20969200134277344, "rewards/margins": 0.0603441521525383, "rewards/rejected": -0.27003616094589233, "step": 6700 }, { "epoch": 0.44, "eval_logits/chosen": -0.9570625424385071, "eval_logits/rejected": -0.8338717818260193, "eval_logps/chosen": -435.3034973144531, "eval_logps/rejected": -504.28302001953125, "eval_loss": 0.025986583903431892, "eval_rewards/accuracies": 0.6579999923706055, "eval_rewards/chosen": -0.20329861342906952, "eval_rewards/margins": 0.08937252312898636, "eval_rewards/rejected": -0.2926711142063141, "eval_runtime": 716.2031, "eval_samples_per_second": 2.793, "eval_steps_per_second": 1.396, "step": 6700 }, { "epoch": 0.44, "learning_rate": 3.4446284001745723e-06, "logits/chosen": -0.5381631851196289, "logits/rejected": -0.5534365773200989, "logps/chosen": -467.60015869140625, "logps/rejected": -535.2893676757812, "loss": 0.0396, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.26136648654937744, "rewards/margins": 0.050942324101924896, "rewards/rejected": -0.31230881810188293, "step": 6710 }, { "epoch": 0.44, "learning_rate": 3.439339327586827e-06, "logits/chosen": -0.8436568379402161, "logits/rejected": -0.912280261516571, "logps/chosen": -331.2764587402344, "logps/rejected": -421.1726989746094, "loss": 0.0194, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.14125874638557434, "rewards/margins": 0.095259889960289, "rewards/rejected": -0.23651862144470215, "step": 6720 }, { "epoch": 0.44, "learning_rate": 3.434045354945008e-06, "logits/chosen": -0.9469612836837769, "logits/rejected": -0.8832687139511108, "logps/chosen": -483.98077392578125, "logps/rejected": -529.2609252929688, "loss": 0.0287, "rewards/accuracies": 0.75, "rewards/chosen": -0.20995616912841797, "rewards/margins": 0.05001606419682503, "rewards/rejected": -0.2599722743034363, "step": 6730 }, { "epoch": 0.44, "learning_rate": 3.4287465098650713e-06, "logits/chosen": -1.3372983932495117, "logits/rejected": -1.0186946392059326, "logps/chosen": -407.8543701171875, "logps/rejected": -447.96533203125, "loss": 0.0178, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.15987886488437653, "rewards/margins": 0.05495258420705795, "rewards/rejected": -0.21483144164085388, "step": 6740 }, { "epoch": 0.44, "learning_rate": 3.423442819988387e-06, "logits/chosen": -0.9015372395515442, "logits/rejected": -0.7367811799049377, "logps/chosen": -347.05621337890625, "logps/rejected": -409.1177062988281, "loss": 0.0295, "rewards/accuracies": 0.75, "rewards/chosen": -0.16366896033287048, "rewards/margins": 0.06925087422132492, "rewards/rejected": -0.2329198569059372, "step": 6750 }, { "epoch": 0.44, "learning_rate": 3.4181343129816e-06, "logits/chosen": -0.8707712292671204, "logits/rejected": -0.8245078921318054, "logps/chosen": -297.7099609375, "logps/rejected": -348.69647216796875, "loss": 0.0343, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1213482990860939, "rewards/margins": 0.06527663767337799, "rewards/rejected": -0.1866249293088913, "step": 6760 }, { "epoch": 0.44, "learning_rate": 3.4128210165364837e-06, "logits/chosen": -0.8718382120132446, "logits/rejected": -0.6119092702865601, "logps/chosen": -326.9593505859375, "logps/rejected": -451.88958740234375, "loss": 0.021, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.13555015623569489, "rewards/margins": 0.11640676110982895, "rewards/rejected": -0.2519569396972656, "step": 6770 }, { "epoch": 0.44, "learning_rate": 3.407502958369795e-06, "logits/chosen": -1.0548205375671387, "logits/rejected": -0.8692372441291809, "logps/chosen": -396.14910888671875, "logps/rejected": -482.7608337402344, "loss": 0.0378, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1537013053894043, "rewards/margins": 0.11582410335540771, "rewards/rejected": -0.2695253789424896, "step": 6780 }, { "epoch": 0.44, "learning_rate": 3.4021801662231297e-06, "logits/chosen": -1.0834194421768188, "logits/rejected": -0.9020644426345825, "logps/chosen": -419.06610107421875, "logps/rejected": -456.6568298339844, "loss": 0.0267, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17018933594226837, "rewards/margins": 0.05986611917614937, "rewards/rejected": -0.23005542159080505, "step": 6790 }, { "epoch": 0.44, "learning_rate": 3.3968526678627793e-06, "logits/chosen": -0.8485884666442871, "logits/rejected": -0.648202121257782, "logps/chosen": -428.04058837890625, "logps/rejected": -449.8564453125, "loss": 0.0442, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16301007568836212, "rewards/margins": 0.06591950356960297, "rewards/rejected": -0.22892959415912628, "step": 6800 }, { "epoch": 0.44, "eval_logits/chosen": -1.031590223312378, "eval_logits/rejected": -0.9081704616546631, "eval_logps/chosen": -388.711181640625, "eval_logps/rejected": -444.3406982421875, "eval_loss": 0.025185106322169304, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -0.15670622885227203, "eval_rewards/margins": 0.07602259516716003, "eval_rewards/rejected": -0.23272882401943207, "eval_runtime": 715.5798, "eval_samples_per_second": 2.795, "eval_steps_per_second": 1.397, "step": 6800 }, { "epoch": 0.45, "learning_rate": 3.391520491079586e-06, "logits/chosen": -1.4769017696380615, "logits/rejected": -1.094515085220337, "logps/chosen": -344.1140441894531, "logps/rejected": -359.5335693359375, "loss": 0.0547, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1481514275074005, "rewards/margins": 0.04594920575618744, "rewards/rejected": -0.19410061836242676, "step": 6810 }, { "epoch": 0.45, "learning_rate": 3.3861836636887936e-06, "logits/chosen": -1.17664635181427, "logits/rejected": -0.881076455116272, "logps/chosen": -413.3184509277344, "logps/rejected": -434.80841064453125, "loss": 0.0215, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1464996337890625, "rewards/margins": 0.06796838343143463, "rewards/rejected": -0.21446803212165833, "step": 6820 }, { "epoch": 0.45, "learning_rate": 3.3808422135299106e-06, "logits/chosen": -1.1393859386444092, "logits/rejected": -1.0811306238174438, "logps/chosen": -441.72625732421875, "logps/rejected": -559.4169921875, "loss": 0.0168, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15604844689369202, "rewards/margins": 0.06461524218320847, "rewards/rejected": -0.22066371142864227, "step": 6830 }, { "epoch": 0.45, "learning_rate": 3.375496168466556e-06, "logits/chosen": -1.2507377862930298, "logits/rejected": -0.9604101181030273, "logps/chosen": -315.4072265625, "logps/rejected": -314.32733154296875, "loss": 0.0164, "rewards/accuracies": 0.625, "rewards/chosen": -0.10046914964914322, "rewards/margins": 0.05484669655561447, "rewards/rejected": -0.1553158313035965, "step": 6840 }, { "epoch": 0.45, "learning_rate": 3.3701455563863205e-06, "logits/chosen": -1.5131487846374512, "logits/rejected": -1.3059285879135132, "logps/chosen": -444.54144287109375, "logps/rejected": -516.2932739257812, "loss": 0.0203, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.15266673266887665, "rewards/margins": 0.10807087272405624, "rewards/rejected": -0.2607375979423523, "step": 6850 }, { "epoch": 0.45, "learning_rate": 3.3647904052006174e-06, "logits/chosen": -1.3105812072753906, "logits/rejected": -1.212453842163086, "logps/chosen": -423.3076171875, "logps/rejected": -509.37188720703125, "loss": 0.0221, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15800932049751282, "rewards/margins": 0.08305768668651581, "rewards/rejected": -0.24106702208518982, "step": 6860 }, { "epoch": 0.45, "learning_rate": 3.3594307428445383e-06, "logits/chosen": -1.4982898235321045, "logits/rejected": -1.1128604412078857, "logps/chosen": -483.586181640625, "logps/rejected": -520.4454345703125, "loss": 0.0145, "rewards/accuracies": 0.75, "rewards/chosen": -0.1503354012966156, "rewards/margins": 0.06827588379383087, "rewards/rejected": -0.21861127018928528, "step": 6870 }, { "epoch": 0.45, "learning_rate": 3.354066597276707e-06, "logits/chosen": -0.9910423159599304, "logits/rejected": -0.9390872120857239, "logps/chosen": -390.6603088378906, "logps/rejected": -500.29681396484375, "loss": 0.0295, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16709603369235992, "rewards/margins": 0.07427604496479034, "rewards/rejected": -0.24137206375598907, "step": 6880 }, { "epoch": 0.45, "learning_rate": 3.348697996479136e-06, "logits/chosen": -1.167785406112671, "logits/rejected": -0.9886842966079712, "logps/chosen": -403.75225830078125, "logps/rejected": -406.30548095703125, "loss": 0.0217, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1759433001279831, "rewards/margins": 0.056024931371212006, "rewards/rejected": -0.2319682389497757, "step": 6890 }, { "epoch": 0.45, "learning_rate": 3.3433249684570757e-06, "logits/chosen": -0.8083165287971497, "logits/rejected": -0.6016203165054321, "logps/chosen": -356.08868408203125, "logps/rejected": -402.3526611328125, "loss": 0.0454, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17825916409492493, "rewards/margins": 0.09198234975337982, "rewards/rejected": -0.27024149894714355, "step": 6900 }, { "epoch": 0.45, "eval_logits/chosen": -0.9271209836006165, "eval_logits/rejected": -0.809074878692627, "eval_logps/chosen": -417.9737854003906, "eval_logps/rejected": -474.31805419921875, "eval_loss": 0.024388469755649567, "eval_rewards/accuracies": 0.6660000085830688, "eval_rewards/chosen": -0.185968816280365, "eval_rewards/margins": 0.07673736661672592, "eval_rewards/rejected": -0.2627061903476715, "eval_runtime": 716.1909, "eval_samples_per_second": 2.793, "eval_steps_per_second": 1.396, "step": 6900 }, { "epoch": 0.45, "learning_rate": 3.3379475412388724e-06, "logits/chosen": -1.0178124904632568, "logits/rejected": -0.8541426658630371, "logps/chosen": -419.1349182128906, "logps/rejected": -487.2738342285156, "loss": 0.0356, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17717988789081573, "rewards/margins": 0.10183756053447723, "rewards/rejected": -0.27901747822761536, "step": 6910 }, { "epoch": 0.45, "learning_rate": 3.3325657428758207e-06, "logits/chosen": -0.7613261938095093, "logits/rejected": -0.796710193157196, "logps/chosen": -437.67584228515625, "logps/rejected": -524.4844360351562, "loss": 0.0364, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19400033354759216, "rewards/margins": 0.08598187565803528, "rewards/rejected": -0.27998223900794983, "step": 6920 }, { "epoch": 0.45, "learning_rate": 3.3271796014420175e-06, "logits/chosen": -1.0257625579833984, "logits/rejected": -0.6929565668106079, "logps/chosen": -460.5440368652344, "logps/rejected": -558.5516357421875, "loss": 0.029, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24809376895427704, "rewards/margins": 0.11571590602397919, "rewards/rejected": -0.36380964517593384, "step": 6930 }, { "epoch": 0.45, "learning_rate": 3.3217891450342142e-06, "logits/chosen": -0.8528691530227661, "logits/rejected": -0.7414941191673279, "logps/chosen": -478.1455078125, "logps/rejected": -514.1324462890625, "loss": 0.0109, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2195471078157425, "rewards/margins": 0.10497979819774628, "rewards/rejected": -0.3245268762111664, "step": 6940 }, { "epoch": 0.45, "learning_rate": 3.3163944017716733e-06, "logits/chosen": -1.247727394104004, "logits/rejected": -1.0572454929351807, "logps/chosen": -408.42938232421875, "logps/rejected": -457.87127685546875, "loss": 0.016, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1920585185289383, "rewards/margins": 0.08253969252109528, "rewards/rejected": -0.27459821105003357, "step": 6950 }, { "epoch": 0.46, "learning_rate": 3.310995399796017e-06, "logits/chosen": -1.306460976600647, "logits/rejected": -1.2302920818328857, "logps/chosen": -442.58355712890625, "logps/rejected": -488.088623046875, "loss": 0.0364, "rewards/accuracies": 0.5, "rewards/chosen": -0.16859734058380127, "rewards/margins": 0.052307210862636566, "rewards/rejected": -0.22090455889701843, "step": 6960 }, { "epoch": 0.46, "learning_rate": 3.305592167271085e-06, "logits/chosen": -1.1445953845977783, "logits/rejected": -0.8947767019271851, "logps/chosen": -337.24591064453125, "logps/rejected": -410.85858154296875, "loss": 0.0276, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13999974727630615, "rewards/margins": 0.08452476561069489, "rewards/rejected": -0.22452449798583984, "step": 6970 }, { "epoch": 0.46, "learning_rate": 3.3001847323827846e-06, "logits/chosen": -1.0822927951812744, "logits/rejected": -1.2282577753067017, "logps/chosen": -426.80419921875, "logps/rejected": -499.98114013671875, "loss": 0.0246, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15870651602745056, "rewards/margins": 0.07659576833248138, "rewards/rejected": -0.23530229926109314, "step": 6980 }, { "epoch": 0.46, "learning_rate": 3.2947731233389447e-06, "logits/chosen": -0.8651480674743652, "logits/rejected": -0.6951728463172913, "logps/chosen": -425.15399169921875, "logps/rejected": -486.6709899902344, "loss": 0.0175, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17764738202095032, "rewards/margins": 0.11694429814815521, "rewards/rejected": -0.2945916950702667, "step": 6990 }, { "epoch": 0.46, "learning_rate": 3.2893573683691706e-06, "logits/chosen": -0.8079308271408081, "logits/rejected": -0.7077968716621399, "logps/chosen": -381.0784912109375, "logps/rejected": -467.71649169921875, "loss": 0.0229, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17638501524925232, "rewards/margins": 0.10453379154205322, "rewards/rejected": -0.28091880679130554, "step": 7000 }, { "epoch": 0.46, "eval_logits/chosen": -0.9160138964653015, "eval_logits/rejected": -0.7966699600219727, "eval_logps/chosen": -421.6742248535156, "eval_logps/rejected": -485.5566711425781, "eval_loss": 0.024113964289426804, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -0.18966925144195557, "eval_rewards/margins": 0.0842754915356636, "eval_rewards/rejected": -0.27394476532936096, "eval_runtime": 715.2185, "eval_samples_per_second": 2.796, "eval_steps_per_second": 1.398, "step": 7000 }, { "epoch": 0.46, "learning_rate": 3.2839374957246915e-06, "logits/chosen": -1.0175594091415405, "logits/rejected": -0.7546167969703674, "logps/chosen": -474.11505126953125, "logps/rejected": -431.2002868652344, "loss": 0.0241, "rewards/accuracies": 0.625, "rewards/chosen": -0.2080247849225998, "rewards/margins": 0.048623427748680115, "rewards/rejected": -0.2566482126712799, "step": 7010 }, { "epoch": 0.46, "learning_rate": 3.2785135336782187e-06, "logits/chosen": -0.8031293153762817, "logits/rejected": -0.7071239948272705, "logps/chosen": -455.78253173828125, "logps/rejected": -561.9644775390625, "loss": 0.0123, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2218671292066574, "rewards/margins": 0.0838695541024208, "rewards/rejected": -0.3057366907596588, "step": 7020 }, { "epoch": 0.46, "learning_rate": 3.2730855105237952e-06, "logits/chosen": -0.9485722780227661, "logits/rejected": -0.8604018092155457, "logps/chosen": -437.15216064453125, "logps/rejected": -573.8688354492188, "loss": 0.0285, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21687562763690948, "rewards/margins": 0.09253615885972977, "rewards/rejected": -0.30941176414489746, "step": 7030 }, { "epoch": 0.46, "learning_rate": 3.2676534545766486e-06, "logits/chosen": -0.8744341731071472, "logits/rejected": -0.7336697578430176, "logps/chosen": -436.20916748046875, "logps/rejected": -473.65106201171875, "loss": 0.027, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22373966872692108, "rewards/margins": 0.047334734350442886, "rewards/rejected": -0.2710743844509125, "step": 7040 }, { "epoch": 0.46, "learning_rate": 3.262217394173043e-06, "logits/chosen": -0.7904328107833862, "logits/rejected": -0.701250433921814, "logps/chosen": -459.13916015625, "logps/rejected": -557.8087158203125, "loss": 0.0288, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2182885706424713, "rewards/margins": 0.10475250333547592, "rewards/rejected": -0.3230411112308502, "step": 7050 }, { "epoch": 0.46, "learning_rate": 3.2567773576701333e-06, "logits/chosen": -0.8007340431213379, "logits/rejected": -0.6579197645187378, "logps/chosen": -451.7880859375, "logps/rejected": -572.09130859375, "loss": 0.0239, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1960306167602539, "rewards/margins": 0.1506188064813614, "rewards/rejected": -0.3466494381427765, "step": 7060 }, { "epoch": 0.46, "learning_rate": 3.2513333734458154e-06, "logits/chosen": -0.7304666042327881, "logits/rejected": -0.5545434951782227, "logps/chosen": -400.06341552734375, "logps/rejected": -435.1558532714844, "loss": 0.0266, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19322210550308228, "rewards/margins": 0.05170009657740593, "rewards/rejected": -0.2449222058057785, "step": 7070 }, { "epoch": 0.46, "learning_rate": 3.245885469898576e-06, "logits/chosen": -0.6283619999885559, "logits/rejected": -0.7113500833511353, "logps/chosen": -513.007080078125, "logps/rejected": -543.78369140625, "loss": 0.0203, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21245193481445312, "rewards/margins": 0.09446465224027634, "rewards/rejected": -0.30691659450531006, "step": 7080 }, { "epoch": 0.46, "learning_rate": 3.2404336754473497e-06, "logits/chosen": -0.6811359524726868, "logits/rejected": -0.5386748909950256, "logps/chosen": -456.03448486328125, "logps/rejected": -457.955078125, "loss": 0.015, "rewards/accuracies": 0.75, "rewards/chosen": -0.18852753937244415, "rewards/margins": 0.06669080257415771, "rewards/rejected": -0.25521835684776306, "step": 7090 }, { "epoch": 0.46, "learning_rate": 3.234978018531367e-06, "logits/chosen": -1.2770839929580688, "logits/rejected": -0.775590717792511, "logps/chosen": -454.53900146484375, "logps/rejected": -468.47821044921875, "loss": 0.0213, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19603875279426575, "rewards/margins": 0.07873886823654175, "rewards/rejected": -0.2747776508331299, "step": 7100 }, { "epoch": 0.46, "eval_logits/chosen": -0.7425191402435303, "eval_logits/rejected": -0.6326358318328857, "eval_logps/chosen": -441.93560791015625, "eval_logps/rejected": -507.9072570800781, "eval_loss": 0.023862788453698158, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -0.2099306434392929, "eval_rewards/margins": 0.08636472374200821, "eval_rewards/rejected": -0.2962953746318817, "eval_runtime": 717.2748, "eval_samples_per_second": 2.788, "eval_steps_per_second": 1.394, "step": 7100 }, { "epoch": 0.47, "learning_rate": 3.229518527610006e-06, "logits/chosen": -1.0590064525604248, "logits/rejected": -0.9328305125236511, "logps/chosen": -491.8431701660156, "logps/rejected": -517.9161987304688, "loss": 0.0185, "rewards/accuracies": 0.625, "rewards/chosen": -0.199053093791008, "rewards/margins": 0.0712491050362587, "rewards/rejected": -0.2703022062778473, "step": 7110 }, { "epoch": 0.47, "learning_rate": 3.2240552311626465e-06, "logits/chosen": -0.8787094354629517, "logits/rejected": -0.7058129906654358, "logps/chosen": -415.72711181640625, "logps/rejected": -458.05169677734375, "loss": 0.0149, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.17451338469982147, "rewards/margins": 0.0572696328163147, "rewards/rejected": -0.23178300261497498, "step": 7120 }, { "epoch": 0.47, "learning_rate": 3.2185881576885193e-06, "logits/chosen": -0.9212052226066589, "logits/rejected": -0.7970582246780396, "logps/chosen": -436.0469665527344, "logps/rejected": -458.67340087890625, "loss": 0.0339, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22432419657707214, "rewards/margins": 0.0612938217818737, "rewards/rejected": -0.28561800718307495, "step": 7130 }, { "epoch": 0.47, "learning_rate": 3.213117335706557e-06, "logits/chosen": -1.0062284469604492, "logits/rejected": -0.9220073819160461, "logps/chosen": -442.7872009277344, "logps/rejected": -516.6103515625, "loss": 0.0278, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18674224615097046, "rewards/margins": 0.06461696326732635, "rewards/rejected": -0.2513591945171356, "step": 7140 }, { "epoch": 0.47, "learning_rate": 3.2076427937552473e-06, "logits/chosen": -0.9384700059890747, "logits/rejected": -0.5967062711715698, "logps/chosen": -387.3866271972656, "logps/rejected": -478.8876953125, "loss": 0.0261, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.14189685881137848, "rewards/margins": 0.1106247678399086, "rewards/rejected": -0.2525216341018677, "step": 7150 }, { "epoch": 0.47, "learning_rate": 3.2021645603924827e-06, "logits/chosen": -0.6398612260818481, "logits/rejected": -0.6815491914749146, "logps/chosen": -321.0020446777344, "logps/rejected": -444.0062561035156, "loss": 0.0294, "rewards/accuracies": 0.625, "rewards/chosen": -0.18254795670509338, "rewards/margins": 0.11363975703716278, "rewards/rejected": -0.29618769884109497, "step": 7160 }, { "epoch": 0.47, "learning_rate": 3.196682664195412e-06, "logits/chosen": -0.625547468662262, "logits/rejected": -0.563734233379364, "logps/chosen": -378.33721923828125, "logps/rejected": -393.31134033203125, "loss": 0.0296, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1742890477180481, "rewards/margins": 0.04986513406038284, "rewards/rejected": -0.22415415942668915, "step": 7170 }, { "epoch": 0.47, "learning_rate": 3.191197133760291e-06, "logits/chosen": -1.4326298236846924, "logits/rejected": -0.8061431646347046, "logps/chosen": -427.77081298828125, "logps/rejected": -466.6153259277344, "loss": 0.0317, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16036739945411682, "rewards/margins": 0.11309325695037842, "rewards/rejected": -0.27346062660217285, "step": 7180 }, { "epoch": 0.47, "learning_rate": 3.185707997702334e-06, "logits/chosen": -1.0118279457092285, "logits/rejected": -0.6012075543403625, "logps/chosen": -428.33306884765625, "logps/rejected": -472.82470703125, "loss": 0.0185, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18756821751594543, "rewards/margins": 0.08234323561191559, "rewards/rejected": -0.2699114680290222, "step": 7190 }, { "epoch": 0.47, "learning_rate": 3.1802152846555624e-06, "logits/chosen": -0.8777405023574829, "logits/rejected": -0.6505894064903259, "logps/chosen": -397.32159423828125, "logps/rejected": -474.1864318847656, "loss": 0.0351, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17307859659194946, "rewards/margins": 0.09238413721323013, "rewards/rejected": -0.265462726354599, "step": 7200 }, { "epoch": 0.47, "eval_logits/chosen": -0.8202336430549622, "eval_logits/rejected": -0.7077284455299377, "eval_logps/chosen": -414.6007995605469, "eval_logps/rejected": -471.44921875, "eval_loss": 0.024090323597192764, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -0.1825958788394928, "eval_rewards/margins": 0.0772414281964302, "eval_rewards/rejected": -0.2598372995853424, "eval_runtime": 718.6874, "eval_samples_per_second": 2.783, "eval_steps_per_second": 1.391, "step": 7200 }, { "epoch": 0.47, "learning_rate": 3.174719023272659e-06, "logits/chosen": -1.1505197286605835, "logits/rejected": -1.024150013923645, "logps/chosen": -394.5386657714844, "logps/rejected": -533.9312133789062, "loss": 0.0154, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1808445155620575, "rewards/margins": 0.09304332733154297, "rewards/rejected": -0.27388784289360046, "step": 7210 }, { "epoch": 0.47, "learning_rate": 3.169219242224816e-06, "logits/chosen": -1.1072607040405273, "logits/rejected": -0.8794090151786804, "logps/chosen": -449.34869384765625, "logps/rejected": -512.7760009765625, "loss": 0.0119, "rewards/accuracies": 0.625, "rewards/chosen": -0.20942148566246033, "rewards/margins": 0.0695226639509201, "rewards/rejected": -0.27894413471221924, "step": 7220 }, { "epoch": 0.47, "learning_rate": 3.1637159702015837e-06, "logits/chosen": -0.8730020523071289, "logits/rejected": -0.8278923034667969, "logps/chosen": -408.51885986328125, "logps/rejected": -483.2220153808594, "loss": 0.0216, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20262667536735535, "rewards/margins": 0.09402598440647125, "rewards/rejected": -0.2966527044773102, "step": 7230 }, { "epoch": 0.47, "learning_rate": 3.1582092359107263e-06, "logits/chosen": -0.8979321718215942, "logits/rejected": -0.6492558121681213, "logps/chosen": -474.2720642089844, "logps/rejected": -511.1580505371094, "loss": 0.0229, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19599440693855286, "rewards/margins": 0.08271664381027222, "rewards/rejected": -0.2787110507488251, "step": 7240 }, { "epoch": 0.47, "learning_rate": 3.152699068078067e-06, "logits/chosen": -0.7777132987976074, "logits/rejected": -0.7277923822402954, "logps/chosen": -492.8728942871094, "logps/rejected": -589.6721801757812, "loss": 0.0187, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21258261799812317, "rewards/margins": 0.11734038591384888, "rewards/rejected": -0.32992297410964966, "step": 7250 }, { "epoch": 0.48, "learning_rate": 3.1471854954473415e-06, "logits/chosen": -1.0924317836761475, "logits/rejected": -0.9903742671012878, "logps/chosen": -376.240966796875, "logps/rejected": -467.5462341308594, "loss": 0.0161, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12547428905963898, "rewards/margins": 0.09507327526807785, "rewards/rejected": -0.22054755687713623, "step": 7260 }, { "epoch": 0.48, "learning_rate": 3.1416685467800436e-06, "logits/chosen": -0.7809782028198242, "logits/rejected": -0.33117085695266724, "logps/chosen": -382.5895080566406, "logps/rejected": -450.17486572265625, "loss": 0.0225, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1970066875219345, "rewards/margins": 0.0901150032877922, "rewards/rejected": -0.2871217131614685, "step": 7270 }, { "epoch": 0.48, "learning_rate": 3.1361482508552803e-06, "logits/chosen": -0.9118472933769226, "logits/rejected": -0.8214088678359985, "logps/chosen": -430.8885192871094, "logps/rejected": -456.4303283691406, "loss": 0.0337, "rewards/accuracies": 0.75, "rewards/chosen": -0.18893763422966003, "rewards/margins": 0.068928062915802, "rewards/rejected": -0.2578657269477844, "step": 7280 }, { "epoch": 0.48, "learning_rate": 3.1306246364696198e-06, "logits/chosen": -1.208669900894165, "logits/rejected": -0.9519757032394409, "logps/chosen": -444.3653259277344, "logps/rejected": -501.03851318359375, "loss": 0.0154, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19189099967479706, "rewards/margins": 0.0741782933473587, "rewards/rejected": -0.26606932282447815, "step": 7290 }, { "epoch": 0.48, "learning_rate": 3.1250977324369413e-06, "logits/chosen": -0.7202613949775696, "logits/rejected": -0.6008542776107788, "logps/chosen": -367.3160400390625, "logps/rejected": -469.4248046875, "loss": 0.0198, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2083636224269867, "rewards/margins": 0.0953967273235321, "rewards/rejected": -0.3037603497505188, "step": 7300 }, { "epoch": 0.48, "eval_logits/chosen": -0.7480952143669128, "eval_logits/rejected": -0.638218343257904, "eval_logps/chosen": -473.7773742675781, "eval_logps/rejected": -533.2532958984375, "eval_loss": 0.02371617592871189, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -0.24177244305610657, "eval_rewards/margins": 0.07986901700496674, "eval_rewards/rejected": -0.3216414153575897, "eval_runtime": 714.6892, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 7300 }, { "epoch": 0.48, "learning_rate": 3.1195675675882825e-06, "logits/chosen": -0.8372277021408081, "logits/rejected": -0.55473792552948, "logps/chosen": -516.3038330078125, "logps/rejected": -555.3046875, "loss": 0.0282, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.28111279010772705, "rewards/margins": 0.06960564851760864, "rewards/rejected": -0.3507184684276581, "step": 7310 }, { "epoch": 0.48, "learning_rate": 3.1140341707716926e-06, "logits/chosen": -0.2712094187736511, "logits/rejected": -0.3225840628147125, "logps/chosen": -447.2505798339844, "logps/rejected": -501.6929626464844, "loss": 0.041, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.25144287943840027, "rewards/margins": 0.0974586009979248, "rewards/rejected": -0.3489014506340027, "step": 7320 }, { "epoch": 0.48, "learning_rate": 3.1084975708520803e-06, "logits/chosen": -1.020155668258667, "logits/rejected": -0.7410798668861389, "logps/chosen": -481.20849609375, "logps/rejected": -499.14459228515625, "loss": 0.0181, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21948465704917908, "rewards/margins": 0.08868256956338882, "rewards/rejected": -0.3081672191619873, "step": 7330 }, { "epoch": 0.48, "learning_rate": 3.1029577967110625e-06, "logits/chosen": -1.011373519897461, "logits/rejected": -0.7138611078262329, "logps/chosen": -421.7295837402344, "logps/rejected": -418.33392333984375, "loss": 0.0394, "rewards/accuracies": 0.625, "rewards/chosen": -0.20533525943756104, "rewards/margins": 0.045662663877010345, "rewards/rejected": -0.250997930765152, "step": 7340 }, { "epoch": 0.48, "learning_rate": 3.097414877246814e-06, "logits/chosen": -1.0094305276870728, "logits/rejected": -0.7122253775596619, "logps/chosen": -392.9300842285156, "logps/rejected": -470.08160400390625, "loss": 0.032, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19428661465644836, "rewards/margins": 0.10666443407535553, "rewards/rejected": -0.3009510338306427, "step": 7350 }, { "epoch": 0.48, "learning_rate": 3.0918688413739197e-06, "logits/chosen": -0.9062078595161438, "logits/rejected": -0.6033264398574829, "logps/chosen": -396.75640869140625, "logps/rejected": -435.7647399902344, "loss": 0.0191, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.16407346725463867, "rewards/margins": 0.10328109562397003, "rewards/rejected": -0.2673545479774475, "step": 7360 }, { "epoch": 0.48, "learning_rate": 3.0863197180232178e-06, "logits/chosen": -0.9381674528121948, "logits/rejected": -0.8313081860542297, "logps/chosen": -403.8127746582031, "logps/rejected": -465.6454162597656, "loss": 0.0192, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20406858623027802, "rewards/margins": 0.07405339181423187, "rewards/rejected": -0.2781219780445099, "step": 7370 }, { "epoch": 0.48, "learning_rate": 3.0807675361416554e-06, "logits/chosen": -0.7997512221336365, "logits/rejected": -0.7093746066093445, "logps/chosen": -360.4013366699219, "logps/rejected": -357.00579833984375, "loss": 0.0336, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1715211123228073, "rewards/margins": 0.07563885301351547, "rewards/rejected": -0.2471599578857422, "step": 7380 }, { "epoch": 0.48, "learning_rate": 3.0752123246921327e-06, "logits/chosen": -1.0084792375564575, "logits/rejected": -0.6458248496055603, "logps/chosen": -476.39385986328125, "logps/rejected": -493.07452392578125, "loss": 0.0182, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19848057627677917, "rewards/margins": 0.08361216634511948, "rewards/rejected": -0.28209275007247925, "step": 7390 }, { "epoch": 0.48, "learning_rate": 3.069654112653353e-06, "logits/chosen": -1.10367751121521, "logits/rejected": -0.8654531240463257, "logps/chosen": -467.70880126953125, "logps/rejected": -477.7017517089844, "loss": 0.0267, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2512245774269104, "rewards/margins": 0.04254506900906563, "rewards/rejected": -0.2937696874141693, "step": 7400 }, { "epoch": 0.48, "eval_logits/chosen": -0.9285605549812317, "eval_logits/rejected": -0.8072183132171631, "eval_logps/chosen": -458.3289794921875, "eval_logps/rejected": -523.6795654296875, "eval_loss": 0.02381235919892788, "eval_rewards/accuracies": 0.6635000109672546, "eval_rewards/chosen": -0.22632405161857605, "eval_rewards/margins": 0.08574356883764267, "eval_rewards/rejected": -0.31206759810447693, "eval_runtime": 715.7723, "eval_samples_per_second": 2.794, "eval_steps_per_second": 1.397, "step": 7400 }, { "epoch": 0.48, "learning_rate": 3.064092929019673e-06, "logits/chosen": -0.8597939610481262, "logits/rejected": -1.109464406967163, "logps/chosen": -468.0879821777344, "logps/rejected": -545.7868041992188, "loss": 0.0263, "rewards/accuracies": 0.625, "rewards/chosen": -0.21054451167583466, "rewards/margins": 0.05782405659556389, "rewards/rejected": -0.26836857199668884, "step": 7410 }, { "epoch": 0.49, "learning_rate": 3.058528802800952e-06, "logits/chosen": -1.1463249921798706, "logits/rejected": -0.9255644083023071, "logps/chosen": -485.87664794921875, "logps/rejected": -530.3701782226562, "loss": 0.0187, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1932429075241089, "rewards/margins": 0.08106640726327896, "rewards/rejected": -0.27430933713912964, "step": 7420 }, { "epoch": 0.49, "learning_rate": 3.052961763022397e-06, "logits/chosen": -1.314130425453186, "logits/rejected": -0.691076934337616, "logps/chosen": -381.27203369140625, "logps/rejected": -456.42169189453125, "loss": 0.0365, "rewards/accuracies": 0.625, "rewards/chosen": -0.19839081168174744, "rewards/margins": 0.11387337744235992, "rewards/rejected": -0.31226420402526855, "step": 7430 }, { "epoch": 0.49, "learning_rate": 3.047391838724415e-06, "logits/chosen": -1.202976942062378, "logits/rejected": -1.0476958751678467, "logps/chosen": -445.1197814941406, "logps/rejected": -528.016845703125, "loss": 0.0268, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20960108935832977, "rewards/margins": 0.09896369278430939, "rewards/rejected": -0.30856481194496155, "step": 7440 }, { "epoch": 0.49, "learning_rate": 3.0418190589624587e-06, "logits/chosen": -0.8290348052978516, "logits/rejected": -0.7111979722976685, "logps/chosen": -381.62457275390625, "logps/rejected": -448.641845703125, "loss": 0.0254, "rewards/accuracies": 0.625, "rewards/chosen": -0.2040024697780609, "rewards/margins": 0.05866492539644241, "rewards/rejected": -0.26266735792160034, "step": 7450 }, { "epoch": 0.49, "learning_rate": 3.0362434528068784e-06, "logits/chosen": -0.7356087565422058, "logits/rejected": -0.593967854976654, "logps/chosen": -467.4925231933594, "logps/rejected": -474.9930114746094, "loss": 0.0095, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19816938042640686, "rewards/margins": 0.0929945856332779, "rewards/rejected": -0.29116398096084595, "step": 7460 }, { "epoch": 0.49, "learning_rate": 3.0306650493427657e-06, "logits/chosen": -0.9103811979293823, "logits/rejected": -0.7590612769126892, "logps/chosen": -415.922119140625, "logps/rejected": -491.2625427246094, "loss": 0.0318, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18430288136005402, "rewards/margins": 0.08430235832929611, "rewards/rejected": -0.26860523223876953, "step": 7470 }, { "epoch": 0.49, "learning_rate": 3.0250838776698077e-06, "logits/chosen": -1.02713143825531, "logits/rejected": -0.7544373273849487, "logps/chosen": -409.3565368652344, "logps/rejected": -493.7476501464844, "loss": 0.0238, "rewards/accuracies": 0.625, "rewards/chosen": -0.2254372537136078, "rewards/margins": 0.08713744580745697, "rewards/rejected": -0.31257471442222595, "step": 7480 }, { "epoch": 0.49, "learning_rate": 3.0194999669021275e-06, "logits/chosen": -0.6396011710166931, "logits/rejected": -0.37460362911224365, "logps/chosen": -441.87396240234375, "logps/rejected": -513.9738159179688, "loss": 0.0276, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21500559151172638, "rewards/margins": 0.11915179342031479, "rewards/rejected": -0.33415737748146057, "step": 7490 }, { "epoch": 0.49, "learning_rate": 3.0139133461681403e-06, "logits/chosen": -1.0721373558044434, "logits/rejected": -0.8857678174972534, "logps/chosen": -457.5224609375, "logps/rejected": -519.1844482421875, "loss": 0.0183, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19312818348407745, "rewards/margins": 0.11780223995447159, "rewards/rejected": -0.31093043088912964, "step": 7500 }, { "epoch": 0.49, "eval_logits/chosen": -0.9167981743812561, "eval_logits/rejected": -0.7952778935432434, "eval_logps/chosen": -458.1802062988281, "eval_logps/rejected": -526.6685791015625, "eval_loss": 0.023969681933522224, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -0.22617526352405548, "eval_rewards/margins": 0.08888135105371475, "eval_rewards/rejected": -0.31505662202835083, "eval_runtime": 714.1334, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.4, "step": 7500 }, { "epoch": 0.49, "learning_rate": 3.0083240446103965e-06, "logits/chosen": -0.7077032923698425, "logits/rejected": -0.4208412170410156, "logps/chosen": -404.287353515625, "logps/rejected": -520.3126831054688, "loss": 0.0125, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22025275230407715, "rewards/margins": 0.10835625976324081, "rewards/rejected": -0.32860901951789856, "step": 7510 }, { "epoch": 0.49, "learning_rate": 3.0027320913854306e-06, "logits/chosen": -1.278203010559082, "logits/rejected": -1.0798954963684082, "logps/chosen": -494.61907958984375, "logps/rejected": -533.409912109375, "loss": 0.0204, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2021574229001999, "rewards/margins": 0.1015002503991127, "rewards/rejected": -0.3036576807498932, "step": 7520 }, { "epoch": 0.49, "learning_rate": 2.997137515663609e-06, "logits/chosen": -0.9956961870193481, "logits/rejected": -0.8046631813049316, "logps/chosen": -380.2830810546875, "logps/rejected": -436.5585021972656, "loss": 0.0558, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15490567684173584, "rewards/margins": 0.09397459030151367, "rewards/rejected": -0.2488802969455719, "step": 7530 }, { "epoch": 0.49, "learning_rate": 2.991540346628981e-06, "logits/chosen": -0.8524099588394165, "logits/rejected": -0.8989107012748718, "logps/chosen": -451.1139221191406, "logps/rejected": -478.8126525878906, "loss": 0.0161, "rewards/accuracies": 0.625, "rewards/chosen": -0.21151356399059296, "rewards/margins": 0.053806982934474945, "rewards/rejected": -0.2653205394744873, "step": 7540 }, { "epoch": 0.49, "learning_rate": 2.985940613479121e-06, "logits/chosen": -1.1140300035476685, "logits/rejected": -0.9845923185348511, "logps/chosen": -514.0938720703125, "logps/rejected": -517.62158203125, "loss": 0.0329, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22024419903755188, "rewards/margins": 0.06289959698915482, "rewards/rejected": -0.2831438183784485, "step": 7550 }, { "epoch": 0.49, "learning_rate": 2.980338345424981e-06, "logits/chosen": -0.8097160458564758, "logits/rejected": -0.7492599487304688, "logps/chosen": -462.32073974609375, "logps/rejected": -485.8959045410156, "loss": 0.0167, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21310095489025116, "rewards/margins": 0.07185201346874237, "rewards/rejected": -0.28495293855667114, "step": 7560 }, { "epoch": 0.5, "learning_rate": 2.974733571690735e-06, "logits/chosen": -1.0027107000350952, "logits/rejected": -0.5719642639160156, "logps/chosen": -540.3960571289062, "logps/rejected": -573.8260498046875, "loss": 0.0457, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3029892146587372, "rewards/margins": 0.09272249788045883, "rewards/rejected": -0.395711749792099, "step": 7570 }, { "epoch": 0.5, "learning_rate": 2.9691263215136274e-06, "logits/chosen": -1.1959030628204346, "logits/rejected": -1.0594180822372437, "logps/chosen": -462.3970642089844, "logps/rejected": -507.36126708984375, "loss": 0.0123, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19539669156074524, "rewards/margins": 0.0766187459230423, "rewards/rejected": -0.27201542258262634, "step": 7580 }, { "epoch": 0.5, "learning_rate": 2.963516624143823e-06, "logits/chosen": -0.7993000745773315, "logits/rejected": -1.0808978080749512, "logps/chosen": -482.3975524902344, "logps/rejected": -522.1849975585938, "loss": 0.0256, "rewards/accuracies": 0.625, "rewards/chosen": -0.2698308527469635, "rewards/margins": 0.0773182362318039, "rewards/rejected": -0.3471491038799286, "step": 7590 }, { "epoch": 0.5, "learning_rate": 2.9579045088442504e-06, "logits/chosen": -0.9126564860343933, "logits/rejected": -0.7356353998184204, "logps/chosen": -423.11651611328125, "logps/rejected": -537.8327026367188, "loss": 0.0384, "rewards/accuracies": 0.625, "rewards/chosen": -0.23461151123046875, "rewards/margins": 0.09496726095676422, "rewards/rejected": -0.32957878708839417, "step": 7600 }, { "epoch": 0.5, "eval_logits/chosen": -0.9928443431854248, "eval_logits/rejected": -0.8678056597709656, "eval_logps/chosen": -453.06781005859375, "eval_logps/rejected": -522.6358642578125, "eval_loss": 0.024425040930509567, "eval_rewards/accuracies": 0.6620000004768372, "eval_rewards/chosen": -0.22106286883354187, "eval_rewards/margins": 0.08996104449033737, "eval_rewards/rejected": -0.31102389097213745, "eval_runtime": 717.3053, "eval_samples_per_second": 2.788, "eval_steps_per_second": 1.394, "step": 7600 }, { "epoch": 0.5, "learning_rate": 2.9522900048904534e-06, "logits/chosen": -1.1984961032867432, "logits/rejected": -0.9421941637992859, "logps/chosen": -478.04119873046875, "logps/rejected": -503.965576171875, "loss": 0.0214, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23830311000347137, "rewards/margins": 0.055945832282304764, "rewards/rejected": -0.29424890875816345, "step": 7610 }, { "epoch": 0.5, "learning_rate": 2.9466731415704343e-06, "logits/chosen": -1.0784003734588623, "logits/rejected": -0.9554091691970825, "logps/chosen": -409.42620849609375, "logps/rejected": -496.312255859375, "loss": 0.022, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18563954532146454, "rewards/margins": 0.09125347435474396, "rewards/rejected": -0.2768930196762085, "step": 7620 }, { "epoch": 0.5, "learning_rate": 2.941053948184503e-06, "logits/chosen": -1.18716561794281, "logits/rejected": -0.8165410161018372, "logps/chosen": -431.05743408203125, "logps/rejected": -452.7134704589844, "loss": 0.0295, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15111836791038513, "rewards/margins": 0.05656830593943596, "rewards/rejected": -0.20768669247627258, "step": 7630 }, { "epoch": 0.5, "learning_rate": 2.935432454045125e-06, "logits/chosen": -0.8616430163383484, "logits/rejected": -0.8946436643600464, "logps/chosen": -404.14569091796875, "logps/rejected": -421.9381408691406, "loss": 0.0217, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1721813976764679, "rewards/margins": 0.038999903947114944, "rewards/rejected": -0.21118128299713135, "step": 7640 }, { "epoch": 0.5, "learning_rate": 2.929808688476768e-06, "logits/chosen": -1.224557638168335, "logits/rejected": -1.2068487405776978, "logps/chosen": -403.6648864746094, "logps/rejected": -463.6199645996094, "loss": 0.0363, "rewards/accuracies": 0.625, "rewards/chosen": -0.1643024981021881, "rewards/margins": 0.07755754142999649, "rewards/rejected": -0.241860032081604, "step": 7650 }, { "epoch": 0.5, "learning_rate": 2.924182680815748e-06, "logits/chosen": -1.1361750364303589, "logits/rejected": -1.0951645374298096, "logps/chosen": -384.43121337890625, "logps/rejected": -480.97198486328125, "loss": 0.0148, "rewards/accuracies": 0.75, "rewards/chosen": -0.149800643324852, "rewards/margins": 0.11924842745065689, "rewards/rejected": -0.26904910802841187, "step": 7660 }, { "epoch": 0.5, "learning_rate": 2.9185544604100765e-06, "logits/chosen": -0.8657207489013672, "logits/rejected": -0.780017077922821, "logps/chosen": -329.89007568359375, "logps/rejected": -394.96337890625, "loss": 0.0259, "rewards/accuracies": 0.5, "rewards/chosen": -0.13397996127605438, "rewards/margins": 0.0693286880850792, "rewards/rejected": -0.20330862700939178, "step": 7670 }, { "epoch": 0.5, "learning_rate": 2.9129240566193083e-06, "logits/chosen": -1.4844722747802734, "logits/rejected": -1.1259534358978271, "logps/chosen": -309.76544189453125, "logps/rejected": -389.955810546875, "loss": 0.0218, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1076851636171341, "rewards/margins": 0.09347833693027496, "rewards/rejected": -0.20116348564624786, "step": 7680 }, { "epoch": 0.5, "learning_rate": 2.9072914988143874e-06, "logits/chosen": -1.2336488962173462, "logits/rejected": -1.0480939149856567, "logps/chosen": -338.2337646484375, "logps/rejected": -449.3302307128906, "loss": 0.0426, "rewards/accuracies": 0.75, "rewards/chosen": -0.13680395483970642, "rewards/margins": 0.11956790834665298, "rewards/rejected": -0.2563718557357788, "step": 7690 }, { "epoch": 0.5, "learning_rate": 2.9016568163774956e-06, "logits/chosen": -1.3061578273773193, "logits/rejected": -1.1339387893676758, "logps/chosen": -295.0944519042969, "logps/rejected": -343.52667236328125, "loss": 0.0107, "rewards/accuracies": 0.625, "rewards/chosen": -0.12286518514156342, "rewards/margins": 0.08248507231473923, "rewards/rejected": -0.20535027980804443, "step": 7700 }, { "epoch": 0.5, "eval_logits/chosen": -1.3134905099868774, "eval_logits/rejected": -1.1731081008911133, "eval_logps/chosen": -368.13104248046875, "eval_logps/rejected": -429.5078430175781, "eval_loss": 0.024295959621667862, "eval_rewards/accuracies": 0.6614999771118164, "eval_rewards/chosen": -0.1361261010169983, "eval_rewards/margins": 0.08176986873149872, "eval_rewards/rejected": -0.21789595484733582, "eval_runtime": 713.4914, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.402, "step": 7700 }, { "epoch": 0.5, "learning_rate": 2.8960200387018942e-06, "logits/chosen": -1.345518708229065, "logits/rejected": -1.1864873170852661, "logps/chosen": -450.6943359375, "logps/rejected": -464.3408203125, "loss": 0.0255, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14296133816242218, "rewards/margins": 0.06248089671134949, "rewards/rejected": -0.20544223487377167, "step": 7710 }, { "epoch": 0.51, "learning_rate": 2.8903811951917792e-06, "logits/chosen": -1.356708288192749, "logits/rejected": -1.307135820388794, "logps/chosen": -332.11572265625, "logps/rejected": -357.31658935546875, "loss": 0.0272, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1338360458612442, "rewards/margins": 0.0723431333899498, "rewards/rejected": -0.2061791867017746, "step": 7720 }, { "epoch": 0.51, "learning_rate": 2.88474031526212e-06, "logits/chosen": -1.3988991975784302, "logits/rejected": -1.2867591381072998, "logps/chosen": -366.85198974609375, "logps/rejected": -439.1573791503906, "loss": 0.0187, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16401226818561554, "rewards/margins": 0.05887676402926445, "rewards/rejected": -0.2228890359401703, "step": 7730 }, { "epoch": 0.51, "learning_rate": 2.879097428338509e-06, "logits/chosen": -1.2603391408920288, "logits/rejected": -0.9710729718208313, "logps/chosen": -377.1734313964844, "logps/rejected": -422.1307678222656, "loss": 0.0352, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16127847135066986, "rewards/margins": 0.06728565692901611, "rewards/rejected": -0.22856411337852478, "step": 7740 }, { "epoch": 0.51, "learning_rate": 2.8734525638570094e-06, "logits/chosen": -1.3242861032485962, "logits/rejected": -1.2716325521469116, "logps/chosen": -393.7176513671875, "logps/rejected": -438.01055908203125, "loss": 0.0212, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15952344238758087, "rewards/margins": 0.05666619539260864, "rewards/rejected": -0.21618962287902832, "step": 7750 }, { "epoch": 0.51, "learning_rate": 2.8678057512639982e-06, "logits/chosen": -1.3280117511749268, "logits/rejected": -1.2386116981506348, "logps/chosen": -406.34844970703125, "logps/rejected": -506.6224670410156, "loss": 0.018, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.12028200924396515, "rewards/margins": 0.12233342975378036, "rewards/rejected": -0.2426154613494873, "step": 7760 }, { "epoch": 0.51, "learning_rate": 2.8621570200160172e-06, "logits/chosen": -0.8720345497131348, "logits/rejected": -0.8212788701057434, "logps/chosen": -291.7910461425781, "logps/rejected": -380.91436767578125, "loss": 0.0204, "rewards/accuracies": 0.625, "rewards/chosen": -0.12322195619344711, "rewards/margins": 0.09730129688978195, "rewards/rejected": -0.22052326798439026, "step": 7770 }, { "epoch": 0.51, "learning_rate": 2.856506399579615e-06, "logits/chosen": -1.2530772686004639, "logits/rejected": -1.2239480018615723, "logps/chosen": -412.46612548828125, "logps/rejected": -463.9945373535156, "loss": 0.0308, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19410264492034912, "rewards/margins": 0.07101768255233765, "rewards/rejected": -0.26512032747268677, "step": 7780 }, { "epoch": 0.51, "learning_rate": 2.8508539194311964e-06, "logits/chosen": -1.0956141948699951, "logits/rejected": -1.2320266962051392, "logps/chosen": -418.2640075683594, "logps/rejected": -510.81097412109375, "loss": 0.0096, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16453054547309875, "rewards/margins": 0.07971692830324173, "rewards/rejected": -0.24424748122692108, "step": 7790 }, { "epoch": 0.51, "learning_rate": 2.8451996090568656e-06, "logits/chosen": -0.9811515808105469, "logits/rejected": -0.7523955702781677, "logps/chosen": -423.70184326171875, "logps/rejected": -499.3480529785156, "loss": 0.026, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23756226897239685, "rewards/margins": 0.09360777586698532, "rewards/rejected": -0.33117008209228516, "step": 7800 }, { "epoch": 0.51, "eval_logits/chosen": -0.9938773512840271, "eval_logits/rejected": -0.8686197400093079, "eval_logps/chosen": -458.37713623046875, "eval_logps/rejected": -525.5044555664062, "eval_loss": 0.02480470947921276, "eval_rewards/accuracies": 0.6629999876022339, "eval_rewards/chosen": -0.22637218236923218, "eval_rewards/margins": 0.08752032369375229, "eval_rewards/rejected": -0.31389251351356506, "eval_runtime": 715.0075, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.399, "step": 7800 }, { "epoch": 0.51, "learning_rate": 2.839543497952276e-06, "logits/chosen": -0.9234379529953003, "logits/rejected": -0.9001466035842896, "logps/chosen": -403.1349792480469, "logps/rejected": -494.17041015625, "loss": 0.0401, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21852488815784454, "rewards/margins": 0.09973929822444916, "rewards/rejected": -0.3182641863822937, "step": 7810 }, { "epoch": 0.51, "learning_rate": 2.833885615622474e-06, "logits/chosen": -0.9364679455757141, "logits/rejected": -0.8149012327194214, "logps/chosen": -432.9266662597656, "logps/rejected": -509.9027404785156, "loss": 0.0339, "rewards/accuracies": 0.625, "rewards/chosen": -0.2303508222103119, "rewards/margins": 0.06750074028968811, "rewards/rejected": -0.2978515625, "step": 7820 }, { "epoch": 0.51, "learning_rate": 2.8282259915817454e-06, "logits/chosen": -0.754353404045105, "logits/rejected": -0.7188607454299927, "logps/chosen": -339.5250244140625, "logps/rejected": -482.813720703125, "loss": 0.0158, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19858035445213318, "rewards/margins": 0.10307709872722626, "rewards/rejected": -0.30165746808052063, "step": 7830 }, { "epoch": 0.51, "learning_rate": 2.8225646553534614e-06, "logits/chosen": -0.7747805714607239, "logits/rejected": -0.7131972312927246, "logps/chosen": -352.065673828125, "logps/rejected": -409.45306396484375, "loss": 0.0295, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.152749702334404, "rewards/margins": 0.060678668320178986, "rewards/rejected": -0.2134283483028412, "step": 7840 }, { "epoch": 0.51, "learning_rate": 2.8169016364699255e-06, "logits/chosen": -1.0688912868499756, "logits/rejected": -0.9289358258247375, "logps/chosen": -425.92041015625, "logps/rejected": -472.3887634277344, "loss": 0.0242, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21403256058692932, "rewards/margins": 0.04471813514828682, "rewards/rejected": -0.25875067710876465, "step": 7850 }, { "epoch": 0.51, "learning_rate": 2.811236964472217e-06, "logits/chosen": -1.1873152256011963, "logits/rejected": -1.0396217107772827, "logps/chosen": -517.8572998046875, "logps/rejected": -532.118408203125, "loss": 0.03, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20745067298412323, "rewards/margins": 0.07438112050294876, "rewards/rejected": -0.2818318009376526, "step": 7860 }, { "epoch": 0.51, "learning_rate": 2.805570668910041e-06, "logits/chosen": -0.8459591865539551, "logits/rejected": -0.9099504351615906, "logps/chosen": -421.40234375, "logps/rejected": -575.4613647460938, "loss": 0.0164, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.25176310539245605, "rewards/margins": 0.09055864065885544, "rewards/rejected": -0.3423217236995697, "step": 7870 }, { "epoch": 0.52, "learning_rate": 2.7999027793415695e-06, "logits/chosen": -1.3415210247039795, "logits/rejected": -0.8667888641357422, "logps/chosen": -447.70513916015625, "logps/rejected": -469.01556396484375, "loss": 0.0119, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20278914272785187, "rewards/margins": 0.06623613834381104, "rewards/rejected": -0.2690252661705017, "step": 7880 }, { "epoch": 0.52, "learning_rate": 2.794233325333293e-06, "logits/chosen": -1.0970004796981812, "logits/rejected": -0.9378741383552551, "logps/chosen": -449.1304626464844, "logps/rejected": -531.5797729492188, "loss": 0.0157, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19084899127483368, "rewards/margins": 0.10612665116786957, "rewards/rejected": -0.29697564244270325, "step": 7890 }, { "epoch": 0.52, "learning_rate": 2.7885623364598597e-06, "logits/chosen": -1.3654180765151978, "logits/rejected": -1.0494868755340576, "logps/chosen": -495.35638427734375, "logps/rejected": -560.6453247070312, "loss": 0.0268, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.231663778424263, "rewards/margins": 0.10720783472061157, "rewards/rejected": -0.33887162804603577, "step": 7900 }, { "epoch": 0.52, "eval_logits/chosen": -1.1572563648223877, "eval_logits/rejected": -1.022184133529663, "eval_logps/chosen": -443.9241638183594, "eval_logps/rejected": -513.252685546875, "eval_loss": 0.023533135652542114, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -0.21191924810409546, "eval_rewards/margins": 0.08972154557704926, "eval_rewards/rejected": -0.30164074897766113, "eval_runtime": 714.2499, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 7900 }, { "epoch": 0.52, "learning_rate": 2.782889842303926e-06, "logits/chosen": -1.165021300315857, "logits/rejected": -1.0839018821716309, "logps/chosen": -399.1462707519531, "logps/rejected": -447.34942626953125, "loss": 0.0386, "rewards/accuracies": 0.625, "rewards/chosen": -0.23891720175743103, "rewards/margins": 0.05370098352432251, "rewards/rejected": -0.29261818528175354, "step": 7910 }, { "epoch": 0.52, "learning_rate": 2.7772158724559987e-06, "logits/chosen": -1.1648354530334473, "logits/rejected": -0.9081848859786987, "logps/chosen": -375.67303466796875, "logps/rejected": -574.4231567382812, "loss": 0.0105, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1585639864206314, "rewards/margins": 0.16127946972846985, "rewards/rejected": -0.31984347105026245, "step": 7920 }, { "epoch": 0.52, "learning_rate": 2.7715404565142856e-06, "logits/chosen": -0.9997104406356812, "logits/rejected": -1.0880539417266846, "logps/chosen": -391.35516357421875, "logps/rejected": -446.83636474609375, "loss": 0.0172, "rewards/accuracies": 0.625, "rewards/chosen": -0.1913909912109375, "rewards/margins": 0.06170380115509033, "rewards/rejected": -0.2530948221683502, "step": 7930 }, { "epoch": 0.52, "learning_rate": 2.7658636240845354e-06, "logits/chosen": -1.4432083368301392, "logits/rejected": -1.3656854629516602, "logps/chosen": -440.9434509277344, "logps/rejected": -555.53173828125, "loss": 0.0067, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2174951732158661, "rewards/margins": 0.09997011721134186, "rewards/rejected": -0.31746530532836914, "step": 7940 }, { "epoch": 0.52, "learning_rate": 2.7601854047798872e-06, "logits/chosen": -0.9748222231864929, "logits/rejected": -1.0181443691253662, "logps/chosen": -432.9224548339844, "logps/rejected": -529.5062255859375, "loss": 0.0293, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.20899906754493713, "rewards/margins": 0.0771685466170311, "rewards/rejected": -0.28616756200790405, "step": 7950 }, { "epoch": 0.52, "learning_rate": 2.7545058282207148e-06, "logits/chosen": -0.9508014917373657, "logits/rejected": -0.8231958150863647, "logps/chosen": -446.45123291015625, "logps/rejected": -497.5562438964844, "loss": 0.0271, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23490944504737854, "rewards/margins": 0.08168394863605499, "rewards/rejected": -0.31659334897994995, "step": 7960 }, { "epoch": 0.52, "learning_rate": 2.748824924034471e-06, "logits/chosen": -1.2198760509490967, "logits/rejected": -1.0862025022506714, "logps/chosen": -481.0332946777344, "logps/rejected": -535.6265869140625, "loss": 0.0184, "rewards/accuracies": 0.625, "rewards/chosen": -0.2616230845451355, "rewards/margins": 0.07348993420600891, "rewards/rejected": -0.3351130485534668, "step": 7970 }, { "epoch": 0.52, "learning_rate": 2.743142721855536e-06, "logits/chosen": -0.8524892926216125, "logits/rejected": -0.9843443036079407, "logps/chosen": -360.3910217285156, "logps/rejected": -414.02667236328125, "loss": 0.0335, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20853090286254883, "rewards/margins": 0.06198418140411377, "rewards/rejected": -0.2705150544643402, "step": 7980 }, { "epoch": 0.52, "learning_rate": 2.737459251325058e-06, "logits/chosen": -1.213433861732483, "logits/rejected": -1.091732382774353, "logps/chosen": -455.72705078125, "logps/rejected": -486.27423095703125, "loss": 0.0087, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1857534945011139, "rewards/margins": 0.05307856202125549, "rewards/rejected": -0.2388320416212082, "step": 7990 }, { "epoch": 0.52, "learning_rate": 2.731774542090804e-06, "logits/chosen": -0.9943619966506958, "logits/rejected": -0.9232209324836731, "logps/chosen": -364.77679443359375, "logps/rejected": -391.28643798828125, "loss": 0.0368, "rewards/accuracies": 0.5, "rewards/chosen": -0.17200076580047607, "rewards/margins": 0.04554467648267746, "rewards/rejected": -0.21754543483257294, "step": 8000 }, { "epoch": 0.52, "eval_logits/chosen": -1.2254464626312256, "eval_logits/rejected": -1.0878034830093384, "eval_logps/chosen": -403.5860900878906, "eval_logps/rejected": -466.9293212890625, "eval_loss": 0.023399606347084045, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -0.17158114910125732, "eval_rewards/margins": 0.08373628556728363, "eval_rewards/rejected": -0.25531744956970215, "eval_runtime": 717.6201, "eval_samples_per_second": 2.787, "eval_steps_per_second": 1.393, "step": 8000 }, { "epoch": 0.52, "learning_rate": 2.7260886238070034e-06, "logits/chosen": -1.3021564483642578, "logits/rejected": -1.1898549795150757, "logps/chosen": -362.55126953125, "logps/rejected": -427.02984619140625, "loss": 0.0392, "rewards/accuracies": 0.625, "rewards/chosen": -0.16608133912086487, "rewards/margins": 0.06918665021657944, "rewards/rejected": -0.2352680265903473, "step": 8010 }, { "epoch": 0.52, "learning_rate": 2.72040152613419e-06, "logits/chosen": -1.1601581573486328, "logits/rejected": -0.989398181438446, "logps/chosen": -383.90374755859375, "logps/rejected": -428.2503967285156, "loss": 0.0371, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16702868044376373, "rewards/margins": 0.12757211923599243, "rewards/rejected": -0.29460081458091736, "step": 8020 }, { "epoch": 0.53, "learning_rate": 2.7147132787390516e-06, "logits/chosen": -1.250454306602478, "logits/rejected": -0.9804635047912598, "logps/chosen": -396.14666748046875, "logps/rejected": -456.0750427246094, "loss": 0.0212, "rewards/accuracies": 0.625, "rewards/chosen": -0.16866858303546906, "rewards/margins": 0.0769139751791954, "rewards/rejected": -0.24558253586292267, "step": 8030 }, { "epoch": 0.53, "learning_rate": 2.709023911294273e-06, "logits/chosen": -1.298724889755249, "logits/rejected": -1.0445770025253296, "logps/chosen": -381.4348449707031, "logps/rejected": -493.0828552246094, "loss": 0.0337, "rewards/accuracies": 0.75, "rewards/chosen": -0.13911600410938263, "rewards/margins": 0.142547607421875, "rewards/rejected": -0.28166359663009644, "step": 8040 }, { "epoch": 0.53, "learning_rate": 2.7033334534783806e-06, "logits/chosen": -1.1964550018310547, "logits/rejected": -1.3304070234298706, "logps/chosen": -362.37957763671875, "logps/rejected": -462.181396484375, "loss": 0.0258, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1632337123155594, "rewards/margins": 0.08360270410776138, "rewards/rejected": -0.24683642387390137, "step": 8050 }, { "epoch": 0.53, "learning_rate": 2.697641934975592e-06, "logits/chosen": -1.2163738012313843, "logits/rejected": -1.041234016418457, "logps/chosen": -417.10400390625, "logps/rejected": -474.76422119140625, "loss": 0.0338, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18931759893894196, "rewards/margins": 0.09021403640508652, "rewards/rejected": -0.2795316278934479, "step": 8060 }, { "epoch": 0.53, "learning_rate": 2.691949385475654e-06, "logits/chosen": -1.18105149269104, "logits/rejected": -1.0042648315429688, "logps/chosen": -460.1038513183594, "logps/rejected": -508.90185546875, "loss": 0.0398, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21621306240558624, "rewards/margins": 0.07436920702457428, "rewards/rejected": -0.2905822694301605, "step": 8070 }, { "epoch": 0.53, "learning_rate": 2.6862558346736937e-06, "logits/chosen": -1.1207855939865112, "logits/rejected": -0.9030588269233704, "logps/chosen": -453.5801696777344, "logps/rejected": -607.0015258789062, "loss": 0.0172, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21522796154022217, "rewards/margins": 0.15862303972244263, "rewards/rejected": -0.3738510310649872, "step": 8080 }, { "epoch": 0.53, "learning_rate": 2.6805613122700617e-06, "logits/chosen": -0.9256788492202759, "logits/rejected": -0.9394919276237488, "logps/chosen": -467.3502502441406, "logps/rejected": -561.8762817382812, "loss": 0.0223, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24377775192260742, "rewards/margins": 0.09358389675617218, "rewards/rejected": -0.3373616635799408, "step": 8090 }, { "epoch": 0.53, "learning_rate": 2.674865847970176e-06, "logits/chosen": -0.9925457835197449, "logits/rejected": -0.8263480067253113, "logps/chosen": -453.21954345703125, "logps/rejected": -551.3258666992188, "loss": 0.0293, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24560317397117615, "rewards/margins": 0.07565805315971375, "rewards/rejected": -0.3212612271308899, "step": 8100 }, { "epoch": 0.53, "eval_logits/chosen": -0.9809005260467529, "eval_logits/rejected": -0.8558536767959595, "eval_logps/chosen": -454.8971862792969, "eval_logps/rejected": -523.4253540039062, "eval_loss": 0.02304054982960224, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -0.22289228439331055, "eval_rewards/margins": 0.0889212042093277, "eval_rewards/rejected": -0.31181350350379944, "eval_runtime": 717.6699, "eval_samples_per_second": 2.787, "eval_steps_per_second": 1.393, "step": 8100 }, { "epoch": 0.53, "learning_rate": 2.669169471484368e-06, "logits/chosen": -0.7397729754447937, "logits/rejected": -0.7941542863845825, "logps/chosen": -389.26361083984375, "logps/rejected": -429.8604431152344, "loss": 0.0301, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.22420558333396912, "rewards/margins": 0.04323706030845642, "rewards/rejected": -0.26744264364242554, "step": 8110 }, { "epoch": 0.53, "learning_rate": 2.6634722125277278e-06, "logits/chosen": -1.0851494073867798, "logits/rejected": -0.8365179896354675, "logps/chosen": -473.2884826660156, "logps/rejected": -560.3230590820312, "loss": 0.0281, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2411685287952423, "rewards/margins": 0.0788397341966629, "rewards/rejected": -0.3200082778930664, "step": 8120 }, { "epoch": 0.53, "learning_rate": 2.6577741008199498e-06, "logits/chosen": -0.8316506147384644, "logits/rejected": -0.7271164655685425, "logps/chosen": -510.27532958984375, "logps/rejected": -608.437744140625, "loss": 0.0214, "rewards/accuracies": 0.75, "rewards/chosen": -0.2541172504425049, "rewards/margins": 0.15820366144180298, "rewards/rejected": -0.41232091188430786, "step": 8130 }, { "epoch": 0.53, "learning_rate": 2.652075166085175e-06, "logits/chosen": -0.7823055386543274, "logits/rejected": -0.8570632934570312, "logps/chosen": -514.2627563476562, "logps/rejected": -668.1049194335938, "loss": 0.0278, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2796834409236908, "rewards/margins": 0.13029704988002777, "rewards/rejected": -0.4099804759025574, "step": 8140 }, { "epoch": 0.53, "learning_rate": 2.6463754380518395e-06, "logits/chosen": -0.7126733064651489, "logits/rejected": -0.6021712422370911, "logps/chosen": -493.42340087890625, "logps/rejected": -533.3378295898438, "loss": 0.0251, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.26246577501296997, "rewards/margins": 0.09474147111177444, "rewards/rejected": -0.3572072684764862, "step": 8150 }, { "epoch": 0.53, "learning_rate": 2.6406749464525167e-06, "logits/chosen": -1.2083556652069092, "logits/rejected": -0.872018039226532, "logps/chosen": -390.27716064453125, "logps/rejected": -440.4684143066406, "loss": 0.0403, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15671806037425995, "rewards/margins": 0.09742309153079987, "rewards/rejected": -0.2541411221027374, "step": 8160 }, { "epoch": 0.53, "learning_rate": 2.634973721023762e-06, "logits/chosen": -1.2732129096984863, "logits/rejected": -1.1315809488296509, "logps/chosen": -454.3955078125, "logps/rejected": -465.996337890625, "loss": 0.0462, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.199933260679245, "rewards/margins": 0.061039429157972336, "rewards/rejected": -0.2609727084636688, "step": 8170 }, { "epoch": 0.54, "learning_rate": 2.6292717915059605e-06, "logits/chosen": -1.2912507057189941, "logits/rejected": -1.1304757595062256, "logps/chosen": -476.2607421875, "logps/rejected": -517.3587036132812, "loss": 0.0177, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20063069462776184, "rewards/margins": 0.09729097038507462, "rewards/rejected": -0.29792168736457825, "step": 8180 }, { "epoch": 0.54, "learning_rate": 2.6235691876431706e-06, "logits/chosen": -1.2985169887542725, "logits/rejected": -1.203507423400879, "logps/chosen": -383.10601806640625, "logps/rejected": -458.4640197753906, "loss": 0.0212, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16449043154716492, "rewards/margins": 0.06975887715816498, "rewards/rejected": -0.2342493087053299, "step": 8190 }, { "epoch": 0.54, "learning_rate": 2.6178659391829673e-06, "logits/chosen": -1.3266557455062866, "logits/rejected": -1.0287048816680908, "logps/chosen": -391.74658203125, "logps/rejected": -422.36468505859375, "loss": 0.0127, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15426863729953766, "rewards/margins": 0.07146136462688446, "rewards/rejected": -0.22572998702526093, "step": 8200 }, { "epoch": 0.54, "eval_logits/chosen": -1.1700315475463867, "eval_logits/rejected": -1.0360503196716309, "eval_logps/chosen": -412.9599304199219, "eval_logps/rejected": -473.23687744140625, "eval_loss": 0.02342168055474758, "eval_rewards/accuracies": 0.6660000085830688, "eval_rewards/chosen": -0.18095499277114868, "eval_rewards/margins": 0.08066999912261963, "eval_rewards/rejected": -0.2616249918937683, "eval_runtime": 715.9268, "eval_samples_per_second": 2.794, "eval_steps_per_second": 1.397, "step": 8200 }, { "epoch": 0.54, "learning_rate": 2.6121620758762877e-06, "logits/chosen": -1.2620677947998047, "logits/rejected": -0.962398886680603, "logps/chosen": -369.5198669433594, "logps/rejected": -439.24688720703125, "loss": 0.0365, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17743559181690216, "rewards/margins": 0.07052607089281082, "rewards/rejected": -0.24796167016029358, "step": 8210 }, { "epoch": 0.54, "learning_rate": 2.606457627477277e-06, "logits/chosen": -0.9980144500732422, "logits/rejected": -0.9191421270370483, "logps/chosen": -317.8196105957031, "logps/rejected": -402.2234802246094, "loss": 0.0421, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1421903371810913, "rewards/margins": 0.08105181157588959, "rewards/rejected": -0.2232421636581421, "step": 8220 }, { "epoch": 0.54, "learning_rate": 2.6007526237431324e-06, "logits/chosen": -1.3227336406707764, "logits/rejected": -1.2014495134353638, "logps/chosen": -328.77606201171875, "logps/rejected": -436.69964599609375, "loss": 0.0187, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14674654603004456, "rewards/margins": 0.09380555152893066, "rewards/rejected": -0.2405521422624588, "step": 8230 }, { "epoch": 0.54, "learning_rate": 2.5950470944339478e-06, "logits/chosen": -1.2177183628082275, "logits/rejected": -1.2471904754638672, "logps/chosen": -348.53924560546875, "logps/rejected": -380.2584228515625, "loss": 0.0304, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1267373114824295, "rewards/margins": 0.035214945673942566, "rewards/rejected": -0.16195227205753326, "step": 8240 }, { "epoch": 0.54, "learning_rate": 2.58934106931256e-06, "logits/chosen": -1.2010172605514526, "logits/rejected": -1.0474350452423096, "logps/chosen": -389.3740234375, "logps/rejected": -431.21832275390625, "loss": 0.025, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.16860197484493256, "rewards/margins": 0.0610218271613121, "rewards/rejected": -0.22962382435798645, "step": 8250 }, { "epoch": 0.54, "learning_rate": 2.58363457814439e-06, "logits/chosen": -1.3049981594085693, "logits/rejected": -1.0223881006240845, "logps/chosen": -395.8489990234375, "logps/rejected": -465.0389099121094, "loss": 0.0272, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1851770579814911, "rewards/margins": 0.08291341364383698, "rewards/rejected": -0.26809048652648926, "step": 8260 }, { "epoch": 0.54, "learning_rate": 2.5779276506972924e-06, "logits/chosen": -1.1672968864440918, "logits/rejected": -1.1771270036697388, "logps/chosen": -403.6295471191406, "logps/rejected": -419.79571533203125, "loss": 0.023, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17167674005031586, "rewards/margins": 0.0555482916533947, "rewards/rejected": -0.22722503542900085, "step": 8270 }, { "epoch": 0.54, "learning_rate": 2.5722203167413945e-06, "logits/chosen": -1.325514554977417, "logits/rejected": -1.1584815979003906, "logps/chosen": -429.17230224609375, "logps/rejected": -442.06585693359375, "loss": 0.0137, "rewards/accuracies": 0.75, "rewards/chosen": -0.14587461948394775, "rewards/margins": 0.09481439739465714, "rewards/rejected": -0.2406889945268631, "step": 8280 }, { "epoch": 0.54, "learning_rate": 2.5665126060489476e-06, "logits/chosen": -1.3827365636825562, "logits/rejected": -1.3085436820983887, "logps/chosen": -351.1171875, "logps/rejected": -441.63055419921875, "loss": 0.0131, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16301514208316803, "rewards/margins": 0.0616668239235878, "rewards/rejected": -0.2246820032596588, "step": 8290 }, { "epoch": 0.54, "learning_rate": 2.560804548394165e-06, "logits/chosen": -1.2535436153411865, "logits/rejected": -0.8736147880554199, "logps/chosen": -421.904541015625, "logps/rejected": -465.21575927734375, "loss": 0.0169, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17462418973445892, "rewards/margins": 0.08858349174261093, "rewards/rejected": -0.26320767402648926, "step": 8300 }, { "epoch": 0.54, "eval_logits/chosen": -1.3180593252182007, "eval_logits/rejected": -1.1765270233154297, "eval_logps/chosen": -376.22979736328125, "eval_logps/rejected": -434.5301208496094, "eval_loss": 0.024123165756464005, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -0.1442248672246933, "eval_rewards/margins": 0.07869336009025574, "eval_rewards/rejected": -0.22291821241378784, "eval_runtime": 714.6025, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.399, "step": 8300 }, { "epoch": 0.54, "learning_rate": 2.5550961735530734e-06, "logits/chosen": -1.0757203102111816, "logits/rejected": -1.213310718536377, "logps/chosen": -273.1213073730469, "logps/rejected": -358.64508056640625, "loss": 0.0246, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11008550226688385, "rewards/margins": 0.054933033883571625, "rewards/rejected": -0.16501852869987488, "step": 8310 }, { "epoch": 0.54, "learning_rate": 2.549387511303351e-06, "logits/chosen": -1.2930816411972046, "logits/rejected": -1.4007723331451416, "logps/chosen": -305.3135070800781, "logps/rejected": -406.21624755859375, "loss": 0.0137, "rewards/accuracies": 0.625, "rewards/chosen": -0.13557668030261993, "rewards/margins": 0.05662214756011963, "rewards/rejected": -0.19219882786273956, "step": 8320 }, { "epoch": 0.55, "learning_rate": 2.5436785914241774e-06, "logits/chosen": -1.2323229312896729, "logits/rejected": -1.1406350135803223, "logps/chosen": -357.2239074707031, "logps/rejected": -461.88800048828125, "loss": 0.032, "rewards/accuracies": 0.75, "rewards/chosen": -0.15826469659805298, "rewards/margins": 0.1366337239742279, "rewards/rejected": -0.2948984205722809, "step": 8330 }, { "epoch": 0.55, "learning_rate": 2.5379694436960746e-06, "logits/chosen": -1.4527695178985596, "logits/rejected": -1.3670345544815063, "logps/chosen": -383.5470275878906, "logps/rejected": -461.9171447753906, "loss": 0.0243, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.13753603398799896, "rewards/margins": 0.06780519336462021, "rewards/rejected": -0.20534124970436096, "step": 8340 }, { "epoch": 0.55, "learning_rate": 2.5322600979007533e-06, "logits/chosen": -1.5108410120010376, "logits/rejected": -1.2519161701202393, "logps/chosen": -370.40594482421875, "logps/rejected": -411.65814208984375, "loss": 0.0237, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15791048109531403, "rewards/margins": 0.06195145100355148, "rewards/rejected": -0.2198619395494461, "step": 8350 }, { "epoch": 0.55, "learning_rate": 2.5265505838209592e-06, "logits/chosen": -1.4661318063735962, "logits/rejected": -1.269770860671997, "logps/chosen": -430.763916015625, "logps/rejected": -439.575439453125, "loss": 0.0243, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1738003045320511, "rewards/margins": 0.05522479489445686, "rewards/rejected": -0.22902509570121765, "step": 8360 }, { "epoch": 0.55, "learning_rate": 2.520840931240314e-06, "logits/chosen": -1.5166094303131104, "logits/rejected": -1.1362040042877197, "logps/chosen": -370.4966735839844, "logps/rejected": -386.2825927734375, "loss": 0.0168, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.160688117146492, "rewards/margins": 0.0795067697763443, "rewards/rejected": -0.2401948720216751, "step": 8370 }, { "epoch": 0.55, "learning_rate": 2.515131169943162e-06, "logits/chosen": -1.0941801071166992, "logits/rejected": -1.1212772130966187, "logps/chosen": -413.2151794433594, "logps/rejected": -496.95184326171875, "loss": 0.0248, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15512065589427948, "rewards/margins": 0.09070895612239838, "rewards/rejected": -0.24582962691783905, "step": 8380 }, { "epoch": 0.55, "learning_rate": 2.509421329714416e-06, "logits/chosen": -1.1231410503387451, "logits/rejected": -1.2304757833480835, "logps/chosen": -329.92547607421875, "logps/rejected": -402.47247314453125, "loss": 0.0348, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1237863078713417, "rewards/margins": 0.05135495588183403, "rewards/rejected": -0.17514127492904663, "step": 8390 }, { "epoch": 0.55, "learning_rate": 2.5037114403393987e-06, "logits/chosen": -1.2592103481292725, "logits/rejected": -1.0274041891098022, "logps/chosen": -322.8609313964844, "logps/rejected": -349.1400451660156, "loss": 0.0177, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11188056319952011, "rewards/margins": 0.06005961447954178, "rewards/rejected": -0.1719401776790619, "step": 8400 }, { "epoch": 0.55, "eval_logits/chosen": -1.3186016082763672, "eval_logits/rejected": -1.180397391319275, "eval_logps/chosen": -355.2328186035156, "eval_logps/rejected": -403.56817626953125, "eval_loss": 0.024870624765753746, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -0.12322785705327988, "eval_rewards/margins": 0.06872842460870743, "eval_rewards/rejected": -0.1919562816619873, "eval_runtime": 714.6299, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.399, "step": 8400 }, { "epoch": 0.55, "learning_rate": 2.4980015316036908e-06, "logits/chosen": -1.1658084392547607, "logits/rejected": -1.1698143482208252, "logps/chosen": -286.16473388671875, "logps/rejected": -415.3582458496094, "loss": 0.0189, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11035499721765518, "rewards/margins": 0.10760772228240967, "rewards/rejected": -0.21796271204948425, "step": 8410 }, { "epoch": 0.55, "learning_rate": 2.4922916332929725e-06, "logits/chosen": -1.4470919370651245, "logits/rejected": -1.3968331813812256, "logps/chosen": -359.916015625, "logps/rejected": -357.2181396484375, "loss": 0.0256, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.12449178844690323, "rewards/margins": 0.03691656142473221, "rewards/rejected": -0.16140836477279663, "step": 8420 }, { "epoch": 0.55, "learning_rate": 2.4865817751928716e-06, "logits/chosen": -1.3524065017700195, "logits/rejected": -1.2814269065856934, "logps/chosen": -326.2364501953125, "logps/rejected": -473.6361389160156, "loss": 0.0378, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1307477355003357, "rewards/margins": 0.12097755819559097, "rewards/rejected": -0.25172528624534607, "step": 8430 }, { "epoch": 0.55, "learning_rate": 2.4808719870888037e-06, "logits/chosen": -1.1732075214385986, "logits/rejected": -0.9570087194442749, "logps/chosen": -351.3630065917969, "logps/rejected": -434.7479553222656, "loss": 0.0162, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13369154930114746, "rewards/margins": 0.11665866523981094, "rewards/rejected": -0.2503502070903778, "step": 8440 }, { "epoch": 0.55, "learning_rate": 2.4751622987658206e-06, "logits/chosen": -1.4137294292449951, "logits/rejected": -1.2659153938293457, "logps/chosen": -407.6845397949219, "logps/rejected": -461.49395751953125, "loss": 0.0236, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16973426938056946, "rewards/margins": 0.06455008685588837, "rewards/rejected": -0.23428435623645782, "step": 8450 }, { "epoch": 0.55, "learning_rate": 2.4694527400084546e-06, "logits/chosen": -1.200165033340454, "logits/rejected": -1.0718694925308228, "logps/chosen": -371.3102111816406, "logps/rejected": -431.95892333984375, "loss": 0.0253, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1490655392408371, "rewards/margins": 0.06649337708950043, "rewards/rejected": -0.21555891633033752, "step": 8460 }, { "epoch": 0.55, "learning_rate": 2.4637433406005607e-06, "logits/chosen": -1.1983340978622437, "logits/rejected": -1.3590775728225708, "logps/chosen": -497.31732177734375, "logps/rejected": -504.91375732421875, "loss": 0.0213, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18759100139141083, "rewards/margins": 0.035639677196741104, "rewards/rejected": -0.22323067486286163, "step": 8470 }, { "epoch": 0.55, "learning_rate": 2.4580341303251628e-06, "logits/chosen": -0.9466561079025269, "logits/rejected": -0.8011342883110046, "logps/chosen": -443.24853515625, "logps/rejected": -505.4261779785156, "loss": 0.0307, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18329733610153198, "rewards/margins": 0.09857640415430069, "rewards/rejected": -0.28187376260757446, "step": 8480 }, { "epoch": 0.56, "learning_rate": 2.4523251389642984e-06, "logits/chosen": -1.157138466835022, "logits/rejected": -0.8302817344665527, "logps/chosen": -457.27587890625, "logps/rejected": -528.2332763671875, "loss": 0.0415, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2027166336774826, "rewards/margins": 0.10809580981731415, "rewards/rejected": -0.31081244349479675, "step": 8490 }, { "epoch": 0.56, "learning_rate": 2.4466163962988626e-06, "logits/chosen": -1.3935226202011108, "logits/rejected": -1.0951566696166992, "logps/chosen": -457.9581604003906, "logps/rejected": -463.484130859375, "loss": 0.0277, "rewards/accuracies": 0.625, "rewards/chosen": -0.17735713720321655, "rewards/margins": 0.10424534976482391, "rewards/rejected": -0.28160250186920166, "step": 8500 }, { "epoch": 0.56, "eval_logits/chosen": -1.0856397151947021, "eval_logits/rejected": -0.9559272527694702, "eval_logps/chosen": -435.6166076660156, "eval_logps/rejected": -503.44256591796875, "eval_loss": 0.02318776771426201, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -0.20361165702342987, "eval_rewards/margins": 0.08821899443864822, "eval_rewards/rejected": -0.2918306887149811, "eval_runtime": 715.1768, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.398, "step": 8500 }, { "epoch": 0.56, "learning_rate": 2.4409079321084543e-06, "logits/chosen": -1.1841926574707031, "logits/rejected": -1.3386503458023071, "logps/chosen": -378.7930603027344, "logps/rejected": -505.86553955078125, "loss": 0.0317, "rewards/accuracies": 0.75, "rewards/chosen": -0.16367337107658386, "rewards/margins": 0.0971335917711258, "rewards/rejected": -0.26080697774887085, "step": 8510 }, { "epoch": 0.56, "learning_rate": 2.4351997761712184e-06, "logits/chosen": -1.457297682762146, "logits/rejected": -0.920403003692627, "logps/chosen": -428.2220153808594, "logps/rejected": -456.60992431640625, "loss": 0.0109, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1828337013721466, "rewards/margins": 0.09509377181529999, "rewards/rejected": -0.2779274582862854, "step": 8520 }, { "epoch": 0.56, "learning_rate": 2.4294919582636933e-06, "logits/chosen": -1.265917181968689, "logits/rejected": -1.0613781213760376, "logps/chosen": -371.7326354980469, "logps/rejected": -448.7403259277344, "loss": 0.0252, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16063013672828674, "rewards/margins": 0.08884704858064651, "rewards/rejected": -0.24947719275951385, "step": 8530 }, { "epoch": 0.56, "learning_rate": 2.423784508160652e-06, "logits/chosen": -1.2524003982543945, "logits/rejected": -1.0956599712371826, "logps/chosen": -465.9705505371094, "logps/rejected": -498.0621643066406, "loss": 0.0137, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21073980629444122, "rewards/margins": 0.07992897927761078, "rewards/rejected": -0.290668785572052, "step": 8540 }, { "epoch": 0.56, "learning_rate": 2.418077455634951e-06, "logits/chosen": -1.1104614734649658, "logits/rejected": -1.0461666584014893, "logps/chosen": -405.6872863769531, "logps/rejected": -480.6177673339844, "loss": 0.0149, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1891162097454071, "rewards/margins": 0.047373365610837936, "rewards/rejected": -0.23648957908153534, "step": 8550 }, { "epoch": 0.56, "learning_rate": 2.4123708304573714e-06, "logits/chosen": -1.2678070068359375, "logits/rejected": -0.9694994688034058, "logps/chosen": -454.0263671875, "logps/rejected": -505.9630432128906, "loss": 0.0293, "rewards/accuracies": 0.625, "rewards/chosen": -0.16425900161266327, "rewards/margins": 0.06863544881343842, "rewards/rejected": -0.23289446532726288, "step": 8560 }, { "epoch": 0.56, "learning_rate": 2.406664662396465e-06, "logits/chosen": -0.7929967641830444, "logits/rejected": -0.7807854413986206, "logps/chosen": -402.71630859375, "logps/rejected": -442.024658203125, "loss": 0.016, "rewards/accuracies": 0.625, "rewards/chosen": -0.21833448112010956, "rewards/margins": 0.05499053746461868, "rewards/rejected": -0.27332502603530884, "step": 8570 }, { "epoch": 0.56, "learning_rate": 2.4009589812184012e-06, "logits/chosen": -1.0709073543548584, "logits/rejected": -0.7296839952468872, "logps/chosen": -380.4244689941406, "logps/rejected": -422.2705993652344, "loss": 0.0144, "rewards/accuracies": 0.625, "rewards/chosen": -0.17720454931259155, "rewards/margins": 0.09495712071657181, "rewards/rejected": -0.27216166257858276, "step": 8580 }, { "epoch": 0.56, "learning_rate": 2.3952538166868073e-06, "logits/chosen": -0.9249528646469116, "logits/rejected": -0.9736728668212891, "logps/chosen": -412.15777587890625, "logps/rejected": -519.8077392578125, "loss": 0.031, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19461138546466827, "rewards/margins": 0.12424316257238388, "rewards/rejected": -0.31885457038879395, "step": 8590 }, { "epoch": 0.56, "learning_rate": 2.389549198562616e-06, "logits/chosen": -1.1701180934906006, "logits/rejected": -0.7888563275337219, "logps/chosen": -424.3082580566406, "logps/rejected": -507.860595703125, "loss": 0.0187, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.198226660490036, "rewards/margins": 0.11353246122598648, "rewards/rejected": -0.3117591440677643, "step": 8600 }, { "epoch": 0.56, "eval_logits/chosen": -1.103326678276062, "eval_logits/rejected": -0.9719772934913635, "eval_logps/chosen": -428.9140930175781, "eval_logps/rejected": -498.3625793457031, "eval_loss": 0.02304939180612564, "eval_rewards/accuracies": 0.6700000166893005, "eval_rewards/chosen": -0.19690914452075958, "eval_rewards/margins": 0.0898415818810463, "eval_rewards/rejected": -0.2867507338523865, "eval_runtime": 716.7049, "eval_samples_per_second": 2.791, "eval_steps_per_second": 1.395, "step": 8600 }, { "epoch": 0.56, "learning_rate": 2.3838451566039098e-06, "logits/chosen": -1.3200554847717285, "logits/rejected": -1.1475986242294312, "logps/chosen": -441.43927001953125, "logps/rejected": -461.1790466308594, "loss": 0.0344, "rewards/accuracies": 0.625, "rewards/chosen": -0.20292928814888, "rewards/margins": 0.02987387776374817, "rewards/rejected": -0.23280315101146698, "step": 8610 }, { "epoch": 0.56, "learning_rate": 2.3781417205657662e-06, "logits/chosen": -1.2200117111206055, "logits/rejected": -1.0525275468826294, "logps/chosen": -401.9714660644531, "logps/rejected": -434.011962890625, "loss": 0.0456, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20421500504016876, "rewards/margins": 0.07024766504764557, "rewards/rejected": -0.27446264028549194, "step": 8620 }, { "epoch": 0.56, "learning_rate": 2.3724389202001006e-06, "logits/chosen": -1.1263614892959595, "logits/rejected": -0.8714662790298462, "logps/chosen": -409.1367492675781, "logps/rejected": -462.83355712890625, "loss": 0.0144, "rewards/accuracies": 0.625, "rewards/chosen": -0.2068132907152176, "rewards/margins": 0.07539348304271698, "rewards/rejected": -0.28220680356025696, "step": 8630 }, { "epoch": 0.57, "learning_rate": 2.366736785255514e-06, "logits/chosen": -1.150650143623352, "logits/rejected": -1.2050501108169556, "logps/chosen": -407.73126220703125, "logps/rejected": -461.512939453125, "loss": 0.0203, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20900245010852814, "rewards/margins": 0.0650017112493515, "rewards/rejected": -0.27400416135787964, "step": 8640 }, { "epoch": 0.57, "learning_rate": 2.3610353454771355e-06, "logits/chosen": -0.9518573880195618, "logits/rejected": -0.8007330894470215, "logps/chosen": -373.9163818359375, "logps/rejected": -441.87530517578125, "loss": 0.0463, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1839689463376999, "rewards/margins": 0.08425349742174149, "rewards/rejected": -0.268222451210022, "step": 8650 }, { "epoch": 0.57, "learning_rate": 2.355334630606467e-06, "logits/chosen": -1.416197419166565, "logits/rejected": -1.1298973560333252, "logps/chosen": -440.350830078125, "logps/rejected": -447.80908203125, "loss": 0.0152, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20006056129932404, "rewards/margins": 0.07100383937358856, "rewards/rejected": -0.2710644006729126, "step": 8660 }, { "epoch": 0.57, "learning_rate": 2.349634670381231e-06, "logits/chosen": -0.8596407771110535, "logits/rejected": -0.7406224012374878, "logps/chosen": -434.5526428222656, "logps/rejected": -514.2337646484375, "loss": 0.0428, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22735360264778137, "rewards/margins": 0.06988237798213959, "rewards/rejected": -0.29723599553108215, "step": 8670 }, { "epoch": 0.57, "learning_rate": 2.3439354945352104e-06, "logits/chosen": -1.1225316524505615, "logits/rejected": -0.9927463531494141, "logps/chosen": -433.17266845703125, "logps/rejected": -427.2831115722656, "loss": 0.0612, "rewards/accuracies": 0.5, "rewards/chosen": -0.1896490603685379, "rewards/margins": 0.03850818797945976, "rewards/rejected": -0.22815723717212677, "step": 8680 }, { "epoch": 0.57, "learning_rate": 2.3382371327981e-06, "logits/chosen": -1.1011989116668701, "logits/rejected": -0.9909159541130066, "logps/chosen": -415.0127868652344, "logps/rejected": -479.8639221191406, "loss": 0.0298, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18353652954101562, "rewards/margins": 0.07914594560861588, "rewards/rejected": -0.2626824975013733, "step": 8690 }, { "epoch": 0.57, "learning_rate": 2.3325396148953456e-06, "logits/chosen": -0.7343860864639282, "logits/rejected": -0.8679190874099731, "logps/chosen": -423.27764892578125, "logps/rejected": -564.2716064453125, "loss": 0.0464, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2519761919975281, "rewards/margins": 0.08686365187168121, "rewards/rejected": -0.33883988857269287, "step": 8700 }, { "epoch": 0.57, "eval_logits/chosen": -0.9893494248390198, "eval_logits/rejected": -0.865770697593689, "eval_logps/chosen": -447.0790100097656, "eval_logps/rejected": -509.2526550292969, "eval_loss": 0.023175977170467377, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -0.21507403254508972, "eval_rewards/margins": 0.0825667455792427, "eval_rewards/rejected": -0.29764077067375183, "eval_runtime": 714.8563, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 8700 }, { "epoch": 0.57, "learning_rate": 2.3268429705479915e-06, "logits/chosen": -1.41554856300354, "logits/rejected": -0.9965320825576782, "logps/chosen": -451.8052673339844, "logps/rejected": -490.7281188964844, "loss": 0.0236, "rewards/accuracies": 0.75, "rewards/chosen": -0.2269926816225052, "rewards/margins": 0.0802418664097786, "rewards/rejected": -0.30723458528518677, "step": 8710 }, { "epoch": 0.57, "learning_rate": 2.3211472294725248e-06, "logits/chosen": -0.946434497833252, "logits/rejected": -0.8355273008346558, "logps/chosen": -412.6177673339844, "logps/rejected": -481.0457458496094, "loss": 0.0285, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1962025761604309, "rewards/margins": 0.0810951441526413, "rewards/rejected": -0.2772977352142334, "step": 8720 }, { "epoch": 0.57, "learning_rate": 2.315452421380721e-06, "logits/chosen": -0.809756875038147, "logits/rejected": -0.6623619198799133, "logps/chosen": -468.9425354003906, "logps/rejected": -514.598388671875, "loss": 0.02, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2139982432126999, "rewards/margins": 0.09364893287420273, "rewards/rejected": -0.307647168636322, "step": 8730 }, { "epoch": 0.57, "learning_rate": 2.3097585759794886e-06, "logits/chosen": -0.8704820871353149, "logits/rejected": -0.7541652917861938, "logps/chosen": -476.35076904296875, "logps/rejected": -541.16845703125, "loss": 0.026, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22200950980186462, "rewards/margins": 0.1282096803188324, "rewards/rejected": -0.350219190120697, "step": 8740 }, { "epoch": 0.57, "learning_rate": 2.3040657229707155e-06, "logits/chosen": -1.1483707427978516, "logits/rejected": -0.950681209564209, "logps/chosen": -378.4579772949219, "logps/rejected": -489.4610290527344, "loss": 0.0228, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20641037821769714, "rewards/margins": 0.09944740682840347, "rewards/rejected": -0.3058578073978424, "step": 8750 }, { "epoch": 0.57, "learning_rate": 2.2983738920511104e-06, "logits/chosen": -1.0625044107437134, "logits/rejected": -0.8732616305351257, "logps/chosen": -451.6895446777344, "logps/rejected": -469.4817810058594, "loss": 0.0255, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1818966418504715, "rewards/margins": 0.06809432804584503, "rewards/rejected": -0.24999098479747772, "step": 8760 }, { "epoch": 0.57, "learning_rate": 2.2926831129120523e-06, "logits/chosen": -0.7622604966163635, "logits/rejected": -0.617195725440979, "logps/chosen": -426.2591247558594, "logps/rejected": -456.37567138671875, "loss": 0.0178, "rewards/accuracies": 0.75, "rewards/chosen": -0.19046752154827118, "rewards/margins": 0.05847223475575447, "rewards/rejected": -0.24893975257873535, "step": 8770 }, { "epoch": 0.57, "learning_rate": 2.2869934152394323e-06, "logits/chosen": -1.0377821922302246, "logits/rejected": -0.996080219745636, "logps/chosen": -496.8089294433594, "logps/rejected": -519.8660888671875, "loss": 0.0327, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2278434932231903, "rewards/margins": 0.07865927368402481, "rewards/rejected": -0.3065027594566345, "step": 8780 }, { "epoch": 0.58, "learning_rate": 2.281304828713501e-06, "logits/chosen": -1.2376750707626343, "logits/rejected": -1.0340487957000732, "logps/chosen": -434.2633361816406, "logps/rejected": -495.86407470703125, "loss": 0.0203, "rewards/accuracies": 0.625, "rewards/chosen": -0.20127606391906738, "rewards/margins": 0.06910329312086105, "rewards/rejected": -0.27037936449050903, "step": 8790 }, { "epoch": 0.58, "learning_rate": 2.275617383008711e-06, "logits/chosen": -1.1501182317733765, "logits/rejected": -1.095918893814087, "logps/chosen": -420.28643798828125, "logps/rejected": -474.21337890625, "loss": 0.0296, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1847989857196808, "rewards/margins": 0.05659471079707146, "rewards/rejected": -0.24139371514320374, "step": 8800 }, { "epoch": 0.58, "eval_logits/chosen": -1.0851621627807617, "eval_logits/rejected": -0.9561504125595093, "eval_logps/chosen": -423.37908935546875, "eval_logps/rejected": -486.5063171386719, "eval_loss": 0.023111525923013687, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -0.19137415289878845, "eval_rewards/margins": 0.08352024853229523, "eval_rewards/rejected": -0.2748944163322449, "eval_runtime": 714.2685, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 8800 }, { "epoch": 0.58, "learning_rate": 2.269931107793567e-06, "logits/chosen": -0.8227458000183105, "logits/rejected": -0.7988048791885376, "logps/chosen": -369.4283752441406, "logps/rejected": -444.9707946777344, "loss": 0.023, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1571480631828308, "rewards/margins": 0.06744993478059769, "rewards/rejected": -0.2245980203151703, "step": 8810 }, { "epoch": 0.58, "learning_rate": 2.2642460327304655e-06, "logits/chosen": -1.1591593027114868, "logits/rejected": -1.0964168310165405, "logps/chosen": -467.172119140625, "logps/rejected": -529.4580078125, "loss": 0.0162, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22261233627796173, "rewards/margins": 0.07813344895839691, "rewards/rejected": -0.30074578523635864, "step": 8820 }, { "epoch": 0.58, "learning_rate": 2.258562187475543e-06, "logits/chosen": -1.02983558177948, "logits/rejected": -0.6385548710823059, "logps/chosen": -440.3499450683594, "logps/rejected": -485.58746337890625, "loss": 0.0143, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21340306103229523, "rewards/margins": 0.0827493965625763, "rewards/rejected": -0.2961524426937103, "step": 8830 }, { "epoch": 0.58, "learning_rate": 2.2528796016785196e-06, "logits/chosen": -0.7042688131332397, "logits/rejected": -0.7924408912658691, "logps/chosen": -407.8680114746094, "logps/rejected": -526.853759765625, "loss": 0.0291, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21795007586479187, "rewards/margins": 0.10931973159313202, "rewards/rejected": -0.3272698223590851, "step": 8840 }, { "epoch": 0.58, "learning_rate": 2.247198304982548e-06, "logits/chosen": -0.7539125680923462, "logits/rejected": -0.7314913272857666, "logps/chosen": -384.89495849609375, "logps/rejected": -452.0636291503906, "loss": 0.0272, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2229234278202057, "rewards/margins": 0.07111810147762299, "rewards/rejected": -0.29404154419898987, "step": 8850 }, { "epoch": 0.58, "learning_rate": 2.2415183270240533e-06, "logits/chosen": -1.3467342853546143, "logits/rejected": -1.223217487335205, "logps/chosen": -414.50091552734375, "logps/rejected": -513.168701171875, "loss": 0.0452, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21517133712768555, "rewards/margins": 0.09436213970184326, "rewards/rejected": -0.3095334470272064, "step": 8860 }, { "epoch": 0.58, "learning_rate": 2.2358396974325837e-06, "logits/chosen": -0.9331719279289246, "logits/rejected": -0.9130775332450867, "logps/chosen": -465.64324951171875, "logps/rejected": -541.3157958984375, "loss": 0.0348, "rewards/accuracies": 0.75, "rewards/chosen": -0.22337570786476135, "rewards/margins": 0.10193965584039688, "rewards/rejected": -0.32531529664993286, "step": 8870 }, { "epoch": 0.58, "learning_rate": 2.2301624458306525e-06, "logits/chosen": -1.0336159467697144, "logits/rejected": -1.0400692224502563, "logps/chosen": -530.0833740234375, "logps/rejected": -551.9833374023438, "loss": 0.0212, "rewards/accuracies": 0.75, "rewards/chosen": -0.2707352936267853, "rewards/margins": 0.0732412338256836, "rewards/rejected": -0.3439764976501465, "step": 8880 }, { "epoch": 0.58, "learning_rate": 2.2244866018335855e-06, "logits/chosen": -1.0109436511993408, "logits/rejected": -0.9462583661079407, "logps/chosen": -425.92852783203125, "logps/rejected": -505.53466796875, "loss": 0.0279, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2088708132505417, "rewards/margins": 0.0651991069316864, "rewards/rejected": -0.2740699350833893, "step": 8890 }, { "epoch": 0.58, "learning_rate": 2.2188121950493648e-06, "logits/chosen": -1.1671619415283203, "logits/rejected": -0.7927858233451843, "logps/chosen": -474.0235290527344, "logps/rejected": -465.1826171875, "loss": 0.0416, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.25528010725975037, "rewards/margins": 0.06941138207912445, "rewards/rejected": -0.3246915340423584, "step": 8900 }, { "epoch": 0.58, "eval_logits/chosen": -0.9865862131118774, "eval_logits/rejected": -0.8593341112136841, "eval_logps/chosen": -486.5626525878906, "eval_logps/rejected": -561.4706420898438, "eval_loss": 0.02303200587630272, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -0.2545577585697174, "eval_rewards/margins": 0.09530099481344223, "eval_rewards/rejected": -0.34985873103141785, "eval_runtime": 716.8994, "eval_samples_per_second": 2.79, "eval_steps_per_second": 1.395, "step": 8900 }, { "epoch": 0.58, "learning_rate": 2.2131392550784766e-06, "logits/chosen": -1.0619500875473022, "logits/rejected": -0.7372158765792847, "logps/chosen": -545.7124633789062, "logps/rejected": -542.1942138671875, "loss": 0.0186, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2608337998390198, "rewards/margins": 0.09204601496458054, "rewards/rejected": -0.3528798222541809, "step": 8910 }, { "epoch": 0.58, "learning_rate": 2.2074678115137533e-06, "logits/chosen": -1.0386908054351807, "logits/rejected": -0.8392025232315063, "logps/chosen": -439.1180725097656, "logps/rejected": -552.4931030273438, "loss": 0.0316, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24440112709999084, "rewards/margins": 0.1084585189819336, "rewards/rejected": -0.35285961627960205, "step": 8920 }, { "epoch": 0.58, "learning_rate": 2.201797893940224e-06, "logits/chosen": -0.8829715847969055, "logits/rejected": -0.9416031837463379, "logps/chosen": -472.1068420410156, "logps/rejected": -570.1318969726562, "loss": 0.0137, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23892948031425476, "rewards/margins": 0.07767447084188461, "rewards/rejected": -0.31660395860671997, "step": 8930 }, { "epoch": 0.58, "learning_rate": 2.196129531934956e-06, "logits/chosen": -0.9516005516052246, "logits/rejected": -0.9088290333747864, "logps/chosen": -453.919677734375, "logps/rejected": -515.0782470703125, "loss": 0.0149, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21600571274757385, "rewards/margins": 0.07922112941741943, "rewards/rejected": -0.2952268421649933, "step": 8940 }, { "epoch": 0.59, "learning_rate": 2.190462755066902e-06, "logits/chosen": -1.1723531484603882, "logits/rejected": -0.8882836103439331, "logps/chosen": -509.51580810546875, "logps/rejected": -547.8773193359375, "loss": 0.0151, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2471146136522293, "rewards/margins": 0.06385231763124466, "rewards/rejected": -0.31096693873405457, "step": 8950 }, { "epoch": 0.59, "learning_rate": 2.184797592896746e-06, "logits/chosen": -1.1835236549377441, "logits/rejected": -1.07135808467865, "logps/chosen": -465.6865234375, "logps/rejected": -517.5577392578125, "loss": 0.0144, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23125100135803223, "rewards/margins": 0.07840429246425629, "rewards/rejected": -0.3096553385257721, "step": 8960 }, { "epoch": 0.59, "learning_rate": 2.17913407497675e-06, "logits/chosen": -1.138485074043274, "logits/rejected": -1.0707722902297974, "logps/chosen": -353.9120788574219, "logps/rejected": -465.55694580078125, "loss": 0.0477, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.17553013563156128, "rewards/margins": 0.07769358903169632, "rewards/rejected": -0.253223717212677, "step": 8970 }, { "epoch": 0.59, "learning_rate": 2.173472230850596e-06, "logits/chosen": -1.3441592454910278, "logits/rejected": -1.0344698429107666, "logps/chosen": -387.0591735839844, "logps/rejected": -409.57720947265625, "loss": 0.0418, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.186796635389328, "rewards/margins": 0.06296174973249435, "rewards/rejected": -0.24975833296775818, "step": 8980 }, { "epoch": 0.59, "learning_rate": 2.1678120900532375e-06, "logits/chosen": -1.1257423162460327, "logits/rejected": -0.9433009028434753, "logps/chosen": -465.3280334472656, "logps/rejected": -541.2745361328125, "loss": 0.0263, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2294132262468338, "rewards/margins": 0.10520676523447037, "rewards/rejected": -0.33461999893188477, "step": 8990 }, { "epoch": 0.59, "learning_rate": 2.1621536821107412e-06, "logits/chosen": -1.10348379611969, "logits/rejected": -0.9332711100578308, "logps/chosen": -399.70166015625, "logps/rejected": -439.9261779785156, "loss": 0.0374, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19713184237480164, "rewards/margins": 0.08225681632757187, "rewards/rejected": -0.2793886363506317, "step": 9000 }, { "epoch": 0.59, "eval_logits/chosen": -1.0981136560440063, "eval_logits/rejected": -0.967559814453125, "eval_logps/chosen": -427.6932678222656, "eval_logps/rejected": -490.01934814453125, "eval_loss": 0.02288178913295269, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -0.1956883817911148, "eval_rewards/margins": 0.08271908015012741, "eval_rewards/rejected": -0.2784074544906616, "eval_runtime": 716.2831, "eval_samples_per_second": 2.792, "eval_steps_per_second": 1.396, "step": 9000 }, { "epoch": 0.59, "learning_rate": 2.1564970365401346e-06, "logits/chosen": -1.304007649421692, "logits/rejected": -0.9915387034416199, "logps/chosen": -371.32330322265625, "logps/rejected": -412.00274658203125, "loss": 0.0264, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18717223405838013, "rewards/margins": 0.07868753373622894, "rewards/rejected": -0.2658597528934479, "step": 9010 }, { "epoch": 0.59, "learning_rate": 2.1508421828492527e-06, "logits/chosen": -1.3968846797943115, "logits/rejected": -1.0737828016281128, "logps/chosen": -378.46612548828125, "logps/rejected": -379.1678161621094, "loss": 0.0332, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.15327341854572296, "rewards/margins": 0.06894200295209885, "rewards/rejected": -0.2222154140472412, "step": 9020 }, { "epoch": 0.59, "learning_rate": 2.145189150536582e-06, "logits/chosen": -1.0988125801086426, "logits/rejected": -1.017960548400879, "logps/chosen": -388.9222412109375, "logps/rejected": -396.22100830078125, "loss": 0.038, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.16675269603729248, "rewards/margins": 0.056701015681028366, "rewards/rejected": -0.22345371544361115, "step": 9030 }, { "epoch": 0.59, "learning_rate": 2.139537969091107e-06, "logits/chosen": -1.0001757144927979, "logits/rejected": -0.9789652824401855, "logps/chosen": -451.310546875, "logps/rejected": -427.1902770996094, "loss": 0.0256, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18572688102722168, "rewards/margins": 0.03805026784539223, "rewards/rejected": -0.2237771451473236, "step": 9040 }, { "epoch": 0.59, "learning_rate": 2.1338886679921603e-06, "logits/chosen": -1.1584631204605103, "logits/rejected": -1.0880556106567383, "logps/chosen": -411.52130126953125, "logps/rejected": -452.15106201171875, "loss": 0.0319, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.16997823119163513, "rewards/margins": 0.06104014068841934, "rewards/rejected": -0.23101837933063507, "step": 9050 }, { "epoch": 0.59, "learning_rate": 2.128241276709263e-06, "logits/chosen": -1.3563841581344604, "logits/rejected": -1.4039558172225952, "logps/chosen": -339.45831298828125, "logps/rejected": -427.64019775390625, "loss": 0.0305, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1361965537071228, "rewards/margins": 0.06814448535442352, "rewards/rejected": -0.20434102416038513, "step": 9060 }, { "epoch": 0.59, "learning_rate": 2.1225958247019746e-06, "logits/chosen": -1.4546421766281128, "logits/rejected": -1.4920566082000732, "logps/chosen": -353.6759948730469, "logps/rejected": -425.53094482421875, "loss": 0.0144, "rewards/accuracies": 0.625, "rewards/chosen": -0.1665341705083847, "rewards/margins": 0.05171530693769455, "rewards/rejected": -0.21824948489665985, "step": 9070 }, { "epoch": 0.59, "learning_rate": 2.1169523414197383e-06, "logits/chosen": -1.087052583694458, "logits/rejected": -1.0149786472320557, "logps/chosen": -364.465087890625, "logps/rejected": -433.18572998046875, "loss": 0.0177, "rewards/accuracies": 0.5, "rewards/chosen": -0.16565944254398346, "rewards/margins": 0.04539897292852402, "rewards/rejected": -0.21105840802192688, "step": 9080 }, { "epoch": 0.59, "learning_rate": 2.1113108563017267e-06, "logits/chosen": -1.0152915716171265, "logits/rejected": -0.9994319677352905, "logps/chosen": -445.9559020996094, "logps/rejected": -495.5621032714844, "loss": 0.0256, "rewards/accuracies": 0.625, "rewards/chosen": -0.2331726998090744, "rewards/margins": 0.08023539930582047, "rewards/rejected": -0.3134080767631531, "step": 9090 }, { "epoch": 0.6, "learning_rate": 2.1056713987766905e-06, "logits/chosen": -1.2826510667800903, "logits/rejected": -1.093489408493042, "logps/chosen": -411.60089111328125, "logps/rejected": -445.36260986328125, "loss": 0.026, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19640058279037476, "rewards/margins": 0.0824771523475647, "rewards/rejected": -0.27887773513793945, "step": 9100 }, { "epoch": 0.6, "eval_logits/chosen": -1.1806323528289795, "eval_logits/rejected": -1.0459074974060059, "eval_logps/chosen": -422.1302185058594, "eval_logps/rejected": -480.4328918457031, "eval_loss": 0.023122180253267288, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -0.1901252567768097, "eval_rewards/margins": 0.07869578152894974, "eval_rewards/rejected": -0.26882103085517883, "eval_runtime": 714.4884, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.4, "step": 9100 }, { "epoch": 0.6, "learning_rate": 2.1000339982628022e-06, "logits/chosen": -1.0115242004394531, "logits/rejected": -0.9201635122299194, "logps/chosen": -467.29193115234375, "logps/rejected": -500.25885009765625, "loss": 0.0187, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21601906418800354, "rewards/margins": 0.06728707253932953, "rewards/rejected": -0.2833061218261719, "step": 9110 }, { "epoch": 0.6, "learning_rate": 2.0943986841675043e-06, "logits/chosen": -1.2916444540023804, "logits/rejected": -0.9944061040878296, "logps/chosen": -408.3946533203125, "logps/rejected": -474.9046936035156, "loss": 0.0101, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20518366992473602, "rewards/margins": 0.08584979921579361, "rewards/rejected": -0.29103344678878784, "step": 9120 }, { "epoch": 0.6, "learning_rate": 2.088765485887356e-06, "logits/chosen": -1.2981860637664795, "logits/rejected": -1.0464775562286377, "logps/chosen": -425.41387939453125, "logps/rejected": -431.24407958984375, "loss": 0.0206, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18069897592067719, "rewards/margins": 0.04745563864707947, "rewards/rejected": -0.22815461456775665, "step": 9130 }, { "epoch": 0.6, "learning_rate": 2.083134432807879e-06, "logits/chosen": -1.3589370250701904, "logits/rejected": -1.1675573587417603, "logps/chosen": -394.42449951171875, "logps/rejected": -518.3064575195312, "loss": 0.02, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20015235245227814, "rewards/margins": 0.1023639664053917, "rewards/rejected": -0.30251628160476685, "step": 9140 }, { "epoch": 0.6, "learning_rate": 2.077505554303404e-06, "logits/chosen": -1.2667617797851562, "logits/rejected": -1.2513234615325928, "logps/chosen": -306.76043701171875, "logps/rejected": -382.7593994140625, "loss": 0.0153, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1322084367275238, "rewards/margins": 0.07163238525390625, "rewards/rejected": -0.20384080708026886, "step": 9150 }, { "epoch": 0.6, "learning_rate": 2.071878879736918e-06, "logits/chosen": -1.4281046390533447, "logits/rejected": -1.276647686958313, "logps/chosen": -415.97222900390625, "logps/rejected": -553.07666015625, "loss": 0.022, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16913263499736786, "rewards/margins": 0.05882060527801514, "rewards/rejected": -0.227953240275383, "step": 9160 }, { "epoch": 0.6, "learning_rate": 2.0662544384599136e-06, "logits/chosen": -1.2730841636657715, "logits/rejected": -1.1552072763442993, "logps/chosen": -330.63323974609375, "logps/rejected": -387.49981689453125, "loss": 0.0329, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.12852661311626434, "rewards/margins": 0.07335192710161209, "rewards/rejected": -0.20187854766845703, "step": 9170 }, { "epoch": 0.6, "learning_rate": 2.0606322598122314e-06, "logits/chosen": -1.2468078136444092, "logits/rejected": -1.3828418254852295, "logps/chosen": -312.9874267578125, "logps/rejected": -356.3817138671875, "loss": 0.0179, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.12647388875484467, "rewards/margins": 0.02373340353369713, "rewards/rejected": -0.1502072960138321, "step": 9180 }, { "epoch": 0.6, "learning_rate": 2.0550123731219085e-06, "logits/chosen": -1.744610071182251, "logits/rejected": -1.3850758075714111, "logps/chosen": -352.10723876953125, "logps/rejected": -371.8777770996094, "loss": 0.0254, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09892739355564117, "rewards/margins": 0.060915421694517136, "rewards/rejected": -0.15984280407428741, "step": 9190 }, { "epoch": 0.6, "learning_rate": 2.0493948077050267e-06, "logits/chosen": -1.0627145767211914, "logits/rejected": -0.9221268892288208, "logps/chosen": -318.2849426269531, "logps/rejected": -383.50439453125, "loss": 0.0247, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.12502898275852203, "rewards/margins": 0.08330903947353363, "rewards/rejected": -0.20833802223205566, "step": 9200 }, { "epoch": 0.6, "eval_logits/chosen": -1.3378127813339233, "eval_logits/rejected": -1.1933120489120483, "eval_logps/chosen": -349.09417724609375, "eval_logps/rejected": -403.3863830566406, "eval_loss": 0.02362094819545746, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -0.11708921939134598, "eval_rewards/margins": 0.07468526810407639, "eval_rewards/rejected": -0.19177447259426117, "eval_runtime": 714.4811, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.4, "step": 9200 }, { "epoch": 0.6, "learning_rate": 2.0437795928655596e-06, "logits/chosen": -1.4034985303878784, "logits/rejected": -1.448559045791626, "logps/chosen": -401.74755859375, "logps/rejected": -441.1143493652344, "loss": 0.023, "rewards/accuracies": 0.625, "rewards/chosen": -0.12003491818904877, "rewards/margins": 0.05755053833127022, "rewards/rejected": -0.1775854527950287, "step": 9210 }, { "epoch": 0.6, "learning_rate": 2.0381667578952184e-06, "logits/chosen": -1.4078466892242432, "logits/rejected": -1.2501410245895386, "logps/chosen": -346.948974609375, "logps/rejected": -447.73126220703125, "loss": 0.0527, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13607612252235413, "rewards/margins": 0.09516526758670807, "rewards/rejected": -0.2312413901090622, "step": 9220 }, { "epoch": 0.6, "learning_rate": 2.0325563320732995e-06, "logits/chosen": -1.4277667999267578, "logits/rejected": -1.270898461341858, "logps/chosen": -417.259765625, "logps/rejected": -461.073974609375, "loss": 0.0214, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15392141044139862, "rewards/margins": 0.08783210813999176, "rewards/rejected": -0.2417535036802292, "step": 9230 }, { "epoch": 0.6, "learning_rate": 2.026948344666532e-06, "logits/chosen": -1.0002763271331787, "logits/rejected": -1.0820634365081787, "logps/chosen": -394.8781433105469, "logps/rejected": -489.8401794433594, "loss": 0.0237, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19575223326683044, "rewards/margins": 0.09540662914514542, "rewards/rejected": -0.29115888476371765, "step": 9240 }, { "epoch": 0.61, "learning_rate": 2.0213428249289257e-06, "logits/chosen": -0.688109815120697, "logits/rejected": -0.8924848437309265, "logps/chosen": -426.791259765625, "logps/rejected": -532.3915405273438, "loss": 0.02, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22825077176094055, "rewards/margins": 0.10379274189472198, "rewards/rejected": -0.33204352855682373, "step": 9250 }, { "epoch": 0.61, "learning_rate": 2.0157398021016175e-06, "logits/chosen": -0.9222259521484375, "logits/rejected": -0.9409950971603394, "logps/chosen": -345.83319091796875, "logps/rejected": -485.420166015625, "loss": 0.0198, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1982978880405426, "rewards/margins": 0.09338219463825226, "rewards/rejected": -0.2916800379753113, "step": 9260 }, { "epoch": 0.61, "learning_rate": 2.010139305412719e-06, "logits/chosen": -1.5309436321258545, "logits/rejected": -1.2233386039733887, "logps/chosen": -502.99072265625, "logps/rejected": -533.7097778320312, "loss": 0.0128, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22639504075050354, "rewards/margins": 0.07308313995599747, "rewards/rejected": -0.2994782030582428, "step": 9270 }, { "epoch": 0.61, "learning_rate": 2.0045413640771644e-06, "logits/chosen": -1.210741400718689, "logits/rejected": -0.9934937357902527, "logps/chosen": -482.53302001953125, "logps/rejected": -584.1541137695312, "loss": 0.0267, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22736771404743195, "rewards/margins": 0.09836546331644058, "rewards/rejected": -0.3257331848144531, "step": 9280 }, { "epoch": 0.61, "learning_rate": 1.998946007296558e-06, "logits/chosen": -1.2440948486328125, "logits/rejected": -1.130077838897705, "logps/chosen": -508.4644470214844, "logps/rejected": -538.3056640625, "loss": 0.0139, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19986383616924286, "rewards/margins": 0.09327809512615204, "rewards/rejected": -0.2931419610977173, "step": 9290 }, { "epoch": 0.61, "learning_rate": 1.9933532642590215e-06, "logits/chosen": -0.842333972454071, "logits/rejected": -0.5751477479934692, "logps/chosen": -351.67999267578125, "logps/rejected": -397.8497009277344, "loss": 0.0193, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16252170503139496, "rewards/margins": 0.09633050858974457, "rewards/rejected": -0.2588522136211395, "step": 9300 }, { "epoch": 0.61, "eval_logits/chosen": -1.09257173538208, "eval_logits/rejected": -0.9579460620880127, "eval_logps/chosen": -440.48712158203125, "eval_logps/rejected": -506.2588195800781, "eval_loss": 0.023130562156438828, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -0.20848216116428375, "eval_rewards/margins": 0.08616477996110916, "eval_rewards/rejected": -0.2946469485759735, "eval_runtime": 713.9068, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.401, "step": 9300 }, { "epoch": 0.61, "learning_rate": 1.987763164139042e-06, "logits/chosen": -1.2068696022033691, "logits/rejected": -0.9639801979064941, "logps/chosen": -415.4097595214844, "logps/rejected": -505.5393981933594, "loss": 0.0176, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2101546823978424, "rewards/margins": 0.08787533640861511, "rewards/rejected": -0.29802998900413513, "step": 9310 }, { "epoch": 0.61, "learning_rate": 1.982175736097321e-06, "logits/chosen": -0.9660323858261108, "logits/rejected": -0.9258397817611694, "logps/chosen": -513.8777465820312, "logps/rejected": -613.3631591796875, "loss": 0.018, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.23933669924736023, "rewards/margins": 0.08246560394763947, "rewards/rejected": -0.3218023180961609, "step": 9320 }, { "epoch": 0.61, "learning_rate": 1.9765910092806196e-06, "logits/chosen": -1.0344462394714355, "logits/rejected": -0.8943778872489929, "logps/chosen": -341.25616455078125, "logps/rejected": -392.76373291015625, "loss": 0.0366, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16318069398403168, "rewards/margins": 0.0711657851934433, "rewards/rejected": -0.23434647917747498, "step": 9330 }, { "epoch": 0.61, "learning_rate": 1.9710090128216083e-06, "logits/chosen": -1.1656386852264404, "logits/rejected": -0.9893093109130859, "logps/chosen": -454.06317138671875, "logps/rejected": -549.486328125, "loss": 0.0245, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2313331663608551, "rewards/margins": 0.11158627271652222, "rewards/rejected": -0.3429194390773773, "step": 9340 }, { "epoch": 0.61, "learning_rate": 1.9654297758387155e-06, "logits/chosen": -1.0094630718231201, "logits/rejected": -0.8354465365409851, "logps/chosen": -393.27862548828125, "logps/rejected": -481.7088928222656, "loss": 0.0289, "rewards/accuracies": 0.625, "rewards/chosen": -0.23240864276885986, "rewards/margins": 0.06770481169223785, "rewards/rejected": -0.3001134693622589, "step": 9350 }, { "epoch": 0.61, "learning_rate": 1.9598533274359736e-06, "logits/chosen": -1.005838394165039, "logits/rejected": -1.0910465717315674, "logps/chosen": -455.43585205078125, "logps/rejected": -499.8480529785156, "loss": 0.0238, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.21575555205345154, "rewards/margins": 0.03705067187547684, "rewards/rejected": -0.25280624628067017, "step": 9360 }, { "epoch": 0.61, "learning_rate": 1.9542796967028697e-06, "logits/chosen": -1.3075627088546753, "logits/rejected": -1.0493786334991455, "logps/chosen": -423.6852111816406, "logps/rejected": -462.982421875, "loss": 0.0155, "rewards/accuracies": 0.75, "rewards/chosen": -0.20544464886188507, "rewards/margins": 0.05754012614488602, "rewards/rejected": -0.2629847526550293, "step": 9370 }, { "epoch": 0.61, "learning_rate": 1.948708912714192e-06, "logits/chosen": -0.7306618094444275, "logits/rejected": -0.8221921920776367, "logps/chosen": -493.35369873046875, "logps/rejected": -525.2420654296875, "loss": 0.023, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24640849232673645, "rewards/margins": 0.06282415241003036, "rewards/rejected": -0.309232622385025, "step": 9380 }, { "epoch": 0.61, "learning_rate": 1.9431410045298786e-06, "logits/chosen": -0.7777185440063477, "logits/rejected": -0.9001587629318237, "logps/chosen": -412.6835021972656, "logps/rejected": -488.19775390625, "loss": 0.0183, "rewards/accuracies": 0.75, "rewards/chosen": -0.19555814564228058, "rewards/margins": 0.07648999243974686, "rewards/rejected": -0.2720481753349304, "step": 9390 }, { "epoch": 0.62, "learning_rate": 1.9375760011948654e-06, "logits/chosen": -1.1120212078094482, "logits/rejected": -1.1079556941986084, "logps/chosen": -371.98907470703125, "logps/rejected": -483.3607482910156, "loss": 0.028, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17231112718582153, "rewards/margins": 0.08905123174190521, "rewards/rejected": -0.26136231422424316, "step": 9400 }, { "epoch": 0.62, "eval_logits/chosen": -1.048321008682251, "eval_logits/rejected": -0.9186062216758728, "eval_logps/chosen": -416.7447204589844, "eval_logps/rejected": -474.66119384765625, "eval_loss": 0.023227345198392868, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -0.18473976850509644, "eval_rewards/margins": 0.07830949872732162, "eval_rewards/rejected": -0.26304924488067627, "eval_runtime": 714.239, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 9400 }, { "epoch": 0.62, "learning_rate": 1.932013931738937e-06, "logits/chosen": -1.0578038692474365, "logits/rejected": -0.8294500112533569, "logps/chosen": -415.5970764160156, "logps/rejected": -551.5552368164062, "loss": 0.031, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21199533343315125, "rewards/margins": 0.12307636439800262, "rewards/rejected": -0.3350716829299927, "step": 9410 }, { "epoch": 0.62, "learning_rate": 1.9264548251765717e-06, "logits/chosen": -1.177817940711975, "logits/rejected": -1.0999213457107544, "logps/chosen": -396.321533203125, "logps/rejected": -468.03399658203125, "loss": 0.0145, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19251887500286102, "rewards/margins": 0.07482416182756424, "rewards/rejected": -0.26734304428100586, "step": 9420 }, { "epoch": 0.62, "learning_rate": 1.9208987105067924e-06, "logits/chosen": -0.8988040685653687, "logits/rejected": -0.727282702922821, "logps/chosen": -407.2919921875, "logps/rejected": -453.62957763671875, "loss": 0.0274, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19273492693901062, "rewards/margins": 0.07022528350353241, "rewards/rejected": -0.26296019554138184, "step": 9430 }, { "epoch": 0.62, "learning_rate": 1.9153456167130154e-06, "logits/chosen": -0.9604202508926392, "logits/rejected": -0.9843860864639282, "logps/chosen": -407.5146484375, "logps/rejected": -514.0279541015625, "loss": 0.0407, "rewards/accuracies": 0.625, "rewards/chosen": -0.20344404876232147, "rewards/margins": 0.08031464368104935, "rewards/rejected": -0.28375867009162903, "step": 9440 }, { "epoch": 0.62, "learning_rate": 1.9097955727628975e-06, "logits/chosen": -1.1365015506744385, "logits/rejected": -1.1631238460540771, "logps/chosen": -364.5661926269531, "logps/rejected": -443.78521728515625, "loss": 0.035, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.16858068108558655, "rewards/margins": 0.06728406250476837, "rewards/rejected": -0.2358647584915161, "step": 9450 }, { "epoch": 0.62, "learning_rate": 1.904248607608187e-06, "logits/chosen": -0.6814016103744507, "logits/rejected": -0.9138643145561218, "logps/chosen": -460.478271484375, "logps/rejected": -474.22381591796875, "loss": 0.018, "rewards/accuracies": 0.75, "rewards/chosen": -0.20325827598571777, "rewards/margins": 0.0608523003757, "rewards/rejected": -0.2641105651855469, "step": 9460 }, { "epoch": 0.62, "learning_rate": 1.8987047501845714e-06, "logits/chosen": -1.0013633966445923, "logits/rejected": -0.7580755949020386, "logps/chosen": -400.2743225097656, "logps/rejected": -491.4267578125, "loss": 0.0239, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23334984481334686, "rewards/margins": 0.09739793092012405, "rewards/rejected": -0.3307478129863739, "step": 9470 }, { "epoch": 0.62, "learning_rate": 1.8931640294115267e-06, "logits/chosen": -0.6929682493209839, "logits/rejected": -0.43697991967201233, "logps/chosen": -416.61920166015625, "logps/rejected": -504.7317810058594, "loss": 0.0439, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22261326014995575, "rewards/margins": 0.10364284366369247, "rewards/rejected": -0.3262560963630676, "step": 9480 }, { "epoch": 0.62, "learning_rate": 1.8876264741921662e-06, "logits/chosen": -0.7538151741027832, "logits/rejected": -0.7175725698471069, "logps/chosen": -424.713623046875, "logps/rejected": -527.66455078125, "loss": 0.0235, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.23458819091320038, "rewards/margins": 0.10838641226291656, "rewards/rejected": -0.34297463297843933, "step": 9490 }, { "epoch": 0.62, "learning_rate": 1.8820921134130912e-06, "logits/chosen": -0.927859902381897, "logits/rejected": -0.5650545358657837, "logps/chosen": -482.4242248535156, "logps/rejected": -587.3576049804688, "loss": 0.0119, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.25191205739974976, "rewards/margins": 0.14984267950057983, "rewards/rejected": -0.401754766702652, "step": 9500 }, { "epoch": 0.62, "eval_logits/chosen": -0.7299796938896179, "eval_logits/rejected": -0.615023136138916, "eval_logps/chosen": -492.27032470703125, "eval_logps/rejected": -561.1231689453125, "eval_loss": 0.02354375086724758, "eval_rewards/accuracies": 0.6660000085830688, "eval_rewards/chosen": -0.2602653503417969, "eval_rewards/margins": 0.08924593031406403, "eval_rewards/rejected": -0.3495113253593445, "eval_runtime": 716.7968, "eval_samples_per_second": 2.79, "eval_steps_per_second": 1.395, "step": 9500 }, { "epoch": 0.62, "learning_rate": 1.8765609759442378e-06, "logits/chosen": -0.2547836899757385, "logits/rejected": -0.3996204733848572, "logps/chosen": -521.2526245117188, "logps/rejected": -587.9320678710938, "loss": 0.0178, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.27993136644363403, "rewards/margins": 0.07749157398939133, "rewards/rejected": -0.35742294788360596, "step": 9510 }, { "epoch": 0.62, "learning_rate": 1.8710330906387288e-06, "logits/chosen": -1.201780080795288, "logits/rejected": -1.1656601428985596, "logps/chosen": -532.4616088867188, "logps/rejected": -641.0281982421875, "loss": 0.0256, "rewards/accuracies": 0.75, "rewards/chosen": -0.2939741015434265, "rewards/margins": 0.08497663587331772, "rewards/rejected": -0.37895068526268005, "step": 9520 }, { "epoch": 0.62, "learning_rate": 1.8655084863327222e-06, "logits/chosen": -0.6963063478469849, "logits/rejected": -0.6313799619674683, "logps/chosen": -394.53387451171875, "logps/rejected": -481.82757568359375, "loss": 0.0256, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.21107593178749084, "rewards/margins": 0.08113153278827667, "rewards/rejected": -0.2922074794769287, "step": 9530 }, { "epoch": 0.62, "learning_rate": 1.8599871918452603e-06, "logits/chosen": -0.5522001385688782, "logits/rejected": -0.645211935043335, "logps/chosen": -478.93585205078125, "logps/rejected": -573.2581787109375, "loss": 0.01, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.25584399700164795, "rewards/margins": 0.08107289671897888, "rewards/rejected": -0.3369169235229492, "step": 9540 }, { "epoch": 0.62, "learning_rate": 1.8544692359781192e-06, "logits/chosen": -0.5303779244422913, "logits/rejected": -0.56864333152771, "logps/chosen": -405.8132629394531, "logps/rejected": -448.8749084472656, "loss": 0.039, "rewards/accuracies": 0.625, "rewards/chosen": -0.21822865307331085, "rewards/margins": 0.07400884479284286, "rewards/rejected": -0.2922375202178955, "step": 9550 }, { "epoch": 0.63, "learning_rate": 1.8489546475156602e-06, "logits/chosen": -0.9627624750137329, "logits/rejected": -0.9790258407592773, "logps/chosen": -456.1783142089844, "logps/rejected": -519.9884033203125, "loss": 0.014, "rewards/accuracies": 0.75, "rewards/chosen": -0.23668548464775085, "rewards/margins": 0.08197810500860214, "rewards/rejected": -0.3186635971069336, "step": 9560 }, { "epoch": 0.63, "learning_rate": 1.8434434552246778e-06, "logits/chosen": -0.70296710729599, "logits/rejected": -0.6965140104293823, "logps/chosen": -445.3968200683594, "logps/rejected": -510.31341552734375, "loss": 0.019, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23576530814170837, "rewards/margins": 0.07617957890033722, "rewards/rejected": -0.3119449019432068, "step": 9570 }, { "epoch": 0.63, "learning_rate": 1.837935687854251e-06, "logits/chosen": -0.8402090072631836, "logits/rejected": -0.6238566637039185, "logps/chosen": -453.19305419921875, "logps/rejected": -509.3421936035156, "loss": 0.0313, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2316463738679886, "rewards/margins": 0.08794920891523361, "rewards/rejected": -0.3195955455303192, "step": 9580 }, { "epoch": 0.63, "learning_rate": 1.832431374135592e-06, "logits/chosen": -0.9057952761650085, "logits/rejected": -1.0153768062591553, "logps/chosen": -498.65045166015625, "logps/rejected": -592.8546142578125, "loss": 0.0188, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2502151131629944, "rewards/margins": 0.11307723820209503, "rewards/rejected": -0.363292396068573, "step": 9590 }, { "epoch": 0.63, "learning_rate": 1.8269305427818977e-06, "logits/chosen": -0.9547305107116699, "logits/rejected": -0.8878790736198425, "logps/chosen": -448.505615234375, "logps/rejected": -484.99029541015625, "loss": 0.0178, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23988935351371765, "rewards/margins": 0.06414850056171417, "rewards/rejected": -0.304037868976593, "step": 9600 }, { "epoch": 0.63, "eval_logits/chosen": -0.7643759846687317, "eval_logits/rejected": -0.6485594511032104, "eval_logps/chosen": -478.0711364746094, "eval_logps/rejected": -544.4889526367188, "eval_loss": 0.02320183627307415, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -0.24606618285179138, "eval_rewards/margins": 0.08681086450815201, "eval_rewards/rejected": -0.3328770101070404, "eval_runtime": 715.8807, "eval_samples_per_second": 2.794, "eval_steps_per_second": 1.397, "step": 9600 }, { "epoch": 0.63, "learning_rate": 1.821433222488199e-06, "logits/chosen": -0.45180949568748474, "logits/rejected": -0.5726373791694641, "logps/chosen": -455.1207580566406, "logps/rejected": -511.7930603027344, "loss": 0.0088, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22950604557991028, "rewards/margins": 0.08441989123821259, "rewards/rejected": -0.31392592191696167, "step": 9610 }, { "epoch": 0.63, "learning_rate": 1.8159394419312112e-06, "logits/chosen": -0.9840434193611145, "logits/rejected": -0.7001659870147705, "logps/chosen": -506.9087829589844, "logps/rejected": -607.7694702148438, "loss": 0.0224, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24817872047424316, "rewards/margins": 0.14739947021007538, "rewards/rejected": -0.3955782353878021, "step": 9620 }, { "epoch": 0.63, "learning_rate": 1.8104492297691845e-06, "logits/chosen": -0.9013242721557617, "logits/rejected": -0.7251831293106079, "logps/chosen": -580.0592041015625, "logps/rejected": -634.0327758789062, "loss": 0.0408, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35353603959083557, "rewards/margins": 0.07808615267276764, "rewards/rejected": -0.431622177362442, "step": 9630 }, { "epoch": 0.63, "learning_rate": 1.8049626146417562e-06, "logits/chosen": -0.06507325172424316, "logits/rejected": -0.3152211010456085, "logps/chosen": -457.336181640625, "logps/rejected": -569.2476806640625, "loss": 0.0538, "rewards/accuracies": 0.625, "rewards/chosen": -0.30226030945777893, "rewards/margins": 0.11562438309192657, "rewards/rejected": -0.4178847372531891, "step": 9640 }, { "epoch": 0.63, "learning_rate": 1.7994796251697983e-06, "logits/chosen": -0.43689170479774475, "logits/rejected": -0.22706642746925354, "logps/chosen": -507.68975830078125, "logps/rejected": -654.2774047851562, "loss": 0.0178, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.31565091013908386, "rewards/margins": 0.10702282190322876, "rewards/rejected": -0.422673761844635, "step": 9650 }, { "epoch": 0.63, "learning_rate": 1.794000289955269e-06, "logits/chosen": -0.5939318537712097, "logits/rejected": -0.7237597703933716, "logps/chosen": -592.3312377929688, "logps/rejected": -647.5352783203125, "loss": 0.0384, "rewards/accuracies": 0.625, "rewards/chosen": -0.32527098059654236, "rewards/margins": 0.08665090054273605, "rewards/rejected": -0.4119219183921814, "step": 9660 }, { "epoch": 0.63, "learning_rate": 1.7885246375810646e-06, "logits/chosen": -0.29227548837661743, "logits/rejected": -0.3264136016368866, "logps/chosen": -489.73822021484375, "logps/rejected": -550.651611328125, "loss": 0.0244, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2654643654823303, "rewards/margins": 0.06467528641223907, "rewards/rejected": -0.330139696598053, "step": 9670 }, { "epoch": 0.63, "learning_rate": 1.7830526966108713e-06, "logits/chosen": -0.5846347212791443, "logits/rejected": -0.4384569227695465, "logps/chosen": -513.6630859375, "logps/rejected": -611.033203125, "loss": 0.048, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3271901309490204, "rewards/margins": 0.12598168849945068, "rewards/rejected": -0.45317181944847107, "step": 9680 }, { "epoch": 0.63, "learning_rate": 1.7775844955890129e-06, "logits/chosen": -0.5724061727523804, "logits/rejected": -0.4638313353061676, "logps/chosen": -467.29132080078125, "logps/rejected": -557.9446411132812, "loss": 0.0208, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.255744993686676, "rewards/margins": 0.10222996771335602, "rewards/rejected": -0.35797494649887085, "step": 9690 }, { "epoch": 0.63, "learning_rate": 1.7721200630403046e-06, "logits/chosen": -0.578085720539093, "logits/rejected": -0.5706226825714111, "logps/chosen": -424.65167236328125, "logps/rejected": -520.2809448242188, "loss": 0.0355, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2288893461227417, "rewards/margins": 0.0700833797454834, "rewards/rejected": -0.2989726960659027, "step": 9700 }, { "epoch": 0.63, "eval_logits/chosen": -0.8231872320175171, "eval_logits/rejected": -0.7045159339904785, "eval_logps/chosen": -493.8836669921875, "eval_logps/rejected": -555.6817626953125, "eval_loss": 0.023173321038484573, "eval_rewards/accuracies": 0.6650000214576721, "eval_rewards/chosen": -0.26187869906425476, "eval_rewards/margins": 0.08219120651483536, "eval_rewards/rejected": -0.3440699279308319, "eval_runtime": 716.8548, "eval_samples_per_second": 2.79, "eval_steps_per_second": 1.395, "step": 9700 }, { "epoch": 0.64, "learning_rate": 1.7666594274699037e-06, "logits/chosen": -0.7449361681938171, "logits/rejected": -0.6999005079269409, "logps/chosen": -543.0670166015625, "logps/rejected": -621.6408081054688, "loss": 0.0157, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.29037678241729736, "rewards/margins": 0.1167779341340065, "rewards/rejected": -0.40715470910072327, "step": 9710 }, { "epoch": 0.64, "learning_rate": 1.76120261736316e-06, "logits/chosen": -0.7081559896469116, "logits/rejected": -0.4256313741207123, "logps/chosen": -497.9676208496094, "logps/rejected": -584.1989135742188, "loss": 0.0251, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2712525725364685, "rewards/margins": 0.11337454617023468, "rewards/rejected": -0.3846271336078644, "step": 9720 }, { "epoch": 0.64, "learning_rate": 1.755749661185468e-06, "logits/chosen": -0.9210338592529297, "logits/rejected": -0.7490307092666626, "logps/chosen": -567.2615356445312, "logps/rejected": -597.1901245117188, "loss": 0.0216, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.27281200885772705, "rewards/margins": 0.0887623056769371, "rewards/rejected": -0.36157432198524475, "step": 9730 }, { "epoch": 0.64, "learning_rate": 1.7503005873821183e-06, "logits/chosen": -0.7272100448608398, "logits/rejected": -0.8965389132499695, "logps/chosen": -420.45831298828125, "logps/rejected": -545.4678344726562, "loss": 0.0132, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.26002997159957886, "rewards/margins": 0.09874562919139862, "rewards/rejected": -0.35877561569213867, "step": 9740 }, { "epoch": 0.64, "learning_rate": 1.744855424378148e-06, "logits/chosen": -0.5290648341178894, "logits/rejected": -0.8868474960327148, "logps/chosen": -446.6329040527344, "logps/rejected": -573.585693359375, "loss": 0.0205, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.25800594687461853, "rewards/margins": 0.10350728034973145, "rewards/rejected": -0.36151322722435, "step": 9750 }, { "epoch": 0.64, "learning_rate": 1.7394142005781973e-06, "logits/chosen": -0.880287766456604, "logits/rejected": -0.7796365022659302, "logps/chosen": -536.3551025390625, "logps/rejected": -610.8638305664062, "loss": 0.0141, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.27464404702186584, "rewards/margins": 0.0709378570318222, "rewards/rejected": -0.34558188915252686, "step": 9760 }, { "epoch": 0.64, "learning_rate": 1.7339769443663528e-06, "logits/chosen": -0.8233503103256226, "logits/rejected": -0.8335351943969727, "logps/chosen": -404.61309814453125, "logps/rejected": -491.12237548828125, "loss": 0.0279, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.26167091727256775, "rewards/margins": 0.08580608665943146, "rewards/rejected": -0.347476989030838, "step": 9770 }, { "epoch": 0.64, "learning_rate": 1.7285436841060078e-06, "logits/chosen": -0.9491190910339355, "logits/rejected": -0.8153438568115234, "logps/chosen": -536.893798828125, "logps/rejected": -571.3229370117188, "loss": 0.0238, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.26076540350914, "rewards/margins": 0.0773833766579628, "rewards/rejected": -0.3381488025188446, "step": 9780 }, { "epoch": 0.64, "learning_rate": 1.7231144481397083e-06, "logits/chosen": -1.0377777814865112, "logits/rejected": -0.9175702929496765, "logps/chosen": -456.68841552734375, "logps/rejected": -497.2161560058594, "loss": 0.0138, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2362900674343109, "rewards/margins": 0.0659664124250412, "rewards/rejected": -0.3022565245628357, "step": 9790 }, { "epoch": 0.64, "learning_rate": 1.7176892647890092e-06, "logits/chosen": -0.9101846814155579, "logits/rejected": -0.5792271494865417, "logps/chosen": -493.27691650390625, "logps/rejected": -509.8060607910156, "loss": 0.0238, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.25761085748672485, "rewards/margins": 0.055410634726285934, "rewards/rejected": -0.3130214810371399, "step": 9800 }, { "epoch": 0.64, "eval_logits/chosen": -0.8786482214927673, "eval_logits/rejected": -0.7576613426208496, "eval_logps/chosen": -495.97174072265625, "eval_logps/rejected": -555.2313232421875, "eval_loss": 0.023389168083667755, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -0.26396679878234863, "eval_rewards/margins": 0.07965261489152908, "eval_rewards/rejected": -0.3436194062232971, "eval_runtime": 715.6888, "eval_samples_per_second": 2.795, "eval_steps_per_second": 1.397, "step": 9800 }, { "epoch": 0.64, "learning_rate": 1.7122681623543239e-06, "logits/chosen": -1.0381107330322266, "logits/rejected": -1.0744585990905762, "logps/chosen": -501.9896545410156, "logps/rejected": -595.4471435546875, "loss": 0.0192, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2550010085105896, "rewards/margins": 0.1046714335680008, "rewards/rejected": -0.359672486782074, "step": 9810 }, { "epoch": 0.64, "learning_rate": 1.7068511691147788e-06, "logits/chosen": -0.8338741064071655, "logits/rejected": -0.777117908000946, "logps/chosen": -422.57427978515625, "logps/rejected": -499.3260803222656, "loss": 0.0112, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2284432202577591, "rewards/margins": 0.06838220357894897, "rewards/rejected": -0.29682543873786926, "step": 9820 }, { "epoch": 0.64, "learning_rate": 1.7014383133280636e-06, "logits/chosen": -1.031035304069519, "logits/rejected": -0.6725751161575317, "logps/chosen": -528.4259033203125, "logps/rejected": -549.6954345703125, "loss": 0.0273, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2797914743423462, "rewards/margins": 0.07466354966163635, "rewards/rejected": -0.35445499420166016, "step": 9830 }, { "epoch": 0.64, "learning_rate": 1.696029623230286e-06, "logits/chosen": -0.9840434789657593, "logits/rejected": -1.0721688270568848, "logps/chosen": -507.8865661621094, "logps/rejected": -622.5177001953125, "loss": 0.0175, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.251964271068573, "rewards/margins": 0.09585200250148773, "rewards/rejected": -0.34781625866889954, "step": 9840 }, { "epoch": 0.64, "learning_rate": 1.6906251270358229e-06, "logits/chosen": -1.107911467552185, "logits/rejected": -1.0096760988235474, "logps/chosen": -534.1586303710938, "logps/rejected": -538.4244995117188, "loss": 0.0154, "rewards/accuracies": 0.75, "rewards/chosen": -0.2713043689727783, "rewards/margins": 0.054007671773433685, "rewards/rejected": -0.3253120481967926, "step": 9850 }, { "epoch": 0.65, "learning_rate": 1.685224852937174e-06, "logits/chosen": -0.8664234280586243, "logits/rejected": -0.5502254366874695, "logps/chosen": -447.83697509765625, "logps/rejected": -654.33740234375, "loss": 0.0351, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.24947214126586914, "rewards/margins": 0.16464152932167053, "rewards/rejected": -0.4141136109828949, "step": 9860 }, { "epoch": 0.65, "learning_rate": 1.6798288291048136e-06, "logits/chosen": -0.8167924880981445, "logits/rejected": -0.7502031326293945, "logps/chosen": -520.0014038085938, "logps/rejected": -609.8368530273438, "loss": 0.0242, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.29675978422164917, "rewards/margins": 0.11854046583175659, "rewards/rejected": -0.41530027985572815, "step": 9870 }, { "epoch": 0.65, "learning_rate": 1.6744370836870466e-06, "logits/chosen": -1.5024065971374512, "logits/rejected": -1.071380853652954, "logps/chosen": -603.2869262695312, "logps/rejected": -634.3928833007812, "loss": 0.0164, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.26859331130981445, "rewards/margins": 0.11462382972240448, "rewards/rejected": -0.3832171559333801, "step": 9880 }, { "epoch": 0.65, "learning_rate": 1.6690496448098576e-06, "logits/chosen": -0.8916500210762024, "logits/rejected": -0.7170946598052979, "logps/chosen": -452.3939514160156, "logps/rejected": -501.5279846191406, "loss": 0.0201, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22335776686668396, "rewards/margins": 0.06999315321445465, "rewards/rejected": -0.2933509051799774, "step": 9890 }, { "epoch": 0.65, "learning_rate": 1.6636665405767666e-06, "logits/chosen": -0.7821698784828186, "logits/rejected": -0.6962658762931824, "logps/chosen": -459.97174072265625, "logps/rejected": -508.4629821777344, "loss": 0.0315, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22220078110694885, "rewards/margins": 0.06738129258155823, "rewards/rejected": -0.2895820736885071, "step": 9900 }, { "epoch": 0.65, "eval_logits/chosen": -0.9754452705383301, "eval_logits/rejected": -0.8463736772537231, "eval_logps/chosen": -472.1986389160156, "eval_logps/rejected": -543.9803466796875, "eval_loss": 0.023106198757886887, "eval_rewards/accuracies": 0.6669999957084656, "eval_rewards/chosen": -0.2401936948299408, "eval_rewards/margins": 0.09217477589845657, "eval_rewards/rejected": -0.3323684632778168, "eval_runtime": 716.734, "eval_samples_per_second": 2.79, "eval_steps_per_second": 1.395, "step": 9900 }, { "epoch": 0.65, "learning_rate": 1.6582877990686827e-06, "logits/chosen": -0.9380279779434204, "logits/rejected": -1.0737155675888062, "logps/chosen": -323.838623046875, "logps/rejected": -452.19482421875, "loss": 0.0271, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20254425704479218, "rewards/margins": 0.10602487623691559, "rewards/rejected": -0.30856913328170776, "step": 9910 }, { "epoch": 0.65, "learning_rate": 1.6529134483437562e-06, "logits/chosen": -0.9110596776008606, "logits/rejected": -1.0504239797592163, "logps/chosen": -446.7535095214844, "logps/rejected": -509.842529296875, "loss": 0.0363, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2411792278289795, "rewards/margins": 0.10254274308681488, "rewards/rejected": -0.34372198581695557, "step": 9920 }, { "epoch": 0.65, "learning_rate": 1.647543516437233e-06, "logits/chosen": -1.1894365549087524, "logits/rejected": -1.1917979717254639, "logps/chosen": -417.04693603515625, "logps/rejected": -522.7171020507812, "loss": 0.0366, "rewards/accuracies": 0.625, "rewards/chosen": -0.21800872683525085, "rewards/margins": 0.08498533070087433, "rewards/rejected": -0.3029940724372864, "step": 9930 }, { "epoch": 0.65, "learning_rate": 1.6421780313613088e-06, "logits/chosen": -1.0490660667419434, "logits/rejected": -0.6548418402671814, "logps/chosen": -424.30499267578125, "logps/rejected": -493.7715759277344, "loss": 0.0391, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21956340968608856, "rewards/margins": 0.10265642404556274, "rewards/rejected": -0.3222197890281677, "step": 9940 }, { "epoch": 0.65, "learning_rate": 1.6368170211049816e-06, "logits/chosen": -0.7849863767623901, "logits/rejected": -0.6953365206718445, "logps/chosen": -528.9827880859375, "logps/rejected": -562.0130004882812, "loss": 0.0175, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.24518266320228577, "rewards/margins": 0.09314166009426117, "rewards/rejected": -0.33832424879074097, "step": 9950 }, { "epoch": 0.65, "learning_rate": 1.6314605136339074e-06, "logits/chosen": -1.04189932346344, "logits/rejected": -0.8757699131965637, "logps/chosen": -414.812744140625, "logps/rejected": -471.56463623046875, "loss": 0.0445, "rewards/accuracies": 0.625, "rewards/chosen": -0.21922604739665985, "rewards/margins": 0.07465136051177979, "rewards/rejected": -0.29387742280960083, "step": 9960 }, { "epoch": 0.65, "learning_rate": 1.6261085368902526e-06, "logits/chosen": -1.3741345405578613, "logits/rejected": -1.1826173067092896, "logps/chosen": -461.17742919921875, "logps/rejected": -482.8580017089844, "loss": 0.0224, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19623661041259766, "rewards/margins": 0.06531001627445221, "rewards/rejected": -0.26154661178588867, "step": 9970 }, { "epoch": 0.65, "learning_rate": 1.6207611187925503e-06, "logits/chosen": -1.0865633487701416, "logits/rejected": -1.0380734205245972, "logps/chosen": -436.879150390625, "logps/rejected": -570.8054809570312, "loss": 0.0328, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22805233299732208, "rewards/margins": 0.08760843425989151, "rewards/rejected": -0.3156607747077942, "step": 9980 }, { "epoch": 0.65, "learning_rate": 1.6154182872355512e-06, "logits/chosen": -0.9029491543769836, "logits/rejected": -1.0374914407730103, "logps/chosen": -408.09735107421875, "logps/rejected": -493.378662109375, "loss": 0.0452, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2442799061536789, "rewards/margins": 0.07086168974637985, "rewards/rejected": -0.31514161825180054, "step": 9990 }, { "epoch": 0.65, "learning_rate": 1.610080070090084e-06, "logits/chosen": -1.034294605255127, "logits/rejected": -0.896858811378479, "logps/chosen": -459.4972229003906, "logps/rejected": -564.35546875, "loss": 0.0267, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.27857595682144165, "rewards/margins": 0.11388659477233887, "rewards/rejected": -0.3924625813961029, "step": 10000 }, { "epoch": 0.65, "eval_logits/chosen": -1.0083671808242798, "eval_logits/rejected": -0.8768463730812073, "eval_logps/chosen": -465.3472595214844, "eval_logps/rejected": -539.839599609375, "eval_loss": 0.02333732694387436, "eval_rewards/accuracies": 0.6644999980926514, "eval_rewards/chosen": -0.23334230482578278, "eval_rewards/margins": 0.0948854312300682, "eval_rewards/rejected": -0.3282277286052704, "eval_runtime": 712.8235, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 10000 }, { "epoch": 0.65, "learning_rate": 1.6047464952029034e-06, "logits/chosen": -1.298064947128296, "logits/rejected": -1.2320672273635864, "logps/chosen": -460.6311950683594, "logps/rejected": -573.823974609375, "loss": 0.0105, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20634377002716064, "rewards/margins": 0.10496672242879868, "rewards/rejected": -0.3113105595111847, "step": 10010 }, { "epoch": 0.66, "learning_rate": 1.5994175903965486e-06, "logits/chosen": -0.8933245539665222, "logits/rejected": -0.6711474061012268, "logps/chosen": -494.6673889160156, "logps/rejected": -592.6099243164062, "loss": 0.0337, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24433700740337372, "rewards/margins": 0.09618799388408661, "rewards/rejected": -0.3405250608921051, "step": 10020 }, { "epoch": 0.66, "learning_rate": 1.5940933834691977e-06, "logits/chosen": -1.3617267608642578, "logits/rejected": -0.927338719367981, "logps/chosen": -534.2659912109375, "logps/rejected": -500.13623046875, "loss": 0.0239, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22900009155273438, "rewards/margins": 0.07376754283905029, "rewards/rejected": -0.3027676045894623, "step": 10030 }, { "epoch": 0.66, "learning_rate": 1.588773902194522e-06, "logits/chosen": -0.9182957410812378, "logits/rejected": -0.6212406754493713, "logps/chosen": -466.16357421875, "logps/rejected": -593.2935791015625, "loss": 0.0126, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.26208385825157166, "rewards/margins": 0.1313703954219818, "rewards/rejected": -0.39345425367355347, "step": 10040 }, { "epoch": 0.66, "learning_rate": 1.583459174321541e-06, "logits/chosen": -0.7339197993278503, "logits/rejected": -0.7337380647659302, "logps/chosen": -478.57989501953125, "logps/rejected": -558.4588623046875, "loss": 0.0309, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.268962562084198, "rewards/margins": 0.10697309672832489, "rewards/rejected": -0.3759356439113617, "step": 10050 }, { "epoch": 0.66, "learning_rate": 1.5781492275744797e-06, "logits/chosen": -1.3656100034713745, "logits/rejected": -1.1886814832687378, "logps/chosen": -524.6400756835938, "logps/rejected": -612.5868530273438, "loss": 0.0191, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23039564490318298, "rewards/margins": 0.11416473239660263, "rewards/rejected": -0.3445603847503662, "step": 10060 }, { "epoch": 0.66, "learning_rate": 1.5728440896526215e-06, "logits/chosen": -0.8405293226242065, "logits/rejected": -0.757517397403717, "logps/chosen": -471.328369140625, "logps/rejected": -519.2421264648438, "loss": 0.0122, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19792643189430237, "rewards/margins": 0.09411662071943283, "rewards/rejected": -0.2920430898666382, "step": 10070 }, { "epoch": 0.66, "learning_rate": 1.5675437882301633e-06, "logits/chosen": -1.0592447519302368, "logits/rejected": -1.000099778175354, "logps/chosen": -419.27032470703125, "logps/rejected": -411.03851318359375, "loss": 0.0383, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.20022614300251007, "rewards/margins": 0.026563968509435654, "rewards/rejected": -0.22679011523723602, "step": 10080 }, { "epoch": 0.66, "learning_rate": 1.5622483509560748e-06, "logits/chosen": -0.9169955253601074, "logits/rejected": -1.0003207921981812, "logps/chosen": -345.77484130859375, "logps/rejected": -475.9300231933594, "loss": 0.034, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17504572868347168, "rewards/margins": 0.09997285902500153, "rewards/rejected": -0.2750186026096344, "step": 10090 }, { "epoch": 0.66, "learning_rate": 1.5569578054539506e-06, "logits/chosen": -1.129949927330017, "logits/rejected": -0.8046758770942688, "logps/chosen": -460.017578125, "logps/rejected": -514.3373413085938, "loss": 0.018, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18082045018672943, "rewards/margins": 0.1381828933954239, "rewards/rejected": -0.3190033435821533, "step": 10100 }, { "epoch": 0.66, "eval_logits/chosen": -1.0826523303985596, "eval_logits/rejected": -0.950714111328125, "eval_logps/chosen": -419.0773620605469, "eval_logps/rejected": -481.2974548339844, "eval_loss": 0.02352077141404152, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -0.18707245588302612, "eval_rewards/margins": 0.08261309564113617, "eval_rewards/rejected": -0.2696855366230011, "eval_runtime": 716.063, "eval_samples_per_second": 2.793, "eval_steps_per_second": 1.397, "step": 10100 }, { "epoch": 0.66, "learning_rate": 1.551672179321867e-06, "logits/chosen": -1.0766394138336182, "logits/rejected": -1.0798542499542236, "logps/chosen": -398.76123046875, "logps/rejected": -453.6695861816406, "loss": 0.0158, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.18290428817272186, "rewards/margins": 0.07607951015233994, "rewards/rejected": -0.2589837908744812, "step": 10110 }, { "epoch": 0.66, "learning_rate": 1.5463915001322398e-06, "logits/chosen": -1.0942211151123047, "logits/rejected": -0.9170432090759277, "logps/chosen": -438.19091796875, "logps/rejected": -520.5393676757812, "loss": 0.0396, "rewards/accuracies": 0.625, "rewards/chosen": -0.19253458082675934, "rewards/margins": 0.0967160314321518, "rewards/rejected": -0.28925061225891113, "step": 10120 }, { "epoch": 0.66, "learning_rate": 1.5411157954316784e-06, "logits/chosen": -1.2777159214019775, "logits/rejected": -0.9283512830734253, "logps/chosen": -391.48992919921875, "logps/rejected": -445.36431884765625, "loss": 0.0149, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18961237370967865, "rewards/margins": 0.06756417453289032, "rewards/rejected": -0.25717657804489136, "step": 10130 }, { "epoch": 0.66, "learning_rate": 1.535845092740843e-06, "logits/chosen": -1.1484943628311157, "logits/rejected": -1.1189451217651367, "logps/chosen": -411.4507751464844, "logps/rejected": -473.96868896484375, "loss": 0.0278, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.17412957549095154, "rewards/margins": 0.05012207478284836, "rewards/rejected": -0.2242516577243805, "step": 10140 }, { "epoch": 0.66, "learning_rate": 1.5305794195543005e-06, "logits/chosen": -1.237973928451538, "logits/rejected": -1.2358169555664062, "logps/chosen": -407.0169372558594, "logps/rejected": -479.32513427734375, "loss": 0.0295, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1998378336429596, "rewards/margins": 0.08923965692520142, "rewards/rejected": -0.2890775203704834, "step": 10150 }, { "epoch": 0.66, "learning_rate": 1.5253188033403816e-06, "logits/chosen": -1.3141751289367676, "logits/rejected": -1.249427080154419, "logps/chosen": -349.1514892578125, "logps/rejected": -402.80487060546875, "loss": 0.026, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.17841657996177673, "rewards/margins": 0.0369950532913208, "rewards/rejected": -0.21541163325309753, "step": 10160 }, { "epoch": 0.67, "learning_rate": 1.520063271541037e-06, "logits/chosen": -1.2129685878753662, "logits/rejected": -1.118257761001587, "logps/chosen": -377.628662109375, "logps/rejected": -509.57470703125, "loss": 0.0145, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19912391901016235, "rewards/margins": 0.1432490050792694, "rewards/rejected": -0.34237295389175415, "step": 10170 }, { "epoch": 0.67, "learning_rate": 1.5148128515716954e-06, "logits/chosen": -1.4041849374771118, "logits/rejected": -0.9499991536140442, "logps/chosen": -441.6243591308594, "logps/rejected": -473.60400390625, "loss": 0.0265, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17509515583515167, "rewards/margins": 0.10435383021831512, "rewards/rejected": -0.2794489860534668, "step": 10180 }, { "epoch": 0.67, "learning_rate": 1.5095675708211197e-06, "logits/chosen": -1.2573442459106445, "logits/rejected": -1.1832830905914307, "logps/chosen": -429.87518310546875, "logps/rejected": -490.94189453125, "loss": 0.0502, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.238714337348938, "rewards/margins": 0.037723056972026825, "rewards/rejected": -0.2764374315738678, "step": 10190 }, { "epoch": 0.67, "learning_rate": 1.504327456651263e-06, "logits/chosen": -1.0339068174362183, "logits/rejected": -0.9260076284408569, "logps/chosen": -493.24200439453125, "logps/rejected": -547.1416625976562, "loss": 0.0183, "rewards/accuracies": 0.75, "rewards/chosen": -0.23183909058570862, "rewards/margins": 0.08086429536342621, "rewards/rejected": -0.31270334124565125, "step": 10200 }, { "epoch": 0.67, "eval_logits/chosen": -1.1422438621520996, "eval_logits/rejected": -1.0028353929519653, "eval_logps/chosen": -446.30010986328125, "eval_logps/rejected": -522.2762451171875, "eval_loss": 0.023282397538423538, "eval_rewards/accuracies": 0.6660000085830688, "eval_rewards/chosen": -0.2142951935529709, "eval_rewards/margins": 0.09636922925710678, "eval_rewards/rejected": -0.31066441535949707, "eval_runtime": 715.0266, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.399, "step": 10200 }, { "epoch": 0.67, "learning_rate": 1.4990925363971284e-06, "logits/chosen": -1.2530837059020996, "logits/rejected": -0.7607366442680359, "logps/chosen": -526.9996337890625, "logps/rejected": -627.1127319335938, "loss": 0.0367, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23707647621631622, "rewards/margins": 0.1686021387577057, "rewards/rejected": -0.4056786000728607, "step": 10210 }, { "epoch": 0.67, "learning_rate": 1.4938628373666236e-06, "logits/chosen": -1.1070585250854492, "logits/rejected": -1.0464718341827393, "logps/chosen": -374.60211181640625, "logps/rejected": -462.85137939453125, "loss": 0.0329, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20202283561229706, "rewards/margins": 0.08637657761573792, "rewards/rejected": -0.2883993983268738, "step": 10220 }, { "epoch": 0.67, "learning_rate": 1.4886383868404203e-06, "logits/chosen": -0.9251636266708374, "logits/rejected": -0.9677795171737671, "logps/chosen": -331.31475830078125, "logps/rejected": -416.2554626464844, "loss": 0.016, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17009297013282776, "rewards/margins": 0.09497067332267761, "rewards/rejected": -0.26506364345550537, "step": 10230 }, { "epoch": 0.67, "learning_rate": 1.483419212071813e-06, "logits/chosen": -0.8171922564506531, "logits/rejected": -0.5974997282028198, "logps/chosen": -397.45245361328125, "logps/rejected": -464.6980895996094, "loss": 0.0295, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20898830890655518, "rewards/margins": 0.07442699372768402, "rewards/rejected": -0.2834152579307556, "step": 10240 }, { "epoch": 0.67, "learning_rate": 1.478205340286573e-06, "logits/chosen": -1.0605578422546387, "logits/rejected": -1.1492241621017456, "logps/chosen": -460.0035705566406, "logps/rejected": -510.2579650878906, "loss": 0.044, "rewards/accuracies": 0.5, "rewards/chosen": -0.2563946545124054, "rewards/margins": 0.06825922429561615, "rewards/rejected": -0.32465386390686035, "step": 10250 }, { "epoch": 0.67, "learning_rate": 1.4729967986828104e-06, "logits/chosen": -1.1528981924057007, "logits/rejected": -1.0834747552871704, "logps/chosen": -509.494873046875, "logps/rejected": -551.4874877929688, "loss": 0.0287, "rewards/accuracies": 0.75, "rewards/chosen": -0.18751117587089539, "rewards/margins": 0.08996061235666275, "rewards/rejected": -0.27747178077697754, "step": 10260 }, { "epoch": 0.67, "learning_rate": 1.4677936144308286e-06, "logits/chosen": -1.2274298667907715, "logits/rejected": -0.9683295488357544, "logps/chosen": -399.6896057128906, "logps/rejected": -491.942626953125, "loss": 0.0291, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17544488608837128, "rewards/margins": 0.1203160509467125, "rewards/rejected": -0.2957609295845032, "step": 10270 }, { "epoch": 0.67, "learning_rate": 1.4625958146729864e-06, "logits/chosen": -1.3483864068984985, "logits/rejected": -1.0505443811416626, "logps/chosen": -406.66851806640625, "logps/rejected": -478.885498046875, "loss": 0.021, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18640469014644623, "rewards/margins": 0.08816500753164291, "rewards/rejected": -0.27456969022750854, "step": 10280 }, { "epoch": 0.67, "learning_rate": 1.4574034265235523e-06, "logits/chosen": -1.1080683469772339, "logits/rejected": -0.8625591397285461, "logps/chosen": -435.21197509765625, "logps/rejected": -440.0894470214844, "loss": 0.0322, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17938342690467834, "rewards/margins": 0.10264036804437637, "rewards/rejected": -0.2820238173007965, "step": 10290 }, { "epoch": 0.67, "learning_rate": 1.452216477068568e-06, "logits/chosen": -0.9548495411872864, "logits/rejected": -0.6878235936164856, "logps/chosen": -380.4754943847656, "logps/rejected": -393.02935791015625, "loss": 0.0162, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.15043053030967712, "rewards/margins": 0.10546863079071045, "rewards/rejected": -0.2558991312980652, "step": 10300 }, { "epoch": 0.67, "eval_logits/chosen": -1.128338098526001, "eval_logits/rejected": -0.9919458031654358, "eval_logps/chosen": -428.4236755371094, "eval_logps/rejected": -494.7216796875, "eval_loss": 0.02286919392645359, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -0.19641870260238647, "eval_rewards/margins": 0.08669107407331467, "eval_rewards/rejected": -0.28310978412628174, "eval_runtime": 718.9393, "eval_samples_per_second": 2.782, "eval_steps_per_second": 1.391, "step": 10300 }, { "epoch": 0.67, "learning_rate": 1.4470349933657004e-06, "logits/chosen": -1.6205631494522095, "logits/rejected": -1.214104413986206, "logps/chosen": -387.6978759765625, "logps/rejected": -454.418212890625, "loss": 0.0228, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16696825623512268, "rewards/margins": 0.09069926291704178, "rewards/rejected": -0.25766751170158386, "step": 10310 }, { "epoch": 0.68, "learning_rate": 1.4418590024441096e-06, "logits/chosen": -1.399248480796814, "logits/rejected": -0.9027661085128784, "logps/chosen": -448.176513671875, "logps/rejected": -474.9004821777344, "loss": 0.019, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19471624493598938, "rewards/margins": 0.09320934861898422, "rewards/rejected": -0.2879256010055542, "step": 10320 }, { "epoch": 0.68, "learning_rate": 1.436688531304297e-06, "logits/chosen": -1.2631736993789673, "logits/rejected": -1.039074420928955, "logps/chosen": -402.1783447265625, "logps/rejected": -488.706298828125, "loss": 0.03, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18385080993175507, "rewards/margins": 0.09201714396476746, "rewards/rejected": -0.2758679687976837, "step": 10330 }, { "epoch": 0.68, "learning_rate": 1.431523606917974e-06, "logits/chosen": -1.1699297428131104, "logits/rejected": -1.1202093362808228, "logps/chosen": -446.58905029296875, "logps/rejected": -551.8796997070312, "loss": 0.0192, "rewards/accuracies": 0.625, "rewards/chosen": -0.24148976802825928, "rewards/margins": 0.09867588430643082, "rewards/rejected": -0.3401656746864319, "step": 10340 }, { "epoch": 0.68, "learning_rate": 1.4263642562279162e-06, "logits/chosen": -0.9263246655464172, "logits/rejected": -0.8033633232116699, "logps/chosen": -488.059814453125, "logps/rejected": -607.4882202148438, "loss": 0.0095, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23857250809669495, "rewards/margins": 0.11093832552433014, "rewards/rejected": -0.3495108485221863, "step": 10350 }, { "epoch": 0.68, "learning_rate": 1.4212105061478257e-06, "logits/chosen": -1.0120891332626343, "logits/rejected": -0.8031824827194214, "logps/chosen": -503.4956970214844, "logps/rejected": -597.7406005859375, "loss": 0.0216, "rewards/accuracies": 0.75, "rewards/chosen": -0.2736051380634308, "rewards/margins": 0.08642153441905975, "rewards/rejected": -0.36002665758132935, "step": 10360 }, { "epoch": 0.68, "learning_rate": 1.4160623835621848e-06, "logits/chosen": -1.451712965965271, "logits/rejected": -1.1101382970809937, "logps/chosen": -435.37451171875, "logps/rejected": -524.0077514648438, "loss": 0.0153, "rewards/accuracies": 0.75, "rewards/chosen": -0.19806334376335144, "rewards/margins": 0.10011138021945953, "rewards/rejected": -0.2981747090816498, "step": 10370 }, { "epoch": 0.68, "learning_rate": 1.4109199153261249e-06, "logits/chosen": -1.1535043716430664, "logits/rejected": -0.9691632986068726, "logps/chosen": -500.82427978515625, "logps/rejected": -577.4119262695312, "loss": 0.0139, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22482964396476746, "rewards/margins": 0.10690130293369293, "rewards/rejected": -0.3317309617996216, "step": 10380 }, { "epoch": 0.68, "learning_rate": 1.405783128265278e-06, "logits/chosen": -1.1918237209320068, "logits/rejected": -1.096649169921875, "logps/chosen": -469.00018310546875, "logps/rejected": -531.7686767578125, "loss": 0.0198, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.26486673951148987, "rewards/margins": 0.06603468954563141, "rewards/rejected": -0.3309014141559601, "step": 10390 }, { "epoch": 0.68, "learning_rate": 1.4006520491756427e-06, "logits/chosen": -1.0796409845352173, "logits/rejected": -0.7628124952316284, "logps/chosen": -405.5753479003906, "logps/rejected": -439.8372497558594, "loss": 0.0134, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20935113728046417, "rewards/margins": 0.0989028587937355, "rewards/rejected": -0.3082539737224579, "step": 10400 }, { "epoch": 0.68, "eval_logits/chosen": -1.1325947046279907, "eval_logits/rejected": -0.9949163794517517, "eval_logps/chosen": -439.49896240234375, "eval_logps/rejected": -510.0121765136719, "eval_loss": 0.023098591715097427, "eval_rewards/accuracies": 0.6660000085830688, "eval_rewards/chosen": -0.20749403536319733, "eval_rewards/margins": 0.09090621769428253, "eval_rewards/rejected": -0.29840028285980225, "eval_runtime": 715.7631, "eval_samples_per_second": 2.794, "eval_steps_per_second": 1.397, "step": 10400 }, { "epoch": 0.68, "learning_rate": 1.39552670482344e-06, "logits/chosen": -1.0814664363861084, "logits/rejected": -1.198115587234497, "logps/chosen": -365.227294921875, "logps/rejected": -437.4296875, "loss": 0.0195, "rewards/accuracies": 0.625, "rewards/chosen": -0.19062772393226624, "rewards/margins": 0.07231093943119049, "rewards/rejected": -0.26293864846229553, "step": 10410 }, { "epoch": 0.68, "learning_rate": 1.3904071219449776e-06, "logits/chosen": -1.0268791913986206, "logits/rejected": -0.7396032214164734, "logps/chosen": -372.50347900390625, "logps/rejected": -364.7556457519531, "loss": 0.0228, "rewards/accuracies": 0.625, "rewards/chosen": -0.1726018637418747, "rewards/margins": 0.07691226154565811, "rewards/rejected": -0.2495141327381134, "step": 10420 }, { "epoch": 0.68, "learning_rate": 1.3852933272465068e-06, "logits/chosen": -1.1927297115325928, "logits/rejected": -1.0620100498199463, "logps/chosen": -352.6067199707031, "logps/rejected": -384.55133056640625, "loss": 0.0258, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1147465854883194, "rewards/margins": 0.06716850399971008, "rewards/rejected": -0.1819150745868683, "step": 10430 }, { "epoch": 0.68, "learning_rate": 1.3801853474040873e-06, "logits/chosen": -1.0750788450241089, "logits/rejected": -1.0022704601287842, "logps/chosen": -421.36053466796875, "logps/rejected": -498.98583984375, "loss": 0.0194, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18283934891223907, "rewards/margins": 0.09408555179834366, "rewards/rejected": -0.27692490816116333, "step": 10440 }, { "epoch": 0.68, "learning_rate": 1.3750832090634417e-06, "logits/chosen": -1.2315846681594849, "logits/rejected": -1.0047106742858887, "logps/chosen": -352.58447265625, "logps/rejected": -412.78741455078125, "loss": 0.0132, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16641180217266083, "rewards/margins": 0.0748542845249176, "rewards/rejected": -0.24126610159873962, "step": 10450 }, { "epoch": 0.68, "learning_rate": 1.3699869388398245e-06, "logits/chosen": -1.0742437839508057, "logits/rejected": -0.9582866430282593, "logps/chosen": -400.4927673339844, "logps/rejected": -462.943115234375, "loss": 0.0182, "rewards/accuracies": 0.625, "rewards/chosen": -0.18747146427631378, "rewards/margins": 0.08090374618768692, "rewards/rejected": -0.2683752179145813, "step": 10460 }, { "epoch": 0.69, "learning_rate": 1.3648965633178772e-06, "logits/chosen": -1.1939609050750732, "logits/rejected": -1.0864191055297852, "logps/chosen": -382.8097839355469, "logps/rejected": -484.5547790527344, "loss": 0.0295, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18207761645317078, "rewards/margins": 0.09033346176147461, "rewards/rejected": -0.2724111080169678, "step": 10470 }, { "epoch": 0.69, "learning_rate": 1.3598121090514938e-06, "logits/chosen": -0.9973956942558289, "logits/rejected": -1.0758070945739746, "logps/chosen": -341.7118225097656, "logps/rejected": -400.3506774902344, "loss": 0.0229, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15870392322540283, "rewards/margins": 0.08276219666004181, "rewards/rejected": -0.24146613478660583, "step": 10480 }, { "epoch": 0.69, "learning_rate": 1.3547336025636753e-06, "logits/chosen": -1.0464165210723877, "logits/rejected": -0.7893644571304321, "logps/chosen": -479.22674560546875, "logps/rejected": -506.6607360839844, "loss": 0.0191, "rewards/accuracies": 0.625, "rewards/chosen": -0.20393025875091553, "rewards/margins": 0.0689629539847374, "rewards/rejected": -0.2728932201862335, "step": 10490 }, { "epoch": 0.69, "learning_rate": 1.3496610703464022e-06, "logits/chosen": -1.2571821212768555, "logits/rejected": -0.9113852381706238, "logps/chosen": -441.39324951171875, "logps/rejected": -475.70562744140625, "loss": 0.0195, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.21217873692512512, "rewards/margins": 0.07983369380235672, "rewards/rejected": -0.29201242327690125, "step": 10500 }, { "epoch": 0.69, "eval_logits/chosen": -1.100478172302246, "eval_logits/rejected": -0.965235710144043, "eval_logps/chosen": -434.7630920410156, "eval_logps/rejected": -502.5017395019531, "eval_loss": 0.022978268563747406, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -0.20275816321372986, "eval_rewards/margins": 0.08813170343637466, "eval_rewards/rejected": -0.2908898591995239, "eval_runtime": 717.0244, "eval_samples_per_second": 2.789, "eval_steps_per_second": 1.395, "step": 10500 }, { "epoch": 0.69, "learning_rate": 1.3445945388604848e-06, "logits/chosen": -1.1791807413101196, "logits/rejected": -0.7003865838050842, "logps/chosen": -478.32086181640625, "logps/rejected": -550.1403198242188, "loss": 0.0267, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24009737372398376, "rewards/margins": 0.11498017609119415, "rewards/rejected": -0.3550775647163391, "step": 10510 }, { "epoch": 0.69, "learning_rate": 1.3395340345354358e-06, "logits/chosen": -1.157021164894104, "logits/rejected": -1.2307841777801514, "logps/chosen": -422.7535705566406, "logps/rejected": -533.4961547851562, "loss": 0.0209, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1990557610988617, "rewards/margins": 0.09349498152732849, "rewards/rejected": -0.2925507426261902, "step": 10520 }, { "epoch": 0.69, "learning_rate": 1.334479583769322e-06, "logits/chosen": -1.3072254657745361, "logits/rejected": -1.2678983211517334, "logps/chosen": -467.9456481933594, "logps/rejected": -468.49468994140625, "loss": 0.0278, "rewards/accuracies": 0.625, "rewards/chosen": -0.21310687065124512, "rewards/margins": 0.05051872879266739, "rewards/rejected": -0.2636255919933319, "step": 10530 }, { "epoch": 0.69, "learning_rate": 1.3294312129286366e-06, "logits/chosen": -1.0449919700622559, "logits/rejected": -0.9436649084091187, "logps/chosen": -443.8401794433594, "logps/rejected": -491.58856201171875, "loss": 0.0153, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17711159586906433, "rewards/margins": 0.061419256031513214, "rewards/rejected": -0.23853084444999695, "step": 10540 }, { "epoch": 0.69, "learning_rate": 1.324388948348153e-06, "logits/chosen": -1.4212422370910645, "logits/rejected": -1.0505855083465576, "logps/chosen": -456.15283203125, "logps/rejected": -464.35430908203125, "loss": 0.0237, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17091485857963562, "rewards/margins": 0.08956794440746307, "rewards/rejected": -0.2604827880859375, "step": 10550 }, { "epoch": 0.69, "learning_rate": 1.319352816330796e-06, "logits/chosen": -1.4374910593032837, "logits/rejected": -1.001198172569275, "logps/chosen": -474.38519287109375, "logps/rejected": -462.852294921875, "loss": 0.0183, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1911880075931549, "rewards/margins": 0.0949224978685379, "rewards/rejected": -0.286110520362854, "step": 10560 }, { "epoch": 0.69, "learning_rate": 1.314322843147494e-06, "logits/chosen": -1.0267784595489502, "logits/rejected": -1.117527723312378, "logps/chosen": -412.05560302734375, "logps/rejected": -543.71826171875, "loss": 0.0195, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24172978103160858, "rewards/margins": 0.07679490745067596, "rewards/rejected": -0.31852468848228455, "step": 10570 }, { "epoch": 0.69, "learning_rate": 1.3092990550370526e-06, "logits/chosen": -1.1435863971710205, "logits/rejected": -1.051611065864563, "logps/chosen": -560.325439453125, "logps/rejected": -564.2654418945312, "loss": 0.0137, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21456709504127502, "rewards/margins": 0.08929123729467392, "rewards/rejected": -0.30385833978652954, "step": 10580 }, { "epoch": 0.69, "learning_rate": 1.3042814782060131e-06, "logits/chosen": -0.792064368724823, "logits/rejected": -0.7335621118545532, "logps/chosen": -355.91937255859375, "logps/rejected": -444.6099548339844, "loss": 0.0125, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.17441654205322266, "rewards/margins": 0.11260686069726944, "rewards/rejected": -0.2870233952999115, "step": 10590 }, { "epoch": 0.69, "learning_rate": 1.2992701388285112e-06, "logits/chosen": -0.917579174041748, "logits/rejected": -0.7966980934143066, "logps/chosen": -460.79205322265625, "logps/rejected": -501.6399841308594, "loss": 0.0151, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19144295156002045, "rewards/margins": 0.08531997352838516, "rewards/rejected": -0.2767629325389862, "step": 10600 }, { "epoch": 0.69, "eval_logits/chosen": -1.0145879983901978, "eval_logits/rejected": -0.8827481865882874, "eval_logps/chosen": -459.4988098144531, "eval_logps/rejected": -531.7596435546875, "eval_loss": 0.023178620263934135, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -0.22749388217926025, "eval_rewards/margins": 0.09265387803316116, "eval_rewards/rejected": -0.32014772295951843, "eval_runtime": 719.3624, "eval_samples_per_second": 2.78, "eval_steps_per_second": 1.39, "step": 10600 }, { "epoch": 0.69, "learning_rate": 1.29426506304615e-06, "logits/chosen": -0.9469764828681946, "logits/rejected": -0.9384641647338867, "logps/chosen": -479.50262451171875, "logps/rejected": -508.95806884765625, "loss": 0.0385, "rewards/accuracies": 0.625, "rewards/chosen": -0.2567792236804962, "rewards/margins": 0.04893350973725319, "rewards/rejected": -0.3057126998901367, "step": 10610 }, { "epoch": 0.69, "learning_rate": 1.289266276967855e-06, "logits/chosen": -1.2782361507415771, "logits/rejected": -1.054734468460083, "logps/chosen": -542.8716430664062, "logps/rejected": -523.0281982421875, "loss": 0.0178, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20428982377052307, "rewards/margins": 0.07031477987766266, "rewards/rejected": -0.27460458874702454, "step": 10620 }, { "epoch": 0.7, "learning_rate": 1.284273806669745e-06, "logits/chosen": -1.0302644968032837, "logits/rejected": -0.9786216020584106, "logps/chosen": -493.48431396484375, "logps/rejected": -609.5590209960938, "loss": 0.0183, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2541593313217163, "rewards/margins": 0.09853410720825195, "rewards/rejected": -0.35269343852996826, "step": 10630 }, { "epoch": 0.7, "learning_rate": 1.2792876781949884e-06, "logits/chosen": -0.6649162173271179, "logits/rejected": -0.5674262046813965, "logps/chosen": -400.558837890625, "logps/rejected": -481.7037658691406, "loss": 0.0275, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19448626041412354, "rewards/margins": 0.10046249628067017, "rewards/rejected": -0.2949487268924713, "step": 10640 }, { "epoch": 0.7, "learning_rate": 1.274307917553676e-06, "logits/chosen": -0.978837788105011, "logits/rejected": -0.7985986471176147, "logps/chosen": -435.20654296875, "logps/rejected": -583.4346923828125, "loss": 0.0275, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23954780399799347, "rewards/margins": 0.12362854182720184, "rewards/rejected": -0.3631763160228729, "step": 10650 }, { "epoch": 0.7, "learning_rate": 1.2693345507226767e-06, "logits/chosen": -0.9895383715629578, "logits/rejected": -0.8179743885993958, "logps/chosen": -459.59381103515625, "logps/rejected": -590.1747436523438, "loss": 0.0127, "rewards/accuracies": 0.75, "rewards/chosen": -0.22960355877876282, "rewards/margins": 0.1284674108028412, "rewards/rejected": -0.3580709993839264, "step": 10660 }, { "epoch": 0.7, "learning_rate": 1.2643676036455099e-06, "logits/chosen": -1.3213170766830444, "logits/rejected": -1.1559522151947021, "logps/chosen": -475.58441162109375, "logps/rejected": -477.3138122558594, "loss": 0.0213, "rewards/accuracies": 0.625, "rewards/chosen": -0.18773195147514343, "rewards/margins": 0.05247045308351517, "rewards/rejected": -0.24020235240459442, "step": 10670 }, { "epoch": 0.7, "learning_rate": 1.259407102232203e-06, "logits/chosen": -1.278294324874878, "logits/rejected": -0.8299957513809204, "logps/chosen": -497.51385498046875, "logps/rejected": -529.4931030273438, "loss": 0.0122, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21987955272197723, "rewards/margins": 0.10492320358753204, "rewards/rejected": -0.32480278611183167, "step": 10680 }, { "epoch": 0.7, "learning_rate": 1.254453072359163e-06, "logits/chosen": -0.8485827445983887, "logits/rejected": -0.8449796438217163, "logps/chosen": -442.3443908691406, "logps/rejected": -494.9901428222656, "loss": 0.018, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2083243876695633, "rewards/margins": 0.07451190799474716, "rewards/rejected": -0.28283628821372986, "step": 10690 }, { "epoch": 0.7, "learning_rate": 1.2495055398690337e-06, "logits/chosen": -1.3337225914001465, "logits/rejected": -1.1015886068344116, "logps/chosen": -412.2879943847656, "logps/rejected": -458.264892578125, "loss": 0.0207, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18243363499641418, "rewards/margins": 0.05526005104184151, "rewards/rejected": -0.2376936972141266, "step": 10700 }, { "epoch": 0.7, "eval_logits/chosen": -0.943865954875946, "eval_logits/rejected": -0.8175816535949707, "eval_logps/chosen": -442.1295166015625, "eval_logps/rejected": -508.0856018066406, "eval_loss": 0.022853834554553032, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -0.21012459695339203, "eval_rewards/margins": 0.08634911477565765, "eval_rewards/rejected": -0.2964736819267273, "eval_runtime": 717.3436, "eval_samples_per_second": 2.788, "eval_steps_per_second": 1.394, "step": 10700 }, { "epoch": 0.7, "learning_rate": 1.2445645305705718e-06, "logits/chosen": -1.084926962852478, "logits/rejected": -1.0485444068908691, "logps/chosen": -447.56732177734375, "logps/rejected": -494.869873046875, "loss": 0.0282, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.24124634265899658, "rewards/margins": 0.0679454430937767, "rewards/rejected": -0.3091917932033539, "step": 10710 }, { "epoch": 0.7, "learning_rate": 1.2396300702384995e-06, "logits/chosen": -1.092814326286316, "logits/rejected": -0.9777520298957825, "logps/chosen": -467.2496643066406, "logps/rejected": -471.74664306640625, "loss": 0.0126, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20944158732891083, "rewards/margins": 0.04216230660676956, "rewards/rejected": -0.251603901386261, "step": 10720 }, { "epoch": 0.7, "learning_rate": 1.234702184613381e-06, "logits/chosen": -0.9209340810775757, "logits/rejected": -0.7125831246376038, "logps/chosen": -403.3192138671875, "logps/rejected": -476.3380432128906, "loss": 0.0156, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1878369003534317, "rewards/margins": 0.07335436344146729, "rewards/rejected": -0.2611912488937378, "step": 10730 }, { "epoch": 0.7, "learning_rate": 1.2297808994014793e-06, "logits/chosen": -1.1957898139953613, "logits/rejected": -0.9735271334648132, "logps/chosen": -483.4905700683594, "logps/rejected": -512.9801025390625, "loss": 0.0199, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1975557506084442, "rewards/margins": 0.06438203155994415, "rewards/rejected": -0.2619377672672272, "step": 10740 }, { "epoch": 0.7, "learning_rate": 1.2248662402746314e-06, "logits/chosen": -0.8916974067687988, "logits/rejected": -1.0448212623596191, "logps/chosen": -416.05010986328125, "logps/rejected": -474.84259033203125, "loss": 0.0298, "rewards/accuracies": 0.625, "rewards/chosen": -0.22613339126110077, "rewards/margins": 0.06490835547447205, "rewards/rejected": -0.2910417318344116, "step": 10750 }, { "epoch": 0.7, "learning_rate": 1.2199582328701045e-06, "logits/chosen": -1.1036632061004639, "logits/rejected": -0.9856752157211304, "logps/chosen": -477.70623779296875, "logps/rejected": -540.041259765625, "loss": 0.0204, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1870727390050888, "rewards/margins": 0.10262595117092133, "rewards/rejected": -0.2896987199783325, "step": 10760 }, { "epoch": 0.7, "learning_rate": 1.2150569027904712e-06, "logits/chosen": -1.0499202013015747, "logits/rejected": -1.0206396579742432, "logps/chosen": -451.539794921875, "logps/rejected": -517.6983032226562, "loss": 0.0391, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20555636286735535, "rewards/margins": 0.07246137410402298, "rewards/rejected": -0.27801769971847534, "step": 10770 }, { "epoch": 0.71, "learning_rate": 1.2101622756034688e-06, "logits/chosen": -1.0748493671417236, "logits/rejected": -1.0255393981933594, "logps/chosen": -393.61907958984375, "logps/rejected": -443.6466369628906, "loss": 0.0301, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.17086508870124817, "rewards/margins": 0.08039279282093048, "rewards/rejected": -0.25125789642333984, "step": 10780 }, { "epoch": 0.71, "learning_rate": 1.2052743768418715e-06, "logits/chosen": -1.074951410293579, "logits/rejected": -0.8938143849372864, "logps/chosen": -404.97186279296875, "logps/rejected": -457.84527587890625, "loss": 0.0093, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1571628451347351, "rewards/margins": 0.08688019961118698, "rewards/rejected": -0.24404306709766388, "step": 10790 }, { "epoch": 0.71, "learning_rate": 1.2003932320033523e-06, "logits/chosen": -1.1365292072296143, "logits/rejected": -1.1270167827606201, "logps/chosen": -391.2300720214844, "logps/rejected": -495.93670654296875, "loss": 0.0343, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1647273600101471, "rewards/margins": 0.10407479852437973, "rewards/rejected": -0.2688021659851074, "step": 10800 }, { "epoch": 0.71, "eval_logits/chosen": -1.0659940242767334, "eval_logits/rejected": -0.9334921836853027, "eval_logps/chosen": -409.19219970703125, "eval_logps/rejected": -474.0302429199219, "eval_loss": 0.022922594100236893, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -0.1771872192621231, "eval_rewards/margins": 0.08523111045360565, "eval_rewards/rejected": -0.26241832971572876, "eval_runtime": 714.8093, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 10800 }, { "epoch": 0.71, "learning_rate": 1.1955188665503553e-06, "logits/chosen": -0.9277946352958679, "logits/rejected": -0.8723438382148743, "logps/chosen": -402.2226867675781, "logps/rejected": -447.3357849121094, "loss": 0.0315, "rewards/accuracies": 0.625, "rewards/chosen": -0.1970990002155304, "rewards/margins": 0.06590325385332108, "rewards/rejected": -0.2630022168159485, "step": 10810 }, { "epoch": 0.71, "learning_rate": 1.1906513059099566e-06, "logits/chosen": -1.1835887432098389, "logits/rejected": -0.8962932825088501, "logps/chosen": -425.53558349609375, "logps/rejected": -529.1549682617188, "loss": 0.0172, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1940465271472931, "rewards/margins": 0.1123599037528038, "rewards/rejected": -0.3064064383506775, "step": 10820 }, { "epoch": 0.71, "learning_rate": 1.185790575473738e-06, "logits/chosen": -1.0998814105987549, "logits/rejected": -0.7160011529922485, "logps/chosen": -417.5850524902344, "logps/rejected": -470.40887451171875, "loss": 0.0258, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18845175206661224, "rewards/margins": 0.10009801387786865, "rewards/rejected": -0.2885497808456421, "step": 10830 }, { "epoch": 0.71, "learning_rate": 1.1809367005976516e-06, "logits/chosen": -1.0662267208099365, "logits/rejected": -0.9050619006156921, "logps/chosen": -426.7510681152344, "logps/rejected": -406.6272888183594, "loss": 0.0353, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14425630867481232, "rewards/margins": 0.050923608243465424, "rewards/rejected": -0.19517990946769714, "step": 10840 }, { "epoch": 0.71, "learning_rate": 1.1760897066018842e-06, "logits/chosen": -0.9952324628829956, "logits/rejected": -0.8830828666687012, "logps/chosen": -382.8329162597656, "logps/rejected": -467.06365966796875, "loss": 0.0136, "rewards/accuracies": 0.75, "rewards/chosen": -0.15979185700416565, "rewards/margins": 0.09022766351699829, "rewards/rejected": -0.25001949071884155, "step": 10850 }, { "epoch": 0.71, "learning_rate": 1.1712496187707327e-06, "logits/chosen": -0.9870246648788452, "logits/rejected": -1.1668940782546997, "logps/chosen": -438.7378845214844, "logps/rejected": -555.7759399414062, "loss": 0.054, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19843365252017975, "rewards/margins": 0.13110387325286865, "rewards/rejected": -0.3295375108718872, "step": 10860 }, { "epoch": 0.71, "learning_rate": 1.1664164623524646e-06, "logits/chosen": -1.0623973608016968, "logits/rejected": -0.8748255968093872, "logps/chosen": -380.19219970703125, "logps/rejected": -433.5972595214844, "loss": 0.0366, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16181719303131104, "rewards/margins": 0.0816623866558075, "rewards/rejected": -0.24347960948944092, "step": 10870 }, { "epoch": 0.71, "learning_rate": 1.1615902625591926e-06, "logits/chosen": -1.1392452716827393, "logits/rejected": -0.7797281742095947, "logps/chosen": -427.402587890625, "logps/rejected": -488.474609375, "loss": 0.0287, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20218141376972198, "rewards/margins": 0.0704856887459755, "rewards/rejected": -0.2726671099662781, "step": 10880 }, { "epoch": 0.71, "learning_rate": 1.156771044566738e-06, "logits/chosen": -1.077528715133667, "logits/rejected": -0.9703509211540222, "logps/chosen": -447.404052734375, "logps/rejected": -484.466796875, "loss": 0.0101, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1827893853187561, "rewards/margins": 0.08028020709753036, "rewards/rejected": -0.26306959986686707, "step": 10890 }, { "epoch": 0.71, "learning_rate": 1.1519588335145037e-06, "logits/chosen": -1.1248142719268799, "logits/rejected": -1.3123174905776978, "logps/chosen": -376.7156066894531, "logps/rejected": -437.0814514160156, "loss": 0.0277, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16913677752017975, "rewards/margins": 0.041666414588689804, "rewards/rejected": -0.21080319583415985, "step": 10900 }, { "epoch": 0.71, "eval_logits/chosen": -1.010233998298645, "eval_logits/rejected": -0.881983757019043, "eval_logps/chosen": -415.1988220214844, "eval_logps/rejected": -475.7294006347656, "eval_loss": 0.023203978314995766, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -0.18319390714168549, "eval_rewards/margins": 0.08092358708381653, "eval_rewards/rejected": -0.2641174793243408, "eval_runtime": 716.9785, "eval_samples_per_second": 2.789, "eval_steps_per_second": 1.395, "step": 10900 }, { "epoch": 0.71, "learning_rate": 1.1471536545053382e-06, "logits/chosen": -1.016697645187378, "logits/rejected": -1.0197445154190063, "logps/chosen": -372.4880676269531, "logps/rejected": -458.74603271484375, "loss": 0.047, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1634531319141388, "rewards/margins": 0.07870586216449738, "rewards/rejected": -0.24215897917747498, "step": 10910 }, { "epoch": 0.71, "learning_rate": 1.1423555326054112e-06, "logits/chosen": -0.9696292877197266, "logits/rejected": -0.6852289438247681, "logps/chosen": -479.50396728515625, "logps/rejected": -555.6917114257812, "loss": 0.0176, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19666942954063416, "rewards/margins": 0.14962373673915863, "rewards/rejected": -0.3462931513786316, "step": 10920 }, { "epoch": 0.72, "learning_rate": 1.1375644928440743e-06, "logits/chosen": -1.1011669635772705, "logits/rejected": -0.805029034614563, "logps/chosen": -439.20465087890625, "logps/rejected": -471.09210205078125, "loss": 0.0143, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2025213986635208, "rewards/margins": 0.09939121454954147, "rewards/rejected": -0.3019126355648041, "step": 10930 }, { "epoch": 0.72, "learning_rate": 1.1327805602137396e-06, "logits/chosen": -1.1375086307525635, "logits/rejected": -0.9269174337387085, "logps/chosen": -479.64111328125, "logps/rejected": -513.5501708984375, "loss": 0.0211, "rewards/accuracies": 0.75, "rewards/chosen": -0.22089700400829315, "rewards/margins": 0.08688704669475555, "rewards/rejected": -0.3077840507030487, "step": 10940 }, { "epoch": 0.72, "learning_rate": 1.1280037596697426e-06, "logits/chosen": -1.0122627019882202, "logits/rejected": -0.763548731803894, "logps/chosen": -472.14190673828125, "logps/rejected": -649.8763427734375, "loss": 0.0292, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2512595057487488, "rewards/margins": 0.14080765843391418, "rewards/rejected": -0.3920671343803406, "step": 10950 }, { "epoch": 0.72, "learning_rate": 1.123234116130216e-06, "logits/chosen": -0.9326109886169434, "logits/rejected": -0.8292649388313293, "logps/chosen": -403.70367431640625, "logps/rejected": -526.8153076171875, "loss": 0.0283, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2178153097629547, "rewards/margins": 0.11705734580755234, "rewards/rejected": -0.33487263321876526, "step": 10960 }, { "epoch": 0.72, "learning_rate": 1.1184716544759553e-06, "logits/chosen": -0.6938477158546448, "logits/rejected": -0.6629935503005981, "logps/chosen": -347.72467041015625, "logps/rejected": -410.2701721191406, "loss": 0.0379, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.18236052989959717, "rewards/margins": 0.050130896270275116, "rewards/rejected": -0.23249144852161407, "step": 10970 }, { "epoch": 0.72, "learning_rate": 1.1137163995502948e-06, "logits/chosen": -1.4915335178375244, "logits/rejected": -1.287002444267273, "logps/chosen": -400.2135009765625, "logps/rejected": -442.28106689453125, "loss": 0.0185, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1786784827709198, "rewards/margins": 0.07476945966482162, "rewards/rejected": -0.25344792008399963, "step": 10980 }, { "epoch": 0.72, "learning_rate": 1.1089683761589717e-06, "logits/chosen": -0.8917869329452515, "logits/rejected": -0.8462752103805542, "logps/chosen": -411.16339111328125, "logps/rejected": -505.95953369140625, "loss": 0.0129, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1709570437669754, "rewards/margins": 0.11371036618947983, "rewards/rejected": -0.284667432308197, "step": 10990 }, { "epoch": 0.72, "learning_rate": 1.1042276090700044e-06, "logits/chosen": -0.994330883026123, "logits/rejected": -1.0441759824752808, "logps/chosen": -423.8777770996094, "logps/rejected": -516.5511474609375, "loss": 0.0468, "rewards/accuracies": 0.5, "rewards/chosen": -0.21357569098472595, "rewards/margins": 0.06250393390655518, "rewards/rejected": -0.27607959508895874, "step": 11000 }, { "epoch": 0.72, "eval_logits/chosen": -1.0789541006088257, "eval_logits/rejected": -0.9471370577812195, "eval_logps/chosen": -400.4061584472656, "eval_logps/rejected": -461.7659606933594, "eval_loss": 0.023225940763950348, "eval_rewards/accuracies": 0.6710000038146973, "eval_rewards/chosen": -0.16840118169784546, "eval_rewards/margins": 0.08175291121006012, "eval_rewards/rejected": -0.2501541078090668, "eval_runtime": 713.9675, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.401, "step": 11000 }, { "epoch": 0.72, "learning_rate": 1.0994941230135536e-06, "logits/chosen": -1.1401865482330322, "logits/rejected": -1.018668532371521, "logps/chosen": -383.0191345214844, "logps/rejected": -471.2349548339844, "loss": 0.0169, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15016913414001465, "rewards/margins": 0.1242755651473999, "rewards/rejected": -0.27444472908973694, "step": 11010 }, { "epoch": 0.72, "learning_rate": 1.094767942681804e-06, "logits/chosen": -1.5718252658843994, "logits/rejected": -1.1552879810333252, "logps/chosen": -468.95684814453125, "logps/rejected": -519.3148803710938, "loss": 0.0284, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22108781337738037, "rewards/margins": 0.08955870568752289, "rewards/rejected": -0.3106464743614197, "step": 11020 }, { "epoch": 0.72, "learning_rate": 1.0900490927288248e-06, "logits/chosen": -0.8818660974502563, "logits/rejected": -0.9248073697090149, "logps/chosen": -433.7283630371094, "logps/rejected": -454.76129150390625, "loss": 0.0237, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17326004803180695, "rewards/margins": 0.07050933688879013, "rewards/rejected": -0.24376940727233887, "step": 11030 }, { "epoch": 0.72, "learning_rate": 1.0853375977704511e-06, "logits/chosen": -1.1856893301010132, "logits/rejected": -1.0138791799545288, "logps/chosen": -421.9573669433594, "logps/rejected": -421.90826416015625, "loss": 0.0295, "rewards/accuracies": 0.75, "rewards/chosen": -0.1815328150987625, "rewards/margins": 0.06396160274744034, "rewards/rejected": -0.24549439549446106, "step": 11040 }, { "epoch": 0.72, "learning_rate": 1.0806334823841466e-06, "logits/chosen": -1.1366987228393555, "logits/rejected": -1.2651304006576538, "logps/chosen": -422.4078674316406, "logps/rejected": -503.5257873535156, "loss": 0.0378, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.18279197812080383, "rewards/margins": 0.05171481892466545, "rewards/rejected": -0.2345067709684372, "step": 11050 }, { "epoch": 0.72, "learning_rate": 1.0759367711088825e-06, "logits/chosen": -0.915551483631134, "logits/rejected": -1.1488310098648071, "logps/chosen": -347.27520751953125, "logps/rejected": -437.59356689453125, "loss": 0.0191, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1634427011013031, "rewards/margins": 0.0568411760032177, "rewards/rejected": -0.2202838957309723, "step": 11060 }, { "epoch": 0.72, "learning_rate": 1.0712474884450056e-06, "logits/chosen": -1.1386525630950928, "logits/rejected": -0.9761357307434082, "logps/chosen": -339.6700439453125, "logps/rejected": -400.8133850097656, "loss": 0.0532, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.13683676719665527, "rewards/margins": 0.08993016183376312, "rewards/rejected": -0.2267669439315796, "step": 11070 }, { "epoch": 0.72, "learning_rate": 1.066565658854112e-06, "logits/chosen": -0.9509509205818176, "logits/rejected": -1.029321312904358, "logps/chosen": -277.3965759277344, "logps/rejected": -357.4387512207031, "loss": 0.0226, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1495647132396698, "rewards/margins": 0.08327466249465942, "rewards/rejected": -0.23283937573432922, "step": 11080 }, { "epoch": 0.73, "learning_rate": 1.0618913067589165e-06, "logits/chosen": -1.2269341945648193, "logits/rejected": -0.876134991645813, "logps/chosen": -349.1415710449219, "logps/rejected": -405.8893737792969, "loss": 0.0386, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13332685828208923, "rewards/margins": 0.09420770406723022, "rewards/rejected": -0.22753457725048065, "step": 11090 }, { "epoch": 0.73, "learning_rate": 1.0572244565431313e-06, "logits/chosen": -1.077650547027588, "logits/rejected": -1.0578992366790771, "logps/chosen": -308.6154479980469, "logps/rejected": -389.65423583984375, "loss": 0.0205, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16319867968559265, "rewards/margins": 0.07587755471467972, "rewards/rejected": -0.23907622694969177, "step": 11100 }, { "epoch": 0.73, "eval_logits/chosen": -1.1645113229751587, "eval_logits/rejected": -1.0275779962539673, "eval_logps/chosen": -380.51123046875, "eval_logps/rejected": -443.96624755859375, "eval_loss": 0.0231324415653944, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -0.1485062539577484, "eval_rewards/margins": 0.08384808897972107, "eval_rewards/rejected": -0.23235435783863068, "eval_runtime": 714.0773, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.4, "step": 11100 }, { "epoch": 0.73, "learning_rate": 1.0525651325513317e-06, "logits/chosen": -1.1633539199829102, "logits/rejected": -1.178456425666809, "logps/chosen": -456.3701171875, "logps/rejected": -488.2652282714844, "loss": 0.0229, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.12510047852993011, "rewards/margins": 0.05305058881640434, "rewards/rejected": -0.17815105617046356, "step": 11110 }, { "epoch": 0.73, "learning_rate": 1.0479133590888351e-06, "logits/chosen": -1.1748346090316772, "logits/rejected": -1.0340824127197266, "logps/chosen": -405.7606201171875, "logps/rejected": -471.91375732421875, "loss": 0.0178, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15030567348003387, "rewards/margins": 0.09312538802623749, "rewards/rejected": -0.24343104660511017, "step": 11120 }, { "epoch": 0.73, "learning_rate": 1.0432691604215695e-06, "logits/chosen": -1.194390892982483, "logits/rejected": -0.9836454391479492, "logps/chosen": -370.35052490234375, "logps/rejected": -399.611083984375, "loss": 0.0281, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1353035569190979, "rewards/margins": 0.06097368150949478, "rewards/rejected": -0.19627723097801208, "step": 11130 }, { "epoch": 0.73, "learning_rate": 1.0386325607759515e-06, "logits/chosen": -1.0975955724716187, "logits/rejected": -0.8900185823440552, "logps/chosen": -317.8079833984375, "logps/rejected": -394.64459228515625, "loss": 0.0271, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1264178603887558, "rewards/margins": 0.09690706431865692, "rewards/rejected": -0.2233249396085739, "step": 11140 }, { "epoch": 0.73, "learning_rate": 1.0340035843387544e-06, "logits/chosen": -1.0070667266845703, "logits/rejected": -0.8539411425590515, "logps/chosen": -348.4281921386719, "logps/rejected": -391.7406311035156, "loss": 0.0173, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16869264841079712, "rewards/margins": 0.07089529186487198, "rewards/rejected": -0.2395879477262497, "step": 11150 }, { "epoch": 0.73, "learning_rate": 1.0293822552569887e-06, "logits/chosen": -1.2996865510940552, "logits/rejected": -1.111810326576233, "logps/chosen": -395.456787109375, "logps/rejected": -443.95672607421875, "loss": 0.0197, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.14357289671897888, "rewards/margins": 0.10433048009872437, "rewards/rejected": -0.24790339171886444, "step": 11160 }, { "epoch": 0.73, "learning_rate": 1.0247685976377688e-06, "logits/chosen": -1.194035291671753, "logits/rejected": -0.9167041778564453, "logps/chosen": -369.6636962890625, "logps/rejected": -400.9215087890625, "loss": 0.0277, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18797869980335236, "rewards/margins": 0.0700891762971878, "rewards/rejected": -0.25806787610054016, "step": 11170 }, { "epoch": 0.73, "learning_rate": 1.0201626355481939e-06, "logits/chosen": -1.3550119400024414, "logits/rejected": -1.153591513633728, "logps/chosen": -378.2259216308594, "logps/rejected": -410.29901123046875, "loss": 0.0124, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16210542619228363, "rewards/margins": 0.08563300967216492, "rewards/rejected": -0.24773843586444855, "step": 11180 }, { "epoch": 0.73, "learning_rate": 1.0155643930152192e-06, "logits/chosen": -1.381773591041565, "logits/rejected": -1.2466447353363037, "logps/chosen": -427.7259216308594, "logps/rejected": -429.55755615234375, "loss": 0.0156, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.15511856973171234, "rewards/margins": 0.05647754669189453, "rewards/rejected": -0.21159613132476807, "step": 11190 }, { "epoch": 0.73, "learning_rate": 1.0109738940255286e-06, "logits/chosen": -1.1574543714523315, "logits/rejected": -1.0072754621505737, "logps/chosen": -367.4029846191406, "logps/rejected": -396.0596618652344, "loss": 0.0208, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15042048692703247, "rewards/margins": 0.06593789160251617, "rewards/rejected": -0.21635837852954865, "step": 11200 }, { "epoch": 0.73, "eval_logits/chosen": -1.2266347408294678, "eval_logits/rejected": -1.086592435836792, "eval_logps/chosen": -374.1264343261719, "eval_logps/rejected": -435.7383117675781, "eval_loss": 0.023207908496260643, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -0.14212149381637573, "eval_rewards/margins": 0.08200491964817047, "eval_rewards/rejected": -0.2241264134645462, "eval_runtime": 715.9203, "eval_samples_per_second": 2.794, "eval_steps_per_second": 1.397, "step": 11200 }, { "epoch": 0.73, "learning_rate": 1.0063911625254155e-06, "logits/chosen": -1.312185525894165, "logits/rejected": -1.3114862442016602, "logps/chosen": -344.67169189453125, "logps/rejected": -421.61883544921875, "loss": 0.0254, "rewards/accuracies": 0.625, "rewards/chosen": -0.10874901711940765, "rewards/margins": 0.07631779462099075, "rewards/rejected": -0.185066819190979, "step": 11210 }, { "epoch": 0.73, "learning_rate": 1.0018162224206502e-06, "logits/chosen": -1.1632659435272217, "logits/rejected": -1.1636685132980347, "logps/chosen": -323.69964599609375, "logps/rejected": -438.305908203125, "loss": 0.02, "rewards/accuracies": 0.625, "rewards/chosen": -0.14922483265399933, "rewards/margins": 0.11089619249105453, "rewards/rejected": -0.26012104749679565, "step": 11220 }, { "epoch": 0.73, "learning_rate": 9.97249097576363e-07, "logits/chosen": -1.6443986892700195, "logits/rejected": -1.2382805347442627, "logps/chosen": -371.77362060546875, "logps/rejected": -435.08123779296875, "loss": 0.0285, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13871707022190094, "rewards/margins": 0.10679943859577179, "rewards/rejected": -0.24551650881767273, "step": 11230 }, { "epoch": 0.74, "learning_rate": 9.92689811816913e-07, "logits/chosen": -1.2340461015701294, "logits/rejected": -0.941146194934845, "logps/chosen": -374.0948181152344, "logps/rejected": -394.6793212890625, "loss": 0.0416, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1615736186504364, "rewards/margins": 0.06798748672008514, "rewards/rejected": -0.22956109046936035, "step": 11240 }, { "epoch": 0.74, "learning_rate": 9.881383889257691e-07, "logits/chosen": -1.1651827096939087, "logits/rejected": -1.2603347301483154, "logps/chosen": -329.04852294921875, "logps/rejected": -447.6138610839844, "loss": 0.0159, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.15975165367126465, "rewards/margins": 0.054486922919750214, "rewards/rejected": -0.21423859894275665, "step": 11250 }, { "epoch": 0.74, "learning_rate": 9.835948526453817e-07, "logits/chosen": -0.8881275057792664, "logits/rejected": -1.2403464317321777, "logps/chosen": -357.779052734375, "logps/rejected": -451.1248474121094, "loss": 0.0325, "rewards/accuracies": 0.625, "rewards/chosen": -0.17734970152378082, "rewards/margins": 0.04967401176691055, "rewards/rejected": -0.22702369093894958, "step": 11260 }, { "epoch": 0.74, "learning_rate": 9.790592266770633e-07, "logits/chosen": -1.3715448379516602, "logits/rejected": -1.2031675577163696, "logps/chosen": -440.8570251464844, "logps/rejected": -489.68670654296875, "loss": 0.027, "rewards/accuracies": 0.75, "rewards/chosen": -0.17151229083538055, "rewards/margins": 0.07706841826438904, "rewards/rejected": -0.2485806941986084, "step": 11270 }, { "epoch": 0.74, "learning_rate": 9.745315346808584e-07, "logits/chosen": -0.9693616032600403, "logits/rejected": -0.9835758209228516, "logps/chosen": -360.45074462890625, "logps/rejected": -409.4380187988281, "loss": 0.0305, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1413753181695938, "rewards/margins": 0.07137038558721542, "rewards/rejected": -0.21274569630622864, "step": 11280 }, { "epoch": 0.74, "learning_rate": 9.70011800275428e-07, "logits/chosen": -0.9502639770507812, "logits/rejected": -0.9292412996292114, "logps/chosen": -445.00286865234375, "logps/rejected": -549.6346435546875, "loss": 0.0203, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20747800171375275, "rewards/margins": 0.09318571537733078, "rewards/rejected": -0.30066370964050293, "step": 11290 }, { "epoch": 0.74, "learning_rate": 9.655000470379206e-07, "logits/chosen": -1.040000557899475, "logits/rejected": -0.8406152725219727, "logps/chosen": -405.9225158691406, "logps/rejected": -504.32391357421875, "loss": 0.0203, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1955750435590744, "rewards/margins": 0.10908393561840057, "rewards/rejected": -0.304658979177475, "step": 11300 }, { "epoch": 0.74, "eval_logits/chosen": -1.0693349838256836, "eval_logits/rejected": -0.9368215799331665, "eval_logps/chosen": -418.5068664550781, "eval_logps/rejected": -485.0167541503906, "eval_loss": 0.022832898423075676, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -0.1865019053220749, "eval_rewards/margins": 0.0869029238820076, "eval_rewards/rejected": -0.2734048366546631, "eval_runtime": 714.3159, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 11300 }, { "epoch": 0.74, "learning_rate": 9.609962985038517e-07, "logits/chosen": -1.1131622791290283, "logits/rejected": -1.0102578401565552, "logps/chosen": -402.699462890625, "logps/rejected": -510.6493225097656, "loss": 0.0306, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1873384416103363, "rewards/margins": 0.1330747753381729, "rewards/rejected": -0.32041317224502563, "step": 11310 }, { "epoch": 0.74, "learning_rate": 9.565005781669786e-07, "logits/chosen": -1.4016231298446655, "logits/rejected": -1.033085584640503, "logps/chosen": -439.47344970703125, "logps/rejected": -471.2001953125, "loss": 0.0298, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1788828819990158, "rewards/margins": 0.08503957837820053, "rewards/rejected": -0.26392242312431335, "step": 11320 }, { "epoch": 0.74, "learning_rate": 9.520129094791822e-07, "logits/chosen": -1.0139018297195435, "logits/rejected": -0.8032572865486145, "logps/chosen": -371.7670593261719, "logps/rejected": -449.46484375, "loss": 0.0357, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1922713816165924, "rewards/margins": 0.08790101855993271, "rewards/rejected": -0.2801723778247833, "step": 11330 }, { "epoch": 0.74, "learning_rate": 9.475333158503389e-07, "logits/chosen": -1.0802491903305054, "logits/rejected": -0.9376344680786133, "logps/chosen": -360.4689636230469, "logps/rejected": -373.1958923339844, "loss": 0.0262, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.14264671504497528, "rewards/margins": 0.05788546800613403, "rewards/rejected": -0.2005321979522705, "step": 11340 }, { "epoch": 0.74, "learning_rate": 9.430618206482053e-07, "logits/chosen": -1.0669714212417603, "logits/rejected": -0.9494711756706238, "logps/chosen": -290.5818176269531, "logps/rejected": -345.97076416015625, "loss": 0.0164, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.15015889704227448, "rewards/margins": 0.05479348823428154, "rewards/rejected": -0.2049523890018463, "step": 11350 }, { "epoch": 0.74, "learning_rate": 9.385984471982892e-07, "logits/chosen": -1.0498079061508179, "logits/rejected": -0.8382881283760071, "logps/chosen": -367.2361755371094, "logps/rejected": -459.05841064453125, "loss": 0.0127, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15241031348705292, "rewards/margins": 0.142556831240654, "rewards/rejected": -0.2949672043323517, "step": 11360 }, { "epoch": 0.74, "learning_rate": 9.341432187837343e-07, "logits/chosen": -1.3421348333358765, "logits/rejected": -1.1668442487716675, "logps/chosen": -332.52337646484375, "logps/rejected": -459.5577697753906, "loss": 0.0301, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1422196328639984, "rewards/margins": 0.11216600984334946, "rewards/rejected": -0.25438565015792847, "step": 11370 }, { "epoch": 0.74, "learning_rate": 9.29696158645193e-07, "logits/chosen": -1.1534702777862549, "logits/rejected": -1.2748430967330933, "logps/chosen": -392.8805847167969, "logps/rejected": -554.1047973632812, "loss": 0.0129, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1717783510684967, "rewards/margins": 0.1322343945503235, "rewards/rejected": -0.3040127158164978, "step": 11380 }, { "epoch": 0.75, "learning_rate": 9.252572899807111e-07, "logits/chosen": -1.1801443099975586, "logits/rejected": -1.0295169353485107, "logps/chosen": -472.58197021484375, "logps/rejected": -569.1712036132812, "loss": 0.0084, "rewards/accuracies": 0.75, "rewards/chosen": -0.20198242366313934, "rewards/margins": 0.12277617305517197, "rewards/rejected": -0.3247585892677307, "step": 11390 }, { "epoch": 0.75, "learning_rate": 9.208266359456003e-07, "logits/chosen": -1.2797205448150635, "logits/rejected": -1.149174451828003, "logps/chosen": -330.77105712890625, "logps/rejected": -421.34490966796875, "loss": 0.0322, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12496531009674072, "rewards/margins": 0.08100312948226929, "rewards/rejected": -0.2059684544801712, "step": 11400 }, { "epoch": 0.75, "eval_logits/chosen": -1.1439898014068604, "eval_logits/rejected": -1.0057190656661987, "eval_logps/chosen": -423.4101867675781, "eval_logps/rejected": -494.9005126953125, "eval_loss": 0.023191066458821297, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -0.19140517711639404, "eval_rewards/margins": 0.09188342094421387, "eval_rewards/rejected": -0.2832885980606079, "eval_runtime": 713.3493, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 11400 }, { "epoch": 0.75, "learning_rate": 9.164042196523229e-07, "logits/chosen": -1.4289063215255737, "logits/rejected": -1.1208937168121338, "logps/chosen": -386.10113525390625, "logps/rejected": -480.11492919921875, "loss": 0.0257, "rewards/accuracies": 0.625, "rewards/chosen": -0.1967000961303711, "rewards/margins": 0.10753147304058075, "rewards/rejected": -0.30423155426979065, "step": 11410 }, { "epoch": 0.75, "learning_rate": 9.119900641703696e-07, "logits/chosen": -1.3510946035385132, "logits/rejected": -1.085390329360962, "logps/chosen": -399.81036376953125, "logps/rejected": -427.9717712402344, "loss": 0.0208, "rewards/accuracies": 0.625, "rewards/chosen": -0.18636399507522583, "rewards/margins": 0.07449640333652496, "rewards/rejected": -0.2608603835105896, "step": 11420 }, { "epoch": 0.75, "learning_rate": 9.075841925261364e-07, "logits/chosen": -1.4528703689575195, "logits/rejected": -1.3141974210739136, "logps/chosen": -403.43621826171875, "logps/rejected": -480.54888916015625, "loss": 0.0499, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16551706194877625, "rewards/margins": 0.09154149144887924, "rewards/rejected": -0.2570585608482361, "step": 11430 }, { "epoch": 0.75, "learning_rate": 9.031866277028093e-07, "logits/chosen": -1.0612777471542358, "logits/rejected": -0.9092742800712585, "logps/chosen": -376.8293151855469, "logps/rejected": -498.85205078125, "loss": 0.0138, "rewards/accuracies": 0.625, "rewards/chosen": -0.18575455248355865, "rewards/margins": 0.09521301090717316, "rewards/rejected": -0.2809675633907318, "step": 11440 }, { "epoch": 0.75, "learning_rate": 8.987973926402391e-07, "logits/chosen": -0.9107683300971985, "logits/rejected": -1.0483675003051758, "logps/chosen": -390.3138122558594, "logps/rejected": -472.00323486328125, "loss": 0.0351, "rewards/accuracies": 0.625, "rewards/chosen": -0.17797425389289856, "rewards/margins": 0.08227677643299103, "rewards/rejected": -0.2602510452270508, "step": 11450 }, { "epoch": 0.75, "learning_rate": 8.944165102348273e-07, "logits/chosen": -1.2199598550796509, "logits/rejected": -1.0704153776168823, "logps/chosen": -287.2079162597656, "logps/rejected": -408.4291687011719, "loss": 0.05, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.13026081025600433, "rewards/margins": 0.11396391689777374, "rewards/rejected": -0.24422471225261688, "step": 11460 }, { "epoch": 0.75, "learning_rate": 8.900440033394018e-07, "logits/chosen": -0.9697388410568237, "logits/rejected": -1.011523723602295, "logps/chosen": -343.2810974121094, "logps/rejected": -408.3849182128906, "loss": 0.0236, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16076485812664032, "rewards/margins": 0.07438953220844269, "rewards/rejected": -0.2351544201374054, "step": 11470 }, { "epoch": 0.75, "learning_rate": 8.856798947631009e-07, "logits/chosen": -1.1279700994491577, "logits/rejected": -1.075347900390625, "logps/chosen": -369.8497314453125, "logps/rejected": -500.8539123535156, "loss": 0.0231, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17897404730319977, "rewards/margins": 0.11405213922262192, "rewards/rejected": -0.2930262088775635, "step": 11480 }, { "epoch": 0.75, "learning_rate": 8.813242072712519e-07, "logits/chosen": -0.565597414970398, "logits/rejected": -0.4586416184902191, "logps/chosen": -378.6501770019531, "logps/rejected": -443.42547607421875, "loss": 0.0348, "rewards/accuracies": 0.5, "rewards/chosen": -0.2167879045009613, "rewards/margins": 0.06363924592733383, "rewards/rejected": -0.28042712807655334, "step": 11490 }, { "epoch": 0.75, "learning_rate": 8.769769635852557e-07, "logits/chosen": -0.9551329612731934, "logits/rejected": -0.9990715980529785, "logps/chosen": -395.9271545410156, "logps/rejected": -424.8460998535156, "loss": 0.0208, "rewards/accuracies": 0.625, "rewards/chosen": -0.18061937391757965, "rewards/margins": 0.06567604094743729, "rewards/rejected": -0.24629542231559753, "step": 11500 }, { "epoch": 0.75, "eval_logits/chosen": -0.9951764941215515, "eval_logits/rejected": -0.8679130673408508, "eval_logps/chosen": -416.435302734375, "eval_logps/rejected": -479.0217590332031, "eval_loss": 0.02297355607151985, "eval_rewards/accuracies": 0.6710000038146973, "eval_rewards/chosen": -0.18443039059638977, "eval_rewards/margins": 0.08297950774431229, "eval_rewards/rejected": -0.26740992069244385, "eval_runtime": 712.7836, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 11500 }, { "epoch": 0.75, "learning_rate": 8.726381863824635e-07, "logits/chosen": -1.3418853282928467, "logits/rejected": -1.1397570371627808, "logps/chosen": -480.6543884277344, "logps/rejected": -479.19921875, "loss": 0.0174, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19557733833789825, "rewards/margins": 0.06595106422901154, "rewards/rejected": -0.2615284025669098, "step": 11510 }, { "epoch": 0.75, "learning_rate": 8.683078982960638e-07, "logits/chosen": -0.7785824537277222, "logits/rejected": -0.5852483510971069, "logps/chosen": -425.610595703125, "logps/rejected": -491.669677734375, "loss": 0.0341, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20092561841011047, "rewards/margins": 0.11615000665187836, "rewards/rejected": -0.31707563996315, "step": 11520 }, { "epoch": 0.75, "learning_rate": 8.639861219149584e-07, "logits/chosen": -1.0364845991134644, "logits/rejected": -0.7560804486274719, "logps/chosen": -470.0465393066406, "logps/rejected": -529.1448974609375, "loss": 0.0217, "rewards/accuracies": 0.625, "rewards/chosen": -0.20606450736522675, "rewards/margins": 0.10202287137508392, "rewards/rejected": -0.30808740854263306, "step": 11530 }, { "epoch": 0.76, "learning_rate": 8.596728797836532e-07, "logits/chosen": -0.9838936924934387, "logits/rejected": -0.9281862378120422, "logps/chosen": -390.7336730957031, "logps/rejected": -537.155029296875, "loss": 0.0228, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17542493343353271, "rewards/margins": 0.11682838201522827, "rewards/rejected": -0.292253315448761, "step": 11540 }, { "epoch": 0.76, "learning_rate": 8.553681944021294e-07, "logits/chosen": -1.1515814065933228, "logits/rejected": -1.335495948791504, "logps/chosen": -408.4917907714844, "logps/rejected": -479.4371032714844, "loss": 0.0175, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16940782964229584, "rewards/margins": 0.09029257297515869, "rewards/rejected": -0.2597004175186157, "step": 11550 }, { "epoch": 0.76, "learning_rate": 8.510720882257365e-07, "logits/chosen": -0.6723235845565796, "logits/rejected": -0.7348573207855225, "logps/chosen": -348.6835021972656, "logps/rejected": -525.1763916015625, "loss": 0.0191, "rewards/accuracies": 0.75, "rewards/chosen": -0.18738296627998352, "rewards/margins": 0.13526089489459991, "rewards/rejected": -0.32264384627342224, "step": 11560 }, { "epoch": 0.76, "learning_rate": 8.467845836650667e-07, "logits/chosen": -0.5859243869781494, "logits/rejected": -0.6484355926513672, "logps/chosen": -439.5799255371094, "logps/rejected": -540.8603515625, "loss": 0.0256, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23131337761878967, "rewards/margins": 0.0947253555059433, "rewards/rejected": -0.32603874802589417, "step": 11570 }, { "epoch": 0.76, "learning_rate": 8.425057030858461e-07, "logits/chosen": -0.7306903004646301, "logits/rejected": -0.7738875150680542, "logps/chosen": -331.8514404296875, "logps/rejected": -457.28802490234375, "loss": 0.015, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17579558491706848, "rewards/margins": 0.09641371667385101, "rewards/rejected": -0.2722092568874359, "step": 11580 }, { "epoch": 0.76, "learning_rate": 8.382354688088098e-07, "logits/chosen": -0.8348797559738159, "logits/rejected": -0.9412097930908203, "logps/chosen": -357.9990234375, "logps/rejected": -442.42120361328125, "loss": 0.0353, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19984368979930878, "rewards/margins": 0.06899918615818024, "rewards/rejected": -0.268842875957489, "step": 11590 }, { "epoch": 0.76, "learning_rate": 8.33973903109594e-07, "logits/chosen": -0.9040916562080383, "logits/rejected": -0.955480694770813, "logps/chosen": -431.417724609375, "logps/rejected": -505.04058837890625, "loss": 0.0289, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21482057869434357, "rewards/margins": 0.10518453270196915, "rewards/rejected": -0.3200050890445709, "step": 11600 }, { "epoch": 0.76, "eval_logits/chosen": -0.9086868166923523, "eval_logits/rejected": -0.784236490726471, "eval_logps/chosen": -445.85113525390625, "eval_logps/rejected": -517.5433349609375, "eval_loss": 0.022915581241250038, "eval_rewards/accuracies": 0.6669999957084656, "eval_rewards/chosen": -0.2138461321592331, "eval_rewards/margins": 0.09208526462316513, "eval_rewards/rejected": -0.30593141913414, "eval_runtime": 715.7831, "eval_samples_per_second": 2.794, "eval_steps_per_second": 1.397, "step": 11600 }, { "epoch": 0.76, "learning_rate": 8.297210282186102e-07, "logits/chosen": -0.9908230900764465, "logits/rejected": -0.9281162023544312, "logps/chosen": -499.87322998046875, "logps/rejected": -590.39794921875, "loss": 0.0203, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.269855797290802, "rewards/margins": 0.0691581517457962, "rewards/rejected": -0.3390139639377594, "step": 11610 }, { "epoch": 0.76, "learning_rate": 8.254768663209397e-07, "logits/chosen": -0.7285041809082031, "logits/rejected": -0.7825806736946106, "logps/chosen": -483.4415588378906, "logps/rejected": -486.7078552246094, "loss": 0.032, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20486171543598175, "rewards/margins": 0.06573797762393951, "rewards/rejected": -0.2705996632575989, "step": 11620 }, { "epoch": 0.76, "learning_rate": 8.212414395562079e-07, "logits/chosen": -0.8392747640609741, "logits/rejected": -0.7306877970695496, "logps/chosen": -481.7899475097656, "logps/rejected": -563.9202880859375, "loss": 0.038, "rewards/accuracies": 0.625, "rewards/chosen": -0.2515757977962494, "rewards/margins": 0.05820949748158455, "rewards/rejected": -0.30978530645370483, "step": 11630 }, { "epoch": 0.76, "learning_rate": 8.170147700184775e-07, "logits/chosen": -0.8609894514083862, "logits/rejected": -0.7965823411941528, "logps/chosen": -463.954345703125, "logps/rejected": -549.462890625, "loss": 0.0232, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2075030356645584, "rewards/margins": 0.09444130957126617, "rewards/rejected": -0.3019443452358246, "step": 11640 }, { "epoch": 0.76, "learning_rate": 8.127968797561242e-07, "logits/chosen": -1.0651228427886963, "logits/rejected": -0.8415554761886597, "logps/chosen": -494.5506286621094, "logps/rejected": -581.2954711914062, "loss": 0.0245, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.270579993724823, "rewards/margins": 0.10916776955127716, "rewards/rejected": -0.3797477185726166, "step": 11650 }, { "epoch": 0.76, "learning_rate": 8.085877907717338e-07, "logits/chosen": -0.890338122844696, "logits/rejected": -1.0240013599395752, "logps/chosen": -441.25482177734375, "logps/rejected": -534.9625244140625, "loss": 0.0159, "rewards/accuracies": 0.625, "rewards/chosen": -0.22034330666065216, "rewards/margins": 0.10538692772388458, "rewards/rejected": -0.32573026418685913, "step": 11660 }, { "epoch": 0.76, "learning_rate": 8.043875250219732e-07, "logits/chosen": -0.8416398763656616, "logits/rejected": -0.5524312257766724, "logps/chosen": -473.6619567871094, "logps/rejected": -507.393798828125, "loss": 0.0374, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.24425721168518066, "rewards/margins": 0.05480074882507324, "rewards/rejected": -0.2990579605102539, "step": 11670 }, { "epoch": 0.76, "learning_rate": 8.001961044174881e-07, "logits/chosen": -0.954155445098877, "logits/rejected": -0.528092086315155, "logps/chosen": -469.5812072753906, "logps/rejected": -451.99322509765625, "loss": 0.0344, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.24111679196357727, "rewards/margins": 0.03960014134645462, "rewards/rejected": -0.2807169258594513, "step": 11680 }, { "epoch": 0.76, "learning_rate": 7.960135508227795e-07, "logits/chosen": -0.9125617742538452, "logits/rejected": -0.7902709245681763, "logps/chosen": -508.7157287597656, "logps/rejected": -520.7106323242188, "loss": 0.027, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2148466855287552, "rewards/margins": 0.0763109028339386, "rewards/rejected": -0.291157603263855, "step": 11690 }, { "epoch": 0.77, "learning_rate": 7.91839886056098e-07, "logits/chosen": -1.0659853219985962, "logits/rejected": -0.6218445301055908, "logps/chosen": -521.8982543945312, "logps/rejected": -560.7169189453125, "loss": 0.0196, "rewards/accuracies": 0.625, "rewards/chosen": -0.23813612759113312, "rewards/margins": 0.06938929855823517, "rewards/rejected": -0.3075253963470459, "step": 11700 }, { "epoch": 0.77, "eval_logits/chosen": -0.8164606094360352, "eval_logits/rejected": -0.6984692215919495, "eval_logps/chosen": -448.2765808105469, "eval_logps/rejected": -514.3255615234375, "eval_loss": 0.022854939103126526, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -0.21627163887023926, "eval_rewards/margins": 0.0864420086145401, "eval_rewards/rejected": -0.30271366238594055, "eval_runtime": 715.2052, "eval_samples_per_second": 2.796, "eval_steps_per_second": 1.398, "step": 11700 }, { "epoch": 0.77, "learning_rate": 7.876751318893217e-07, "logits/chosen": -0.8110322952270508, "logits/rejected": -0.5777091383934021, "logps/chosen": -436.01220703125, "logps/rejected": -495.97052001953125, "loss": 0.0221, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1981465071439743, "rewards/margins": 0.08154644072055817, "rewards/rejected": -0.27969294786453247, "step": 11710 }, { "epoch": 0.77, "learning_rate": 7.8351931004785e-07, "logits/chosen": -0.4404289722442627, "logits/rejected": -0.6184440851211548, "logps/chosen": -443.473388671875, "logps/rejected": -499.5425720214844, "loss": 0.0223, "rewards/accuracies": 0.5, "rewards/chosen": -0.2348451316356659, "rewards/margins": 0.08095255494117737, "rewards/rejected": -0.31579768657684326, "step": 11720 }, { "epoch": 0.77, "learning_rate": 7.793724422104834e-07, "logits/chosen": -0.8746858835220337, "logits/rejected": -0.781100869178772, "logps/chosen": -438.51025390625, "logps/rejected": -613.78515625, "loss": 0.0305, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22968599200248718, "rewards/margins": 0.1057976484298706, "rewards/rejected": -0.3354836404323578, "step": 11730 }, { "epoch": 0.77, "learning_rate": 7.752345500093184e-07, "logits/chosen": -0.9969593286514282, "logits/rejected": -0.8467572927474976, "logps/chosen": -457.9947204589844, "logps/rejected": -495.57952880859375, "loss": 0.047, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.24783968925476074, "rewards/margins": 0.06734929978847504, "rewards/rejected": -0.3151889741420746, "step": 11740 }, { "epoch": 0.77, "learning_rate": 7.711056550296253e-07, "logits/chosen": -1.038310170173645, "logits/rejected": -0.775715172290802, "logps/chosen": -448.45947265625, "logps/rejected": -510.3683166503906, "loss": 0.0414, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20166563987731934, "rewards/margins": 0.09893393516540527, "rewards/rejected": -0.3005995750427246, "step": 11750 }, { "epoch": 0.77, "learning_rate": 7.669857788097445e-07, "logits/chosen": -0.3451920449733734, "logits/rejected": -0.191893070936203, "logps/chosen": -422.9198303222656, "logps/rejected": -543.1383666992188, "loss": 0.0194, "rewards/accuracies": 0.625, "rewards/chosen": -0.26135388016700745, "rewards/margins": 0.09359373897314072, "rewards/rejected": -0.35494762659072876, "step": 11760 }, { "epoch": 0.77, "learning_rate": 7.628749428409676e-07, "logits/chosen": -0.7092111110687256, "logits/rejected": -0.554137110710144, "logps/chosen": -485.0926208496094, "logps/rejected": -532.4954223632812, "loss": 0.0392, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24820482730865479, "rewards/margins": 0.10615434497594833, "rewards/rejected": -0.35435914993286133, "step": 11770 }, { "epoch": 0.77, "learning_rate": 7.587731685674288e-07, "logits/chosen": -0.9842255711555481, "logits/rejected": -0.9049288630485535, "logps/chosen": -530.26025390625, "logps/rejected": -629.2454833984375, "loss": 0.0088, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2566719651222229, "rewards/margins": 0.0911482572555542, "rewards/rejected": -0.3478202223777771, "step": 11780 }, { "epoch": 0.77, "learning_rate": 7.546804773859931e-07, "logits/chosen": -0.8065023422241211, "logits/rejected": -0.7058273553848267, "logps/chosen": -468.3248596191406, "logps/rejected": -561.8131103515625, "loss": 0.0156, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2412848025560379, "rewards/margins": 0.11342419683933258, "rewards/rejected": -0.3547089695930481, "step": 11790 }, { "epoch": 0.77, "learning_rate": 7.505968906461409e-07, "logits/chosen": -0.4481712281703949, "logits/rejected": -0.8796482086181641, "logps/chosen": -504.87158203125, "logps/rejected": -554.5408935546875, "loss": 0.0164, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2639315724372864, "rewards/margins": 0.07381553202867508, "rewards/rejected": -0.33774709701538086, "step": 11800 }, { "epoch": 0.77, "eval_logits/chosen": -0.7455423474311829, "eval_logits/rejected": -0.6314948797225952, "eval_logps/chosen": -460.1055603027344, "eval_logps/rejected": -524.326904296875, "eval_loss": 0.022844623774290085, "eval_rewards/accuracies": 0.6700000166893005, "eval_rewards/chosen": -0.22810059785842896, "eval_rewards/margins": 0.08461443334817886, "eval_rewards/rejected": -0.3127150535583496, "eval_runtime": 716.8373, "eval_samples_per_second": 2.79, "eval_steps_per_second": 1.395, "step": 11800 }, { "epoch": 0.77, "learning_rate": 7.465224296498627e-07, "logits/chosen": -0.7608274221420288, "logits/rejected": -0.5792019963264465, "logps/chosen": -463.06103515625, "logps/rejected": -502.1822814941406, "loss": 0.0282, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23010194301605225, "rewards/margins": 0.0802503153681755, "rewards/rejected": -0.31035223603248596, "step": 11810 }, { "epoch": 0.77, "learning_rate": 7.424571156515412e-07, "logits/chosen": -0.8726217150688171, "logits/rejected": -0.6571828126907349, "logps/chosen": -417.160400390625, "logps/rejected": -538.8943481445312, "loss": 0.0298, "rewards/accuracies": 0.625, "rewards/chosen": -0.2394552230834961, "rewards/margins": 0.0990573987364769, "rewards/rejected": -0.3385125994682312, "step": 11820 }, { "epoch": 0.77, "learning_rate": 7.38400969857847e-07, "logits/chosen": -0.5730519890785217, "logits/rejected": -0.6655212640762329, "logps/chosen": -471.81939697265625, "logps/rejected": -610.7142333984375, "loss": 0.0336, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.28867703676223755, "rewards/margins": 0.1357426792383194, "rewards/rejected": -0.42441970109939575, "step": 11830 }, { "epoch": 0.77, "learning_rate": 7.343540134276225e-07, "logits/chosen": -0.7581150531768799, "logits/rejected": -0.7705793380737305, "logps/chosen": -382.58856201171875, "logps/rejected": -450.62774658203125, "loss": 0.0248, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21187695860862732, "rewards/margins": 0.06614295393228531, "rewards/rejected": -0.2780199348926544, "step": 11840 }, { "epoch": 0.78, "learning_rate": 7.303162674717762e-07, "logits/chosen": -0.2548813223838806, "logits/rejected": -0.20554086565971375, "logps/chosen": -475.32366943359375, "logps/rejected": -506.905517578125, "loss": 0.0362, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.26509588956832886, "rewards/margins": 0.08741726726293564, "rewards/rejected": -0.3525131940841675, "step": 11850 }, { "epoch": 0.78, "learning_rate": 7.26287753053167e-07, "logits/chosen": -0.7853603363037109, "logits/rejected": -0.5816177129745483, "logps/chosen": -540.7530517578125, "logps/rejected": -633.2919311523438, "loss": 0.0196, "rewards/accuracies": 0.75, "rewards/chosen": -0.2757769227027893, "rewards/margins": 0.09380709379911423, "rewards/rejected": -0.36958402395248413, "step": 11860 }, { "epoch": 0.78, "learning_rate": 7.222684911865013e-07, "logits/chosen": -0.6901577711105347, "logits/rejected": -0.629686176776886, "logps/chosen": -444.2909240722656, "logps/rejected": -568.2418212890625, "loss": 0.0392, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23712222278118134, "rewards/margins": 0.10226553678512573, "rewards/rejected": -0.3393877446651459, "step": 11870 }, { "epoch": 0.78, "learning_rate": 7.182585028382166e-07, "logits/chosen": -0.868871808052063, "logits/rejected": -0.7003182172775269, "logps/chosen": -535.326171875, "logps/rejected": -596.7152709960938, "loss": 0.0315, "rewards/accuracies": 0.75, "rewards/chosen": -0.2591745853424072, "rewards/margins": 0.09383906424045563, "rewards/rejected": -0.35301369428634644, "step": 11880 }, { "epoch": 0.78, "learning_rate": 7.142578089263769e-07, "logits/chosen": -0.9448205232620239, "logits/rejected": -1.0499770641326904, "logps/chosen": -559.8673706054688, "logps/rejected": -609.140869140625, "loss": 0.0281, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2414388358592987, "rewards/margins": 0.10807342827320099, "rewards/rejected": -0.3495122790336609, "step": 11890 }, { "epoch": 0.78, "learning_rate": 7.102664303205611e-07, "logits/chosen": -0.6312407851219177, "logits/rejected": -0.8986209630966187, "logps/chosen": -483.555419921875, "logps/rejected": -549.9271240234375, "loss": 0.0204, "rewards/accuracies": 0.625, "rewards/chosen": -0.2629929482936859, "rewards/margins": 0.08424819260835648, "rewards/rejected": -0.3472411632537842, "step": 11900 }, { "epoch": 0.78, "eval_logits/chosen": -0.7203341722488403, "eval_logits/rejected": -0.6060306429862976, "eval_logps/chosen": -482.67681884765625, "eval_logps/rejected": -552.1953735351562, "eval_loss": 0.022822316735982895, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -0.2506718337535858, "eval_rewards/margins": 0.08991160243749619, "eval_rewards/rejected": -0.340583473443985, "eval_runtime": 716.6944, "eval_samples_per_second": 2.791, "eval_steps_per_second": 1.395, "step": 11900 }, { "epoch": 0.78, "learning_rate": 7.062843878417566e-07, "logits/chosen": -1.0795499086380005, "logits/rejected": -0.8569973707199097, "logps/chosen": -428.8421936035156, "logps/rejected": -489.1209411621094, "loss": 0.0237, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20540961623191833, "rewards/margins": 0.09332293272018433, "rewards/rejected": -0.29873257875442505, "step": 11910 }, { "epoch": 0.78, "learning_rate": 7.023117022622458e-07, "logits/chosen": -0.8484246134757996, "logits/rejected": -0.5322074890136719, "logps/chosen": -519.0354614257812, "logps/rejected": -590.203857421875, "loss": 0.0243, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.28052017092704773, "rewards/margins": 0.09443672746419907, "rewards/rejected": -0.374956876039505, "step": 11920 }, { "epoch": 0.78, "learning_rate": 6.983483943055042e-07, "logits/chosen": -0.8660456538200378, "logits/rejected": -0.7042397260665894, "logps/chosen": -516.9036865234375, "logps/rejected": -528.4324951171875, "loss": 0.033, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.236437126994133, "rewards/margins": 0.06830186396837234, "rewards/rejected": -0.30473896861076355, "step": 11930 }, { "epoch": 0.78, "learning_rate": 6.943944846460859e-07, "logits/chosen": -0.5017508268356323, "logits/rejected": -0.5840286612510681, "logps/chosen": -433.42974853515625, "logps/rejected": -444.52435302734375, "loss": 0.0244, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21467538177967072, "rewards/margins": 0.057836759835481644, "rewards/rejected": -0.27251213788986206, "step": 11940 }, { "epoch": 0.78, "learning_rate": 6.904499939095225e-07, "logits/chosen": -0.830405592918396, "logits/rejected": -0.7617335915565491, "logps/chosen": -435.3990173339844, "logps/rejected": -532.5736083984375, "loss": 0.013, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21305187046527863, "rewards/margins": 0.11250293254852295, "rewards/rejected": -0.3255547881126404, "step": 11950 }, { "epoch": 0.78, "learning_rate": 6.865149426722079e-07, "logits/chosen": -0.6073800921440125, "logits/rejected": -0.6010125875473022, "logps/chosen": -539.557861328125, "logps/rejected": -611.2875366210938, "loss": 0.0118, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.26837271451950073, "rewards/margins": 0.1024237647652626, "rewards/rejected": -0.3707965016365051, "step": 11960 }, { "epoch": 0.78, "learning_rate": 6.825893514612985e-07, "logits/chosen": -0.5490539073944092, "logits/rejected": -0.4451957643032074, "logps/chosen": -452.33001708984375, "logps/rejected": -557.008056640625, "loss": 0.0373, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21576321125030518, "rewards/margins": 0.10686157643795013, "rewards/rejected": -0.3226248323917389, "step": 11970 }, { "epoch": 0.78, "learning_rate": 6.786732407546001e-07, "logits/chosen": -0.6100732684135437, "logits/rejected": -0.4643806517124176, "logps/chosen": -416.3296813964844, "logps/rejected": -459.27545166015625, "loss": 0.0266, "rewards/accuracies": 0.625, "rewards/chosen": -0.2154485285282135, "rewards/margins": 0.08266131579875946, "rewards/rejected": -0.29810982942581177, "step": 11980 }, { "epoch": 0.78, "learning_rate": 6.747666309804654e-07, "logits/chosen": -1.1219953298568726, "logits/rejected": -0.8012224435806274, "logps/chosen": -515.18212890625, "logps/rejected": -505.71270751953125, "loss": 0.021, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2275048941373825, "rewards/margins": 0.07006601989269257, "rewards/rejected": -0.2975709140300751, "step": 11990 }, { "epoch": 0.79, "learning_rate": 6.708695425176831e-07, "logits/chosen": -0.7982760667800903, "logits/rejected": -0.5225854516029358, "logps/chosen": -422.2466735839844, "logps/rejected": -530.6298217773438, "loss": 0.0332, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2532079517841339, "rewards/margins": 0.08762778341770172, "rewards/rejected": -0.3408357501029968, "step": 12000 }, { "epoch": 0.79, "eval_logits/chosen": -0.8177240490913391, "eval_logits/rejected": -0.699147641658783, "eval_logps/chosen": -454.897705078125, "eval_logps/rejected": -521.051025390625, "eval_loss": 0.02275935374200344, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -0.22289280593395233, "eval_rewards/margins": 0.08654629439115524, "eval_rewards/rejected": -0.30943912267684937, "eval_runtime": 714.8237, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 12000 }, { "epoch": 0.79, "learning_rate": 6.669819956953768e-07, "logits/chosen": -0.5398935675621033, "logits/rejected": -0.6451722383499146, "logps/chosen": -378.42083740234375, "logps/rejected": -471.20318603515625, "loss": 0.0091, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20808963477611542, "rewards/margins": 0.08424235880374908, "rewards/rejected": -0.2923319935798645, "step": 12010 }, { "epoch": 0.79, "learning_rate": 6.631040107928957e-07, "logits/chosen": -1.2210640907287598, "logits/rejected": -0.6192626357078552, "logps/chosen": -506.04461669921875, "logps/rejected": -479.8175354003906, "loss": 0.0326, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2378603219985962, "rewards/margins": 0.07007137686014175, "rewards/rejected": -0.30793172121047974, "step": 12020 }, { "epoch": 0.79, "learning_rate": 6.592356080397072e-07, "logits/chosen": -0.9121416211128235, "logits/rejected": -0.6956174969673157, "logps/chosen": -439.8330993652344, "logps/rejected": -481.54620361328125, "loss": 0.0274, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21154391765594482, "rewards/margins": 0.09817642718553543, "rewards/rejected": -0.30972033739089966, "step": 12030 }, { "epoch": 0.79, "learning_rate": 6.553768076152963e-07, "logits/chosen": -0.5977886915206909, "logits/rejected": -0.7797805666923523, "logps/chosen": -361.0947265625, "logps/rejected": -513.20849609375, "loss": 0.0488, "rewards/accuracies": 0.75, "rewards/chosen": -0.20299717783927917, "rewards/margins": 0.12500973045825958, "rewards/rejected": -0.32800689339637756, "step": 12040 }, { "epoch": 0.79, "learning_rate": 6.51527629649055e-07, "logits/chosen": -1.1164342164993286, "logits/rejected": -0.9199824333190918, "logps/chosen": -484.06280517578125, "logps/rejected": -505.15325927734375, "loss": 0.0176, "rewards/accuracies": 0.625, "rewards/chosen": -0.23047968745231628, "rewards/margins": 0.05023752525448799, "rewards/rejected": -0.28071725368499756, "step": 12050 }, { "epoch": 0.79, "learning_rate": 6.476880942201824e-07, "logits/chosen": -1.3156317472457886, "logits/rejected": -0.9061687588691711, "logps/chosen": -406.8352966308594, "logps/rejected": -434.9729919433594, "loss": 0.0124, "rewards/accuracies": 0.625, "rewards/chosen": -0.16549763083457947, "rewards/margins": 0.08676186949014664, "rewards/rejected": -0.2522595226764679, "step": 12060 }, { "epoch": 0.79, "learning_rate": 6.438582213575748e-07, "logits/chosen": -0.9157454371452332, "logits/rejected": -1.0080962181091309, "logps/chosen": -423.99932861328125, "logps/rejected": -521.3067626953125, "loss": 0.0303, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1940828114748001, "rewards/margins": 0.08616189658641815, "rewards/rejected": -0.2802446782588959, "step": 12070 }, { "epoch": 0.79, "learning_rate": 6.400380310397267e-07, "logits/chosen": -0.9054854512214661, "logits/rejected": -0.5949335694313049, "logps/chosen": -434.8663635253906, "logps/rejected": -517.9624633789062, "loss": 0.0173, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.20275601744651794, "rewards/margins": 0.05168426036834717, "rewards/rejected": -0.2544403076171875, "step": 12080 }, { "epoch": 0.79, "learning_rate": 6.362275431946202e-07, "logits/chosen": -0.7275618314743042, "logits/rejected": -0.7322520017623901, "logps/chosen": -433.801513671875, "logps/rejected": -492.52197265625, "loss": 0.0453, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1974479705095291, "rewards/margins": 0.052672289311885834, "rewards/rejected": -0.25012025237083435, "step": 12090 }, { "epoch": 0.79, "learning_rate": 6.324267776996285e-07, "logits/chosen": -1.1647471189498901, "logits/rejected": -0.6961271166801453, "logps/chosen": -614.8536376953125, "logps/rejected": -612.9824829101562, "loss": 0.0127, "rewards/accuracies": 0.75, "rewards/chosen": -0.2402745932340622, "rewards/margins": 0.12010933458805084, "rewards/rejected": -0.36038392782211304, "step": 12100 }, { "epoch": 0.79, "eval_logits/chosen": -0.8830052614212036, "eval_logits/rejected": -0.7612468004226685, "eval_logps/chosen": -434.7568054199219, "eval_logps/rejected": -498.7012939453125, "eval_loss": 0.02273384854197502, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -0.20275187492370605, "eval_rewards/margins": 0.0843375101685524, "eval_rewards/rejected": -0.28708934783935547, "eval_runtime": 714.8334, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 12100 }, { "epoch": 0.79, "learning_rate": 6.286357543814045e-07, "logits/chosen": -0.7750638127326965, "logits/rejected": -0.8950562477111816, "logps/chosen": -393.9158935546875, "logps/rejected": -573.3221435546875, "loss": 0.0447, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20038607716560364, "rewards/margins": 0.11139500141143799, "rewards/rejected": -0.3117810785770416, "step": 12110 }, { "epoch": 0.79, "learning_rate": 6.248544930157838e-07, "logits/chosen": -0.9617152214050293, "logits/rejected": -0.8115730285644531, "logps/chosen": -401.9766540527344, "logps/rejected": -506.9098205566406, "loss": 0.0307, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2178911417722702, "rewards/margins": 0.12271358072757721, "rewards/rejected": -0.3406047224998474, "step": 12120 }, { "epoch": 0.79, "learning_rate": 6.21083013327678e-07, "logits/chosen": -0.8542438745498657, "logits/rejected": -0.8073781132698059, "logps/chosen": -472.4697265625, "logps/rejected": -475.1494140625, "loss": 0.0216, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16822898387908936, "rewards/margins": 0.05860542133450508, "rewards/rejected": -0.22683438658714294, "step": 12130 }, { "epoch": 0.79, "learning_rate": 6.17321334990973e-07, "logits/chosen": -1.0390619039535522, "logits/rejected": -0.7560518980026245, "logps/chosen": -383.1478576660156, "logps/rejected": -410.5859375, "loss": 0.0149, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18234333395957947, "rewards/margins": 0.06267540901899338, "rewards/rejected": -0.24501876533031464, "step": 12140 }, { "epoch": 0.79, "learning_rate": 6.135694776284243e-07, "logits/chosen": -1.260353446006775, "logits/rejected": -0.7988125085830688, "logps/chosen": -442.739013671875, "logps/rejected": -484.9513244628906, "loss": 0.0289, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17194800078868866, "rewards/margins": 0.10698451846837997, "rewards/rejected": -0.2789325416088104, "step": 12150 }, { "epoch": 0.8, "learning_rate": 6.098274608115595e-07, "logits/chosen": -1.0702807903289795, "logits/rejected": -0.6633713841438293, "logps/chosen": -363.2427062988281, "logps/rejected": -370.0378112792969, "loss": 0.0687, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1575920581817627, "rewards/margins": 0.03542754426598549, "rewards/rejected": -0.19301962852478027, "step": 12160 }, { "epoch": 0.8, "learning_rate": 6.060953040605697e-07, "logits/chosen": -1.1180672645568848, "logits/rejected": -0.7205093502998352, "logps/chosen": -493.958740234375, "logps/rejected": -520.9921875, "loss": 0.0093, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.15482911467552185, "rewards/margins": 0.09692830592393875, "rewards/rejected": -0.25175741314888, "step": 12170 }, { "epoch": 0.8, "learning_rate": 6.023730268442144e-07, "logits/chosen": -0.8893157839775085, "logits/rejected": -0.7367485761642456, "logps/chosen": -390.333984375, "logps/rejected": -476.3172912597656, "loss": 0.0099, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18551313877105713, "rewards/margins": 0.11442817747592926, "rewards/rejected": -0.2999412715435028, "step": 12180 }, { "epoch": 0.8, "learning_rate": 5.986606485797131e-07, "logits/chosen": -0.8810988664627075, "logits/rejected": -0.9880490303039551, "logps/chosen": -354.8076477050781, "logps/rejected": -428.9920349121094, "loss": 0.0289, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.15491709113121033, "rewards/margins": 0.06911829859018326, "rewards/rejected": -0.22403541207313538, "step": 12190 }, { "epoch": 0.8, "learning_rate": 5.949581886326511e-07, "logits/chosen": -0.8513473272323608, "logits/rejected": -1.0082075595855713, "logps/chosen": -444.63873291015625, "logps/rejected": -473.0108337402344, "loss": 0.0325, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.14684663712978363, "rewards/margins": 0.05728645995259285, "rewards/rejected": -0.20413312315940857, "step": 12200 }, { "epoch": 0.8, "eval_logits/chosen": -1.0058047771453857, "eval_logits/rejected": -0.877938985824585, "eval_logps/chosen": -400.7754211425781, "eval_logps/rejected": -462.2358093261719, "eval_loss": 0.022773578763008118, "eval_rewards/accuracies": 0.6710000038146973, "eval_rewards/chosen": -0.1687704622745514, "eval_rewards/margins": 0.0818534567952156, "eval_rewards/rejected": -0.2506239116191864, "eval_runtime": 714.7337, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 12200 }, { "epoch": 0.8, "learning_rate": 5.912656663168717e-07, "logits/chosen": -1.1726329326629639, "logits/rejected": -1.1064045429229736, "logps/chosen": -377.85443115234375, "logps/rejected": -434.265380859375, "loss": 0.0188, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1538451462984085, "rewards/margins": 0.06278332322835922, "rewards/rejected": -0.21662846207618713, "step": 12210 }, { "epoch": 0.8, "learning_rate": 5.875831008943817e-07, "logits/chosen": -0.8785686492919922, "logits/rejected": -0.8450040817260742, "logps/chosen": -346.0321350097656, "logps/rejected": -375.39605712890625, "loss": 0.0291, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16838808357715607, "rewards/margins": 0.05196414142847061, "rewards/rejected": -0.22035221755504608, "step": 12220 }, { "epoch": 0.8, "learning_rate": 5.839105115752442e-07, "logits/chosen": -0.8276346325874329, "logits/rejected": -0.7338351011276245, "logps/chosen": -434.658203125, "logps/rejected": -472.2508239746094, "loss": 0.0208, "rewards/accuracies": 0.625, "rewards/chosen": -0.21168074011802673, "rewards/margins": 0.07760036736726761, "rewards/rejected": -0.28928110003471375, "step": 12230 }, { "epoch": 0.8, "learning_rate": 5.802479175174855e-07, "logits/chosen": -0.8685129284858704, "logits/rejected": -0.8740677833557129, "logps/chosen": -363.8148498535156, "logps/rejected": -453.449951171875, "loss": 0.0109, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19782057404518127, "rewards/margins": 0.08370805531740189, "rewards/rejected": -0.28152862191200256, "step": 12240 }, { "epoch": 0.8, "learning_rate": 5.765953378269901e-07, "logits/chosen": -1.0801695585250854, "logits/rejected": -1.1017118692398071, "logps/chosen": -397.7713317871094, "logps/rejected": -545.7034301757812, "loss": 0.0262, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1907779723405838, "rewards/margins": 0.11688830703496933, "rewards/rejected": -0.30766627192497253, "step": 12250 }, { "epoch": 0.8, "learning_rate": 5.729527915574037e-07, "logits/chosen": -0.8680895566940308, "logits/rejected": -0.9653156399726868, "logps/chosen": -402.61236572265625, "logps/rejected": -497.59747314453125, "loss": 0.0251, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.182193785905838, "rewards/margins": 0.09464605152606964, "rewards/rejected": -0.27683982253074646, "step": 12260 }, { "epoch": 0.8, "learning_rate": 5.693202977100304e-07, "logits/chosen": -0.8371729850769043, "logits/rejected": -0.7371872663497925, "logps/chosen": -358.5738830566406, "logps/rejected": -436.27886962890625, "loss": 0.0234, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18842875957489014, "rewards/margins": 0.08104129135608673, "rewards/rejected": -0.2694700360298157, "step": 12270 }, { "epoch": 0.8, "learning_rate": 5.656978752337389e-07, "logits/chosen": -0.9971674084663391, "logits/rejected": -1.0150810480117798, "logps/chosen": -417.9154357910156, "logps/rejected": -540.6372680664062, "loss": 0.0272, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21826031804084778, "rewards/margins": 0.12132208049297333, "rewards/rejected": -0.3395823836326599, "step": 12280 }, { "epoch": 0.8, "learning_rate": 5.620855430248581e-07, "logits/chosen": -0.7814995646476746, "logits/rejected": -0.8672903776168823, "logps/chosen": -300.8846435546875, "logps/rejected": -414.7732849121094, "loss": 0.0246, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.13888958096504211, "rewards/margins": 0.11606705188751221, "rewards/rejected": -0.2549566626548767, "step": 12290 }, { "epoch": 0.8, "learning_rate": 5.584833199270837e-07, "logits/chosen": -1.1541928052902222, "logits/rejected": -0.8122884631156921, "logps/chosen": -428.8125915527344, "logps/rejected": -499.4772033691406, "loss": 0.0312, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2001802921295166, "rewards/margins": 0.07835780084133148, "rewards/rejected": -0.2785380780696869, "step": 12300 }, { "epoch": 0.8, "eval_logits/chosen": -0.9771337509155273, "eval_logits/rejected": -0.8499288558959961, "eval_logps/chosen": -410.9584655761719, "eval_logps/rejected": -475.4502868652344, "eval_loss": 0.02263132855296135, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -0.17895352840423584, "eval_rewards/margins": 0.0848848894238472, "eval_rewards/rejected": -0.26383841037750244, "eval_runtime": 715.9104, "eval_samples_per_second": 2.794, "eval_steps_per_second": 1.397, "step": 12300 }, { "epoch": 0.81, "learning_rate": 5.548912247313742e-07, "logits/chosen": -1.3977093696594238, "logits/rejected": -1.096999168395996, "logps/chosen": -480.2452087402344, "logps/rejected": -501.95843505859375, "loss": 0.0186, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19644519686698914, "rewards/margins": 0.0692889466881752, "rewards/rejected": -0.26573416590690613, "step": 12310 }, { "epoch": 0.81, "learning_rate": 5.513092761758596e-07, "logits/chosen": -1.2774897813796997, "logits/rejected": -1.0175151824951172, "logps/chosen": -486.94268798828125, "logps/rejected": -468.3524475097656, "loss": 0.0238, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21573276817798615, "rewards/margins": 0.0483907125890255, "rewards/rejected": -0.26412349939346313, "step": 12320 }, { "epoch": 0.81, "learning_rate": 5.477374929457363e-07, "logits/chosen": -1.2530696392059326, "logits/rejected": -1.0140804052352905, "logps/chosen": -386.936767578125, "logps/rejected": -430.3931579589844, "loss": 0.014, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18487557768821716, "rewards/margins": 0.06262831389904022, "rewards/rejected": -0.24750392138957977, "step": 12330 }, { "epoch": 0.81, "learning_rate": 5.441758936731772e-07, "logits/chosen": -1.0178519487380981, "logits/rejected": -0.7181859016418457, "logps/chosen": -421.2232971191406, "logps/rejected": -483.2220153808594, "loss": 0.022, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18587802350521088, "rewards/margins": 0.07978905737400055, "rewards/rejected": -0.26566705107688904, "step": 12340 }, { "epoch": 0.81, "learning_rate": 5.406244969372273e-07, "logits/chosen": -1.027550458908081, "logits/rejected": -0.8534027338027954, "logps/chosen": -383.47186279296875, "logps/rejected": -526.52294921875, "loss": 0.0217, "rewards/accuracies": 0.75, "rewards/chosen": -0.1837489753961563, "rewards/margins": 0.136549174785614, "rewards/rejected": -0.32029813528060913, "step": 12350 }, { "epoch": 0.81, "learning_rate": 5.370833212637122e-07, "logits/chosen": -0.7744110822677612, "logits/rejected": -0.669444739818573, "logps/chosen": -382.06842041015625, "logps/rejected": -457.8172912597656, "loss": 0.018, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1635574847459793, "rewards/margins": 0.08953403681516647, "rewards/rejected": -0.2530915141105652, "step": 12360 }, { "epoch": 0.81, "learning_rate": 5.335523851251392e-07, "logits/chosen": -1.047857642173767, "logits/rejected": -0.9234442710876465, "logps/chosen": -410.169189453125, "logps/rejected": -483.4208984375, "loss": 0.032, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20403525233268738, "rewards/margins": 0.09404997527599335, "rewards/rejected": -0.29808521270751953, "step": 12370 }, { "epoch": 0.81, "learning_rate": 5.300317069406003e-07, "logits/chosen": -1.0623254776000977, "logits/rejected": -0.8672064542770386, "logps/chosen": -323.99957275390625, "logps/rejected": -435.005859375, "loss": 0.0113, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16325370967388153, "rewards/margins": 0.1072395071387291, "rewards/rejected": -0.2704932391643524, "step": 12380 }, { "epoch": 0.81, "learning_rate": 5.265213050756782e-07, "logits/chosen": -1.2872618436813354, "logits/rejected": -1.0191078186035156, "logps/chosen": -397.19378662109375, "logps/rejected": -492.97357177734375, "loss": 0.0194, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1651425063610077, "rewards/margins": 0.09791094809770584, "rewards/rejected": -0.26305344700813293, "step": 12390 }, { "epoch": 0.81, "learning_rate": 5.230211978423477e-07, "logits/chosen": -1.1180744171142578, "logits/rejected": -1.0045652389526367, "logps/chosen": -408.6681823730469, "logps/rejected": -449.1815490722656, "loss": 0.0288, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18986797332763672, "rewards/margins": 0.05568217113614082, "rewards/rejected": -0.24555012583732605, "step": 12400 }, { "epoch": 0.81, "eval_logits/chosen": -0.9853109121322632, "eval_logits/rejected": -0.8574509024620056, "eval_logps/chosen": -417.2077331542969, "eval_logps/rejected": -482.1120300292969, "eval_loss": 0.022581050172448158, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -0.18520276248455048, "eval_rewards/margins": 0.0852973535656929, "eval_rewards/rejected": -0.27050015330314636, "eval_runtime": 715.4983, "eval_samples_per_second": 2.795, "eval_steps_per_second": 1.398, "step": 12400 }, { "epoch": 0.81, "learning_rate": 5.195314034988835e-07, "logits/chosen": -1.2784175872802734, "logits/rejected": -1.0826590061187744, "logps/chosen": -370.7709045410156, "logps/rejected": -395.14276123046875, "loss": 0.04, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15148386359214783, "rewards/margins": 0.0914110466837883, "rewards/rejected": -0.24289488792419434, "step": 12410 }, { "epoch": 0.81, "learning_rate": 5.160519402497616e-07, "logits/chosen": -1.0334594249725342, "logits/rejected": -0.9419137835502625, "logps/chosen": -430.1622009277344, "logps/rejected": -520.0856323242188, "loss": 0.0373, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20010820031166077, "rewards/margins": 0.09398336708545685, "rewards/rejected": -0.2940915524959564, "step": 12420 }, { "epoch": 0.81, "learning_rate": 5.125828262455679e-07, "logits/chosen": -1.0224422216415405, "logits/rejected": -0.8301979899406433, "logps/chosen": -428.63726806640625, "logps/rejected": -492.8619689941406, "loss": 0.02, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1729241907596588, "rewards/margins": 0.09456421434879303, "rewards/rejected": -0.26748839020729065, "step": 12430 }, { "epoch": 0.81, "learning_rate": 5.091240795828992e-07, "logits/chosen": -0.7814083099365234, "logits/rejected": -0.711333155632019, "logps/chosen": -371.58026123046875, "logps/rejected": -484.06060791015625, "loss": 0.0488, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16908983886241913, "rewards/margins": 0.09264969825744629, "rewards/rejected": -0.2617395222187042, "step": 12440 }, { "epoch": 0.81, "learning_rate": 5.056757183042732e-07, "logits/chosen": -1.0357064008712769, "logits/rejected": -0.8471451997756958, "logps/chosen": -420.8604431152344, "logps/rejected": -493.57965087890625, "loss": 0.0124, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18730428814888, "rewards/margins": 0.09259669482707977, "rewards/rejected": -0.2799009680747986, "step": 12450 }, { "epoch": 0.82, "learning_rate": 5.022377603980308e-07, "logits/chosen": -1.2519150972366333, "logits/rejected": -0.8020488023757935, "logps/chosen": -436.32794189453125, "logps/rejected": -466.385498046875, "loss": 0.0219, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.187043234705925, "rewards/margins": 0.09652890264987946, "rewards/rejected": -0.28357213735580444, "step": 12460 }, { "epoch": 0.82, "learning_rate": 4.988102237982454e-07, "logits/chosen": -1.0746042728424072, "logits/rejected": -0.8406085968017578, "logps/chosen": -418.4119567871094, "logps/rejected": -434.460693359375, "loss": 0.0175, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19623149931430817, "rewards/margins": 0.05662398412823677, "rewards/rejected": -0.25285547971725464, "step": 12470 }, { "epoch": 0.82, "learning_rate": 4.953931263846251e-07, "logits/chosen": -1.057842493057251, "logits/rejected": -0.8594833612442017, "logps/chosen": -470.97216796875, "logps/rejected": -527.2671508789062, "loss": 0.0325, "rewards/accuracies": 0.75, "rewards/chosen": -0.20926328003406525, "rewards/margins": 0.10025568306446075, "rewards/rejected": -0.3095189332962036, "step": 12480 }, { "epoch": 0.82, "learning_rate": 4.919864859824266e-07, "logits/chosen": -0.9544647336006165, "logits/rejected": -0.9563530683517456, "logps/chosen": -430.4136657714844, "logps/rejected": -465.03692626953125, "loss": 0.0316, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19297946989536285, "rewards/margins": 0.07965844124555588, "rewards/rejected": -0.27263790369033813, "step": 12490 }, { "epoch": 0.82, "learning_rate": 4.885903203623532e-07, "logits/chosen": -1.4066529273986816, "logits/rejected": -0.8286053538322449, "logps/chosen": -448.26409912109375, "logps/rejected": -481.58905029296875, "loss": 0.0124, "rewards/accuracies": 0.75, "rewards/chosen": -0.16325023770332336, "rewards/margins": 0.09709838032722473, "rewards/rejected": -0.2603486180305481, "step": 12500 }, { "epoch": 0.82, "eval_logits/chosen": -1.0003312826156616, "eval_logits/rejected": -0.8719565868377686, "eval_logps/chosen": -414.9066467285156, "eval_logps/rejected": -478.6212158203125, "eval_loss": 0.022660432383418083, "eval_rewards/accuracies": 0.6700000166893005, "eval_rewards/chosen": -0.18290171027183533, "eval_rewards/margins": 0.0841076448559761, "eval_rewards/rejected": -0.26700934767723083, "eval_runtime": 716.7678, "eval_samples_per_second": 2.79, "eval_steps_per_second": 1.395, "step": 12500 }, { "epoch": 0.82, "learning_rate": 4.852046472404695e-07, "logits/chosen": -1.0721367597579956, "logits/rejected": -0.6387485265731812, "logps/chosen": -438.8202209472656, "logps/rejected": -397.42254638671875, "loss": 0.0341, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.14093145728111267, "rewards/margins": 0.08427577465772629, "rewards/rejected": -0.22520723938941956, "step": 12510 }, { "epoch": 0.82, "learning_rate": 4.818294842781035e-07, "logits/chosen": -1.2152434587478638, "logits/rejected": -0.8837118148803711, "logps/chosen": -382.65692138671875, "logps/rejected": -459.76922607421875, "loss": 0.0279, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15644845366477966, "rewards/margins": 0.12947024405002594, "rewards/rejected": -0.285918653011322, "step": 12520 }, { "epoch": 0.82, "learning_rate": 4.784648490817601e-07, "logits/chosen": -1.0936287641525269, "logits/rejected": -0.9688494801521301, "logps/chosen": -386.1371765136719, "logps/rejected": -404.65875244140625, "loss": 0.0378, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1660304069519043, "rewards/margins": 0.061780087649822235, "rewards/rejected": -0.22781050205230713, "step": 12530 }, { "epoch": 0.82, "learning_rate": 4.751107592030235e-07, "logits/chosen": -1.0623115301132202, "logits/rejected": -0.7822055220603943, "logps/chosen": -318.0442810058594, "logps/rejected": -402.30645751953125, "loss": 0.0228, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.14819252490997314, "rewards/margins": 0.10136213153600693, "rewards/rejected": -0.24955467879772186, "step": 12540 }, { "epoch": 0.82, "learning_rate": 4.717672321384703e-07, "logits/chosen": -1.016322374343872, "logits/rejected": -0.5345064401626587, "logps/chosen": -394.35369873046875, "logps/rejected": -439.4088439941406, "loss": 0.0244, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17110803723335266, "rewards/margins": 0.08725089579820633, "rewards/rejected": -0.2583589255809784, "step": 12550 }, { "epoch": 0.82, "learning_rate": 4.684342853295748e-07, "logits/chosen": -0.8431817889213562, "logits/rejected": -0.840168833732605, "logps/chosen": -357.74639892578125, "logps/rejected": -444.4488220214844, "loss": 0.0274, "rewards/accuracies": 0.625, "rewards/chosen": -0.17004966735839844, "rewards/margins": 0.09062638878822327, "rewards/rejected": -0.2606760859489441, "step": 12560 }, { "epoch": 0.82, "learning_rate": 4.651119361626213e-07, "logits/chosen": -1.4477976560592651, "logits/rejected": -0.9536776542663574, "logps/chosen": -373.7337646484375, "logps/rejected": -404.3658142089844, "loss": 0.021, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13261517882347107, "rewards/margins": 0.07686091214418411, "rewards/rejected": -0.20947608351707458, "step": 12570 }, { "epoch": 0.82, "learning_rate": 4.618002019686091e-07, "logits/chosen": -1.0770597457885742, "logits/rejected": -1.092034101486206, "logps/chosen": -442.4501953125, "logps/rejected": -462.3739318847656, "loss": 0.0218, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16323785483837128, "rewards/margins": 0.07655976712703705, "rewards/rejected": -0.23979762196540833, "step": 12580 }, { "epoch": 0.82, "learning_rate": 4.5849910002316757e-07, "logits/chosen": -1.0524985790252686, "logits/rejected": -0.934877872467041, "logps/chosen": -384.8161315917969, "logps/rejected": -443.95166015625, "loss": 0.029, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20430400967597961, "rewards/margins": 0.09239979088306427, "rewards/rejected": -0.2967037856578827, "step": 12590 }, { "epoch": 0.82, "learning_rate": 4.5520864754645984e-07, "logits/chosen": -1.3697372674942017, "logits/rejected": -1.143179178237915, "logps/chosen": -456.32476806640625, "logps/rejected": -477.613525390625, "loss": 0.0164, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1749749779701233, "rewards/margins": 0.06511374562978745, "rewards/rejected": -0.24008873105049133, "step": 12600 }, { "epoch": 0.82, "eval_logits/chosen": -1.0030584335327148, "eval_logits/rejected": -0.8740435242652893, "eval_logps/chosen": -418.0470275878906, "eval_logps/rejected": -482.15838623046875, "eval_loss": 0.022645851597189903, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -0.18604205548763275, "eval_rewards/margins": 0.08450444042682648, "eval_rewards/rejected": -0.27054649591445923, "eval_runtime": 716.8324, "eval_samples_per_second": 2.79, "eval_steps_per_second": 1.395, "step": 12600 }, { "epoch": 0.83, "learning_rate": 4.5192886170309896e-07, "logits/chosen": -0.9266126751899719, "logits/rejected": -0.8926378488540649, "logps/chosen": -397.9290771484375, "logps/rejected": -444.92889404296875, "loss": 0.0193, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20716063678264618, "rewards/margins": 0.04681571573019028, "rewards/rejected": -0.25397634506225586, "step": 12610 }, { "epoch": 0.83, "learning_rate": 4.486597596020548e-07, "logits/chosen": -0.8484398722648621, "logits/rejected": -0.9409611821174622, "logps/chosen": -421.64593505859375, "logps/rejected": -457.50848388671875, "loss": 0.02, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20206467807292938, "rewards/margins": 0.07418427616357803, "rewards/rejected": -0.276248961687088, "step": 12620 }, { "epoch": 0.83, "learning_rate": 4.454013582965644e-07, "logits/chosen": -0.7837668061256409, "logits/rejected": -0.5988458395004272, "logps/chosen": -480.2074279785156, "logps/rejected": -486.00274658203125, "loss": 0.018, "rewards/accuracies": 0.625, "rewards/chosen": -0.21675066649913788, "rewards/margins": 0.05623869225382805, "rewards/rejected": -0.2729893922805786, "step": 12630 }, { "epoch": 0.83, "learning_rate": 4.4215367478404605e-07, "logits/chosen": -0.8874751329421997, "logits/rejected": -0.8098077774047852, "logps/chosen": -479.22003173828125, "logps/rejected": -576.7542114257812, "loss": 0.0431, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20846012234687805, "rewards/margins": 0.07316248118877411, "rewards/rejected": -0.28162258863449097, "step": 12640 }, { "epoch": 0.83, "learning_rate": 4.389167260060068e-07, "logits/chosen": -0.9569392204284668, "logits/rejected": -0.9504988789558411, "logps/chosen": -371.9100646972656, "logps/rejected": -450.93829345703125, "loss": 0.0167, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16321979463100433, "rewards/margins": 0.1172027438879013, "rewards/rejected": -0.28042253851890564, "step": 12650 }, { "epoch": 0.83, "learning_rate": 4.356905288479579e-07, "logits/chosen": -0.9845812916755676, "logits/rejected": -0.724572479724884, "logps/chosen": -411.0823669433594, "logps/rejected": -521.138671875, "loss": 0.0255, "rewards/accuracies": 0.75, "rewards/chosen": -0.18456149101257324, "rewards/margins": 0.1388203501701355, "rewards/rejected": -0.32338184118270874, "step": 12660 }, { "epoch": 0.83, "learning_rate": 4.3247510013932377e-07, "logits/chosen": -0.6843754053115845, "logits/rejected": -0.7799949645996094, "logps/chosen": -447.1705017089844, "logps/rejected": -546.6895751953125, "loss": 0.0352, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19498208165168762, "rewards/margins": 0.09969909489154816, "rewards/rejected": -0.2946811616420746, "step": 12670 }, { "epoch": 0.83, "learning_rate": 4.2927045665335594e-07, "logits/chosen": -0.41060298681259155, "logits/rejected": -0.578055739402771, "logps/chosen": -374.926513671875, "logps/rejected": -457.4046325683594, "loss": 0.018, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21004876494407654, "rewards/margins": 0.086033396422863, "rewards/rejected": -0.29608216881752014, "step": 12680 }, { "epoch": 0.83, "learning_rate": 4.260766151070439e-07, "logits/chosen": -0.7426326274871826, "logits/rejected": -0.7203096747398376, "logps/chosen": -432.20257568359375, "logps/rejected": -512.4547119140625, "loss": 0.0264, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20776908099651337, "rewards/margins": 0.09151271730661392, "rewards/rejected": -0.2992818057537079, "step": 12690 }, { "epoch": 0.83, "learning_rate": 4.228935921610308e-07, "logits/chosen": -1.1275103092193604, "logits/rejected": -0.8781867027282715, "logps/chosen": -416.4281311035156, "logps/rejected": -412.3861389160156, "loss": 0.0123, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1527094691991806, "rewards/margins": 0.06000950187444687, "rewards/rejected": -0.21271896362304688, "step": 12700 }, { "epoch": 0.83, "eval_logits/chosen": -1.0219651460647583, "eval_logits/rejected": -0.8919270038604736, "eval_logps/chosen": -409.658447265625, "eval_logps/rejected": -474.2336120605469, "eval_loss": 0.022616883739829063, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -0.1776534765958786, "eval_rewards/margins": 0.08496828377246857, "eval_rewards/rejected": -0.26262176036834717, "eval_runtime": 716.6994, "eval_samples_per_second": 2.791, "eval_steps_per_second": 1.395, "step": 12700 }, { "epoch": 0.83, "learning_rate": 4.1972140441952246e-07, "logits/chosen": -0.9346400499343872, "logits/rejected": -0.9922512173652649, "logps/chosen": -407.9190979003906, "logps/rejected": -477.9268493652344, "loss": 0.0463, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16833576560020447, "rewards/margins": 0.06799861043691635, "rewards/rejected": -0.23633436858654022, "step": 12710 }, { "epoch": 0.83, "learning_rate": 4.165600684302046e-07, "logits/chosen": -1.1187856197357178, "logits/rejected": -1.0260140895843506, "logps/chosen": -329.97979736328125, "logps/rejected": -432.9800720214844, "loss": 0.0197, "rewards/accuracies": 0.625, "rewards/chosen": -0.15419578552246094, "rewards/margins": 0.09304238855838776, "rewards/rejected": -0.2472381591796875, "step": 12720 }, { "epoch": 0.83, "learning_rate": 4.13409600684154e-07, "logits/chosen": -1.1065990924835205, "logits/rejected": -0.9036999940872192, "logps/chosen": -399.81854248046875, "logps/rejected": -464.4483947753906, "loss": 0.0544, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1847989559173584, "rewards/margins": 0.09381312131881714, "rewards/rejected": -0.27861207723617554, "step": 12730 }, { "epoch": 0.83, "learning_rate": 4.102700176157548e-07, "logits/chosen": -1.1882383823394775, "logits/rejected": -0.8874948620796204, "logps/chosen": -519.7543334960938, "logps/rejected": -490.3672790527344, "loss": 0.0265, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19499912858009338, "rewards/margins": 0.06839949637651443, "rewards/rejected": -0.2633986175060272, "step": 12740 }, { "epoch": 0.83, "learning_rate": 4.0714133560260884e-07, "logits/chosen": -1.0939228534698486, "logits/rejected": -1.0753412246704102, "logps/chosen": -437.94091796875, "logps/rejected": -433.62347412109375, "loss": 0.0306, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.17840786278247833, "rewards/margins": 0.05391346290707588, "rewards/rejected": -0.23232130706310272, "step": 12750 }, { "epoch": 0.83, "learning_rate": 4.0402357096545527e-07, "logits/chosen": -0.8398815989494324, "logits/rejected": -1.012155294418335, "logps/chosen": -427.52569580078125, "logps/rejected": -496.2490234375, "loss": 0.0186, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1757640689611435, "rewards/margins": 0.07889539003372192, "rewards/rejected": -0.2546594738960266, "step": 12760 }, { "epoch": 0.84, "learning_rate": 4.0091673996808025e-07, "logits/chosen": -1.2899396419525146, "logits/rejected": -1.0862394571304321, "logps/chosen": -393.2114562988281, "logps/rejected": -451.89007568359375, "loss": 0.0208, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19615134596824646, "rewards/margins": 0.08330585807561874, "rewards/rejected": -0.2794572412967682, "step": 12770 }, { "epoch": 0.84, "learning_rate": 3.9782085881723776e-07, "logits/chosen": -1.0116846561431885, "logits/rejected": -0.9444645047187805, "logps/chosen": -334.98541259765625, "logps/rejected": -458.76495361328125, "loss": 0.0244, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1693878471851349, "rewards/margins": 0.11181281507015228, "rewards/rejected": -0.28120067715644836, "step": 12780 }, { "epoch": 0.84, "learning_rate": 3.947359436625592e-07, "logits/chosen": -1.0056908130645752, "logits/rejected": -0.7778598070144653, "logps/chosen": -397.36151123046875, "logps/rejected": -461.7703552246094, "loss": 0.0116, "rewards/accuracies": 0.75, "rewards/chosen": -0.1626986265182495, "rewards/margins": 0.10197291523218155, "rewards/rejected": -0.26467153429985046, "step": 12790 }, { "epoch": 0.84, "learning_rate": 3.9166201059647386e-07, "logits/chosen": -0.9896391034126282, "logits/rejected": -1.065930724143982, "logps/chosen": -450.7867736816406, "logps/rejected": -459.914306640625, "loss": 0.0172, "rewards/accuracies": 0.625, "rewards/chosen": -0.18893657624721527, "rewards/margins": 0.04605841636657715, "rewards/rejected": -0.23499497771263123, "step": 12800 }, { "epoch": 0.84, "eval_logits/chosen": -1.0182397365570068, "eval_logits/rejected": -0.888500988483429, "eval_logps/chosen": -406.83538818359375, "eval_logps/rejected": -471.62237548828125, "eval_loss": 0.022614242509007454, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -0.17483040690422058, "eval_rewards/margins": 0.08518005907535553, "eval_rewards/rejected": -0.2600104808807373, "eval_runtime": 714.5678, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.399, "step": 12800 }, { "epoch": 0.84, "learning_rate": 3.8859907565412194e-07, "logits/chosen": -0.9135338664054871, "logits/rejected": -1.2353423833847046, "logps/chosen": -366.582763671875, "logps/rejected": -451.2088928222656, "loss": 0.0476, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18693353235721588, "rewards/margins": 0.08520451933145523, "rewards/rejected": -0.2721380591392517, "step": 12810 }, { "epoch": 0.84, "learning_rate": 3.8554715481327303e-07, "logits/chosen": -0.8923002481460571, "logits/rejected": -0.9084317088127136, "logps/chosen": -435.1770935058594, "logps/rejected": -507.140869140625, "loss": 0.0348, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20191805064678192, "rewards/margins": 0.1073615774512291, "rewards/rejected": -0.30927959084510803, "step": 12820 }, { "epoch": 0.84, "learning_rate": 3.8250626399424007e-07, "logits/chosen": -1.115748405456543, "logits/rejected": -1.0517231225967407, "logps/chosen": -450.9652404785156, "logps/rejected": -505.8202209472656, "loss": 0.0308, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19948706030845642, "rewards/margins": 0.07370523363351822, "rewards/rejected": -0.27319228649139404, "step": 12830 }, { "epoch": 0.84, "learning_rate": 3.7947641905980104e-07, "logits/chosen": -0.9417749643325806, "logits/rejected": -1.0478404760360718, "logps/chosen": -352.5267333984375, "logps/rejected": -401.9620666503906, "loss": 0.028, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.14520829916000366, "rewards/margins": 0.08411599695682526, "rewards/rejected": -0.2293243110179901, "step": 12840 }, { "epoch": 0.84, "learning_rate": 3.764576358151098e-07, "logits/chosen": -1.0585134029388428, "logits/rejected": -0.9640265703201294, "logps/chosen": -325.43975830078125, "logps/rejected": -382.0396728515625, "loss": 0.0097, "rewards/accuracies": 0.625, "rewards/chosen": -0.15062275528907776, "rewards/margins": 0.06844418495893478, "rewards/rejected": -0.21906694769859314, "step": 12850 }, { "epoch": 0.84, "learning_rate": 3.7344993000761944e-07, "logits/chosen": -1.0502904653549194, "logits/rejected": -0.9844939112663269, "logps/chosen": -374.8507995605469, "logps/rejected": -512.6021728515625, "loss": 0.016, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1975356936454773, "rewards/margins": 0.09477958083152771, "rewards/rejected": -0.2923153042793274, "step": 12860 }, { "epoch": 0.84, "learning_rate": 3.7045331732699585e-07, "logits/chosen": -1.0361741781234741, "logits/rejected": -0.9053858518600464, "logps/chosen": -392.5011291503906, "logps/rejected": -479.70330810546875, "loss": 0.0344, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1873030960559845, "rewards/margins": 0.12290160357952118, "rewards/rejected": -0.3102046847343445, "step": 12870 }, { "epoch": 0.84, "learning_rate": 3.6746781340503993e-07, "logits/chosen": -1.040926456451416, "logits/rejected": -0.7658315300941467, "logps/chosen": -389.17352294921875, "logps/rejected": -483.9557189941406, "loss": 0.0225, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.15475773811340332, "rewards/margins": 0.10280919075012207, "rewards/rejected": -0.2575669288635254, "step": 12880 }, { "epoch": 0.84, "learning_rate": 3.6449343381560116e-07, "logits/chosen": -0.8729962110519409, "logits/rejected": -0.7056654095649719, "logps/chosen": -451.79083251953125, "logps/rejected": -553.8628540039062, "loss": 0.0321, "rewards/accuracies": 0.75, "rewards/chosen": -0.22036516666412354, "rewards/margins": 0.11546160280704498, "rewards/rejected": -0.3358268141746521, "step": 12890 }, { "epoch": 0.84, "learning_rate": 3.615301940745017e-07, "logits/chosen": -1.4511282444000244, "logits/rejected": -0.9121654629707336, "logps/chosen": -478.91021728515625, "logps/rejected": -449.032958984375, "loss": 0.0077, "rewards/accuracies": 0.75, "rewards/chosen": -0.1617150455713272, "rewards/margins": 0.07529975473880768, "rewards/rejected": -0.2370148003101349, "step": 12900 }, { "epoch": 0.84, "eval_logits/chosen": -1.058924674987793, "eval_logits/rejected": -0.9263111352920532, "eval_logps/chosen": -409.0994873046875, "eval_logps/rejected": -475.6176452636719, "eval_loss": 0.022546077147126198, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -0.17709454894065857, "eval_rewards/margins": 0.08691117912530899, "eval_rewards/rejected": -0.26400575041770935, "eval_runtime": 715.1118, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.398, "step": 12900 }, { "epoch": 0.84, "learning_rate": 3.5857810963945084e-07, "logits/chosen": -0.7007251381874084, "logits/rejected": -0.7645866274833679, "logps/chosen": -427.35235595703125, "logps/rejected": -501.2770080566406, "loss": 0.0478, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21284326910972595, "rewards/margins": 0.08874757587909698, "rewards/rejected": -0.30159085988998413, "step": 12910 }, { "epoch": 0.85, "learning_rate": 3.556371959099678e-07, "logits/chosen": -1.3255102634429932, "logits/rejected": -1.0441545248031616, "logps/chosen": -454.9971618652344, "logps/rejected": -483.7090759277344, "loss": 0.0148, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.15782283246517181, "rewards/margins": 0.06868244707584381, "rewards/rejected": -0.22650527954101562, "step": 12920 }, { "epoch": 0.85, "learning_rate": 3.5270746822729797e-07, "logits/chosen": -1.1288038492202759, "logits/rejected": -0.9465574026107788, "logps/chosen": -432.2096252441406, "logps/rejected": -541.5054931640625, "loss": 0.0357, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18472759425640106, "rewards/margins": 0.09672068059444427, "rewards/rejected": -0.28144827485084534, "step": 12930 }, { "epoch": 0.85, "learning_rate": 3.4978894187433746e-07, "logits/chosen": -0.97435462474823, "logits/rejected": -0.923972487449646, "logps/chosen": -309.80853271484375, "logps/rejected": -354.0821228027344, "loss": 0.0431, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.1587332785129547, "rewards/margins": 0.05518204718828201, "rewards/rejected": -0.21391534805297852, "step": 12940 }, { "epoch": 0.85, "learning_rate": 3.468816320755486e-07, "logits/chosen": -0.7508054971694946, "logits/rejected": -0.8016918897628784, "logps/chosen": -371.10235595703125, "logps/rejected": -404.095947265625, "loss": 0.0117, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.14717456698417664, "rewards/margins": 0.07260333001613617, "rewards/rejected": -0.219777911901474, "step": 12950 }, { "epoch": 0.85, "learning_rate": 3.4398555399688336e-07, "logits/chosen": -1.075822353363037, "logits/rejected": -0.9145625233650208, "logps/chosen": -422.84991455078125, "logps/rejected": -429.92694091796875, "loss": 0.0231, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21149948239326477, "rewards/margins": 0.027741169556975365, "rewards/rejected": -0.2392406165599823, "step": 12960 }, { "epoch": 0.85, "learning_rate": 3.411007227457047e-07, "logits/chosen": -1.1963735818862915, "logits/rejected": -0.8579666018486023, "logps/chosen": -407.8987731933594, "logps/rejected": -490.4803161621094, "loss": 0.0191, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16013750433921814, "rewards/margins": 0.11485633999109268, "rewards/rejected": -0.2749938368797302, "step": 12970 }, { "epoch": 0.85, "learning_rate": 3.382271533707043e-07, "logits/chosen": -0.862114429473877, "logits/rejected": -0.7896581888198853, "logps/chosen": -355.82037353515625, "logps/rejected": -383.87542724609375, "loss": 0.0274, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16421517729759216, "rewards/margins": 0.050938718020915985, "rewards/rejected": -0.21515390276908875, "step": 12980 }, { "epoch": 0.85, "learning_rate": 3.353648608618287e-07, "logits/chosen": -1.022515058517456, "logits/rejected": -0.916111946105957, "logps/chosen": -313.8973693847656, "logps/rejected": -382.427978515625, "loss": 0.0305, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.14602608978748322, "rewards/margins": 0.07850136607885361, "rewards/rejected": -0.22452743351459503, "step": 12990 }, { "epoch": 0.85, "learning_rate": 3.3251386015019676e-07, "logits/chosen": -1.3658655881881714, "logits/rejected": -1.0993107557296753, "logps/chosen": -370.0747985839844, "logps/rejected": -418.81182861328125, "loss": 0.0102, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16731537878513336, "rewards/margins": 0.08248157799243927, "rewards/rejected": -0.24979694187641144, "step": 13000 }, { "epoch": 0.85, "eval_logits/chosen": -1.0553302764892578, "eval_logits/rejected": -0.9231022000312805, "eval_logps/chosen": -402.1976318359375, "eval_logps/rejected": -468.24981689453125, "eval_loss": 0.022522147744894028, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -0.170192688703537, "eval_rewards/margins": 0.08644524216651917, "eval_rewards/rejected": -0.25663793087005615, "eval_runtime": 718.3961, "eval_samples_per_second": 2.784, "eval_steps_per_second": 1.392, "step": 13000 }, { "epoch": 0.85, "learning_rate": 3.296741661080255e-07, "logits/chosen": -1.2025619745254517, "logits/rejected": -1.0354385375976562, "logps/chosen": -422.6883850097656, "logps/rejected": -514.5258178710938, "loss": 0.0167, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1895500123500824, "rewards/margins": 0.09837868809700012, "rewards/rejected": -0.28792867064476013, "step": 13010 }, { "epoch": 0.85, "learning_rate": 3.2684579354854974e-07, "logits/chosen": -1.2653496265411377, "logits/rejected": -1.1614527702331543, "logps/chosen": -503.04901123046875, "logps/rejected": -593.2863159179688, "loss": 0.0354, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2105904072523117, "rewards/margins": 0.07997885346412659, "rewards/rejected": -0.2905692756175995, "step": 13020 }, { "epoch": 0.85, "learning_rate": 3.2402875722594653e-07, "logits/chosen": -0.953220009803772, "logits/rejected": -1.0833784341812134, "logps/chosen": -322.46295166015625, "logps/rejected": -410.1162109375, "loss": 0.0156, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.15341752767562866, "rewards/margins": 0.08150170743465424, "rewards/rejected": -0.2349192351102829, "step": 13030 }, { "epoch": 0.85, "learning_rate": 3.212230718352566e-07, "logits/chosen": -0.9537510871887207, "logits/rejected": -0.882818877696991, "logps/chosen": -380.3872375488281, "logps/rejected": -341.0324401855469, "loss": 0.028, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1553373783826828, "rewards/margins": 0.024342210963368416, "rewards/rejected": -0.17967958748340607, "step": 13040 }, { "epoch": 0.85, "learning_rate": 3.1842875201231025e-07, "logits/chosen": -0.9480336904525757, "logits/rejected": -0.8143274188041687, "logps/chosen": -377.17877197265625, "logps/rejected": -420.50018310546875, "loss": 0.0322, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15543873608112335, "rewards/margins": 0.06909666210412979, "rewards/rejected": -0.22453537583351135, "step": 13050 }, { "epoch": 0.85, "learning_rate": 3.156458123336478e-07, "logits/chosen": -0.7479599714279175, "logits/rejected": -0.6692909002304077, "logps/chosen": -305.3641662597656, "logps/rejected": -416.10723876953125, "loss": 0.0216, "rewards/accuracies": 0.625, "rewards/chosen": -0.14685234427452087, "rewards/margins": 0.12089182436466217, "rewards/rejected": -0.26774412393569946, "step": 13060 }, { "epoch": 0.86, "learning_rate": 3.128742673164459e-07, "logits/chosen": -1.274544358253479, "logits/rejected": -0.7893189787864685, "logps/chosen": -474.60174560546875, "logps/rejected": -511.2594299316406, "loss": 0.0081, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19167451560497284, "rewards/margins": 0.08117364346981049, "rewards/rejected": -0.2728481888771057, "step": 13070 }, { "epoch": 0.86, "learning_rate": 3.101141314184414e-07, "logits/chosen": -1.4236832857131958, "logits/rejected": -1.1978580951690674, "logps/chosen": -364.2232971191406, "logps/rejected": -410.74273681640625, "loss": 0.0256, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15883874893188477, "rewards/margins": 0.05573068931698799, "rewards/rejected": -0.21456941962242126, "step": 13080 }, { "epoch": 0.86, "learning_rate": 3.0736541903785526e-07, "logits/chosen": -0.8333417177200317, "logits/rejected": -0.9561074376106262, "logps/chosen": -380.85760498046875, "logps/rejected": -520.1369018554688, "loss": 0.0172, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17184266448020935, "rewards/margins": 0.08981967717409134, "rewards/rejected": -0.2616623640060425, "step": 13090 }, { "epoch": 0.86, "learning_rate": 3.0462814451331704e-07, "logits/chosen": -1.0788439512252808, "logits/rejected": -0.8012931942939758, "logps/chosen": -433.00164794921875, "logps/rejected": -476.09820556640625, "loss": 0.0352, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2023252546787262, "rewards/margins": 0.043191470205783844, "rewards/rejected": -0.24551673233509064, "step": 13100 }, { "epoch": 0.86, "eval_logits/chosen": -1.05148184299469, "eval_logits/rejected": -0.9195185303688049, "eval_logps/chosen": -404.3332214355469, "eval_logps/rejected": -469.222900390625, "eval_loss": 0.022564597427845, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -0.17232826352119446, "eval_rewards/margins": 0.08528277277946472, "eval_rewards/rejected": -0.2576110064983368, "eval_runtime": 717.2407, "eval_samples_per_second": 2.788, "eval_steps_per_second": 1.394, "step": 13100 }, { "epoch": 0.86, "learning_rate": 3.019023221237927e-07, "logits/chosen": -1.0007266998291016, "logits/rejected": -0.8436404466629028, "logps/chosen": -414.0870056152344, "logps/rejected": -454.7466735839844, "loss": 0.0165, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17735466361045837, "rewards/margins": 0.09822709858417511, "rewards/rejected": -0.2755817472934723, "step": 13110 }, { "epoch": 0.86, "learning_rate": 2.991879660885058e-07, "logits/chosen": -1.2198227643966675, "logits/rejected": -1.148110270500183, "logps/chosen": -414.25958251953125, "logps/rejected": -485.59417724609375, "loss": 0.0313, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15071269869804382, "rewards/margins": 0.08769487589597702, "rewards/rejected": -0.23840756714344025, "step": 13120 }, { "epoch": 0.86, "learning_rate": 2.9648509056686786e-07, "logits/chosen": -1.1154844760894775, "logits/rejected": -0.946179211139679, "logps/chosen": -346.7869567871094, "logps/rejected": -409.2403259277344, "loss": 0.0284, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16984522342681885, "rewards/margins": 0.08802910894155502, "rewards/rejected": -0.2578743100166321, "step": 13130 }, { "epoch": 0.86, "learning_rate": 2.937937096584012e-07, "logits/chosen": -1.1360067129135132, "logits/rejected": -0.794166088104248, "logps/chosen": -462.79815673828125, "logps/rejected": -477.1358337402344, "loss": 0.0264, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1723971664905548, "rewards/margins": 0.07482168823480606, "rewards/rejected": -0.24721881747245789, "step": 13140 }, { "epoch": 0.86, "learning_rate": 2.9111383740266756e-07, "logits/chosen": -0.8627032041549683, "logits/rejected": -0.8739458322525024, "logps/chosen": -427.9334411621094, "logps/rejected": -469.6631774902344, "loss": 0.0234, "rewards/accuracies": 0.625, "rewards/chosen": -0.1921493113040924, "rewards/margins": 0.047517795115709305, "rewards/rejected": -0.2396671026945114, "step": 13150 }, { "epoch": 0.86, "learning_rate": 2.8844548777919255e-07, "logits/chosen": -1.0639727115631104, "logits/rejected": -0.9701464772224426, "logps/chosen": -354.8576354980469, "logps/rejected": -410.620849609375, "loss": 0.0219, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.15097381174564362, "rewards/margins": 0.07804340124130249, "rewards/rejected": -0.2290172278881073, "step": 13160 }, { "epoch": 0.86, "learning_rate": 2.8578867470739594e-07, "logits/chosen": -0.6956424713134766, "logits/rejected": -0.6151020526885986, "logps/chosen": -415.44940185546875, "logps/rejected": -490.1346130371094, "loss": 0.0479, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23096111416816711, "rewards/margins": 0.1061079129576683, "rewards/rejected": -0.33706900477409363, "step": 13170 }, { "epoch": 0.86, "learning_rate": 2.8314341204651484e-07, "logits/chosen": -1.3746941089630127, "logits/rejected": -1.182732105255127, "logps/chosen": -427.72833251953125, "logps/rejected": -462.6935119628906, "loss": 0.0151, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15168920159339905, "rewards/margins": 0.10774780809879303, "rewards/rejected": -0.25943702459335327, "step": 13180 }, { "epoch": 0.86, "learning_rate": 2.805097135955362e-07, "logits/chosen": -0.9341131448745728, "logits/rejected": -0.8025106191635132, "logps/chosen": -394.438720703125, "logps/rejected": -464.203125, "loss": 0.0343, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18191476166248322, "rewards/margins": 0.10469025373458862, "rewards/rejected": -0.28660500049591064, "step": 13190 }, { "epoch": 0.86, "learning_rate": 2.778875930931213e-07, "logits/chosen": -0.9983268976211548, "logits/rejected": -0.7534890174865723, "logps/chosen": -407.2567138671875, "logps/rejected": -498.3690490722656, "loss": 0.017, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17213784158229828, "rewards/margins": 0.10847457498311996, "rewards/rejected": -0.28061240911483765, "step": 13200 }, { "epoch": 0.86, "eval_logits/chosen": -1.0253299474716187, "eval_logits/rejected": -0.8943244218826294, "eval_logps/chosen": -413.80242919921875, "eval_logps/rejected": -481.2682189941406, "eval_loss": 0.02250593528151512, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -0.18179750442504883, "eval_rewards/margins": 0.08785880357027054, "eval_rewards/rejected": -0.26965630054473877, "eval_runtime": 715.7469, "eval_samples_per_second": 2.794, "eval_steps_per_second": 1.397, "step": 13200 }, { "epoch": 0.86, "learning_rate": 2.7527706421753426e-07, "logits/chosen": -1.2023802995681763, "logits/rejected": -1.0169878005981445, "logps/chosen": -383.7974853515625, "logps/rejected": -449.32574462890625, "loss": 0.0265, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.18539631366729736, "rewards/margins": 0.0610395148396492, "rewards/rejected": -0.24643580615520477, "step": 13210 }, { "epoch": 0.86, "learning_rate": 2.726781405865736e-07, "logits/chosen": -1.059326410293579, "logits/rejected": -0.8816215395927429, "logps/chosen": -473.3182678222656, "logps/rejected": -448.09942626953125, "loss": 0.0156, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17065247893333435, "rewards/margins": 0.09054967761039734, "rewards/rejected": -0.2612021565437317, "step": 13220 }, { "epoch": 0.87, "learning_rate": 2.7009083575749687e-07, "logits/chosen": -0.8829665184020996, "logits/rejected": -0.8453516960144043, "logps/chosen": -423.53436279296875, "logps/rejected": -495.7106018066406, "loss": 0.0117, "rewards/accuracies": 0.625, "rewards/chosen": -0.17967407405376434, "rewards/margins": 0.07381172478199005, "rewards/rejected": -0.2534857988357544, "step": 13230 }, { "epoch": 0.87, "learning_rate": 2.6751516322695457e-07, "logits/chosen": -1.0402884483337402, "logits/rejected": -1.0268046855926514, "logps/chosen": -384.08197021484375, "logps/rejected": -424.99725341796875, "loss": 0.0234, "rewards/accuracies": 0.625, "rewards/chosen": -0.1938384473323822, "rewards/margins": 0.04695520177483559, "rewards/rejected": -0.2407936304807663, "step": 13240 }, { "epoch": 0.87, "learning_rate": 2.649511364309154e-07, "logits/chosen": -1.2867207527160645, "logits/rejected": -1.1256946325302124, "logps/chosen": -383.88616943359375, "logps/rejected": -450.26318359375, "loss": 0.0055, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18159647285938263, "rewards/margins": 0.08725818246603012, "rewards/rejected": -0.26885461807250977, "step": 13250 }, { "epoch": 0.87, "learning_rate": 2.6239876874460003e-07, "logits/chosen": -1.2547171115875244, "logits/rejected": -1.2407386302947998, "logps/chosen": -463.03192138671875, "logps/rejected": -547.2875366210938, "loss": 0.0315, "rewards/accuracies": 0.75, "rewards/chosen": -0.17861682176589966, "rewards/margins": 0.11481177806854248, "rewards/rejected": -0.29342857003211975, "step": 13260 }, { "epoch": 0.87, "learning_rate": 2.5985807348240744e-07, "logits/chosen": -1.1246567964553833, "logits/rejected": -0.6838828325271606, "logps/chosen": -414.3392028808594, "logps/rejected": -487.4080505371094, "loss": 0.0184, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18199566006660461, "rewards/margins": 0.12101595103740692, "rewards/rejected": -0.30301159620285034, "step": 13270 }, { "epoch": 0.87, "learning_rate": 2.5732906389785014e-07, "logits/chosen": -1.2120535373687744, "logits/rejected": -1.1492611169815063, "logps/chosen": -438.5909729003906, "logps/rejected": -519.461669921875, "loss": 0.0157, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16088488698005676, "rewards/margins": 0.11901631206274033, "rewards/rejected": -0.2799011766910553, "step": 13280 }, { "epoch": 0.87, "learning_rate": 2.5481175318347956e-07, "logits/chosen": -0.9526659846305847, "logits/rejected": -1.1553233861923218, "logps/chosen": -376.82379150390625, "logps/rejected": -481.47686767578125, "loss": 0.0216, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1493261754512787, "rewards/margins": 0.08675697445869446, "rewards/rejected": -0.23608312010765076, "step": 13290 }, { "epoch": 0.87, "learning_rate": 2.5230615447082246e-07, "logits/chosen": -0.9624271392822266, "logits/rejected": -0.8144418597221375, "logps/chosen": -419.8833923339844, "logps/rejected": -490.603759765625, "loss": 0.0207, "rewards/accuracies": 0.625, "rewards/chosen": -0.16856670379638672, "rewards/margins": 0.08550556004047394, "rewards/rejected": -0.25407224893569946, "step": 13300 }, { "epoch": 0.87, "eval_logits/chosen": -1.036911129951477, "eval_logits/rejected": -0.9057242274284363, "eval_logps/chosen": -404.0227355957031, "eval_logps/rejected": -469.95465087890625, "eval_loss": 0.022511208429932594, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -0.17201782763004303, "eval_rewards/margins": 0.08632489293813705, "eval_rewards/rejected": -0.2583427131175995, "eval_runtime": 715.961, "eval_samples_per_second": 2.793, "eval_steps_per_second": 1.397, "step": 13300 }, { "epoch": 0.87, "learning_rate": 2.49812280830308e-07, "logits/chosen": -0.9963768124580383, "logits/rejected": -0.7489258646965027, "logps/chosen": -407.6488342285156, "logps/rejected": -536.24169921875, "loss": 0.0221, "rewards/accuracies": 0.75, "rewards/chosen": -0.1826116144657135, "rewards/margins": 0.16344889998435974, "rewards/rejected": -0.34606051445007324, "step": 13310 }, { "epoch": 0.87, "learning_rate": 2.4733014527120457e-07, "logits/chosen": -0.7520327568054199, "logits/rejected": -0.8920272588729858, "logps/chosen": -453.8097229003906, "logps/rejected": -538.2177124023438, "loss": 0.026, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2513948082923889, "rewards/margins": 0.10730129480361938, "rewards/rejected": -0.3586960732936859, "step": 13320 }, { "epoch": 0.87, "learning_rate": 2.4485976074154565e-07, "logits/chosen": -1.087537169456482, "logits/rejected": -1.1595380306243896, "logps/chosen": -388.9942932128906, "logps/rejected": -443.033203125, "loss": 0.0194, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.17999626696109772, "rewards/margins": 0.02399570122361183, "rewards/rejected": -0.20399196445941925, "step": 13330 }, { "epoch": 0.87, "learning_rate": 2.4240114012806763e-07, "logits/chosen": -0.9978880882263184, "logits/rejected": -1.0209993124008179, "logps/chosen": -371.9396667480469, "logps/rejected": -414.16510009765625, "loss": 0.023, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1641414910554886, "rewards/margins": 0.05929529666900635, "rewards/rejected": -0.22343675792217255, "step": 13340 }, { "epoch": 0.87, "learning_rate": 2.399542962561399e-07, "logits/chosen": -1.0199801921844482, "logits/rejected": -0.8027508854866028, "logps/chosen": -386.0677795410156, "logps/rejected": -451.94500732421875, "loss": 0.0261, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1606118232011795, "rewards/margins": 0.1132865771651268, "rewards/rejected": -0.2738984227180481, "step": 13350 }, { "epoch": 0.87, "learning_rate": 2.3751924188969876e-07, "logits/chosen": -0.9870854616165161, "logits/rejected": -0.840030312538147, "logps/chosen": -424.66900634765625, "logps/rejected": -501.39349365234375, "loss": 0.022, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17293047904968262, "rewards/margins": 0.09655916690826416, "rewards/rejected": -0.2694896459579468, "step": 13360 }, { "epoch": 0.87, "learning_rate": 2.3509598973118024e-07, "logits/chosen": -1.3064030408859253, "logits/rejected": -1.1354042291641235, "logps/chosen": -372.07568359375, "logps/rejected": -367.5455627441406, "loss": 0.0144, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.15107032656669617, "rewards/margins": 0.05825766921043396, "rewards/rejected": -0.20932801067829132, "step": 13370 }, { "epoch": 0.88, "learning_rate": 2.326845524214555e-07, "logits/chosen": -0.9536269307136536, "logits/rejected": -0.9666744470596313, "logps/chosen": -417.5445251464844, "logps/rejected": -406.8939514160156, "loss": 0.0327, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.18249019980430603, "rewards/margins": 0.015474098734557629, "rewards/rejected": -0.19796431064605713, "step": 13380 }, { "epoch": 0.88, "learning_rate": 2.3028494253976158e-07, "logits/chosen": -1.063410758972168, "logits/rejected": -0.8925013542175293, "logps/chosen": -530.798095703125, "logps/rejected": -529.5015869140625, "loss": 0.0213, "rewards/accuracies": 0.625, "rewards/chosen": -0.18953266739845276, "rewards/margins": 0.06328479945659637, "rewards/rejected": -0.25281745195388794, "step": 13390 }, { "epoch": 0.88, "learning_rate": 2.2789717260364026e-07, "logits/chosen": -1.0887086391448975, "logits/rejected": -0.9040567278862, "logps/chosen": -315.3928527832031, "logps/rejected": -351.1702575683594, "loss": 0.0315, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.15028676390647888, "rewards/margins": 0.057505130767822266, "rewards/rejected": -0.20779189467430115, "step": 13400 }, { "epoch": 0.88, "eval_logits/chosen": -1.0402843952178955, "eval_logits/rejected": -0.9093206524848938, "eval_logps/chosen": -401.3037109375, "eval_logps/rejected": -466.2375793457031, "eval_loss": 0.022522244602441788, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -0.16929879784584045, "eval_rewards/margins": 0.0853269100189209, "eval_rewards/rejected": -0.25462570786476135, "eval_runtime": 714.6799, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 13400 }, { "epoch": 0.88, "learning_rate": 2.255212550688682e-07, "logits/chosen": -1.1159807443618774, "logits/rejected": -1.4684662818908691, "logps/chosen": -399.5811767578125, "logps/rejected": -564.281005859375, "loss": 0.0172, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18162615597248077, "rewards/margins": 0.10051321983337402, "rewards/rejected": -0.2821393609046936, "step": 13410 }, { "epoch": 0.88, "learning_rate": 2.2315720232939598e-07, "logits/chosen": -1.586183786392212, "logits/rejected": -1.0722787380218506, "logps/chosen": -407.93914794921875, "logps/rejected": -429.28021240234375, "loss": 0.0135, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14582547545433044, "rewards/margins": 0.11094337701797485, "rewards/rejected": -0.2567688524723053, "step": 13420 }, { "epoch": 0.88, "learning_rate": 2.2080502671727956e-07, "logits/chosen": -1.1968393325805664, "logits/rejected": -1.0013238191604614, "logps/chosen": -337.3051452636719, "logps/rejected": -387.70159912109375, "loss": 0.0285, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.11722330749034882, "rewards/margins": 0.07222410291433334, "rewards/rejected": -0.18944741785526276, "step": 13430 }, { "epoch": 0.88, "learning_rate": 2.1846474050262078e-07, "logits/chosen": -1.0117199420928955, "logits/rejected": -0.7946184873580933, "logps/chosen": -390.4440002441406, "logps/rejected": -392.02056884765625, "loss": 0.0114, "rewards/accuracies": 0.625, "rewards/chosen": -0.14388325810432434, "rewards/margins": 0.06737370789051056, "rewards/rejected": -0.2112569808959961, "step": 13440 }, { "epoch": 0.88, "learning_rate": 2.1613635589349756e-07, "logits/chosen": -0.8066182136535645, "logits/rejected": -0.7442101240158081, "logps/chosen": -350.8916320800781, "logps/rejected": -481.36199951171875, "loss": 0.0364, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15372835099697113, "rewards/margins": 0.10294969379901886, "rewards/rejected": -0.25667804479599, "step": 13450 }, { "epoch": 0.88, "learning_rate": 2.1381988503590578e-07, "logits/chosen": -0.7249947786331177, "logits/rejected": -0.8389297723770142, "logps/chosen": -408.78936767578125, "logps/rejected": -505.5535583496094, "loss": 0.0124, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19291751086711884, "rewards/margins": 0.10291405022144318, "rewards/rejected": -0.2958315908908844, "step": 13460 }, { "epoch": 0.88, "learning_rate": 2.11515340013691e-07, "logits/chosen": -1.2795196771621704, "logits/rejected": -1.1303491592407227, "logps/chosen": -411.79388427734375, "logps/rejected": -502.69580078125, "loss": 0.0188, "rewards/accuracies": 0.75, "rewards/chosen": -0.18062135577201843, "rewards/margins": 0.11248214542865753, "rewards/rejected": -0.29310348629951477, "step": 13470 }, { "epoch": 0.88, "learning_rate": 2.092227328484897e-07, "logits/chosen": -0.8292211294174194, "logits/rejected": -0.8686593770980835, "logps/chosen": -364.2059631347656, "logps/rejected": -496.27191162109375, "loss": 0.0154, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16019728779792786, "rewards/margins": 0.10140843689441681, "rewards/rejected": -0.26160573959350586, "step": 13480 }, { "epoch": 0.88, "learning_rate": 2.0694207549966345e-07, "logits/chosen": -0.7915389537811279, "logits/rejected": -0.9777692556381226, "logps/chosen": -400.21636962890625, "logps/rejected": -420.87811279296875, "loss": 0.0376, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1913079172372818, "rewards/margins": 0.03753117471933365, "rewards/rejected": -0.22883908450603485, "step": 13490 }, { "epoch": 0.88, "learning_rate": 2.0467337986423864e-07, "logits/chosen": -1.25767982006073, "logits/rejected": -1.0839223861694336, "logps/chosen": -465.10784912109375, "logps/rejected": -500.28948974609375, "loss": 0.0148, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1593843698501587, "rewards/margins": 0.06839440762996674, "rewards/rejected": -0.22777876257896423, "step": 13500 }, { "epoch": 0.88, "eval_logits/chosen": -1.0379060506820679, "eval_logits/rejected": -0.9070315361022949, "eval_logps/chosen": -402.1565856933594, "eval_logps/rejected": -467.22930908203125, "eval_loss": 0.022507674992084503, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -0.17015163600444794, "eval_rewards/margins": 0.08546581864356995, "eval_rewards/rejected": -0.2556174695491791, "eval_runtime": 714.0487, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.4, "step": 13500 }, { "epoch": 0.88, "learning_rate": 2.0241665777684272e-07, "logits/chosen": -1.303655982017517, "logits/rejected": -1.0874269008636475, "logps/chosen": -427.10565185546875, "logps/rejected": -508.0550231933594, "loss": 0.0222, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1588127166032791, "rewards/margins": 0.12456454336643219, "rewards/rejected": -0.2833772301673889, "step": 13510 }, { "epoch": 0.88, "learning_rate": 2.0017192100964366e-07, "logits/chosen": -0.8127733469009399, "logits/rejected": -0.8098758459091187, "logps/chosen": -394.0885925292969, "logps/rejected": -473.2001953125, "loss": 0.0197, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19413261115550995, "rewards/margins": 0.07699447870254517, "rewards/rejected": -0.2711270749568939, "step": 13520 }, { "epoch": 0.89, "learning_rate": 1.9793918127228777e-07, "logits/chosen": -1.3260560035705566, "logits/rejected": -0.9111803770065308, "logps/chosen": -522.55224609375, "logps/rejected": -552.2357788085938, "loss": 0.027, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20625276863574982, "rewards/margins": 0.08376564085483551, "rewards/rejected": -0.29001837968826294, "step": 13530 }, { "epoch": 0.89, "learning_rate": 1.9571845021184005e-07, "logits/chosen": -0.7867355942726135, "logits/rejected": -0.8443099856376648, "logps/chosen": -409.19671630859375, "logps/rejected": -503.88470458984375, "loss": 0.0232, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1782108098268509, "rewards/margins": 0.09056999534368515, "rewards/rejected": -0.26878079771995544, "step": 13540 }, { "epoch": 0.89, "learning_rate": 1.9350973941272027e-07, "logits/chosen": -1.209465742111206, "logits/rejected": -0.9606415629386902, "logps/chosen": -371.02252197265625, "logps/rejected": -433.84552001953125, "loss": 0.0317, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1672474592924118, "rewards/margins": 0.08332379907369614, "rewards/rejected": -0.25057128071784973, "step": 13550 }, { "epoch": 0.89, "learning_rate": 1.9131306039664676e-07, "logits/chosen": -0.9260093569755554, "logits/rejected": -0.8099089860916138, "logps/chosen": -362.210205078125, "logps/rejected": -478.4556579589844, "loss": 0.0512, "rewards/accuracies": 0.75, "rewards/chosen": -0.16230614483356476, "rewards/margins": 0.09188903123140335, "rewards/rejected": -0.2541951537132263, "step": 13560 }, { "epoch": 0.89, "learning_rate": 1.8912842462257358e-07, "logits/chosen": -1.0246858596801758, "logits/rejected": -0.8914594650268555, "logps/chosen": -382.34039306640625, "logps/rejected": -469.93951416015625, "loss": 0.039, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1621711403131485, "rewards/margins": 0.10458607971668243, "rewards/rejected": -0.26675719022750854, "step": 13570 }, { "epoch": 0.89, "learning_rate": 1.869558434866303e-07, "logits/chosen": -1.0384668111801147, "logits/rejected": -1.2082610130310059, "logps/chosen": -356.4000549316406, "logps/rejected": -486.45904541015625, "loss": 0.0246, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17531022429466248, "rewards/margins": 0.11202911287546158, "rewards/rejected": -0.28733932971954346, "step": 13580 }, { "epoch": 0.89, "learning_rate": 1.847953283220652e-07, "logits/chosen": -1.1473596096038818, "logits/rejected": -0.896187961101532, "logps/chosen": -413.2381286621094, "logps/rejected": -476.4823303222656, "loss": 0.0137, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15712840855121613, "rewards/margins": 0.1418803632259369, "rewards/rejected": -0.2990087568759918, "step": 13590 }, { "epoch": 0.89, "learning_rate": 1.8264689039918265e-07, "logits/chosen": -1.0373870134353638, "logits/rejected": -0.9713393449783325, "logps/chosen": -441.6070861816406, "logps/rejected": -489.19537353515625, "loss": 0.0191, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18177670240402222, "rewards/margins": 0.07501448690891266, "rewards/rejected": -0.25679120421409607, "step": 13600 }, { "epoch": 0.89, "eval_logits/chosen": -1.036992073059082, "eval_logits/rejected": -0.9058991074562073, "eval_logps/chosen": -402.9744567871094, "eval_logps/rejected": -469.4185791015625, "eval_loss": 0.02245408110320568, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -0.17096956074237823, "eval_rewards/margins": 0.08683714270591736, "eval_rewards/rejected": -0.2578066885471344, "eval_runtime": 716.0601, "eval_samples_per_second": 2.793, "eval_steps_per_second": 1.397, "step": 13600 }, { "epoch": 0.89, "learning_rate": 1.8051054092528857e-07, "logits/chosen": -1.0938720703125, "logits/rejected": -0.8935802578926086, "logps/chosen": -416.72784423828125, "logps/rejected": -520.7975463867188, "loss": 0.0219, "rewards/accuracies": 0.625, "rewards/chosen": -0.15562644600868225, "rewards/margins": 0.11382999271154404, "rewards/rejected": -0.2694564461708069, "step": 13610 }, { "epoch": 0.89, "learning_rate": 1.783862910446271e-07, "logits/chosen": -0.8184884786605835, "logits/rejected": -0.8318710327148438, "logps/chosen": -346.8604736328125, "logps/rejected": -457.2109375, "loss": 0.029, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17400714755058289, "rewards/margins": 0.11966536194086075, "rewards/rejected": -0.29367250204086304, "step": 13620 }, { "epoch": 0.89, "learning_rate": 1.762741518383271e-07, "logits/chosen": -1.1689157485961914, "logits/rejected": -0.8800691366195679, "logps/chosen": -378.4546203613281, "logps/rejected": -437.0880432128906, "loss": 0.0222, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1566222608089447, "rewards/margins": 0.08629752695560455, "rewards/rejected": -0.24291977286338806, "step": 13630 }, { "epoch": 0.89, "learning_rate": 1.7417413432434082e-07, "logits/chosen": -1.0122010707855225, "logits/rejected": -0.9226837158203125, "logps/chosen": -435.48602294921875, "logps/rejected": -451.33953857421875, "loss": 0.0449, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18282637000083923, "rewards/margins": 0.06862086057662964, "rewards/rejected": -0.25144723057746887, "step": 13640 }, { "epoch": 0.89, "learning_rate": 1.7208624945738855e-07, "logits/chosen": -1.2694337368011475, "logits/rejected": -1.216371774673462, "logps/chosen": -372.20440673828125, "logps/rejected": -427.10980224609375, "loss": 0.018, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.15412010252475739, "rewards/margins": 0.04746802896261215, "rewards/rejected": -0.20158810913562775, "step": 13650 }, { "epoch": 0.89, "learning_rate": 1.7001050812889995e-07, "logits/chosen": -1.3329761028289795, "logits/rejected": -1.1384284496307373, "logps/chosen": -455.50689697265625, "logps/rejected": -500.7456970214844, "loss": 0.0165, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19997408986091614, "rewards/margins": 0.08548974245786667, "rewards/rejected": -0.2854638695716858, "step": 13660 }, { "epoch": 0.89, "learning_rate": 1.679469211669596e-07, "logits/chosen": -1.0572293996810913, "logits/rejected": -0.8633424639701843, "logps/chosen": -426.3917541503906, "logps/rejected": -491.0323791503906, "loss": 0.0185, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20144657790660858, "rewards/margins": 0.11624503135681152, "rewards/rejected": -0.3176916241645813, "step": 13670 }, { "epoch": 0.9, "learning_rate": 1.6589549933624715e-07, "logits/chosen": -1.0715855360031128, "logits/rejected": -0.8907572031021118, "logps/chosen": -367.4554748535156, "logps/rejected": -469.3475036621094, "loss": 0.014, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12724845111370087, "rewards/margins": 0.1533774435520172, "rewards/rejected": -0.2806258797645569, "step": 13680 }, { "epoch": 0.9, "learning_rate": 1.638562533379845e-07, "logits/chosen": -0.9648802876472473, "logits/rejected": -0.8489893674850464, "logps/chosen": -400.01300048828125, "logps/rejected": -409.990966796875, "loss": 0.0268, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.14431039988994598, "rewards/margins": 0.07224598526954651, "rewards/rejected": -0.2165563851594925, "step": 13690 }, { "epoch": 0.9, "learning_rate": 1.6182919380987676e-07, "logits/chosen": -1.0805898904800415, "logits/rejected": -1.0202234983444214, "logps/chosen": -394.5084228515625, "logps/rejected": -434.6302795410156, "loss": 0.0221, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.16879045963287354, "rewards/margins": 0.055499009788036346, "rewards/rejected": -0.22428946197032928, "step": 13700 }, { "epoch": 0.9, "eval_logits/chosen": -1.0509698390960693, "eval_logits/rejected": -0.9192084670066833, "eval_logps/chosen": -400.35870361328125, "eval_logps/rejected": -466.0537414550781, "eval_loss": 0.022439854219555855, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -0.16835379600524902, "eval_rewards/margins": 0.08608809113502502, "eval_rewards/rejected": -0.25444188714027405, "eval_runtime": 712.2972, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 13700 }, { "epoch": 0.9, "learning_rate": 1.598143313260603e-07, "logits/chosen": -0.7808517217636108, "logits/rejected": -0.8405755162239075, "logps/chosen": -345.8737487792969, "logps/rejected": -403.0904235839844, "loss": 0.0355, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15782006084918976, "rewards/margins": 0.06923118233680725, "rewards/rejected": -0.22705122828483582, "step": 13710 }, { "epoch": 0.9, "learning_rate": 1.5781167639704415e-07, "logits/chosen": -1.2056787014007568, "logits/rejected": -0.8297403454780579, "logps/chosen": -476.67755126953125, "logps/rejected": -422.1780700683594, "loss": 0.0265, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.14451077580451965, "rewards/margins": 0.06974398344755173, "rewards/rejected": -0.21425476670265198, "step": 13720 }, { "epoch": 0.9, "learning_rate": 1.5582123946965787e-07, "logits/chosen": -0.9438670873641968, "logits/rejected": -0.7419862747192383, "logps/chosen": -385.6568908691406, "logps/rejected": -478.90106201171875, "loss": 0.0337, "rewards/accuracies": 0.625, "rewards/chosen": -0.15126189589500427, "rewards/margins": 0.08309624344110489, "rewards/rejected": -0.23435814678668976, "step": 13730 }, { "epoch": 0.9, "learning_rate": 1.5384303092699504e-07, "logits/chosen": -1.1633503437042236, "logits/rejected": -0.7491482496261597, "logps/chosen": -446.1181640625, "logps/rejected": -557.1561889648438, "loss": 0.008, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15913358330726624, "rewards/margins": 0.11271911859512329, "rewards/rejected": -0.2718527019023895, "step": 13740 }, { "epoch": 0.9, "learning_rate": 1.518770610883613e-07, "logits/chosen": -0.8916155099868774, "logits/rejected": -0.7886601686477661, "logps/chosen": -448.6434631347656, "logps/rejected": -545.1658935546875, "loss": 0.019, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23410484194755554, "rewards/margins": 0.12593629956245422, "rewards/rejected": -0.36004114151000977, "step": 13750 }, { "epoch": 0.9, "learning_rate": 1.4992334020921735e-07, "logits/chosen": -1.0785119533538818, "logits/rejected": -1.0565673112869263, "logps/chosen": -329.98724365234375, "logps/rejected": -415.31671142578125, "loss": 0.0218, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1570148468017578, "rewards/margins": 0.11494328826665878, "rewards/rejected": -0.2719581425189972, "step": 13760 }, { "epoch": 0.9, "learning_rate": 1.4798187848112905e-07, "logits/chosen": -1.0934499502182007, "logits/rejected": -0.7420969009399414, "logps/chosen": -430.92041015625, "logps/rejected": -494.830078125, "loss": 0.0241, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2079496830701828, "rewards/margins": 0.10476718842983246, "rewards/rejected": -0.31271690130233765, "step": 13770 }, { "epoch": 0.9, "learning_rate": 1.460526860317113e-07, "logits/chosen": -1.2032901048660278, "logits/rejected": -1.0729628801345825, "logps/chosen": -357.88177490234375, "logps/rejected": -527.55419921875, "loss": 0.0251, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18459714949131012, "rewards/margins": 0.13792560994625092, "rewards/rejected": -0.32252275943756104, "step": 13780 }, { "epoch": 0.9, "learning_rate": 1.441357729245771e-07, "logits/chosen": -1.2668187618255615, "logits/rejected": -0.9400604963302612, "logps/chosen": -459.077880859375, "logps/rejected": -497.6475524902344, "loss": 0.0172, "rewards/accuracies": 0.75, "rewards/chosen": -0.21108606457710266, "rewards/margins": 0.09455683082342148, "rewards/rejected": -0.30564290285110474, "step": 13790 }, { "epoch": 0.9, "learning_rate": 1.4223114915928482e-07, "logits/chosen": -0.6138666868209839, "logits/rejected": -0.6970023512840271, "logps/chosen": -407.88861083984375, "logps/rejected": -488.20867919921875, "loss": 0.0299, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18196603655815125, "rewards/margins": 0.07355289161205292, "rewards/rejected": -0.25551897287368774, "step": 13800 }, { "epoch": 0.9, "eval_logits/chosen": -1.0439250469207764, "eval_logits/rejected": -0.9124934077262878, "eval_logps/chosen": -402.755126953125, "eval_logps/rejected": -469.4453430175781, "eval_loss": 0.022447433322668076, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -0.17075017094612122, "eval_rewards/margins": 0.08708327263593674, "eval_rewards/rejected": -0.25783342123031616, "eval_runtime": 715.7099, "eval_samples_per_second": 2.794, "eval_steps_per_second": 1.397, "step": 13800 }, { "epoch": 0.9, "learning_rate": 1.403388246712842e-07, "logits/chosen": -1.0415693521499634, "logits/rejected": -0.9058489799499512, "logps/chosen": -321.95208740234375, "logps/rejected": -381.9420471191406, "loss": 0.0234, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.15770100057125092, "rewards/margins": 0.06714179366827011, "rewards/rejected": -0.22484278678894043, "step": 13810 }, { "epoch": 0.9, "learning_rate": 1.3845880933186757e-07, "logits/chosen": -1.1712547540664673, "logits/rejected": -1.0686523914337158, "logps/chosen": -413.47235107421875, "logps/rejected": -418.3628845214844, "loss": 0.0239, "rewards/accuracies": 0.5, "rewards/chosen": -0.17473356425762177, "rewards/margins": 0.03912647068500519, "rewards/rejected": -0.21386003494262695, "step": 13820 }, { "epoch": 0.9, "learning_rate": 1.3659111294811457e-07, "logits/chosen": -1.038834571838379, "logits/rejected": -1.0247809886932373, "logps/chosen": -391.3648681640625, "logps/rejected": -437.35418701171875, "loss": 0.0285, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19952170550823212, "rewards/margins": 0.06388555467128754, "rewards/rejected": -0.26340723037719727, "step": 13830 }, { "epoch": 0.91, "learning_rate": 1.347357452628459e-07, "logits/chosen": -1.477870225906372, "logits/rejected": -1.3423594236373901, "logps/chosen": -397.6361389160156, "logps/rejected": -457.351318359375, "loss": 0.032, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15233857929706573, "rewards/margins": 0.0672169104218483, "rewards/rejected": -0.21955546736717224, "step": 13840 }, { "epoch": 0.91, "learning_rate": 1.3289271595456732e-07, "logits/chosen": -1.0627280473709106, "logits/rejected": -0.7165713906288147, "logps/chosen": -415.29852294921875, "logps/rejected": -494.1417541503906, "loss": 0.0121, "rewards/accuracies": 0.75, "rewards/chosen": -0.21096935868263245, "rewards/margins": 0.10463689267635345, "rewards/rejected": -0.3156062662601471, "step": 13850 }, { "epoch": 0.91, "learning_rate": 1.310620346374228e-07, "logits/chosen": -0.9884432554244995, "logits/rejected": -0.8231443166732788, "logps/chosen": -438.54638671875, "logps/rejected": -521.2867431640625, "loss": 0.0151, "rewards/accuracies": 0.75, "rewards/chosen": -0.20632442831993103, "rewards/margins": 0.11954349279403687, "rewards/rejected": -0.3258678913116455, "step": 13860 }, { "epoch": 0.91, "learning_rate": 1.2924371086114274e-07, "logits/chosen": -1.160275936126709, "logits/rejected": -0.7538944482803345, "logps/chosen": -408.69793701171875, "logps/rejected": -480.8848571777344, "loss": 0.0135, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.172636479139328, "rewards/margins": 0.07821375876665115, "rewards/rejected": -0.25085026025772095, "step": 13870 }, { "epoch": 0.91, "learning_rate": 1.274377541109953e-07, "logits/chosen": -0.924946129322052, "logits/rejected": -0.9856632947921753, "logps/chosen": -344.84173583984375, "logps/rejected": -488.0869140625, "loss": 0.0216, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18049120903015137, "rewards/margins": 0.06532148271799088, "rewards/rejected": -0.24581269919872284, "step": 13880 }, { "epoch": 0.91, "learning_rate": 1.2564417380773435e-07, "logits/chosen": -0.7976151704788208, "logits/rejected": -0.5144010782241821, "logps/chosen": -359.33807373046875, "logps/rejected": -486.020751953125, "loss": 0.0273, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18084099888801575, "rewards/margins": 0.10420070588588715, "rewards/rejected": -0.2850417196750641, "step": 13890 }, { "epoch": 0.91, "learning_rate": 1.2386297930755436e-07, "logits/chosen": -1.2421022653579712, "logits/rejected": -1.122827410697937, "logps/chosen": -472.3711853027344, "logps/rejected": -560.810791015625, "loss": 0.0219, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22448544204235077, "rewards/margins": 0.09326603263616562, "rewards/rejected": -0.3177514672279358, "step": 13900 }, { "epoch": 0.91, "eval_logits/chosen": -1.0379273891448975, "eval_logits/rejected": -0.9064913988113403, "eval_logps/chosen": -406.2875671386719, "eval_logps/rejected": -473.8788146972656, "eval_loss": 0.02243398316204548, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -0.17428259551525116, "eval_rewards/margins": 0.08798431605100632, "eval_rewards/rejected": -0.2622669041156769, "eval_runtime": 714.9738, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.399, "step": 13900 }, { "epoch": 0.91, "learning_rate": 1.220941799020378e-07, "logits/chosen": -0.9408222436904907, "logits/rejected": -0.8067396879196167, "logps/chosen": -382.4049072265625, "logps/rejected": -454.5931701660156, "loss": 0.0235, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16332384943962097, "rewards/margins": 0.09454109519720078, "rewards/rejected": -0.25786495208740234, "step": 13910 }, { "epoch": 0.91, "learning_rate": 1.2033778481810975e-07, "logits/chosen": -1.0745205879211426, "logits/rejected": -1.0396884679794312, "logps/chosen": -377.3226013183594, "logps/rejected": -450.72601318359375, "loss": 0.0328, "rewards/accuracies": 0.625, "rewards/chosen": -0.15732908248901367, "rewards/margins": 0.11074963957071304, "rewards/rejected": -0.2680787146091461, "step": 13920 }, { "epoch": 0.91, "learning_rate": 1.1859380321798591e-07, "logits/chosen": -1.1037908792495728, "logits/rejected": -1.342409372329712, "logps/chosen": -367.7962646484375, "logps/rejected": -460.5440368652344, "loss": 0.019, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16523495316505432, "rewards/margins": 0.0780029147863388, "rewards/rejected": -0.24323788285255432, "step": 13930 }, { "epoch": 0.91, "learning_rate": 1.1686224419912989e-07, "logits/chosen": -1.0082924365997314, "logits/rejected": -0.7994570136070251, "logps/chosen": -460.673828125, "logps/rejected": -541.8020629882812, "loss": 0.0174, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20780125260353088, "rewards/margins": 0.11041899025440216, "rewards/rejected": -0.31822022795677185, "step": 13940 }, { "epoch": 0.91, "learning_rate": 1.1514311679420104e-07, "logits/chosen": -0.6400123834609985, "logits/rejected": -0.6988319158554077, "logps/chosen": -334.10919189453125, "logps/rejected": -499.1197204589844, "loss": 0.0236, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17896060645580292, "rewards/margins": 0.10919086635112762, "rewards/rejected": -0.28815150260925293, "step": 13950 }, { "epoch": 0.91, "learning_rate": 1.1343642997101029e-07, "logits/chosen": -1.0996602773666382, "logits/rejected": -0.9189102053642273, "logps/chosen": -386.5760498046875, "logps/rejected": -466.99810791015625, "loss": 0.0292, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1842227280139923, "rewards/margins": 0.0936439111828804, "rewards/rejected": -0.2778666019439697, "step": 13960 }, { "epoch": 0.91, "learning_rate": 1.1174219263247188e-07, "logits/chosen": -0.603789746761322, "logits/rejected": -0.6049255728721619, "logps/chosen": -390.61785888671875, "logps/rejected": -475.6376037597656, "loss": 0.0161, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19180023670196533, "rewards/margins": 0.1025509387254715, "rewards/rejected": -0.29435116052627563, "step": 13970 }, { "epoch": 0.91, "learning_rate": 1.1006041361655839e-07, "logits/chosen": -1.2232646942138672, "logits/rejected": -0.8052148818969727, "logps/chosen": -383.7058410644531, "logps/rejected": -410.437255859375, "loss": 0.0243, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.17526856064796448, "rewards/margins": 0.06666896492242813, "rewards/rejected": -0.241937518119812, "step": 13980 }, { "epoch": 0.92, "learning_rate": 1.0839110169625189e-07, "logits/chosen": -0.8743176460266113, "logits/rejected": -0.9571408033370972, "logps/chosen": -408.46722412109375, "logps/rejected": -536.3209838867188, "loss": 0.0241, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20162491500377655, "rewards/margins": 0.13520914316177368, "rewards/rejected": -0.33683404326438904, "step": 13990 }, { "epoch": 0.92, "learning_rate": 1.06734265579502e-07, "logits/chosen": -1.0423598289489746, "logits/rejected": -0.7194265723228455, "logps/chosen": -447.3946228027344, "logps/rejected": -481.02099609375, "loss": 0.024, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18587790429592133, "rewards/margins": 0.10013161599636078, "rewards/rejected": -0.2860095500946045, "step": 14000 }, { "epoch": 0.92, "eval_logits/chosen": -1.0153886079788208, "eval_logits/rejected": -0.8850435018539429, "eval_logps/chosen": -410.66156005859375, "eval_logps/rejected": -478.7597961425781, "eval_loss": 0.02244659699499607, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -0.17865662276744843, "eval_rewards/margins": 0.088491290807724, "eval_rewards/rejected": -0.2671479284763336, "eval_runtime": 716.6156, "eval_samples_per_second": 2.791, "eval_steps_per_second": 1.395, "step": 14000 }, { "epoch": 0.92, "learning_rate": 1.050899139091771e-07, "logits/chosen": -1.2670387029647827, "logits/rejected": -0.8496761322021484, "logps/chosen": -466.0245056152344, "logps/rejected": -513.6765747070312, "loss": 0.0211, "rewards/accuracies": 0.75, "rewards/chosen": -0.1873970329761505, "rewards/margins": 0.09417024999856949, "rewards/rejected": -0.2815672755241394, "step": 14010 }, { "epoch": 0.92, "learning_rate": 1.0345805526302072e-07, "logits/chosen": -1.1485652923583984, "logits/rejected": -0.9716947674751282, "logps/chosen": -370.2688293457031, "logps/rejected": -454.795654296875, "loss": 0.0121, "rewards/accuracies": 0.75, "rewards/chosen": -0.16910648345947266, "rewards/margins": 0.09176457673311234, "rewards/rejected": -0.260871022939682, "step": 14020 }, { "epoch": 0.92, "learning_rate": 1.0183869815360764e-07, "logits/chosen": -1.0980041027069092, "logits/rejected": -1.1585925817489624, "logps/chosen": -353.38116455078125, "logps/rejected": -456.03021240234375, "loss": 0.0157, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16146765649318695, "rewards/margins": 0.07000899314880371, "rewards/rejected": -0.23147663474082947, "step": 14030 }, { "epoch": 0.92, "learning_rate": 1.0023185102829763e-07, "logits/chosen": -0.7969232797622681, "logits/rejected": -0.8762233853340149, "logps/chosen": -427.91766357421875, "logps/rejected": -525.0812377929688, "loss": 0.0221, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19709549844264984, "rewards/margins": 0.09297887980937958, "rewards/rejected": -0.2900743782520294, "step": 14040 }, { "epoch": 0.92, "learning_rate": 9.863752226919182e-08, "logits/chosen": -0.8830846548080444, "logits/rejected": -0.729462742805481, "logps/chosen": -407.84466552734375, "logps/rejected": -470.2268981933594, "loss": 0.0314, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1701125055551529, "rewards/margins": 0.12796083092689514, "rewards/rejected": -0.29807335138320923, "step": 14050 }, { "epoch": 0.92, "learning_rate": 9.705572019309107e-08, "logits/chosen": -0.9977526664733887, "logits/rejected": -0.8371391296386719, "logps/chosen": -445.82940673828125, "logps/rejected": -518.234619140625, "loss": 0.0199, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1778019666671753, "rewards/margins": 0.10388918220996857, "rewards/rejected": -0.28169113397598267, "step": 14060 }, { "epoch": 0.92, "learning_rate": 9.548645305144849e-08, "logits/chosen": -1.2183663845062256, "logits/rejected": -0.9598463177680969, "logps/chosen": -334.23150634765625, "logps/rejected": -424.7630310058594, "loss": 0.0261, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1607363522052765, "rewards/margins": 0.08655780553817749, "rewards/rejected": -0.24729418754577637, "step": 14070 }, { "epoch": 0.92, "learning_rate": 9.392972903033149e-08, "logits/chosen": -0.7881813049316406, "logits/rejected": -1.0731167793273926, "logps/chosen": -379.19891357421875, "logps/rejected": -420.2137145996094, "loss": 0.0153, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1560676246881485, "rewards/margins": 0.048558320850133896, "rewards/rejected": -0.2046259343624115, "step": 14080 }, { "epoch": 0.92, "learning_rate": 9.238555625037449e-08, "logits/chosen": -0.8488396406173706, "logits/rejected": -0.771621584892273, "logps/chosen": -370.9525451660156, "logps/rejected": -400.3120422363281, "loss": 0.0192, "rewards/accuracies": 0.625, "rewards/chosen": -0.18024437129497528, "rewards/margins": 0.05904129147529602, "rewards/rejected": -0.2392856627702713, "step": 14090 }, { "epoch": 0.92, "learning_rate": 9.085394276673903e-08, "logits/chosen": -0.997685432434082, "logits/rejected": -0.993954062461853, "logps/chosen": -445.4573669433594, "logps/rejected": -523.0450439453125, "loss": 0.0228, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17729127407073975, "rewards/margins": 0.0920371487736702, "rewards/rejected": -0.26932841539382935, "step": 14100 }, { "epoch": 0.92, "eval_logits/chosen": -1.0223374366760254, "eval_logits/rejected": -0.891913890838623, "eval_logps/chosen": -409.0929870605469, "eval_logps/rejected": -476.6038818359375, "eval_loss": 0.022453147917985916, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -0.17708806693553925, "eval_rewards/margins": 0.0879039466381073, "eval_rewards/rejected": -0.26499199867248535, "eval_runtime": 715.1694, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.398, "step": 14100 }, { "epoch": 0.92, "learning_rate": 8.933489656907157e-08, "logits/chosen": -0.9175044894218445, "logits/rejected": -1.005128026008606, "logps/chosen": -398.2541198730469, "logps/rejected": -475.3755798339844, "loss": 0.0264, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18091870844364166, "rewards/margins": 0.05218110606074333, "rewards/rejected": -0.2330997884273529, "step": 14110 }, { "epoch": 0.92, "learning_rate": 8.782842558146127e-08, "logits/chosen": -0.8923619389533997, "logits/rejected": -0.8135698437690735, "logps/chosen": -319.1666259765625, "logps/rejected": -425.32061767578125, "loss": 0.0335, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15728271007537842, "rewards/margins": 0.1078605204820633, "rewards/rejected": -0.2651432454586029, "step": 14120 }, { "epoch": 0.92, "learning_rate": 8.633453766239836e-08, "logits/chosen": -1.1690844297409058, "logits/rejected": -1.0830466747283936, "logps/chosen": -390.0625305175781, "logps/rejected": -415.4368591308594, "loss": 0.0101, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1549413800239563, "rewards/margins": 0.05722958967089653, "rewards/rejected": -0.21217098832130432, "step": 14130 }, { "epoch": 0.93, "learning_rate": 8.485324060473448e-08, "logits/chosen": -1.0862903594970703, "logits/rejected": -0.9004877805709839, "logps/chosen": -413.40179443359375, "logps/rejected": -463.774658203125, "loss": 0.0161, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.17311972379684448, "rewards/margins": 0.06276368349790573, "rewards/rejected": -0.23588339984416962, "step": 14140 }, { "epoch": 0.93, "learning_rate": 8.338454213564052e-08, "logits/chosen": -1.0443912744522095, "logits/rejected": -0.7935680747032166, "logps/chosen": -430.1393127441406, "logps/rejected": -523.74462890625, "loss": 0.0272, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19924233853816986, "rewards/margins": 0.11079951375722885, "rewards/rejected": -0.3100418448448181, "step": 14150 }, { "epoch": 0.93, "learning_rate": 8.192844991656679e-08, "logits/chosen": -0.9405023455619812, "logits/rejected": -0.6561242938041687, "logps/chosen": -451.96368408203125, "logps/rejected": -484.1292419433594, "loss": 0.0232, "rewards/accuracies": 0.625, "rewards/chosen": -0.21324701607227325, "rewards/margins": 0.06857358664274216, "rewards/rejected": -0.2818205952644348, "step": 14160 }, { "epoch": 0.93, "learning_rate": 8.048497154320434e-08, "logits/chosen": -0.9651077389717102, "logits/rejected": -1.0678898096084595, "logps/chosen": -330.3751220703125, "logps/rejected": -409.69891357421875, "loss": 0.0233, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20082560181617737, "rewards/margins": 0.07022635638713837, "rewards/rejected": -0.27105194330215454, "step": 14170 }, { "epoch": 0.93, "learning_rate": 7.905411454544265e-08, "logits/chosen": -1.0331910848617554, "logits/rejected": -1.0138767957687378, "logps/chosen": -418.9769592285156, "logps/rejected": -485.62738037109375, "loss": 0.0336, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.18247273564338684, "rewards/margins": 0.061908088624477386, "rewards/rejected": -0.24438080191612244, "step": 14180 }, { "epoch": 0.93, "learning_rate": 7.763588638733332e-08, "logits/chosen": -1.0064284801483154, "logits/rejected": -1.03469979763031, "logps/chosen": -431.0116271972656, "logps/rejected": -503.02362060546875, "loss": 0.0179, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16859152913093567, "rewards/margins": 0.09183613955974579, "rewards/rejected": -0.26042765378952026, "step": 14190 }, { "epoch": 0.93, "learning_rate": 7.623029446704899e-08, "logits/chosen": -1.2003682851791382, "logits/rejected": -1.2555644512176514, "logps/chosen": -486.3758239746094, "logps/rejected": -565.3789672851562, "loss": 0.0146, "rewards/accuracies": 0.75, "rewards/chosen": -0.17493435740470886, "rewards/margins": 0.11589778959751129, "rewards/rejected": -0.29083213210105896, "step": 14200 }, { "epoch": 0.93, "eval_logits/chosen": -1.0146698951721191, "eval_logits/rejected": -0.8844304084777832, "eval_logps/chosen": -412.2579040527344, "eval_logps/rejected": -480.30926513671875, "eval_loss": 0.022437186911702156, "eval_rewards/accuracies": 0.6769999861717224, "eval_rewards/chosen": -0.18025293946266174, "eval_rewards/margins": 0.08844440430402756, "eval_rewards/rejected": -0.2686973512172699, "eval_runtime": 716.5025, "eval_samples_per_second": 2.791, "eval_steps_per_second": 1.396, "step": 14200 }, { "epoch": 0.93, "learning_rate": 7.483734611684557e-08, "logits/chosen": -0.7734705209732056, "logits/rejected": -0.597266674041748, "logps/chosen": -435.97052001953125, "logps/rejected": -463.5069885253906, "loss": 0.0381, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1790846884250641, "rewards/margins": 0.08169746398925781, "rewards/rejected": -0.2607821524143219, "step": 14210 }, { "epoch": 0.93, "learning_rate": 7.345704860302366e-08, "logits/chosen": -1.4090945720672607, "logits/rejected": -1.0228337049484253, "logps/chosen": -430.98065185546875, "logps/rejected": -537.8982543945312, "loss": 0.0123, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18272797763347626, "rewards/margins": 0.10759595781564713, "rewards/rejected": -0.290323942899704, "step": 14220 }, { "epoch": 0.93, "learning_rate": 7.208940912589224e-08, "logits/chosen": -1.0084621906280518, "logits/rejected": -0.80125492811203, "logps/chosen": -422.548828125, "logps/rejected": -506.9256896972656, "loss": 0.0301, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2129625529050827, "rewards/margins": 0.12175538390874863, "rewards/rejected": -0.33471792936325073, "step": 14230 }, { "epoch": 0.93, "learning_rate": 7.073443481972753e-08, "logits/chosen": -0.9411062002182007, "logits/rejected": -0.8530802726745605, "logps/chosen": -385.3193664550781, "logps/rejected": -490.29522705078125, "loss": 0.0104, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2011636197566986, "rewards/margins": 0.0854741781949997, "rewards/rejected": -0.2866377830505371, "step": 14240 }, { "epoch": 0.93, "learning_rate": 6.939213275274027e-08, "logits/chosen": -1.0928478240966797, "logits/rejected": -1.1013089418411255, "logps/chosen": -414.11541748046875, "logps/rejected": -465.2770080566406, "loss": 0.0211, "rewards/accuracies": 0.625, "rewards/chosen": -0.17486163973808289, "rewards/margins": 0.07076430320739746, "rewards/rejected": -0.24562592804431915, "step": 14250 }, { "epoch": 0.93, "learning_rate": 6.806250992703461e-08, "logits/chosen": -0.9533036351203918, "logits/rejected": -0.8699678182601929, "logps/chosen": -403.17156982421875, "logps/rejected": -447.97607421875, "loss": 0.0268, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18647697567939758, "rewards/margins": 0.07101467251777649, "rewards/rejected": -0.25749167799949646, "step": 14260 }, { "epoch": 0.93, "learning_rate": 6.674557327857572e-08, "logits/chosen": -1.1737830638885498, "logits/rejected": -1.0281310081481934, "logps/chosen": -428.2664489746094, "logps/rejected": -540.7010498046875, "loss": 0.0253, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17399579286575317, "rewards/margins": 0.12152265012264252, "rewards/rejected": -0.2955184578895569, "step": 14270 }, { "epoch": 0.93, "learning_rate": 6.544132967714917e-08, "logits/chosen": -0.7922371029853821, "logits/rejected": -0.7069785594940186, "logps/chosen": -478.7749938964844, "logps/rejected": -583.5745849609375, "loss": 0.02, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23191507160663605, "rewards/margins": 0.11851205676794052, "rewards/rejected": -0.35042712092399597, "step": 14280 }, { "epoch": 0.93, "learning_rate": 6.414978592632932e-08, "logits/chosen": -0.9644430875778198, "logits/rejected": -0.9138165712356567, "logps/chosen": -443.72802734375, "logps/rejected": -488.78271484375, "loss": 0.0114, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1826287806034088, "rewards/margins": 0.0915100947022438, "rewards/rejected": -0.274138867855072, "step": 14290 }, { "epoch": 0.94, "learning_rate": 6.287094876344046e-08, "logits/chosen": -1.2461270093917847, "logits/rejected": -1.1099523305892944, "logps/chosen": -296.6013488769531, "logps/rejected": -379.26690673828125, "loss": 0.0164, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.12418107688426971, "rewards/margins": 0.07602651417255402, "rewards/rejected": -0.20020759105682373, "step": 14300 }, { "epoch": 0.94, "eval_logits/chosen": -1.0156885385513306, "eval_logits/rejected": -0.8855485320091248, "eval_logps/chosen": -411.2285461425781, "eval_logps/rejected": -478.8005065917969, "eval_loss": 0.022453712299466133, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -0.17922355234622955, "eval_rewards/margins": 0.08796507120132446, "eval_rewards/rejected": -0.2671886384487152, "eval_runtime": 714.5827, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.399, "step": 14300 }, { "epoch": 0.94, "learning_rate": 6.160482485952413e-08, "logits/chosen": -1.2100712060928345, "logits/rejected": -1.1880000829696655, "logps/chosen": -445.8309020996094, "logps/rejected": -494.109375, "loss": 0.0261, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20938114821910858, "rewards/margins": 0.08007434755563736, "rewards/rejected": -0.28945547342300415, "step": 14310 }, { "epoch": 0.94, "learning_rate": 6.035142081930234e-08, "logits/chosen": -1.0176794528961182, "logits/rejected": -0.8223511576652527, "logps/chosen": -480.5355529785156, "logps/rejected": -484.24530029296875, "loss": 0.0191, "rewards/accuracies": 0.75, "rewards/chosen": -0.21773914992809296, "rewards/margins": 0.08072670549154282, "rewards/rejected": -0.29846587777137756, "step": 14320 }, { "epoch": 0.94, "learning_rate": 5.911074318114496e-08, "logits/chosen": -0.9228354692459106, "logits/rejected": -0.7767246961593628, "logps/chosen": -393.4886779785156, "logps/rejected": -529.6148681640625, "loss": 0.0099, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18990051746368408, "rewards/margins": 0.09400142729282379, "rewards/rejected": -0.28390195965766907, "step": 14330 }, { "epoch": 0.94, "learning_rate": 5.788279841703381e-08, "logits/chosen": -1.2187045812606812, "logits/rejected": -0.9419827461242676, "logps/chosen": -357.7316589355469, "logps/rejected": -447.77716064453125, "loss": 0.0223, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1738220900297165, "rewards/margins": 0.09771256893873215, "rewards/rejected": -0.2715347111225128, "step": 14340 }, { "epoch": 0.94, "learning_rate": 5.66675929325311e-08, "logits/chosen": -1.153968334197998, "logits/rejected": -0.9159612655639648, "logps/chosen": -401.0085144042969, "logps/rejected": -442.57098388671875, "loss": 0.0161, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18034406006336212, "rewards/margins": 0.05291125923395157, "rewards/rejected": -0.2332552969455719, "step": 14350 }, { "epoch": 0.94, "learning_rate": 5.546513306674301e-08, "logits/chosen": -0.8485516309738159, "logits/rejected": -0.7479302287101746, "logps/chosen": -471.10931396484375, "logps/rejected": -492.97698974609375, "loss": 0.0214, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19259265065193176, "rewards/margins": 0.09695064276456833, "rewards/rejected": -0.2895432710647583, "step": 14360 }, { "epoch": 0.94, "learning_rate": 5.4275425092290004e-08, "logits/chosen": -1.5290069580078125, "logits/rejected": -1.3027410507202148, "logps/chosen": -433.9794921875, "logps/rejected": -498.85516357421875, "loss": 0.0237, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17918336391448975, "rewards/margins": 0.08331837505102158, "rewards/rejected": -0.2625017464160919, "step": 14370 }, { "epoch": 0.94, "learning_rate": 5.309847521527078e-08, "logits/chosen": -0.7416258454322815, "logits/rejected": -0.650170624256134, "logps/chosen": -477.94537353515625, "logps/rejected": -499.11785888671875, "loss": 0.0265, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19337202608585358, "rewards/margins": 0.06410078704357147, "rewards/rejected": -0.25747281312942505, "step": 14380 }, { "epoch": 0.94, "learning_rate": 5.1934289575233385e-08, "logits/chosen": -0.7874212861061096, "logits/rejected": -0.5679928064346313, "logps/chosen": -418.0940856933594, "logps/rejected": -491.8233337402344, "loss": 0.0241, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1785646677017212, "rewards/margins": 0.10809657722711563, "rewards/rejected": -0.286661297082901, "step": 14390 }, { "epoch": 0.94, "learning_rate": 5.078287424513994e-08, "logits/chosen": -1.23927640914917, "logits/rejected": -1.0591644048690796, "logps/chosen": -474.15997314453125, "logps/rejected": -502.009033203125, "loss": 0.0248, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.204112246632576, "rewards/margins": 0.10267938673496246, "rewards/rejected": -0.30679160356521606, "step": 14400 }, { "epoch": 0.94, "eval_logits/chosen": -1.0148255825042725, "eval_logits/rejected": -0.8846120834350586, "eval_logps/chosen": -412.7734680175781, "eval_logps/rejected": -480.7046813964844, "eval_loss": 0.022418931126594543, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -0.18076851963996887, "eval_rewards/margins": 0.08832425624132156, "eval_rewards/rejected": -0.26909276843070984, "eval_runtime": 715.3708, "eval_samples_per_second": 2.796, "eval_steps_per_second": 1.398, "step": 14400 }, { "epoch": 0.94, "learning_rate": 4.964423523133671e-08, "logits/chosen": -1.3571563959121704, "logits/rejected": -0.8952462077140808, "logps/chosen": -370.10089111328125, "logps/rejected": -409.4176330566406, "loss": 0.0268, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1523693948984146, "rewards/margins": 0.07366596162319183, "rewards/rejected": -0.22603532671928406, "step": 14410 }, { "epoch": 0.94, "learning_rate": 4.8518378473522976e-08, "logits/chosen": -1.1160211563110352, "logits/rejected": -0.8559325933456421, "logps/chosen": -434.001220703125, "logps/rejected": -512.3402099609375, "loss": 0.0283, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.18349134922027588, "rewards/margins": 0.08506624400615692, "rewards/rejected": -0.268557608127594, "step": 14420 }, { "epoch": 0.94, "learning_rate": 4.7405309844718584e-08, "logits/chosen": -1.1042518615722656, "logits/rejected": -0.9363161325454712, "logps/chosen": -389.70477294921875, "logps/rejected": -527.3927612304688, "loss": 0.0206, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19720575213432312, "rewards/margins": 0.12849177420139313, "rewards/rejected": -0.32569754123687744, "step": 14430 }, { "epoch": 0.94, "learning_rate": 4.630503515123508e-08, "logits/chosen": -1.2130210399627686, "logits/rejected": -0.9072374105453491, "logps/chosen": -388.48541259765625, "logps/rejected": -420.8318786621094, "loss": 0.0318, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1876194179058075, "rewards/margins": 0.08221286535263062, "rewards/rejected": -0.2698322832584381, "step": 14440 }, { "epoch": 0.95, "learning_rate": 4.5217560132644056e-08, "logits/chosen": -0.9046792984008789, "logits/rejected": -0.7383350729942322, "logps/chosen": -326.1619873046875, "logps/rejected": -404.20404052734375, "loss": 0.0589, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.17554239928722382, "rewards/margins": 0.05964116007089615, "rewards/rejected": -0.23518356680870056, "step": 14450 }, { "epoch": 0.95, "learning_rate": 4.41428904617483e-08, "logits/chosen": -1.0154130458831787, "logits/rejected": -1.1010584831237793, "logps/chosen": -354.14996337890625, "logps/rejected": -424.96337890625, "loss": 0.0415, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.17704129219055176, "rewards/margins": 0.0661528930068016, "rewards/rejected": -0.24319419264793396, "step": 14460 }, { "epoch": 0.95, "learning_rate": 4.3081031744550696e-08, "logits/chosen": -1.1790661811828613, "logits/rejected": -1.2092974185943604, "logps/chosen": -388.162353515625, "logps/rejected": -461.73529052734375, "loss": 0.0311, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1350170373916626, "rewards/margins": 0.09590072929859161, "rewards/rejected": -0.2309177815914154, "step": 14470 }, { "epoch": 0.95, "learning_rate": 4.2031989520227025e-08, "logits/chosen": -0.9632661938667297, "logits/rejected": -0.883492112159729, "logps/chosen": -433.160400390625, "logps/rejected": -479.6390686035156, "loss": 0.0097, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20484599471092224, "rewards/margins": 0.06836618483066559, "rewards/rejected": -0.273212194442749, "step": 14480 }, { "epoch": 0.95, "learning_rate": 4.099576926109461e-08, "logits/chosen": -1.2957643270492554, "logits/rejected": -0.937993049621582, "logps/chosen": -400.6993713378906, "logps/rejected": -399.5106201171875, "loss": 0.0256, "rewards/accuracies": 0.625, "rewards/chosen": -0.1609175205230713, "rewards/margins": 0.07796572148799896, "rewards/rejected": -0.23888322710990906, "step": 14490 }, { "epoch": 0.95, "learning_rate": 3.997237637258705e-08, "logits/chosen": -1.1294687986373901, "logits/rejected": -0.90412837266922, "logps/chosen": -473.8423767089844, "logps/rejected": -516.46240234375, "loss": 0.0118, "rewards/accuracies": 0.625, "rewards/chosen": -0.15658903121948242, "rewards/margins": 0.08596320450305939, "rewards/rejected": -0.2425522357225418, "step": 14500 }, { "epoch": 0.95, "eval_logits/chosen": -1.0131404399871826, "eval_logits/rejected": -0.8831061124801636, "eval_logps/chosen": -413.388427734375, "eval_logps/rejected": -481.34869384765625, "eval_loss": 0.022429080680012703, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -0.18138352036476135, "eval_rewards/margins": 0.08835328370332718, "eval_rewards/rejected": -0.2697368264198303, "eval_runtime": 713.4293, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.402, "step": 14500 }, { "epoch": 0.95, "learning_rate": 3.8961816193222035e-08, "logits/chosen": -1.0870587825775146, "logits/rejected": -0.8369253277778625, "logps/chosen": -465.21588134765625, "logps/rejected": -458.713623046875, "loss": 0.0257, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.22936555743217468, "rewards/margins": 0.050831060856580734, "rewards/rejected": -0.2801966071128845, "step": 14510 }, { "epoch": 0.95, "learning_rate": 3.79640939945769e-08, "logits/chosen": -1.0559475421905518, "logits/rejected": -0.8710237741470337, "logps/chosen": -407.1949768066406, "logps/rejected": -370.2845153808594, "loss": 0.0054, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.12097406387329102, "rewards/margins": 0.049353644251823425, "rewards/rejected": -0.17032770812511444, "step": 14520 }, { "epoch": 0.95, "learning_rate": 3.697921498125895e-08, "logits/chosen": -0.8816161155700684, "logits/rejected": -1.0352247953414917, "logps/chosen": -419.63311767578125, "logps/rejected": -526.7732543945312, "loss": 0.0178, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20968011021614075, "rewards/margins": 0.10126037895679474, "rewards/rejected": -0.3109404742717743, "step": 14530 }, { "epoch": 0.95, "learning_rate": 3.6007184290880456e-08, "logits/chosen": -1.1614787578582764, "logits/rejected": -1.0354723930358887, "logps/chosen": -426.97955322265625, "logps/rejected": -476.740234375, "loss": 0.0433, "rewards/accuracies": 0.625, "rewards/chosen": -0.21500656008720398, "rewards/margins": 0.06642434746026993, "rewards/rejected": -0.2814309298992157, "step": 14540 }, { "epoch": 0.95, "learning_rate": 3.504800699402872e-08, "logits/chosen": -1.273871898651123, "logits/rejected": -1.0708858966827393, "logps/chosen": -520.5189208984375, "logps/rejected": -488.89630126953125, "loss": 0.0123, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1748647391796112, "rewards/margins": 0.041744206100702286, "rewards/rejected": -0.2166089564561844, "step": 14550 }, { "epoch": 0.95, "learning_rate": 3.4101688094242967e-08, "logits/chosen": -1.0231026411056519, "logits/rejected": -0.8495405912399292, "logps/chosen": -508.6773986816406, "logps/rejected": -606.7073974609375, "loss": 0.0416, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23911647498607635, "rewards/margins": 0.1232062354683876, "rewards/rejected": -0.36232268810272217, "step": 14560 }, { "epoch": 0.95, "learning_rate": 3.3168232527985564e-08, "logits/chosen": -0.657323956489563, "logits/rejected": -0.648857057094574, "logps/chosen": -421.421875, "logps/rejected": -456.7588806152344, "loss": 0.0255, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17980334162712097, "rewards/margins": 0.09008689969778061, "rewards/rejected": -0.2698902487754822, "step": 14570 }, { "epoch": 0.95, "learning_rate": 3.224764516461892e-08, "logits/chosen": -1.0926493406295776, "logits/rejected": -0.7998725771903992, "logps/chosen": -409.55810546875, "logps/rejected": -496.1477966308594, "loss": 0.0155, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15455926954746246, "rewards/margins": 0.1173384040594101, "rewards/rejected": -0.27189767360687256, "step": 14580 }, { "epoch": 0.95, "learning_rate": 3.133993080637665e-08, "logits/chosen": -1.1204018592834473, "logits/rejected": -0.8580106496810913, "logps/chosen": -394.9521484375, "logps/rejected": -485.66778564453125, "loss": 0.0271, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19207128882408142, "rewards/margins": 0.10312651097774506, "rewards/rejected": -0.2951977849006653, "step": 14590 }, { "epoch": 0.96, "learning_rate": 3.0445094188342186e-08, "logits/chosen": -0.4857551157474518, "logits/rejected": -0.36516401171684265, "logps/chosen": -439.83953857421875, "logps/rejected": -452.116455078125, "loss": 0.0346, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18636301159858704, "rewards/margins": 0.09254106134176254, "rewards/rejected": -0.2789040803909302, "step": 14600 }, { "epoch": 0.96, "eval_logits/chosen": -1.0151700973510742, "eval_logits/rejected": -0.8849442601203918, "eval_logps/chosen": -412.47344970703125, "eval_logps/rejected": -479.9362487792969, "eval_loss": 0.022418811917304993, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -0.18046845495700836, "eval_rewards/margins": 0.08785588294267654, "eval_rewards/rejected": -0.2683243453502655, "eval_runtime": 713.0037, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.403, "step": 14600 }, { "epoch": 0.96, "learning_rate": 2.9563139978421028e-08, "logits/chosen": -0.8836932182312012, "logits/rejected": -0.9460372924804688, "logps/chosen": -387.1368103027344, "logps/rejected": -439.05859375, "loss": 0.011, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.16126489639282227, "rewards/margins": 0.05970633029937744, "rewards/rejected": -0.2209712266921997, "step": 14610 }, { "epoch": 0.96, "learning_rate": 2.869407277731939e-08, "logits/chosen": -0.6969620585441589, "logits/rejected": -0.7331337332725525, "logps/chosen": -350.57977294921875, "logps/rejected": -412.5320739746094, "loss": 0.0161, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16532263159751892, "rewards/margins": 0.08512188494205475, "rewards/rejected": -0.25044453144073486, "step": 14620 }, { "epoch": 0.96, "learning_rate": 2.783789711851642e-08, "logits/chosen": -1.1387749910354614, "logits/rejected": -0.8433340191841125, "logps/chosen": -339.46319580078125, "logps/rejected": -442.01519775390625, "loss": 0.0212, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17761218547821045, "rewards/margins": 0.1249847412109375, "rewards/rejected": -0.30259689688682556, "step": 14630 }, { "epoch": 0.96, "learning_rate": 2.6994617468244778e-08, "logits/chosen": -1.0166022777557373, "logits/rejected": -0.8803132772445679, "logps/chosen": -385.8862609863281, "logps/rejected": -430.8728942871094, "loss": 0.022, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1728655993938446, "rewards/margins": 0.10890533030033112, "rewards/rejected": -0.2817709445953369, "step": 14640 }, { "epoch": 0.96, "learning_rate": 2.6164238225463155e-08, "logits/chosen": -0.8888136148452759, "logits/rejected": -0.6145034432411194, "logps/chosen": -466.16888427734375, "logps/rejected": -483.38232421875, "loss": 0.0351, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1837417632341385, "rewards/margins": 0.09572137892246246, "rewards/rejected": -0.27946314215660095, "step": 14650 }, { "epoch": 0.96, "learning_rate": 2.534676372183742e-08, "logits/chosen": -0.7719782590866089, "logits/rejected": -0.8031314015388489, "logps/chosen": -468.6703186035156, "logps/rejected": -496.5489807128906, "loss": 0.028, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18471595644950867, "rewards/margins": 0.08020530641078949, "rewards/rejected": -0.26492124795913696, "step": 14660 }, { "epoch": 0.96, "learning_rate": 2.4542198221714218e-08, "logits/chosen": -0.6857430338859558, "logits/rejected": -0.5728986859321594, "logps/chosen": -323.4570007324219, "logps/rejected": -417.074951171875, "loss": 0.0244, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.18535009026527405, "rewards/margins": 0.0933910459280014, "rewards/rejected": -0.27874115109443665, "step": 14670 }, { "epoch": 0.96, "learning_rate": 2.3750545922101854e-08, "logits/chosen": -1.4618000984191895, "logits/rejected": -0.8416509628295898, "logps/chosen": -479.78729248046875, "logps/rejected": -498.7672424316406, "loss": 0.0323, "rewards/accuracies": 0.625, "rewards/chosen": -0.1703961193561554, "rewards/margins": 0.0873739942908287, "rewards/rejected": -0.2577701210975647, "step": 14680 }, { "epoch": 0.96, "learning_rate": 2.2971810952646112e-08, "logits/chosen": -1.1706682443618774, "logits/rejected": -0.967304527759552, "logps/chosen": -440.339599609375, "logps/rejected": -454.8982849121094, "loss": 0.028, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18197688460350037, "rewards/margins": 0.062264494597911835, "rewards/rejected": -0.2442413866519928, "step": 14690 }, { "epoch": 0.96, "learning_rate": 2.2205997375610576e-08, "logits/chosen": -0.7693328261375427, "logits/rejected": -0.7278302907943726, "logps/chosen": -322.7300720214844, "logps/rejected": -428.13983154296875, "loss": 0.0182, "rewards/accuracies": 0.625, "rewards/chosen": -0.13722415268421173, "rewards/margins": 0.10024970769882202, "rewards/rejected": -0.23747387528419495, "step": 14700 }, { "epoch": 0.96, "eval_logits/chosen": -1.0139862298965454, "eval_logits/rejected": -0.8840221166610718, "eval_logps/chosen": -412.03338623046875, "eval_logps/rejected": -479.36956787109375, "eval_loss": 0.0224233977496624, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -0.18002845346927643, "eval_rewards/margins": 0.08772921562194824, "eval_rewards/rejected": -0.2677576541900635, "eval_runtime": 714.621, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.399, "step": 14700 }, { "epoch": 0.96, "learning_rate": 2.1453109185853304e-08, "logits/chosen": -1.0962893962860107, "logits/rejected": -1.0992854833602905, "logps/chosen": -347.388916015625, "logps/rejected": -440.79327392578125, "loss": 0.0416, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1453104168176651, "rewards/margins": 0.09165708720684052, "rewards/rejected": -0.2369675189256668, "step": 14710 }, { "epoch": 0.96, "learning_rate": 2.0713150310808784e-08, "logits/chosen": -1.1072834730148315, "logits/rejected": -1.0298482179641724, "logps/chosen": -416.1766052246094, "logps/rejected": -458.0320739746094, "loss": 0.0196, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.190688818693161, "rewards/margins": 0.036092083901166916, "rewards/rejected": -0.22678089141845703, "step": 14720 }, { "epoch": 0.96, "learning_rate": 1.9986124610464064e-08, "logits/chosen": -0.7501775026321411, "logits/rejected": -0.6109546422958374, "logps/chosen": -498.6607360839844, "logps/rejected": -559.8812255859375, "loss": 0.0144, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2114305943250656, "rewards/margins": 0.1310327649116516, "rewards/rejected": -0.3424634039402008, "step": 14730 }, { "epoch": 0.96, "learning_rate": 1.927203587734211e-08, "logits/chosen": -0.668878436088562, "logits/rejected": -0.6683077812194824, "logps/chosen": -430.70391845703125, "logps/rejected": -474.8045959472656, "loss": 0.0329, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17771223187446594, "rewards/margins": 0.09262716770172119, "rewards/rejected": -0.2703394293785095, "step": 14740 }, { "epoch": 0.97, "learning_rate": 1.8570887836479034e-08, "logits/chosen": -0.9091743230819702, "logits/rejected": -0.6617622375488281, "logps/chosen": -405.105224609375, "logps/rejected": -520.5613403320312, "loss": 0.0276, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.21148207783699036, "rewards/margins": 0.06264074891805649, "rewards/rejected": -0.27412277460098267, "step": 14750 }, { "epoch": 0.97, "learning_rate": 1.7882684145406616e-08, "logits/chosen": -1.0645984411239624, "logits/rejected": -1.0433337688446045, "logps/chosen": -467.4551696777344, "logps/rejected": -538.0322875976562, "loss": 0.0225, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1710345447063446, "rewards/margins": 0.07246598601341248, "rewards/rejected": -0.24350054562091827, "step": 14760 }, { "epoch": 0.97, "learning_rate": 1.7207428394132865e-08, "logits/chosen": -1.2732738256454468, "logits/rejected": -0.9673255681991577, "logps/chosen": -455.8346252441406, "logps/rejected": -517.7564697265625, "loss": 0.0114, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1860542893409729, "rewards/margins": 0.11660908162593842, "rewards/rejected": -0.3026633858680725, "step": 14770 }, { "epoch": 0.97, "learning_rate": 1.654512410512177e-08, "logits/chosen": -1.0593675374984741, "logits/rejected": -0.7931973338127136, "logps/chosen": -427.0390625, "logps/rejected": -438.77752685546875, "loss": 0.0347, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1734621673822403, "rewards/margins": 0.07412689179182053, "rewards/rejected": -0.24758903682231903, "step": 14780 }, { "epoch": 0.97, "learning_rate": 1.5895774733277468e-08, "logits/chosen": -0.9484906196594238, "logits/rejected": -0.8236709833145142, "logps/chosen": -467.310546875, "logps/rejected": -504.48272705078125, "loss": 0.0151, "rewards/accuracies": 0.625, "rewards/chosen": -0.18749356269836426, "rewards/margins": 0.08740875869989395, "rewards/rejected": -0.2749023735523224, "step": 14790 }, { "epoch": 0.97, "learning_rate": 1.5259383665924e-08, "logits/chosen": -1.515362024307251, "logits/rejected": -1.1565622091293335, "logps/chosen": -486.74072265625, "logps/rejected": -475.08990478515625, "loss": 0.0084, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.14464879035949707, "rewards/margins": 0.08235809206962585, "rewards/rejected": -0.22700688242912292, "step": 14800 }, { "epoch": 0.97, "eval_logits/chosen": -1.0146613121032715, "eval_logits/rejected": -0.8845694661140442, "eval_logps/chosen": -412.5492248535156, "eval_logps/rejected": -480.0011291503906, "eval_loss": 0.022431422024965286, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -0.18054428696632385, "eval_rewards/margins": 0.08784495294094086, "eval_rewards/rejected": -0.2683892250061035, "eval_runtime": 715.4154, "eval_samples_per_second": 2.796, "eval_steps_per_second": 1.398, "step": 14800 }, { "epoch": 0.97, "learning_rate": 1.4635954222789461e-08, "logits/chosen": -1.1866047382354736, "logits/rejected": -1.132948875427246, "logps/chosen": -377.77508544921875, "logps/rejected": -459.7662048339844, "loss": 0.0156, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1606675088405609, "rewards/margins": 0.07688317447900772, "rewards/rejected": -0.23755066096782684, "step": 14810 }, { "epoch": 0.97, "learning_rate": 1.402548965598688e-08, "logits/chosen": -1.1166980266571045, "logits/rejected": -0.9035059213638306, "logps/chosen": -405.94122314453125, "logps/rejected": -459.23541259765625, "loss": 0.0124, "rewards/accuracies": 0.75, "rewards/chosen": -0.20131394267082214, "rewards/margins": 0.05651068687438965, "rewards/rejected": -0.2578246295452118, "step": 14820 }, { "epoch": 0.97, "learning_rate": 1.3427993149998375e-08, "logits/chosen": -1.1824123859405518, "logits/rejected": -1.0716147422790527, "logps/chosen": -413.370361328125, "logps/rejected": -454.5038146972656, "loss": 0.0274, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1713331788778305, "rewards/margins": 0.0965035930275917, "rewards/rejected": -0.2678368091583252, "step": 14830 }, { "epoch": 0.97, "learning_rate": 1.2843467821658518e-08, "logits/chosen": -1.222220778465271, "logits/rejected": -1.0622915029525757, "logps/chosen": -390.2178649902344, "logps/rejected": -484.57305908203125, "loss": 0.0143, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16399499773979187, "rewards/margins": 0.0947236567735672, "rewards/rejected": -0.2587186396121979, "step": 14840 }, { "epoch": 0.97, "learning_rate": 1.2271916720137666e-08, "logits/chosen": -1.45135498046875, "logits/rejected": -1.1300289630889893, "logps/chosen": -469.70233154296875, "logps/rejected": -475.525146484375, "loss": 0.0352, "rewards/accuracies": 0.625, "rewards/chosen": -0.1839328110218048, "rewards/margins": 0.0519578643143177, "rewards/rejected": -0.2358907014131546, "step": 14850 }, { "epoch": 0.97, "learning_rate": 1.171334282692671e-08, "logits/chosen": -1.2485885620117188, "logits/rejected": -1.0097912549972534, "logps/chosen": -461.7266540527344, "logps/rejected": -522.4251098632812, "loss": 0.0137, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17912821471691132, "rewards/margins": 0.09133033454418182, "rewards/rejected": -0.27045851945877075, "step": 14860 }, { "epoch": 0.97, "learning_rate": 1.116774905582041e-08, "logits/chosen": -1.079002022743225, "logits/rejected": -0.9466491937637329, "logps/chosen": -349.8353271484375, "logps/rejected": -398.86834716796875, "loss": 0.0178, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.16501681506633759, "rewards/margins": 0.061736248433589935, "rewards/rejected": -0.22675307095050812, "step": 14870 }, { "epoch": 0.97, "learning_rate": 1.0635138252902966e-08, "logits/chosen": -1.2555723190307617, "logits/rejected": -1.129564881324768, "logps/chosen": -398.8155822753906, "logps/rejected": -466.9261169433594, "loss": 0.0257, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1684015840291977, "rewards/margins": 0.09023905545473099, "rewards/rejected": -0.2586406469345093, "step": 14880 }, { "epoch": 0.97, "learning_rate": 1.0115513196533589e-08, "logits/chosen": -1.189020037651062, "logits/rejected": -0.978621780872345, "logps/chosen": -424.1830139160156, "logps/rejected": -472.623779296875, "loss": 0.0104, "rewards/accuracies": 0.625, "rewards/chosen": -0.16157285869121552, "rewards/margins": 0.06574898958206177, "rewards/rejected": -0.2273218333721161, "step": 14890 }, { "epoch": 0.97, "learning_rate": 9.608876597330952e-09, "logits/chosen": -1.1088173389434814, "logits/rejected": -0.8403303027153015, "logps/chosen": -508.7870178222656, "logps/rejected": -598.1332397460938, "loss": 0.0249, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2221643030643463, "rewards/margins": 0.10103151947259903, "rewards/rejected": -0.32319581508636475, "step": 14900 }, { "epoch": 0.97, "eval_logits/chosen": -1.0151350498199463, "eval_logits/rejected": -0.8850466012954712, "eval_logps/chosen": -412.6695556640625, "eval_logps/rejected": -480.15216064453125, "eval_loss": 0.022422218695282936, "eval_rewards/accuracies": 0.6765000224113464, "eval_rewards/chosen": -0.18066461384296417, "eval_rewards/margins": 0.08787565678358078, "eval_rewards/rejected": -0.26854029297828674, "eval_runtime": 712.9764, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.403, "step": 14900 }, { "epoch": 0.98, "learning_rate": 9.115231098159594e-09, "logits/chosen": -1.211159110069275, "logits/rejected": -1.17600417137146, "logps/chosen": -427.99261474609375, "logps/rejected": -479.72509765625, "loss": 0.0242, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.17467887699604034, "rewards/margins": 0.07203441858291626, "rewards/rejected": -0.246713325381279, "step": 14910 }, { "epoch": 0.98, "learning_rate": 8.634579274116317e-09, "logits/chosen": -0.9472244381904602, "logits/rejected": -0.9791864156723022, "logps/chosen": -360.17901611328125, "logps/rejected": -470.5113220214844, "loss": 0.0367, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1701197624206543, "rewards/margins": 0.08972278982400894, "rewards/rejected": -0.25984251499176025, "step": 14920 }, { "epoch": 0.98, "learning_rate": 8.166923632516865e-09, "logits/chosen": -1.213653564453125, "logits/rejected": -0.9738423228263855, "logps/chosen": -412.91644287109375, "logps/rejected": -596.3888549804688, "loss": 0.016, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1851852536201477, "rewards/margins": 0.14584533870220184, "rewards/rejected": -0.33103060722351074, "step": 14930 }, { "epoch": 0.98, "learning_rate": 7.712266612881492e-09, "logits/chosen": -0.8014596104621887, "logits/rejected": -0.6822771430015564, "logps/chosen": -322.455810546875, "logps/rejected": -399.3749084472656, "loss": 0.0301, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1372191160917282, "rewards/margins": 0.08179493248462677, "rewards/rejected": -0.21901404857635498, "step": 14940 }, { "epoch": 0.98, "learning_rate": 7.270610586924687e-09, "logits/chosen": -1.2096117734909058, "logits/rejected": -1.0834453105926514, "logps/chosen": -419.6796875, "logps/rejected": -459.508056640625, "loss": 0.0096, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15471436083316803, "rewards/margins": 0.08390868455171585, "rewards/rejected": -0.23862306773662567, "step": 14950 }, { "epoch": 0.98, "learning_rate": 6.841957858539916e-09, "logits/chosen": -0.9617868661880493, "logits/rejected": -0.9451783299446106, "logps/chosen": -370.98614501953125, "logps/rejected": -446.4874572753906, "loss": 0.0355, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20433056354522705, "rewards/margins": 0.06465059518814087, "rewards/rejected": -0.26898112893104553, "step": 14960 }, { "epoch": 0.98, "learning_rate": 6.426310663790181e-09, "logits/chosen": -0.9597945213317871, "logits/rejected": -0.6100262999534607, "logps/chosen": -415.11669921875, "logps/rejected": -447.75128173828125, "loss": 0.0128, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.17814096808433533, "rewards/margins": 0.06410295516252518, "rewards/rejected": -0.2422439157962799, "step": 14970 }, { "epoch": 0.98, "learning_rate": 6.023671170894696e-09, "logits/chosen": -1.3031483888626099, "logits/rejected": -0.9531248211860657, "logps/chosen": -453.65570068359375, "logps/rejected": -495.48455810546875, "loss": 0.0179, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16132713854312897, "rewards/margins": 0.12860846519470215, "rewards/rejected": -0.2899356186389923, "step": 14980 }, { "epoch": 0.98, "learning_rate": 5.634041480218344e-09, "logits/chosen": -1.0245569944381714, "logits/rejected": -1.2269831895828247, "logps/chosen": -443.31768798828125, "logps/rejected": -510.7289123535156, "loss": 0.0134, "rewards/accuracies": 0.75, "rewards/chosen": -0.19024410843849182, "rewards/margins": 0.07185164839029312, "rewards/rejected": -0.26209574937820435, "step": 14990 }, { "epoch": 0.98, "learning_rate": 5.257423624260849e-09, "logits/chosen": -1.22832453250885, "logits/rejected": -0.9209644198417664, "logps/chosen": -442.29913330078125, "logps/rejected": -468.2791442871094, "loss": 0.0184, "rewards/accuracies": 0.625, "rewards/chosen": -0.17897573113441467, "rewards/margins": 0.06922327727079391, "rewards/rejected": -0.24819903075695038, "step": 15000 }, { "epoch": 0.98, "eval_logits/chosen": -1.01541268825531, "eval_logits/rejected": -0.8854283094406128, "eval_logps/chosen": -412.4375, "eval_logps/rejected": -479.8431701660156, "eval_loss": 0.022424427792429924, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -0.18043257296085358, "eval_rewards/margins": 0.08779869973659515, "eval_rewards/rejected": -0.26823127269744873, "eval_runtime": 714.372, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 15000 }, { "epoch": 0.98, "learning_rate": 4.893819567644564e-09, "logits/chosen": -0.9104039072990417, "logits/rejected": -0.8607856631278992, "logps/chosen": -349.2388916015625, "logps/rejected": -408.3136901855469, "loss": 0.0325, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16124212741851807, "rewards/margins": 0.048898786306381226, "rewards/rejected": -0.21014094352722168, "step": 15010 }, { "epoch": 0.98, "learning_rate": 4.543231207107257e-09, "logits/chosen": -0.8613435626029968, "logits/rejected": -0.9513009190559387, "logps/chosen": -437.56036376953125, "logps/rejected": -487.5596618652344, "loss": 0.0351, "rewards/accuracies": 0.625, "rewards/chosen": -0.17973878979682922, "rewards/margins": 0.07360804080963135, "rewards/rejected": -0.25334686040878296, "step": 15020 }, { "epoch": 0.98, "learning_rate": 4.205660371488785e-09, "logits/chosen": -1.2786108255386353, "logits/rejected": -1.2010009288787842, "logps/chosen": -474.392578125, "logps/rejected": -490.1263122558594, "loss": 0.0157, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1902393102645874, "rewards/margins": 0.05752365663647652, "rewards/rejected": -0.24776296317577362, "step": 15030 }, { "epoch": 0.98, "learning_rate": 3.88110882172471e-09, "logits/chosen": -1.0316927433013916, "logits/rejected": -1.0397982597351074, "logps/chosen": -406.83074951171875, "logps/rejected": -462.4082946777344, "loss": 0.0195, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18613232672214508, "rewards/margins": 0.060214150696992874, "rewards/rejected": -0.24634647369384766, "step": 15040 }, { "epoch": 0.98, "learning_rate": 3.569578250834371e-09, "logits/chosen": -0.9364603161811829, "logits/rejected": -1.027195692062378, "logps/chosen": -473.58526611328125, "logps/rejected": -559.6849365234375, "loss": 0.0209, "rewards/accuracies": 0.75, "rewards/chosen": -0.17160747945308685, "rewards/margins": 0.12386244535446167, "rewards/rejected": -0.29546990990638733, "step": 15050 }, { "epoch": 0.99, "learning_rate": 3.2710702839139353e-09, "logits/chosen": -1.0977728366851807, "logits/rejected": -1.0135737657546997, "logps/chosen": -374.6515808105469, "logps/rejected": -436.9683532714844, "loss": 0.0187, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17005228996276855, "rewards/margins": 0.053043585270643234, "rewards/rejected": -0.22309589385986328, "step": 15060 }, { "epoch": 0.99, "learning_rate": 2.9855864781272448e-09, "logits/chosen": -1.1772810220718384, "logits/rejected": -1.3017627000808716, "logps/chosen": -384.97149658203125, "logps/rejected": -480.2608947753906, "loss": 0.0125, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17329783737659454, "rewards/margins": 0.06049531698226929, "rewards/rejected": -0.23379313945770264, "step": 15070 }, { "epoch": 0.99, "learning_rate": 2.7131283226977665e-09, "logits/chosen": -1.0709807872772217, "logits/rejected": -1.150120496749878, "logps/chosen": -384.86865234375, "logps/rejected": -486.97589111328125, "loss": 0.011, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16542038321495056, "rewards/margins": 0.0882570743560791, "rewards/rejected": -0.25367745757102966, "step": 15080 }, { "epoch": 0.99, "learning_rate": 2.4536972389008205e-09, "logits/chosen": -0.9590663909912109, "logits/rejected": -0.9262090921401978, "logps/chosen": -408.18389892578125, "logps/rejected": -482.551513671875, "loss": 0.0263, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17104893922805786, "rewards/margins": 0.1120586171746254, "rewards/rejected": -0.28310757875442505, "step": 15090 }, { "epoch": 0.99, "learning_rate": 2.20729458005553e-09, "logits/chosen": -0.8740189671516418, "logits/rejected": -0.5953198671340942, "logps/chosen": -368.6212463378906, "logps/rejected": -458.10992431640625, "loss": 0.0345, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1742120236158371, "rewards/margins": 0.11169035732746124, "rewards/rejected": -0.28590235114097595, "step": 15100 }, { "epoch": 0.99, "eval_logits/chosen": -1.0139069557189941, "eval_logits/rejected": -0.8839650750160217, "eval_logps/chosen": -412.1547546386719, "eval_logps/rejected": -479.4682922363281, "eval_loss": 0.022419892251491547, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -0.18014982342720032, "eval_rewards/margins": 0.0877065360546112, "eval_rewards/rejected": -0.2678563892841339, "eval_runtime": 716.0486, "eval_samples_per_second": 2.793, "eval_steps_per_second": 1.397, "step": 15100 }, { "epoch": 0.99, "learning_rate": 1.9739216315192712e-09, "logits/chosen": -1.0638076066970825, "logits/rejected": -0.8134995698928833, "logps/chosen": -402.1220703125, "logps/rejected": -433.20745849609375, "loss": 0.0208, "rewards/accuracies": 0.625, "rewards/chosen": -0.1728132665157318, "rewards/margins": 0.05478282645344734, "rewards/rejected": -0.22759607434272766, "step": 15110 }, { "epoch": 0.99, "learning_rate": 1.7535796106796231e-09, "logits/chosen": -1.108524203300476, "logits/rejected": -0.831976056098938, "logps/chosen": -449.5509338378906, "logps/rejected": -442.140625, "loss": 0.0156, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17401449382305145, "rewards/margins": 0.07292065769433975, "rewards/rejected": -0.24693512916564941, "step": 15120 }, { "epoch": 0.99, "learning_rate": 1.5462696669482636e-09, "logits/chosen": -1.2247201204299927, "logits/rejected": -1.0145584344863892, "logps/chosen": -369.4170837402344, "logps/rejected": -464.53961181640625, "loss": 0.0083, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14817500114440918, "rewards/margins": 0.08747845888137817, "rewards/rejected": -0.23565344512462616, "step": 15130 }, { "epoch": 0.99, "learning_rate": 1.3519928817556927e-09, "logits/chosen": -0.9744860529899597, "logits/rejected": -0.8265706896781921, "logps/chosen": -325.4532775878906, "logps/rejected": -394.0386657714844, "loss": 0.0305, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.15205980837345123, "rewards/margins": 0.0680384486913681, "rewards/rejected": -0.22009825706481934, "step": 15140 }, { "epoch": 0.99, "learning_rate": 1.1707502685448512e-09, "logits/chosen": -1.222806692123413, "logits/rejected": -0.991363525390625, "logps/chosen": -432.4925842285156, "logps/rejected": -479.3206481933594, "loss": 0.0226, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21022257208824158, "rewards/margins": 0.10501657426357269, "rewards/rejected": -0.31523916125297546, "step": 15150 }, { "epoch": 0.99, "learning_rate": 1.002542772765569e-09, "logits/chosen": -0.8543386459350586, "logits/rejected": -0.6841505765914917, "logps/chosen": -362.15948486328125, "logps/rejected": -415.60968017578125, "loss": 0.026, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.17355568706989288, "rewards/margins": 0.09175783395767212, "rewards/rejected": -0.2653135359287262, "step": 15160 }, { "epoch": 0.99, "learning_rate": 8.473712718709559e-10, "logits/chosen": -1.057591199874878, "logits/rejected": -1.0639384984970093, "logps/chosen": -368.8695068359375, "logps/rejected": -393.35028076171875, "loss": 0.0282, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.1751246303319931, "rewards/margins": 0.03525886684656143, "rewards/rejected": -0.21038350462913513, "step": 15170 }, { "epoch": 0.99, "learning_rate": 7.052365753112966e-10, "logits/chosen": -0.7798526287078857, "logits/rejected": -0.821710467338562, "logps/chosen": -421.478271484375, "logps/rejected": -502.4510803222656, "loss": 0.0299, "rewards/accuracies": 0.625, "rewards/chosen": -0.18616220355033875, "rewards/margins": 0.09974847733974457, "rewards/rejected": -0.2859106659889221, "step": 15180 }, { "epoch": 0.99, "learning_rate": 5.761394245307195e-10, "logits/chosen": -0.8407600522041321, "logits/rejected": -0.9530426263809204, "logps/chosen": -414.9033203125, "logps/rejected": -461.0182189941406, "loss": 0.006, "rewards/accuracies": 0.5, "rewards/chosen": -0.17782112956047058, "rewards/margins": 0.04732637479901314, "rewards/rejected": -0.22514751553535461, "step": 15190 }, { "epoch": 0.99, "learning_rate": 4.6008049296358826e-10, "logits/chosen": -1.132432222366333, "logits/rejected": -0.9648089408874512, "logps/chosen": -373.22039794921875, "logps/rejected": -426.13726806640625, "loss": 0.0244, "rewards/accuracies": 0.625, "rewards/chosen": -0.18719780445098877, "rewards/margins": 0.0811605304479599, "rewards/rejected": -0.26835834980010986, "step": 15200 }, { "epoch": 0.99, "eval_logits/chosen": -1.0162615776062012, "eval_logits/rejected": -0.8862256407737732, "eval_logps/chosen": -412.27239990234375, "eval_logps/rejected": -479.6047668457031, "eval_loss": 0.022429008036851883, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -0.18026746809482574, "eval_rewards/margins": 0.08772538602352142, "eval_rewards/rejected": -0.26799285411834717, "eval_runtime": 714.0884, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.4, "step": 15200 }, { "epoch": 1.0, "learning_rate": 3.5706038603006146e-10, "logits/chosen": -1.0432007312774658, "logits/rejected": -1.0450470447540283, "logps/chosen": -428.7537536621094, "logps/rejected": -490.9500427246094, "loss": 0.0161, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13914336264133453, "rewards/margins": 0.07643941789865494, "rewards/rejected": -0.21558280289173126, "step": 15210 }, { "epoch": 1.0, "learning_rate": 2.670796411333165e-10, "logits/chosen": -1.371409296989441, "logits/rejected": -1.0303653478622437, "logps/chosen": -380.7409973144531, "logps/rejected": -470.3077697753906, "loss": 0.0213, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1609295755624771, "rewards/margins": 0.10187284648418427, "rewards/rejected": -0.262802392244339, "step": 15220 }, { "epoch": 1.0, "learning_rate": 1.9013872765677455e-10, "logits/chosen": -1.090969204902649, "logits/rejected": -0.9855135083198547, "logps/chosen": -365.7977600097656, "logps/rejected": -405.6406555175781, "loss": 0.0162, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.14628463983535767, "rewards/margins": 0.05237749218940735, "rewards/rejected": -0.19866213202476501, "step": 15230 }, { "epoch": 1.0, "learning_rate": 1.262380469624347e-10, "logits/chosen": -1.0214941501617432, "logits/rejected": -0.8351333737373352, "logps/chosen": -360.6355895996094, "logps/rejected": -408.03729248046875, "loss": 0.0347, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.15662868320941925, "rewards/margins": 0.07031144201755524, "rewards/rejected": -0.2269401103258133, "step": 15240 }, { "epoch": 1.0, "learning_rate": 7.53779323872661e-11, "logits/chosen": -0.9991758465766907, "logits/rejected": -0.9424724578857422, "logps/chosen": -371.97161865234375, "logps/rejected": -472.28411865234375, "loss": 0.0235, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.18010824918746948, "rewards/margins": 0.09426857531070709, "rewards/rejected": -0.2743768095970154, "step": 15250 }, { "epoch": 1.0, "learning_rate": 3.7558649242652734e-11, "logits/chosen": -1.4500172138214111, "logits/rejected": -1.1172897815704346, "logps/chosen": -591.0137939453125, "logps/rejected": -583.2401733398438, "loss": 0.0191, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19433000683784485, "rewards/margins": 0.07179728150367737, "rewards/rejected": -0.2661272883415222, "step": 15260 }, { "epoch": 1.0, "learning_rate": 1.2780394812450526e-11, "logits/chosen": -0.8290607333183289, "logits/rejected": -0.8750106692314148, "logps/chosen": -424.06207275390625, "logps/rejected": -523.3733520507812, "loss": 0.0151, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.191918283700943, "rewards/margins": 0.09939392656087875, "rewards/rejected": -0.29131221771240234, "step": 15270 }, { "epoch": 1.0, "learning_rate": 1.0432983521546646e-12, "logits/chosen": -0.891390323638916, "logits/rejected": -0.8818936347961426, "logps/chosen": -372.8388366699219, "logps/rejected": -484.38360595703125, "loss": 0.0256, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18937267363071442, "rewards/margins": 0.0926990732550621, "rewards/rejected": -0.2820717394351959, "step": 15280 }, { "epoch": 1.0, "step": 15284, "total_flos": 0.0, "train_loss": 0.028396672180453324, "train_runtime": 172620.3313, "train_samples_per_second": 0.354, "train_steps_per_second": 0.089 } ], "logging_steps": 10, "max_steps": 15284, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }