{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 15284, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.270111183780249e-09, "logits/chosen": -2.634561777114868, "logits/rejected": -2.673060417175293, "logps/chosen": -207.5323944091797, "logps/rejected": -286.9266052246094, "loss": 0.0999, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 3.270111183780249e-08, "logits/chosen": -2.2176363468170166, "logits/rejected": -1.965279459953308, "logps/chosen": -185.93206787109375, "logps/rejected": -165.39874267578125, "loss": 0.0677, "rewards/accuracies": 0.2777777910232544, "rewards/chosen": -5.8274250477552414e-05, "rewards/margins": -0.0002318211190868169, "rewards/rejected": 0.0001735468686092645, "step": 10 }, { "epoch": 0.0, "learning_rate": 6.540222367560497e-08, "logits/chosen": -2.4317684173583984, "logits/rejected": -2.2229201793670654, "logps/chosen": -232.4213409423828, "logps/rejected": -231.39962768554688, "loss": 0.0519, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.00013405675417743623, "rewards/margins": 1.7013855540426448e-05, "rewards/rejected": 0.00011704283679137006, "step": 20 }, { "epoch": 0.0, "learning_rate": 9.810333551340746e-08, "logits/chosen": -2.258877754211426, "logits/rejected": -2.162977695465088, "logps/chosen": -197.35354614257812, "logps/rejected": -219.13766479492188, "loss": 0.0565, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.00035292975371703506, "rewards/margins": 0.0005842813989147544, "rewards/rejected": -0.00023135158699005842, "step": 30 }, { "epoch": 0.0, "learning_rate": 1.3080444735120995e-07, "logits/chosen": -2.2115323543548584, "logits/rejected": -2.2505645751953125, "logps/chosen": -276.000244140625, "logps/rejected": -265.74652099609375, "loss": 0.0472, "rewards/accuracies": 0.375, "rewards/chosen": -6.294570630416274e-05, "rewards/margins": -0.00012715658522211015, "rewards/rejected": 6.421087891794741e-05, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.6350555918901243e-07, "logits/chosen": -2.348076581954956, "logits/rejected": -2.141223192214966, "logps/chosen": -204.8376007080078, "logps/rejected": -184.71292114257812, "loss": 0.0762, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.00016025640070438385, "rewards/margins": -7.07902800058946e-05, "rewards/rejected": -8.946609159465879e-05, "step": 50 }, { "epoch": 0.0, "learning_rate": 1.9620667102681492e-07, "logits/chosen": -2.3073477745056152, "logits/rejected": -2.067842483520508, "logps/chosen": -209.7217254638672, "logps/rejected": -185.87832641601562, "loss": 0.0891, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": 0.00042088530608452857, "rewards/margins": -0.00010498131450731307, "rewards/rejected": 0.000525866576936096, "step": 60 }, { "epoch": 0.0, "learning_rate": 2.289077828646174e-07, "logits/chosen": -2.2689318656921387, "logits/rejected": -2.1558871269226074, "logps/chosen": -218.03952026367188, "logps/rejected": -207.98361206054688, "loss": 0.0432, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 0.00035006407415494323, "rewards/margins": 6.475891859736294e-05, "rewards/rejected": 0.0002853051701094955, "step": 70 }, { "epoch": 0.01, "learning_rate": 2.616088947024199e-07, "logits/chosen": -2.5095014572143555, "logits/rejected": -2.228682041168213, "logps/chosen": -258.87213134765625, "logps/rejected": -213.68508911132812, "loss": 0.0631, "rewards/accuracies": 0.375, "rewards/chosen": 0.0003914732369594276, "rewards/margins": -9.930254600476474e-05, "rewards/rejected": 0.0004907757975161076, "step": 80 }, { "epoch": 0.01, "learning_rate": 2.943100065402224e-07, "logits/chosen": -2.258244037628174, "logits/rejected": -2.173391819000244, "logps/chosen": -184.66891479492188, "logps/rejected": -165.49636840820312, "loss": 0.0224, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.0005794145399704576, "rewards/margins": 0.0002798104251269251, "rewards/rejected": 0.00029960396932438016, "step": 90 }, { "epoch": 0.01, "learning_rate": 3.2701111837802487e-07, "logits/chosen": -2.431363344192505, "logits/rejected": -2.425813674926758, "logps/chosen": -168.80804443359375, "logps/rejected": -183.89962768554688, "loss": 0.0706, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": 0.0006342270062305033, "rewards/margins": -0.0003130243276245892, "rewards/rejected": 0.0009472514502704144, "step": 100 }, { "epoch": 0.01, "eval_logits/chosen": -2.3487260341644287, "eval_logits/rejected": -2.1602516174316406, "eval_logps/chosen": -231.7681884765625, "eval_logps/rejected": -211.45697021484375, "eval_loss": 0.05356210842728615, "eval_rewards/accuracies": 0.4925000071525574, "eval_rewards/chosen": 0.0011838467326015234, "eval_rewards/margins": 0.0004092271556146443, "eval_rewards/rejected": 0.0007746195769868791, "eval_runtime": 712.6016, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.403, "step": 100 }, { "epoch": 0.01, "learning_rate": 3.5971223021582736e-07, "logits/chosen": -2.3473939895629883, "logits/rejected": -1.9998290538787842, "logps/chosen": -222.42153930664062, "logps/rejected": -167.03335571289062, "loss": 0.0746, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.0018700523069128394, "rewards/margins": 0.000863722525537014, "rewards/rejected": 0.0010063296649605036, "step": 110 }, { "epoch": 0.01, "learning_rate": 3.9241334205362984e-07, "logits/chosen": -2.340332508087158, "logits/rejected": -2.245119571685791, "logps/chosen": -224.01327514648438, "logps/rejected": -234.0855255126953, "loss": 0.0405, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": 0.0013942383229732513, "rewards/margins": -3.433373785810545e-05, "rewards/rejected": 0.0014285718789324164, "step": 120 }, { "epoch": 0.01, "learning_rate": 4.251144538914324e-07, "logits/chosen": -2.261899471282959, "logits/rejected": -2.217729091644287, "logps/chosen": -149.3909454345703, "logps/rejected": -148.33004760742188, "loss": 0.0442, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.0012747000437229872, "rewards/margins": 0.0008335200254805386, "rewards/rejected": 0.0004411800764501095, "step": 130 }, { "epoch": 0.01, "learning_rate": 4.578155657292348e-07, "logits/chosen": -2.3222341537475586, "logits/rejected": -2.223220109939575, "logps/chosen": -225.5640106201172, "logps/rejected": -159.45921325683594, "loss": 0.0515, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0019747335463762283, "rewards/margins": 0.000815176113974303, "rewards/rejected": 0.0011595574906095862, "step": 140 }, { "epoch": 0.01, "learning_rate": 4.905166775670374e-07, "logits/chosen": -2.367952585220337, "logits/rejected": -2.1589996814727783, "logps/chosen": -231.03799438476562, "logps/rejected": -229.10598754882812, "loss": 0.097, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0015882834559306502, "rewards/margins": 0.001329472055658698, "rewards/rejected": 0.0002588116331025958, "step": 150 }, { "epoch": 0.01, "learning_rate": 5.232177894048398e-07, "logits/chosen": -2.213862180709839, "logits/rejected": -2.2266170978546143, "logps/chosen": -260.2805480957031, "logps/rejected": -224.8443145751953, "loss": 0.0642, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.002939788158982992, "rewards/margins": 0.002163815079256892, "rewards/rejected": 0.0007759730797261, "step": 160 }, { "epoch": 0.01, "learning_rate": 5.559189012426422e-07, "logits/chosen": -2.314908981323242, "logits/rejected": -2.0375542640686035, "logps/chosen": -180.51136779785156, "logps/rejected": -156.74420166015625, "loss": 0.0532, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0017061032122001052, "rewards/margins": 0.0009744632989168167, "rewards/rejected": 0.0007316397386603057, "step": 170 }, { "epoch": 0.01, "learning_rate": 5.886200130804448e-07, "logits/chosen": -2.3991408348083496, "logits/rejected": -2.3391599655151367, "logps/chosen": -217.56588745117188, "logps/rejected": -198.68756103515625, "loss": 0.0484, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0033133826218545437, "rewards/margins": 0.0027288985438644886, "rewards/rejected": 0.0005844842526130378, "step": 180 }, { "epoch": 0.01, "learning_rate": 6.213211249182473e-07, "logits/chosen": -2.0700626373291016, "logits/rejected": -2.175288677215576, "logps/chosen": -190.9939727783203, "logps/rejected": -208.7053985595703, "loss": 0.0427, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0025113599840551615, "rewards/margins": 0.0027750071603804827, "rewards/rejected": -0.000263646972598508, "step": 190 }, { "epoch": 0.01, "learning_rate": 6.540222367560497e-07, "logits/chosen": -2.2762703895568848, "logits/rejected": -2.2404098510742188, "logps/chosen": -146.71702575683594, "logps/rejected": -177.6888427734375, "loss": 0.0614, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0024750891607254744, "rewards/margins": 0.003276436123996973, "rewards/rejected": -0.0008013470214791596, "step": 200 }, { "epoch": 0.01, "eval_logits/chosen": -2.349438428878784, "eval_logits/rejected": -2.1609809398651123, "eval_logps/chosen": -231.4156036376953, "eval_logps/rejected": -211.6427459716797, "eval_loss": 0.05235092341899872, "eval_rewards/accuracies": 0.5889999866485596, "eval_rewards/chosen": 0.0029466315172612667, "eval_rewards/margins": 0.0031008960213512182, "eval_rewards/rejected": -0.00015426499885506928, "eval_runtime": 715.0358, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.399, "step": 200 }, { "epoch": 0.01, "learning_rate": 6.867233485938523e-07, "logits/chosen": -2.4264657497406006, "logits/rejected": -2.202401638031006, "logps/chosen": -218.64462280273438, "logps/rejected": -188.3276824951172, "loss": 0.0449, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.004444460850208998, "rewards/margins": 0.005253319162875414, "rewards/rejected": -0.000808858429081738, "step": 210 }, { "epoch": 0.01, "learning_rate": 7.194244604316547e-07, "logits/chosen": -2.2130541801452637, "logits/rejected": -2.040802240371704, "logps/chosen": -182.76869201660156, "logps/rejected": -174.8404083251953, "loss": 0.0287, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.002838475862517953, "rewards/margins": 0.0023240107111632824, "rewards/rejected": 0.000514464860316366, "step": 220 }, { "epoch": 0.02, "learning_rate": 7.521255722694571e-07, "logits/chosen": -2.4207396507263184, "logits/rejected": -2.0251927375793457, "logps/chosen": -278.82373046875, "logps/rejected": -184.07823181152344, "loss": 0.0582, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.004778166767209768, "rewards/margins": 0.0035492833703756332, "rewards/rejected": 0.001228884095326066, "step": 230 }, { "epoch": 0.02, "learning_rate": 7.848266841072597e-07, "logits/chosen": -2.2012083530426025, "logits/rejected": -2.1659531593322754, "logps/chosen": -214.45327758789062, "logps/rejected": -206.27804565429688, "loss": 0.0648, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.006988098379224539, "rewards/margins": 0.0069609819911420345, "rewards/rejected": 2.711568959057331e-05, "step": 240 }, { "epoch": 0.02, "learning_rate": 8.175277959450622e-07, "logits/chosen": -2.168360471725464, "logits/rejected": -2.3281311988830566, "logps/chosen": -217.9949493408203, "logps/rejected": -220.6697540283203, "loss": 0.0215, "rewards/accuracies": 0.625, "rewards/chosen": 0.012156028300523758, "rewards/margins": 0.005674418993294239, "rewards/rejected": 0.0064816102385520935, "step": 250 }, { "epoch": 0.02, "learning_rate": 8.502289077828648e-07, "logits/chosen": -2.5064778327941895, "logits/rejected": -2.1448421478271484, "logps/chosen": -254.0000457763672, "logps/rejected": -188.9385986328125, "loss": 0.0525, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.015774715691804886, "rewards/margins": 0.006485472433269024, "rewards/rejected": 0.009289243258535862, "step": 260 }, { "epoch": 0.02, "learning_rate": 8.829300196206672e-07, "logits/chosen": -2.423475742340088, "logits/rejected": -2.1279618740081787, "logps/chosen": -246.0577392578125, "logps/rejected": -230.53659057617188, "loss": 0.0873, "rewards/accuracies": 0.625, "rewards/chosen": 0.013765650801360607, "rewards/margins": 0.008541886694729328, "rewards/rejected": 0.005223765503615141, "step": 270 }, { "epoch": 0.02, "learning_rate": 9.156311314584696e-07, "logits/chosen": -2.3068556785583496, "logits/rejected": -2.193019151687622, "logps/chosen": -159.5777130126953, "logps/rejected": -146.2738494873047, "loss": 0.0397, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.013053646311163902, "rewards/margins": 0.0058270045556128025, "rewards/rejected": 0.007226643152534962, "step": 280 }, { "epoch": 0.02, "learning_rate": 9.483322432962722e-07, "logits/chosen": -2.554816722869873, "logits/rejected": -2.160402536392212, "logps/chosen": -281.8065185546875, "logps/rejected": -225.3990478515625, "loss": 0.0275, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.016861405223608017, "rewards/margins": 0.005430780816823244, "rewards/rejected": 0.011430625803768635, "step": 290 }, { "epoch": 0.02, "learning_rate": 9.810333551340747e-07, "logits/chosen": -2.341587543487549, "logits/rejected": -2.1964211463928223, "logps/chosen": -264.63897705078125, "logps/rejected": -238.7764129638672, "loss": 0.0495, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.020103512331843376, "rewards/margins": 0.01393399853259325, "rewards/rejected": 0.006169513799250126, "step": 300 }, { "epoch": 0.02, "eval_logits/chosen": -2.3531229496002197, "eval_logits/rejected": -2.1645307540893555, "eval_logps/chosen": -228.2109832763672, "eval_logps/rejected": -209.68736267089844, "eval_loss": 0.049944277852773666, "eval_rewards/accuracies": 0.5805000066757202, "eval_rewards/chosen": 0.018969887867569923, "eval_rewards/margins": 0.009347214363515377, "eval_rewards/rejected": 0.009622674435377121, "eval_runtime": 711.7456, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 300 }, { "epoch": 0.02, "learning_rate": 1.0137344669718771e-06, "logits/chosen": -2.350700855255127, "logits/rejected": -2.365297317504883, "logps/chosen": -166.68600463867188, "logps/rejected": -155.6295623779297, "loss": 0.0358, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.014021609909832478, "rewards/margins": 0.002970766741782427, "rewards/rejected": 0.011050843633711338, "step": 310 }, { "epoch": 0.02, "learning_rate": 1.0464355788096796e-06, "logits/chosen": -2.456861972808838, "logits/rejected": -2.061683177947998, "logps/chosen": -220.7519073486328, "logps/rejected": -191.7381591796875, "loss": 0.0499, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.022065896540880203, "rewards/margins": 0.008685490116477013, "rewards/rejected": 0.013380405493080616, "step": 320 }, { "epoch": 0.02, "learning_rate": 1.079136690647482e-06, "logits/chosen": -2.437948703765869, "logits/rejected": -2.2054593563079834, "logps/chosen": -202.8629150390625, "logps/rejected": -175.33372497558594, "loss": 0.0886, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.02774173393845558, "rewards/margins": 0.013089954853057861, "rewards/rejected": 0.014651775360107422, "step": 330 }, { "epoch": 0.02, "learning_rate": 1.1118378024852844e-06, "logits/chosen": -2.1878809928894043, "logits/rejected": -2.354051113128662, "logps/chosen": -150.0569610595703, "logps/rejected": -177.16937255859375, "loss": 0.0381, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.018287161365151405, "rewards/margins": -0.0016521146753802896, "rewards/rejected": 0.019939277321100235, "step": 340 }, { "epoch": 0.02, "learning_rate": 1.144538914323087e-06, "logits/chosen": -2.4268805980682373, "logits/rejected": -1.9881579875946045, "logps/chosen": -317.0654602050781, "logps/rejected": -247.81021118164062, "loss": 0.0528, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.025226067751646042, "rewards/margins": 0.017956417053937912, "rewards/rejected": 0.007269649300724268, "step": 350 }, { "epoch": 0.02, "learning_rate": 1.1772400261608895e-06, "logits/chosen": -2.4913463592529297, "logits/rejected": -2.197359085083008, "logps/chosen": -219.1552276611328, "logps/rejected": -192.1640167236328, "loss": 0.0422, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.03162415325641632, "rewards/margins": 0.01586632803082466, "rewards/rejected": 0.01575782522559166, "step": 360 }, { "epoch": 0.02, "learning_rate": 1.2099411379986922e-06, "logits/chosen": -2.1705100536346436, "logits/rejected": -2.2467100620269775, "logps/chosen": -191.59078979492188, "logps/rejected": -205.70358276367188, "loss": 0.0569, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": 0.029529735445976257, "rewards/margins": 0.006725566927343607, "rewards/rejected": 0.022804168984293938, "step": 370 }, { "epoch": 0.02, "learning_rate": 1.2426422498364946e-06, "logits/chosen": -2.332655429840088, "logits/rejected": -2.044221878051758, "logps/chosen": -215.96444702148438, "logps/rejected": -161.74484252929688, "loss": 0.0443, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.02841884456574917, "rewards/margins": 0.016788840293884277, "rewards/rejected": 0.011630003340542316, "step": 380 }, { "epoch": 0.03, "learning_rate": 1.2753433616742968e-06, "logits/chosen": -2.329526424407959, "logits/rejected": -2.2495083808898926, "logps/chosen": -180.93544006347656, "logps/rejected": -244.7272186279297, "loss": 0.0764, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.02182791940867901, "rewards/margins": 0.01554826833307743, "rewards/rejected": 0.006279653403908014, "step": 390 }, { "epoch": 0.03, "learning_rate": 1.3080444735120995e-06, "logits/chosen": -2.489765167236328, "logits/rejected": -2.1206138134002686, "logps/chosen": -219.66854858398438, "logps/rejected": -179.7932891845703, "loss": 0.065, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0291206743568182, "rewards/margins": 0.026874784380197525, "rewards/rejected": 0.002245891373604536, "step": 400 }, { "epoch": 0.03, "eval_logits/chosen": -2.3541910648345947, "eval_logits/rejected": -2.1655361652374268, "eval_logps/chosen": -227.16796875, "eval_logps/rejected": -210.1238555908203, "eval_loss": 0.04696543887257576, "eval_rewards/accuracies": 0.5979999899864197, "eval_rewards/chosen": 0.024184904992580414, "eval_rewards/margins": 0.016744675114750862, "eval_rewards/rejected": 0.007440229412168264, "eval_runtime": 711.4016, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.406, "step": 400 }, { "epoch": 0.03, "learning_rate": 1.3407455853499021e-06, "logits/chosen": -2.4746851921081543, "logits/rejected": -2.323978900909424, "logps/chosen": -256.46856689453125, "logps/rejected": -223.9653778076172, "loss": 0.0368, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.029423344880342484, "rewards/margins": 0.01946599781513214, "rewards/rejected": 0.009957349859178066, "step": 410 }, { "epoch": 0.03, "learning_rate": 1.3734466971877046e-06, "logits/chosen": -2.3022310733795166, "logits/rejected": -2.201465606689453, "logps/chosen": -176.49359130859375, "logps/rejected": -173.44068908691406, "loss": 0.0444, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0271303653717041, "rewards/margins": 0.022079220041632652, "rewards/rejected": 0.005051146261394024, "step": 420 }, { "epoch": 0.03, "learning_rate": 1.406147809025507e-06, "logits/chosen": -2.2948622703552246, "logits/rejected": -2.107673168182373, "logps/chosen": -209.0362091064453, "logps/rejected": -181.45956420898438, "loss": 0.0591, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.03055526874959469, "rewards/margins": 0.024613896384835243, "rewards/rejected": 0.005941365379840136, "step": 430 }, { "epoch": 0.03, "learning_rate": 1.4388489208633094e-06, "logits/chosen": -2.394108295440674, "logits/rejected": -2.107898235321045, "logps/chosen": -250.1384735107422, "logps/rejected": -222.7085418701172, "loss": 0.045, "rewards/accuracies": 0.5, "rewards/chosen": 0.021793970838189125, "rewards/margins": 0.012408947572112083, "rewards/rejected": 0.009385021403431892, "step": 440 }, { "epoch": 0.03, "learning_rate": 1.471550032701112e-06, "logits/chosen": -2.413292169570923, "logits/rejected": -2.2094132900238037, "logps/chosen": -185.10714721679688, "logps/rejected": -198.40866088867188, "loss": 0.0766, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.022703688591718674, "rewards/margins": 0.0375007800757885, "rewards/rejected": -0.014797091484069824, "step": 450 }, { "epoch": 0.03, "learning_rate": 1.5042511445389143e-06, "logits/chosen": -2.156865358352661, "logits/rejected": -2.204716920852661, "logps/chosen": -146.9668426513672, "logps/rejected": -214.1243133544922, "loss": 0.0952, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.01986575871706009, "rewards/margins": 0.01865716651082039, "rewards/rejected": 0.0012085925554856658, "step": 460 }, { "epoch": 0.03, "learning_rate": 1.536952256376717e-06, "logits/chosen": -2.046077013015747, "logits/rejected": -2.108354330062866, "logps/chosen": -197.14544677734375, "logps/rejected": -252.22900390625, "loss": 0.0563, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.01892588473856449, "rewards/margins": 0.04759754613041878, "rewards/rejected": -0.028671661391854286, "step": 470 }, { "epoch": 0.03, "learning_rate": 1.5696533682145194e-06, "logits/chosen": -2.4412786960601807, "logits/rejected": -2.2085611820220947, "logps/chosen": -178.488037109375, "logps/rejected": -153.7083740234375, "loss": 0.0382, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.02879374288022518, "rewards/margins": 0.03139546141028404, "rewards/rejected": -0.0026017185300588608, "step": 480 }, { "epoch": 0.03, "learning_rate": 1.602354480052322e-06, "logits/chosen": -2.4102816581726074, "logits/rejected": -2.3218040466308594, "logps/chosen": -260.2879638671875, "logps/rejected": -209.857177734375, "loss": 0.0475, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.03175920993089676, "rewards/margins": 0.02260466292500496, "rewards/rejected": 0.00915454886853695, "step": 490 }, { "epoch": 0.03, "learning_rate": 1.6350555918901245e-06, "logits/chosen": -2.1970908641815186, "logits/rejected": -1.8694721460342407, "logps/chosen": -218.60458374023438, "logps/rejected": -216.32119750976562, "loss": 0.04, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0026772418059408665, "rewards/margins": 0.061300646513700485, "rewards/rejected": -0.06397788971662521, "step": 500 }, { "epoch": 0.03, "eval_logits/chosen": -2.3290927410125732, "eval_logits/rejected": -2.1417860984802246, "eval_logps/chosen": -242.22723388671875, "eval_logps/rejected": -229.62611389160156, "eval_loss": 0.04155317693948746, "eval_rewards/accuracies": 0.612500011920929, "eval_rewards/chosen": -0.05111142620444298, "eval_rewards/margins": 0.0389595590531826, "eval_rewards/rejected": -0.09007100015878677, "eval_runtime": 715.9988, "eval_samples_per_second": 2.793, "eval_steps_per_second": 1.397, "step": 500 }, { "epoch": 0.03, "learning_rate": 1.6677567037279269e-06, "logits/chosen": -2.4629549980163574, "logits/rejected": -2.116116523742676, "logps/chosen": -308.35797119140625, "logps/rejected": -279.0551452636719, "loss": 0.015, "rewards/accuracies": 0.625, "rewards/chosen": -0.060471516102552414, "rewards/margins": 0.055410999804735184, "rewards/rejected": -0.1158825010061264, "step": 510 }, { "epoch": 0.03, "learning_rate": 1.7004578155657295e-06, "logits/chosen": -2.2604610919952393, "logits/rejected": -2.2810425758361816, "logps/chosen": -222.1859588623047, "logps/rejected": -210.0410614013672, "loss": 0.0338, "rewards/accuracies": 0.75, "rewards/chosen": -0.07733304798603058, "rewards/margins": 0.04137866944074631, "rewards/rejected": -0.11871170997619629, "step": 520 }, { "epoch": 0.03, "learning_rate": 1.7331589274035318e-06, "logits/chosen": -2.134936809539795, "logits/rejected": -2.0191311836242676, "logps/chosen": -194.81320190429688, "logps/rejected": -204.94418334960938, "loss": 0.047, "rewards/accuracies": 0.625, "rewards/chosen": -0.05443320795893669, "rewards/margins": 0.05342613533139229, "rewards/rejected": -0.10785935074090958, "step": 530 }, { "epoch": 0.04, "learning_rate": 1.7658600392413344e-06, "logits/chosen": -2.3616204261779785, "logits/rejected": -2.203824520111084, "logps/chosen": -219.65774536132812, "logps/rejected": -208.9588165283203, "loss": 0.0389, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08401162922382355, "rewards/margins": 0.0333532877266407, "rewards/rejected": -0.11736490577459335, "step": 540 }, { "epoch": 0.04, "learning_rate": 1.7985611510791368e-06, "logits/chosen": -2.3590781688690186, "logits/rejected": -1.9083945751190186, "logps/chosen": -275.25823974609375, "logps/rejected": -266.81097412109375, "loss": 0.034, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.058542896062135696, "rewards/margins": 0.03970601409673691, "rewards/rejected": -0.0982489138841629, "step": 550 }, { "epoch": 0.04, "learning_rate": 1.8312622629169393e-06, "logits/chosen": -2.269632577896118, "logits/rejected": -2.1327080726623535, "logps/chosen": -279.70147705078125, "logps/rejected": -247.4019012451172, "loss": 0.0447, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.08123533427715302, "rewards/margins": 0.011105300858616829, "rewards/rejected": -0.0923406332731247, "step": 560 }, { "epoch": 0.04, "learning_rate": 1.8639633747547417e-06, "logits/chosen": -2.358708620071411, "logits/rejected": -2.187506675720215, "logps/chosen": -244.6337127685547, "logps/rejected": -213.9905548095703, "loss": 0.0687, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07744290679693222, "rewards/margins": 0.024968545883893967, "rewards/rejected": -0.10241146385669708, "step": 570 }, { "epoch": 0.04, "learning_rate": 1.8966644865925443e-06, "logits/chosen": -2.209902286529541, "logits/rejected": -2.238129138946533, "logps/chosen": -255.62509155273438, "logps/rejected": -270.8184814453125, "loss": 0.0611, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08208145946264267, "rewards/margins": 0.018994811922311783, "rewards/rejected": -0.10107628256082535, "step": 580 }, { "epoch": 0.04, "learning_rate": 1.9293655984303466e-06, "logits/chosen": -2.6335816383361816, "logits/rejected": -2.211545467376709, "logps/chosen": -296.67333984375, "logps/rejected": -218.890869140625, "loss": 0.062, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06853550672531128, "rewards/margins": 0.026111140847206116, "rewards/rejected": -0.0946466475725174, "step": 590 }, { "epoch": 0.04, "learning_rate": 1.9620667102681494e-06, "logits/chosen": -2.366116762161255, "logits/rejected": -2.3787477016448975, "logps/chosen": -199.81320190429688, "logps/rejected": -188.07069396972656, "loss": 0.0313, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.052803684026002884, "rewards/margins": 0.012890547513961792, "rewards/rejected": -0.06569422781467438, "step": 600 }, { "epoch": 0.04, "eval_logits/chosen": -2.3278696537017822, "eval_logits/rejected": -2.140028715133667, "eval_logps/chosen": -241.02426147460938, "eval_logps/rejected": -227.2308807373047, "eval_loss": 0.041324835270643234, "eval_rewards/accuracies": 0.6159999966621399, "eval_rewards/chosen": -0.04509655386209488, "eval_rewards/margins": 0.032998330891132355, "eval_rewards/rejected": -0.07809487730264664, "eval_runtime": 714.8275, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 600 }, { "epoch": 0.04, "learning_rate": 1.994767822105952e-06, "logits/chosen": -2.27354097366333, "logits/rejected": -2.180142402648926, "logps/chosen": -182.80899047851562, "logps/rejected": -197.1210174560547, "loss": 0.0782, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.038026656955480576, "rewards/margins": 0.03938506916165352, "rewards/rejected": -0.0774117261171341, "step": 610 }, { "epoch": 0.04, "learning_rate": 2.0274689339437543e-06, "logits/chosen": -2.2067208290100098, "logits/rejected": -1.9959625005722046, "logps/chosen": -282.072509765625, "logps/rejected": -252.33084106445312, "loss": 0.0265, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.016913847997784615, "rewards/margins": 0.0280628465116024, "rewards/rejected": -0.044976696372032166, "step": 620 }, { "epoch": 0.04, "learning_rate": 2.0601700457815567e-06, "logits/chosen": -2.360585927963257, "logits/rejected": -1.9948257207870483, "logps/chosen": -266.9506530761719, "logps/rejected": -228.4451141357422, "loss": 0.0394, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03797965496778488, "rewards/margins": 0.025038376450538635, "rewards/rejected": -0.06301803141832352, "step": 630 }, { "epoch": 0.04, "learning_rate": 2.092871157619359e-06, "logits/chosen": -2.409940004348755, "logits/rejected": -2.2259554862976074, "logps/chosen": -180.29090881347656, "logps/rejected": -202.35862731933594, "loss": 0.0327, "rewards/accuracies": 0.625, "rewards/chosen": -0.018143683671951294, "rewards/margins": 0.04178124666213989, "rewards/rejected": -0.059924930334091187, "step": 640 }, { "epoch": 0.04, "learning_rate": 2.1255722694571616e-06, "logits/chosen": -2.4271957874298096, "logits/rejected": -2.0304219722747803, "logps/chosen": -279.4947204589844, "logps/rejected": -195.42269897460938, "loss": 0.0465, "rewards/accuracies": 0.625, "rewards/chosen": -0.003000382799655199, "rewards/margins": 0.04447781294584274, "rewards/rejected": -0.04747819900512695, "step": 650 }, { "epoch": 0.04, "learning_rate": 2.158273381294964e-06, "logits/chosen": -2.3339033126831055, "logits/rejected": -2.228659152984619, "logps/chosen": -211.0321044921875, "logps/rejected": -219.0637664794922, "loss": 0.041, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.006974533200263977, "rewards/margins": 0.012601134367287159, "rewards/rejected": -0.005626601167023182, "step": 660 }, { "epoch": 0.04, "learning_rate": 2.190974493132767e-06, "logits/chosen": -2.330313205718994, "logits/rejected": -2.0267796516418457, "logps/chosen": -235.9166259765625, "logps/rejected": -188.2518768310547, "loss": 0.0471, "rewards/accuracies": 0.625, "rewards/chosen": 0.008812380954623222, "rewards/margins": 0.042960282415151596, "rewards/rejected": -0.034147895872592926, "step": 670 }, { "epoch": 0.04, "learning_rate": 2.223675604970569e-06, "logits/chosen": -2.39384388923645, "logits/rejected": -2.1898155212402344, "logps/chosen": -208.85214233398438, "logps/rejected": -198.39894104003906, "loss": 0.0353, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.009410612285137177, "rewards/margins": 0.034385450184345245, "rewards/rejected": -0.02497483789920807, "step": 680 }, { "epoch": 0.05, "learning_rate": 2.2563767168083718e-06, "logits/chosen": -2.4716198444366455, "logits/rejected": -1.982600212097168, "logps/chosen": -253.2783660888672, "logps/rejected": -198.31503295898438, "loss": 0.0571, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.019866937771439552, "rewards/margins": 0.04034467041492462, "rewards/rejected": -0.02047773264348507, "step": 690 }, { "epoch": 0.05, "learning_rate": 2.289077828646174e-06, "logits/chosen": -2.3059027194976807, "logits/rejected": -2.070786952972412, "logps/chosen": -238.1765594482422, "logps/rejected": -219.28604125976562, "loss": 0.0519, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.004421982914209366, "rewards/margins": 0.029075268656015396, "rewards/rejected": -0.024653282016515732, "step": 700 }, { "epoch": 0.05, "eval_logits/chosen": -2.3373641967773438, "eval_logits/rejected": -2.1490402221679688, "eval_logps/chosen": -224.81228637695312, "eval_logps/rejected": -211.1453399658203, "eval_loss": 0.04084743186831474, "eval_rewards/accuracies": 0.6154999732971191, "eval_rewards/chosen": 0.035963330417871475, "eval_rewards/margins": 0.03363055735826492, "eval_rewards/rejected": 0.0023327735252678394, "eval_runtime": 711.8752, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.405, "step": 700 }, { "epoch": 0.05, "learning_rate": 2.3217789404839766e-06, "logits/chosen": -2.1930441856384277, "logits/rejected": -2.278282880783081, "logps/chosen": -153.32977294921875, "logps/rejected": -205.1284637451172, "loss": 0.0277, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.01750679686665535, "rewards/margins": 0.024280013516545296, "rewards/rejected": -0.006773218512535095, "step": 710 }, { "epoch": 0.05, "learning_rate": 2.354480052321779e-06, "logits/chosen": -2.521277666091919, "logits/rejected": -2.0904335975646973, "logps/chosen": -249.94601440429688, "logps/rejected": -201.07015991210938, "loss": 0.0492, "rewards/accuracies": 0.625, "rewards/chosen": 0.042217105627059937, "rewards/margins": 0.05382740497589111, "rewards/rejected": -0.0116103021427989, "step": 720 }, { "epoch": 0.05, "learning_rate": 2.3871811641595815e-06, "logits/chosen": -2.344003200531006, "logits/rejected": -2.1647818088531494, "logps/chosen": -242.52621459960938, "logps/rejected": -190.82498168945312, "loss": 0.0245, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.021331200376152992, "rewards/margins": 0.03477324917912483, "rewards/rejected": -0.013442049734294415, "step": 730 }, { "epoch": 0.05, "learning_rate": 2.4198822759973843e-06, "logits/chosen": -2.177978992462158, "logits/rejected": -2.1955149173736572, "logps/chosen": -192.08859252929688, "logps/rejected": -215.31790161132812, "loss": 0.0619, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.007412579841911793, "rewards/margins": 0.046483904123306274, "rewards/rejected": -0.05389648675918579, "step": 740 }, { "epoch": 0.05, "learning_rate": 2.4525833878351864e-06, "logits/chosen": -2.4476373195648193, "logits/rejected": -2.2447783946990967, "logps/chosen": -247.8760528564453, "logps/rejected": -192.84451293945312, "loss": 0.0299, "rewards/accuracies": 0.625, "rewards/chosen": 0.02462439239025116, "rewards/margins": 0.056242309510707855, "rewards/rejected": -0.031617920845746994, "step": 750 }, { "epoch": 0.05, "learning_rate": 2.4852844996729892e-06, "logits/chosen": -2.2039551734924316, "logits/rejected": -2.108445405960083, "logps/chosen": -238.0047607421875, "logps/rejected": -252.89584350585938, "loss": 0.0629, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.005597786512225866, "rewards/margins": 0.0671648383140564, "rewards/rejected": -0.061567049473524094, "step": 760 }, { "epoch": 0.05, "learning_rate": 2.5179856115107916e-06, "logits/chosen": -2.3416836261749268, "logits/rejected": -2.0228421688079834, "logps/chosen": -284.6044006347656, "logps/rejected": -243.303955078125, "loss": 0.0465, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05554069206118584, "rewards/margins": 0.06323963403701782, "rewards/rejected": -0.11878032982349396, "step": 770 }, { "epoch": 0.05, "learning_rate": 2.5506867233485937e-06, "logits/chosen": -2.4263076782226562, "logits/rejected": -2.002530336380005, "logps/chosen": -286.38623046875, "logps/rejected": -245.4661407470703, "loss": 0.0476, "rewards/accuracies": 0.75, "rewards/chosen": -0.1034514307975769, "rewards/margins": 0.08289273083209991, "rewards/rejected": -0.18634414672851562, "step": 780 }, { "epoch": 0.05, "learning_rate": 2.5833878351863965e-06, "logits/chosen": -2.4578182697296143, "logits/rejected": -2.389268159866333, "logps/chosen": -260.0544738769531, "logps/rejected": -270.0205993652344, "loss": 0.0349, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04028266668319702, "rewards/margins": 0.06293262541294098, "rewards/rejected": -0.103215292096138, "step": 790 }, { "epoch": 0.05, "learning_rate": 2.616088947024199e-06, "logits/chosen": -2.1505801677703857, "logits/rejected": -2.310819387435913, "logps/chosen": -200.5609588623047, "logps/rejected": -234.65188598632812, "loss": 0.034, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.020357560366392136, "rewards/margins": 0.047822583466768265, "rewards/rejected": -0.0681801438331604, "step": 800 }, { "epoch": 0.05, "eval_logits/chosen": -2.322722911834717, "eval_logits/rejected": -2.133898973464966, "eval_logps/chosen": -236.33895874023438, "eval_logps/rejected": -226.1372528076172, "eval_loss": 0.03693564981222153, "eval_rewards/accuracies": 0.612500011920929, "eval_rewards/chosen": -0.021670011803507805, "eval_rewards/margins": 0.05095669999718666, "eval_rewards/rejected": -0.07262670993804932, "eval_runtime": 714.003, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.401, "step": 800 }, { "epoch": 0.05, "learning_rate": 2.6487900588620014e-06, "logits/chosen": -2.0959088802337646, "logits/rejected": -1.863910436630249, "logps/chosen": -205.66055297851562, "logps/rejected": -168.29428100585938, "loss": 0.0523, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.027724791318178177, "rewards/margins": 0.02442805841565132, "rewards/rejected": -0.0521528497338295, "step": 810 }, { "epoch": 0.05, "learning_rate": 2.6814911706998042e-06, "logits/chosen": -2.2854485511779785, "logits/rejected": -2.113757371902466, "logps/chosen": -228.0641326904297, "logps/rejected": -201.61636352539062, "loss": 0.0299, "rewards/accuracies": 0.625, "rewards/chosen": -0.013624387793242931, "rewards/margins": 0.02153131738305092, "rewards/rejected": -0.035155706107616425, "step": 820 }, { "epoch": 0.05, "learning_rate": 2.7141922825376067e-06, "logits/chosen": -2.1840600967407227, "logits/rejected": -2.1884942054748535, "logps/chosen": -256.2579040527344, "logps/rejected": -279.2230529785156, "loss": 0.0252, "rewards/accuracies": 0.75, "rewards/chosen": 0.00833116751164198, "rewards/margins": 0.0651707872748375, "rewards/rejected": -0.05683961510658264, "step": 830 }, { "epoch": 0.05, "learning_rate": 2.746893394375409e-06, "logits/chosen": -2.3274707794189453, "logits/rejected": -2.1984059810638428, "logps/chosen": -228.68453979492188, "logps/rejected": -234.34878540039062, "loss": 0.0293, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.02854933775961399, "rewards/margins": 0.03722766786813736, "rewards/rejected": -0.008678330108523369, "step": 840 }, { "epoch": 0.06, "learning_rate": 2.779594506213211e-06, "logits/chosen": -2.22013258934021, "logits/rejected": -1.9830286502838135, "logps/chosen": -189.00003051757812, "logps/rejected": -197.3227081298828, "loss": 0.0422, "rewards/accuracies": 0.5, "rewards/chosen": 0.004294353537261486, "rewards/margins": 0.0431797169148922, "rewards/rejected": -0.038885362446308136, "step": 850 }, { "epoch": 0.06, "learning_rate": 2.812295618051014e-06, "logits/chosen": -2.4056875705718994, "logits/rejected": -2.2440567016601562, "logps/chosen": -275.1996154785156, "logps/rejected": -223.43252563476562, "loss": 0.0238, "rewards/accuracies": 0.75, "rewards/chosen": 0.02880767546594143, "rewards/margins": 0.04980029910802841, "rewards/rejected": -0.020992618054151535, "step": 860 }, { "epoch": 0.06, "learning_rate": 2.8449967298888164e-06, "logits/chosen": -2.2810773849487305, "logits/rejected": -2.0782113075256348, "logps/chosen": -177.49105834960938, "logps/rejected": -155.1948699951172, "loss": 0.0286, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.003335345070809126, "rewards/margins": 0.028810903429985046, "rewards/rejected": -0.03214624896645546, "step": 870 }, { "epoch": 0.06, "learning_rate": 2.877697841726619e-06, "logits/chosen": -2.36423921585083, "logits/rejected": -2.2662618160247803, "logps/chosen": -217.5684814453125, "logps/rejected": -207.69131469726562, "loss": 0.04, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.006336563732475042, "rewards/margins": 0.0346880666911602, "rewards/rejected": -0.028351500630378723, "step": 880 }, { "epoch": 0.06, "learning_rate": 2.9103989535644217e-06, "logits/chosen": -2.288578748703003, "logits/rejected": -2.4027316570281982, "logps/chosen": -210.6328125, "logps/rejected": -246.1505126953125, "loss": 0.0351, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.02425410784780979, "rewards/margins": 0.02493629790842533, "rewards/rejected": -0.0006821897113695741, "step": 890 }, { "epoch": 0.06, "learning_rate": 2.943100065402224e-06, "logits/chosen": -2.3284528255462646, "logits/rejected": -2.18131685256958, "logps/chosen": -289.2096862792969, "logps/rejected": -278.4032897949219, "loss": 0.0343, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.01716744527220726, "rewards/margins": 0.022551458328962326, "rewards/rejected": -0.0053840139880776405, "step": 900 }, { "epoch": 0.06, "eval_logits/chosen": -2.3029963970184326, "eval_logits/rejected": -2.1160192489624023, "eval_logps/chosen": -231.2078094482422, "eval_logps/rejected": -220.38314819335938, "eval_loss": 0.03613131120800972, "eval_rewards/accuracies": 0.6004999876022339, "eval_rewards/chosen": 0.0039856997318565845, "eval_rewards/margins": 0.0478418804705143, "eval_rewards/rejected": -0.043856181204319, "eval_runtime": 711.5047, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.405, "step": 900 }, { "epoch": 0.06, "learning_rate": 2.9758011772400266e-06, "logits/chosen": -2.226459264755249, "logits/rejected": -2.2192370891571045, "logps/chosen": -258.35595703125, "logps/rejected": -271.165283203125, "loss": 0.0186, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.005233976989984512, "rewards/margins": 0.027173664420843124, "rewards/rejected": -0.032407645136117935, "step": 910 }, { "epoch": 0.06, "learning_rate": 3.0085022890778286e-06, "logits/chosen": -2.2650656700134277, "logits/rejected": -2.0100250244140625, "logps/chosen": -182.3318634033203, "logps/rejected": -164.67588806152344, "loss": 0.0392, "rewards/accuracies": 0.625, "rewards/chosen": -0.032961465418338776, "rewards/margins": 0.04633763059973717, "rewards/rejected": -0.07929908484220505, "step": 920 }, { "epoch": 0.06, "learning_rate": 3.0412034009156314e-06, "logits/chosen": -2.1937007904052734, "logits/rejected": -2.340569019317627, "logps/chosen": -255.5501708984375, "logps/rejected": -256.684814453125, "loss": 0.0342, "rewards/accuracies": 0.5, "rewards/chosen": -0.09931950271129608, "rewards/margins": 0.026981692761182785, "rewards/rejected": -0.12630119919776917, "step": 930 }, { "epoch": 0.06, "learning_rate": 3.073904512753434e-06, "logits/chosen": -2.3723065853118896, "logits/rejected": -2.013484477996826, "logps/chosen": -247.80746459960938, "logps/rejected": -222.70068359375, "loss": 0.0387, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09079430997371674, "rewards/margins": 0.04910730570554733, "rewards/rejected": -0.13990160822868347, "step": 940 }, { "epoch": 0.06, "learning_rate": 3.1066056245912363e-06, "logits/chosen": -2.237149953842163, "logits/rejected": -2.311701774597168, "logps/chosen": -274.0514221191406, "logps/rejected": -253.67379760742188, "loss": 0.0369, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21533706784248352, "rewards/margins": 0.05790703371167183, "rewards/rejected": -0.27324408292770386, "step": 950 }, { "epoch": 0.06, "learning_rate": 3.1393067364290387e-06, "logits/chosen": -2.2728891372680664, "logits/rejected": -2.0648486614227295, "logps/chosen": -269.82293701171875, "logps/rejected": -241.70669555664062, "loss": 0.05, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22348091006278992, "rewards/margins": 0.05234675481915474, "rewards/rejected": -0.27582764625549316, "step": 960 }, { "epoch": 0.06, "learning_rate": 3.1720078482668416e-06, "logits/chosen": -2.210095167160034, "logits/rejected": -1.9791282415390015, "logps/chosen": -241.1488494873047, "logps/rejected": -205.7684326171875, "loss": 0.0277, "rewards/accuracies": 0.625, "rewards/chosen": -0.1478911191225052, "rewards/margins": 0.05863233655691147, "rewards/rejected": -0.20652346312999725, "step": 970 }, { "epoch": 0.06, "learning_rate": 3.204708960104644e-06, "logits/chosen": -2.2816624641418457, "logits/rejected": -1.940813422203064, "logps/chosen": -230.817138671875, "logps/rejected": -211.7601776123047, "loss": 0.0295, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06707186251878738, "rewards/margins": 0.06765398383140564, "rewards/rejected": -0.13472583889961243, "step": 980 }, { "epoch": 0.06, "learning_rate": 3.237410071942446e-06, "logits/chosen": -2.326986312866211, "logits/rejected": -2.061952829360962, "logps/chosen": -221.90771484375, "logps/rejected": -188.48504638671875, "loss": 0.0291, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.07773331552743912, "rewards/margins": 0.0007057116599753499, "rewards/rejected": -0.07843901962041855, "step": 990 }, { "epoch": 0.07, "learning_rate": 3.270111183780249e-06, "logits/chosen": -2.259770393371582, "logits/rejected": -2.0372934341430664, "logps/chosen": -218.0242156982422, "logps/rejected": -207.89443969726562, "loss": 0.0482, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09000325947999954, "rewards/margins": 0.03936609625816345, "rewards/rejected": -0.1293693482875824, "step": 1000 }, { "epoch": 0.07, "eval_logits/chosen": -2.2986462116241455, "eval_logits/rejected": -2.1121134757995605, "eval_logps/chosen": -251.03607177734375, "eval_logps/rejected": -241.4310760498047, "eval_loss": 0.03604179993271828, "eval_rewards/accuracies": 0.6100000143051147, "eval_rewards/chosen": -0.09515552967786789, "eval_rewards/margins": 0.053940195590257645, "eval_rewards/rejected": -0.14909571409225464, "eval_runtime": 713.0944, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.402, "step": 1000 }, { "epoch": 0.07, "learning_rate": 3.3028122956180513e-06, "logits/chosen": -2.163093328475952, "logits/rejected": -2.220931053161621, "logps/chosen": -231.88125610351562, "logps/rejected": -268.298828125, "loss": 0.0426, "rewards/accuracies": 0.5, "rewards/chosen": -0.0989166647195816, "rewards/margins": 0.044743381440639496, "rewards/rejected": -0.1436600387096405, "step": 1010 }, { "epoch": 0.07, "learning_rate": 3.3355134074558538e-06, "logits/chosen": -2.179591655731201, "logits/rejected": -2.0796704292297363, "logps/chosen": -249.61349487304688, "logps/rejected": -233.9440155029297, "loss": 0.0365, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14440378546714783, "rewards/margins": 0.049590133130550385, "rewards/rejected": -0.1939939260482788, "step": 1020 }, { "epoch": 0.07, "learning_rate": 3.368214519293656e-06, "logits/chosen": -2.1318297386169434, "logits/rejected": -1.919557809829712, "logps/chosen": -225.65652465820312, "logps/rejected": -212.3909912109375, "loss": 0.0425, "rewards/accuracies": 0.625, "rewards/chosen": -0.13343092799186707, "rewards/margins": 0.038164906203746796, "rewards/rejected": -0.17159582674503326, "step": 1030 }, { "epoch": 0.07, "learning_rate": 3.400915631131459e-06, "logits/chosen": -2.105020046234131, "logits/rejected": -2.1336417198181152, "logps/chosen": -227.83447265625, "logps/rejected": -270.49041748046875, "loss": 0.057, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14931000769138336, "rewards/margins": 0.07002317160367966, "rewards/rejected": -0.2193332016468048, "step": 1040 }, { "epoch": 0.07, "learning_rate": 3.4336167429692615e-06, "logits/chosen": -2.3600306510925293, "logits/rejected": -2.140617609024048, "logps/chosen": -239.68075561523438, "logps/rejected": -232.2493133544922, "loss": 0.0248, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.12565191090106964, "rewards/margins": 0.04411160573363304, "rewards/rejected": -0.16976352035999298, "step": 1050 }, { "epoch": 0.07, "learning_rate": 3.4663178548070635e-06, "logits/chosen": -2.173041582107544, "logits/rejected": -2.229940176010132, "logps/chosen": -227.50448608398438, "logps/rejected": -222.6018524169922, "loss": 0.0397, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.06275273859500885, "rewards/margins": 0.05769491195678711, "rewards/rejected": -0.12044765800237656, "step": 1060 }, { "epoch": 0.07, "learning_rate": 3.499018966644866e-06, "logits/chosen": -2.328925609588623, "logits/rejected": -2.1343941688537598, "logps/chosen": -204.39268493652344, "logps/rejected": -208.36209106445312, "loss": 0.0302, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06283830106258392, "rewards/margins": 0.053908735513687134, "rewards/rejected": -0.11674702167510986, "step": 1070 }, { "epoch": 0.07, "learning_rate": 3.531720078482669e-06, "logits/chosen": -2.2606778144836426, "logits/rejected": -1.9218966960906982, "logps/chosen": -233.92880249023438, "logps/rejected": -235.57632446289062, "loss": 0.0652, "rewards/accuracies": 0.5, "rewards/chosen": -0.040295444428920746, "rewards/margins": 0.06439373642206192, "rewards/rejected": -0.10468918085098267, "step": 1080 }, { "epoch": 0.07, "learning_rate": 3.5644211903204712e-06, "logits/chosen": -2.2620184421539307, "logits/rejected": -2.3110594749450684, "logps/chosen": -203.2140655517578, "logps/rejected": -214.1947784423828, "loss": 0.0469, "rewards/accuracies": 0.625, "rewards/chosen": -0.02165791392326355, "rewards/margins": 0.040836118161678314, "rewards/rejected": -0.062494028359651566, "step": 1090 }, { "epoch": 0.07, "learning_rate": 3.5971223021582737e-06, "logits/chosen": -2.4198689460754395, "logits/rejected": -2.280808925628662, "logps/chosen": -265.135009765625, "logps/rejected": -214.5912322998047, "loss": 0.0316, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04667113721370697, "rewards/margins": 0.03285752236843109, "rewards/rejected": -0.07952866703271866, "step": 1100 }, { "epoch": 0.07, "eval_logits/chosen": -2.2966220378875732, "eval_logits/rejected": -2.11063551902771, "eval_logps/chosen": -243.27603149414062, "eval_logps/rejected": -236.5680694580078, "eval_loss": 0.04146208241581917, "eval_rewards/accuracies": 0.6044999957084656, "eval_rewards/chosen": -0.056355465203523636, "eval_rewards/margins": 0.06842530518770218, "eval_rewards/rejected": -0.12478075921535492, "eval_runtime": 716.3476, "eval_samples_per_second": 2.792, "eval_steps_per_second": 1.396, "step": 1100 }, { "epoch": 0.07, "learning_rate": 3.6298234139960765e-06, "logits/chosen": -2.3681674003601074, "logits/rejected": -2.0149314403533936, "logps/chosen": -232.5952606201172, "logps/rejected": -194.56239318847656, "loss": 0.0579, "rewards/accuracies": 0.5, "rewards/chosen": -0.07413096725940704, "rewards/margins": 0.05819229036569595, "rewards/rejected": -0.1323232501745224, "step": 1110 }, { "epoch": 0.07, "learning_rate": 3.6625245258338785e-06, "logits/chosen": -2.1520678997039795, "logits/rejected": -2.0752205848693848, "logps/chosen": -262.2667541503906, "logps/rejected": -357.5063171386719, "loss": 0.0465, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.11744166910648346, "rewards/margins": 0.09389489889144897, "rewards/rejected": -0.21133653819561005, "step": 1120 }, { "epoch": 0.07, "learning_rate": 3.695225637671681e-06, "logits/chosen": -2.3903768062591553, "logits/rejected": -2.191129684448242, "logps/chosen": -222.4154510498047, "logps/rejected": -201.04275512695312, "loss": 0.0436, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0756942480802536, "rewards/margins": 0.05851646512746811, "rewards/rejected": -0.1342107057571411, "step": 1130 }, { "epoch": 0.07, "learning_rate": 3.7279267495094834e-06, "logits/chosen": -2.305600166320801, "logits/rejected": -2.173811435699463, "logps/chosen": -168.5917205810547, "logps/rejected": -197.49795532226562, "loss": 0.0342, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0408240482211113, "rewards/margins": 0.09390485286712646, "rewards/rejected": -0.13472887873649597, "step": 1140 }, { "epoch": 0.08, "learning_rate": 3.7606278613472863e-06, "logits/chosen": -2.372283458709717, "logits/rejected": -2.028932571411133, "logps/chosen": -291.4856262207031, "logps/rejected": -222.21963500976562, "loss": 0.0498, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04607173055410385, "rewards/margins": 0.04151051491498947, "rewards/rejected": -0.08758225291967392, "step": 1150 }, { "epoch": 0.08, "learning_rate": 3.7933289731850887e-06, "logits/chosen": -2.1950457096099854, "logits/rejected": -1.9749637842178345, "logps/chosen": -226.8663787841797, "logps/rejected": -215.56277465820312, "loss": 0.0176, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.008471393026411533, "rewards/margins": 0.08477363735437393, "rewards/rejected": -0.07630225270986557, "step": 1160 }, { "epoch": 0.08, "learning_rate": 3.826030085022891e-06, "logits/chosen": -2.3531956672668457, "logits/rejected": -2.1216812133789062, "logps/chosen": -250.2491912841797, "logps/rejected": -225.8344268798828, "loss": 0.0404, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.012996921315789223, "rewards/margins": 0.05868647247552872, "rewards/rejected": -0.0716833844780922, "step": 1170 }, { "epoch": 0.08, "learning_rate": 3.858731196860693e-06, "logits/chosen": -2.401371479034424, "logits/rejected": -1.9539715051651, "logps/chosen": -254.2443389892578, "logps/rejected": -216.92532348632812, "loss": 0.0393, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.017054477706551552, "rewards/margins": 0.024582907557487488, "rewards/rejected": -0.04163738340139389, "step": 1180 }, { "epoch": 0.08, "learning_rate": 3.891432308698496e-06, "logits/chosen": -2.0956592559814453, "logits/rejected": -2.03969669342041, "logps/chosen": -193.86561584472656, "logps/rejected": -230.97763061523438, "loss": 0.05, "rewards/accuracies": 0.625, "rewards/chosen": 0.006823881063610315, "rewards/margins": 0.05933377146720886, "rewards/rejected": -0.05250988528132439, "step": 1190 }, { "epoch": 0.08, "learning_rate": 3.924133420536299e-06, "logits/chosen": -2.20378041267395, "logits/rejected": -2.013878345489502, "logps/chosen": -191.03379821777344, "logps/rejected": -170.40415954589844, "loss": 0.0326, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.044445477426052094, "rewards/margins": 0.0638650581240654, "rewards/rejected": -0.10831055790185928, "step": 1200 }, { "epoch": 0.08, "eval_logits/chosen": -2.2876923084259033, "eval_logits/rejected": -2.102726697921753, "eval_logps/chosen": -243.8900909423828, "eval_logps/rejected": -237.54696655273438, "eval_loss": 0.03310655057430267, "eval_rewards/accuracies": 0.640500009059906, "eval_rewards/chosen": -0.05942576006054878, "eval_rewards/margins": 0.07024962455034256, "eval_rewards/rejected": -0.12967538833618164, "eval_runtime": 711.2733, "eval_samples_per_second": 2.812, "eval_steps_per_second": 1.406, "step": 1200 }, { "epoch": 0.08, "learning_rate": 3.956834532374101e-06, "logits/chosen": -2.400481700897217, "logits/rejected": -2.0159413814544678, "logps/chosen": -216.20870971679688, "logps/rejected": -194.70791625976562, "loss": 0.0294, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.05473031476140022, "rewards/margins": 0.11225248873233795, "rewards/rejected": -0.16698279976844788, "step": 1210 }, { "epoch": 0.08, "learning_rate": 3.989535644211904e-06, "logits/chosen": -2.2711410522460938, "logits/rejected": -2.007876396179199, "logps/chosen": -228.341552734375, "logps/rejected": -223.26144409179688, "loss": 0.0488, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05607137084007263, "rewards/margins": 0.08770473301410675, "rewards/rejected": -0.1437760889530182, "step": 1220 }, { "epoch": 0.08, "learning_rate": 4.022236756049706e-06, "logits/chosen": -2.4662814140319824, "logits/rejected": -2.0836684703826904, "logps/chosen": -278.93316650390625, "logps/rejected": -255.7671356201172, "loss": 0.0194, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.008979836478829384, "rewards/margins": 0.046547479927539825, "rewards/rejected": -0.05552731081843376, "step": 1230 }, { "epoch": 0.08, "learning_rate": 4.054937867887509e-06, "logits/chosen": -2.339547634124756, "logits/rejected": -1.9021472930908203, "logps/chosen": -223.43222045898438, "logps/rejected": -222.7755889892578, "loss": 0.0203, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.004172854125499725, "rewards/margins": 0.050659067928791046, "rewards/rejected": -0.04648621380329132, "step": 1240 }, { "epoch": 0.08, "learning_rate": 4.087638979725311e-06, "logits/chosen": -2.250339984893799, "logits/rejected": -2.3162214756011963, "logps/chosen": -226.05819702148438, "logps/rejected": -234.34072875976562, "loss": 0.029, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.008907961659133434, "rewards/margins": 0.034137699753046036, "rewards/rejected": -0.02522973157465458, "step": 1250 }, { "epoch": 0.08, "learning_rate": 4.1203400915631135e-06, "logits/chosen": -2.1639037132263184, "logits/rejected": -2.088408946990967, "logps/chosen": -240.08273315429688, "logps/rejected": -220.23916625976562, "loss": 0.0764, "rewards/accuracies": 0.625, "rewards/chosen": -0.013758843764662743, "rewards/margins": 0.042185962200164795, "rewards/rejected": -0.05594480782747269, "step": 1260 }, { "epoch": 0.08, "learning_rate": 4.153041203400916e-06, "logits/chosen": -2.3826661109924316, "logits/rejected": -2.2230827808380127, "logps/chosen": -258.5919189453125, "logps/rejected": -240.993408203125, "loss": 0.0076, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.02915186807513237, "rewards/margins": 0.03518051654100418, "rewards/rejected": -0.0060286447405815125, "step": 1270 }, { "epoch": 0.08, "learning_rate": 4.185742315238718e-06, "logits/chosen": -2.2890408039093018, "logits/rejected": -2.0284957885742188, "logps/chosen": -194.86972045898438, "logps/rejected": -194.9738006591797, "loss": 0.0293, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.01615927927196026, "rewards/margins": 0.05985509231686592, "rewards/rejected": -0.04369581490755081, "step": 1280 }, { "epoch": 0.08, "learning_rate": 4.218443427076521e-06, "logits/chosen": -2.233799457550049, "logits/rejected": -2.0911786556243896, "logps/chosen": -184.27633666992188, "logps/rejected": -192.11636352539062, "loss": 0.0532, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.00758881401270628, "rewards/margins": 0.03184492141008377, "rewards/rejected": -0.039433736354112625, "step": 1290 }, { "epoch": 0.09, "learning_rate": 4.251144538914323e-06, "logits/chosen": -2.207096576690674, "logits/rejected": -2.070316791534424, "logps/chosen": -224.5314178466797, "logps/rejected": -222.47225952148438, "loss": 0.0313, "rewards/accuracies": 0.625, "rewards/chosen": -0.019705668091773987, "rewards/margins": 0.05568262189626694, "rewards/rejected": -0.07538828998804092, "step": 1300 }, { "epoch": 0.09, "eval_logits/chosen": -2.299447774887085, "eval_logits/rejected": -2.113861560821533, "eval_logps/chosen": -232.5399627685547, "eval_logps/rejected": -223.8951873779297, "eval_loss": 0.0312928780913353, "eval_rewards/accuracies": 0.6320000290870667, "eval_rewards/chosen": -0.002674978692084551, "eval_rewards/margins": 0.058741528540849686, "eval_rewards/rejected": -0.06141650676727295, "eval_runtime": 715.0773, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.398, "step": 1300 }, { "epoch": 0.09, "learning_rate": 4.283845650752126e-06, "logits/chosen": -2.3264384269714355, "logits/rejected": -2.1164462566375732, "logps/chosen": -287.3799743652344, "logps/rejected": -236.7661895751953, "loss": 0.0243, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.005840280093252659, "rewards/margins": 0.06788822263479233, "rewards/rejected": -0.07372850924730301, "step": 1310 }, { "epoch": 0.09, "learning_rate": 4.316546762589928e-06, "logits/chosen": -2.2621026039123535, "logits/rejected": -2.1790411472320557, "logps/chosen": -217.2468719482422, "logps/rejected": -196.61880493164062, "loss": 0.055, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.046596817672252655, "rewards/margins": 0.0683503970503807, "rewards/rejected": -0.11494722217321396, "step": 1320 }, { "epoch": 0.09, "learning_rate": 4.349247874427731e-06, "logits/chosen": -2.310753345489502, "logits/rejected": -2.022246837615967, "logps/chosen": -238.8385467529297, "logps/rejected": -293.2956237792969, "loss": 0.0251, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.061522673815488815, "rewards/margins": 0.07187248766422272, "rewards/rejected": -0.13339515030384064, "step": 1330 }, { "epoch": 0.09, "learning_rate": 4.381948986265534e-06, "logits/chosen": -2.4638831615448, "logits/rejected": -2.203160285949707, "logps/chosen": -264.6558532714844, "logps/rejected": -270.74285888671875, "loss": 0.0286, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.00905582308769226, "rewards/margins": 0.09546998143196106, "rewards/rejected": -0.10452580451965332, "step": 1340 }, { "epoch": 0.09, "learning_rate": 4.414650098103336e-06, "logits/chosen": -2.6136481761932373, "logits/rejected": -2.3436598777770996, "logps/chosen": -261.24114990234375, "logps/rejected": -248.14596557617188, "loss": 0.0305, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0035567854065448046, "rewards/margins": 0.04664462059736252, "rewards/rejected": -0.043087832629680634, "step": 1350 }, { "epoch": 0.09, "learning_rate": 4.447351209941138e-06, "logits/chosen": -2.295720338821411, "logits/rejected": -2.031731367111206, "logps/chosen": -219.7317657470703, "logps/rejected": -209.2897186279297, "loss": 0.0456, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05033557862043381, "rewards/margins": 0.03849739953875542, "rewards/rejected": -0.08883297443389893, "step": 1360 }, { "epoch": 0.09, "learning_rate": 4.480052321778941e-06, "logits/chosen": -2.247638463973999, "logits/rejected": -2.257481575012207, "logps/chosen": -226.5643768310547, "logps/rejected": -217.638671875, "loss": 0.0375, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0017294908175244927, "rewards/margins": 0.05885141342878342, "rewards/rejected": -0.05712192505598068, "step": 1370 }, { "epoch": 0.09, "learning_rate": 4.5127534336167435e-06, "logits/chosen": -2.31245493888855, "logits/rejected": -2.042786121368408, "logps/chosen": -251.3003692626953, "logps/rejected": -216.68307495117188, "loss": 0.042, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.020632009953260422, "rewards/margins": 0.04521378129720688, "rewards/rejected": -0.0658458024263382, "step": 1380 }, { "epoch": 0.09, "learning_rate": 4.5454545454545455e-06, "logits/chosen": -2.2685723304748535, "logits/rejected": -2.216639280319214, "logps/chosen": -173.1841583251953, "logps/rejected": -176.2059783935547, "loss": 0.0509, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.01862729713320732, "rewards/margins": 0.06615440547466278, "rewards/rejected": -0.084781713783741, "step": 1390 }, { "epoch": 0.09, "learning_rate": 4.578155657292348e-06, "logits/chosen": -2.2430644035339355, "logits/rejected": -2.2024948596954346, "logps/chosen": -255.028076171875, "logps/rejected": -279.7099304199219, "loss": 0.0345, "rewards/accuracies": 0.625, "rewards/chosen": 0.0018879823619499803, "rewards/margins": 0.07879987359046936, "rewards/rejected": -0.07691188901662827, "step": 1400 }, { "epoch": 0.09, "eval_logits/chosen": -2.2307913303375244, "eval_logits/rejected": -2.05076265335083, "eval_logps/chosen": -234.12046813964844, "eval_logps/rejected": -224.87071228027344, "eval_loss": 0.033073075115680695, "eval_rewards/accuracies": 0.6274999976158142, "eval_rewards/chosen": -0.010577631182968616, "eval_rewards/margins": 0.05571650341153145, "eval_rewards/rejected": -0.0662941262125969, "eval_runtime": 712.6064, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.403, "step": 1400 }, { "epoch": 0.09, "learning_rate": 4.610856769130151e-06, "logits/chosen": -2.3056623935699463, "logits/rejected": -2.176156520843506, "logps/chosen": -241.0175323486328, "logps/rejected": -222.33029174804688, "loss": 0.0122, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.009366462007164955, "rewards/margins": 0.05407185107469559, "rewards/rejected": -0.0634383112192154, "step": 1410 }, { "epoch": 0.09, "learning_rate": 4.643557880967953e-06, "logits/chosen": -2.2978873252868652, "logits/rejected": -2.1032276153564453, "logps/chosen": -203.88720703125, "logps/rejected": -210.26089477539062, "loss": 0.0356, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.025442058220505714, "rewards/margins": 0.04436718672513962, "rewards/rejected": -0.06980924308300018, "step": 1420 }, { "epoch": 0.09, "learning_rate": 4.676258992805755e-06, "logits/chosen": -2.2503609657287598, "logits/rejected": -1.983534812927246, "logps/chosen": -273.1319580078125, "logps/rejected": -244.86367797851562, "loss": 0.0223, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.01756000705063343, "rewards/margins": 0.06940056383609772, "rewards/rejected": -0.086960569024086, "step": 1430 }, { "epoch": 0.09, "learning_rate": 4.708960104643558e-06, "logits/chosen": -2.248351573944092, "logits/rejected": -2.2094709873199463, "logps/chosen": -309.40234375, "logps/rejected": -283.4943542480469, "loss": 0.0197, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.015962861478328705, "rewards/margins": 0.05426085740327835, "rewards/rejected": -0.07022371143102646, "step": 1440 }, { "epoch": 0.09, "learning_rate": 4.741661216481361e-06, "logits/chosen": -2.1934053897857666, "logits/rejected": -2.1544718742370605, "logps/chosen": -239.0615234375, "logps/rejected": -268.44793701171875, "loss": 0.0252, "rewards/accuracies": 0.625, "rewards/chosen": -0.013579867780208588, "rewards/margins": 0.09070058166980743, "rewards/rejected": -0.10428045690059662, "step": 1450 }, { "epoch": 0.1, "learning_rate": 4.774362328319163e-06, "logits/chosen": -2.1640710830688477, "logits/rejected": -2.009326696395874, "logps/chosen": -220.5846710205078, "logps/rejected": -196.79995727539062, "loss": 0.0201, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.023972881957888603, "rewards/margins": 0.03082362748682499, "rewards/rejected": -0.05479650944471359, "step": 1460 }, { "epoch": 0.1, "learning_rate": 4.807063440156966e-06, "logits/chosen": -2.1977009773254395, "logits/rejected": -1.9679205417633057, "logps/chosen": -264.31158447265625, "logps/rejected": -225.3589324951172, "loss": 0.0354, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.005010455381125212, "rewards/margins": 0.11208884418010712, "rewards/rejected": -0.11709930747747421, "step": 1470 }, { "epoch": 0.1, "learning_rate": 4.839764551994769e-06, "logits/chosen": -2.254770278930664, "logits/rejected": -1.957808256149292, "logps/chosen": -258.8214111328125, "logps/rejected": -239.7161407470703, "loss": 0.0432, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05056464672088623, "rewards/margins": 0.09314145147800446, "rewards/rejected": -0.1437060832977295, "step": 1480 }, { "epoch": 0.1, "learning_rate": 4.872465663832571e-06, "logits/chosen": -2.11043119430542, "logits/rejected": -1.9940143823623657, "logps/chosen": -223.37466430664062, "logps/rejected": -210.67971801757812, "loss": 0.0507, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.07808069884777069, "rewards/margins": 0.038826070725917816, "rewards/rejected": -0.1169067770242691, "step": 1490 }, { "epoch": 0.1, "learning_rate": 4.905166775670373e-06, "logits/chosen": -2.224799394607544, "logits/rejected": -1.9152275323867798, "logps/chosen": -230.69625854492188, "logps/rejected": -216.72360229492188, "loss": 0.0629, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04468151926994324, "rewards/margins": 0.06710448861122131, "rewards/rejected": -0.11178600788116455, "step": 1500 }, { "epoch": 0.1, "eval_logits/chosen": -2.177633285522461, "eval_logits/rejected": -2.000908136367798, "eval_logps/chosen": -242.75218200683594, "eval_logps/rejected": -237.29566955566406, "eval_loss": 0.03400981053709984, "eval_rewards/accuracies": 0.6439999938011169, "eval_rewards/chosen": -0.053736183792352676, "eval_rewards/margins": 0.0746825784444809, "eval_rewards/rejected": -0.12841875851154327, "eval_runtime": 714.8353, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 1500 }, { "epoch": 0.1, "learning_rate": 4.9378678875081756e-06, "logits/chosen": -2.325528860092163, "logits/rejected": -2.0406839847564697, "logps/chosen": -222.1483917236328, "logps/rejected": -206.7466583251953, "loss": 0.0351, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.03702390938997269, "rewards/margins": 0.07803212106227875, "rewards/rejected": -0.11505603790283203, "step": 1510 }, { "epoch": 0.1, "learning_rate": 4.9705689993459784e-06, "logits/chosen": -2.305387496948242, "logits/rejected": -1.8941253423690796, "logps/chosen": -218.213623046875, "logps/rejected": -181.945556640625, "loss": 0.0471, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06644146144390106, "rewards/margins": 0.10722502321004868, "rewards/rejected": -0.17366649210453033, "step": 1520 }, { "epoch": 0.1, "learning_rate": 4.999999934793849e-06, "logits/chosen": -2.258678674697876, "logits/rejected": -2.1702895164489746, "logps/chosen": -255.1768035888672, "logps/rejected": -231.4876708984375, "loss": 0.0354, "rewards/accuracies": 0.5, "rewards/chosen": -0.03611770272254944, "rewards/margins": 0.028636153787374496, "rewards/rejected": -0.06475386023521423, "step": 1530 }, { "epoch": 0.1, "learning_rate": 4.999992110059814e-06, "logits/chosen": -2.24711275100708, "logits/rejected": -2.2306511402130127, "logps/chosen": -278.0412902832031, "logps/rejected": -269.61517333984375, "loss": 0.0262, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0017689462983980775, "rewards/margins": 0.05843646451830864, "rewards/rejected": -0.060205407440662384, "step": 1540 }, { "epoch": 0.1, "learning_rate": 4.999971244142299e-06, "logits/chosen": -2.398763418197632, "logits/rejected": -2.106553554534912, "logps/chosen": -274.9897766113281, "logps/rejected": -249.78518676757812, "loss": 0.0168, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.007683471776545048, "rewards/margins": 0.07020819187164307, "rewards/rejected": -0.07789166271686554, "step": 1550 }, { "epoch": 0.1, "learning_rate": 4.999937337150149e-06, "logits/chosen": -2.0848517417907715, "logits/rejected": -2.0338871479034424, "logps/chosen": -237.2061309814453, "logps/rejected": -231.5194091796875, "loss": 0.0411, "rewards/accuracies": 0.625, "rewards/chosen": -0.013456342741847038, "rewards/margins": 0.048470061272382736, "rewards/rejected": -0.061926405876874924, "step": 1560 }, { "epoch": 0.1, "learning_rate": 4.99989038926024e-06, "logits/chosen": -2.029017925262451, "logits/rejected": -2.1551504135131836, "logps/chosen": -208.94229125976562, "logps/rejected": -225.96371459960938, "loss": 0.0249, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0525711365044117, "rewards/margins": 0.07409703731536865, "rewards/rejected": -0.12666817009449005, "step": 1570 }, { "epoch": 0.1, "learning_rate": 4.999830400717476e-06, "logits/chosen": -2.213772773742676, "logits/rejected": -2.084939479827881, "logps/chosen": -295.6604309082031, "logps/rejected": -295.1983947753906, "loss": 0.0152, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04136674478650093, "rewards/margins": 0.0790671780705452, "rewards/rejected": -0.12043392658233643, "step": 1580 }, { "epoch": 0.1, "learning_rate": 4.999757371834787e-06, "logits/chosen": -2.088376045227051, "logits/rejected": -2.008604049682617, "logps/chosen": -254.0402374267578, "logps/rejected": -268.31842041015625, "loss": 0.021, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07110683619976044, "rewards/margins": 0.1489681601524353, "rewards/rejected": -0.22007498145103455, "step": 1590 }, { "epoch": 0.1, "learning_rate": 4.999671302993125e-06, "logits/chosen": -2.0464565753936768, "logits/rejected": -1.986285924911499, "logps/chosen": -263.01983642578125, "logps/rejected": -294.6103515625, "loss": 0.0313, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07219687849283218, "rewards/margins": 0.08101221174001694, "rewards/rejected": -0.15320907533168793, "step": 1600 }, { "epoch": 0.1, "eval_logits/chosen": -2.2403852939605713, "eval_logits/rejected": -2.057793378829956, "eval_logps/chosen": -244.79440307617188, "eval_logps/rejected": -239.94647216796875, "eval_loss": 0.031098267063498497, "eval_rewards/accuracies": 0.6309999823570251, "eval_rewards/chosen": -0.06394734233617783, "eval_rewards/margins": 0.07772543281316757, "eval_rewards/rejected": -0.1416727900505066, "eval_runtime": 713.2681, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 1600 }, { "epoch": 0.11, "learning_rate": 4.999572194641471e-06, "logits/chosen": -2.2111706733703613, "logits/rejected": -2.0525684356689453, "logps/chosen": -289.5194091796875, "logps/rejected": -258.9083557128906, "loss": 0.0393, "rewards/accuracies": 0.75, "rewards/chosen": -0.07000543922185898, "rewards/margins": 0.109294593334198, "rewards/rejected": -0.17930002510547638, "step": 1610 }, { "epoch": 0.11, "learning_rate": 4.999460047296819e-06, "logits/chosen": -2.1922264099121094, "logits/rejected": -2.0641796588897705, "logps/chosen": -238.1181640625, "logps/rejected": -233.39846801757812, "loss": 0.0221, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11257002502679825, "rewards/margins": 0.08094724267721176, "rewards/rejected": -0.1935172826051712, "step": 1620 }, { "epoch": 0.11, "learning_rate": 4.999334861544186e-06, "logits/chosen": -2.312042474746704, "logits/rejected": -1.9850116968154907, "logps/chosen": -240.84591674804688, "logps/rejected": -206.13882446289062, "loss": 0.038, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05696337670087814, "rewards/margins": 0.10648103058338165, "rewards/rejected": -0.1634444147348404, "step": 1630 }, { "epoch": 0.11, "learning_rate": 4.999196638036604e-06, "logits/chosen": -2.379241943359375, "logits/rejected": -2.183292865753174, "logps/chosen": -300.8702087402344, "logps/rejected": -272.499267578125, "loss": 0.0068, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.06727579981088638, "rewards/margins": 0.034327052533626556, "rewards/rejected": -0.10160285234451294, "step": 1640 }, { "epoch": 0.11, "learning_rate": 4.999045377495111e-06, "logits/chosen": -2.0479378700256348, "logits/rejected": -2.301999568939209, "logps/chosen": -191.126708984375, "logps/rejected": -295.76654052734375, "loss": 0.0358, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08396010845899582, "rewards/margins": 0.08428677171468735, "rewards/rejected": -0.16824688017368317, "step": 1650 }, { "epoch": 0.11, "learning_rate": 4.998881080708759e-06, "logits/chosen": -2.2068817615509033, "logits/rejected": -2.130096673965454, "logps/chosen": -253.0929718017578, "logps/rejected": -226.85226440429688, "loss": 0.0386, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.0638570636510849, "rewards/margins": 0.02352083846926689, "rewards/rejected": -0.0873778909444809, "step": 1660 }, { "epoch": 0.11, "learning_rate": 4.998703748534599e-06, "logits/chosen": -2.0564498901367188, "logits/rejected": -1.8033416271209717, "logps/chosen": -246.2660675048828, "logps/rejected": -203.96421813964844, "loss": 0.0612, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.02818126417696476, "rewards/margins": 0.041943810880184174, "rewards/rejected": -0.07012508064508438, "step": 1670 }, { "epoch": 0.11, "learning_rate": 4.998513381897683e-06, "logits/chosen": -2.287282943725586, "logits/rejected": -2.057615041732788, "logps/chosen": -242.29696655273438, "logps/rejected": -190.59234619140625, "loss": 0.0348, "rewards/accuracies": 0.5, "rewards/chosen": -0.015266014263033867, "rewards/margins": 0.02644859254360199, "rewards/rejected": -0.041714608669281006, "step": 1680 }, { "epoch": 0.11, "learning_rate": 4.9983099817910565e-06, "logits/chosen": -2.205864191055298, "logits/rejected": -2.0282130241394043, "logps/chosen": -252.55630493164062, "logps/rejected": -259.43914794921875, "loss": 0.0346, "rewards/accuracies": 0.625, "rewards/chosen": -0.036472074687480927, "rewards/margins": 0.056804411113262177, "rewards/rejected": -0.0932764858007431, "step": 1690 }, { "epoch": 0.11, "learning_rate": 4.998093549275754e-06, "logits/chosen": -2.187556505203247, "logits/rejected": -2.182443141937256, "logps/chosen": -263.99151611328125, "logps/rejected": -301.42822265625, "loss": 0.0287, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.01828739047050476, "rewards/margins": 0.07821901887655258, "rewards/rejected": -0.09650642424821854, "step": 1700 }, { "epoch": 0.11, "eval_logits/chosen": -2.286701202392578, "eval_logits/rejected": -2.1022112369537354, "eval_logps/chosen": -237.6215057373047, "eval_logps/rejected": -230.36648559570312, "eval_loss": 0.030306359753012657, "eval_rewards/accuracies": 0.6359999775886536, "eval_rewards/chosen": -0.02808281220495701, "eval_rewards/margins": 0.0656900480389595, "eval_rewards/rejected": -0.09377285838127136, "eval_runtime": 712.8613, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 1700 }, { "epoch": 0.11, "learning_rate": 4.997864085480794e-06, "logits/chosen": -2.333749771118164, "logits/rejected": -2.161771774291992, "logps/chosen": -271.87738037109375, "logps/rejected": -267.2635192871094, "loss": 0.0136, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.027655865997076035, "rewards/margins": 0.06961538642644882, "rewards/rejected": -0.09727124869823456, "step": 1710 }, { "epoch": 0.11, "learning_rate": 4.997621591603171e-06, "logits/chosen": -2.2935166358947754, "logits/rejected": -2.1049695014953613, "logps/chosen": -166.76449584960938, "logps/rejected": -180.1530303955078, "loss": 0.0556, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04365837201476097, "rewards/margins": 0.07036669552326202, "rewards/rejected": -0.1140250712633133, "step": 1720 }, { "epoch": 0.11, "learning_rate": 4.997366068907853e-06, "logits/chosen": -2.275902271270752, "logits/rejected": -2.221653461456299, "logps/chosen": -260.5364074707031, "logps/rejected": -249.9969024658203, "loss": 0.0358, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.010633264668285847, "rewards/margins": 0.046677958220243454, "rewards/rejected": -0.05731121823191643, "step": 1730 }, { "epoch": 0.11, "learning_rate": 4.997097518727771e-06, "logits/chosen": -2.3315846920013428, "logits/rejected": -2.088407039642334, "logps/chosen": -227.2711181640625, "logps/rejected": -207.3436737060547, "loss": 0.0383, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.021573388949036598, "rewards/margins": 0.06597500294446945, "rewards/rejected": -0.0875483900308609, "step": 1740 }, { "epoch": 0.11, "learning_rate": 4.9968159424638155e-06, "logits/chosen": -2.1644344329833984, "logits/rejected": -2.3732848167419434, "logps/chosen": -221.4119415283203, "logps/rejected": -281.27215576171875, "loss": 0.024, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.01902773603796959, "rewards/margins": 0.040075235068798065, "rewards/rejected": -0.059102971106767654, "step": 1750 }, { "epoch": 0.12, "learning_rate": 4.9965213415848235e-06, "logits/chosen": -2.2339000701904297, "logits/rejected": -1.8621151447296143, "logps/chosen": -237.12387084960938, "logps/rejected": -214.28848266601562, "loss": 0.0263, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04359662905335426, "rewards/margins": 0.07012289762496948, "rewards/rejected": -0.11371952295303345, "step": 1760 }, { "epoch": 0.12, "learning_rate": 4.9962137176275805e-06, "logits/chosen": -2.3237321376800537, "logits/rejected": -2.1237401962280273, "logps/chosen": -233.67996215820312, "logps/rejected": -243.418212890625, "loss": 0.0125, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0030772259924560785, "rewards/margins": 0.05148119851946831, "rewards/rejected": -0.04840397089719772, "step": 1770 }, { "epoch": 0.12, "learning_rate": 4.9958930721968015e-06, "logits/chosen": -2.193448543548584, "logits/rejected": -2.2796072959899902, "logps/chosen": -215.48025512695312, "logps/rejected": -235.7574005126953, "loss": 0.0294, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.01722542755305767, "rewards/margins": 0.04387689009308815, "rewards/rejected": -0.061102308332920074, "step": 1780 }, { "epoch": 0.12, "learning_rate": 4.995559406965132e-06, "logits/chosen": -2.4174957275390625, "logits/rejected": -2.064757823944092, "logps/chosen": -232.0088348388672, "logps/rejected": -215.20706176757812, "loss": 0.0262, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.000467650213977322, "rewards/margins": 0.06406942009925842, "rewards/rejected": -0.06453706324100494, "step": 1790 }, { "epoch": 0.12, "learning_rate": 4.995212723673131e-06, "logits/chosen": -2.374816417694092, "logits/rejected": -2.1405506134033203, "logps/chosen": -227.5681610107422, "logps/rejected": -192.76089477539062, "loss": 0.0335, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.013174636289477348, "rewards/margins": 0.07587815076112747, "rewards/rejected": -0.06270351260900497, "step": 1800 }, { "epoch": 0.12, "eval_logits/chosen": -2.3010594844818115, "eval_logits/rejected": -2.115757465362549, "eval_logps/chosen": -231.86740112304688, "eval_logps/rejected": -222.27854919433594, "eval_loss": 0.03156345337629318, "eval_rewards/accuracies": 0.6259999871253967, "eval_rewards/chosen": 0.0006877080886624753, "eval_rewards/margins": 0.054020799696445465, "eval_rewards/rejected": -0.0533330924808979, "eval_runtime": 711.5953, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.405, "step": 1800 }, { "epoch": 0.12, "learning_rate": 4.99485302412927e-06, "logits/chosen": -2.0493340492248535, "logits/rejected": -1.9822384119033813, "logps/chosen": -206.83642578125, "logps/rejected": -222.2565155029297, "loss": 0.0412, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.005873044487088919, "rewards/margins": 0.07363691926002502, "rewards/rejected": -0.06776387244462967, "step": 1810 }, { "epoch": 0.12, "learning_rate": 4.994480310209918e-06, "logits/chosen": -2.2592854499816895, "logits/rejected": -2.417466402053833, "logps/chosen": -239.39797973632812, "logps/rejected": -262.6976623535156, "loss": 0.0275, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.012925192713737488, "rewards/margins": 0.0477270744740963, "rewards/rejected": -0.03480188176035881, "step": 1820 }, { "epoch": 0.12, "learning_rate": 4.994094583859332e-06, "logits/chosen": -2.2714853286743164, "logits/rejected": -2.022733211517334, "logps/chosen": -160.02210998535156, "logps/rejected": -203.35848999023438, "loss": 0.0416, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.02445300668478012, "rewards/margins": 0.05653420090675354, "rewards/rejected": -0.03208119422197342, "step": 1830 }, { "epoch": 0.12, "learning_rate": 4.9936958470896525e-06, "logits/chosen": -2.2610347270965576, "logits/rejected": -2.020301580429077, "logps/chosen": -229.99380493164062, "logps/rejected": -208.194091796875, "loss": 0.0402, "rewards/accuracies": 0.75, "rewards/chosen": -0.03124316595494747, "rewards/margins": 0.10551712661981583, "rewards/rejected": -0.13676029443740845, "step": 1840 }, { "epoch": 0.12, "learning_rate": 4.993284101980883e-06, "logits/chosen": -2.201612949371338, "logits/rejected": -2.017625331878662, "logps/chosen": -261.00958251953125, "logps/rejected": -254.49404907226562, "loss": 0.0373, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09257099777460098, "rewards/margins": 0.16558702290058136, "rewards/rejected": -0.25815802812576294, "step": 1850 }, { "epoch": 0.12, "learning_rate": 4.9928593506808885e-06, "logits/chosen": -2.3184077739715576, "logits/rejected": -2.123654365539551, "logps/chosen": -275.52569580078125, "logps/rejected": -253.30953979492188, "loss": 0.0606, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0935564711689949, "rewards/margins": 0.07380557060241699, "rewards/rejected": -0.1673620492219925, "step": 1860 }, { "epoch": 0.12, "learning_rate": 4.992421595405381e-06, "logits/chosen": -2.2751145362854004, "logits/rejected": -2.027334690093994, "logps/chosen": -239.30380249023438, "logps/rejected": -177.33480834960938, "loss": 0.054, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05555707961320877, "rewards/margins": 0.04438935965299606, "rewards/rejected": -0.09994643181562424, "step": 1870 }, { "epoch": 0.12, "learning_rate": 4.991970838437905e-06, "logits/chosen": -2.213554620742798, "logits/rejected": -2.11810040473938, "logps/chosen": -239.0460968017578, "logps/rejected": -277.94354248046875, "loss": 0.0483, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06858281046152115, "rewards/margins": 0.053963709622621536, "rewards/rejected": -0.12254651635885239, "step": 1880 }, { "epoch": 0.12, "learning_rate": 4.9915070821298294e-06, "logits/chosen": -2.317843198776245, "logits/rejected": -1.995692491531372, "logps/chosen": -180.86282348632812, "logps/rejected": -177.86102294921875, "loss": 0.0228, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.06946498900651932, "rewards/margins": 0.0304332934319973, "rewards/rejected": -0.09989828616380692, "step": 1890 }, { "epoch": 0.12, "learning_rate": 4.991030328900336e-06, "logits/chosen": -2.239023208618164, "logits/rejected": -1.9848921298980713, "logps/chosen": -294.36920166015625, "logps/rejected": -237.84078979492188, "loss": 0.0209, "rewards/accuracies": 0.75, "rewards/chosen": -0.04899997264146805, "rewards/margins": 0.06753884255886078, "rewards/rejected": -0.11653882265090942, "step": 1900 }, { "epoch": 0.12, "eval_logits/chosen": -2.289330244064331, "eval_logits/rejected": -2.104606866836548, "eval_logps/chosen": -244.22193908691406, "eval_logps/rejected": -234.0950164794922, "eval_loss": 0.033284034579992294, "eval_rewards/accuracies": 0.6399999856948853, "eval_rewards/chosen": -0.06108501926064491, "eval_rewards/margins": 0.051330603659152985, "eval_rewards/rejected": -0.1124156191945076, "eval_runtime": 714.8462, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 1900 }, { "epoch": 0.12, "learning_rate": 4.9905405812364014e-06, "logits/chosen": -2.2534711360931396, "logits/rejected": -2.238356828689575, "logps/chosen": -213.00283813476562, "logps/rejected": -226.8634490966797, "loss": 0.0351, "rewards/accuracies": 0.625, "rewards/chosen": -0.05353314429521561, "rewards/margins": 0.05697651952505112, "rewards/rejected": -0.11050967127084732, "step": 1910 }, { "epoch": 0.13, "learning_rate": 4.990037841692791e-06, "logits/chosen": -2.214384078979492, "logits/rejected": -1.9799760580062866, "logps/chosen": -217.17306518554688, "logps/rejected": -192.190185546875, "loss": 0.046, "rewards/accuracies": 0.625, "rewards/chosen": -0.05566094070672989, "rewards/margins": 0.09150851517915726, "rewards/rejected": -0.14716944098472595, "step": 1920 }, { "epoch": 0.13, "learning_rate": 4.989522112892039e-06, "logits/chosen": -2.2701072692871094, "logits/rejected": -2.235013246536255, "logps/chosen": -212.87271118164062, "logps/rejected": -225.2215118408203, "loss": 0.0411, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.07621657103300095, "rewards/margins": 0.06619422137737274, "rewards/rejected": -0.1424107849597931, "step": 1930 }, { "epoch": 0.13, "learning_rate": 4.98899339752444e-06, "logits/chosen": -2.3187501430511475, "logits/rejected": -2.0728302001953125, "logps/chosen": -236.3536834716797, "logps/rejected": -225.99551391601562, "loss": 0.0484, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0337567999958992, "rewards/margins": 0.09292588382959366, "rewards/rejected": -0.12668268382549286, "step": 1940 }, { "epoch": 0.13, "learning_rate": 4.988451698348033e-06, "logits/chosen": -2.243349552154541, "logits/rejected": -2.227822780609131, "logps/chosen": -185.9561309814453, "logps/rejected": -213.9745330810547, "loss": 0.0379, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.038142912089824677, "rewards/margins": 0.03874513879418373, "rewards/rejected": -0.0768880546092987, "step": 1950 }, { "epoch": 0.13, "learning_rate": 4.987897018188585e-06, "logits/chosen": -2.235739231109619, "logits/rejected": -1.9942162036895752, "logps/chosen": -229.75186157226562, "logps/rejected": -184.48373413085938, "loss": 0.0256, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03566255420446396, "rewards/margins": 0.05703993886709213, "rewards/rejected": -0.09270249307155609, "step": 1960 }, { "epoch": 0.13, "learning_rate": 4.9873293599395814e-06, "logits/chosen": -2.2663984298706055, "logits/rejected": -2.1350181102752686, "logps/chosen": -200.5733184814453, "logps/rejected": -212.0470733642578, "loss": 0.0495, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03570770472288132, "rewards/margins": 0.07728613913059235, "rewards/rejected": -0.11299383640289307, "step": 1970 }, { "epoch": 0.13, "learning_rate": 4.986748726562203e-06, "logits/chosen": -2.293747663497925, "logits/rejected": -2.1642115116119385, "logps/chosen": -221.1876983642578, "logps/rejected": -205.5155029296875, "loss": 0.0261, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.026666143909096718, "rewards/margins": 0.046100765466690063, "rewards/rejected": -0.07276691496372223, "step": 1980 }, { "epoch": 0.13, "learning_rate": 4.98615512108532e-06, "logits/chosen": -2.384824275970459, "logits/rejected": -2.2395741939544678, "logps/chosen": -225.223388671875, "logps/rejected": -237.7670135498047, "loss": 0.0459, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.018563272431492805, "rewards/margins": 0.05380113795399666, "rewards/rejected": -0.07236441224813461, "step": 1990 }, { "epoch": 0.13, "learning_rate": 4.985548546605469e-06, "logits/chosen": -2.1302530765533447, "logits/rejected": -2.267808437347412, "logps/chosen": -227.83688354492188, "logps/rejected": -253.89321899414062, "loss": 0.0183, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07365544140338898, "rewards/margins": 0.0420791395008564, "rewards/rejected": -0.11573459208011627, "step": 2000 }, { "epoch": 0.13, "eval_logits/chosen": -2.3084776401519775, "eval_logits/rejected": -2.121295928955078, "eval_logps/chosen": -244.44662475585938, "eval_logps/rejected": -238.9348907470703, "eval_loss": 0.030150586739182472, "eval_rewards/accuracies": 0.6499999761581421, "eval_rewards/chosen": -0.062208425253629684, "eval_rewards/margins": 0.07440651953220367, "eval_rewards/rejected": -0.13661494851112366, "eval_runtime": 712.1385, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 2000 }, { "epoch": 0.13, "learning_rate": 4.984929006286838e-06, "logits/chosen": -2.146847724914551, "logits/rejected": -2.093820333480835, "logps/chosen": -222.1940460205078, "logps/rejected": -230.22390747070312, "loss": 0.0533, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.06440014392137527, "rewards/margins": 0.022867945954203606, "rewards/rejected": -0.08726808428764343, "step": 2010 }, { "epoch": 0.13, "learning_rate": 4.984296503361256e-06, "logits/chosen": -2.3741021156311035, "logits/rejected": -2.0217337608337402, "logps/chosen": -210.68020629882812, "logps/rejected": -181.44540405273438, "loss": 0.0185, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04380156099796295, "rewards/margins": 0.06384517252445221, "rewards/rejected": -0.10764674842357635, "step": 2020 }, { "epoch": 0.13, "learning_rate": 4.9836510411281645e-06, "logits/chosen": -2.2192230224609375, "logits/rejected": -2.1009650230407715, "logps/chosen": -286.519287109375, "logps/rejected": -271.2211608886719, "loss": 0.028, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.03993731737136841, "rewards/margins": 0.12052154541015625, "rewards/rejected": -0.16045884788036346, "step": 2030 }, { "epoch": 0.13, "learning_rate": 4.982992622954613e-06, "logits/chosen": -2.352003812789917, "logits/rejected": -2.058192729949951, "logps/chosen": -291.65899658203125, "logps/rejected": -192.23446655273438, "loss": 0.0452, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0406540110707283, "rewards/margins": 0.07372018694877625, "rewards/rejected": -0.11437419801950455, "step": 2040 }, { "epoch": 0.13, "learning_rate": 4.9823212522752325e-06, "logits/chosen": -2.445817470550537, "logits/rejected": -2.1775293350219727, "logps/chosen": -280.30621337890625, "logps/rejected": -267.19482421875, "loss": 0.0254, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.012462446466088295, "rewards/margins": 0.10462311655282974, "rewards/rejected": -0.11708555370569229, "step": 2050 }, { "epoch": 0.13, "learning_rate": 4.981636932592222e-06, "logits/chosen": -2.201385021209717, "logits/rejected": -2.096060276031494, "logps/chosen": -209.18569946289062, "logps/rejected": -218.5018768310547, "loss": 0.0178, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.018636580556631088, "rewards/margins": 0.06668587028980255, "rewards/rejected": -0.04804928973317146, "step": 2060 }, { "epoch": 0.14, "learning_rate": 4.980939667475328e-06, "logits/chosen": -2.4133598804473877, "logits/rejected": -2.060161590576172, "logps/chosen": -271.41278076171875, "logps/rejected": -221.82763671875, "loss": 0.0222, "rewards/accuracies": 0.625, "rewards/chosen": 0.01381192822009325, "rewards/margins": 0.055034469813108444, "rewards/rejected": -0.041222553700208664, "step": 2070 }, { "epoch": 0.14, "learning_rate": 4.980229460561826e-06, "logits/chosen": -2.2730422019958496, "logits/rejected": -2.2006144523620605, "logps/chosen": -217.75338745117188, "logps/rejected": -225.47537231445312, "loss": 0.0177, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.001639772206544876, "rewards/margins": 0.11514203250408173, "rewards/rejected": -0.11350226402282715, "step": 2080 }, { "epoch": 0.14, "learning_rate": 4.979506315556503e-06, "logits/chosen": -2.2308144569396973, "logits/rejected": -1.9013233184814453, "logps/chosen": -286.318359375, "logps/rejected": -250.24365234375, "loss": 0.013, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0008690860122442245, "rewards/margins": 0.07456686347723007, "rewards/rejected": -0.0736977756023407, "step": 2090 }, { "epoch": 0.14, "learning_rate": 4.9787702362316395e-06, "logits/chosen": -2.2909300327301025, "logits/rejected": -2.505516529083252, "logps/chosen": -195.52487182617188, "logps/rejected": -230.25, "loss": 0.0235, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.02348853088915348, "rewards/margins": 0.04605535790324211, "rewards/rejected": -0.06954388320446014, "step": 2100 }, { "epoch": 0.14, "eval_logits/chosen": -2.278724431991577, "eval_logits/rejected": -2.0933449268341064, "eval_logps/chosen": -239.6732940673828, "eval_logps/rejected": -234.6192626953125, "eval_loss": 0.028895698487758636, "eval_rewards/accuracies": 0.6474999785423279, "eval_rewards/chosen": -0.0383417047560215, "eval_rewards/margins": 0.07669514417648315, "eval_rewards/rejected": -0.11503685265779495, "eval_runtime": 712.4011, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.404, "step": 2100 }, { "epoch": 0.14, "learning_rate": 4.9780212264269835e-06, "logits/chosen": -2.210439443588257, "logits/rejected": -1.9650099277496338, "logps/chosen": -195.4822998046875, "logps/rejected": -188.45339965820312, "loss": 0.0189, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05331619456410408, "rewards/margins": 0.05053389072418213, "rewards/rejected": -0.10385008901357651, "step": 2110 }, { "epoch": 0.14, "learning_rate": 4.977259290049739e-06, "logits/chosen": -2.466681480407715, "logits/rejected": -1.907947301864624, "logps/chosen": -293.2063293457031, "logps/rejected": -251.5894012451172, "loss": 0.0152, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.031319208443164825, "rewards/margins": 0.13950778543949127, "rewards/rejected": -0.1708270013332367, "step": 2120 }, { "epoch": 0.14, "learning_rate": 4.976484431074538e-06, "logits/chosen": -2.2047533988952637, "logits/rejected": -2.1507277488708496, "logps/chosen": -201.1926727294922, "logps/rejected": -195.83145141601562, "loss": 0.051, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03693586587905884, "rewards/margins": 0.06120552867650986, "rewards/rejected": -0.09814140945672989, "step": 2130 }, { "epoch": 0.14, "learning_rate": 4.975696653543425e-06, "logits/chosen": -2.2936761379241943, "logits/rejected": -2.0483224391937256, "logps/chosen": -258.71966552734375, "logps/rejected": -264.5902404785156, "loss": 0.0289, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04535426199436188, "rewards/margins": 0.09219890832901001, "rewards/rejected": -0.13755318522453308, "step": 2140 }, { "epoch": 0.14, "learning_rate": 4.974895961565835e-06, "logits/chosen": -2.20737886428833, "logits/rejected": -1.882495641708374, "logps/chosen": -188.53988647460938, "logps/rejected": -210.54006958007812, "loss": 0.0309, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06644473224878311, "rewards/margins": 0.08386100828647614, "rewards/rejected": -0.15030571818351746, "step": 2150 }, { "epoch": 0.14, "learning_rate": 4.974082359318566e-06, "logits/chosen": -2.206138849258423, "logits/rejected": -2.036780595779419, "logps/chosen": -264.5321960449219, "logps/rejected": -241.1393585205078, "loss": 0.0221, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.054410744458436966, "rewards/margins": 0.11043145507574081, "rewards/rejected": -0.16484220325946808, "step": 2160 }, { "epoch": 0.14, "learning_rate": 4.973255851045769e-06, "logits/chosen": -2.209059238433838, "logits/rejected": -2.216773509979248, "logps/chosen": -225.8814697265625, "logps/rejected": -204.2803955078125, "loss": 0.038, "rewards/accuracies": 0.625, "rewards/chosen": -0.04261628910899162, "rewards/margins": 0.07573209702968597, "rewards/rejected": -0.11834839731454849, "step": 2170 }, { "epoch": 0.14, "learning_rate": 4.972416441058915e-06, "logits/chosen": -2.144322633743286, "logits/rejected": -1.9922664165496826, "logps/chosen": -242.1470947265625, "logps/rejected": -235.1527862548828, "loss": 0.0328, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06195644289255142, "rewards/margins": 0.09823786467313766, "rewards/rejected": -0.1601943075656891, "step": 2180 }, { "epoch": 0.14, "learning_rate": 4.971564133736777e-06, "logits/chosen": -2.0853326320648193, "logits/rejected": -1.9092786312103271, "logps/chosen": -185.86534118652344, "logps/rejected": -210.11111450195312, "loss": 0.0469, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03932350128889084, "rewards/margins": 0.09632328152656555, "rewards/rejected": -0.135646790266037, "step": 2190 }, { "epoch": 0.14, "learning_rate": 4.970698933525409e-06, "logits/chosen": -2.394420862197876, "logits/rejected": -2.1105847358703613, "logps/chosen": -295.5723571777344, "logps/rejected": -272.0895080566406, "loss": 0.0401, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0811271220445633, "rewards/margins": 0.08094374090433121, "rewards/rejected": -0.1620708405971527, "step": 2200 }, { "epoch": 0.14, "eval_logits/chosen": -2.2756831645965576, "eval_logits/rejected": -2.089794397354126, "eval_logps/chosen": -243.54811096191406, "eval_logps/rejected": -238.95559692382812, "eval_loss": 0.02840990014374256, "eval_rewards/accuracies": 0.6370000243186951, "eval_rewards/chosen": -0.057715822011232376, "eval_rewards/margins": 0.07900260388851166, "eval_rewards/rejected": -0.13671842217445374, "eval_runtime": 711.0099, "eval_samples_per_second": 2.813, "eval_steps_per_second": 1.406, "step": 2200 }, { "epoch": 0.14, "learning_rate": 4.969820844938118e-06, "logits/chosen": -2.3965530395507812, "logits/rejected": -2.073350429534912, "logps/chosen": -232.9188232421875, "logps/rejected": -195.14671325683594, "loss": 0.0208, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05157778412103653, "rewards/margins": 0.08065281808376312, "rewards/rejected": -0.13223060965538025, "step": 2210 }, { "epoch": 0.15, "learning_rate": 4.968929872555444e-06, "logits/chosen": -1.928214430809021, "logits/rejected": -2.0975098609924316, "logps/chosen": -236.0997772216797, "logps/rejected": -281.67559814453125, "loss": 0.033, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09482688456773758, "rewards/margins": 0.049563802778720856, "rewards/rejected": -0.14439070224761963, "step": 2220 }, { "epoch": 0.15, "learning_rate": 4.968026021025137e-06, "logits/chosen": -2.3393139839172363, "logits/rejected": -2.1214444637298584, "logps/chosen": -209.51931762695312, "logps/rejected": -189.69772338867188, "loss": 0.0226, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.039772000163793564, "rewards/margins": 0.1012188047170639, "rewards/rejected": -0.14099080860614777, "step": 2230 }, { "epoch": 0.15, "learning_rate": 4.967109295062128e-06, "logits/chosen": -2.1921162605285645, "logits/rejected": -1.9794528484344482, "logps/chosen": -234.14883422851562, "logps/rejected": -273.10064697265625, "loss": 0.0219, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04573686048388481, "rewards/margins": 0.09015445411205292, "rewards/rejected": -0.13589131832122803, "step": 2240 }, { "epoch": 0.15, "learning_rate": 4.966179699448509e-06, "logits/chosen": -2.1765666007995605, "logits/rejected": -1.9855120182037354, "logps/chosen": -201.95956420898438, "logps/rejected": -190.47109985351562, "loss": 0.042, "rewards/accuracies": 0.5, "rewards/chosen": -0.05602306127548218, "rewards/margins": 0.03168831020593643, "rewards/rejected": -0.0877113789319992, "step": 2250 }, { "epoch": 0.15, "learning_rate": 4.965237239033506e-06, "logits/chosen": -2.3550071716308594, "logits/rejected": -2.1704366207122803, "logps/chosen": -295.1136169433594, "logps/rejected": -280.4603271484375, "loss": 0.0318, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.02727457322180271, "rewards/margins": 0.150642991065979, "rewards/rejected": -0.17791756987571716, "step": 2260 }, { "epoch": 0.15, "learning_rate": 4.964281918733453e-06, "logits/chosen": -2.2895469665527344, "logits/rejected": -2.046518325805664, "logps/chosen": -194.8838653564453, "logps/rejected": -209.9123077392578, "loss": 0.0393, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.05884706974029541, "rewards/margins": 0.09742596000432968, "rewards/rejected": -0.1562730371952057, "step": 2270 }, { "epoch": 0.15, "learning_rate": 4.9633137435317715e-06, "logits/chosen": -2.292795181274414, "logits/rejected": -1.662411093711853, "logps/chosen": -234.4496612548828, "logps/rejected": -186.72970581054688, "loss": 0.0241, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.05777313560247421, "rewards/margins": 0.10032601654529572, "rewards/rejected": -0.15809914469718933, "step": 2280 }, { "epoch": 0.15, "learning_rate": 4.9623327184789355e-06, "logits/chosen": -2.3812899589538574, "logits/rejected": -2.300156831741333, "logps/chosen": -227.98934936523438, "logps/rejected": -233.48876953125, "loss": 0.0191, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05124448984861374, "rewards/margins": 0.05162835866212845, "rewards/rejected": -0.10287284851074219, "step": 2290 }, { "epoch": 0.15, "learning_rate": 4.9613388486924525e-06, "logits/chosen": -1.9750115871429443, "logits/rejected": -2.1227240562438965, "logps/chosen": -191.4607391357422, "logps/rejected": -226.56640625, "loss": 0.0257, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05663357302546501, "rewards/margins": 0.088627889752388, "rewards/rejected": -0.1452614665031433, "step": 2300 }, { "epoch": 0.15, "eval_logits/chosen": -2.2834084033966064, "eval_logits/rejected": -2.0974628925323486, "eval_logps/chosen": -236.52151489257812, "eval_logps/rejected": -233.99488830566406, "eval_loss": 0.03041422739624977, "eval_rewards/accuracies": 0.6299999952316284, "eval_rewards/chosen": -0.022582821547985077, "eval_rewards/margins": 0.08933208882808685, "eval_rewards/rejected": -0.11191490292549133, "eval_runtime": 712.7513, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 2300 }, { "epoch": 0.15, "learning_rate": 4.960332139356834e-06, "logits/chosen": -2.243394136428833, "logits/rejected": -2.0578529834747314, "logps/chosen": -214.17446899414062, "logps/rejected": -208.0134735107422, "loss": 0.0534, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.026787936687469482, "rewards/margins": 0.10139461606740952, "rewards/rejected": -0.1281825602054596, "step": 2310 }, { "epoch": 0.15, "learning_rate": 4.95931259572357e-06, "logits/chosen": -2.3460609912872314, "logits/rejected": -1.9933313131332397, "logps/chosen": -236.15878295898438, "logps/rejected": -278.4571838378906, "loss": 0.0318, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.009593973867595196, "rewards/margins": 0.08312395960092545, "rewards/rejected": -0.09271793067455292, "step": 2320 }, { "epoch": 0.15, "learning_rate": 4.9582802231111e-06, "logits/chosen": -2.1679487228393555, "logits/rejected": -2.2463414669036865, "logps/chosen": -211.53768920898438, "logps/rejected": -200.51268005371094, "loss": 0.0361, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.001682019210420549, "rewards/margins": 0.0792674571275711, "rewards/rejected": -0.07758542895317078, "step": 2330 }, { "epoch": 0.15, "learning_rate": 4.957235026904782e-06, "logits/chosen": -2.340217113494873, "logits/rejected": -2.0187582969665527, "logps/chosen": -258.4695129394531, "logps/rejected": -216.89511108398438, "loss": 0.0192, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.018414536491036415, "rewards/margins": 0.04770408570766449, "rewards/rejected": -0.029289543628692627, "step": 2340 }, { "epoch": 0.15, "learning_rate": 4.956177012556875e-06, "logits/chosen": -2.4300289154052734, "logits/rejected": -2.1952102184295654, "logps/chosen": -245.9125213623047, "logps/rejected": -190.65444946289062, "loss": 0.0307, "rewards/accuracies": 0.625, "rewards/chosen": -0.008598506450653076, "rewards/margins": 0.08154983818531036, "rewards/rejected": -0.09014834463596344, "step": 2350 }, { "epoch": 0.15, "learning_rate": 4.9551061855864976e-06, "logits/chosen": -2.0655150413513184, "logits/rejected": -2.118483066558838, "logps/chosen": -196.32266235351562, "logps/rejected": -212.44137573242188, "loss": 0.0327, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.01747271604835987, "rewards/margins": 0.0595405288040638, "rewards/rejected": -0.07701323926448822, "step": 2360 }, { "epoch": 0.16, "learning_rate": 4.95402255157961e-06, "logits/chosen": -2.1108739376068115, "logits/rejected": -2.167656898498535, "logps/chosen": -183.8750762939453, "logps/rejected": -268.9769287109375, "loss": 0.035, "rewards/accuracies": 0.625, "rewards/chosen": 0.005905964411795139, "rewards/margins": 0.10277839750051498, "rewards/rejected": -0.09687243402004242, "step": 2370 }, { "epoch": 0.16, "learning_rate": 4.952926116188977e-06, "logits/chosen": -2.406338691711426, "logits/rejected": -2.3419036865234375, "logps/chosen": -189.09738159179688, "logps/rejected": -236.28744506835938, "loss": 0.0557, "rewards/accuracies": 0.5, "rewards/chosen": -0.028600236400961876, "rewards/margins": 0.039927661418914795, "rewards/rejected": -0.06852789968252182, "step": 2380 }, { "epoch": 0.16, "learning_rate": 4.951816885134143e-06, "logits/chosen": -2.2776050567626953, "logits/rejected": -2.2614612579345703, "logps/chosen": -205.91110229492188, "logps/rejected": -219.66207885742188, "loss": 0.0334, "rewards/accuracies": 0.625, "rewards/chosen": -0.02179075963795185, "rewards/margins": 0.07305854558944702, "rewards/rejected": -0.09484930336475372, "step": 2390 }, { "epoch": 0.16, "learning_rate": 4.950694864201399e-06, "logits/chosen": -2.29520845413208, "logits/rejected": -2.2112789154052734, "logps/chosen": -237.30136108398438, "logps/rejected": -254.004638671875, "loss": 0.0339, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.006902826018631458, "rewards/margins": 0.07287013530731201, "rewards/rejected": -0.06596730649471283, "step": 2400 }, { "epoch": 0.16, "eval_logits/chosen": -2.317573070526123, "eval_logits/rejected": -2.1317877769470215, "eval_logps/chosen": -230.5072784423828, "eval_logps/rejected": -222.4461212158203, "eval_loss": 0.030627042055130005, "eval_rewards/accuracies": 0.6349999904632568, "eval_rewards/chosen": 0.0074883149936795235, "eval_rewards/margins": 0.06165945902466774, "eval_rewards/rejected": -0.05417114123702049, "eval_runtime": 714.8382, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 2400 }, { "epoch": 0.16, "learning_rate": 4.9495600592437575e-06, "logits/chosen": -2.366551399230957, "logits/rejected": -2.2003931999206543, "logps/chosen": -234.5296173095703, "logps/rejected": -245.6568145751953, "loss": 0.0414, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.038175903260707855, "rewards/margins": 0.03972792625427246, "rewards/rejected": -0.07790382206439972, "step": 2410 }, { "epoch": 0.16, "learning_rate": 4.948412476180917e-06, "logits/chosen": -2.272407054901123, "logits/rejected": -1.991207480430603, "logps/chosen": -190.45693969726562, "logps/rejected": -184.49008178710938, "loss": 0.0254, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.031905245035886765, "rewards/margins": 0.08384934067726135, "rewards/rejected": -0.11575458198785782, "step": 2420 }, { "epoch": 0.16, "learning_rate": 4.947252120999232e-06, "logits/chosen": -2.3069729804992676, "logits/rejected": -2.0320534706115723, "logps/chosen": -272.39654541015625, "logps/rejected": -220.7366180419922, "loss": 0.0386, "rewards/accuracies": 0.5, "rewards/chosen": -0.02645879052579403, "rewards/margins": 0.03556925058364868, "rewards/rejected": -0.06202805042266846, "step": 2430 }, { "epoch": 0.16, "learning_rate": 4.946078999751683e-06, "logits/chosen": -2.2218480110168457, "logits/rejected": -2.1495237350463867, "logps/chosen": -176.87838745117188, "logps/rejected": -168.43165588378906, "loss": 0.0289, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.013743760995566845, "rewards/margins": 0.06554745137691498, "rewards/rejected": -0.051803696900606155, "step": 2440 }, { "epoch": 0.16, "learning_rate": 4.944893118557847e-06, "logits/chosen": -2.143193244934082, "logits/rejected": -2.1038451194763184, "logps/chosen": -206.0088348388672, "logps/rejected": -165.9881591796875, "loss": 0.0371, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.002378863049671054, "rewards/margins": 0.06834892183542252, "rewards/rejected": -0.06597007066011429, "step": 2450 }, { "epoch": 0.16, "learning_rate": 4.943694483603861e-06, "logits/chosen": -2.440500259399414, "logits/rejected": -2.0264945030212402, "logps/chosen": -224.66738891601562, "logps/rejected": -187.57005310058594, "loss": 0.0249, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.006683288607746363, "rewards/margins": 0.06940829753875732, "rewards/rejected": -0.0627250224351883, "step": 2460 }, { "epoch": 0.16, "learning_rate": 4.9424831011423914e-06, "logits/chosen": -2.3970818519592285, "logits/rejected": -2.305779218673706, "logps/chosen": -292.70465087890625, "logps/rejected": -259.5212097167969, "loss": 0.0325, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.021390587091445923, "rewards/margins": 0.019155841320753098, "rewards/rejected": -0.04054642841219902, "step": 2470 }, { "epoch": 0.16, "learning_rate": 4.9412589774926015e-06, "logits/chosen": -2.3957889080047607, "logits/rejected": -2.0970237255096436, "logps/chosen": -278.4657287597656, "logps/rejected": -246.1154327392578, "loss": 0.0562, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.021379968151450157, "rewards/margins": 0.0913659855723381, "rewards/rejected": -0.11274596303701401, "step": 2480 }, { "epoch": 0.16, "learning_rate": 4.940022119040121e-06, "logits/chosen": -2.4432716369628906, "logits/rejected": -2.1419849395751953, "logps/chosen": -291.38311767578125, "logps/rejected": -272.6324462890625, "loss": 0.0321, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.002664142055436969, "rewards/margins": 0.04533766582608223, "rewards/rejected": -0.04267352074384689, "step": 2490 }, { "epoch": 0.16, "learning_rate": 4.93877253223701e-06, "logits/chosen": -2.3866477012634277, "logits/rejected": -2.1521363258361816, "logps/chosen": -288.61883544921875, "logps/rejected": -267.2017822265625, "loss": 0.0132, "rewards/accuracies": 0.75, "rewards/chosen": 0.018266785889863968, "rewards/margins": 0.05077819898724556, "rewards/rejected": -0.03251141309738159, "step": 2500 }, { "epoch": 0.16, "eval_logits/chosen": -2.3256454467773438, "eval_logits/rejected": -2.139024496078491, "eval_logps/chosen": -232.43704223632812, "eval_logps/rejected": -223.72633361816406, "eval_loss": 0.03118470311164856, "eval_rewards/accuracies": 0.6349999904632568, "eval_rewards/chosen": -0.002160488162189722, "eval_rewards/margins": 0.0584116131067276, "eval_rewards/rejected": -0.060572102665901184, "eval_runtime": 711.145, "eval_samples_per_second": 2.812, "eval_steps_per_second": 1.406, "step": 2500 }, { "epoch": 0.16, "learning_rate": 4.937510223601725e-06, "logits/chosen": -2.5465502738952637, "logits/rejected": -2.379866123199463, "logps/chosen": -258.18267822265625, "logps/rejected": -223.7237091064453, "loss": 0.0302, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0217165295034647, "rewards/margins": 0.038762204349040985, "rewards/rejected": -0.01704566739499569, "step": 2510 }, { "epoch": 0.16, "learning_rate": 4.936235199719085e-06, "logits/chosen": -2.3703575134277344, "logits/rejected": -2.2414231300354004, "logps/chosen": -171.20152282714844, "logps/rejected": -155.46945190429688, "loss": 0.0306, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.005502406507730484, "rewards/margins": 0.07223564386367798, "rewards/rejected": -0.0667332261800766, "step": 2520 }, { "epoch": 0.17, "learning_rate": 4.93494746724024e-06, "logits/chosen": -2.3545944690704346, "logits/rejected": -2.0969889163970947, "logps/chosen": -224.9967498779297, "logps/rejected": -256.32989501953125, "loss": 0.0275, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.005961322691291571, "rewards/margins": 0.06936588883399963, "rewards/rejected": -0.07532721757888794, "step": 2530 }, { "epoch": 0.17, "learning_rate": 4.933647032882635e-06, "logits/chosen": -2.496915340423584, "logits/rejected": -2.186249256134033, "logps/chosen": -244.2103729248047, "logps/rejected": -214.40121459960938, "loss": 0.0251, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0032573645003139973, "rewards/margins": 0.07305190712213516, "rewards/rejected": -0.06979455053806305, "step": 2540 }, { "epoch": 0.17, "learning_rate": 4.932333903429969e-06, "logits/chosen": -2.1586389541625977, "logits/rejected": -2.011641263961792, "logps/chosen": -192.63626098632812, "logps/rejected": -168.6728515625, "loss": 0.0251, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.002231207210570574, "rewards/margins": 0.009213193319737911, "rewards/rejected": -0.006981985177844763, "step": 2550 }, { "epoch": 0.17, "learning_rate": 4.931008085732172e-06, "logits/chosen": -2.3497612476348877, "logits/rejected": -1.9341261386871338, "logps/chosen": -200.062255859375, "logps/rejected": -161.38348388671875, "loss": 0.0282, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0138033302500844, "rewards/margins": 0.056612949818372726, "rewards/rejected": -0.0428096204996109, "step": 2560 }, { "epoch": 0.17, "learning_rate": 4.9296695867053565e-06, "logits/chosen": -2.30879807472229, "logits/rejected": -2.090181827545166, "logps/chosen": -297.36773681640625, "logps/rejected": -243.02755737304688, "loss": 0.0177, "rewards/accuracies": 0.75, "rewards/chosen": 0.016169043257832527, "rewards/margins": 0.05809883400797844, "rewards/rejected": -0.041929781436920166, "step": 2570 }, { "epoch": 0.17, "learning_rate": 4.928318413331791e-06, "logits/chosen": -2.3878073692321777, "logits/rejected": -2.1812081336975098, "logps/chosen": -211.0451202392578, "logps/rejected": -205.4989776611328, "loss": 0.0377, "rewards/accuracies": 0.5, "rewards/chosen": 0.000909944239538163, "rewards/margins": 0.05219554901123047, "rewards/rejected": -0.051285602152347565, "step": 2580 }, { "epoch": 0.17, "learning_rate": 4.926954572659855e-06, "logits/chosen": -2.166869878768921, "logits/rejected": -2.1766741275787354, "logps/chosen": -237.266357421875, "logps/rejected": -273.09771728515625, "loss": 0.0334, "rewards/accuracies": 0.75, "rewards/chosen": 0.024646395817399025, "rewards/margins": 0.09322737157344818, "rewards/rejected": -0.06858097016811371, "step": 2590 }, { "epoch": 0.17, "learning_rate": 4.925578071804013e-06, "logits/chosen": -2.1840600967407227, "logits/rejected": -2.1330018043518066, "logps/chosen": -234.78652954101562, "logps/rejected": -307.0347900390625, "loss": 0.0196, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.025403816252946854, "rewards/margins": 0.07418211549520493, "rewards/rejected": -0.09958592802286148, "step": 2600 }, { "epoch": 0.17, "eval_logits/chosen": -2.287087917327881, "eval_logits/rejected": -2.1024510860443115, "eval_logps/chosen": -233.3758544921875, "eval_logps/rejected": -227.7709503173828, "eval_loss": 0.028082743287086487, "eval_rewards/accuracies": 0.6499999761581421, "eval_rewards/chosen": -0.006854598876088858, "eval_rewards/margins": 0.07394073158502579, "eval_rewards/rejected": -0.08079533278942108, "eval_runtime": 714.8887, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 2600 }, { "epoch": 0.17, "learning_rate": 4.924188917944763e-06, "logits/chosen": -2.354065418243408, "logits/rejected": -2.1704587936401367, "logps/chosen": -219.37435913085938, "logps/rejected": -207.4625701904297, "loss": 0.0297, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.003153681056573987, "rewards/margins": 0.10804332792758942, "rewards/rejected": -0.1048896461725235, "step": 2610 }, { "epoch": 0.17, "learning_rate": 4.922787118328617e-06, "logits/chosen": -2.376201629638672, "logits/rejected": -2.0523414611816406, "logps/chosen": -235.42984008789062, "logps/rejected": -170.25169372558594, "loss": 0.0351, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.027060797438025475, "rewards/margins": 0.055673640221357346, "rewards/rejected": -0.08273444324731827, "step": 2620 }, { "epoch": 0.17, "learning_rate": 4.921372680268045e-06, "logits/chosen": -2.3491272926330566, "logits/rejected": -2.028884172439575, "logps/chosen": -239.7322540283203, "logps/rejected": -218.17904663085938, "loss": 0.0337, "rewards/accuracies": 0.5, "rewards/chosen": -0.048637766391038895, "rewards/margins": 0.044077835977077484, "rewards/rejected": -0.09271560609340668, "step": 2630 }, { "epoch": 0.17, "learning_rate": 4.919945611141451e-06, "logits/chosen": -2.4218690395355225, "logits/rejected": -2.043612003326416, "logps/chosen": -225.4250030517578, "logps/rejected": -177.67538452148438, "loss": 0.0324, "rewards/accuracies": 0.625, "rewards/chosen": -0.0009518010774627328, "rewards/margins": 0.05384860187768936, "rewards/rejected": -0.054800402373075485, "step": 2640 }, { "epoch": 0.17, "learning_rate": 4.918505918393125e-06, "logits/chosen": -2.279812812805176, "logits/rejected": -2.1329543590545654, "logps/chosen": -170.7287139892578, "logps/rejected": -207.5980224609375, "loss": 0.0429, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.00849010981619358, "rewards/margins": 0.06747962534427643, "rewards/rejected": -0.07596974074840546, "step": 2650 }, { "epoch": 0.17, "learning_rate": 4.91705360953321e-06, "logits/chosen": -2.3256969451904297, "logits/rejected": -2.0999441146850586, "logps/chosen": -254.2303924560547, "logps/rejected": -237.78878784179688, "loss": 0.0325, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04371384158730507, "rewards/margins": 0.08106885105371475, "rewards/rejected": -0.12478268146514893, "step": 2660 }, { "epoch": 0.17, "learning_rate": 4.9155886921376615e-06, "logits/chosen": -2.2445991039276123, "logits/rejected": -2.2022550106048584, "logps/chosen": -216.46865844726562, "logps/rejected": -250.0487060546875, "loss": 0.0522, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05250120908021927, "rewards/margins": 0.05955871194601059, "rewards/rejected": -0.11205992847681046, "step": 2670 }, { "epoch": 0.18, "learning_rate": 4.914111173848205e-06, "logits/chosen": -2.3046090602874756, "logits/rejected": -2.2570502758026123, "logps/chosen": -238.754150390625, "logps/rejected": -233.6313934326172, "loss": 0.0217, "rewards/accuracies": 0.625, "rewards/chosen": -0.028612712398171425, "rewards/margins": 0.05147603899240494, "rewards/rejected": -0.08008874207735062, "step": 2680 }, { "epoch": 0.18, "learning_rate": 4.9126210623723e-06, "logits/chosen": -2.085294246673584, "logits/rejected": -2.254727840423584, "logps/chosen": -202.56884765625, "logps/rejected": -253.7330322265625, "loss": 0.023, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.010977232828736305, "rewards/margins": 0.08696094900369644, "rewards/rejected": -0.0979381650686264, "step": 2690 }, { "epoch": 0.18, "learning_rate": 4.911118365483098e-06, "logits/chosen": -2.181164264678955, "logits/rejected": -2.2666800022125244, "logps/chosen": -209.20834350585938, "logps/rejected": -235.704833984375, "loss": 0.0317, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.0372706837952137, "rewards/margins": 0.08535125106573105, "rewards/rejected": -0.12262193113565445, "step": 2700 }, { "epoch": 0.18, "eval_logits/chosen": -2.294182062149048, "eval_logits/rejected": -2.1089656352996826, "eval_logps/chosen": -238.58056640625, "eval_logps/rejected": -232.78578186035156, "eval_loss": 0.027993008494377136, "eval_rewards/accuracies": 0.6545000076293945, "eval_rewards/chosen": -0.03287803754210472, "eval_rewards/margins": 0.07299138605594635, "eval_rewards/rejected": -0.10586943477392197, "eval_runtime": 713.5186, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.402, "step": 2700 }, { "epoch": 0.18, "learning_rate": 4.909603091019403e-06, "logits/chosen": -2.470644235610962, "logits/rejected": -2.1014533042907715, "logps/chosen": -237.65869140625, "logps/rejected": -215.0699005126953, "loss": 0.0133, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.006406778935343027, "rewards/margins": 0.0701574832201004, "rewards/rejected": -0.07656426727771759, "step": 2710 }, { "epoch": 0.18, "learning_rate": 4.908075246885626e-06, "logits/chosen": -2.247979164123535, "logits/rejected": -2.1756398677825928, "logps/chosen": -155.73165893554688, "logps/rejected": -134.16079711914062, "loss": 0.0646, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.02502075955271721, "rewards/margins": 0.0291023887693882, "rewards/rejected": -0.054123152047395706, "step": 2720 }, { "epoch": 0.18, "learning_rate": 4.906534841051755e-06, "logits/chosen": -2.1049163341522217, "logits/rejected": -2.167664051055908, "logps/chosen": -247.0775604248047, "logps/rejected": -268.4331970214844, "loss": 0.0212, "rewards/accuracies": 0.625, "rewards/chosen": -0.018370602279901505, "rewards/margins": 0.051600076258182526, "rewards/rejected": -0.06997067481279373, "step": 2730 }, { "epoch": 0.18, "learning_rate": 4.904981881553297e-06, "logits/chosen": -2.3743033409118652, "logits/rejected": -2.0446043014526367, "logps/chosen": -228.78414916992188, "logps/rejected": -174.08529663085938, "loss": 0.0243, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.022939365357160568, "rewards/margins": 0.0470966100692749, "rewards/rejected": -0.07003597170114517, "step": 2740 }, { "epoch": 0.18, "learning_rate": 4.903416376491252e-06, "logits/chosen": -2.369668960571289, "logits/rejected": -1.9818544387817383, "logps/chosen": -283.78900146484375, "logps/rejected": -263.84686279296875, "loss": 0.026, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.027483994141221046, "rewards/margins": 0.10324974358081818, "rewards/rejected": -0.13073372840881348, "step": 2750 }, { "epoch": 0.18, "learning_rate": 4.90183833403206e-06, "logits/chosen": -2.4412343502044678, "logits/rejected": -2.286303997039795, "logps/chosen": -268.1914978027344, "logps/rejected": -252.0909423828125, "loss": 0.0257, "rewards/accuracies": 0.625, "rewards/chosen": -0.019891971722245216, "rewards/margins": 0.10076215118169785, "rewards/rejected": -0.12065412104129791, "step": 2760 }, { "epoch": 0.18, "learning_rate": 4.900247762407564e-06, "logits/chosen": -2.191983938217163, "logits/rejected": -1.9838281869888306, "logps/chosen": -181.60751342773438, "logps/rejected": -223.0465850830078, "loss": 0.0216, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04657771438360214, "rewards/margins": 0.09698217362165451, "rewards/rejected": -0.14355987310409546, "step": 2770 }, { "epoch": 0.18, "learning_rate": 4.898644669914965e-06, "logits/chosen": -2.253765821456909, "logits/rejected": -2.1682329177856445, "logps/chosen": -232.3789825439453, "logps/rejected": -233.66531372070312, "loss": 0.0279, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04153277724981308, "rewards/margins": 0.07556191831827164, "rewards/rejected": -0.11709471046924591, "step": 2780 }, { "epoch": 0.18, "learning_rate": 4.897029064916778e-06, "logits/chosen": -2.0696494579315186, "logits/rejected": -1.8728134632110596, "logps/chosen": -218.97836303710938, "logps/rejected": -221.40316772460938, "loss": 0.0279, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.054142218083143234, "rewards/margins": 0.07406821846961975, "rewards/rejected": -0.1282104253768921, "step": 2790 }, { "epoch": 0.18, "learning_rate": 4.895400955840791e-06, "logits/chosen": -2.437290668487549, "logits/rejected": -1.7853336334228516, "logps/chosen": -222.3488006591797, "logps/rejected": -196.37698364257812, "loss": 0.036, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.009987411089241505, "rewards/margins": 0.07862985879182816, "rewards/rejected": -0.08861726522445679, "step": 2800 }, { "epoch": 0.18, "eval_logits/chosen": -2.2897098064422607, "eval_logits/rejected": -2.105027437210083, "eval_logps/chosen": -235.5567169189453, "eval_logps/rejected": -228.9888153076172, "eval_loss": 0.02788878232240677, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": -0.017758876085281372, "eval_rewards/margins": 0.06912563741207123, "eval_rewards/rejected": -0.0868845209479332, "eval_runtime": 712.3028, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 2800 }, { "epoch": 0.18, "learning_rate": 4.893760351180018e-06, "logits/chosen": -2.28792142868042, "logits/rejected": -2.2265536785125732, "logps/chosen": -203.72744750976562, "logps/rejected": -219.5680389404297, "loss": 0.0225, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.025930937379598618, "rewards/margins": 0.05369790643453598, "rewards/rejected": -0.0796288475394249, "step": 2810 }, { "epoch": 0.18, "learning_rate": 4.892107259492657e-06, "logits/chosen": -2.2479248046875, "logits/rejected": -2.009706974029541, "logps/chosen": -243.56875610351562, "logps/rejected": -256.754638671875, "loss": 0.0203, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0185473021119833, "rewards/margins": 0.035834766924381256, "rewards/rejected": -0.054382067173719406, "step": 2820 }, { "epoch": 0.19, "learning_rate": 4.890441689402042e-06, "logits/chosen": -2.3838446140289307, "logits/rejected": -2.2034523487091064, "logps/chosen": -338.8334655761719, "logps/rejected": -308.4311218261719, "loss": 0.0111, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.01664288341999054, "rewards/margins": 0.08798809349536896, "rewards/rejected": -0.1046309843659401, "step": 2830 }, { "epoch": 0.19, "learning_rate": 4.888763649596606e-06, "logits/chosen": -2.3975675106048584, "logits/rejected": -2.17059326171875, "logps/chosen": -214.5126190185547, "logps/rejected": -220.7166290283203, "loss": 0.0575, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.021754657849669456, "rewards/margins": 0.07502701133489609, "rewards/rejected": -0.0967816710472107, "step": 2840 }, { "epoch": 0.19, "learning_rate": 4.887073148829824e-06, "logits/chosen": -2.308504581451416, "logits/rejected": -2.1595101356506348, "logps/chosen": -265.9504089355469, "logps/rejected": -261.0517578125, "loss": 0.0261, "rewards/accuracies": 0.625, "rewards/chosen": 0.004765903111547232, "rewards/margins": 0.09018461406230927, "rewards/rejected": -0.0854187160730362, "step": 2850 }, { "epoch": 0.19, "learning_rate": 4.885370195920177e-06, "logits/chosen": -2.1859519481658936, "logits/rejected": -2.124957323074341, "logps/chosen": -198.3850860595703, "logps/rejected": -204.52525329589844, "loss": 0.0366, "rewards/accuracies": 0.625, "rewards/chosen": -0.06530681997537613, "rewards/margins": 0.07356669753789902, "rewards/rejected": -0.13887353241443634, "step": 2860 }, { "epoch": 0.19, "learning_rate": 4.883654799751101e-06, "logits/chosen": -2.1068179607391357, "logits/rejected": -2.3324809074401855, "logps/chosen": -222.74520874023438, "logps/rejected": -269.09893798828125, "loss": 0.0439, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.035180218517780304, "rewards/margins": 0.05806810408830643, "rewards/rejected": -0.09324832260608673, "step": 2870 }, { "epoch": 0.19, "learning_rate": 4.8819269692709435e-06, "logits/chosen": -2.4083595275878906, "logits/rejected": -2.236450672149658, "logps/chosen": -275.53363037109375, "logps/rejected": -219.38052368164062, "loss": 0.016, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04243696108460426, "rewards/margins": 0.08104520291090012, "rewards/rejected": -0.12348216772079468, "step": 2880 }, { "epoch": 0.19, "learning_rate": 4.880186713492915e-06, "logits/chosen": -2.250185251235962, "logits/rejected": -2.0641236305236816, "logps/chosen": -243.86288452148438, "logps/rejected": -200.46041870117188, "loss": 0.02, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08072765171527863, "rewards/margins": 0.051097385585308075, "rewards/rejected": -0.1318250447511673, "step": 2890 }, { "epoch": 0.19, "learning_rate": 4.878434041495041e-06, "logits/chosen": -2.289459466934204, "logits/rejected": -2.408504009246826, "logps/chosen": -245.63339233398438, "logps/rejected": -264.4200439453125, "loss": 0.0353, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.04233827441930771, "rewards/margins": 0.08462206274271011, "rewards/rejected": -0.12696032226085663, "step": 2900 }, { "epoch": 0.19, "eval_logits/chosen": -2.284769296646118, "eval_logits/rejected": -2.0999910831451416, "eval_logps/chosen": -240.31153869628906, "eval_logps/rejected": -233.4534912109375, "eval_loss": 0.02789762057363987, "eval_rewards/accuracies": 0.6445000171661377, "eval_rewards/chosen": -0.04153289273381233, "eval_rewards/margins": 0.06767502427101135, "eval_rewards/rejected": -0.10920792073011398, "eval_runtime": 712.6645, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 2900 }, { "epoch": 0.19, "learning_rate": 4.876668962420117e-06, "logits/chosen": -2.2638027667999268, "logits/rejected": -2.0080816745758057, "logps/chosen": -292.7586975097656, "logps/rejected": -245.6000518798828, "loss": 0.0416, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.015538054518401623, "rewards/margins": 0.062287408858537674, "rewards/rejected": -0.07782547175884247, "step": 2910 }, { "epoch": 0.19, "learning_rate": 4.87489148547566e-06, "logits/chosen": -2.274672031402588, "logits/rejected": -2.128838539123535, "logps/chosen": -270.25103759765625, "logps/rejected": -244.59774780273438, "loss": 0.0387, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.05196281149983406, "rewards/margins": 0.03759818524122238, "rewards/rejected": -0.08956098556518555, "step": 2920 }, { "epoch": 0.19, "learning_rate": 4.873101619933862e-06, "logits/chosen": -2.487917900085449, "logits/rejected": -2.1313300132751465, "logps/chosen": -270.21759033203125, "logps/rejected": -234.5707550048828, "loss": 0.0343, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.02845207415521145, "rewards/margins": 0.06834385544061661, "rewards/rejected": -0.0967959314584732, "step": 2930 }, { "epoch": 0.19, "learning_rate": 4.8712993751315385e-06, "logits/chosen": -2.271152973175049, "logits/rejected": -2.21158504486084, "logps/chosen": -124.94087219238281, "logps/rejected": -136.87136840820312, "loss": 0.048, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.02141464687883854, "rewards/margins": 0.04014318436384201, "rewards/rejected": -0.0615578293800354, "step": 2940 }, { "epoch": 0.19, "learning_rate": 4.869484760470079e-06, "logits/chosen": -2.31473970413208, "logits/rejected": -2.109999418258667, "logps/chosen": -193.41445922851562, "logps/rejected": -172.94094848632812, "loss": 0.0164, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.02376813441514969, "rewards/margins": 0.07912831753492355, "rewards/rejected": -0.10289645195007324, "step": 2950 }, { "epoch": 0.19, "learning_rate": 4.867657785415404e-06, "logits/chosen": -2.241927146911621, "logits/rejected": -1.9641263484954834, "logps/chosen": -258.5636291503906, "logps/rejected": -236.51513671875, "loss": 0.0319, "rewards/accuracies": 0.75, "rewards/chosen": -0.04732293635606766, "rewards/margins": 0.09848068654537201, "rewards/rejected": -0.14580364525318146, "step": 2960 }, { "epoch": 0.19, "learning_rate": 4.865818459497911e-06, "logits/chosen": -2.4856972694396973, "logits/rejected": -2.0244452953338623, "logps/chosen": -294.99627685546875, "logps/rejected": -221.91909790039062, "loss": 0.0242, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05408806353807449, "rewards/margins": 0.05860002711415291, "rewards/rejected": -0.1126880869269371, "step": 2970 }, { "epoch": 0.19, "learning_rate": 4.863966792312423e-06, "logits/chosen": -2.355971336364746, "logits/rejected": -2.1137917041778564, "logps/chosen": -245.83828735351562, "logps/rejected": -227.0188446044922, "loss": 0.0206, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.02189687080681324, "rewards/margins": 0.11115972697734833, "rewards/rejected": -0.1330566108226776, "step": 2980 }, { "epoch": 0.2, "learning_rate": 4.862102793518145e-06, "logits/chosen": -2.2027151584625244, "logits/rejected": -2.2636585235595703, "logps/chosen": -205.7113494873047, "logps/rejected": -231.49172973632812, "loss": 0.0363, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05714733526110649, "rewards/margins": 0.09427683800458908, "rewards/rejected": -0.15142419934272766, "step": 2990 }, { "epoch": 0.2, "learning_rate": 4.8602264728386075e-06, "logits/chosen": -2.3041369915008545, "logits/rejected": -2.1546449661254883, "logps/chosen": -260.479736328125, "logps/rejected": -273.4519958496094, "loss": 0.0259, "rewards/accuracies": 0.75, "rewards/chosen": -0.03990109637379646, "rewards/margins": 0.0819544792175293, "rewards/rejected": -0.12185557186603546, "step": 3000 }, { "epoch": 0.2, "eval_logits/chosen": -2.274144172668457, "eval_logits/rejected": -2.088632345199585, "eval_logps/chosen": -239.57725524902344, "eval_logps/rejected": -235.87315368652344, "eval_loss": 0.028896113857626915, "eval_rewards/accuracies": 0.6504999995231628, "eval_rewards/chosen": -0.037861473858356476, "eval_rewards/margins": 0.08344479650259018, "eval_rewards/rejected": -0.12130627781152725, "eval_runtime": 714.9786, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.399, "step": 3000 }, { "epoch": 0.2, "learning_rate": 4.858337840061616e-06, "logits/chosen": -2.265939712524414, "logits/rejected": -2.188263416290283, "logps/chosen": -187.27552795410156, "logps/rejected": -255.88009643554688, "loss": 0.0266, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.02097577229142189, "rewards/margins": 0.07477692514657974, "rewards/rejected": -0.09575269371271133, "step": 3010 }, { "epoch": 0.2, "learning_rate": 4.856436905039208e-06, "logits/chosen": -2.2863521575927734, "logits/rejected": -2.1258704662323, "logps/chosen": -214.4219970703125, "logps/rejected": -198.88104248046875, "loss": 0.0256, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.016084687784314156, "rewards/margins": 0.09423195570707321, "rewards/rejected": -0.11031664907932281, "step": 3020 }, { "epoch": 0.2, "learning_rate": 4.854523677687588e-06, "logits/chosen": -2.148622989654541, "logits/rejected": -2.218465566635132, "logps/chosen": -191.50128173828125, "logps/rejected": -225.01953125, "loss": 0.0247, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0512787401676178, "rewards/margins": 0.07560839504003525, "rewards/rejected": -0.12688712775707245, "step": 3030 }, { "epoch": 0.2, "learning_rate": 4.85259816798709e-06, "logits/chosen": -2.374782085418701, "logits/rejected": -1.833749532699585, "logps/chosen": -297.7157897949219, "logps/rejected": -238.33712768554688, "loss": 0.0243, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.049768656492233276, "rewards/margins": 0.10588622093200684, "rewards/rejected": -0.1556548923254013, "step": 3040 }, { "epoch": 0.2, "learning_rate": 4.850660385982114e-06, "logits/chosen": -2.360474109649658, "logits/rejected": -2.190361499786377, "logps/chosen": -258.0581359863281, "logps/rejected": -216.6773223876953, "loss": 0.047, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.051694877445697784, "rewards/margins": 0.06950264424085617, "rewards/rejected": -0.12119752168655396, "step": 3050 }, { "epoch": 0.2, "learning_rate": 4.848710341781081e-06, "logits/chosen": -2.0902516841888428, "logits/rejected": -2.1962881088256836, "logps/chosen": -201.42294311523438, "logps/rejected": -206.622314453125, "loss": 0.0326, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.13760720193386078, "rewards/margins": 0.05463032051920891, "rewards/rejected": -0.1922374963760376, "step": 3060 }, { "epoch": 0.2, "learning_rate": 4.846748045556377e-06, "logits/chosen": -2.2907516956329346, "logits/rejected": -1.9787200689315796, "logps/chosen": -262.0197448730469, "logps/rejected": -215.56582641601562, "loss": 0.0412, "rewards/accuracies": 0.625, "rewards/chosen": -0.11266320943832397, "rewards/margins": 0.0702851191163063, "rewards/rejected": -0.18294832110404968, "step": 3070 }, { "epoch": 0.2, "learning_rate": 4.8447735075442995e-06, "logits/chosen": -2.1806905269622803, "logits/rejected": -2.2396817207336426, "logps/chosen": -227.7964324951172, "logps/rejected": -241.79281616210938, "loss": 0.0333, "rewards/accuracies": 0.625, "rewards/chosen": -0.15117108821868896, "rewards/margins": 0.08698441833257675, "rewards/rejected": -0.2381555140018463, "step": 3080 }, { "epoch": 0.2, "learning_rate": 4.8427867380450075e-06, "logits/chosen": -2.3623485565185547, "logits/rejected": -1.9704208374023438, "logps/chosen": -254.1536865234375, "logps/rejected": -228.27432250976562, "loss": 0.0302, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.11907199770212173, "rewards/margins": 0.10185201466083527, "rewards/rejected": -0.2209240198135376, "step": 3090 }, { "epoch": 0.2, "learning_rate": 4.840787747422462e-06, "logits/chosen": -2.329294443130493, "logits/rejected": -2.075838804244995, "logps/chosen": -215.3441925048828, "logps/rejected": -199.28408813476562, "loss": 0.0362, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09844044595956802, "rewards/margins": 0.07098730653524399, "rewards/rejected": -0.169427752494812, "step": 3100 }, { "epoch": 0.2, "eval_logits/chosen": -2.277235269546509, "eval_logits/rejected": -2.0925114154815674, "eval_logps/chosen": -254.0054931640625, "eval_logps/rejected": -249.93931579589844, "eval_loss": 0.028891319409012794, "eval_rewards/accuracies": 0.6485000252723694, "eval_rewards/chosen": -0.1100027933716774, "eval_rewards/margins": 0.08163423091173172, "eval_rewards/rejected": -0.19163702428340912, "eval_runtime": 711.301, "eval_samples_per_second": 2.812, "eval_steps_per_second": 1.406, "step": 3100 }, { "epoch": 0.2, "learning_rate": 4.838776546104378e-06, "logits/chosen": -2.254002332687378, "logits/rejected": -2.2489466667175293, "logps/chosen": -300.79296875, "logps/rejected": -279.73236083984375, "loss": 0.0139, "rewards/accuracies": 0.75, "rewards/chosen": -0.09919861704111099, "rewards/margins": 0.09670659154653549, "rewards/rejected": -0.19590522348880768, "step": 3110 }, { "epoch": 0.2, "learning_rate": 4.836753144582168e-06, "logits/chosen": -2.2163474559783936, "logits/rejected": -1.9966824054718018, "logps/chosen": -262.107666015625, "logps/rejected": -259.97833251953125, "loss": 0.0334, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09938045591115952, "rewards/margins": 0.10991636663675308, "rewards/rejected": -0.2092967927455902, "step": 3120 }, { "epoch": 0.2, "learning_rate": 4.834717553410884e-06, "logits/chosen": -2.2825305461883545, "logits/rejected": -2.0499181747436523, "logps/chosen": -204.13722229003906, "logps/rejected": -237.09097290039062, "loss": 0.0197, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07430613785982132, "rewards/margins": 0.09642286598682404, "rewards/rejected": -0.17072899639606476, "step": 3130 }, { "epoch": 0.21, "learning_rate": 4.832669783209167e-06, "logits/chosen": -2.1582999229431152, "logits/rejected": -2.2071380615234375, "logps/chosen": -259.179931640625, "logps/rejected": -264.71295166015625, "loss": 0.022, "rewards/accuracies": 0.625, "rewards/chosen": -0.07376949489116669, "rewards/margins": 0.02492070011794567, "rewards/rejected": -0.09869018942117691, "step": 3140 }, { "epoch": 0.21, "learning_rate": 4.8306098446591895e-06, "logits/chosen": -1.8651392459869385, "logits/rejected": -2.0267395973205566, "logps/chosen": -192.75889587402344, "logps/rejected": -228.6290740966797, "loss": 0.0408, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.08245015144348145, "rewards/margins": 0.05469425767660141, "rewards/rejected": -0.13714441657066345, "step": 3150 }, { "epoch": 0.21, "learning_rate": 4.828537748506601e-06, "logits/chosen": -2.396944522857666, "logits/rejected": -2.1304783821105957, "logps/chosen": -284.6820068359375, "logps/rejected": -235.306884765625, "loss": 0.0164, "rewards/accuracies": 0.5, "rewards/chosen": -0.05641879513859749, "rewards/margins": 0.034355297684669495, "rewards/rejected": -0.09077408909797668, "step": 3160 }, { "epoch": 0.21, "learning_rate": 4.826453505560469e-06, "logits/chosen": -2.073132038116455, "logits/rejected": -2.0445899963378906, "logps/chosen": -204.7270965576172, "logps/rejected": -200.5258331298828, "loss": 0.0339, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.06873993575572968, "rewards/margins": 0.04747641831636429, "rewards/rejected": -0.11621636152267456, "step": 3170 }, { "epoch": 0.21, "learning_rate": 4.824357126693226e-06, "logits/chosen": -2.1617677211761475, "logits/rejected": -1.79251229763031, "logps/chosen": -271.9390869140625, "logps/rejected": -233.08486938476562, "loss": 0.0251, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.06597967445850372, "rewards/margins": 0.049365781247615814, "rewards/rejected": -0.11534545570611954, "step": 3180 }, { "epoch": 0.21, "learning_rate": 4.8222486228406105e-06, "logits/chosen": -2.34342622756958, "logits/rejected": -2.0599923133850098, "logps/chosen": -221.85867309570312, "logps/rejected": -201.22842407226562, "loss": 0.0179, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05309159681200981, "rewards/margins": 0.07025385648012161, "rewards/rejected": -0.12334545701742172, "step": 3190 }, { "epoch": 0.21, "learning_rate": 4.820128005001612e-06, "logits/chosen": -2.009340763092041, "logits/rejected": -1.9286329746246338, "logps/chosen": -225.3570556640625, "logps/rejected": -231.0119171142578, "loss": 0.0319, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.031817398965358734, "rewards/margins": 0.13598847389221191, "rewards/rejected": -0.16780588030815125, "step": 3200 }, { "epoch": 0.21, "eval_logits/chosen": -2.2569305896759033, "eval_logits/rejected": -2.074082374572754, "eval_logps/chosen": -242.5390625, "eval_logps/rejected": -238.02999877929688, "eval_loss": 0.028289152309298515, "eval_rewards/accuracies": 0.6384999752044678, "eval_rewards/chosen": -0.05267051234841347, "eval_rewards/margins": 0.07942002266645432, "eval_rewards/rejected": -0.1320905238389969, "eval_runtime": 711.7306, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 3200 }, { "epoch": 0.21, "learning_rate": 4.817995284238412e-06, "logits/chosen": -2.0534753799438477, "logits/rejected": -2.1341710090637207, "logps/chosen": -207.1750030517578, "logps/rejected": -253.8484344482422, "loss": 0.0204, "rewards/accuracies": 0.625, "rewards/chosen": -0.05325322598218918, "rewards/margins": 0.09594316780567169, "rewards/rejected": -0.14919638633728027, "step": 3210 }, { "epoch": 0.21, "learning_rate": 4.815850471676327e-06, "logits/chosen": -2.1988625526428223, "logits/rejected": -2.0713298320770264, "logps/chosen": -246.859619140625, "logps/rejected": -262.5884704589844, "loss": 0.0291, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04222807660698891, "rewards/margins": 0.09988073259592056, "rewards/rejected": -0.14210879802703857, "step": 3220 }, { "epoch": 0.21, "learning_rate": 4.813693578503751e-06, "logits/chosen": -2.2338128089904785, "logits/rejected": -2.0544989109039307, "logps/chosen": -305.856201171875, "logps/rejected": -265.7468566894531, "loss": 0.016, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04674705117940903, "rewards/margins": 0.076741524040699, "rewards/rejected": -0.12348856776952744, "step": 3230 }, { "epoch": 0.21, "learning_rate": 4.811524615972093e-06, "logits/chosen": -2.2481729984283447, "logits/rejected": -2.1175618171691895, "logps/chosen": -239.32760620117188, "logps/rejected": -264.2997131347656, "loss": 0.0413, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.046959999948740005, "rewards/margins": 0.08384885638952255, "rewards/rejected": -0.13080886006355286, "step": 3240 }, { "epoch": 0.21, "learning_rate": 4.809343595395724e-06, "logits/chosen": -2.4664251804351807, "logits/rejected": -2.2723288536071777, "logps/chosen": -202.58583068847656, "logps/rejected": -181.0174102783203, "loss": 0.0444, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06765086203813553, "rewards/margins": 0.04510151222348213, "rewards/rejected": -0.11275237798690796, "step": 3250 }, { "epoch": 0.21, "learning_rate": 4.807150528151918e-06, "logits/chosen": -2.2760791778564453, "logits/rejected": -2.1086182594299316, "logps/chosen": -177.58126831054688, "logps/rejected": -218.3994140625, "loss": 0.0188, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05444352701306343, "rewards/margins": 0.11475624144077301, "rewards/rejected": -0.16919977962970734, "step": 3260 }, { "epoch": 0.21, "learning_rate": 4.804945425680787e-06, "logits/chosen": -2.236807346343994, "logits/rejected": -2.308253526687622, "logps/chosen": -207.42776489257812, "logps/rejected": -198.57290649414062, "loss": 0.0331, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0949227437376976, "rewards/margins": 0.05611693114042282, "rewards/rejected": -0.15103968977928162, "step": 3270 }, { "epoch": 0.21, "learning_rate": 4.802728299485225e-06, "logits/chosen": -2.0927319526672363, "logits/rejected": -2.0800490379333496, "logps/chosen": -170.92227172851562, "logps/rejected": -203.00479125976562, "loss": 0.0283, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.10158933699131012, "rewards/margins": 0.052754104137420654, "rewards/rejected": -0.15434344112873077, "step": 3280 }, { "epoch": 0.22, "learning_rate": 4.8004991611308495e-06, "logits/chosen": -2.391540050506592, "logits/rejected": -2.102921485900879, "logps/chosen": -247.2994842529297, "logps/rejected": -247.5079803466797, "loss": 0.0127, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.028728622943162918, "rewards/margins": 0.08129361271858215, "rewards/rejected": -0.11002223193645477, "step": 3290 }, { "epoch": 0.22, "learning_rate": 4.798258022245937e-06, "logits/chosen": -2.3174631595611572, "logits/rejected": -1.9177128076553345, "logps/chosen": -228.5331268310547, "logps/rejected": -206.4042205810547, "loss": 0.0333, "rewards/accuracies": 0.625, "rewards/chosen": -0.054633207619190216, "rewards/margins": 0.08168105781078339, "rewards/rejected": -0.136314257979393, "step": 3300 }, { "epoch": 0.22, "eval_logits/chosen": -2.252087354660034, "eval_logits/rejected": -2.0689594745635986, "eval_logps/chosen": -242.1912841796875, "eval_logps/rejected": -239.54627990722656, "eval_loss": 0.027968592941761017, "eval_rewards/accuracies": 0.6535000205039978, "eval_rewards/chosen": -0.050931625068187714, "eval_rewards/margins": 0.08874025195837021, "eval_rewards/rejected": -0.13967186212539673, "eval_runtime": 712.4607, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.404, "step": 3300 }, { "epoch": 0.22, "learning_rate": 4.796004894521365e-06, "logits/chosen": -2.2396187782287598, "logits/rejected": -2.054224729537964, "logps/chosen": -238.64242553710938, "logps/rejected": -279.61846923828125, "loss": 0.0388, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.050363294780254364, "rewards/margins": 0.09859765321016312, "rewards/rejected": -0.14896094799041748, "step": 3310 }, { "epoch": 0.22, "learning_rate": 4.7937397897105545e-06, "logits/chosen": -2.2537460327148438, "logits/rejected": -2.175597667694092, "logps/chosen": -211.47531127929688, "logps/rejected": -194.06646728515625, "loss": 0.0226, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03121241368353367, "rewards/margins": 0.03902296721935272, "rewards/rejected": -0.07023537904024124, "step": 3320 }, { "epoch": 0.22, "learning_rate": 4.791462719629399e-06, "logits/chosen": -2.275513172149658, "logits/rejected": -2.131743907928467, "logps/chosen": -192.20516967773438, "logps/rejected": -191.20184326171875, "loss": 0.017, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03240009397268295, "rewards/margins": 0.10704060643911362, "rewards/rejected": -0.13944070041179657, "step": 3330 }, { "epoch": 0.22, "learning_rate": 4.789173696156212e-06, "logits/chosen": -2.2883973121643066, "logits/rejected": -1.9347209930419922, "logps/chosen": -281.06097412109375, "logps/rejected": -289.59222412109375, "loss": 0.0234, "rewards/accuracies": 0.875, "rewards/chosen": -0.02310190722346306, "rewards/margins": 0.13590338826179504, "rewards/rejected": -0.1590052843093872, "step": 3340 }, { "epoch": 0.22, "learning_rate": 4.786872731231662e-06, "logits/chosen": -2.317486047744751, "logits/rejected": -2.1974265575408936, "logps/chosen": -227.2639923095703, "logps/rejected": -233.25320434570312, "loss": 0.0278, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05186513066291809, "rewards/margins": 0.08679147809743881, "rewards/rejected": -0.1386566013097763, "step": 3350 }, { "epoch": 0.22, "learning_rate": 4.784559836858709e-06, "logits/chosen": -2.2945048809051514, "logits/rejected": -1.8284685611724854, "logps/chosen": -242.6492919921875, "logps/rejected": -225.825927734375, "loss": 0.0113, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04366091638803482, "rewards/margins": 0.06798774749040604, "rewards/rejected": -0.11164864152669907, "step": 3360 }, { "epoch": 0.22, "learning_rate": 4.782235025102542e-06, "logits/chosen": -2.2957305908203125, "logits/rejected": -2.2282955646514893, "logps/chosen": -235.95361328125, "logps/rejected": -237.4597625732422, "loss": 0.0292, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04817141592502594, "rewards/margins": 0.09493356943130493, "rewards/rejected": -0.14310500025749207, "step": 3370 }, { "epoch": 0.22, "learning_rate": 4.779898308090519e-06, "logits/chosen": -2.2512454986572266, "logits/rejected": -2.0390985012054443, "logps/chosen": -286.148681640625, "logps/rejected": -264.2273864746094, "loss": 0.0383, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05735722929239273, "rewards/margins": 0.07454714924097061, "rewards/rejected": -0.13190439343452454, "step": 3380 }, { "epoch": 0.22, "learning_rate": 4.777549698012101e-06, "logits/chosen": -2.1888813972473145, "logits/rejected": -2.029754161834717, "logps/chosen": -253.3921356201172, "logps/rejected": -250.53225708007812, "loss": 0.0201, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.050877444446086884, "rewards/margins": 0.0814988762140274, "rewards/rejected": -0.1323763132095337, "step": 3390 }, { "epoch": 0.22, "learning_rate": 4.775189207118787e-06, "logits/chosen": -2.2187693119049072, "logits/rejected": -2.02724027633667, "logps/chosen": -279.66302490234375, "logps/rejected": -274.4883728027344, "loss": 0.0347, "rewards/accuracies": 0.625, "rewards/chosen": -0.03922683373093605, "rewards/margins": 0.08581504225730896, "rewards/rejected": -0.1250418722629547, "step": 3400 }, { "epoch": 0.22, "eval_logits/chosen": -2.2767276763916016, "eval_logits/rejected": -2.093092203140259, "eval_logps/chosen": -240.41015625, "eval_logps/rejected": -234.529296875, "eval_loss": 0.028484875336289406, "eval_rewards/accuracies": 0.6420000195503235, "eval_rewards/chosen": -0.042025960981845856, "eval_rewards/margins": 0.07256097346544266, "eval_rewards/rejected": -0.11458693444728851, "eval_runtime": 713.4014, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.402, "step": 3400 }, { "epoch": 0.22, "learning_rate": 4.772816847724054e-06, "logits/chosen": -2.382833957672119, "logits/rejected": -2.1141469478607178, "logps/chosen": -230.86587524414062, "logps/rejected": -238.9117889404297, "loss": 0.0544, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.041556429117918015, "rewards/margins": 0.04274109750986099, "rewards/rejected": -0.08429752290248871, "step": 3410 }, { "epoch": 0.22, "learning_rate": 4.770432632203294e-06, "logits/chosen": -2.1297221183776855, "logits/rejected": -2.0429019927978516, "logps/chosen": -253.8579864501953, "logps/rejected": -211.4426727294922, "loss": 0.0206, "rewards/accuracies": 0.625, "rewards/chosen": -0.05105436593294144, "rewards/margins": 0.03951476141810417, "rewards/rejected": -0.09056912362575531, "step": 3420 }, { "epoch": 0.22, "learning_rate": 4.768036572993738e-06, "logits/chosen": -2.190948724746704, "logits/rejected": -2.286194086074829, "logps/chosen": -287.060546875, "logps/rejected": -284.55426025390625, "loss": 0.0192, "rewards/accuracies": 0.625, "rewards/chosen": -0.03520800173282623, "rewards/margins": 0.065467968583107, "rewards/rejected": -0.10067595541477203, "step": 3430 }, { "epoch": 0.23, "learning_rate": 4.765628682594409e-06, "logits/chosen": -2.364724636077881, "logits/rejected": -2.186753988265991, "logps/chosen": -246.08349609375, "logps/rejected": -238.25064086914062, "loss": 0.0221, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.01163232047110796, "rewards/margins": 0.08034755289554596, "rewards/rejected": -0.09197986871004105, "step": 3440 }, { "epoch": 0.23, "learning_rate": 4.763208973566041e-06, "logits/chosen": -2.1567559242248535, "logits/rejected": -2.1840717792510986, "logps/chosen": -192.0625, "logps/rejected": -228.4545440673828, "loss": 0.0075, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.039664216339588165, "rewards/margins": 0.0883837565779686, "rewards/rejected": -0.12804797291755676, "step": 3450 }, { "epoch": 0.23, "learning_rate": 4.76077745853102e-06, "logits/chosen": -2.3962085247039795, "logits/rejected": -2.249481439590454, "logps/chosen": -259.07061767578125, "logps/rejected": -278.41705322265625, "loss": 0.0226, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04508228972554207, "rewards/margins": 0.09110657125711441, "rewards/rejected": -0.13618886470794678, "step": 3460 }, { "epoch": 0.23, "learning_rate": 4.758334150173322e-06, "logits/chosen": -2.2859044075012207, "logits/rejected": -2.0987162590026855, "logps/chosen": -261.6954040527344, "logps/rejected": -249.35256958007812, "loss": 0.0216, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.006942708045244217, "rewards/margins": 0.06355363130569458, "rewards/rejected": -0.05661092326045036, "step": 3470 }, { "epoch": 0.23, "learning_rate": 4.755879061238439e-06, "logits/chosen": -2.3577704429626465, "logits/rejected": -2.13626766204834, "logps/chosen": -255.3122100830078, "logps/rejected": -252.277587890625, "loss": 0.0192, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.004305616952478886, "rewards/margins": 0.048012375831604004, "rewards/rejected": -0.04370676726102829, "step": 3480 }, { "epoch": 0.23, "learning_rate": 4.753412204533317e-06, "logits/chosen": -2.486016273498535, "logits/rejected": -2.0144317150115967, "logps/chosen": -263.31915283203125, "logps/rejected": -236.2627716064453, "loss": 0.0158, "rewards/accuracies": 0.75, "rewards/chosen": -0.0023054243065416813, "rewards/margins": 0.0916750431060791, "rewards/rejected": -0.09398047626018524, "step": 3490 }, { "epoch": 0.23, "learning_rate": 4.750933592926292e-06, "logits/chosen": -2.3504817485809326, "logits/rejected": -2.0278964042663574, "logps/chosen": -220.9480438232422, "logps/rejected": -212.4861602783203, "loss": 0.025, "rewards/accuracies": 0.75, "rewards/chosen": -0.011101900599896908, "rewards/margins": 0.0937727838754654, "rewards/rejected": -0.10487468540668488, "step": 3500 }, { "epoch": 0.23, "eval_logits/chosen": -2.251335382461548, "eval_logits/rejected": -2.068535566329956, "eval_logps/chosen": -234.412109375, "eval_logps/rejected": -230.6712646484375, "eval_loss": 0.027730992063879967, "eval_rewards/accuracies": 0.656000018119812, "eval_rewards/chosen": -0.012035808525979519, "eval_rewards/margins": 0.08326105773448944, "eval_rewards/rejected": -0.09529686719179153, "eval_runtime": 714.0893, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.4, "step": 3500 }, { "epoch": 0.23, "learning_rate": 4.7484432393470124e-06, "logits/chosen": -2.4342000484466553, "logits/rejected": -1.919426679611206, "logps/chosen": -203.8688507080078, "logps/rejected": -177.36282348632812, "loss": 0.0271, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.008934552781283855, "rewards/margins": 0.14445406198501587, "rewards/rejected": -0.1533885896205902, "step": 3510 }, { "epoch": 0.23, "learning_rate": 4.745941156786385e-06, "logits/chosen": -2.020981788635254, "logits/rejected": -1.9922730922698975, "logps/chosen": -159.48507690429688, "logps/rejected": -218.340576171875, "loss": 0.0586, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03222980350255966, "rewards/margins": 0.1400715559720993, "rewards/rejected": -0.17230133712291718, "step": 3520 }, { "epoch": 0.23, "learning_rate": 4.743427358296497e-06, "logits/chosen": -2.16739559173584, "logits/rejected": -1.9707978963851929, "logps/chosen": -193.88075256347656, "logps/rejected": -241.9403533935547, "loss": 0.0358, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.020862499251961708, "rewards/margins": 0.18376120924949646, "rewards/rejected": -0.2046237289905548, "step": 3530 }, { "epoch": 0.23, "learning_rate": 4.740901856990553e-06, "logits/chosen": -2.110822916030884, "logits/rejected": -1.9231901168823242, "logps/chosen": -258.498291015625, "logps/rejected": -227.58938598632812, "loss": 0.0458, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.009031767025589943, "rewards/margins": 0.06581844389438629, "rewards/rejected": -0.07485021650791168, "step": 3540 }, { "epoch": 0.23, "learning_rate": 4.738364666042804e-06, "logits/chosen": -2.356178045272827, "logits/rejected": -1.9351288080215454, "logps/chosen": -288.6064453125, "logps/rejected": -238.495849609375, "loss": 0.0293, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.015692852437496185, "rewards/margins": 0.06764644384384155, "rewards/rejected": -0.051953595131635666, "step": 3550 }, { "epoch": 0.23, "learning_rate": 4.735815798688483e-06, "logits/chosen": -2.3009400367736816, "logits/rejected": -2.0884649753570557, "logps/chosen": -197.21116638183594, "logps/rejected": -235.48648071289062, "loss": 0.0198, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.005604019854217768, "rewards/margins": 0.09068088978528976, "rewards/rejected": -0.08507686853408813, "step": 3560 }, { "epoch": 0.23, "learning_rate": 4.7332552682237285e-06, "logits/chosen": -2.3205363750457764, "logits/rejected": -1.8733360767364502, "logps/chosen": -172.0836639404297, "logps/rejected": -176.2654266357422, "loss": 0.0245, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.008381395600736141, "rewards/margins": 0.09977124631404877, "rewards/rejected": -0.09138984978199005, "step": 3570 }, { "epoch": 0.23, "learning_rate": 4.7306830880055234e-06, "logits/chosen": -2.2690162658691406, "logits/rejected": -2.1997575759887695, "logps/chosen": -200.47381591796875, "logps/rejected": -222.2290496826172, "loss": 0.0227, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0486999973654747, "rewards/margins": 0.06709084659814835, "rewards/rejected": -0.11579084396362305, "step": 3580 }, { "epoch": 0.23, "learning_rate": 4.728099271451619e-06, "logits/chosen": -2.3077304363250732, "logits/rejected": -2.2104690074920654, "logps/chosen": -195.2092742919922, "logps/rejected": -206.08236694335938, "loss": 0.0267, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.021006273105740547, "rewards/margins": 0.08912817388772964, "rewards/rejected": -0.11013443768024445, "step": 3590 }, { "epoch": 0.24, "learning_rate": 4.725503832040466e-06, "logits/chosen": -2.1276111602783203, "logits/rejected": -2.127262592315674, "logps/chosen": -153.59677124023438, "logps/rejected": -194.3944549560547, "loss": 0.0305, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.022194508463144302, "rewards/margins": 0.07355144619941711, "rewards/rejected": -0.09574595093727112, "step": 3600 }, { "epoch": 0.24, "eval_logits/chosen": -2.2770073413848877, "eval_logits/rejected": -2.092477321624756, "eval_logps/chosen": -234.256103515625, "eval_logps/rejected": -230.46665954589844, "eval_loss": 0.02762630395591259, "eval_rewards/accuracies": 0.6520000100135803, "eval_rewards/chosen": -0.011255734600126743, "eval_rewards/margins": 0.08301801979541779, "eval_rewards/rejected": -0.09427376091480255, "eval_runtime": 711.701, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 3600 }, { "epoch": 0.24, "learning_rate": 4.722896783311152e-06, "logits/chosen": -2.2544467449188232, "logits/rejected": -2.148684024810791, "logps/chosen": -263.32867431640625, "logps/rejected": -328.82470703125, "loss": 0.0268, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.02139856480062008, "rewards/margins": 0.07181017845869064, "rewards/rejected": -0.09320874512195587, "step": 3610 }, { "epoch": 0.24, "learning_rate": 4.720278138863318e-06, "logits/chosen": -2.400420665740967, "logits/rejected": -2.189044952392578, "logps/chosen": -192.6852569580078, "logps/rejected": -173.32254028320312, "loss": 0.0393, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.012817641720175743, "rewards/margins": 0.06222205236554146, "rewards/rejected": -0.07503969967365265, "step": 3620 }, { "epoch": 0.24, "learning_rate": 4.717647912357095e-06, "logits/chosen": -2.3465585708618164, "logits/rejected": -2.433465003967285, "logps/chosen": -279.39959716796875, "logps/rejected": -293.50396728515625, "loss": 0.0238, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.03461218997836113, "rewards/margins": 0.005859696306288242, "rewards/rejected": -0.0404718853533268, "step": 3630 }, { "epoch": 0.24, "learning_rate": 4.715006117513035e-06, "logits/chosen": -2.462501287460327, "logits/rejected": -2.224883556365967, "logps/chosen": -319.16229248046875, "logps/rejected": -278.135498046875, "loss": 0.0307, "rewards/accuracies": 0.625, "rewards/chosen": 0.02481684461236, "rewards/margins": 0.07076647132635117, "rewards/rejected": -0.045949630439281464, "step": 3640 }, { "epoch": 0.24, "learning_rate": 4.7123527681120326e-06, "logits/chosen": -2.2726309299468994, "logits/rejected": -2.1317145824432373, "logps/chosen": -245.8816680908203, "logps/rejected": -234.3596954345703, "loss": 0.034, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.006447536405175924, "rewards/margins": 0.0831485316157341, "rewards/rejected": -0.07670100033283234, "step": 3650 }, { "epoch": 0.24, "learning_rate": 4.7096878779952594e-06, "logits/chosen": -2.351346254348755, "logits/rejected": -2.2983498573303223, "logps/chosen": -278.7234191894531, "logps/rejected": -286.8404846191406, "loss": 0.022, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.01655019074678421, "rewards/margins": 0.053498972207307816, "rewards/rejected": -0.07004915922880173, "step": 3660 }, { "epoch": 0.24, "learning_rate": 4.707011461064086e-06, "logits/chosen": -2.145847797393799, "logits/rejected": -1.9123668670654297, "logps/chosen": -309.30865478515625, "logps/rejected": -283.8884582519531, "loss": 0.0306, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0022481470368802547, "rewards/margins": 0.09385238587856293, "rewards/rejected": -0.09610055387020111, "step": 3670 }, { "epoch": 0.24, "learning_rate": 4.704323531280016e-06, "logits/chosen": -2.2220005989074707, "logits/rejected": -2.047095775604248, "logps/chosen": -328.34033203125, "logps/rejected": -258.6874694824219, "loss": 0.0157, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.005644065327942371, "rewards/margins": 0.06771357357501984, "rewards/rejected": -0.07335765659809113, "step": 3680 }, { "epoch": 0.24, "learning_rate": 4.701624102664606e-06, "logits/chosen": -2.3654987812042236, "logits/rejected": -2.0208535194396973, "logps/chosen": -267.5926208496094, "logps/rejected": -228.2905731201172, "loss": 0.0243, "rewards/accuracies": 0.625, "rewards/chosen": -0.03238372504711151, "rewards/margins": 0.0739908367395401, "rewards/rejected": -0.10637456178665161, "step": 3690 }, { "epoch": 0.24, "learning_rate": 4.698913189299399e-06, "logits/chosen": -2.1872456073760986, "logits/rejected": -2.3029673099517822, "logps/chosen": -196.64865112304688, "logps/rejected": -241.5049591064453, "loss": 0.0331, "rewards/accuracies": 0.625, "rewards/chosen": -0.05248425155878067, "rewards/margins": 0.06174800917506218, "rewards/rejected": -0.11423225700855255, "step": 3700 }, { "epoch": 0.24, "eval_logits/chosen": -2.2712066173553467, "eval_logits/rejected": -2.087006092071533, "eval_logps/chosen": -245.2156524658203, "eval_logps/rejected": -241.05653381347656, "eval_loss": 0.028332557529211044, "eval_rewards/accuracies": 0.6434999704360962, "eval_rewards/chosen": -0.06605348736047745, "eval_rewards/margins": 0.08116975426673889, "eval_rewards/rejected": -0.14722324907779694, "eval_runtime": 712.4623, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.404, "step": 3700 }, { "epoch": 0.24, "learning_rate": 4.696190805325847e-06, "logits/chosen": -2.2979846000671387, "logits/rejected": -2.1545467376708984, "logps/chosen": -217.24502563476562, "logps/rejected": -207.59347534179688, "loss": 0.0129, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.054330892860889435, "rewards/margins": 0.08733570575714111, "rewards/rejected": -0.14166659116744995, "step": 3710 }, { "epoch": 0.24, "learning_rate": 4.693456964945239e-06, "logits/chosen": -2.4013776779174805, "logits/rejected": -1.927514672279358, "logps/chosen": -308.4510192871094, "logps/rejected": -226.40151977539062, "loss": 0.0362, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05172725394368172, "rewards/margins": 0.09521359205245972, "rewards/rejected": -0.14694085717201233, "step": 3720 }, { "epoch": 0.24, "learning_rate": 4.6907116824186245e-06, "logits/chosen": -2.332817792892456, "logits/rejected": -2.287806749343872, "logps/chosen": -233.67672729492188, "logps/rejected": -244.431640625, "loss": 0.0258, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.03174605220556259, "rewards/margins": 0.05475563928484917, "rewards/rejected": -0.08650169521570206, "step": 3730 }, { "epoch": 0.24, "learning_rate": 4.687954972066742e-06, "logits/chosen": -2.222794532775879, "logits/rejected": -1.9519582986831665, "logps/chosen": -229.9512939453125, "logps/rejected": -241.53585815429688, "loss": 0.0305, "rewards/accuracies": 0.75, "rewards/chosen": -0.0006751194596290588, "rewards/margins": 0.16227775812149048, "rewards/rejected": -0.16295287013053894, "step": 3740 }, { "epoch": 0.25, "learning_rate": 4.685186848269944e-06, "logits/chosen": -2.2037625312805176, "logits/rejected": -2.076504945755005, "logps/chosen": -213.8433074951172, "logps/rejected": -188.55340576171875, "loss": 0.0391, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.009854817762970924, "rewards/margins": 0.0604451522231102, "rewards/rejected": -0.07029997557401657, "step": 3750 }, { "epoch": 0.25, "learning_rate": 4.682407325468119e-06, "logits/chosen": -2.3056480884552, "logits/rejected": -1.9488176107406616, "logps/chosen": -216.91049194335938, "logps/rejected": -203.93606567382812, "loss": 0.0189, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.002542532980442047, "rewards/margins": 0.10327658802270889, "rewards/rejected": -0.10073405504226685, "step": 3760 }, { "epoch": 0.25, "learning_rate": 4.67961641816062e-06, "logits/chosen": -2.28625226020813, "logits/rejected": -2.084827423095703, "logps/chosen": -275.9200134277344, "logps/rejected": -245.2871551513672, "loss": 0.0316, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0006162314675748348, "rewards/margins": 0.053486168384552, "rewards/rejected": -0.054102398455142975, "step": 3770 }, { "epoch": 0.25, "learning_rate": 4.676814140906188e-06, "logits/chosen": -2.165799617767334, "logits/rejected": -2.029515504837036, "logps/chosen": -249.5352020263672, "logps/rejected": -234.1592559814453, "loss": 0.0308, "rewards/accuracies": 0.625, "rewards/chosen": -0.049995046108961105, "rewards/margins": 0.07614854723215103, "rewards/rejected": -0.12614358961582184, "step": 3780 }, { "epoch": 0.25, "learning_rate": 4.674000508322872e-06, "logits/chosen": -2.044060230255127, "logits/rejected": -2.1126906871795654, "logps/chosen": -225.5679473876953, "logps/rejected": -250.17538452148438, "loss": 0.0311, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.04016520455479622, "rewards/margins": 0.07711917161941528, "rewards/rejected": -0.1172843724489212, "step": 3790 }, { "epoch": 0.25, "learning_rate": 4.671175535087959e-06, "logits/chosen": -2.2188408374786377, "logits/rejected": -2.1730470657348633, "logps/chosen": -298.844482421875, "logps/rejected": -315.20123291015625, "loss": 0.0351, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03758421167731285, "rewards/margins": 0.09794172644615173, "rewards/rejected": -0.1355259269475937, "step": 3800 }, { "epoch": 0.25, "eval_logits/chosen": -2.305997848510742, "eval_logits/rejected": -2.119776964187622, "eval_logps/chosen": -238.6973876953125, "eval_logps/rejected": -231.64306640625, "eval_loss": 0.029111526906490326, "eval_rewards/accuracies": 0.640999972820282, "eval_rewards/chosen": -0.033462151885032654, "eval_rewards/margins": 0.06669372320175171, "eval_rewards/rejected": -0.10015588253736496, "eval_runtime": 710.7935, "eval_samples_per_second": 2.814, "eval_steps_per_second": 1.407, "step": 3800 }, { "epoch": 0.25, "learning_rate": 4.6683392359378924e-06, "logits/chosen": -2.188131809234619, "logits/rejected": -2.008528232574463, "logps/chosen": -240.27590942382812, "logps/rejected": -228.5452117919922, "loss": 0.0122, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.02563432976603508, "rewards/margins": 0.06892909854650497, "rewards/rejected": -0.09456343948841095, "step": 3810 }, { "epoch": 0.25, "learning_rate": 4.665491625668198e-06, "logits/chosen": -2.103173017501831, "logits/rejected": -2.1502113342285156, "logps/chosen": -164.92218017578125, "logps/rejected": -204.31317138671875, "loss": 0.0308, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04337451234459877, "rewards/margins": 0.07917975634336472, "rewards/rejected": -0.12255426496267319, "step": 3820 }, { "epoch": 0.25, "learning_rate": 4.662632719133407e-06, "logits/chosen": -2.383805274963379, "logits/rejected": -2.0930488109588623, "logps/chosen": -229.5814971923828, "logps/rejected": -176.27459716796875, "loss": 0.0228, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.006080122198909521, "rewards/margins": 0.06850259006023407, "rewards/rejected": -0.06242247670888901, "step": 3830 }, { "epoch": 0.25, "learning_rate": 4.659762531246974e-06, "logits/chosen": -2.2730250358581543, "logits/rejected": -2.1138651371002197, "logps/chosen": -219.82144165039062, "logps/rejected": -198.40353393554688, "loss": 0.0275, "rewards/accuracies": 0.625, "rewards/chosen": -0.02621351182460785, "rewards/margins": 0.06269388645887375, "rewards/rejected": -0.0889073982834816, "step": 3840 }, { "epoch": 0.25, "learning_rate": 4.656881076981207e-06, "logits/chosen": -2.335495948791504, "logits/rejected": -2.1931393146514893, "logps/chosen": -215.53335571289062, "logps/rejected": -209.0745391845703, "loss": 0.0295, "rewards/accuracies": 0.625, "rewards/chosen": -0.01174996793270111, "rewards/margins": 0.05719948932528496, "rewards/rejected": -0.06894946098327637, "step": 3850 }, { "epoch": 0.25, "learning_rate": 4.653988371367183e-06, "logits/chosen": -2.2694575786590576, "logits/rejected": -2.0069797039031982, "logps/chosen": -240.78125, "logps/rejected": -191.01919555664062, "loss": 0.0385, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.00011368747800588608, "rewards/margins": 0.05722982436418533, "rewards/rejected": -0.05711613968014717, "step": 3860 }, { "epoch": 0.25, "learning_rate": 4.651084429494671e-06, "logits/chosen": -2.3504996299743652, "logits/rejected": -2.065788745880127, "logps/chosen": -274.817138671875, "logps/rejected": -206.2301788330078, "loss": 0.0206, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.002669256180524826, "rewards/margins": 0.05468413233757019, "rewards/rejected": -0.05201487988233566, "step": 3870 }, { "epoch": 0.25, "learning_rate": 4.648169266512053e-06, "logits/chosen": -2.4072623252868652, "logits/rejected": -2.1304709911346436, "logps/chosen": -220.14645385742188, "logps/rejected": -188.8533935546875, "loss": 0.0185, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.025908267125487328, "rewards/margins": 0.0683053508400917, "rewards/rejected": -0.04239708185195923, "step": 3880 }, { "epoch": 0.25, "learning_rate": 4.6452428976262505e-06, "logits/chosen": -2.2137064933776855, "logits/rejected": -1.9928970336914062, "logps/chosen": -199.99948120117188, "logps/rejected": -183.72735595703125, "loss": 0.0277, "rewards/accuracies": 0.75, "rewards/chosen": 0.01895982213318348, "rewards/margins": 0.14025332033634186, "rewards/rejected": -0.12129350006580353, "step": 3890 }, { "epoch": 0.26, "learning_rate": 4.642305338102633e-06, "logits/chosen": -2.261868715286255, "logits/rejected": -2.3374361991882324, "logps/chosen": -161.90980529785156, "logps/rejected": -198.79263305664062, "loss": 0.0164, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0008012913167476654, "rewards/margins": 0.09368561953306198, "rewards/rejected": -0.09448691457509995, "step": 3900 }, { "epoch": 0.26, "eval_logits/chosen": -2.2779669761657715, "eval_logits/rejected": -2.0932793617248535, "eval_logps/chosen": -233.7920684814453, "eval_logps/rejected": -229.32948303222656, "eval_loss": 0.027970939874649048, "eval_rewards/accuracies": 0.6340000033378601, "eval_rewards/chosen": -0.008935615420341492, "eval_rewards/margins": 0.0796523168683052, "eval_rewards/rejected": -0.0885879322886467, "eval_runtime": 712.9093, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.403, "step": 3900 }, { "epoch": 0.26, "learning_rate": 4.639356603264953e-06, "logits/chosen": -2.3225855827331543, "logits/rejected": -2.0903942584991455, "logps/chosen": -243.5463409423828, "logps/rejected": -226.7582550048828, "loss": 0.0167, "rewards/accuracies": 0.625, "rewards/chosen": -0.008572788909077644, "rewards/margins": 0.04471360892057419, "rewards/rejected": -0.05328640341758728, "step": 3910 }, { "epoch": 0.26, "learning_rate": 4.636396708495255e-06, "logits/chosen": -2.161994695663452, "logits/rejected": -2.1441612243652344, "logps/chosen": -231.1266326904297, "logps/rejected": -218.8634490966797, "loss": 0.0179, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.009657839313149452, "rewards/margins": 0.06290359795093536, "rewards/rejected": -0.07256142795085907, "step": 3920 }, { "epoch": 0.26, "learning_rate": 4.633425669233799e-06, "logits/chosen": -2.2521562576293945, "logits/rejected": -2.273059606552124, "logps/chosen": -233.33676147460938, "logps/rejected": -245.6598663330078, "loss": 0.0197, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.007449137978255749, "rewards/margins": 0.08385193347930908, "rewards/rejected": -0.07640279829502106, "step": 3930 }, { "epoch": 0.26, "learning_rate": 4.6304435009789825e-06, "logits/chosen": -2.275355815887451, "logits/rejected": -2.0570201873779297, "logps/chosen": -236.5664825439453, "logps/rejected": -185.60482788085938, "loss": 0.0252, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.00470371451228857, "rewards/margins": 0.09974116086959839, "rewards/rejected": -0.09503744542598724, "step": 3940 }, { "epoch": 0.26, "learning_rate": 4.627450219287256e-06, "logits/chosen": -2.3196358680725098, "logits/rejected": -2.17457914352417, "logps/chosen": -184.38966369628906, "logps/rejected": -173.1756134033203, "loss": 0.0339, "rewards/accuracies": 0.625, "rewards/chosen": -0.014152881689369678, "rewards/margins": 0.05648508667945862, "rewards/rejected": -0.07063796371221542, "step": 3950 }, { "epoch": 0.26, "learning_rate": 4.624445839773042e-06, "logits/chosen": -2.2729854583740234, "logits/rejected": -2.2008156776428223, "logps/chosen": -174.69515991210938, "logps/rejected": -179.75315856933594, "loss": 0.0541, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.023419296368956566, "rewards/margins": 0.03060915693640709, "rewards/rejected": -0.054028451442718506, "step": 3960 }, { "epoch": 0.26, "learning_rate": 4.621430378108656e-06, "logits/chosen": -2.2478737831115723, "logits/rejected": -2.0963854789733887, "logps/chosen": -260.9545593261719, "logps/rejected": -273.7813415527344, "loss": 0.0124, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.015981189906597137, "rewards/margins": 0.09795571863651276, "rewards/rejected": -0.11393691599369049, "step": 3970 }, { "epoch": 0.26, "learning_rate": 4.618403850024223e-06, "logits/chosen": -2.166574478149414, "logits/rejected": -1.9380964040756226, "logps/chosen": -255.82699584960938, "logps/rejected": -221.594482421875, "loss": 0.0273, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.001684072194620967, "rewards/margins": 0.062149059027433395, "rewards/rejected": -0.06383313238620758, "step": 3980 }, { "epoch": 0.26, "learning_rate": 4.615366271307598e-06, "logits/chosen": -2.309037923812866, "logits/rejected": -2.1538918018341064, "logps/chosen": -198.5775604248047, "logps/rejected": -200.23765563964844, "loss": 0.0227, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03317294269800186, "rewards/margins": 0.06776221096515656, "rewards/rejected": -0.10093514621257782, "step": 3990 }, { "epoch": 0.26, "learning_rate": 4.612317657804277e-06, "logits/chosen": -2.1642422676086426, "logits/rejected": -2.2179436683654785, "logps/chosen": -157.81178283691406, "logps/rejected": -228.7684783935547, "loss": 0.0445, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.05289377644658089, "rewards/margins": 0.09804403781890869, "rewards/rejected": -0.15093779563903809, "step": 4000 }, { "epoch": 0.26, "eval_logits/chosen": -2.270484209060669, "eval_logits/rejected": -2.0859897136688232, "eval_logps/chosen": -239.2251434326172, "eval_logps/rejected": -236.20582580566406, "eval_loss": 0.0270866546779871, "eval_rewards/accuracies": 0.6389999985694885, "eval_rewards/chosen": -0.03610089048743248, "eval_rewards/margins": 0.08686867356300354, "eval_rewards/rejected": -0.12296956777572632, "eval_runtime": 712.7526, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 4000 }, { "epoch": 0.26, "learning_rate": 4.6092580254173236e-06, "logits/chosen": -2.1679720878601074, "logits/rejected": -1.9357163906097412, "logps/chosen": -266.56781005859375, "logps/rejected": -269.0433044433594, "loss": 0.029, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04622514545917511, "rewards/margins": 0.10571815818548203, "rewards/rejected": -0.15194329619407654, "step": 4010 }, { "epoch": 0.26, "learning_rate": 4.606187390107277e-06, "logits/chosen": -2.138845443725586, "logits/rejected": -1.956048607826233, "logps/chosen": -242.793701171875, "logps/rejected": -218.82632446289062, "loss": 0.0396, "rewards/accuracies": 0.5, "rewards/chosen": -0.08959133177995682, "rewards/margins": 0.07512683421373367, "rewards/rejected": -0.1647181510925293, "step": 4020 }, { "epoch": 0.26, "learning_rate": 4.603105767892077e-06, "logits/chosen": -2.262580394744873, "logits/rejected": -2.1853690147399902, "logps/chosen": -207.2705841064453, "logps/rejected": -239.85885620117188, "loss": 0.0174, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0629027932882309, "rewards/margins": 0.07317514717578888, "rewards/rejected": -0.13607792556285858, "step": 4030 }, { "epoch": 0.26, "learning_rate": 4.6000131748469725e-06, "logits/chosen": -2.3523991107940674, "logits/rejected": -1.9695736169815063, "logps/chosen": -258.4837646484375, "logps/rejected": -197.4902801513672, "loss": 0.0485, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.0417175218462944, "rewards/margins": 0.06662425398826599, "rewards/rejected": -0.10834179073572159, "step": 4040 }, { "epoch": 0.26, "learning_rate": 4.596909627104445e-06, "logits/chosen": -2.3855977058410645, "logits/rejected": -2.309062957763672, "logps/chosen": -263.0769348144531, "logps/rejected": -246.50241088867188, "loss": 0.0143, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.07169332355260849, "rewards/margins": 0.08029817044734955, "rewards/rejected": -0.15199150145053864, "step": 4050 }, { "epoch": 0.27, "learning_rate": 4.5937951408541215e-06, "logits/chosen": -2.4470372200012207, "logits/rejected": -1.9213218688964844, "logps/chosen": -263.614990234375, "logps/rejected": -240.8369598388672, "loss": 0.0283, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05488234758377075, "rewards/margins": 0.11299531161785126, "rewards/rejected": -0.167877659201622, "step": 4060 }, { "epoch": 0.27, "learning_rate": 4.590669732342685e-06, "logits/chosen": -2.1618571281433105, "logits/rejected": -2.0170841217041016, "logps/chosen": -222.08682250976562, "logps/rejected": -243.6232452392578, "loss": 0.0521, "rewards/accuracies": 0.625, "rewards/chosen": -0.04969602823257446, "rewards/margins": 0.09821876138448715, "rewards/rejected": -0.1479147970676422, "step": 4070 }, { "epoch": 0.27, "learning_rate": 4.587533417873799e-06, "logits/chosen": -2.2454047203063965, "logits/rejected": -2.341275453567505, "logps/chosen": -205.1430206298828, "logps/rejected": -281.76739501953125, "loss": 0.0161, "rewards/accuracies": 0.75, "rewards/chosen": -0.059894900768995285, "rewards/margins": 0.0846036747097969, "rewards/rejected": -0.1444985717535019, "step": 4080 }, { "epoch": 0.27, "learning_rate": 4.584386213808016e-06, "logits/chosen": -2.246037006378174, "logits/rejected": -1.9014495611190796, "logps/chosen": -232.6446990966797, "logps/rejected": -198.4827880859375, "loss": 0.0454, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.054828494787216187, "rewards/margins": 0.05807109922170639, "rewards/rejected": -0.11289960145950317, "step": 4090 }, { "epoch": 0.27, "learning_rate": 4.581228136562693e-06, "logits/chosen": -2.132202625274658, "logits/rejected": -2.2550837993621826, "logps/chosen": -247.276611328125, "logps/rejected": -227.5145263671875, "loss": 0.0176, "rewards/accuracies": 0.5, "rewards/chosen": -0.04548081010580063, "rewards/margins": 0.02961266040802002, "rewards/rejected": -0.07509347796440125, "step": 4100 }, { "epoch": 0.27, "eval_logits/chosen": -2.3252735137939453, "eval_logits/rejected": -2.1376659870147705, "eval_logps/chosen": -241.9268798828125, "eval_logps/rejected": -234.1422882080078, "eval_loss": 0.028915749862790108, "eval_rewards/accuracies": 0.6470000147819519, "eval_rewards/chosen": -0.049609627574682236, "eval_rewards/margins": 0.06304233521223068, "eval_rewards/rejected": -0.11265195906162262, "eval_runtime": 712.9992, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.403, "step": 4100 }, { "epoch": 0.27, "learning_rate": 4.578059202611909e-06, "logits/chosen": -2.345768690109253, "logits/rejected": -2.1116530895233154, "logps/chosen": -263.25579833984375, "logps/rejected": -259.384765625, "loss": 0.0196, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.030484404414892197, "rewards/margins": 0.05442474037408829, "rewards/rejected": -0.08490914851427078, "step": 4110 }, { "epoch": 0.27, "learning_rate": 4.574879428486376e-06, "logits/chosen": -2.3218655586242676, "logits/rejected": -2.0297176837921143, "logps/chosen": -221.8189697265625, "logps/rejected": -232.28121948242188, "loss": 0.011, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04451214522123337, "rewards/margins": 0.0669221431016922, "rewards/rejected": -0.11143428087234497, "step": 4120 }, { "epoch": 0.27, "learning_rate": 4.571688830773352e-06, "logits/chosen": -2.349295139312744, "logits/rejected": -2.2383410930633545, "logps/chosen": -230.47598266601562, "logps/rejected": -217.79031372070312, "loss": 0.0294, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.042038775980472565, "rewards/margins": 0.0359666645526886, "rewards/rejected": -0.07800544798374176, "step": 4130 }, { "epoch": 0.27, "learning_rate": 4.568487426116559e-06, "logits/chosen": -2.281845808029175, "logits/rejected": -2.297541856765747, "logps/chosen": -179.35195922851562, "logps/rejected": -179.69200134277344, "loss": 0.0405, "rewards/accuracies": 0.625, "rewards/chosen": -0.0369006022810936, "rewards/margins": 0.03936711698770523, "rewards/rejected": -0.07626771181821823, "step": 4140 }, { "epoch": 0.27, "learning_rate": 4.565275231216092e-06, "logits/chosen": -2.1967597007751465, "logits/rejected": -2.1704554557800293, "logps/chosen": -155.69821166992188, "logps/rejected": -211.3583526611328, "loss": 0.0191, "rewards/accuracies": 0.5, "rewards/chosen": -0.0217736829072237, "rewards/margins": 0.0483512282371521, "rewards/rejected": -0.07012491673231125, "step": 4150 }, { "epoch": 0.27, "learning_rate": 4.562052262828331e-06, "logits/chosen": -2.2314505577087402, "logits/rejected": -2.086259603500366, "logps/chosen": -208.48379516601562, "logps/rejected": -217.4300537109375, "loss": 0.0477, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04836384207010269, "rewards/margins": 0.07111117243766785, "rewards/rejected": -0.11947502195835114, "step": 4160 }, { "epoch": 0.27, "learning_rate": 4.558818537765861e-06, "logits/chosen": -2.4081733226776123, "logits/rejected": -2.198742151260376, "logps/chosen": -245.94277954101562, "logps/rejected": -225.3533477783203, "loss": 0.0385, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04964191094040871, "rewards/margins": 0.07139302790164948, "rewards/rejected": -0.12103494256734848, "step": 4170 }, { "epoch": 0.27, "learning_rate": 4.555574072897374e-06, "logits/chosen": -2.3245797157287598, "logits/rejected": -2.3242580890655518, "logps/chosen": -213.04324340820312, "logps/rejected": -226.7286376953125, "loss": 0.0309, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.055409859865903854, "rewards/margins": 0.0797853022813797, "rewards/rejected": -0.13519516587257385, "step": 4180 }, { "epoch": 0.27, "learning_rate": 4.552318885147589e-06, "logits/chosen": -2.433678388595581, "logits/rejected": -2.0744783878326416, "logps/chosen": -249.4149627685547, "logps/rejected": -204.31393432617188, "loss": 0.0253, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04248654097318649, "rewards/margins": 0.07144276797771454, "rewards/rejected": -0.11392930895090103, "step": 4190 }, { "epoch": 0.27, "learning_rate": 4.549052991497159e-06, "logits/chosen": -2.293544292449951, "logits/rejected": -2.262787103652954, "logps/chosen": -190.35092163085938, "logps/rejected": -205.44454956054688, "loss": 0.0244, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.050918687134981155, "rewards/margins": 0.0733739361166954, "rewards/rejected": -0.12429263442754745, "step": 4200 }, { "epoch": 0.27, "eval_logits/chosen": -2.312159776687622, "eval_logits/rejected": -2.1259677410125732, "eval_logps/chosen": -237.25537109375, "eval_logps/rejected": -231.3834991455078, "eval_loss": 0.029266033321619034, "eval_rewards/accuracies": 0.6424999833106995, "eval_rewards/chosen": -0.026252107694745064, "eval_rewards/margins": 0.07260581851005554, "eval_rewards/rejected": -0.09885792434215546, "eval_runtime": 712.4499, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.404, "step": 4200 }, { "epoch": 0.28, "learning_rate": 4.545776408982585e-06, "logits/chosen": -2.2285044193267822, "logits/rejected": -2.2255208492279053, "logps/chosen": -234.24560546875, "logps/rejected": -239.6706085205078, "loss": 0.0283, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.012507572770118713, "rewards/margins": 0.07219143211841583, "rewards/rejected": -0.08469899743795395, "step": 4210 }, { "epoch": 0.28, "learning_rate": 4.542489154696128e-06, "logits/chosen": -2.4383578300476074, "logits/rejected": -2.077122688293457, "logps/chosen": -265.59991455078125, "logps/rejected": -217.50387573242188, "loss": 0.0137, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.010602285154163837, "rewards/margins": 0.06577347218990326, "rewards/rejected": -0.0551711842417717, "step": 4220 }, { "epoch": 0.28, "learning_rate": 4.5391912457857145e-06, "logits/chosen": -2.3199622631073, "logits/rejected": -2.06068754196167, "logps/chosen": -266.38726806640625, "logps/rejected": -232.57876586914062, "loss": 0.0256, "rewards/accuracies": 0.625, "rewards/chosen": -0.0070883058942854404, "rewards/margins": 0.06650589406490326, "rewards/rejected": -0.07359420508146286, "step": 4230 }, { "epoch": 0.28, "learning_rate": 4.535882699454854e-06, "logits/chosen": -2.3102855682373047, "logits/rejected": -2.185673713684082, "logps/chosen": -274.146240234375, "logps/rejected": -315.23004150390625, "loss": 0.0245, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.015475980937480927, "rewards/margins": 0.09710557758808136, "rewards/rejected": -0.11258156597614288, "step": 4240 }, { "epoch": 0.28, "learning_rate": 4.532563532962546e-06, "logits/chosen": -2.357006072998047, "logits/rejected": -2.4395248889923096, "logps/chosen": -199.3505859375, "logps/rejected": -236.73263549804688, "loss": 0.0324, "rewards/accuracies": 0.625, "rewards/chosen": -0.0472816601395607, "rewards/margins": 0.07893550395965576, "rewards/rejected": -0.12621717154979706, "step": 4250 }, { "epoch": 0.28, "learning_rate": 4.529233763623187e-06, "logits/chosen": -2.3164355754852295, "logits/rejected": -2.028876304626465, "logps/chosen": -213.58236694335938, "logps/rejected": -181.50094604492188, "loss": 0.0295, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.057409435510635376, "rewards/margins": 0.07939110696315765, "rewards/rejected": -0.13680054247379303, "step": 4260 }, { "epoch": 0.28, "learning_rate": 4.5258934088064854e-06, "logits/chosen": -2.202575206756592, "logits/rejected": -1.7992494106292725, "logps/chosen": -235.61380004882812, "logps/rejected": -203.1489715576172, "loss": 0.0227, "rewards/accuracies": 0.625, "rewards/chosen": -0.07816511392593384, "rewards/margins": 0.1111634373664856, "rewards/rejected": -0.18932855129241943, "step": 4270 }, { "epoch": 0.28, "learning_rate": 4.522542485937369e-06, "logits/chosen": -2.3277671337127686, "logits/rejected": -2.0657238960266113, "logps/chosen": -298.01959228515625, "logps/rejected": -221.0654754638672, "loss": 0.0125, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.03683526813983917, "rewards/margins": 0.09843374788761139, "rewards/rejected": -0.13526901602745056, "step": 4280 }, { "epoch": 0.28, "learning_rate": 4.519181012495892e-06, "logits/chosen": -2.32460355758667, "logits/rejected": -2.201780080795288, "logps/chosen": -246.9171905517578, "logps/rejected": -235.5345458984375, "loss": 0.0312, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.02348960004746914, "rewards/margins": 0.08845724910497665, "rewards/rejected": -0.11194685846567154, "step": 4290 }, { "epoch": 0.28, "learning_rate": 4.515809006017147e-06, "logits/chosen": -2.2573161125183105, "logits/rejected": -1.95000422000885, "logps/chosen": -232.3976287841797, "logps/rejected": -212.3801727294922, "loss": 0.0378, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.005059923976659775, "rewards/margins": 0.0774591714143753, "rewards/rejected": -0.07239924371242523, "step": 4300 }, { "epoch": 0.28, "eval_logits/chosen": -2.2686073780059814, "eval_logits/rejected": -2.0843474864959717, "eval_logps/chosen": -231.23663330078125, "eval_logps/rejected": -226.155029296875, "eval_loss": 0.026677994057536125, "eval_rewards/accuracies": 0.6439999938011169, "eval_rewards/chosen": 0.0038415947929024696, "eval_rewards/margins": 0.07655727863311768, "eval_rewards/rejected": -0.07271569967269897, "eval_runtime": 711.9298, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.405, "step": 4300 }, { "epoch": 0.28, "learning_rate": 4.512426484091171e-06, "logits/chosen": -2.40342116355896, "logits/rejected": -2.0684916973114014, "logps/chosen": -271.4616394042969, "logps/rejected": -245.02908325195312, "loss": 0.0383, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.030597597360610962, "rewards/margins": 0.049302391707897186, "rewards/rejected": -0.018704798072576523, "step": 4310 }, { "epoch": 0.28, "learning_rate": 4.509033464362858e-06, "logits/chosen": -2.1147308349609375, "logits/rejected": -2.1781909465789795, "logps/chosen": -236.3292999267578, "logps/rejected": -266.4777526855469, "loss": 0.0186, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.024682987481355667, "rewards/margins": 0.07958771288394928, "rewards/rejected": -0.05490473657846451, "step": 4320 }, { "epoch": 0.28, "learning_rate": 4.505629964531857e-06, "logits/chosen": -2.3749783039093018, "logits/rejected": -2.1724915504455566, "logps/chosen": -222.01260375976562, "logps/rejected": -207.80874633789062, "loss": 0.0288, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.005958900786936283, "rewards/margins": 0.0830468013882637, "rewards/rejected": -0.0770878940820694, "step": 4330 }, { "epoch": 0.28, "learning_rate": 4.502216002352492e-06, "logits/chosen": -2.385356903076172, "logits/rejected": -2.1611905097961426, "logps/chosen": -165.23251342773438, "logps/rejected": -158.66131591796875, "loss": 0.0438, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.010958021506667137, "rewards/margins": 0.06421489268541336, "rewards/rejected": -0.07517291605472565, "step": 4340 }, { "epoch": 0.28, "learning_rate": 4.498791595633663e-06, "logits/chosen": -2.237499952316284, "logits/rejected": -1.8516120910644531, "logps/chosen": -258.93536376953125, "logps/rejected": -184.10693359375, "loss": 0.0285, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.01724466308951378, "rewards/margins": 0.061860036104917526, "rewards/rejected": -0.04461536929011345, "step": 4350 }, { "epoch": 0.29, "learning_rate": 4.495356762238751e-06, "logits/chosen": -2.484279155731201, "logits/rejected": -2.0068490505218506, "logps/chosen": -278.0464782714844, "logps/rejected": -193.75340270996094, "loss": 0.0151, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.02575724944472313, "rewards/margins": 0.06579472124576569, "rewards/rejected": -0.04003746062517166, "step": 4360 }, { "epoch": 0.29, "learning_rate": 4.491911520085532e-06, "logits/chosen": -2.06331205368042, "logits/rejected": -1.9536161422729492, "logps/chosen": -196.34120178222656, "logps/rejected": -222.7769317626953, "loss": 0.024, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.020332731306552887, "rewards/margins": 0.07831554114818573, "rewards/rejected": -0.05798282101750374, "step": 4370 }, { "epoch": 0.29, "learning_rate": 4.488455887146075e-06, "logits/chosen": -2.1633567810058594, "logits/rejected": -2.1489412784576416, "logps/chosen": -166.67652893066406, "logps/rejected": -202.71359252929688, "loss": 0.0312, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.017177890986204147, "rewards/margins": 0.11436694860458374, "rewards/rejected": -0.0971890538930893, "step": 4380 }, { "epoch": 0.29, "learning_rate": 4.484989881446654e-06, "logits/chosen": -2.419445276260376, "logits/rejected": -2.214813470840454, "logps/chosen": -202.06753540039062, "logps/rejected": -191.85040283203125, "loss": 0.0379, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0020423284731805325, "rewards/margins": 0.03863789141178131, "rewards/rejected": -0.03659556061029434, "step": 4390 }, { "epoch": 0.29, "learning_rate": 4.481513521067654e-06, "logits/chosen": -2.3927712440490723, "logits/rejected": -2.035768747329712, "logps/chosen": -226.9812469482422, "logps/rejected": -210.1649627685547, "loss": 0.0135, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.011033494956791401, "rewards/margins": 0.09299737960100174, "rewards/rejected": -0.10403086990118027, "step": 4400 }, { "epoch": 0.29, "eval_logits/chosen": -2.2857038974761963, "eval_logits/rejected": -2.099775791168213, "eval_logps/chosen": -236.32452392578125, "eval_logps/rejected": -232.56199645996094, "eval_loss": 0.02732119709253311, "eval_rewards/accuracies": 0.6434999704360962, "eval_rewards/chosen": -0.021597841754555702, "eval_rewards/margins": 0.083152636885643, "eval_rewards/rejected": -0.10475046932697296, "eval_runtime": 713.6572, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 4400 }, { "epoch": 0.29, "learning_rate": 4.478026824143473e-06, "logits/chosen": -2.292775869369507, "logits/rejected": -2.1565842628479004, "logps/chosen": -269.4031677246094, "logps/rejected": -234.95068359375, "loss": 0.0236, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.016707628965377808, "rewards/margins": 0.11057498306035995, "rewards/rejected": -0.12728263437747955, "step": 4410 }, { "epoch": 0.29, "learning_rate": 4.474529808862429e-06, "logits/chosen": -2.1790809631347656, "logits/rejected": -2.103480577468872, "logps/chosen": -191.47373962402344, "logps/rejected": -225.77734375, "loss": 0.0434, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.013267258182168007, "rewards/margins": 0.08339644223451614, "rewards/rejected": -0.096663698554039, "step": 4420 }, { "epoch": 0.29, "learning_rate": 4.471022493466669e-06, "logits/chosen": -2.2934823036193848, "logits/rejected": -1.964529037475586, "logps/chosen": -304.9217529296875, "logps/rejected": -239.77035522460938, "loss": 0.0235, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.014913300052285194, "rewards/margins": 0.05929947644472122, "rewards/rejected": -0.07421278208494186, "step": 4430 }, { "epoch": 0.29, "learning_rate": 4.467504896252066e-06, "logits/chosen": -2.30965256690979, "logits/rejected": -2.1960270404815674, "logps/chosen": -252.5443115234375, "logps/rejected": -247.04800415039062, "loss": 0.0238, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.014214863069355488, "rewards/margins": 0.11622963845729828, "rewards/rejected": -0.1304444968700409, "step": 4440 }, { "epoch": 0.29, "learning_rate": 4.463977035568132e-06, "logits/chosen": -2.175062656402588, "logits/rejected": -2.41196346282959, "logps/chosen": -215.9466094970703, "logps/rejected": -276.2236633300781, "loss": 0.0215, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.017819250002503395, "rewards/margins": 0.04285722225904465, "rewards/rejected": -0.060676466673612595, "step": 4450 }, { "epoch": 0.29, "learning_rate": 4.460438929817914e-06, "logits/chosen": -2.243072986602783, "logits/rejected": -2.0973448753356934, "logps/chosen": -207.04464721679688, "logps/rejected": -216.0046844482422, "loss": 0.0273, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.00312446360476315, "rewards/margins": 0.06624321639537811, "rewards/rejected": -0.0693676769733429, "step": 4460 }, { "epoch": 0.29, "learning_rate": 4.456890597457907e-06, "logits/chosen": -2.094747543334961, "logits/rejected": -2.140568256378174, "logps/chosen": -218.9592742919922, "logps/rejected": -252.67184448242188, "loss": 0.0241, "rewards/accuracies": 0.5, "rewards/chosen": -0.03411801904439926, "rewards/margins": 0.07754088193178177, "rewards/rejected": -0.11165890842676163, "step": 4470 }, { "epoch": 0.29, "learning_rate": 4.453332056997951e-06, "logits/chosen": -2.213270664215088, "logits/rejected": -2.2702724933624268, "logps/chosen": -181.20423889160156, "logps/rejected": -197.28903198242188, "loss": 0.0182, "rewards/accuracies": 0.625, "rewards/chosen": -0.0062237693928182125, "rewards/margins": 0.10204926878213882, "rewards/rejected": -0.10827304422855377, "step": 4480 }, { "epoch": 0.29, "learning_rate": 4.449763327001134e-06, "logits/chosen": -2.249997615814209, "logits/rejected": -2.156466007232666, "logps/chosen": -191.21969604492188, "logps/rejected": -233.1490478515625, "loss": 0.0268, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.012287397868931293, "rewards/margins": 0.06756951659917831, "rewards/rejected": -0.07985690981149673, "step": 4490 }, { "epoch": 0.29, "learning_rate": 4.446184426083702e-06, "logits/chosen": -2.2206637859344482, "logits/rejected": -1.9963423013687134, "logps/chosen": -199.42367553710938, "logps/rejected": -231.6684112548828, "loss": 0.0143, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.029296617954969406, "rewards/margins": 0.11938565969467163, "rewards/rejected": -0.14868226647377014, "step": 4500 }, { "epoch": 0.29, "eval_logits/chosen": -2.284329652786255, "eval_logits/rejected": -2.0987653732299805, "eval_logps/chosen": -238.03781127929688, "eval_logps/rejected": -232.74058532714844, "eval_loss": 0.026792826130986214, "eval_rewards/accuracies": 0.637499988079071, "eval_rewards/chosen": -0.030164305120706558, "eval_rewards/margins": 0.07547909766435623, "eval_rewards/rejected": -0.10564339905977249, "eval_runtime": 712.6767, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 4500 }, { "epoch": 0.3, "learning_rate": 4.442595372914954e-06, "logits/chosen": -2.3305094242095947, "logits/rejected": -2.0600168704986572, "logps/chosen": -239.54110717773438, "logps/rejected": -170.85906982421875, "loss": 0.0155, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.012803817167878151, "rewards/margins": 0.08296112716197968, "rewards/rejected": -0.09576494991779327, "step": 4510 }, { "epoch": 0.3, "learning_rate": 4.43899618621715e-06, "logits/chosen": -2.2653181552886963, "logits/rejected": -2.0612456798553467, "logps/chosen": -261.11163330078125, "logps/rejected": -285.72015380859375, "loss": 0.0401, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0502888560295105, "rewards/margins": 0.10705902427434921, "rewards/rejected": -0.1573478728532791, "step": 4520 }, { "epoch": 0.3, "learning_rate": 4.4353868847654105e-06, "logits/chosen": -2.385094165802002, "logits/rejected": -2.149104595184326, "logps/chosen": -249.2441864013672, "logps/rejected": -231.818359375, "loss": 0.0321, "rewards/accuracies": 0.5, "rewards/chosen": -0.016929741948843002, "rewards/margins": 0.0632086917757988, "rewards/rejected": -0.08013845235109329, "step": 4530 }, { "epoch": 0.3, "learning_rate": 4.43176748738762e-06, "logits/chosen": -2.2918667793273926, "logits/rejected": -2.0501952171325684, "logps/chosen": -240.1075897216797, "logps/rejected": -264.6262512207031, "loss": 0.0155, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04079819470643997, "rewards/margins": 0.10065152496099472, "rewards/rejected": -0.1414497196674347, "step": 4540 }, { "epoch": 0.3, "learning_rate": 4.4281380129643295e-06, "logits/chosen": -2.201007127761841, "logits/rejected": -2.040647506713867, "logps/chosen": -233.6107940673828, "logps/rejected": -243.1392822265625, "loss": 0.0275, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.01930316351354122, "rewards/margins": 0.10001279413700104, "rewards/rejected": -0.11931595951318741, "step": 4550 }, { "epoch": 0.3, "learning_rate": 4.424498480428654e-06, "logits/chosen": -2.240109443664551, "logits/rejected": -2.133995771408081, "logps/chosen": -252.0578155517578, "logps/rejected": -216.51962280273438, "loss": 0.0364, "rewards/accuracies": 0.5, "rewards/chosen": -0.02392945997416973, "rewards/margins": 0.02243615873157978, "rewards/rejected": -0.04636561498045921, "step": 4560 }, { "epoch": 0.3, "learning_rate": 4.420848908766178e-06, "logits/chosen": -2.35581636428833, "logits/rejected": -2.28523588180542, "logps/chosen": -208.04129028320312, "logps/rejected": -228.0386199951172, "loss": 0.0295, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.004072178620845079, "rewards/margins": 0.05558561533689499, "rewards/rejected": -0.05965778976678848, "step": 4570 }, { "epoch": 0.3, "learning_rate": 4.417189317014855e-06, "logits/chosen": -2.1987602710723877, "logits/rejected": -2.439194917678833, "logps/chosen": -203.37686157226562, "logps/rejected": -242.19515991210938, "loss": 0.0299, "rewards/accuracies": 0.5, "rewards/chosen": -0.012904942035675049, "rewards/margins": 0.04060705006122589, "rewards/rejected": -0.05351199582219124, "step": 4580 }, { "epoch": 0.3, "learning_rate": 4.41351972426491e-06, "logits/chosen": -2.1169800758361816, "logits/rejected": -2.1452808380126953, "logps/chosen": -253.7069854736328, "logps/rejected": -320.93609619140625, "loss": 0.0138, "rewards/accuracies": 0.625, "rewards/chosen": -0.03483807295560837, "rewards/margins": 0.06588040292263031, "rewards/rejected": -0.10071848332881927, "step": 4590 }, { "epoch": 0.3, "learning_rate": 4.409840149658735e-06, "logits/chosen": -2.2177650928497314, "logits/rejected": -1.9569743871688843, "logps/chosen": -286.93402099609375, "logps/rejected": -251.4782257080078, "loss": 0.0268, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.012083860114216805, "rewards/margins": 0.07347994297742844, "rewards/rejected": -0.08556380867958069, "step": 4600 }, { "epoch": 0.3, "eval_logits/chosen": -2.2916858196258545, "eval_logits/rejected": -2.106189489364624, "eval_logps/chosen": -236.19700622558594, "eval_logps/rejected": -229.6837615966797, "eval_loss": 0.027024326846003532, "eval_rewards/accuracies": 0.6399999856948853, "eval_rewards/chosen": -0.02096020244061947, "eval_rewards/margins": 0.06939905881881714, "eval_rewards/rejected": -0.09035927057266235, "eval_runtime": 715.4942, "eval_samples_per_second": 2.795, "eval_steps_per_second": 1.398, "step": 4600 }, { "epoch": 0.3, "learning_rate": 4.4061506123907925e-06, "logits/chosen": -2.209913969039917, "logits/rejected": -2.0499978065490723, "logps/chosen": -270.0508728027344, "logps/rejected": -241.28359985351562, "loss": 0.0317, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.029795369133353233, "rewards/margins": 0.05769450217485428, "rewards/rejected": -0.08748986572027206, "step": 4610 }, { "epoch": 0.3, "learning_rate": 4.402451131707519e-06, "logits/chosen": -2.403371810913086, "logits/rejected": -1.9444561004638672, "logps/chosen": -213.96875, "logps/rejected": -159.59783935546875, "loss": 0.0126, "rewards/accuracies": 0.75, "rewards/chosen": -0.022516760975122452, "rewards/margins": 0.10555239021778107, "rewards/rejected": -0.12806914746761322, "step": 4620 }, { "epoch": 0.3, "learning_rate": 4.398741726907215e-06, "logits/chosen": -2.4314961433410645, "logits/rejected": -2.115912437438965, "logps/chosen": -281.1767272949219, "logps/rejected": -257.37591552734375, "loss": 0.023, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.009596886113286018, "rewards/margins": 0.08262918889522552, "rewards/rejected": -0.0922260731458664, "step": 4630 }, { "epoch": 0.3, "learning_rate": 4.395022417339955e-06, "logits/chosen": -2.1874938011169434, "logits/rejected": -2.2346978187561035, "logps/chosen": -219.12521362304688, "logps/rejected": -241.1179656982422, "loss": 0.0379, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.06250091642141342, "rewards/margins": 0.06751805543899536, "rewards/rejected": -0.13001897931098938, "step": 4640 }, { "epoch": 0.3, "learning_rate": 4.391293222407479e-06, "logits/chosen": -2.2946298122406006, "logits/rejected": -2.295693874359131, "logps/chosen": -140.21095275878906, "logps/rejected": -170.3481903076172, "loss": 0.0228, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.018268903717398643, "rewards/margins": 0.059536147862672806, "rewards/rejected": -0.0778050571680069, "step": 4650 }, { "epoch": 0.3, "learning_rate": 4.387554161563094e-06, "logits/chosen": -2.291577100753784, "logits/rejected": -2.2055227756500244, "logps/chosen": -207.60568237304688, "logps/rejected": -217.01821899414062, "loss": 0.0232, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04370753467082977, "rewards/margins": 0.10616093873977661, "rewards/rejected": -0.1498684585094452, "step": 4660 }, { "epoch": 0.31, "learning_rate": 4.383805254311575e-06, "logits/chosen": -2.48099684715271, "logits/rejected": -2.1088850498199463, "logps/chosen": -266.7301330566406, "logps/rejected": -234.53079223632812, "loss": 0.0447, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0529455728828907, "rewards/margins": 0.06683431565761566, "rewards/rejected": -0.11977989971637726, "step": 4670 }, { "epoch": 0.31, "learning_rate": 4.380046520209056e-06, "logits/chosen": -2.330359935760498, "logits/rejected": -1.9653619527816772, "logps/chosen": -211.83743286132812, "logps/rejected": -205.95068359375, "loss": 0.0206, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.057855475693941116, "rewards/margins": 0.08976194262504578, "rewards/rejected": -0.1476174145936966, "step": 4680 }, { "epoch": 0.31, "learning_rate": 4.376277978862936e-06, "logits/chosen": -2.192985773086548, "logits/rejected": -1.9010270833969116, "logps/chosen": -238.52273559570312, "logps/rejected": -210.4039764404297, "loss": 0.0262, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06267054378986359, "rewards/margins": 0.06206362694501877, "rewards/rejected": -0.12473416328430176, "step": 4690 }, { "epoch": 0.31, "learning_rate": 4.372499649931774e-06, "logits/chosen": -2.141416072845459, "logits/rejected": -2.292172908782959, "logps/chosen": -223.7112579345703, "logps/rejected": -259.6845397949219, "loss": 0.026, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07309852540493011, "rewards/margins": 0.14008431136608124, "rewards/rejected": -0.21318283677101135, "step": 4700 }, { "epoch": 0.31, "eval_logits/chosen": -2.2986583709716797, "eval_logits/rejected": -2.1118977069854736, "eval_logps/chosen": -248.163818359375, "eval_logps/rejected": -244.79864501953125, "eval_loss": 0.02719779685139656, "eval_rewards/accuracies": 0.6495000123977661, "eval_rewards/chosen": -0.08079422265291214, "eval_rewards/margins": 0.08513953536748886, "eval_rewards/rejected": -0.165933758020401, "eval_runtime": 715.3905, "eval_samples_per_second": 2.796, "eval_steps_per_second": 1.398, "step": 4700 }, { "epoch": 0.31, "learning_rate": 4.368711553125185e-06, "logits/chosen": -2.4827535152435303, "logits/rejected": -2.2455661296844482, "logps/chosen": -292.4847106933594, "logps/rejected": -248.26742553710938, "loss": 0.0258, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07647337019443512, "rewards/margins": 0.07113082706928253, "rewards/rejected": -0.14760419726371765, "step": 4710 }, { "epoch": 0.31, "learning_rate": 4.364913708203734e-06, "logits/chosen": -2.3889904022216797, "logits/rejected": -2.042200803756714, "logps/chosen": -302.0526428222656, "logps/rejected": -241.18408203125, "loss": 0.019, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08578182011842728, "rewards/margins": 0.0727434754371643, "rewards/rejected": -0.15852530300617218, "step": 4720 }, { "epoch": 0.31, "learning_rate": 4.361106134978844e-06, "logits/chosen": -2.2721707820892334, "logits/rejected": -2.069014072418213, "logps/chosen": -283.931884765625, "logps/rejected": -284.26580810546875, "loss": 0.0276, "rewards/accuracies": 0.625, "rewards/chosen": -0.06750717014074326, "rewards/margins": 0.06642328202724457, "rewards/rejected": -0.13393045961856842, "step": 4730 }, { "epoch": 0.31, "learning_rate": 4.357288853312681e-06, "logits/chosen": -2.3525614738464355, "logits/rejected": -2.2721951007843018, "logps/chosen": -298.66815185546875, "logps/rejected": -304.68682861328125, "loss": 0.0145, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.07595177739858627, "rewards/margins": 0.05377800017595291, "rewards/rejected": -0.12972977757453918, "step": 4740 }, { "epoch": 0.31, "learning_rate": 4.353461883118056e-06, "logits/chosen": -2.251840591430664, "logits/rejected": -2.1155953407287598, "logps/chosen": -244.6503143310547, "logps/rejected": -230.669921875, "loss": 0.0379, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0767986848950386, "rewards/margins": 0.03585362061858177, "rewards/rejected": -0.11265231668949127, "step": 4750 }, { "epoch": 0.31, "learning_rate": 4.34962524435832e-06, "logits/chosen": -2.1269595623016357, "logits/rejected": -2.0253615379333496, "logps/chosen": -231.0450439453125, "logps/rejected": -213.3219757080078, "loss": 0.0403, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.056823186576366425, "rewards/margins": 0.09555148333311081, "rewards/rejected": -0.15237466990947723, "step": 4760 }, { "epoch": 0.31, "learning_rate": 4.34577895704726e-06, "logits/chosen": -2.380775213241577, "logits/rejected": -2.2097785472869873, "logps/chosen": -275.146240234375, "logps/rejected": -261.49993896484375, "loss": 0.0325, "rewards/accuracies": 0.625, "rewards/chosen": -0.06766113638877869, "rewards/margins": 0.06038862466812134, "rewards/rejected": -0.12804976105690002, "step": 4770 }, { "epoch": 0.31, "learning_rate": 4.3419230412489954e-06, "logits/chosen": -2.479979991912842, "logits/rejected": -2.248835563659668, "logps/chosen": -301.73004150390625, "logps/rejected": -234.8883514404297, "loss": 0.0449, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06181095913052559, "rewards/margins": 0.046111006289720535, "rewards/rejected": -0.10792195796966553, "step": 4780 }, { "epoch": 0.31, "learning_rate": 4.338057517077872e-06, "logits/chosen": -2.4250409603118896, "logits/rejected": -2.017948627471924, "logps/chosen": -203.47496032714844, "logps/rejected": -193.13108825683594, "loss": 0.0374, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.04716876894235611, "rewards/margins": 0.16504201292991638, "rewards/rejected": -0.21221080422401428, "step": 4790 }, { "epoch": 0.31, "learning_rate": 4.334182404698356e-06, "logits/chosen": -2.419837236404419, "logits/rejected": -1.9811451435089111, "logps/chosen": -246.45455932617188, "logps/rejected": -179.24266052246094, "loss": 0.0447, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0817376896739006, "rewards/margins": 0.05593956634402275, "rewards/rejected": -0.13767726719379425, "step": 4800 }, { "epoch": 0.31, "eval_logits/chosen": -2.2995717525482178, "eval_logits/rejected": -2.113132953643799, "eval_logps/chosen": -244.67567443847656, "eval_logps/rejected": -240.1879425048828, "eval_loss": 0.02654874138534069, "eval_rewards/accuracies": 0.6464999914169312, "eval_rewards/chosen": -0.06335365027189255, "eval_rewards/margins": 0.07952655106782913, "eval_rewards/rejected": -0.14288020133972168, "eval_runtime": 712.5859, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.403, "step": 4800 }, { "epoch": 0.31, "learning_rate": 4.330297724324933e-06, "logits/chosen": -2.548460006713867, "logits/rejected": -2.0144965648651123, "logps/chosen": -317.345947265625, "logps/rejected": -234.26913452148438, "loss": 0.0118, "rewards/accuracies": 0.75, "rewards/chosen": -0.044672705233097076, "rewards/margins": 0.09061713516712189, "rewards/rejected": -0.13528983294963837, "step": 4810 }, { "epoch": 0.32, "learning_rate": 4.326403496221999e-06, "logits/chosen": -2.212214469909668, "logits/rejected": -2.1176552772521973, "logps/chosen": -172.85696411132812, "logps/rejected": -159.83572387695312, "loss": 0.0338, "rewards/accuracies": 0.5, "rewards/chosen": -0.05713977664709091, "rewards/margins": 0.06879337877035141, "rewards/rejected": -0.12593314051628113, "step": 4820 }, { "epoch": 0.32, "learning_rate": 4.322499740703755e-06, "logits/chosen": -2.1856634616851807, "logits/rejected": -2.288818120956421, "logps/chosen": -200.39944458007812, "logps/rejected": -236.4174041748047, "loss": 0.0269, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04150126129388809, "rewards/margins": 0.05747325345873833, "rewards/rejected": -0.09897451102733612, "step": 4830 }, { "epoch": 0.32, "learning_rate": 4.318586478134101e-06, "logits/chosen": -2.2040696144104004, "logits/rejected": -2.1780283451080322, "logps/chosen": -198.16943359375, "logps/rejected": -173.2099151611328, "loss": 0.0309, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.027509670704603195, "rewards/margins": 0.07780478149652481, "rewards/rejected": -0.10531443357467651, "step": 4840 }, { "epoch": 0.32, "learning_rate": 4.314663728926534e-06, "logits/chosen": -2.4546444416046143, "logits/rejected": -2.205855131149292, "logps/chosen": -269.7536315917969, "logps/rejected": -273.2572326660156, "loss": 0.0329, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06438954174518585, "rewards/margins": 0.07040944695472717, "rewards/rejected": -0.13479898869991302, "step": 4850 }, { "epoch": 0.32, "learning_rate": 4.310731513544033e-06, "logits/chosen": -2.256805896759033, "logits/rejected": -2.0916640758514404, "logps/chosen": -253.5753173828125, "logps/rejected": -223.19534301757812, "loss": 0.0317, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.053587764501571655, "rewards/margins": 0.07849793136119843, "rewards/rejected": -0.13208571076393127, "step": 4860 }, { "epoch": 0.32, "learning_rate": 4.30678985249896e-06, "logits/chosen": -2.2457427978515625, "logits/rejected": -2.185237169265747, "logps/chosen": -167.4801025390625, "logps/rejected": -205.831787109375, "loss": 0.0595, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.051650386303663254, "rewards/margins": 0.10044083744287491, "rewards/rejected": -0.15209123492240906, "step": 4870 }, { "epoch": 0.32, "learning_rate": 4.302838766352952e-06, "logits/chosen": -2.257594347000122, "logits/rejected": -2.0400383472442627, "logps/chosen": -265.0000915527344, "logps/rejected": -244.8212890625, "loss": 0.0288, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04703570157289505, "rewards/margins": 0.07806356251239777, "rewards/rejected": -0.12509925663471222, "step": 4880 }, { "epoch": 0.32, "learning_rate": 4.298878275716806e-06, "logits/chosen": -2.1690163612365723, "logits/rejected": -2.1535024642944336, "logps/chosen": -202.11068725585938, "logps/rejected": -219.4486083984375, "loss": 0.0347, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.051156263798475266, "rewards/margins": 0.1006745845079422, "rewards/rejected": -0.15183086693286896, "step": 4890 }, { "epoch": 0.32, "learning_rate": 4.294908401250386e-06, "logits/chosen": -2.38234281539917, "logits/rejected": -1.940474271774292, "logps/chosen": -226.0990753173828, "logps/rejected": -201.27613830566406, "loss": 0.0311, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05678023770451546, "rewards/margins": 0.09854695945978165, "rewards/rejected": -0.1553271859884262, "step": 4900 }, { "epoch": 0.32, "eval_logits/chosen": -2.2562031745910645, "eval_logits/rejected": -2.072751045227051, "eval_logps/chosen": -240.35696411132812, "eval_logps/rejected": -237.6829833984375, "eval_loss": 0.026891114190220833, "eval_rewards/accuracies": 0.6470000147819519, "eval_rewards/chosen": -0.04176010936498642, "eval_rewards/margins": 0.08859530091285706, "eval_rewards/rejected": -0.13035540282726288, "eval_runtime": 715.6727, "eval_samples_per_second": 2.795, "eval_steps_per_second": 1.397, "step": 4900 }, { "epoch": 0.32, "learning_rate": 4.290929163662498e-06, "logits/chosen": -2.119206428527832, "logits/rejected": -1.9020423889160156, "logps/chosen": -276.1395568847656, "logps/rejected": -236.4208221435547, "loss": 0.03, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.032021451741456985, "rewards/margins": 0.08998885005712509, "rewards/rejected": -0.12201030552387238, "step": 4910 }, { "epoch": 0.32, "learning_rate": 4.286940583710796e-06, "logits/chosen": -2.294553279876709, "logits/rejected": -2.1739342212677, "logps/chosen": -303.06158447265625, "logps/rejected": -267.4081115722656, "loss": 0.0132, "rewards/accuracies": 0.75, "rewards/chosen": -0.051564522087574005, "rewards/margins": 0.10488219559192657, "rewards/rejected": -0.15644671022891998, "step": 4920 }, { "epoch": 0.32, "learning_rate": 4.282942682201667e-06, "logits/chosen": -2.1719789505004883, "logits/rejected": -1.9019883871078491, "logps/chosen": -264.94378662109375, "logps/rejected": -242.8894500732422, "loss": 0.0372, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06417985260486603, "rewards/margins": 0.08766107261180878, "rewards/rejected": -0.15184089541435242, "step": 4930 }, { "epoch": 0.32, "learning_rate": 4.278935479990123e-06, "logits/chosen": -2.463348388671875, "logits/rejected": -2.2190678119659424, "logps/chosen": -219.1842498779297, "logps/rejected": -188.9755401611328, "loss": 0.0271, "rewards/accuracies": 0.5, "rewards/chosen": -0.07260443270206451, "rewards/margins": 0.0638137012720108, "rewards/rejected": -0.1364181488752365, "step": 4940 }, { "epoch": 0.32, "learning_rate": 4.274918997979695e-06, "logits/chosen": -2.2152819633483887, "logits/rejected": -2.24706768989563, "logps/chosen": -211.72219848632812, "logps/rejected": -226.01937866210938, "loss": 0.0393, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09043584764003754, "rewards/margins": 0.06280899047851562, "rewards/rejected": -0.15324482321739197, "step": 4950 }, { "epoch": 0.32, "learning_rate": 4.270893257122319e-06, "logits/chosen": -2.1276659965515137, "logits/rejected": -1.8986365795135498, "logps/chosen": -242.81045532226562, "logps/rejected": -299.8619689941406, "loss": 0.0196, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07044224441051483, "rewards/margins": 0.13771095871925354, "rewards/rejected": -0.20815320312976837, "step": 4960 }, { "epoch": 0.33, "learning_rate": 4.266858278418232e-06, "logits/chosen": -2.1353728771209717, "logits/rejected": -1.8892120122909546, "logps/chosen": -251.4339141845703, "logps/rejected": -242.8282470703125, "loss": 0.0278, "rewards/accuracies": 0.5, "rewards/chosen": -0.07414842396974564, "rewards/margins": 0.05872755125164986, "rewards/rejected": -0.1328759789466858, "step": 4970 }, { "epoch": 0.33, "learning_rate": 4.26281408291586e-06, "logits/chosen": -2.371889591217041, "logits/rejected": -2.068842887878418, "logps/chosen": -251.660888671875, "logps/rejected": -242.34890747070312, "loss": 0.0345, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.046618033200502396, "rewards/margins": 0.1011015996336937, "rewards/rejected": -0.14771965146064758, "step": 4980 }, { "epoch": 0.33, "learning_rate": 4.258760691711706e-06, "logits/chosen": -2.270484685897827, "logits/rejected": -2.146216630935669, "logps/chosen": -211.3184051513672, "logps/rejected": -222.37344360351562, "loss": 0.0197, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06928422302007675, "rewards/margins": 0.07823914289474487, "rewards/rejected": -0.14752335846424103, "step": 4990 }, { "epoch": 0.33, "learning_rate": 4.254698125950247e-06, "logits/chosen": -2.5137898921966553, "logits/rejected": -2.2680342197418213, "logps/chosen": -311.6083679199219, "logps/rejected": -276.7689208984375, "loss": 0.0241, "rewards/accuracies": 0.625, "rewards/chosen": -0.05296919494867325, "rewards/margins": 0.05913100391626358, "rewards/rejected": -0.11210019886493683, "step": 5000 }, { "epoch": 0.33, "eval_logits/chosen": -2.270616292953491, "eval_logits/rejected": -2.0851943492889404, "eval_logps/chosen": -244.52313232421875, "eval_logps/rejected": -241.18063354492188, "eval_loss": 0.02674746699631214, "eval_rewards/accuracies": 0.6424999833106995, "eval_rewards/chosen": -0.06259080767631531, "eval_rewards/margins": 0.08525291085243225, "eval_rewards/rejected": -0.14784371852874756, "eval_runtime": 711.8804, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.405, "step": 5000 }, { "epoch": 0.33, "learning_rate": 4.250626406823815e-06, "logits/chosen": -2.325876235961914, "logits/rejected": -2.0665838718414307, "logps/chosen": -231.2372589111328, "logps/rejected": -277.8995056152344, "loss": 0.0406, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07187283039093018, "rewards/margins": 0.15242110192775726, "rewards/rejected": -0.22429391741752625, "step": 5010 }, { "epoch": 0.33, "learning_rate": 4.246545555572489e-06, "logits/chosen": -2.245234727859497, "logits/rejected": -2.11665678024292, "logps/chosen": -170.9512481689453, "logps/rejected": -217.70150756835938, "loss": 0.0252, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08377556502819061, "rewards/margins": 0.1089911088347435, "rewards/rejected": -0.1927666962146759, "step": 5020 }, { "epoch": 0.33, "learning_rate": 4.242455593483992e-06, "logits/chosen": -2.338961362838745, "logits/rejected": -2.139385223388672, "logps/chosen": -236.36843872070312, "logps/rejected": -198.01046752929688, "loss": 0.0224, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0860726535320282, "rewards/margins": 0.056757472455501556, "rewards/rejected": -0.14283011853694916, "step": 5030 }, { "epoch": 0.33, "learning_rate": 4.238356541893567e-06, "logits/chosen": -2.221163511276245, "logits/rejected": -2.0838024616241455, "logps/chosen": -210.2323760986328, "logps/rejected": -213.17843627929688, "loss": 0.0203, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10049609839916229, "rewards/margins": 0.09244342148303986, "rewards/rejected": -0.19293954968452454, "step": 5040 }, { "epoch": 0.33, "learning_rate": 4.234248422183876e-06, "logits/chosen": -2.0850110054016113, "logits/rejected": -2.282813310623169, "logps/chosen": -258.5987548828125, "logps/rejected": -266.34283447265625, "loss": 0.0421, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07832027971744537, "rewards/margins": 0.06150209158658981, "rewards/rejected": -0.13982237875461578, "step": 5050 }, { "epoch": 0.33, "learning_rate": 4.230131255784884e-06, "logits/chosen": -2.5053064823150635, "logits/rejected": -2.1963772773742676, "logps/chosen": -271.65020751953125, "logps/rejected": -268.30572509765625, "loss": 0.0258, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06883566081523895, "rewards/margins": 0.0772596225142479, "rewards/rejected": -0.14609530568122864, "step": 5060 }, { "epoch": 0.33, "learning_rate": 4.226005064173748e-06, "logits/chosen": -2.295358180999756, "logits/rejected": -2.1440882682800293, "logps/chosen": -276.32818603515625, "logps/rejected": -307.0455627441406, "loss": 0.0157, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.058419596403837204, "rewards/margins": 0.06130147725343704, "rewards/rejected": -0.11972107738256454, "step": 5070 }, { "epoch": 0.33, "learning_rate": 4.2218698688747035e-06, "logits/chosen": -2.116386890411377, "logits/rejected": -1.962472915649414, "logps/chosen": -252.8600311279297, "logps/rejected": -227.2456817626953, "loss": 0.0202, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1007535308599472, "rewards/margins": 0.09757916629314423, "rewards/rejected": -0.19833272695541382, "step": 5080 }, { "epoch": 0.33, "learning_rate": 4.217725691458957e-06, "logits/chosen": -2.4443917274475098, "logits/rejected": -2.2689287662506104, "logps/chosen": -199.50445556640625, "logps/rejected": -245.4992218017578, "loss": 0.0274, "rewards/accuracies": 0.75, "rewards/chosen": -0.07640162855386734, "rewards/margins": 0.09524401277303696, "rewards/rejected": -0.1716456115245819, "step": 5090 }, { "epoch": 0.33, "learning_rate": 4.213572553544565e-06, "logits/chosen": -2.3300154209136963, "logits/rejected": -2.0967860221862793, "logps/chosen": -254.84469604492188, "logps/rejected": -264.0647277832031, "loss": 0.0183, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06936436146497726, "rewards/margins": 0.08292285352945328, "rewards/rejected": -0.15228721499443054, "step": 5100 }, { "epoch": 0.33, "eval_logits/chosen": -2.284034252166748, "eval_logits/rejected": -2.097978353500366, "eval_logps/chosen": -243.7823944091797, "eval_logps/rejected": -239.09414672851562, "eval_loss": 0.026595089584589005, "eval_rewards/accuracies": 0.6414999961853027, "eval_rewards/chosen": -0.05888722091913223, "eval_rewards/margins": 0.07852396368980408, "eval_rewards/rejected": -0.1374111771583557, "eval_runtime": 712.368, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 5100 }, { "epoch": 0.33, "learning_rate": 4.209410476796331e-06, "logits/chosen": -2.2061429023742676, "logits/rejected": -2.149639844894409, "logps/chosen": -185.64755249023438, "logps/rejected": -192.6439666748047, "loss": 0.0358, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07278040796518326, "rewards/margins": 0.08492831885814667, "rewards/rejected": -0.15770871937274933, "step": 5110 }, { "epoch": 0.33, "learning_rate": 4.205239482925686e-06, "logits/chosen": -2.0843310356140137, "logits/rejected": -2.1315224170684814, "logps/chosen": -200.5720672607422, "logps/rejected": -232.9461669921875, "loss": 0.0355, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06378359347581863, "rewards/margins": 0.051238518208265305, "rewards/rejected": -0.11502210050821304, "step": 5120 }, { "epoch": 0.34, "learning_rate": 4.201059593690577e-06, "logits/chosen": -2.3437514305114746, "logits/rejected": -2.255739688873291, "logps/chosen": -237.38357543945312, "logps/rejected": -225.99667358398438, "loss": 0.0105, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05897898226976395, "rewards/margins": 0.07021085917949677, "rewards/rejected": -0.1291898488998413, "step": 5130 }, { "epoch": 0.34, "learning_rate": 4.196870830895354e-06, "logits/chosen": -2.1586525440216064, "logits/rejected": -2.1691572666168213, "logps/chosen": -270.2343444824219, "logps/rejected": -332.2071228027344, "loss": 0.0195, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0624338760972023, "rewards/margins": 0.06232718750834465, "rewards/rejected": -0.12476108223199844, "step": 5140 }, { "epoch": 0.34, "learning_rate": 4.192673216390657e-06, "logits/chosen": -2.3356637954711914, "logits/rejected": -2.072592258453369, "logps/chosen": -243.0529327392578, "logps/rejected": -221.76089477539062, "loss": 0.0356, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.05039996653795242, "rewards/margins": 0.08517764508724213, "rewards/rejected": -0.13557760417461395, "step": 5150 }, { "epoch": 0.34, "learning_rate": 4.188466772073296e-06, "logits/chosen": -2.4300613403320312, "logits/rejected": -2.1226162910461426, "logps/chosen": -234.875732421875, "logps/rejected": -223.30734252929688, "loss": 0.0189, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07175593078136444, "rewards/margins": 0.04596634581685066, "rewards/rejected": -0.1177222728729248, "step": 5160 }, { "epoch": 0.34, "learning_rate": 4.184251519886148e-06, "logits/chosen": -2.170525074005127, "logits/rejected": -2.264866828918457, "logps/chosen": -217.9490203857422, "logps/rejected": -259.9535827636719, "loss": 0.0347, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10920746624469757, "rewards/margins": 0.06674760580062866, "rewards/rejected": -0.17595505714416504, "step": 5170 }, { "epoch": 0.34, "learning_rate": 4.180027481818033e-06, "logits/chosen": -2.2939534187316895, "logits/rejected": -2.2727739810943604, "logps/chosen": -285.91156005859375, "logps/rejected": -258.90972900390625, "loss": 0.0255, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.09606721997261047, "rewards/margins": 0.0541587769985199, "rewards/rejected": -0.15022599697113037, "step": 5180 }, { "epoch": 0.34, "learning_rate": 4.175794679903602e-06, "logits/chosen": -2.3346500396728516, "logits/rejected": -2.097072124481201, "logps/chosen": -246.05990600585938, "logps/rejected": -191.774169921875, "loss": 0.0407, "rewards/accuracies": 0.625, "rewards/chosen": -0.0920952558517456, "rewards/margins": 0.09035644680261612, "rewards/rejected": -0.18245169520378113, "step": 5190 }, { "epoch": 0.34, "learning_rate": 4.171553136223222e-06, "logits/chosen": -2.28583025932312, "logits/rejected": -2.2817013263702393, "logps/chosen": -294.59600830078125, "logps/rejected": -325.22100830078125, "loss": 0.0196, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1363758146762848, "rewards/margins": 0.09957583248615265, "rewards/rejected": -0.23595163226127625, "step": 5200 }, { "epoch": 0.34, "eval_logits/chosen": -2.30310320854187, "eval_logits/rejected": -2.115068197250366, "eval_logps/chosen": -256.06915283203125, "eval_logps/rejected": -251.5511932373047, "eval_loss": 0.028050120919942856, "eval_rewards/accuracies": 0.6439999938011169, "eval_rewards/chosen": -0.12032100558280945, "eval_rewards/margins": 0.07937540858983994, "eval_rewards/rejected": -0.1996964067220688, "eval_runtime": 712.3698, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 5200 }, { "epoch": 0.34, "learning_rate": 4.167302872902865e-06, "logits/chosen": -2.3044514656066895, "logits/rejected": -2.2063915729522705, "logps/chosen": -279.16839599609375, "logps/rejected": -289.25299072265625, "loss": 0.0286, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13850130140781403, "rewards/margins": 0.11011314392089844, "rewards/rejected": -0.24861443042755127, "step": 5210 }, { "epoch": 0.34, "learning_rate": 4.163043912113985e-06, "logits/chosen": -2.3407082557678223, "logits/rejected": -2.1088497638702393, "logps/chosen": -279.55242919921875, "logps/rejected": -257.5092468261719, "loss": 0.0235, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10256153345108032, "rewards/margins": 0.05975686386227608, "rewards/rejected": -0.1623183935880661, "step": 5220 }, { "epoch": 0.34, "learning_rate": 4.15877627607341e-06, "logits/chosen": -2.1317286491394043, "logits/rejected": -2.0381064414978027, "logps/chosen": -235.8717803955078, "logps/rejected": -225.30429077148438, "loss": 0.0238, "rewards/accuracies": 0.625, "rewards/chosen": -0.09638473391532898, "rewards/margins": 0.06827443093061447, "rewards/rejected": -0.16465915739536285, "step": 5230 }, { "epoch": 0.34, "learning_rate": 4.154499987043217e-06, "logits/chosen": -2.360839366912842, "logits/rejected": -2.13840913772583, "logps/chosen": -242.048095703125, "logps/rejected": -244.0096893310547, "loss": 0.0115, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08922268450260162, "rewards/margins": 0.12070658057928085, "rewards/rejected": -0.20992930233478546, "step": 5240 }, { "epoch": 0.34, "learning_rate": 4.150215067330625e-06, "logits/chosen": -2.1910433769226074, "logits/rejected": -2.1369471549987793, "logps/chosen": -230.94845581054688, "logps/rejected": -262.9044494628906, "loss": 0.0371, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10949836671352386, "rewards/margins": 0.08959327638149261, "rewards/rejected": -0.19909165799617767, "step": 5250 }, { "epoch": 0.34, "learning_rate": 4.145921539287876e-06, "logits/chosen": -2.2304296493530273, "logits/rejected": -1.951674222946167, "logps/chosen": -209.9062042236328, "logps/rejected": -213.408935546875, "loss": 0.022, "rewards/accuracies": 0.75, "rewards/chosen": -0.09656783193349838, "rewards/margins": 0.11753030121326447, "rewards/rejected": -0.21409812569618225, "step": 5260 }, { "epoch": 0.34, "learning_rate": 4.141619425312115e-06, "logits/chosen": -2.3175554275512695, "logits/rejected": -1.9529327154159546, "logps/chosen": -228.7890625, "logps/rejected": -215.639892578125, "loss": 0.021, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10068907588720322, "rewards/margins": 0.056463442742824554, "rewards/rejected": -0.15715253353118896, "step": 5270 }, { "epoch": 0.35, "learning_rate": 4.1373087478452735e-06, "logits/chosen": -2.446063756942749, "logits/rejected": -2.040144205093384, "logps/chosen": -240.7342071533203, "logps/rejected": -214.44467163085938, "loss": 0.0402, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07746803760528564, "rewards/margins": 0.14162200689315796, "rewards/rejected": -0.2190900295972824, "step": 5280 }, { "epoch": 0.35, "learning_rate": 4.132989529373959e-06, "logits/chosen": -2.3407349586486816, "logits/rejected": -1.9027103185653687, "logps/chosen": -274.0345458984375, "logps/rejected": -211.4494171142578, "loss": 0.021, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09060852229595184, "rewards/margins": 0.08206693828105927, "rewards/rejected": -0.17267544567584991, "step": 5290 }, { "epoch": 0.35, "learning_rate": 4.128661792429331e-06, "logits/chosen": -2.353055477142334, "logits/rejected": -2.1825497150421143, "logps/chosen": -269.4848327636719, "logps/rejected": -285.455322265625, "loss": 0.0218, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.0785713791847229, "rewards/margins": 0.05537046119570732, "rewards/rejected": -0.13394184410572052, "step": 5300 }, { "epoch": 0.35, "eval_logits/chosen": -2.3077661991119385, "eval_logits/rejected": -2.119899034500122, "eval_logps/chosen": -249.09693908691406, "eval_logps/rejected": -245.1140594482422, "eval_loss": 0.02842918410897255, "eval_rewards/accuracies": 0.6445000171661377, "eval_rewards/chosen": -0.08545980602502823, "eval_rewards/margins": 0.08205102384090424, "eval_rewards/rejected": -0.16751083731651306, "eval_runtime": 714.4322, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.4, "step": 5300 }, { "epoch": 0.35, "learning_rate": 4.124325559586985e-06, "logits/chosen": -2.063711643218994, "logits/rejected": -2.1065452098846436, "logps/chosen": -213.73178100585938, "logps/rejected": -230.61123657226562, "loss": 0.0493, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.12837538123130798, "rewards/margins": 0.01977720856666565, "rewards/rejected": -0.14815255999565125, "step": 5310 }, { "epoch": 0.35, "learning_rate": 4.119980853466835e-06, "logits/chosen": -2.285341262817383, "logits/rejected": -1.893385648727417, "logps/chosen": -224.71261596679688, "logps/rejected": -217.3481903076172, "loss": 0.0409, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0777909979224205, "rewards/margins": 0.09874869883060455, "rewards/rejected": -0.17653970420360565, "step": 5320 }, { "epoch": 0.35, "learning_rate": 4.115627696732997e-06, "logits/chosen": -2.2187039852142334, "logits/rejected": -2.0215742588043213, "logps/chosen": -204.32298278808594, "logps/rejected": -198.72189331054688, "loss": 0.0249, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06454373896121979, "rewards/margins": 0.06859930604696274, "rewards/rejected": -0.13314305245876312, "step": 5330 }, { "epoch": 0.35, "learning_rate": 4.111266112093668e-06, "logits/chosen": -2.3168303966522217, "logits/rejected": -2.1125686168670654, "logps/chosen": -218.3974151611328, "logps/rejected": -258.51153564453125, "loss": 0.0219, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07464858889579773, "rewards/margins": 0.10858140140771866, "rewards/rejected": -0.1832299828529358, "step": 5340 }, { "epoch": 0.35, "learning_rate": 4.1068961223010115e-06, "logits/chosen": -2.292290687561035, "logits/rejected": -2.0046138763427734, "logps/chosen": -283.95220947265625, "logps/rejected": -281.2190856933594, "loss": 0.0281, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0662577822804451, "rewards/margins": 0.09971605241298676, "rewards/rejected": -0.16597382724285126, "step": 5350 }, { "epoch": 0.35, "learning_rate": 4.102517750151034e-06, "logits/chosen": -2.3518424034118652, "logits/rejected": -2.109513282775879, "logps/chosen": -303.15936279296875, "logps/rejected": -241.0292205810547, "loss": 0.0306, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04057258740067482, "rewards/margins": 0.04591400548815727, "rewards/rejected": -0.08648659288883209, "step": 5360 }, { "epoch": 0.35, "learning_rate": 4.09813101848347e-06, "logits/chosen": -2.2338290214538574, "logits/rejected": -2.2780566215515137, "logps/chosen": -224.85452270507812, "logps/rejected": -255.304931640625, "loss": 0.0207, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.02934291400015354, "rewards/margins": 0.06648119539022446, "rewards/rejected": -0.09582411497831345, "step": 5370 }, { "epoch": 0.35, "learning_rate": 4.093735950181659e-06, "logits/chosen": -2.1906943321228027, "logits/rejected": -2.094470500946045, "logps/chosen": -227.29638671875, "logps/rejected": -271.4776306152344, "loss": 0.0146, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.009115161374211311, "rewards/margins": 0.09818967431783676, "rewards/rejected": -0.10730484873056412, "step": 5380 }, { "epoch": 0.35, "learning_rate": 4.0893325681724326e-06, "logits/chosen": -2.3018577098846436, "logits/rejected": -2.2464919090270996, "logps/chosen": -268.96588134765625, "logps/rejected": -270.78460693359375, "loss": 0.0357, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.052720922976732254, "rewards/margins": 0.07049056887626648, "rewards/rejected": -0.12321150302886963, "step": 5390 }, { "epoch": 0.35, "learning_rate": 4.084920895425988e-06, "logits/chosen": -2.249305009841919, "logits/rejected": -2.2471258640289307, "logps/chosen": -249.9848175048828, "logps/rejected": -277.1879577636719, "loss": 0.0392, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06310076266527176, "rewards/margins": 0.06779120117425919, "rewards/rejected": -0.13089194893836975, "step": 5400 }, { "epoch": 0.35, "eval_logits/chosen": -2.3067572116851807, "eval_logits/rejected": -2.1202468872070312, "eval_logps/chosen": -236.23129272460938, "eval_logps/rejected": -229.63597106933594, "eval_loss": 0.027629448100924492, "eval_rewards/accuracies": 0.6320000290870667, "eval_rewards/chosen": -0.021131761372089386, "eval_rewards/margins": 0.06898857653141022, "eval_rewards/rejected": -0.0901203379034996, "eval_runtime": 712.2572, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 5400 }, { "epoch": 0.35, "learning_rate": 4.080500954955769e-06, "logits/chosen": -2.2228169441223145, "logits/rejected": -1.9339510202407837, "logps/chosen": -263.3153991699219, "logps/rejected": -259.1683349609375, "loss": 0.0337, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03721725195646286, "rewards/margins": 0.07069588452577591, "rewards/rejected": -0.10791312158107758, "step": 5410 }, { "epoch": 0.35, "learning_rate": 4.076072769818354e-06, "logits/chosen": -2.476381301879883, "logits/rejected": -2.027029514312744, "logps/chosen": -248.3318328857422, "logps/rejected": -200.37460327148438, "loss": 0.012, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.023047439754009247, "rewards/margins": 0.07004229724407196, "rewards/rejected": -0.0930897444486618, "step": 5420 }, { "epoch": 0.36, "learning_rate": 4.071636363113323e-06, "logits/chosen": -2.0812182426452637, "logits/rejected": -2.0051701068878174, "logps/chosen": -270.99114990234375, "logps/rejected": -228.74618530273438, "loss": 0.0269, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.014305288903415203, "rewards/margins": 0.05898820608854294, "rewards/rejected": -0.07329348474740982, "step": 5430 }, { "epoch": 0.36, "learning_rate": 4.067191757983146e-06, "logits/chosen": -2.0510897636413574, "logits/rejected": -2.1016201972961426, "logps/chosen": -237.39096069335938, "logps/rejected": -252.74520874023438, "loss": 0.0322, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0390765555202961, "rewards/margins": 0.09284119307994843, "rewards/rejected": -0.13191775977611542, "step": 5440 }, { "epoch": 0.36, "learning_rate": 4.062738977613063e-06, "logits/chosen": -2.253300666809082, "logits/rejected": -2.1287381649017334, "logps/chosen": -238.584228515625, "logps/rejected": -205.4900665283203, "loss": 0.0258, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.020539533346891403, "rewards/margins": 0.07649553567171097, "rewards/rejected": -0.09703507274389267, "step": 5450 }, { "epoch": 0.36, "learning_rate": 4.058278045230957e-06, "logits/chosen": -2.1818337440490723, "logits/rejected": -2.1889472007751465, "logps/chosen": -224.8425750732422, "logps/rejected": -237.52249145507812, "loss": 0.0316, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.032378651201725006, "rewards/margins": 0.055461399257183075, "rewards/rejected": -0.08784005045890808, "step": 5460 }, { "epoch": 0.36, "learning_rate": 4.053808984107235e-06, "logits/chosen": -2.401970863342285, "logits/rejected": -2.076896905899048, "logps/chosen": -234.11154174804688, "logps/rejected": -203.02137756347656, "loss": 0.046, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.006216132082045078, "rewards/margins": 0.04601501673460007, "rewards/rejected": -0.05223115161061287, "step": 5470 }, { "epoch": 0.36, "learning_rate": 4.04933181755471e-06, "logits/chosen": -2.3881890773773193, "logits/rejected": -2.3108270168304443, "logps/chosen": -207.8488006591797, "logps/rejected": -216.2948760986328, "loss": 0.0403, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.006340887397527695, "rewards/margins": 0.07829299569129944, "rewards/rejected": -0.08463388681411743, "step": 5480 }, { "epoch": 0.36, "learning_rate": 4.044846568928477e-06, "logits/chosen": -2.273111343383789, "logits/rejected": -2.347074270248413, "logps/chosen": -264.272216796875, "logps/rejected": -267.3084411621094, "loss": 0.0372, "rewards/accuracies": 0.625, "rewards/chosen": -0.01374004315584898, "rewards/margins": 0.059470999985933304, "rewards/rejected": -0.07321105152368546, "step": 5490 }, { "epoch": 0.36, "learning_rate": 4.040353261625788e-06, "logits/chosen": -2.439272403717041, "logits/rejected": -2.0590574741363525, "logps/chosen": -275.3080139160156, "logps/rejected": -258.0126037597656, "loss": 0.0095, "rewards/accuracies": 0.75, "rewards/chosen": 0.006186123006045818, "rewards/margins": 0.10667552798986435, "rewards/rejected": -0.10048942267894745, "step": 5500 }, { "epoch": 0.36, "eval_logits/chosen": -2.3002686500549316, "eval_logits/rejected": -2.1143617630004883, "eval_logps/chosen": -234.17333984375, "eval_logps/rejected": -228.3682861328125, "eval_loss": 0.027793213725090027, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": -0.010841944254934788, "eval_rewards/margins": 0.07293994724750519, "eval_rewards/rejected": -0.083781898021698, "eval_runtime": 713.3002, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 5500 }, { "epoch": 0.36, "learning_rate": 4.035851919085936e-06, "logits/chosen": -2.2740674018859863, "logits/rejected": -2.147181510925293, "logps/chosen": -273.53179931640625, "logps/rejected": -211.1166229248047, "loss": 0.0156, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.030688535422086716, "rewards/margins": 0.08148294687271118, "rewards/rejected": -0.1121714860200882, "step": 5510 }, { "epoch": 0.36, "learning_rate": 4.031342564790128e-06, "logits/chosen": -2.2105610370635986, "logits/rejected": -2.0671443939208984, "logps/chosen": -207.53897094726562, "logps/rejected": -222.29736328125, "loss": 0.0381, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.007836338132619858, "rewards/margins": 0.09765578806400299, "rewards/rejected": -0.10549211502075195, "step": 5520 }, { "epoch": 0.36, "learning_rate": 4.026825222261367e-06, "logits/chosen": -2.228231906890869, "logits/rejected": -1.981793761253357, "logps/chosen": -187.2061767578125, "logps/rejected": -192.12667846679688, "loss": 0.0467, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07065559923648834, "rewards/margins": 0.06031841039657593, "rewards/rejected": -0.13097400963306427, "step": 5530 }, { "epoch": 0.36, "learning_rate": 4.022299915064321e-06, "logits/chosen": -2.298172950744629, "logits/rejected": -2.099940299987793, "logps/chosen": -309.3241882324219, "logps/rejected": -271.7220458984375, "loss": 0.0359, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.017381075769662857, "rewards/margins": 0.0775168314576149, "rewards/rejected": -0.09489791095256805, "step": 5540 }, { "epoch": 0.36, "learning_rate": 4.017766666805213e-06, "logits/chosen": -2.104163646697998, "logits/rejected": -2.0608696937561035, "logps/chosen": -222.52157592773438, "logps/rejected": -203.38827514648438, "loss": 0.0323, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.045006077736616135, "rewards/margins": 0.081141397356987, "rewards/rejected": -0.12614747881889343, "step": 5550 }, { "epoch": 0.36, "learning_rate": 4.013225501131684e-06, "logits/chosen": -2.3281373977661133, "logits/rejected": -2.0518734455108643, "logps/chosen": -221.41348266601562, "logps/rejected": -207.80209350585938, "loss": 0.0223, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.02017935737967491, "rewards/margins": 0.06680725514888763, "rewards/rejected": -0.08698661625385284, "step": 5560 }, { "epoch": 0.36, "learning_rate": 4.008676441732679e-06, "logits/chosen": -2.27862548828125, "logits/rejected": -1.9543254375457764, "logps/chosen": -214.8693084716797, "logps/rejected": -177.15866088867188, "loss": 0.0424, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.04287482053041458, "rewards/margins": 0.051451247185468674, "rewards/rejected": -0.09432607889175415, "step": 5570 }, { "epoch": 0.37, "learning_rate": 4.00411951233832e-06, "logits/chosen": -2.4121203422546387, "logits/rejected": -2.0738954544067383, "logps/chosen": -225.67214965820312, "logps/rejected": -198.25326538085938, "loss": 0.0355, "rewards/accuracies": 0.625, "rewards/chosen": -0.02973489835858345, "rewards/margins": 0.08627339452505112, "rewards/rejected": -0.11600829660892487, "step": 5580 }, { "epoch": 0.37, "learning_rate": 3.999554736719785e-06, "logits/chosen": -2.138333797454834, "logits/rejected": -2.0238735675811768, "logps/chosen": -297.84124755859375, "logps/rejected": -269.1844177246094, "loss": 0.0249, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.02932184934616089, "rewards/margins": 0.07430565357208252, "rewards/rejected": -0.10362748801708221, "step": 5590 }, { "epoch": 0.37, "learning_rate": 3.994982138689177e-06, "logits/chosen": -2.40543794631958, "logits/rejected": -2.21651029586792, "logps/chosen": -243.3259735107422, "logps/rejected": -252.36026000976562, "loss": 0.0199, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.02962910570204258, "rewards/margins": 0.054427288472652435, "rewards/rejected": -0.08405639231204987, "step": 5600 }, { "epoch": 0.37, "eval_logits/chosen": -2.2606699466705322, "eval_logits/rejected": -2.076427459716797, "eval_logps/chosen": -241.3705596923828, "eval_logps/rejected": -237.5136260986328, "eval_loss": 0.027888035401701927, "eval_rewards/accuracies": 0.6430000066757202, "eval_rewards/chosen": -0.0468280129134655, "eval_rewards/margins": 0.0826805830001831, "eval_rewards/rejected": -0.1295085847377777, "eval_runtime": 712.0846, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.404, "step": 5600 }, { "epoch": 0.37, "learning_rate": 3.990401742099408e-06, "logits/chosen": -2.0736844539642334, "logits/rejected": -2.093573570251465, "logps/chosen": -187.79244995117188, "logps/rejected": -190.85166931152344, "loss": 0.0315, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.044646747410297394, "rewards/margins": 0.05204001069068909, "rewards/rejected": -0.09668676555156708, "step": 5610 }, { "epoch": 0.37, "learning_rate": 3.985813570844072e-06, "logits/chosen": -2.210111379623413, "logits/rejected": -2.069014310836792, "logps/chosen": -307.0541687011719, "logps/rejected": -293.98638916015625, "loss": 0.0468, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0633372887969017, "rewards/margins": 0.07241859287023544, "rewards/rejected": -0.13575588166713715, "step": 5620 }, { "epoch": 0.37, "learning_rate": 3.981217648857316e-06, "logits/chosen": -2.2960948944091797, "logits/rejected": -2.1048855781555176, "logps/chosen": -179.8003692626953, "logps/rejected": -198.77951049804688, "loss": 0.0142, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.036299627274274826, "rewards/margins": 0.08598671853542328, "rewards/rejected": -0.1222863644361496, "step": 5630 }, { "epoch": 0.37, "learning_rate": 3.97661400011372e-06, "logits/chosen": -2.14565110206604, "logits/rejected": -2.1863856315612793, "logps/chosen": -250.01345825195312, "logps/rejected": -246.5854034423828, "loss": 0.0405, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.03493080660700798, "rewards/margins": 0.039973922073841095, "rewards/rejected": -0.07490471750497818, "step": 5640 }, { "epoch": 0.37, "learning_rate": 3.972002648628174e-06, "logits/chosen": -2.1732308864593506, "logits/rejected": -1.8485368490219116, "logps/chosen": -284.80145263671875, "logps/rejected": -247.41360473632812, "loss": 0.0194, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.038853127509355545, "rewards/margins": 0.05152001231908798, "rewards/rejected": -0.09037313610315323, "step": 5650 }, { "epoch": 0.37, "learning_rate": 3.967383618455743e-06, "logits/chosen": -2.3239212036132812, "logits/rejected": -2.153209924697876, "logps/chosen": -239.8135528564453, "logps/rejected": -271.5573425292969, "loss": 0.0481, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.06346921622753143, "rewards/margins": 0.06776650995016098, "rewards/rejected": -0.13123571872711182, "step": 5660 }, { "epoch": 0.37, "learning_rate": 3.9627569336915515e-06, "logits/chosen": -2.4477696418762207, "logits/rejected": -2.1409733295440674, "logps/chosen": -251.18179321289062, "logps/rejected": -213.980712890625, "loss": 0.0304, "rewards/accuracies": 0.625, "rewards/chosen": -0.018595131114125252, "rewards/margins": 0.09737774729728699, "rewards/rejected": -0.1159728616476059, "step": 5670 }, { "epoch": 0.37, "learning_rate": 3.9581226184706555e-06, "logits/chosen": -2.294837713241577, "logits/rejected": -2.39911150932312, "logps/chosen": -196.95144653320312, "logps/rejected": -279.7997131347656, "loss": 0.0149, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.011430233716964722, "rewards/margins": 0.06075301766395569, "rewards/rejected": -0.07218325138092041, "step": 5680 }, { "epoch": 0.37, "learning_rate": 3.953480696967912e-06, "logits/chosen": -1.9522489309310913, "logits/rejected": -2.1829464435577393, "logps/chosen": -219.80093383789062, "logps/rejected": -271.7539978027344, "loss": 0.0209, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.05214661359786987, "rewards/margins": 0.07753698527812958, "rewards/rejected": -0.12968358397483826, "step": 5690 }, { "epoch": 0.37, "learning_rate": 3.948831193397857e-06, "logits/chosen": -2.1683051586151123, "logits/rejected": -2.150635242462158, "logps/chosen": -172.88192749023438, "logps/rejected": -194.87937927246094, "loss": 0.0237, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.034332599490880966, "rewards/margins": 0.08085787296295166, "rewards/rejected": -0.11519046872854233, "step": 5700 }, { "epoch": 0.37, "eval_logits/chosen": -2.2279889583587646, "eval_logits/rejected": -2.0451738834381104, "eval_logps/chosen": -238.47406005859375, "eval_logps/rejected": -235.90609741210938, "eval_loss": 0.02667395770549774, "eval_rewards/accuracies": 0.6445000171661377, "eval_rewards/chosen": -0.0323454923927784, "eval_rewards/margins": 0.0891253724694252, "eval_rewards/rejected": -0.1214708611369133, "eval_runtime": 713.1838, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 5700 }, { "epoch": 0.37, "learning_rate": 3.94417413201458e-06, "logits/chosen": -2.1256191730499268, "logits/rejected": -1.9788223505020142, "logps/chosen": -214.668212890625, "logps/rejected": -214.07028198242188, "loss": 0.055, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.015483707189559937, "rewards/margins": 0.08584414422512054, "rewards/rejected": -0.10132785141468048, "step": 5710 }, { "epoch": 0.37, "learning_rate": 3.9395095371115935e-06, "logits/chosen": -2.283236265182495, "logits/rejected": -2.00763201713562, "logps/chosen": -215.9113311767578, "logps/rejected": -224.1573028564453, "loss": 0.0335, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.01627127267420292, "rewards/margins": 0.0942857414484024, "rewards/rejected": -0.11055700480937958, "step": 5720 }, { "epoch": 0.37, "learning_rate": 3.93483743302171e-06, "logits/chosen": -2.221068859100342, "logits/rejected": -1.9899393320083618, "logps/chosen": -215.34814453125, "logps/rejected": -204.9816131591797, "loss": 0.0291, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.012782298028469086, "rewards/margins": 0.06684406101703644, "rewards/rejected": -0.07962635904550552, "step": 5730 }, { "epoch": 0.38, "learning_rate": 3.930157844116913e-06, "logits/chosen": -2.0747952461242676, "logits/rejected": -2.009860038757324, "logps/chosen": -208.75759887695312, "logps/rejected": -207.610595703125, "loss": 0.024, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.004998432006686926, "rewards/margins": 0.06785848736763, "rewards/rejected": -0.06286005675792694, "step": 5740 }, { "epoch": 0.38, "learning_rate": 3.925470794808229e-06, "logits/chosen": -2.2149665355682373, "logits/rejected": -1.8618929386138916, "logps/chosen": -245.27243041992188, "logps/rejected": -229.793701171875, "loss": 0.0308, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03142361342906952, "rewards/margins": 0.08669319748878479, "rewards/rejected": -0.1181168183684349, "step": 5750 }, { "epoch": 0.38, "learning_rate": 3.920776309545606e-06, "logits/chosen": -2.2878258228302, "logits/rejected": -2.1229512691497803, "logps/chosen": -152.00323486328125, "logps/rejected": -158.4375457763672, "loss": 0.034, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.0035188309848308563, "rewards/margins": 0.06633396446704865, "rewards/rejected": -0.06281514465808868, "step": 5760 }, { "epoch": 0.38, "learning_rate": 3.916074412817778e-06, "logits/chosen": -2.1815593242645264, "logits/rejected": -1.8238589763641357, "logps/chosen": -244.6201629638672, "logps/rejected": -267.8778381347656, "loss": 0.0247, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.006430828478187323, "rewards/margins": 0.11367674916982651, "rewards/rejected": -0.12010756880044937, "step": 5770 }, { "epoch": 0.38, "learning_rate": 3.911365129152139e-06, "logits/chosen": -2.3041155338287354, "logits/rejected": -2.15433669090271, "logps/chosen": -232.7111358642578, "logps/rejected": -244.7630615234375, "loss": 0.0237, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.001386212301440537, "rewards/margins": 0.0879589095711708, "rewards/rejected": -0.08934511989355087, "step": 5780 }, { "epoch": 0.38, "learning_rate": 3.906648483114623e-06, "logits/chosen": -2.1890604496002197, "logits/rejected": -2.0869388580322266, "logps/chosen": -200.27606201171875, "logps/rejected": -189.9747314453125, "loss": 0.0298, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.014762152917683125, "rewards/margins": 0.11141836643218994, "rewards/rejected": -0.1261805295944214, "step": 5790 }, { "epoch": 0.38, "learning_rate": 3.901924499309564e-06, "logits/chosen": -2.1342368125915527, "logits/rejected": -1.9452743530273438, "logps/chosen": -234.81478881835938, "logps/rejected": -219.6333770751953, "loss": 0.0323, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03054950200021267, "rewards/margins": 0.08712270855903625, "rewards/rejected": -0.11767220497131348, "step": 5800 }, { "epoch": 0.38, "eval_logits/chosen": -2.2194855213165283, "eval_logits/rejected": -2.036980152130127, "eval_logps/chosen": -240.2529754638672, "eval_logps/rejected": -237.3893280029297, "eval_loss": 0.026860052719712257, "eval_rewards/accuracies": 0.6460000276565552, "eval_rewards/chosen": -0.04124004766345024, "eval_rewards/margins": 0.08764705806970596, "eval_rewards/rejected": -0.1288871020078659, "eval_runtime": 712.9972, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.403, "step": 5800 }, { "epoch": 0.38, "learning_rate": 3.897193202379575e-06, "logits/chosen": -2.221252202987671, "logits/rejected": -2.055716037750244, "logps/chosen": -207.3871307373047, "logps/rejected": -208.7563018798828, "loss": 0.0246, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03454715758562088, "rewards/margins": 0.0917096808552742, "rewards/rejected": -0.12625685334205627, "step": 5810 }, { "epoch": 0.38, "learning_rate": 3.8924546170054215e-06, "logits/chosen": -2.160278081893921, "logits/rejected": -2.1069438457489014, "logps/chosen": -222.0353546142578, "logps/rejected": -218.66311645507812, "loss": 0.0191, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.03063853643834591, "rewards/margins": 0.0805058628320694, "rewards/rejected": -0.11114440113306046, "step": 5820 }, { "epoch": 0.38, "learning_rate": 3.887708767905883e-06, "logits/chosen": -2.4319489002227783, "logits/rejected": -2.033052921295166, "logps/chosen": -251.15188598632812, "logps/rejected": -199.21560668945312, "loss": 0.0255, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03692341595888138, "rewards/margins": 0.06953327357769012, "rewards/rejected": -0.1064566820859909, "step": 5830 }, { "epoch": 0.38, "learning_rate": 3.882955679837636e-06, "logits/chosen": -2.1963651180267334, "logits/rejected": -2.087002992630005, "logps/chosen": -248.7950439453125, "logps/rejected": -263.8435974121094, "loss": 0.0359, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04709015041589737, "rewards/margins": 0.06336608529090881, "rewards/rejected": -0.11045622825622559, "step": 5840 }, { "epoch": 0.38, "learning_rate": 3.878195377595113e-06, "logits/chosen": -2.285632610321045, "logits/rejected": -2.1098172664642334, "logps/chosen": -242.3900909423828, "logps/rejected": -256.57550048828125, "loss": 0.0336, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.03242162987589836, "rewards/margins": 0.09874467551708221, "rewards/rejected": -0.13116630911827087, "step": 5850 }, { "epoch": 0.38, "learning_rate": 3.873427886010384e-06, "logits/chosen": -2.236074686050415, "logits/rejected": -2.089146137237549, "logps/chosen": -196.53201293945312, "logps/rejected": -196.675048828125, "loss": 0.0205, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0304003544151783, "rewards/margins": 0.09616055339574814, "rewards/rejected": -0.12656089663505554, "step": 5860 }, { "epoch": 0.38, "learning_rate": 3.868653229953021e-06, "logits/chosen": -2.3070778846740723, "logits/rejected": -2.0845189094543457, "logps/chosen": -242.5325469970703, "logps/rejected": -262.4086608886719, "loss": 0.0129, "rewards/accuracies": 0.75, "rewards/chosen": -0.026588618755340576, "rewards/margins": 0.10969982296228409, "rewards/rejected": -0.13628843426704407, "step": 5870 }, { "epoch": 0.38, "learning_rate": 3.8638714343299675e-06, "logits/chosen": -2.209580183029175, "logits/rejected": -2.1250576972961426, "logps/chosen": -225.6443634033203, "logps/rejected": -257.268798828125, "loss": 0.013, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.03650829195976257, "rewards/margins": 0.08254338055849075, "rewards/rejected": -0.11905165761709213, "step": 5880 }, { "epoch": 0.39, "learning_rate": 3.859082524085414e-06, "logits/chosen": -2.2187929153442383, "logits/rejected": -1.8690040111541748, "logps/chosen": -280.08441162109375, "logps/rejected": -234.6243133544922, "loss": 0.0263, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03870617598295212, "rewards/margins": 0.07389514148235321, "rewards/rejected": -0.11260131746530533, "step": 5890 }, { "epoch": 0.39, "learning_rate": 3.854286524200659e-06, "logits/chosen": -2.360055923461914, "logits/rejected": -2.096848249435425, "logps/chosen": -282.25677490234375, "logps/rejected": -241.24325561523438, "loss": 0.0242, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.029196584597229958, "rewards/margins": 0.04811464250087738, "rewards/rejected": -0.07731121778488159, "step": 5900 }, { "epoch": 0.39, "eval_logits/chosen": -2.2253873348236084, "eval_logits/rejected": -2.042678117752075, "eval_logps/chosen": -238.05584716796875, "eval_logps/rejected": -233.9046630859375, "eval_loss": 0.02604703977704048, "eval_rewards/accuracies": 0.6455000042915344, "eval_rewards/chosen": -0.03025444969534874, "eval_rewards/margins": 0.08120942115783691, "eval_rewards/rejected": -0.11146386712789536, "eval_runtime": 712.5903, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.403, "step": 5900 }, { "epoch": 0.39, "learning_rate": 3.849483459693991e-06, "logits/chosen": -2.299449920654297, "logits/rejected": -2.1660971641540527, "logps/chosen": -215.0598602294922, "logps/rejected": -200.59132385253906, "loss": 0.0136, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.026915917173027992, "rewards/margins": 0.12100283801555634, "rewards/rejected": -0.14791876077651978, "step": 5910 }, { "epoch": 0.39, "learning_rate": 3.844673355620544e-06, "logits/chosen": -2.1884491443634033, "logits/rejected": -2.05025315284729, "logps/chosen": -252.08255004882812, "logps/rejected": -233.2479248046875, "loss": 0.0182, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.027548307552933693, "rewards/margins": 0.09725430607795715, "rewards/rejected": -0.1248026043176651, "step": 5920 }, { "epoch": 0.39, "learning_rate": 3.839856237072178e-06, "logits/chosen": -2.044630527496338, "logits/rejected": -2.0448217391967773, "logps/chosen": -190.75665283203125, "logps/rejected": -232.75241088867188, "loss": 0.0352, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.040912725031375885, "rewards/margins": 0.12401840835809708, "rewards/rejected": -0.16493113338947296, "step": 5930 }, { "epoch": 0.39, "learning_rate": 3.8350321291773455e-06, "logits/chosen": -1.9816339015960693, "logits/rejected": -1.9150645732879639, "logps/chosen": -202.12307739257812, "logps/rejected": -175.07394409179688, "loss": 0.0247, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0019943893421441317, "rewards/margins": 0.08573590219020844, "rewards/rejected": -0.08374151587486267, "step": 5940 }, { "epoch": 0.39, "learning_rate": 3.830201057100953e-06, "logits/chosen": -2.2745325565338135, "logits/rejected": -2.305018424987793, "logps/chosen": -193.10279846191406, "logps/rejected": -228.12277221679688, "loss": 0.0178, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.006648970302194357, "rewards/margins": 0.0976361334323883, "rewards/rejected": -0.1042850986123085, "step": 5950 }, { "epoch": 0.39, "learning_rate": 3.82536304604424e-06, "logits/chosen": -2.071976900100708, "logits/rejected": -2.0189526081085205, "logps/chosen": -233.9423065185547, "logps/rejected": -217.19754028320312, "loss": 0.0426, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.011109702289104462, "rewards/margins": 0.07495688647031784, "rewards/rejected": -0.06384718418121338, "step": 5960 }, { "epoch": 0.39, "learning_rate": 3.8205181212446435e-06, "logits/chosen": -2.460482358932495, "logits/rejected": -2.2313225269317627, "logps/chosen": -266.06683349609375, "logps/rejected": -237.21945190429688, "loss": 0.0216, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.02246047556400299, "rewards/margins": 0.05778072029352188, "rewards/rejected": -0.03532024100422859, "step": 5970 }, { "epoch": 0.39, "learning_rate": 3.815666307975664e-06, "logits/chosen": -2.259298801422119, "logits/rejected": -2.1296067237854004, "logps/chosen": -235.4974822998047, "logps/rejected": -227.8106231689453, "loss": 0.0199, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.009101573377847672, "rewards/margins": 0.05747541785240173, "rewards/rejected": -0.04837384819984436, "step": 5980 }, { "epoch": 0.39, "learning_rate": 3.8108076315467346e-06, "logits/chosen": -2.3414454460144043, "logits/rejected": -2.2081501483917236, "logps/chosen": -259.45941162109375, "logps/rejected": -199.7333221435547, "loss": 0.0243, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.010883106850087643, "rewards/margins": 0.07500231266021729, "rewards/rejected": -0.0858854204416275, "step": 5990 }, { "epoch": 0.39, "learning_rate": 3.805942117303093e-06, "logits/chosen": -2.5289244651794434, "logits/rejected": -2.1916663646698, "logps/chosen": -315.30548095703125, "logps/rejected": -283.5316467285156, "loss": 0.0239, "rewards/accuracies": 0.75, "rewards/chosen": 0.0036502063740044832, "rewards/margins": 0.07374846190214157, "rewards/rejected": -0.07009825855493546, "step": 6000 }, { "epoch": 0.39, "eval_logits/chosen": -2.2698471546173096, "eval_logits/rejected": -2.0839905738830566, "eval_logps/chosen": -233.28067016601562, "eval_logps/rejected": -227.20497131347656, "eval_loss": 0.026537323370575905, "eval_rewards/accuracies": 0.6395000219345093, "eval_rewards/chosen": -0.006378578953444958, "eval_rewards/margins": 0.07158681005239487, "eval_rewards/rejected": -0.0779653862118721, "eval_runtime": 713.6836, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 6000 }, { "epoch": 0.39, "learning_rate": 3.8010697906256446e-06, "logits/chosen": -2.1027355194091797, "logits/rejected": -2.0898165702819824, "logps/chosen": -216.5233917236328, "logps/rejected": -203.8676300048828, "loss": 0.0466, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.049134716391563416, "rewards/margins": 0.07587228715419769, "rewards/rejected": -0.1250070035457611, "step": 6010 }, { "epoch": 0.39, "learning_rate": 3.7961906769308323e-06, "logits/chosen": -2.1587841510772705, "logits/rejected": -1.971003770828247, "logps/chosen": -212.7795867919922, "logps/rejected": -236.19271850585938, "loss": 0.0211, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03272194415330887, "rewards/margins": 0.06669095903635025, "rewards/rejected": -0.09941292554140091, "step": 6020 }, { "epoch": 0.39, "learning_rate": 3.7913048016705028e-06, "logits/chosen": -2.2142512798309326, "logits/rejected": -2.1702721118927, "logps/chosen": -262.4501647949219, "logps/rejected": -271.28912353515625, "loss": 0.0117, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.012552952393889427, "rewards/margins": 0.0544348768889904, "rewards/rejected": -0.06698782742023468, "step": 6030 }, { "epoch": 0.4, "learning_rate": 3.786412190331775e-06, "logits/chosen": -2.408029556274414, "logits/rejected": -2.0841169357299805, "logps/chosen": -205.4874267578125, "logps/rejected": -183.05186462402344, "loss": 0.0236, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.013134591281414032, "rewards/margins": 0.07331900298595428, "rewards/rejected": -0.08645360171794891, "step": 6040 }, { "epoch": 0.4, "learning_rate": 3.781512868436906e-06, "logits/chosen": -2.389183521270752, "logits/rejected": -2.2467615604400635, "logps/chosen": -133.06936645507812, "logps/rejected": -151.9151611328125, "loss": 0.0152, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0022931471467018127, "rewards/margins": 0.05528401583433151, "rewards/rejected": -0.057577162981033325, "step": 6050 }, { "epoch": 0.4, "learning_rate": 3.7766068615431605e-06, "logits/chosen": -2.2205705642700195, "logits/rejected": -2.1680896282196045, "logps/chosen": -256.7353820800781, "logps/rejected": -226.5559539794922, "loss": 0.0427, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0017584555316716433, "rewards/margins": 0.09227786213159561, "rewards/rejected": -0.09051939845085144, "step": 6060 }, { "epoch": 0.4, "learning_rate": 3.771694195242671e-06, "logits/chosen": -2.439492702484131, "logits/rejected": -2.000755786895752, "logps/chosen": -306.957763671875, "logps/rejected": -208.7428436279297, "loss": 0.0417, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03889502212405205, "rewards/margins": 0.06758001446723938, "rewards/rejected": -0.10647504031658173, "step": 6070 }, { "epoch": 0.4, "learning_rate": 3.766774895162314e-06, "logits/chosen": -2.2616937160491943, "logits/rejected": -2.159468412399292, "logps/chosen": -256.6993713378906, "logps/rejected": -209.25021362304688, "loss": 0.0183, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.050798166543245316, "rewards/margins": 0.04343273863196373, "rewards/rejected": -0.09423090517520905, "step": 6080 }, { "epoch": 0.4, "learning_rate": 3.7618489869635666e-06, "logits/chosen": -2.184147357940674, "logits/rejected": -2.1318554878234863, "logps/chosen": -261.07501220703125, "logps/rejected": -250.76461791992188, "loss": 0.0379, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.07024045288562775, "rewards/margins": 0.047162558883428574, "rewards/rejected": -0.11740299314260483, "step": 6090 }, { "epoch": 0.4, "learning_rate": 3.756916496342379e-06, "logits/chosen": -2.2132625579833984, "logits/rejected": -2.2690443992614746, "logps/chosen": -196.7451629638672, "logps/rejected": -219.06893920898438, "loss": 0.0246, "rewards/accuracies": 0.625, "rewards/chosen": -0.0493663027882576, "rewards/margins": 0.06935496628284454, "rewards/rejected": -0.11872126907110214, "step": 6100 }, { "epoch": 0.4, "eval_logits/chosen": -2.2834253311157227, "eval_logits/rejected": -2.0963640213012695, "eval_logps/chosen": -241.33116149902344, "eval_logps/rejected": -235.506591796875, "eval_loss": 0.02662130817770958, "eval_rewards/accuracies": 0.6474999785423279, "eval_rewards/chosen": -0.046630993485450745, "eval_rewards/margins": 0.07284247130155563, "eval_rewards/rejected": -0.11947345733642578, "eval_runtime": 713.7437, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 6100 }, { "epoch": 0.4, "learning_rate": 3.751977449029039e-06, "logits/chosen": -1.9438211917877197, "logits/rejected": -1.9466642141342163, "logps/chosen": -272.6602783203125, "logps/rejected": -251.07455444335938, "loss": 0.0397, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06291915476322174, "rewards/margins": 0.07904963940382004, "rewards/rejected": -0.14196878671646118, "step": 6110 }, { "epoch": 0.4, "learning_rate": 3.747031870788037e-06, "logits/chosen": -2.4532384872436523, "logits/rejected": -2.0979743003845215, "logps/chosen": -320.97210693359375, "logps/rejected": -253.79415893554688, "loss": 0.0284, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.021094132214784622, "rewards/margins": 0.0762069970369339, "rewards/rejected": -0.09730114042758942, "step": 6120 }, { "epoch": 0.4, "learning_rate": 3.7420797874179326e-06, "logits/chosen": -2.219827175140381, "logits/rejected": -1.9641387462615967, "logps/chosen": -245.064453125, "logps/rejected": -202.33700561523438, "loss": 0.0222, "rewards/accuracies": 0.625, "rewards/chosen": -0.039774827659130096, "rewards/margins": 0.08080819994211197, "rewards/rejected": -0.12058302015066147, "step": 6130 }, { "epoch": 0.4, "learning_rate": 3.7371212247512167e-06, "logits/chosen": -2.567937135696411, "logits/rejected": -2.2279791831970215, "logps/chosen": -326.991455078125, "logps/rejected": -280.57086181640625, "loss": 0.027, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0029847125988453627, "rewards/margins": 0.09343814849853516, "rewards/rejected": -0.09642285853624344, "step": 6140 }, { "epoch": 0.4, "learning_rate": 3.7321562086541817e-06, "logits/chosen": -2.321716070175171, "logits/rejected": -2.215344190597534, "logps/chosen": -258.36090087890625, "logps/rejected": -273.3643798828125, "loss": 0.0194, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.026903927326202393, "rewards/margins": 0.07106615602970123, "rewards/rejected": -0.09797009080648422, "step": 6150 }, { "epoch": 0.4, "learning_rate": 3.7271847650267834e-06, "logits/chosen": -2.1101181507110596, "logits/rejected": -2.0075926780700684, "logps/chosen": -210.41098022460938, "logps/rejected": -219.27383422851562, "loss": 0.0448, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.05238168314099312, "rewards/margins": 0.048765551298856735, "rewards/rejected": -0.10114721953868866, "step": 6160 }, { "epoch": 0.4, "learning_rate": 3.7222069198025086e-06, "logits/chosen": -2.085695743560791, "logits/rejected": -1.8926283121109009, "logps/chosen": -220.03958129882812, "logps/rejected": -218.895263671875, "loss": 0.0201, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.05748562142252922, "rewards/margins": 0.09723970293998718, "rewards/rejected": -0.1547253280878067, "step": 6170 }, { "epoch": 0.4, "learning_rate": 3.7172226989482353e-06, "logits/chosen": -2.1056084632873535, "logits/rejected": -1.8976036310195923, "logps/chosen": -221.66238403320312, "logps/rejected": -232.70401000976562, "loss": 0.0279, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05844768136739731, "rewards/margins": 0.06593415886163712, "rewards/rejected": -0.12438184022903442, "step": 6180 }, { "epoch": 0.4, "learning_rate": 3.7122321284641007e-06, "logits/chosen": -2.3730039596557617, "logits/rejected": -1.9491113424301147, "logps/chosen": -376.61395263671875, "logps/rejected": -285.7323913574219, "loss": 0.0208, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06913654506206512, "rewards/margins": 0.09804163873195648, "rewards/rejected": -0.1671781986951828, "step": 6190 }, { "epoch": 0.41, "learning_rate": 3.707235234383365e-06, "logits/chosen": -2.2920448780059814, "logits/rejected": -1.9482746124267578, "logps/chosen": -265.9564514160156, "logps/rejected": -194.7156524658203, "loss": 0.0109, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.023512501269578934, "rewards/margins": 0.07769031822681427, "rewards/rejected": -0.1012028232216835, "step": 6200 }, { "epoch": 0.41, "eval_logits/chosen": -2.244328260421753, "eval_logits/rejected": -2.05887770652771, "eval_logps/chosen": -239.60328674316406, "eval_logps/rejected": -234.93077087402344, "eval_loss": 0.025902314111590385, "eval_rewards/accuracies": 0.6420000195503235, "eval_rewards/chosen": -0.03799163177609444, "eval_rewards/margins": 0.0786026194691658, "eval_rewards/rejected": -0.11659426242113113, "eval_runtime": 714.619, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.399, "step": 6200 }, { "epoch": 0.41, "learning_rate": 3.702232042772277e-06, "logits/chosen": -2.0883724689483643, "logits/rejected": -2.028675079345703, "logps/chosen": -214.29074096679688, "logps/rejected": -213.12255859375, "loss": 0.0247, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.058722980320453644, "rewards/margins": 0.10593204200267792, "rewards/rejected": -0.16465502977371216, "step": 6210 }, { "epoch": 0.41, "learning_rate": 3.6972225797299325e-06, "logits/chosen": -2.2090511322021484, "logits/rejected": -2.240442991256714, "logps/chosen": -264.33868408203125, "logps/rejected": -264.9607849121094, "loss": 0.0349, "rewards/accuracies": 0.625, "rewards/chosen": -0.06915035843849182, "rewards/margins": 0.08278807252645493, "rewards/rejected": -0.15193842351436615, "step": 6220 }, { "epoch": 0.41, "learning_rate": 3.692206871388147e-06, "logits/chosen": -2.308846950531006, "logits/rejected": -1.8605448007583618, "logps/chosen": -243.57913208007812, "logps/rejected": -225.1941680908203, "loss": 0.0174, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05595209077000618, "rewards/margins": 0.11422081291675568, "rewards/rejected": -0.17017289996147156, "step": 6230 }, { "epoch": 0.41, "learning_rate": 3.6871849439113115e-06, "logits/chosen": -1.9982774257659912, "logits/rejected": -1.9305871725082397, "logps/chosen": -241.31790161132812, "logps/rejected": -243.810302734375, "loss": 0.0352, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05703529715538025, "rewards/margins": 0.06020113080739975, "rewards/rejected": -0.1172364354133606, "step": 6240 }, { "epoch": 0.41, "learning_rate": 3.682156823496259e-06, "logits/chosen": -2.2610268592834473, "logits/rejected": -1.9768097400665283, "logps/chosen": -214.890380859375, "logps/rejected": -204.16453552246094, "loss": 0.0189, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.004882182460278273, "rewards/margins": 0.10819858312606812, "rewards/rejected": -0.11308076232671738, "step": 6250 }, { "epoch": 0.41, "learning_rate": 3.67712253637213e-06, "logits/chosen": -2.2968246936798096, "logits/rejected": -2.0636565685272217, "logps/chosen": -300.6327209472656, "logps/rejected": -225.7855224609375, "loss": 0.0191, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0416911318898201, "rewards/margins": 0.07532685250043869, "rewards/rejected": -0.11701799929141998, "step": 6260 }, { "epoch": 0.41, "learning_rate": 3.672082108800231e-06, "logits/chosen": -2.1016416549682617, "logits/rejected": -1.9039815664291382, "logps/chosen": -231.9393768310547, "logps/rejected": -213.286376953125, "loss": 0.037, "rewards/accuracies": 0.625, "rewards/chosen": -0.09061791747808456, "rewards/margins": 0.07693218439817429, "rewards/rejected": -0.16755011677742004, "step": 6270 }, { "epoch": 0.41, "learning_rate": 3.6670355670739012e-06, "logits/chosen": -2.216489791870117, "logits/rejected": -2.027721881866455, "logps/chosen": -168.08026123046875, "logps/rejected": -186.23406982421875, "loss": 0.0119, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.030634235590696335, "rewards/margins": 0.10396245867013931, "rewards/rejected": -0.13459669053554535, "step": 6280 }, { "epoch": 0.41, "learning_rate": 3.6619829375183745e-06, "logits/chosen": -2.369748115539551, "logits/rejected": -2.175687789916992, "logps/chosen": -232.9136505126953, "logps/rejected": -240.1250457763672, "loss": 0.0362, "rewards/accuracies": 0.75, "rewards/chosen": -0.03455563634634018, "rewards/margins": 0.11641144752502441, "rewards/rejected": -0.1509670913219452, "step": 6290 }, { "epoch": 0.41, "learning_rate": 3.6569242464906427e-06, "logits/chosen": -2.2498772144317627, "logits/rejected": -2.073350429534912, "logps/chosen": -209.7557373046875, "logps/rejected": -250.68405151367188, "loss": 0.0289, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.010133450850844383, "rewards/margins": 0.09939597547054291, "rewards/rejected": -0.10952942073345184, "step": 6300 }, { "epoch": 0.41, "eval_logits/chosen": -2.2404847145080566, "eval_logits/rejected": -2.0557026863098145, "eval_logps/chosen": -237.73385620117188, "eval_logps/rejected": -233.16732788085938, "eval_loss": 0.025808749720454216, "eval_rewards/accuracies": 0.6524999737739563, "eval_rewards/chosen": -0.028644531965255737, "eval_rewards/margins": 0.07913253456354141, "eval_rewards/rejected": -0.10777706652879715, "eval_runtime": 713.9294, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.401, "step": 6300 }, { "epoch": 0.41, "learning_rate": 3.6518595203793156e-06, "logits/chosen": -2.092099905014038, "logits/rejected": -2.105501651763916, "logps/chosen": -260.3985900878906, "logps/rejected": -294.7735290527344, "loss": 0.0148, "rewards/accuracies": 0.75, "rewards/chosen": -0.008421550504863262, "rewards/margins": 0.10450099408626556, "rewards/rejected": -0.1129225492477417, "step": 6310 }, { "epoch": 0.41, "learning_rate": 3.646788785604485e-06, "logits/chosen": -2.234051465988159, "logits/rejected": -2.093923330307007, "logps/chosen": -207.9916229248047, "logps/rejected": -217.1757354736328, "loss": 0.0149, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.011666452512145042, "rewards/margins": 0.057145290076732635, "rewards/rejected": -0.06881174445152283, "step": 6320 }, { "epoch": 0.41, "learning_rate": 3.641712068617588e-06, "logits/chosen": -2.2444276809692383, "logits/rejected": -2.1274402141571045, "logps/chosen": -265.61273193359375, "logps/rejected": -224.0839385986328, "loss": 0.0239, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.021263372153043747, "rewards/margins": 0.06007848307490349, "rewards/rejected": -0.08134184777736664, "step": 6330 }, { "epoch": 0.41, "learning_rate": 3.6366293959012673e-06, "logits/chosen": -2.182690143585205, "logits/rejected": -1.9510301351547241, "logps/chosen": -184.07785034179688, "logps/rejected": -183.8928985595703, "loss": 0.0297, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0143792275339365, "rewards/margins": 0.09669135510921478, "rewards/rejected": -0.11107059568166733, "step": 6340 }, { "epoch": 0.42, "learning_rate": 3.631540793969233e-06, "logits/chosen": -2.3409228324890137, "logits/rejected": -2.3132266998291016, "logps/chosen": -191.70883178710938, "logps/rejected": -205.50570678710938, "loss": 0.0242, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.015455419197678566, "rewards/margins": 0.06059306114912033, "rewards/rejected": -0.07604847848415375, "step": 6350 }, { "epoch": 0.42, "learning_rate": 3.626446289366127e-06, "logits/chosen": -2.5035297870635986, "logits/rejected": -2.0659027099609375, "logps/chosen": -228.3686981201172, "logps/rejected": -163.69943237304688, "loss": 0.0265, "rewards/accuracies": 0.625, "rewards/chosen": -0.04109755903482437, "rewards/margins": 0.034247688949108124, "rewards/rejected": -0.0753452405333519, "step": 6360 }, { "epoch": 0.42, "learning_rate": 3.6213459086673786e-06, "logits/chosen": -2.24674654006958, "logits/rejected": -2.307798147201538, "logps/chosen": -177.19583129882812, "logps/rejected": -206.07693481445312, "loss": 0.0266, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03129131719470024, "rewards/margins": 0.09753891080617905, "rewards/rejected": -0.128830224275589, "step": 6370 }, { "epoch": 0.42, "learning_rate": 3.6162396784790737e-06, "logits/chosen": -2.1218373775482178, "logits/rejected": -2.14566707611084, "logps/chosen": -232.34432983398438, "logps/rejected": -245.09262084960938, "loss": 0.0282, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04668625444173813, "rewards/margins": 0.06453945487737656, "rewards/rejected": -0.11122570931911469, "step": 6380 }, { "epoch": 0.42, "learning_rate": 3.6111276254378095e-06, "logits/chosen": -2.2318406105041504, "logits/rejected": -2.222268581390381, "logps/chosen": -229.4927520751953, "logps/rejected": -246.83206176757812, "loss": 0.0172, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.007679253816604614, "rewards/margins": 0.10876430571079254, "rewards/rejected": -0.11644355952739716, "step": 6390 }, { "epoch": 0.42, "learning_rate": 3.606009776210559e-06, "logits/chosen": -2.221876621246338, "logits/rejected": -1.990378975868225, "logps/chosen": -252.7962646484375, "logps/rejected": -225.5522918701172, "loss": 0.0287, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.060862988233566284, "rewards/margins": 0.08207409828901291, "rewards/rejected": -0.1429370939731598, "step": 6400 }, { "epoch": 0.42, "eval_logits/chosen": -2.252486228942871, "eval_logits/rejected": -2.066436290740967, "eval_logps/chosen": -237.19186401367188, "eval_logps/rejected": -234.72076416015625, "eval_loss": 0.0266865361481905, "eval_rewards/accuracies": 0.6430000066757202, "eval_rewards/chosen": -0.025934524834156036, "eval_rewards/margins": 0.0896097868680954, "eval_rewards/rejected": -0.11554431915283203, "eval_runtime": 716.7463, "eval_samples_per_second": 2.79, "eval_steps_per_second": 1.395, "step": 6400 }, { "epoch": 0.42, "learning_rate": 3.600886157494531e-06, "logits/chosen": -2.3591103553771973, "logits/rejected": -2.2368922233581543, "logps/chosen": -268.14385986328125, "logps/rejected": -274.5497741699219, "loss": 0.02, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.02643665112555027, "rewards/margins": 0.09094007313251495, "rewards/rejected": -0.11737672984600067, "step": 6410 }, { "epoch": 0.42, "learning_rate": 3.5957567960170304e-06, "logits/chosen": -2.455496311187744, "logits/rejected": -1.7506864070892334, "logps/chosen": -294.2264709472656, "logps/rejected": -194.97206115722656, "loss": 0.0274, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.015206987969577312, "rewards/margins": 0.09701627492904663, "rewards/rejected": -0.11222325265407562, "step": 6420 }, { "epoch": 0.42, "learning_rate": 3.590621718535319e-06, "logits/chosen": -2.0646634101867676, "logits/rejected": -1.8767610788345337, "logps/chosen": -206.53396606445312, "logps/rejected": -228.14224243164062, "loss": 0.028, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.049057986587285995, "rewards/margins": 0.1125042662024498, "rewards/rejected": -0.1615622490644455, "step": 6430 }, { "epoch": 0.42, "learning_rate": 3.5854809518364775e-06, "logits/chosen": -2.335057020187378, "logits/rejected": -2.030397415161133, "logps/chosen": -244.3311767578125, "logps/rejected": -221.5835418701172, "loss": 0.0283, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.009078029543161392, "rewards/margins": 0.10882551968097687, "rewards/rejected": -0.09974746406078339, "step": 6440 }, { "epoch": 0.42, "learning_rate": 3.580334522737262e-06, "logits/chosen": -2.247253894805908, "logits/rejected": -1.964611291885376, "logps/chosen": -199.90805053710938, "logps/rejected": -181.4126739501953, "loss": 0.0169, "rewards/accuracies": 0.625, "rewards/chosen": 0.010181794874370098, "rewards/margins": 0.07577107846736908, "rewards/rejected": -0.06558927893638611, "step": 6450 }, { "epoch": 0.42, "learning_rate": 3.575182458083968e-06, "logits/chosen": -2.191323757171631, "logits/rejected": -2.1385130882263184, "logps/chosen": -243.88778686523438, "logps/rejected": -236.3755340576172, "loss": 0.0201, "rewards/accuracies": 0.625, "rewards/chosen": -0.015413627028465271, "rewards/margins": 0.10246391594409943, "rewards/rejected": -0.1178775280714035, "step": 6460 }, { "epoch": 0.42, "learning_rate": 3.5700247847522883e-06, "logits/chosen": -2.296088218688965, "logits/rejected": -2.210977554321289, "logps/chosen": -201.45184326171875, "logps/rejected": -220.83975219726562, "loss": 0.0375, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.006730721797794104, "rewards/margins": 0.09703059494495392, "rewards/rejected": -0.09029986709356308, "step": 6470 }, { "epoch": 0.42, "learning_rate": 3.5648615296471743e-06, "logits/chosen": -2.0994935035705566, "logits/rejected": -2.067354440689087, "logps/chosen": -200.0526123046875, "logps/rejected": -260.09405517578125, "loss": 0.0226, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.013449016027152538, "rewards/margins": 0.10983811318874359, "rewards/rejected": -0.1232871264219284, "step": 6480 }, { "epoch": 0.42, "learning_rate": 3.559692719702693e-06, "logits/chosen": -2.1190197467803955, "logits/rejected": -1.8080532550811768, "logps/chosen": -292.5730285644531, "logps/rejected": -252.4334716796875, "loss": 0.0388, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.032139308750629425, "rewards/margins": 0.08294972777366638, "rewards/rejected": -0.1150890588760376, "step": 6490 }, { "epoch": 0.43, "learning_rate": 3.55451838188189e-06, "logits/chosen": -2.243839979171753, "logits/rejected": -2.140303134918213, "logps/chosen": -263.24530029296875, "logps/rejected": -299.83380126953125, "loss": 0.0631, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.00979236327111721, "rewards/margins": 0.07327703386545181, "rewards/rejected": -0.08306938409805298, "step": 6500 }, { "epoch": 0.43, "eval_logits/chosen": -2.275908946990967, "eval_logits/rejected": -2.089535713195801, "eval_logps/chosen": -238.27188110351562, "eval_logps/rejected": -233.43910217285156, "eval_loss": 0.025913719087839127, "eval_rewards/accuracies": 0.6460000276565552, "eval_rewards/chosen": -0.03133460506796837, "eval_rewards/margins": 0.0778014212846756, "eval_rewards/rejected": -0.10913601517677307, "eval_runtime": 714.2995, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 6500 }, { "epoch": 0.43, "learning_rate": 3.549338543176645e-06, "logits/chosen": -2.296164035797119, "logits/rejected": -2.033365249633789, "logps/chosen": -311.88177490234375, "logps/rejected": -292.5616149902344, "loss": 0.03, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.026966029778122902, "rewards/margins": 0.06516522169113159, "rewards/rejected": -0.09213124215602875, "step": 6510 }, { "epoch": 0.43, "learning_rate": 3.5441532306075342e-06, "logits/chosen": -2.252589464187622, "logits/rejected": -2.237151622772217, "logps/chosen": -240.11624145507812, "logps/rejected": -296.4466857910156, "loss": 0.0173, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04348507523536682, "rewards/margins": 0.04882603511214256, "rewards/rejected": -0.09231110662221909, "step": 6520 }, { "epoch": 0.43, "learning_rate": 3.5389624712236894e-06, "logits/chosen": -2.253516674041748, "logits/rejected": -2.0285708904266357, "logps/chosen": -217.43795776367188, "logps/rejected": -196.87884521484375, "loss": 0.0244, "rewards/accuracies": 0.5, "rewards/chosen": -0.017010662704706192, "rewards/margins": 0.03184361383318901, "rewards/rejected": -0.0488542802631855, "step": 6530 }, { "epoch": 0.43, "learning_rate": 3.533766292102653e-06, "logits/chosen": -2.2152111530303955, "logits/rejected": -2.077603816986084, "logps/chosen": -216.4482421875, "logps/rejected": -216.7845916748047, "loss": 0.0563, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.033018387854099274, "rewards/margins": 0.06012535095214844, "rewards/rejected": -0.09314373880624771, "step": 6540 }, { "epoch": 0.43, "learning_rate": 3.5285647203502404e-06, "logits/chosen": -2.4234461784362793, "logits/rejected": -2.296536922454834, "logps/chosen": -257.1291809082031, "logps/rejected": -229.7969512939453, "loss": 0.0223, "rewards/accuracies": 0.625, "rewards/chosen": 0.0007092708838172257, "rewards/margins": 0.052169036120176315, "rewards/rejected": -0.051459766924381256, "step": 6550 }, { "epoch": 0.43, "learning_rate": 3.5233577831003983e-06, "logits/chosen": -2.2396888732910156, "logits/rejected": -2.0937328338623047, "logps/chosen": -254.46728515625, "logps/rejected": -243.6793670654297, "loss": 0.0214, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.011456744745373726, "rewards/margins": 0.06440354883670807, "rewards/rejected": -0.07586028426885605, "step": 6560 }, { "epoch": 0.43, "learning_rate": 3.5181455075150628e-06, "logits/chosen": -2.1576619148254395, "logits/rejected": -1.7788465023040771, "logps/chosen": -184.81137084960938, "logps/rejected": -151.5428009033203, "loss": 0.0257, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.014326018281280994, "rewards/margins": 0.06677161902189255, "rewards/rejected": -0.08109764009714127, "step": 6570 }, { "epoch": 0.43, "learning_rate": 3.512927920784016e-06, "logits/chosen": -2.3142216205596924, "logits/rejected": -2.175314426422119, "logps/chosen": -226.6049346923828, "logps/rejected": -230.943359375, "loss": 0.0201, "rewards/accuracies": 0.625, "rewards/chosen": -0.004654319025576115, "rewards/margins": 0.12159235775470734, "rewards/rejected": -0.12624667584896088, "step": 6580 }, { "epoch": 0.43, "learning_rate": 3.5077050501247457e-06, "logits/chosen": -2.39911150932312, "logits/rejected": -1.9505914449691772, "logps/chosen": -276.9880676269531, "logps/rejected": -229.661865234375, "loss": 0.0215, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.020134396851062775, "rewards/margins": 0.10031189024448395, "rewards/rejected": -0.08017749339342117, "step": 6590 }, { "epoch": 0.43, "learning_rate": 3.5024769227823042e-06, "logits/chosen": -2.296597957611084, "logits/rejected": -2.161756992340088, "logps/chosen": -169.4569854736328, "logps/rejected": -144.53469848632812, "loss": 0.037, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.032928384840488434, "rewards/margins": 0.07937689125537872, "rewards/rejected": -0.11230529844760895, "step": 6600 }, { "epoch": 0.43, "eval_logits/chosen": -2.2867612838745117, "eval_logits/rejected": -2.0996828079223633, "eval_logps/chosen": -233.88197326660156, "eval_logps/rejected": -229.03372192382812, "eval_loss": 0.02600272372364998, "eval_rewards/accuracies": 0.6489999890327454, "eval_rewards/chosen": -0.009385163895785809, "eval_rewards/margins": 0.0777239203453064, "eval_rewards/rejected": -0.08710909634828568, "eval_runtime": 714.3371, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 6600 }, { "epoch": 0.43, "learning_rate": 3.4972435660291646e-06, "logits/chosen": -2.376971483230591, "logits/rejected": -2.2304623126983643, "logps/chosen": -247.54150390625, "logps/rejected": -238.1759796142578, "loss": 0.0183, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.021986085921525955, "rewards/margins": 0.07787088304758072, "rewards/rejected": -0.09985697269439697, "step": 6610 }, { "epoch": 0.43, "learning_rate": 3.492005007165079e-06, "logits/chosen": -2.251359701156616, "logits/rejected": -2.004368305206299, "logps/chosen": -225.4510498046875, "logps/rejected": -245.10220336914062, "loss": 0.0322, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.02042979560792446, "rewards/margins": 0.06747279316186905, "rewards/rejected": -0.04704299941658974, "step": 6620 }, { "epoch": 0.43, "learning_rate": 3.4867612735169377e-06, "logits/chosen": -2.396332263946533, "logits/rejected": -2.0499672889709473, "logps/chosen": -221.58731079101562, "logps/rejected": -163.45327758789062, "loss": 0.0251, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.017235388979315758, "rewards/margins": 0.1004711240530014, "rewards/rejected": -0.0832357257604599, "step": 6630 }, { "epoch": 0.43, "learning_rate": 3.4815123924386226e-06, "logits/chosen": -2.5589592456817627, "logits/rejected": -2.251009225845337, "logps/chosen": -304.1456604003906, "logps/rejected": -247.13693237304688, "loss": 0.0176, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.011978634633123875, "rewards/margins": 0.06331901252269745, "rewards/rejected": -0.05134038254618645, "step": 6640 }, { "epoch": 0.44, "learning_rate": 3.4762583913108696e-06, "logits/chosen": -2.1234230995178223, "logits/rejected": -1.8880468606948853, "logps/chosen": -269.94964599609375, "logps/rejected": -243.2335205078125, "loss": 0.0241, "rewards/accuracies": 0.625, "rewards/chosen": -0.0009564816718921065, "rewards/margins": 0.07099533826112747, "rewards/rejected": -0.07195182889699936, "step": 6650 }, { "epoch": 0.44, "learning_rate": 3.4709992975411217e-06, "logits/chosen": -2.250683069229126, "logits/rejected": -1.7695224285125732, "logps/chosen": -257.90576171875, "logps/rejected": -223.98779296875, "loss": 0.0305, "rewards/accuracies": 0.625, "rewards/chosen": -0.014731831848621368, "rewards/margins": 0.09174492210149765, "rewards/rejected": -0.10647676140069962, "step": 6660 }, { "epoch": 0.44, "learning_rate": 3.4657351385633886e-06, "logits/chosen": -2.366260528564453, "logits/rejected": -2.0163345336914062, "logps/chosen": -187.8857879638672, "logps/rejected": -198.3480224609375, "loss": 0.0281, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.012975359335541725, "rewards/margins": 0.113312266767025, "rewards/rejected": -0.12628760933876038, "step": 6670 }, { "epoch": 0.44, "learning_rate": 3.4604659418381024e-06, "logits/chosen": -2.276533603668213, "logits/rejected": -1.8742822408676147, "logps/chosen": -230.89651489257812, "logps/rejected": -220.8121337890625, "loss": 0.0349, "rewards/accuracies": 0.75, "rewards/chosen": -0.06421110033988953, "rewards/margins": 0.09723123162984848, "rewards/rejected": -0.1614423394203186, "step": 6680 }, { "epoch": 0.44, "learning_rate": 3.4551917348519744e-06, "logits/chosen": -2.3929922580718994, "logits/rejected": -2.1257662773132324, "logps/chosen": -286.9535827636719, "logps/rejected": -255.3826904296875, "loss": 0.0357, "rewards/accuracies": 0.625, "rewards/chosen": -0.02727435901761055, "rewards/margins": 0.07814844697713852, "rewards/rejected": -0.10542280972003937, "step": 6690 }, { "epoch": 0.44, "learning_rate": 3.4499125451178505e-06, "logits/chosen": -1.9648650884628296, "logits/rejected": -2.052551746368408, "logps/chosen": -216.1411590576172, "logps/rejected": -250.99026489257812, "loss": 0.0296, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05924935266375542, "rewards/margins": 0.05118199437856674, "rewards/rejected": -0.11043135076761246, "step": 6700 }, { "epoch": 0.44, "eval_logits/chosen": -2.2902636528015137, "eval_logits/rejected": -2.1026463508605957, "eval_logps/chosen": -240.9244384765625, "eval_logps/rejected": -237.36306762695312, "eval_loss": 0.026407985016703606, "eval_rewards/accuracies": 0.656499981880188, "eval_rewards/chosen": -0.04459746181964874, "eval_rewards/margins": 0.08415839076042175, "eval_rewards/rejected": -0.1287558525800705, "eval_runtime": 713.723, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 6700 }, { "epoch": 0.44, "learning_rate": 3.4446284001745723e-06, "logits/chosen": -2.1049177646636963, "logits/rejected": -1.856149673461914, "logps/chosen": -226.0932159423828, "logps/rejected": -256.2013854980469, "loss": 0.0309, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09929800033569336, "rewards/margins": 0.06680585443973541, "rewards/rejected": -0.16610386967658997, "step": 6710 }, { "epoch": 0.44, "learning_rate": 3.439339327586827e-06, "logits/chosen": -2.2584190368652344, "logits/rejected": -2.199280261993408, "logps/chosen": -194.5971221923828, "logps/rejected": -206.33316040039062, "loss": 0.0202, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.022896986454725266, "rewards/margins": 0.08549851924180984, "rewards/rejected": -0.10839549452066422, "step": 6720 }, { "epoch": 0.44, "learning_rate": 3.434045354945008e-06, "logits/chosen": -2.346409559249878, "logits/rejected": -2.0874228477478027, "logps/chosen": -289.5050964355469, "logps/rejected": -296.0002746582031, "loss": 0.0305, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.07740263640880585, "rewards/margins": 0.05615498498082161, "rewards/rejected": -0.13355764746665955, "step": 6730 }, { "epoch": 0.44, "learning_rate": 3.4287465098650713e-06, "logits/chosen": -2.3473329544067383, "logits/rejected": -2.3483903408050537, "logps/chosen": -259.7020568847656, "logps/rejected": -255.46054077148438, "loss": 0.0245, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05863260477781296, "rewards/margins": 0.0530003197491169, "rewards/rejected": -0.11163292080163956, "step": 6740 }, { "epoch": 0.44, "learning_rate": 3.423442819988387e-06, "logits/chosen": -2.1913259029388428, "logits/rejected": -2.08129620552063, "logps/chosen": -202.03309631347656, "logps/rejected": -206.5372772216797, "loss": 0.0554, "rewards/accuracies": 0.625, "rewards/chosen": -0.0932292714715004, "rewards/margins": 0.05846773460507393, "rewards/rejected": -0.15169700980186462, "step": 6750 }, { "epoch": 0.44, "learning_rate": 3.4181343129816e-06, "logits/chosen": -2.378754138946533, "logits/rejected": -2.0726230144500732, "logps/chosen": -184.03543090820312, "logps/rejected": -183.15615844726562, "loss": 0.0331, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.03836870938539505, "rewards/margins": 0.0670543760061264, "rewards/rejected": -0.10542309284210205, "step": 6760 }, { "epoch": 0.44, "learning_rate": 3.4128210165364837e-06, "logits/chosen": -2.111802339553833, "logits/rejected": -2.1186046600341797, "logps/chosen": -197.4541778564453, "logps/rejected": -229.36154174804688, "loss": 0.0212, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.030224764719605446, "rewards/margins": 0.11691973358392715, "rewards/rejected": -0.14714448153972626, "step": 6770 }, { "epoch": 0.44, "learning_rate": 3.407502958369795e-06, "logits/chosen": -2.22578763961792, "logits/rejected": -2.080477476119995, "logps/chosen": -253.41683959960938, "logps/rejected": -243.12594604492188, "loss": 0.0367, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.05484537035226822, "rewards/margins": 0.09460707008838654, "rewards/rejected": -0.14945244789123535, "step": 6780 }, { "epoch": 0.44, "learning_rate": 3.4021801662231297e-06, "logits/chosen": -2.2658119201660156, "logits/rejected": -2.1124956607818604, "logps/chosen": -263.18060302734375, "logps/rejected": -253.9446258544922, "loss": 0.0304, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07151900231838226, "rewards/margins": 0.06519722938537598, "rewards/rejected": -0.13671624660491943, "step": 6790 }, { "epoch": 0.44, "learning_rate": 3.3968526678627793e-06, "logits/chosen": -2.2123255729675293, "logits/rejected": -1.8768161535263062, "logps/chosen": -276.95269775390625, "logps/rejected": -247.5306396484375, "loss": 0.038, "rewards/accuracies": 0.625, "rewards/chosen": -0.059611111879348755, "rewards/margins": 0.07340748608112335, "rewards/rejected": -0.1330185830593109, "step": 6800 }, { "epoch": 0.44, "eval_logits/chosen": -2.273937940597534, "eval_logits/rejected": -2.087099075317383, "eval_logps/chosen": -245.886474609375, "eval_logps/rejected": -241.46580505371094, "eval_loss": 0.02618832141160965, "eval_rewards/accuracies": 0.656499981880188, "eval_rewards/chosen": -0.06940756738185883, "eval_rewards/margins": 0.07986201345920563, "eval_rewards/rejected": -0.14926959574222565, "eval_runtime": 712.4632, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.404, "step": 6800 }, { "epoch": 0.45, "learning_rate": 3.391520491079586e-06, "logits/chosen": -2.3683648109436035, "logits/rejected": -2.3810901641845703, "logps/chosen": -207.74728393554688, "logps/rejected": -188.58126831054688, "loss": 0.0582, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05892329663038254, "rewards/margins": 0.05681822821497917, "rewards/rejected": -0.1157415360212326, "step": 6810 }, { "epoch": 0.45, "learning_rate": 3.3861836636887936e-06, "logits/chosen": -2.2756943702697754, "logits/rejected": -2.0937790870666504, "logps/chosen": -277.7412109375, "logps/rejected": -248.2487335205078, "loss": 0.0142, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.054611772298812866, "rewards/margins": 0.08493000268936157, "rewards/rejected": -0.13954177498817444, "step": 6820 }, { "epoch": 0.45, "learning_rate": 3.3808422135299106e-06, "logits/chosen": -2.2582032680511475, "logits/rejected": -2.343784809112549, "logps/chosen": -297.9847106933594, "logps/rejected": -366.10028076171875, "loss": 0.0126, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.061534445732831955, "rewards/margins": 0.07520034909248352, "rewards/rejected": -0.13673481345176697, "step": 6830 }, { "epoch": 0.45, "learning_rate": 3.375496168466556e-06, "logits/chosen": -2.4298951625823975, "logits/rejected": -1.9825141429901123, "logps/chosen": -222.685546875, "logps/rejected": -177.19808959960938, "loss": 0.0184, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03873712942004204, "rewards/margins": 0.05219585821032524, "rewards/rejected": -0.09093298763036728, "step": 6840 }, { "epoch": 0.45, "learning_rate": 3.3701455563863205e-06, "logits/chosen": -2.488891363143921, "logits/rejected": -2.0427050590515137, "logps/chosen": -303.811279296875, "logps/rejected": -287.64788818359375, "loss": 0.0249, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05968303605914116, "rewards/margins": 0.10077802836894989, "rewards/rejected": -0.16046105325222015, "step": 6850 }, { "epoch": 0.45, "learning_rate": 3.3647904052006174e-06, "logits/chosen": -2.2914767265319824, "logits/rejected": -2.227921962738037, "logps/chosen": -273.80303955078125, "logps/rejected": -294.72979736328125, "loss": 0.0212, "rewards/accuracies": 0.625, "rewards/chosen": -0.04252370074391365, "rewards/margins": 0.08960084617137909, "rewards/rejected": -0.13212454319000244, "step": 6860 }, { "epoch": 0.45, "learning_rate": 3.3594307428445383e-06, "logits/chosen": -2.4990713596343994, "logits/rejected": -2.1006529331207275, "logps/chosen": -338.07830810546875, "logps/rejected": -319.7018737792969, "loss": 0.0127, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.024137722328305244, "rewards/margins": 0.06520069390535355, "rewards/rejected": -0.08933840692043304, "step": 6870 }, { "epoch": 0.45, "learning_rate": 3.354066597276707e-06, "logits/chosen": -2.1523618698120117, "logits/rejected": -2.1465184688568115, "logps/chosen": -231.46969604492188, "logps/rejected": -280.14129638671875, "loss": 0.0284, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.03952707722783089, "rewards/margins": 0.06655522435903549, "rewards/rejected": -0.10608228296041489, "step": 6880 }, { "epoch": 0.45, "learning_rate": 3.348697996479136e-06, "logits/chosen": -2.3686251640319824, "logits/rejected": -2.1159777641296387, "logps/chosen": -237.6735382080078, "logps/rejected": -196.40487670898438, "loss": 0.0188, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04932292550802231, "rewards/margins": 0.06101522967219353, "rewards/rejected": -0.11033815145492554, "step": 6890 }, { "epoch": 0.45, "learning_rate": 3.3433249684570757e-06, "logits/chosen": -2.1756784915924072, "logits/rejected": -2.0409185886383057, "logps/chosen": -185.26063537597656, "logps/rejected": -158.40994262695312, "loss": 0.0458, "rewards/accuracies": 0.625, "rewards/chosen": -0.03715535253286362, "rewards/margins": 0.09433852136135101, "rewards/rejected": -0.13149388134479523, "step": 6900 }, { "epoch": 0.45, "eval_logits/chosen": -2.279813051223755, "eval_logits/rejected": -2.0924649238586426, "eval_logps/chosen": -239.05287170410156, "eval_logps/rejected": -234.09742736816406, "eval_loss": 0.02606966905295849, "eval_rewards/accuracies": 0.6524999737739563, "eval_rewards/chosen": -0.03523955121636391, "eval_rewards/margins": 0.07718797028064728, "eval_rewards/rejected": -0.11242751032114029, "eval_runtime": 712.5848, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.403, "step": 6900 }, { "epoch": 0.45, "learning_rate": 3.3379475412388724e-06, "logits/chosen": -2.364109516143799, "logits/rejected": -2.200424909591675, "logps/chosen": -247.0403594970703, "logps/rejected": -235.1184844970703, "loss": 0.0379, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.025426704436540604, "rewards/margins": 0.10888361930847168, "rewards/rejected": -0.13431032001972198, "step": 6910 }, { "epoch": 0.45, "learning_rate": 3.3325657428758207e-06, "logits/chosen": -2.161489248275757, "logits/rejected": -2.1392319202423096, "logps/chosen": -249.1950225830078, "logps/rejected": -270.0361328125, "loss": 0.0307, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.02759755589067936, "rewards/margins": 0.1000722274184227, "rewards/rejected": -0.1276697814464569, "step": 6920 }, { "epoch": 0.45, "learning_rate": 3.3271796014420175e-06, "logits/chosen": -2.262923002243042, "logits/rejected": -2.2007548809051514, "logps/chosen": -223.37106323242188, "logps/rejected": -228.798095703125, "loss": 0.0319, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05460399389266968, "rewards/margins": 0.11567654460668564, "rewards/rejected": -0.1702805608510971, "step": 6930 }, { "epoch": 0.45, "learning_rate": 3.3217891450342142e-06, "logits/chosen": -2.259061574935913, "logits/rejected": -1.9373016357421875, "logps/chosen": -268.7401123046875, "logps/rejected": -220.4185791015625, "loss": 0.0102, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.05070844292640686, "rewards/margins": 0.10335638374090195, "rewards/rejected": -0.1540648192167282, "step": 6940 }, { "epoch": 0.45, "learning_rate": 3.3163944017716733e-06, "logits/chosen": -2.4251351356506348, "logits/rejected": -2.1257576942443848, "logps/chosen": -224.58676147460938, "logps/rejected": -206.57284545898438, "loss": 0.0135, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.041079506278038025, "rewards/margins": 0.07541914284229279, "rewards/rejected": -0.11649864912033081, "step": 6950 }, { "epoch": 0.46, "learning_rate": 3.310995399796017e-06, "logits/chosen": -2.371244192123413, "logits/rejected": -2.2540652751922607, "logps/chosen": -283.013427734375, "logps/rejected": -285.94757080078125, "loss": 0.0357, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.045136041939258575, "rewards/margins": 0.04868122190237045, "rewards/rejected": -0.09381726384162903, "step": 6960 }, { "epoch": 0.46, "learning_rate": 3.305592167271085e-06, "logits/chosen": -2.257551908493042, "logits/rejected": -2.159736394882202, "logps/chosen": -202.6905059814453, "logps/rejected": -210.4559783935547, "loss": 0.0285, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.027221733704209328, "rewards/margins": 0.09338773787021637, "rewards/rejected": -0.12060944736003876, "step": 6970 }, { "epoch": 0.46, "learning_rate": 3.3001847323827846e-06, "logits/chosen": -2.2555832862854004, "logits/rejected": -2.2027993202209473, "logps/chosen": -274.7593688964844, "logps/rejected": -290.1148986816406, "loss": 0.0213, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.033308275043964386, "rewards/margins": 0.09387167543172836, "rewards/rejected": -0.12717995047569275, "step": 6980 }, { "epoch": 0.46, "learning_rate": 3.2947731233389447e-06, "logits/chosen": -2.337197780609131, "logits/rejected": -1.9657859802246094, "logps/chosen": -255.54342651367188, "logps/rejected": -223.072998046875, "loss": 0.0134, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04018372297286987, "rewards/margins": 0.11478477716445923, "rewards/rejected": -0.1549684852361679, "step": 6990 }, { "epoch": 0.46, "learning_rate": 3.2893573683691706e-06, "logits/chosen": -2.159594774246216, "logits/rejected": -2.1685497760772705, "logps/chosen": -213.18618774414062, "logps/rejected": -216.9435272216797, "loss": 0.0275, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.042463529855012894, "rewards/margins": 0.10826573520898819, "rewards/rejected": -0.1507292538881302, "step": 7000 }, { "epoch": 0.46, "eval_logits/chosen": -2.2773642539978027, "eval_logits/rejected": -2.089714765548706, "eval_logps/chosen": -242.4080810546875, "eval_logps/rejected": -239.64163208007812, "eval_loss": 0.025703566148877144, "eval_rewards/accuracies": 0.6535000205039978, "eval_rewards/chosen": -0.0520155094563961, "eval_rewards/margins": 0.08813316375017166, "eval_rewards/rejected": -0.14014868438243866, "eval_runtime": 714.8357, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 7000 }, { "epoch": 0.46, "learning_rate": 3.2839374957246915e-06, "logits/chosen": -2.3641977310180664, "logits/rejected": -2.0795207023620605, "logps/chosen": -279.41522216796875, "logps/rejected": -200.2919921875, "loss": 0.0218, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.06662468612194061, "rewards/margins": 0.062074802815914154, "rewards/rejected": -0.12869949638843536, "step": 7010 }, { "epoch": 0.46, "learning_rate": 3.2785135336782187e-06, "logits/chosen": -2.214670419692993, "logits/rejected": -2.041901111602783, "logps/chosen": -247.4052276611328, "logps/rejected": -288.4595642089844, "loss": 0.0143, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06744905561208725, "rewards/margins": 0.09370996057987213, "rewards/rejected": -0.16115902364253998, "step": 7020 }, { "epoch": 0.46, "learning_rate": 3.2730855105237952e-06, "logits/chosen": -2.3737800121307373, "logits/rejected": -2.180330276489258, "logps/chosen": -229.07577514648438, "logps/rejected": -290.7456970214844, "loss": 0.0314, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04399664327502251, "rewards/margins": 0.08744674921035767, "rewards/rejected": -0.13144339621067047, "step": 7030 }, { "epoch": 0.46, "learning_rate": 3.2676534545766486e-06, "logits/chosen": -2.1809287071228027, "logits/rejected": -2.151038885116577, "logps/chosen": -220.2820281982422, "logps/rejected": -220.81185913085938, "loss": 0.0272, "rewards/accuracies": 0.625, "rewards/chosen": -0.039062611758708954, "rewards/margins": 0.05211324617266655, "rewards/rejected": -0.09117583930492401, "step": 7040 }, { "epoch": 0.46, "learning_rate": 3.262217394173043e-06, "logits/chosen": -2.285374164581299, "logits/rejected": -2.0053107738494873, "logps/chosen": -252.326416015625, "logps/rejected": -265.61517333984375, "loss": 0.0327, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.05737876147031784, "rewards/margins": 0.09685908257961273, "rewards/rejected": -0.15423783659934998, "step": 7050 }, { "epoch": 0.46, "learning_rate": 3.2567773576701333e-06, "logits/chosen": -2.114854574203491, "logits/rejected": -1.9429212808609009, "logps/chosen": -265.2227783203125, "logps/rejected": -261.1485595703125, "loss": 0.0297, "rewards/accuracies": 0.625, "rewards/chosen": -0.047326505184173584, "rewards/margins": 0.13120698928833008, "rewards/rejected": -0.17853349447250366, "step": 7060 }, { "epoch": 0.46, "learning_rate": 3.2513333734458154e-06, "logits/chosen": -2.3451738357543945, "logits/rejected": -2.249483108520508, "logps/chosen": -216.91574096679688, "logps/rejected": -211.2926483154297, "loss": 0.024, "rewards/accuracies": 0.625, "rewards/chosen": -0.050372231751680374, "rewards/margins": 0.05492279678583145, "rewards/rejected": -0.10529503971338272, "step": 7070 }, { "epoch": 0.46, "learning_rate": 3.245885469898576e-06, "logits/chosen": -2.2400033473968506, "logits/rejected": -2.0241847038269043, "logps/chosen": -310.1903381347656, "logps/rejected": -264.5427551269531, "loss": 0.0214, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04817565903067589, "rewards/margins": 0.09020276367664337, "rewards/rejected": -0.13837842643260956, "step": 7080 }, { "epoch": 0.46, "learning_rate": 3.2404336754473497e-06, "logits/chosen": -2.2381837368011475, "logits/rejected": -1.9788227081298828, "logps/chosen": -273.0093688964844, "logps/rejected": -220.49594116210938, "loss": 0.0179, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.02751203440129757, "rewards/margins": 0.06128401681780815, "rewards/rejected": -0.08879604935646057, "step": 7090 }, { "epoch": 0.46, "learning_rate": 3.234978018531367e-06, "logits/chosen": -2.5674209594726562, "logits/rejected": -2.139941692352295, "logps/chosen": -263.71978759765625, "logps/rejected": -214.44461059570312, "loss": 0.0175, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.02609780803322792, "rewards/margins": 0.077622190117836, "rewards/rejected": -0.10372000932693481, "step": 7100 }, { "epoch": 0.46, "eval_logits/chosen": -2.293259859085083, "eval_logits/rejected": -2.1057543754577637, "eval_logps/chosen": -239.9512939453125, "eval_logps/rejected": -235.46559143066406, "eval_loss": 0.025518544018268585, "eval_rewards/accuracies": 0.652999997138977, "eval_rewards/chosen": -0.03973172605037689, "eval_rewards/margins": 0.07953677326440811, "eval_rewards/rejected": -0.1192684918642044, "eval_runtime": 712.5716, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.403, "step": 7100 }, { "epoch": 0.47, "learning_rate": 3.229518527610006e-06, "logits/chosen": -2.388373851776123, "logits/rejected": -2.092414140701294, "logps/chosen": -299.6824951171875, "logps/rejected": -266.2288513183594, "loss": 0.0162, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.034462135285139084, "rewards/margins": 0.05861176922917366, "rewards/rejected": -0.09307390451431274, "step": 7110 }, { "epoch": 0.47, "learning_rate": 3.2240552311626465e-06, "logits/chosen": -2.3738510608673096, "logits/rejected": -2.137483596801758, "logps/chosen": -248.15280151367188, "logps/rejected": -244.7117462158203, "loss": 0.0197, "rewards/accuracies": 0.625, "rewards/chosen": -0.0346953347325325, "rewards/margins": 0.057519711554050446, "rewards/rejected": -0.09221504628658295, "step": 7120 }, { "epoch": 0.47, "learning_rate": 3.2185881576885193e-06, "logits/chosen": -2.3900671005249023, "logits/rejected": -2.0392982959747314, "logps/chosen": -225.50747680664062, "logps/rejected": -200.21090698242188, "loss": 0.0298, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06892313063144684, "rewards/margins": 0.06685464829206467, "rewards/rejected": -0.1357778012752533, "step": 7130 }, { "epoch": 0.47, "learning_rate": 3.213117335706557e-06, "logits/chosen": -2.262974262237549, "logits/rejected": -2.3756120204925537, "logps/chosen": -271.0682373046875, "logps/rejected": -291.7215270996094, "loss": 0.025, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0751163512468338, "rewards/margins": 0.05723525211215019, "rewards/rejected": -0.1323516070842743, "step": 7140 }, { "epoch": 0.47, "learning_rate": 3.2076427937552473e-06, "logits/chosen": -2.3007476329803467, "logits/rejected": -2.078320264816284, "logps/chosen": -252.1639862060547, "logps/rejected": -255.96145629882812, "loss": 0.0297, "rewards/accuracies": 0.625, "rewards/chosen": -0.0333712212741375, "rewards/margins": 0.1146056205034256, "rewards/rejected": -0.1479768455028534, "step": 7150 }, { "epoch": 0.47, "learning_rate": 3.2021645603924827e-06, "logits/chosen": -2.1458470821380615, "logits/rejected": -2.0497066974639893, "logps/chosen": -148.47946166992188, "logps/rejected": -179.91090393066406, "loss": 0.0244, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.05012665316462517, "rewards/margins": 0.11033505201339722, "rewards/rejected": -0.16046170890331268, "step": 7160 }, { "epoch": 0.47, "learning_rate": 3.196682664195412e-06, "logits/chosen": -2.2713592052459717, "logits/rejected": -1.9928386211395264, "logps/chosen": -217.07821655273438, "logps/rejected": -192.48062133789062, "loss": 0.0353, "rewards/accuracies": 0.75, "rewards/chosen": -0.06515000015497208, "rewards/margins": 0.05146735906600952, "rewards/rejected": -0.1166173666715622, "step": 7170 }, { "epoch": 0.47, "learning_rate": 3.191197133760291e-06, "logits/chosen": -2.5346858501434326, "logits/rejected": -2.2119202613830566, "logps/chosen": -271.4148254394531, "logps/rejected": -222.3765411376953, "loss": 0.028, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.020056938752532005, "rewards/margins": 0.12605223059654236, "rewards/rejected": -0.14610914885997772, "step": 7180 }, { "epoch": 0.47, "learning_rate": 3.185707997702334e-06, "logits/chosen": -2.19646954536438, "logits/rejected": -2.0350489616394043, "logps/chosen": -255.05160522460938, "logps/rejected": -236.4979248046875, "loss": 0.0147, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0714336410164833, "rewards/margins": 0.09648963809013367, "rewards/rejected": -0.16792325675487518, "step": 7190 }, { "epoch": 0.47, "learning_rate": 3.1802152846555624e-06, "logits/chosen": -2.2263684272766113, "logits/rejected": -2.203920841217041, "logps/chosen": -234.8302764892578, "logps/rejected": -235.4951171875, "loss": 0.035, "rewards/accuracies": 0.625, "rewards/chosen": -0.05293622612953186, "rewards/margins": 0.08092103898525238, "rewards/rejected": -0.13385728001594543, "step": 7200 }, { "epoch": 0.47, "eval_logits/chosen": -2.308262825012207, "eval_logits/rejected": -2.1193060874938965, "eval_logps/chosen": -242.8714599609375, "eval_logps/rejected": -236.956787109375, "eval_loss": 0.026033619418740273, "eval_rewards/accuracies": 0.6485000252723694, "eval_rewards/chosen": -0.054332468658685684, "eval_rewards/margins": 0.07239188253879547, "eval_rewards/rejected": -0.12672434747219086, "eval_runtime": 712.7385, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 7200 }, { "epoch": 0.47, "learning_rate": 3.174719023272659e-06, "logits/chosen": -2.383152484893799, "logits/rejected": -2.4308807849884033, "logps/chosen": -224.28189086914062, "logps/rejected": -286.50152587890625, "loss": 0.0194, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05293875187635422, "rewards/margins": 0.07935214787721634, "rewards/rejected": -0.13229091465473175, "step": 7210 }, { "epoch": 0.47, "learning_rate": 3.169219242224816e-06, "logits/chosen": -2.3175277709960938, "logits/rejected": -2.1515915393829346, "logps/chosen": -254.6971893310547, "logps/rejected": -261.6206970214844, "loss": 0.0118, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.07384978979825974, "rewards/margins": 0.06509410589933395, "rewards/rejected": -0.1389438956975937, "step": 7220 }, { "epoch": 0.47, "learning_rate": 3.1637159702015837e-06, "logits/chosen": -2.320132255554199, "logits/rejected": -2.0065832138061523, "logps/chosen": -216.2084503173828, "logps/rejected": -218.912841796875, "loss": 0.021, "rewards/accuracies": 0.75, "rewards/chosen": -0.0515814833343029, "rewards/margins": 0.110136017203331, "rewards/rejected": -0.1617175042629242, "step": 7230 }, { "epoch": 0.47, "learning_rate": 3.1582092359107263e-06, "logits/chosen": -2.375622272491455, "logits/rejected": -2.1548819541931152, "logps/chosen": -289.31683349609375, "logps/rejected": -261.6122741699219, "loss": 0.0222, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.055195726454257965, "rewards/margins": 0.09063062071800232, "rewards/rejected": -0.14582636952400208, "step": 7240 }, { "epoch": 0.47, "learning_rate": 3.152699068078067e-06, "logits/chosen": -2.1964268684387207, "logits/rejected": -1.9905602931976318, "logps/chosen": -301.0905456542969, "logps/rejected": -305.6982421875, "loss": 0.0211, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1040010005235672, "rewards/margins": 0.12574461102485657, "rewards/rejected": -0.22974559664726257, "step": 7250 }, { "epoch": 0.48, "learning_rate": 3.1471854954473415e-06, "logits/chosen": -2.345653772354126, "logits/rejected": -2.389192581176758, "logps/chosen": -258.79254150390625, "logps/rejected": -274.45782470703125, "loss": 0.0179, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0401294082403183, "rewards/margins": 0.09716589748859406, "rewards/rejected": -0.13729530572891235, "step": 7260 }, { "epoch": 0.48, "learning_rate": 3.1416685467800436e-06, "logits/chosen": -2.1382288932800293, "logits/rejected": -2.1094374656677246, "logps/chosen": -202.70059204101562, "logps/rejected": -200.3344268798828, "loss": 0.0216, "rewards/accuracies": 0.75, "rewards/chosen": -0.08558902144432068, "rewards/margins": 0.10081753879785538, "rewards/rejected": -0.18640658259391785, "step": 7270 }, { "epoch": 0.48, "learning_rate": 3.1361482508552803e-06, "logits/chosen": -2.3437018394470215, "logits/rejected": -1.8407390117645264, "logps/chosen": -257.7562561035156, "logps/rejected": -227.2680206298828, "loss": 0.0341, "rewards/accuracies": 0.625, "rewards/chosen": -0.07902660965919495, "rewards/margins": 0.06449031084775925, "rewards/rejected": -0.1435169279575348, "step": 7280 }, { "epoch": 0.48, "learning_rate": 3.1306246364696198e-06, "logits/chosen": -2.467689037322998, "logits/rejected": -2.2647864818573, "logps/chosen": -265.7434997558594, "logps/rejected": -264.20330810546875, "loss": 0.0182, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.06634589284658432, "rewards/margins": 0.0798247903585434, "rewards/rejected": -0.1461706906557083, "step": 7290 }, { "epoch": 0.48, "learning_rate": 3.1250977324369413e-06, "logits/chosen": -2.253009080886841, "logits/rejected": -2.173417091369629, "logps/chosen": -171.82815551757812, "logps/rejected": -197.09963989257812, "loss": 0.015, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06437839567661285, "rewards/margins": 0.09279756993055344, "rewards/rejected": -0.15717598795890808, "step": 7300 }, { "epoch": 0.48, "eval_logits/chosen": -2.3008673191070557, "eval_logits/rejected": -2.1122865676879883, "eval_logps/chosen": -249.43240356445312, "eval_logps/rejected": -244.0609130859375, "eval_loss": 0.025661982595920563, "eval_rewards/accuracies": 0.6389999985694885, "eval_rewards/chosen": -0.08713724464178085, "eval_rewards/margins": 0.07510782033205032, "eval_rewards/rejected": -0.16224505007266998, "eval_runtime": 713.91, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.401, "step": 7300 }, { "epoch": 0.48, "learning_rate": 3.1195675675882825e-06, "logits/chosen": -2.2031381130218506, "logits/rejected": -2.096050977706909, "logps/chosen": -258.1648864746094, "logps/rejected": -240.55856323242188, "loss": 0.0283, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.11486943811178207, "rewards/margins": 0.06499240547418594, "rewards/rejected": -0.17986184358596802, "step": 7310 }, { "epoch": 0.48, "learning_rate": 3.1140341707716926e-06, "logits/chosen": -2.168344259262085, "logits/rejected": -1.9166107177734375, "logps/chosen": -215.80386352539062, "logps/rejected": -195.4596710205078, "loss": 0.039, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0999809131026268, "rewards/margins": 0.11335988342761993, "rewards/rejected": -0.21334078907966614, "step": 7320 }, { "epoch": 0.48, "learning_rate": 3.1084975708520803e-06, "logits/chosen": -2.4190192222595215, "logits/rejected": -2.0219929218292236, "logps/chosen": -277.07574462890625, "logps/rejected": -224.830322265625, "loss": 0.0176, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07675959169864655, "rewards/margins": 0.09250538051128387, "rewards/rejected": -0.1692649871110916, "step": 7330 }, { "epoch": 0.48, "learning_rate": 3.1029577967110625e-06, "logits/chosen": -2.418910026550293, "logits/rejected": -2.204145669937134, "logps/chosen": -233.3201446533203, "logps/rejected": -192.99319458007812, "loss": 0.0374, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08462905138731003, "rewards/margins": 0.043656978756189346, "rewards/rejected": -0.12828603386878967, "step": 7340 }, { "epoch": 0.48, "learning_rate": 3.097414877246814e-06, "logits/chosen": -2.2419230937957764, "logits/rejected": -1.994065523147583, "logps/chosen": -215.1243133544922, "logps/rejected": -206.0422821044922, "loss": 0.0339, "rewards/accuracies": 0.625, "rewards/chosen": -0.08240430057048798, "rewards/margins": 0.10215411335229874, "rewards/rejected": -0.18455840647220612, "step": 7350 }, { "epoch": 0.48, "learning_rate": 3.0918688413739197e-06, "logits/chosen": -2.328660488128662, "logits/rejected": -2.0175442695617676, "logps/chosen": -243.36770629882812, "logps/rejected": -201.67184448242188, "loss": 0.02, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.053423862904310226, "rewards/margins": 0.11288408935070038, "rewards/rejected": -0.1663079559803009, "step": 7360 }, { "epoch": 0.48, "learning_rate": 3.0863197180232178e-06, "logits/chosen": -2.3689608573913574, "logits/rejected": -1.9991772174835205, "logps/chosen": -213.11666870117188, "logps/rejected": -216.60641479492188, "loss": 0.0205, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06686246395111084, "rewards/margins": 0.07855210453271866, "rewards/rejected": -0.1454145610332489, "step": 7370 }, { "epoch": 0.48, "learning_rate": 3.0807675361416554e-06, "logits/chosen": -2.268491744995117, "logits/rejected": -2.0238747596740723, "logps/chosen": -196.4187774658203, "logps/rejected": -134.76646423339844, "loss": 0.0356, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.037692587822675705, "rewards/margins": 0.08691040426492691, "rewards/rejected": -0.12460298836231232, "step": 7380 }, { "epoch": 0.48, "learning_rate": 3.0752123246921327e-06, "logits/chosen": -2.3893237113952637, "logits/rejected": -2.114490509033203, "logps/chosen": -290.68157958984375, "logps/rejected": -240.748779296875, "loss": 0.0188, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06384162604808807, "rewards/margins": 0.08499327301979065, "rewards/rejected": -0.14883491396903992, "step": 7390 }, { "epoch": 0.48, "learning_rate": 3.069654112653353e-06, "logits/chosen": -2.406578540802002, "logits/rejected": -2.199592351913452, "logps/chosen": -231.58706665039062, "logps/rejected": -207.14590454101562, "loss": 0.0231, "rewards/accuracies": 0.625, "rewards/chosen": -0.07551421225070953, "rewards/margins": 0.04055469110608101, "rewards/rejected": -0.11606889963150024, "step": 7400 }, { "epoch": 0.48, "eval_logits/chosen": -2.2913472652435303, "eval_logits/rejected": -2.103548049926758, "eval_logps/chosen": -245.1847686767578, "eval_logps/rejected": -240.86825561523438, "eval_loss": 0.0254887156188488, "eval_rewards/accuracies": 0.6489999890327454, "eval_rewards/chosen": -0.06589899212121964, "eval_rewards/margins": 0.08038286119699478, "eval_rewards/rejected": -0.14628185331821442, "eval_runtime": 710.4192, "eval_samples_per_second": 2.815, "eval_steps_per_second": 1.408, "step": 7400 }, { "epoch": 0.48, "learning_rate": 3.064092929019673e-06, "logits/chosen": -2.283900737762451, "logits/rejected": -2.317167282104492, "logps/chosen": -269.21429443359375, "logps/rejected": -300.99951171875, "loss": 0.0248, "rewards/accuracies": 0.625, "rewards/chosen": -0.0583542063832283, "rewards/margins": 0.0595521442592144, "rewards/rejected": -0.117906354367733, "step": 7410 }, { "epoch": 0.49, "learning_rate": 3.058528802800952e-06, "logits/chosen": -2.321166753768921, "logits/rejected": -2.080514907836914, "logps/chosen": -303.0827331542969, "logps/rejected": -282.8626403808594, "loss": 0.0204, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05224515125155449, "rewards/margins": 0.08176366984844208, "rewards/rejected": -0.13400882482528687, "step": 7420 }, { "epoch": 0.49, "learning_rate": 3.052961763022397e-06, "logits/chosen": -2.4596550464630127, "logits/rejected": -2.1328208446502686, "logps/chosen": -197.9812469482422, "logps/rejected": -182.56109619140625, "loss": 0.0378, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.07550039142370224, "rewards/margins": 0.1165175586938858, "rewards/rejected": -0.19201794266700745, "step": 7430 }, { "epoch": 0.49, "learning_rate": 3.047391838724415e-06, "logits/chosen": -2.4146902561187744, "logits/rejected": -1.951748251914978, "logps/chosen": -248.65621948242188, "logps/rejected": -253.9379119873047, "loss": 0.0308, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06568771600723267, "rewards/margins": 0.10674123466014862, "rewards/rejected": -0.1724289357662201, "step": 7440 }, { "epoch": 0.49, "learning_rate": 3.0418190589624587e-06, "logits/chosen": -2.335435152053833, "logits/rejected": -2.073451519012451, "logps/chosen": -191.60012817382812, "logps/rejected": -212.0890655517578, "loss": 0.0241, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06989014893770218, "rewards/margins": 0.060682911425828934, "rewards/rejected": -0.1305730640888214, "step": 7450 }, { "epoch": 0.49, "learning_rate": 3.0362434528068784e-06, "logits/chosen": -2.3186919689178467, "logits/rejected": -1.8934450149536133, "logps/chosen": -285.0685119628906, "logps/rejected": -220.4761962890625, "loss": 0.0099, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.07872681319713593, "rewards/margins": 0.10450903326272964, "rewards/rejected": -0.18323580920696259, "step": 7460 }, { "epoch": 0.49, "learning_rate": 3.0306650493427657e-06, "logits/chosen": -2.214409828186035, "logits/rejected": -2.1102757453918457, "logps/chosen": -245.85733032226562, "logps/rejected": -254.14730834960938, "loss": 0.0301, "rewards/accuracies": 0.625, "rewards/chosen": -0.07119054347276688, "rewards/margins": 0.08625958859920502, "rewards/rejected": -0.1574501246213913, "step": 7470 }, { "epoch": 0.49, "learning_rate": 3.0250838776698077e-06, "logits/chosen": -2.0632340908050537, "logits/rejected": -2.1093077659606934, "logps/chosen": -204.76260375976562, "logps/rejected": -221.22854614257812, "loss": 0.021, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10421650111675262, "rewards/margins": 0.09606151282787323, "rewards/rejected": -0.20027799904346466, "step": 7480 }, { "epoch": 0.49, "learning_rate": 3.0194999669021275e-06, "logits/chosen": -2.0692830085754395, "logits/rejected": -1.7427141666412354, "logps/chosen": -239.0797119140625, "logps/rejected": -213.71298217773438, "loss": 0.027, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.061056338250637054, "rewards/margins": 0.10842617601156235, "rewards/rejected": -0.1694825291633606, "step": 7490 }, { "epoch": 0.49, "learning_rate": 3.0139133461681403e-06, "logits/chosen": -2.2071690559387207, "logits/rejected": -2.060537815093994, "logps/chosen": -275.1077575683594, "logps/rejected": -238.1566619873047, "loss": 0.0211, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.05356758087873459, "rewards/margins": 0.0959450975060463, "rewards/rejected": -0.14951267838478088, "step": 7500 }, { "epoch": 0.49, "eval_logits/chosen": -2.248464345932007, "eval_logits/rejected": -2.063547372817993, "eval_logps/chosen": -244.62350463867188, "eval_logps/rejected": -240.8419647216797, "eval_loss": 0.02583528310060501, "eval_rewards/accuracies": 0.6520000100135803, "eval_rewards/chosen": -0.06309277564287186, "eval_rewards/margins": 0.08305763453245163, "eval_rewards/rejected": -0.1461504101753235, "eval_runtime": 711.8448, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 7500 }, { "epoch": 0.49, "learning_rate": 3.0083240446103965e-06, "logits/chosen": -1.9640617370605469, "logits/rejected": -1.9273643493652344, "logps/chosen": -195.1829376220703, "logps/rejected": -223.74267578125, "loss": 0.0156, "rewards/accuracies": 0.625, "rewards/chosen": -0.05574166774749756, "rewards/margins": 0.1044529527425766, "rewards/rejected": -0.16019462049007416, "step": 7510 }, { "epoch": 0.49, "learning_rate": 3.0027320913854306e-06, "logits/chosen": -2.451677083969116, "logits/rejected": -2.165865421295166, "logps/chosen": -301.517333984375, "logps/rejected": -258.03253173828125, "loss": 0.022, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04527861624956131, "rewards/margins": 0.09612289816141129, "rewards/rejected": -0.1414014995098114, "step": 7520 }, { "epoch": 0.49, "learning_rate": 2.997137515663609e-06, "logits/chosen": -2.2002179622650146, "logits/rejected": -2.116541862487793, "logps/chosen": -230.6831817626953, "logps/rejected": -211.8942108154297, "loss": 0.0552, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.026528939604759216, "rewards/margins": 0.0945509821176529, "rewards/rejected": -0.1210799366235733, "step": 7530 }, { "epoch": 0.49, "learning_rate": 2.991540346628981e-06, "logits/chosen": -2.3002963066101074, "logits/rejected": -2.118590831756592, "logps/chosen": -248.7118682861328, "logps/rejected": -234.67245483398438, "loss": 0.0165, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.045557767152786255, "rewards/margins": 0.060344088822603226, "rewards/rejected": -0.10590185225009918, "step": 7540 }, { "epoch": 0.49, "learning_rate": 2.985940613479121e-06, "logits/chosen": -2.3952338695526123, "logits/rejected": -2.2858328819274902, "logps/chosen": -302.9120788574219, "logps/rejected": -258.48260498046875, "loss": 0.0282, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04531232267618179, "rewards/margins": 0.07471176236867905, "rewards/rejected": -0.12002407014369965, "step": 7550 }, { "epoch": 0.49, "learning_rate": 2.980338345424981e-06, "logits/chosen": -2.255479574203491, "logits/rejected": -1.950728416442871, "logps/chosen": -257.19573974609375, "logps/rejected": -223.8048858642578, "loss": 0.0157, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.039879582822322845, "rewards/margins": 0.07442986220121384, "rewards/rejected": -0.11430943012237549, "step": 7560 }, { "epoch": 0.5, "learning_rate": 2.974733571690735e-06, "logits/chosen": -2.3409745693206787, "logits/rejected": -2.0642552375793457, "logps/chosen": -254.4838409423828, "logps/rejected": -212.90652465820312, "loss": 0.0446, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08538492023944855, "rewards/margins": 0.08857604116201401, "rewards/rejected": -0.17396095395088196, "step": 7570 }, { "epoch": 0.5, "learning_rate": 2.9691263215136274e-06, "logits/chosen": -2.2871947288513184, "logits/rejected": -2.2657456398010254, "logps/chosen": -274.03948974609375, "logps/rejected": -258.7521057128906, "loss": 0.0118, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.035195596516132355, "rewards/margins": 0.08183564990758896, "rewards/rejected": -0.11703125387430191, "step": 7580 }, { "epoch": 0.5, "learning_rate": 2.963516624143823e-06, "logits/chosen": -2.181278705596924, "logits/rejected": -2.06855845451355, "logps/chosen": -231.0608673095703, "logps/rejected": -211.93508911132812, "loss": 0.0217, "rewards/accuracies": 0.625, "rewards/chosen": -0.09247070550918579, "rewards/margins": 0.0920250415802002, "rewards/rejected": -0.184495747089386, "step": 7590 }, { "epoch": 0.5, "learning_rate": 2.9579045088442504e-06, "logits/chosen": -2.076108455657959, "logits/rejected": -2.109138011932373, "logps/chosen": -205.429931640625, "logps/rejected": -247.7603759765625, "loss": 0.0379, "rewards/accuracies": 0.625, "rewards/chosen": -0.0846247524023056, "rewards/margins": 0.1129075139760971, "rewards/rejected": -0.1975322812795639, "step": 7600 }, { "epoch": 0.5, "eval_logits/chosen": -2.2404394149780273, "eval_logits/rejected": -2.05661678314209, "eval_logps/chosen": -246.95504760742188, "eval_logps/rejected": -243.5423126220703, "eval_loss": 0.02591700106859207, "eval_rewards/accuracies": 0.6474999785423279, "eval_rewards/chosen": -0.07475046068429947, "eval_rewards/margins": 0.08490156382322311, "eval_rewards/rejected": -0.1596520096063614, "eval_runtime": 711.6852, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 7600 }, { "epoch": 0.5, "learning_rate": 2.9522900048904534e-06, "logits/chosen": -2.151641845703125, "logits/rejected": -2.059218168258667, "logps/chosen": -261.9276123046875, "logps/rejected": -240.96853637695312, "loss": 0.0265, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11094732582569122, "rewards/margins": 0.04531213641166687, "rewards/rejected": -0.1562594622373581, "step": 7610 }, { "epoch": 0.5, "learning_rate": 2.9466731415704343e-06, "logits/chosen": -2.2437233924865723, "logits/rejected": -2.116011381149292, "logps/chosen": -239.49179077148438, "logps/rejected": -253.66470336914062, "loss": 0.0229, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0785256177186966, "rewards/margins": 0.09270112216472626, "rewards/rejected": -0.17122673988342285, "step": 7620 }, { "epoch": 0.5, "learning_rate": 2.941053948184503e-06, "logits/chosen": -2.306546211242676, "logits/rejected": -2.155078649520874, "logps/chosen": -293.14190673828125, "logps/rejected": -267.7697448730469, "loss": 0.0338, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06601421535015106, "rewards/margins": 0.04770015925168991, "rewards/rejected": -0.11371437460184097, "step": 7630 }, { "epoch": 0.5, "learning_rate": 2.935432454045125e-06, "logits/chosen": -2.10274076461792, "logits/rejected": -2.168013095855713, "logps/chosen": -249.28128051757812, "logps/rejected": -234.67724609375, "loss": 0.0248, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08658457547426224, "rewards/margins": 0.03301741182804108, "rewards/rejected": -0.11960198730230331, "step": 7640 }, { "epoch": 0.5, "learning_rate": 2.929808688476768e-06, "logits/chosen": -2.340390682220459, "logits/rejected": -2.217571496963501, "logps/chosen": -254.6952667236328, "logps/rejected": -253.04006958007812, "loss": 0.0389, "rewards/accuracies": 0.625, "rewards/chosen": -0.0766642838716507, "rewards/margins": 0.07973639667034149, "rewards/rejected": -0.1564006805419922, "step": 7650 }, { "epoch": 0.5, "learning_rate": 2.924182680815748e-06, "logits/chosen": -2.258434534072876, "logits/rejected": -2.182904005050659, "logps/chosen": -244.4628448486328, "logps/rejected": -245.7698211669922, "loss": 0.0149, "rewards/accuracies": 0.75, "rewards/chosen": -0.04916124790906906, "rewards/margins": 0.12007315456867218, "rewards/rejected": -0.16923440992832184, "step": 7660 }, { "epoch": 0.5, "learning_rate": 2.9185544604100765e-06, "logits/chosen": -2.0396854877471924, "logits/rejected": -1.946394681930542, "logps/chosen": -211.8596954345703, "logps/rejected": -221.6324005126953, "loss": 0.0246, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.07974790781736374, "rewards/margins": 0.07014047354459763, "rewards/rejected": -0.14988838136196136, "step": 7670 }, { "epoch": 0.5, "learning_rate": 2.9129240566193083e-06, "logits/chosen": -2.34912371635437, "logits/rejected": -2.0322132110595703, "logps/chosen": -215.6170654296875, "logps/rejected": -219.56967163085938, "loss": 0.022, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.06768370419740677, "rewards/margins": 0.08620280772447586, "rewards/rejected": -0.15388651192188263, "step": 7680 }, { "epoch": 0.5, "learning_rate": 2.9072914988143874e-06, "logits/chosen": -2.108494758605957, "logits/rejected": -2.0200300216674805, "logps/chosen": -213.08517456054688, "logps/rejected": -228.9197998046875, "loss": 0.0381, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05827696993947029, "rewards/margins": 0.1215302124619484, "rewards/rejected": -0.17980718612670898, "step": 7690 }, { "epoch": 0.5, "learning_rate": 2.9016568163774956e-06, "logits/chosen": -2.3322207927703857, "logits/rejected": -2.09818434715271, "logps/chosen": -183.9313507080078, "logps/rejected": -165.3214874267578, "loss": 0.0117, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.058510202914476395, "rewards/margins": 0.07721497863531113, "rewards/rejected": -0.13572517037391663, "step": 7700 }, { "epoch": 0.5, "eval_logits/chosen": -2.2502083778381348, "eval_logits/rejected": -2.0660572052001953, "eval_logps/chosen": -243.0759735107422, "eval_logps/rejected": -239.77200317382812, "eval_loss": 0.02568177692592144, "eval_rewards/accuracies": 0.6620000004768372, "eval_rewards/chosen": -0.0553550161421299, "eval_rewards/margins": 0.08544543385505676, "eval_rewards/rejected": -0.14080046117305756, "eval_runtime": 713.4471, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.402, "step": 7700 }, { "epoch": 0.5, "learning_rate": 2.8960200387018942e-06, "logits/chosen": -2.0974814891815186, "logits/rejected": -2.0618884563446045, "logps/chosen": -321.8419494628906, "logps/rejected": -286.4595031738281, "loss": 0.0235, "rewards/accuracies": 0.625, "rewards/chosen": -0.07054462283849716, "rewards/margins": 0.06725998967885971, "rewards/rejected": -0.13780462741851807, "step": 7710 }, { "epoch": 0.51, "learning_rate": 2.8903811951917792e-06, "logits/chosen": -2.2555909156799316, "logits/rejected": -2.1047475337982178, "logps/chosen": -207.61007690429688, "logps/rejected": -176.0470428466797, "loss": 0.0266, "rewards/accuracies": 0.75, "rewards/chosen": -0.04665211960673332, "rewards/margins": 0.07789582014083862, "rewards/rejected": -0.12454793602228165, "step": 7720 }, { "epoch": 0.51, "learning_rate": 2.88474031526212e-06, "logits/chosen": -2.212296962738037, "logits/rejected": -2.180422306060791, "logps/chosen": -213.6601104736328, "logps/rejected": -240.7231903076172, "loss": 0.0163, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05410192161798477, "rewards/margins": 0.06817255169153214, "rewards/rejected": -0.1222744733095169, "step": 7730 }, { "epoch": 0.51, "learning_rate": 2.879097428338509e-06, "logits/chosen": -2.208082437515259, "logits/rejected": -1.9035813808441162, "logps/chosen": -224.62008666992188, "logps/rejected": -218.4266815185547, "loss": 0.0316, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.043625570833683014, "rewards/margins": 0.08067473769187927, "rewards/rejected": -0.12430031597614288, "step": 7740 }, { "epoch": 0.51, "learning_rate": 2.8734525638570094e-06, "logits/chosen": -2.2176127433776855, "logits/rejected": -2.1426639556884766, "logps/chosen": -241.19674682617188, "logps/rejected": -242.5107421875, "loss": 0.0232, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03501252457499504, "rewards/margins": 0.06843651831150055, "rewards/rejected": -0.1034490317106247, "step": 7750 }, { "epoch": 0.51, "learning_rate": 2.8678057512639982e-06, "logits/chosen": -2.170894145965576, "logits/rejected": -2.076629638671875, "logps/chosen": -288.15228271484375, "logps/rejected": -292.22430419921875, "loss": 0.0225, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.01042949128895998, "rewards/margins": 0.13065668940544128, "rewards/rejected": -0.1410861760377884, "step": 7760 }, { "epoch": 0.51, "learning_rate": 2.8621570200160172e-06, "logits/chosen": -2.0633962154388428, "logits/rejected": -1.9723310470581055, "logps/chosen": -172.1400909423828, "logps/rejected": -184.14730834960938, "loss": 0.0184, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.017855005338788033, "rewards/margins": 0.10092601925134659, "rewards/rejected": -0.11878103017807007, "step": 7770 }, { "epoch": 0.51, "learning_rate": 2.856506399579615e-06, "logits/chosen": -2.417569637298584, "logits/rejected": -2.0469810962677, "logps/chosen": -231.6193389892578, "logps/rejected": -227.15756225585938, "loss": 0.0301, "rewards/accuracies": 0.625, "rewards/chosen": -0.06627919524908066, "rewards/margins": 0.07513771951198578, "rewards/rejected": -0.14141690731048584, "step": 7780 }, { "epoch": 0.51, "learning_rate": 2.8508539194311964e-06, "logits/chosen": -2.329987049102783, "logits/rejected": -2.3192734718322754, "logps/chosen": -261.53924560546875, "logps/rejected": -287.02728271484375, "loss": 0.0106, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.039029061794281006, "rewards/margins": 0.06329012662172318, "rewards/rejected": -0.10231919586658478, "step": 7790 }, { "epoch": 0.51, "learning_rate": 2.8451996090568656e-06, "logits/chosen": -2.2257461547851562, "logits/rejected": -2.104602098464966, "logps/chosen": -202.16307067871094, "logps/rejected": -204.09323120117188, "loss": 0.0197, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08011750876903534, "rewards/margins": 0.09945876896381378, "rewards/rejected": -0.17957626283168793, "step": 7800 }, { "epoch": 0.51, "eval_logits/chosen": -2.27226185798645, "eval_logits/rejected": -2.0867011547088623, "eval_logps/chosen": -245.60133361816406, "eval_logps/rejected": -242.34844970703125, "eval_loss": 0.026134636253118515, "eval_rewards/accuracies": 0.6589999794960022, "eval_rewards/chosen": -0.06798180192708969, "eval_rewards/margins": 0.08570097386837006, "eval_rewards/rejected": -0.15368276834487915, "eval_runtime": 710.4359, "eval_samples_per_second": 2.815, "eval_steps_per_second": 1.408, "step": 7800 }, { "epoch": 0.51, "learning_rate": 2.839543497952276e-06, "logits/chosen": -2.1585605144500732, "logits/rejected": -2.267885208129883, "logps/chosen": -199.9320831298828, "logps/rejected": -208.8585662841797, "loss": 0.0458, "rewards/accuracies": 0.625, "rewards/chosen": -0.07660982012748718, "rewards/margins": 0.08815185725688934, "rewards/rejected": -0.16476169228553772, "step": 7810 }, { "epoch": 0.51, "learning_rate": 2.833885615622474e-06, "logits/chosen": -2.2131218910217285, "logits/rejected": -2.064406394958496, "logps/chosen": -221.01327514648438, "logps/rejected": -244.9972686767578, "loss": 0.0318, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09218723326921463, "rewards/margins": 0.07254284620285034, "rewards/rejected": -0.16473010182380676, "step": 7820 }, { "epoch": 0.51, "learning_rate": 2.8282259915817454e-06, "logits/chosen": -1.9040521383285522, "logits/rejected": -2.1038641929626465, "logps/chosen": -155.58538818359375, "logps/rejected": -215.759521484375, "loss": 0.0184, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.073203444480896, "rewards/margins": 0.09981255233287811, "rewards/rejected": -0.17301598191261292, "step": 7830 }, { "epoch": 0.51, "learning_rate": 2.8225646553534614e-06, "logits/chosen": -2.0671770572662354, "logits/rejected": -1.9584850072860718, "logps/chosen": -208.79440307617188, "logps/rejected": -218.4818878173828, "loss": 0.0309, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.047392092645168304, "rewards/margins": 0.06489382684230804, "rewards/rejected": -0.11228591203689575, "step": 7840 }, { "epoch": 0.51, "learning_rate": 2.8169016364699255e-06, "logits/chosen": -2.264528512954712, "logits/rejected": -1.993786096572876, "logps/chosen": -228.7519073486328, "logps/rejected": -244.06967163085938, "loss": 0.0279, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08431997150182724, "rewards/margins": 0.06783817708492279, "rewards/rejected": -0.15215817093849182, "step": 7850 }, { "epoch": 0.51, "learning_rate": 2.811236964472217e-06, "logits/chosen": -2.3775601387023926, "logits/rejected": -2.0033047199249268, "logps/chosen": -323.74560546875, "logps/rejected": -277.99798583984375, "loss": 0.0308, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06669513881206512, "rewards/margins": 0.07186156511306763, "rewards/rejected": -0.13855668902397156, "step": 7860 }, { "epoch": 0.51, "learning_rate": 2.805570668910041e-06, "logits/chosen": -2.0790164470672607, "logits/rejected": -2.051755666732788, "logps/chosen": -191.5697479248047, "logps/rejected": -271.2172546386719, "loss": 0.0203, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10965213924646378, "rewards/margins": 0.08073590695858002, "rewards/rejected": -0.1903880536556244, "step": 7870 }, { "epoch": 0.52, "learning_rate": 2.7999027793415695e-06, "logits/chosen": -2.4737279415130615, "logits/rejected": -2.0029778480529785, "logps/chosen": -260.46319580078125, "logps/rejected": -227.0538787841797, "loss": 0.0147, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07773597538471222, "rewards/margins": 0.057581812143325806, "rewards/rejected": -0.13531777262687683, "step": 7880 }, { "epoch": 0.52, "learning_rate": 2.794233325333293e-06, "logits/chosen": -2.160679340362549, "logits/rejected": -2.0541396141052246, "logps/chosen": -270.203857421875, "logps/rejected": -267.7791442871094, "loss": 0.0209, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05961208418011665, "rewards/margins": 0.10626313835382462, "rewards/rejected": -0.16587522625923157, "step": 7890 }, { "epoch": 0.52, "learning_rate": 2.7885623364598597e-06, "logits/chosen": -2.3812639713287354, "logits/rejected": -2.0741419792175293, "logps/chosen": -280.6651306152344, "logps/rejected": -257.989501953125, "loss": 0.0296, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0848626121878624, "rewards/margins": 0.09621632099151611, "rewards/rejected": -0.1810789406299591, "step": 7900 }, { "epoch": 0.52, "eval_logits/chosen": -2.2761878967285156, "eval_logits/rejected": -2.0899689197540283, "eval_logps/chosen": -245.6046600341797, "eval_logps/rejected": -241.3648681640625, "eval_loss": 0.025326939299702644, "eval_rewards/accuracies": 0.6554999947547913, "eval_rewards/chosen": -0.06799853593111038, "eval_rewards/margins": 0.08076643198728561, "eval_rewards/rejected": -0.148764967918396, "eval_runtime": 712.7189, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 7900 }, { "epoch": 0.52, "learning_rate": 2.782889842303926e-06, "logits/chosen": -2.247314453125, "logits/rejected": -2.0741233825683594, "logps/chosen": -183.73057556152344, "logps/rejected": -185.82192993164062, "loss": 0.0417, "rewards/accuracies": 0.625, "rewards/chosen": -0.11750733852386475, "rewards/margins": 0.03794608265161514, "rewards/rejected": -0.1554533988237381, "step": 7910 }, { "epoch": 0.52, "learning_rate": 2.7772158724559987e-06, "logits/chosen": -2.0826973915100098, "logits/rejected": -1.9278663396835327, "logps/chosen": -228.0181427001953, "logps/rejected": -298.50860595703125, "loss": 0.0105, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.05454551428556442, "rewards/margins": 0.16509851813316345, "rewards/rejected": -0.21964402496814728, "step": 7920 }, { "epoch": 0.52, "learning_rate": 2.7715404565142856e-06, "logits/chosen": -2.2426955699920654, "logits/rejected": -2.106792449951172, "logps/chosen": -215.12130737304688, "logps/rejected": -220.0441436767578, "loss": 0.0179, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0757858008146286, "rewards/margins": 0.05572701245546341, "rewards/rejected": -0.1315128058195114, "step": 7930 }, { "epoch": 0.52, "learning_rate": 2.7658636240845354e-06, "logits/chosen": -2.390791416168213, "logits/rejected": -2.2865827083587646, "logps/chosen": -236.14505004882812, "logps/rejected": -268.32696533203125, "loss": 0.0107, "rewards/accuracies": 0.75, "rewards/chosen": -0.0634835809469223, "rewards/margins": 0.08781920373439789, "rewards/rejected": -0.1513027846813202, "step": 7940 }, { "epoch": 0.52, "learning_rate": 2.7601854047798872e-06, "logits/chosen": -2.180553913116455, "logits/rejected": -2.220141887664795, "logps/chosen": -237.9814453125, "logps/rejected": -271.52630615234375, "loss": 0.0278, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.07029031217098236, "rewards/margins": 0.07064785063266754, "rewards/rejected": -0.1409381479024887, "step": 7950 }, { "epoch": 0.52, "learning_rate": 2.7545058282207148e-06, "logits/chosen": -2.315669059753418, "logits/rejected": -1.9250940084457397, "logps/chosen": -225.5505828857422, "logps/rejected": -209.63894653320312, "loss": 0.0269, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07004386186599731, "rewards/margins": 0.07333650439977646, "rewards/rejected": -0.14338035881519318, "step": 7960 }, { "epoch": 0.52, "learning_rate": 2.748824924034471e-06, "logits/chosen": -2.244135618209839, "logits/rejected": -2.105903148651123, "logps/chosen": -240.2060089111328, "logps/rejected": -238.26852416992188, "loss": 0.0133, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10397887229919434, "rewards/margins": 0.08479610085487366, "rewards/rejected": -0.188774973154068, "step": 7970 }, { "epoch": 0.52, "learning_rate": 2.743142721855536e-06, "logits/chosen": -2.1047046184539795, "logits/rejected": -2.0798017978668213, "logps/chosen": -165.8533477783203, "logps/rejected": -167.95614624023438, "loss": 0.0372, "rewards/accuracies": 0.625, "rewards/chosen": -0.0699661374092102, "rewards/margins": 0.052256323397159576, "rewards/rejected": -0.12222246825695038, "step": 7980 }, { "epoch": 0.52, "learning_rate": 2.737459251325058e-06, "logits/chosen": -2.2246992588043213, "logits/rejected": -2.1735329627990723, "logps/chosen": -279.4375305175781, "logps/rejected": -266.1138610839844, "loss": 0.0129, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04731985926628113, "rewards/margins": 0.046038512140512466, "rewards/rejected": -0.0933583602309227, "step": 7990 }, { "epoch": 0.52, "learning_rate": 2.731774542090804e-06, "logits/chosen": -2.1986324787139893, "logits/rejected": -1.7890323400497437, "logps/chosen": -204.6566619873047, "logps/rejected": -194.89529418945312, "loss": 0.0385, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.05940311402082443, "rewards/margins": 0.04636824131011963, "rewards/rejected": -0.10577134042978287, "step": 8000 }, { "epoch": 0.52, "eval_logits/chosen": -2.2589457035064697, "eval_logits/rejected": -2.073702573776245, "eval_logps/chosen": -241.4889373779297, "eval_logps/rejected": -237.5529022216797, "eval_loss": 0.025104772299528122, "eval_rewards/accuracies": 0.6499999761581421, "eval_rewards/chosen": -0.04741990193724632, "eval_rewards/margins": 0.08228505402803421, "eval_rewards/rejected": -0.12970495223999023, "eval_runtime": 713.1967, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 8000 }, { "epoch": 0.52, "learning_rate": 2.7260886238070034e-06, "logits/chosen": -2.26794171333313, "logits/rejected": -2.1829566955566406, "logps/chosen": -205.74667358398438, "logps/rejected": -216.74722290039062, "loss": 0.0387, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.046383585780858994, "rewards/margins": 0.07854325324296951, "rewards/rejected": -0.1249268501996994, "step": 8010 }, { "epoch": 0.52, "learning_rate": 2.72040152613419e-06, "logits/chosen": -2.296430826187134, "logits/rejected": -1.7960484027862549, "logps/chosen": -228.84841918945312, "logps/rejected": -171.97018432617188, "loss": 0.0344, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.05986651033163071, "rewards/margins": 0.13173654675483704, "rewards/rejected": -0.19160306453704834, "step": 8020 }, { "epoch": 0.53, "learning_rate": 2.7147132787390516e-06, "logits/chosen": -2.277390956878662, "logits/rejected": -1.9836620092391968, "logps/chosen": -236.23367309570312, "logps/rejected": -234.6664581298828, "loss": 0.022, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.043777886778116226, "rewards/margins": 0.07709158211946487, "rewards/rejected": -0.1208694726228714, "step": 8030 }, { "epoch": 0.53, "learning_rate": 2.709023911294273e-06, "logits/chosen": -2.3583455085754395, "logits/rejected": -1.8926982879638672, "logps/chosen": -247.84768676757812, "logps/rejected": -246.155517578125, "loss": 0.0446, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.027644217014312744, "rewards/margins": 0.14603692293167114, "rewards/rejected": -0.1736811399459839, "step": 8040 }, { "epoch": 0.53, "learning_rate": 2.7033334534783806e-06, "logits/chosen": -2.251115083694458, "logits/rejected": -2.343580961227417, "logps/chosen": -208.17770385742188, "logps/rejected": -244.0928192138672, "loss": 0.0245, "rewards/accuracies": 0.625, "rewards/chosen": -0.045159198343753815, "rewards/margins": 0.09858004003763199, "rewards/rejected": -0.1437392383813858, "step": 8050 }, { "epoch": 0.53, "learning_rate": 2.697641934975592e-06, "logits/chosen": -2.2605040073394775, "logits/rejected": -2.0341665744781494, "logps/chosen": -238.95156860351562, "logps/rejected": -222.9936065673828, "loss": 0.0342, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05582579970359802, "rewards/margins": 0.08297914266586304, "rewards/rejected": -0.13880494236946106, "step": 8060 }, { "epoch": 0.53, "learning_rate": 2.691949385475654e-06, "logits/chosen": -2.2924370765686035, "logits/rejected": -2.0420851707458496, "logps/chosen": -257.25457763671875, "logps/rejected": -246.86679077148438, "loss": 0.0376, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06681881099939346, "rewards/margins": 0.07591725140810013, "rewards/rejected": -0.1427360475063324, "step": 8070 }, { "epoch": 0.53, "learning_rate": 2.6862558346736937e-06, "logits/chosen": -2.205564260482788, "logits/rejected": -2.058074951171875, "logps/chosen": -252.31167602539062, "logps/rejected": -277.84716796875, "loss": 0.0192, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06979704648256302, "rewards/margins": 0.15368643403053284, "rewards/rejected": -0.22348348796367645, "step": 8080 }, { "epoch": 0.53, "learning_rate": 2.6805613122700617e-06, "logits/chosen": -2.2460989952087402, "logits/rejected": -1.916083574295044, "logps/chosen": -242.57296752929688, "logps/rejected": -261.56463623046875, "loss": 0.0219, "rewards/accuracies": 0.625, "rewards/chosen": -0.09500262886285782, "rewards/margins": 0.09024744480848312, "rewards/rejected": -0.18525007367134094, "step": 8090 }, { "epoch": 0.53, "learning_rate": 2.674865847970176e-06, "logits/chosen": -2.1714882850646973, "logits/rejected": -1.9047397375106812, "logps/chosen": -222.4866485595703, "logps/rejected": -262.7538146972656, "loss": 0.0295, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07435137033462524, "rewards/margins": 0.0890941396355629, "rewards/rejected": -0.16344550251960754, "step": 8100 }, { "epoch": 0.53, "eval_logits/chosen": -2.2293014526367188, "eval_logits/rejected": -2.0447380542755127, "eval_logps/chosen": -246.51162719726562, "eval_logps/rejected": -242.96429443359375, "eval_loss": 0.02487250603735447, "eval_rewards/accuracies": 0.6589999794960022, "eval_rewards/chosen": -0.07253342866897583, "eval_rewards/margins": 0.08422857522964478, "eval_rewards/rejected": -0.1567619889974594, "eval_runtime": 712.8219, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 8100 }, { "epoch": 0.53, "learning_rate": 2.669169471484368e-06, "logits/chosen": -1.9838573932647705, "logits/rejected": -2.03348708152771, "logps/chosen": -182.14694213867188, "logps/rejected": -187.5550079345703, "loss": 0.0315, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.085444375872612, "rewards/margins": 0.040241360664367676, "rewards/rejected": -0.12568573653697968, "step": 8110 }, { "epoch": 0.53, "learning_rate": 2.6634722125277278e-06, "logits/chosen": -2.333920955657959, "logits/rejected": -2.0139307975769043, "logps/chosen": -250.2372589111328, "logps/rejected": -274.3821105957031, "loss": 0.0275, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09058667719364166, "rewards/margins": 0.079750195145607, "rewards/rejected": -0.17033687233924866, "step": 8120 }, { "epoch": 0.53, "learning_rate": 2.6577741008199498e-06, "logits/chosen": -2.2460877895355225, "logits/rejected": -1.8959665298461914, "logps/chosen": -270.9378356933594, "logps/rejected": -243.338134765625, "loss": 0.025, "rewards/accuracies": 0.75, "rewards/chosen": -0.07389827072620392, "rewards/margins": 0.16220808029174805, "rewards/rejected": -0.23610636591911316, "step": 8130 }, { "epoch": 0.53, "learning_rate": 2.652075166085175e-06, "logits/chosen": -2.146793842315674, "logits/rejected": -2.1021342277526855, "logps/chosen": -252.03121948242188, "logps/rejected": -300.4096984863281, "loss": 0.0242, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08725959807634354, "rewards/margins": 0.12416623532772064, "rewards/rejected": -0.21142585575580597, "step": 8140 }, { "epoch": 0.53, "learning_rate": 2.6463754380518395e-06, "logits/chosen": -2.1202099323272705, "logits/rejected": -1.9160016775131226, "logps/chosen": -251.58853149414062, "logps/rejected": -213.3192901611328, "loss": 0.0292, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10315438359975815, "rewards/margins": 0.08278907835483551, "rewards/rejected": -0.18594345450401306, "step": 8150 }, { "epoch": 0.53, "learning_rate": 2.6406749464525167e-06, "logits/chosen": -2.2524800300598145, "logits/rejected": -1.9723221063613892, "logps/chosen": -242.8863067626953, "logps/rejected": -214.191650390625, "loss": 0.0389, "rewards/accuracies": 0.625, "rewards/chosen": -0.046635985374450684, "rewards/margins": 0.09268581122159958, "rewards/rejected": -0.13932180404663086, "step": 8160 }, { "epoch": 0.53, "learning_rate": 2.634973721023762e-06, "logits/chosen": -2.288912534713745, "logits/rejected": -2.132657051086426, "logps/chosen": -273.7260437011719, "logps/rejected": -234.4434051513672, "loss": 0.0504, "rewards/accuracies": 0.625, "rewards/chosen": -0.0963190421462059, "rewards/margins": 0.05077965185046196, "rewards/rejected": -0.14709869027137756, "step": 8170 }, { "epoch": 0.54, "learning_rate": 2.6292717915059605e-06, "logits/chosen": -2.334805965423584, "logits/rejected": -2.0667991638183594, "logps/chosen": -291.00115966796875, "logps/rejected": -256.76666259765625, "loss": 0.0146, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07685528695583344, "rewards/margins": 0.10979298502206802, "rewards/rejected": -0.18664827942848206, "step": 8180 }, { "epoch": 0.54, "learning_rate": 2.6235691876431706e-06, "logits/chosen": -2.138881206512451, "logits/rejected": -2.1886343955993652, "logps/chosen": -233.529541015625, "logps/rejected": -253.8592987060547, "loss": 0.0195, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07456973940134048, "rewards/margins": 0.07365313172340393, "rewards/rejected": -0.1482228934764862, "step": 8190 }, { "epoch": 0.54, "learning_rate": 2.6178659391829673e-06, "logits/chosen": -2.356948137283325, "logits/rejected": -2.0806820392608643, "logps/chosen": -248.50607299804688, "logps/rejected": -223.2180633544922, "loss": 0.0147, "rewards/accuracies": 0.75, "rewards/chosen": -0.055140793323516846, "rewards/margins": 0.07777590304613113, "rewards/rejected": -0.13291668891906738, "step": 8200 }, { "epoch": 0.54, "eval_logits/chosen": -2.2300643920898438, "eval_logits/rejected": -2.045851945877075, "eval_logps/chosen": -248.29393005371094, "eval_logps/rejected": -244.34071350097656, "eval_loss": 0.025006111711263657, "eval_rewards/accuracies": 0.6455000042915344, "eval_rewards/chosen": -0.08144490420818329, "eval_rewards/margins": 0.08219918608665466, "eval_rewards/rejected": -0.16364407539367676, "eval_runtime": 712.7354, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 8200 }, { "epoch": 0.54, "learning_rate": 2.6121620758762877e-06, "logits/chosen": -2.213970422744751, "logits/rejected": -1.9608663320541382, "logps/chosen": -209.7481231689453, "logps/rejected": -221.8324432373047, "loss": 0.0346, "rewards/accuracies": 0.625, "rewards/chosen": -0.08831901103258133, "rewards/margins": 0.06441696733236313, "rewards/rejected": -0.15273597836494446, "step": 8210 }, { "epoch": 0.54, "learning_rate": 2.606457627477277e-06, "logits/chosen": -2.149473190307617, "logits/rejected": -2.0927867889404297, "logps/chosen": -189.00047302246094, "logps/rejected": -210.3500213623047, "loss": 0.0487, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06685586273670197, "rewards/margins": 0.0899873748421669, "rewards/rejected": -0.15684325993061066, "step": 8220 }, { "epoch": 0.54, "learning_rate": 2.6007526237431324e-06, "logits/chosen": -2.3013267517089844, "logits/rejected": -2.2491354942321777, "logps/chosen": -196.39431762695312, "logps/rejected": -228.51321411132812, "loss": 0.0192, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07182405889034271, "rewards/margins": 0.09000401198863983, "rewards/rejected": -0.16182805597782135, "step": 8230 }, { "epoch": 0.54, "learning_rate": 2.5950470944339478e-06, "logits/chosen": -2.0825228691101074, "logits/rejected": -2.1439049243927, "logps/chosen": -231.905517578125, "logps/rejected": -234.68832397460938, "loss": 0.0335, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.050518035888671875, "rewards/margins": 0.03139277547597885, "rewards/rejected": -0.08191081136465073, "step": 8240 }, { "epoch": 0.54, "learning_rate": 2.58934106931256e-06, "logits/chosen": -2.217744827270508, "logits/rejected": -1.9220752716064453, "logps/chosen": -237.2313690185547, "logps/rejected": -231.385986328125, "loss": 0.0302, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08229657262563705, "rewards/margins": 0.06666077673435211, "rewards/rejected": -0.14895735681056976, "step": 8250 }, { "epoch": 0.54, "learning_rate": 2.58363457814439e-06, "logits/chosen": -2.212259292602539, "logits/rejected": -1.929527997970581, "logps/chosen": -230.8060760498047, "logps/rejected": -234.58523559570312, "loss": 0.0308, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.10067076981067657, "rewards/margins": 0.08751292526721954, "rewards/rejected": -0.18818369507789612, "step": 8260 }, { "epoch": 0.54, "learning_rate": 2.5779276506972924e-06, "logits/chosen": -2.1959948539733887, "logits/rejected": -2.173485279083252, "logps/chosen": -247.912841796875, "logps/rejected": -220.7331085205078, "loss": 0.0237, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0797998383641243, "rewards/margins": 0.06101224943995476, "rewards/rejected": -0.14081206917762756, "step": 8270 }, { "epoch": 0.54, "learning_rate": 2.5722203167413945e-06, "logits/chosen": -2.316793441772461, "logits/rejected": -1.9904791116714478, "logps/chosen": -299.3642272949219, "logps/rejected": -234.1168975830078, "loss": 0.016, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08033281564712524, "rewards/margins": 0.08336742967367172, "rewards/rejected": -0.16370025277137756, "step": 8280 }, { "epoch": 0.54, "learning_rate": 2.5665126060489476e-06, "logits/chosen": -2.2750446796417236, "logits/rejected": -2.1247048377990723, "logps/chosen": -204.67526245117188, "logps/rejected": -246.42562866210938, "loss": 0.0132, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08286619186401367, "rewards/margins": 0.06451877951622009, "rewards/rejected": -0.14738497138023376, "step": 8290 }, { "epoch": 0.54, "learning_rate": 2.560804548394165e-06, "logits/chosen": -2.1876912117004395, "logits/rejected": -1.9274814128875732, "logps/chosen": -263.56103515625, "logps/rejected": -235.9149932861328, "loss": 0.0166, "rewards/accuracies": 0.625, "rewards/chosen": -0.08140331506729126, "rewards/margins": 0.08813115209341049, "rewards/rejected": -0.16953447461128235, "step": 8300 }, { "epoch": 0.54, "eval_logits/chosen": -2.2465648651123047, "eval_logits/rejected": -2.06178617477417, "eval_logps/chosen": -244.70086669921875, "eval_logps/rejected": -239.91378784179688, "eval_loss": 0.025410430505871773, "eval_rewards/accuracies": 0.6535000205039978, "eval_rewards/chosen": -0.06347952038049698, "eval_rewards/margins": 0.07802990823984146, "eval_rewards/rejected": -0.14150942862033844, "eval_runtime": 712.3701, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 8300 }, { "epoch": 0.54, "learning_rate": 2.5550961735530734e-06, "logits/chosen": -2.0759482383728027, "logits/rejected": -2.251865863800049, "logps/chosen": -172.39732360839844, "logps/rejected": -215.35757446289062, "loss": 0.0252, "rewards/accuracies": 0.625, "rewards/chosen": -0.046807561069726944, "rewards/margins": 0.06184772402048111, "rewards/rejected": -0.10865527391433716, "step": 8310 }, { "epoch": 0.54, "learning_rate": 2.549387511303351e-06, "logits/chosen": -2.231900215148926, "logits/rejected": -2.271174669265747, "logps/chosen": -181.90859985351562, "logps/rejected": -237.9917755126953, "loss": 0.0127, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.060858823359012604, "rewards/margins": 0.05901271849870682, "rewards/rejected": -0.11987153440713882, "step": 8320 }, { "epoch": 0.55, "learning_rate": 2.5436785914241774e-06, "logits/chosen": -2.1687874794006348, "logits/rejected": -2.1932742595672607, "logps/chosen": -214.6407012939453, "logps/rejected": -211.1691131591797, "loss": 0.0365, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0784073919057846, "rewards/margins": 0.14249031245708466, "rewards/rejected": -0.22089770436286926, "step": 8330 }, { "epoch": 0.55, "learning_rate": 2.5379694436960746e-06, "logits/chosen": -2.350149393081665, "logits/rejected": -2.1551597118377686, "logps/chosen": -255.09774780273438, "logps/rejected": -279.1950378417969, "loss": 0.0251, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.045433927327394485, "rewards/margins": 0.06766197085380554, "rewards/rejected": -0.11309590190649033, "step": 8340 }, { "epoch": 0.55, "learning_rate": 2.5322600979007533e-06, "logits/chosen": -2.3590166568756104, "logits/rejected": -2.1173205375671387, "logps/chosen": -226.3520050048828, "logps/rejected": -220.6983642578125, "loss": 0.0225, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06928284466266632, "rewards/margins": 0.07522784173488617, "rewards/rejected": -0.1445106863975525, "step": 8350 }, { "epoch": 0.55, "learning_rate": 2.5265505838209592e-06, "logits/chosen": -2.378201723098755, "logits/rejected": -2.035088062286377, "logps/chosen": -273.260986328125, "logps/rejected": -238.40170288085938, "loss": 0.0294, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08148683607578278, "rewards/margins": 0.057769954204559326, "rewards/rejected": -0.1392567902803421, "step": 8360 }, { "epoch": 0.55, "learning_rate": 2.520840931240314e-06, "logits/chosen": -2.4000751972198486, "logits/rejected": -1.9181627035140991, "logps/chosen": -222.73373413085938, "logps/rejected": -174.30149841308594, "loss": 0.021, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06462567299604416, "rewards/margins": 0.07644327729940414, "rewards/rejected": -0.1410689651966095, "step": 8370 }, { "epoch": 0.55, "learning_rate": 2.515131169943162e-06, "logits/chosen": -1.9641252756118774, "logits/rejected": -2.0435292720794678, "logps/chosen": -275.6920471191406, "logps/rejected": -287.4151611328125, "loss": 0.0239, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08798758685588837, "rewards/margins": 0.09347712248563766, "rewards/rejected": -0.18146470189094543, "step": 8380 }, { "epoch": 0.55, "learning_rate": 2.509421329714416e-06, "logits/chosen": -2.0971388816833496, "logits/rejected": -2.129549980163574, "logps/chosen": -218.18551635742188, "logps/rejected": -247.5358123779297, "loss": 0.0362, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.06023172661662102, "rewards/margins": 0.040791187435388565, "rewards/rejected": -0.10102292150259018, "step": 8390 }, { "epoch": 0.55, "learning_rate": 2.5037114403393987e-06, "logits/chosen": -2.2117886543273926, "logits/rejected": -1.9651107788085938, "logps/chosen": -221.71493530273438, "logps/rejected": -200.9381866455078, "loss": 0.0177, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05367283895611763, "rewards/margins": 0.06501881778240204, "rewards/rejected": -0.11869166791439056, "step": 8400 }, { "epoch": 0.55, "eval_logits/chosen": -2.2463998794555664, "eval_logits/rejected": -2.062314510345459, "eval_logps/chosen": -243.38662719726562, "eval_logps/rejected": -236.77584838867188, "eval_loss": 0.02601229026913643, "eval_rewards/accuracies": 0.6504999995231628, "eval_rewards/chosen": -0.05690838024020195, "eval_rewards/margins": 0.0689113661646843, "eval_rewards/rejected": -0.12581974267959595, "eval_runtime": 712.0051, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.404, "step": 8400 }, { "epoch": 0.55, "learning_rate": 2.4980015316036908e-06, "logits/chosen": -2.080289363861084, "logits/rejected": -2.130309581756592, "logps/chosen": -185.6266326904297, "logps/rejected": -229.3311309814453, "loss": 0.0218, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.049084652215242386, "rewards/margins": 0.11059342324733734, "rewards/rejected": -0.15967807173728943, "step": 8410 }, { "epoch": 0.55, "learning_rate": 2.4922916332929725e-06, "logits/chosen": -2.413717269897461, "logits/rejected": -2.1551430225372314, "logps/chosen": -245.4679718017578, "logps/rejected": -211.2089080810547, "loss": 0.0285, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.05021866410970688, "rewards/margins": 0.026777099817991257, "rewards/rejected": -0.07699576765298843, "step": 8420 }, { "epoch": 0.55, "learning_rate": 2.4865817751928716e-06, "logits/chosen": -2.151843786239624, "logits/rejected": -2.135716199874878, "logps/chosen": -206.24124145507812, "logps/rejected": -257.28741455078125, "loss": 0.0401, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.053762663155794144, "rewards/margins": 0.12312023341655731, "rewards/rejected": -0.17688289284706116, "step": 8430 }, { "epoch": 0.55, "learning_rate": 2.4808719870888037e-06, "logits/chosen": -2.012324333190918, "logits/rejected": -1.9392001628875732, "logps/chosen": -227.54605102539062, "logps/rejected": -215.35977172851562, "loss": 0.0174, "rewards/accuracies": 0.625, "rewards/chosen": -0.04937288910150528, "rewards/margins": 0.10543738305568695, "rewards/rejected": -0.15481027960777283, "step": 8440 }, { "epoch": 0.55, "learning_rate": 2.4751622987658206e-06, "logits/chosen": -2.4310302734375, "logits/rejected": -2.250427722930908, "logps/chosen": -246.6014404296875, "logps/rejected": -248.3928680419922, "loss": 0.0246, "rewards/accuracies": 0.625, "rewards/chosen": -0.04325593635439873, "rewards/margins": 0.06266029924154282, "rewards/rejected": -0.10591623932123184, "step": 8450 }, { "epoch": 0.55, "learning_rate": 2.4694527400084546e-06, "logits/chosen": -2.2096364498138428, "logits/rejected": -2.1173431873321533, "logps/chosen": -234.0587615966797, "logps/rejected": -243.300048828125, "loss": 0.0249, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.059070538729429245, "rewards/margins": 0.0754295364022255, "rewards/rejected": -0.13450007140636444, "step": 8460 }, { "epoch": 0.55, "learning_rate": 2.4637433406005607e-06, "logits/chosen": -2.410126209259033, "logits/rejected": -2.3014655113220215, "logps/chosen": -324.4518127441406, "logps/rejected": -303.3929748535156, "loss": 0.023, "rewards/accuracies": 0.625, "rewards/chosen": -0.07362736016511917, "rewards/margins": 0.034921690821647644, "rewards/rejected": -0.10854904353618622, "step": 8470 }, { "epoch": 0.55, "learning_rate": 2.4580341303251628e-06, "logits/chosen": -2.2237625122070312, "logits/rejected": -1.9655005931854248, "logps/chosen": -270.9044189453125, "logps/rejected": -251.58609008789062, "loss": 0.032, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.054766081273555756, "rewards/margins": 0.08540228009223938, "rewards/rejected": -0.14016836881637573, "step": 8480 }, { "epoch": 0.56, "learning_rate": 2.4523251389642984e-06, "logits/chosen": -2.1201000213623047, "logits/rejected": -1.9871246814727783, "logps/chosen": -269.6641845703125, "logps/rejected": -253.3271026611328, "loss": 0.0413, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0755244717001915, "rewards/margins": 0.10400693118572235, "rewards/rejected": -0.17953141033649445, "step": 8490 }, { "epoch": 0.56, "learning_rate": 2.4466163962988626e-06, "logits/chosen": -2.437121629714966, "logits/rejected": -2.0692362785339355, "logps/chosen": -293.087646484375, "logps/rejected": -214.70263671875, "loss": 0.0323, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.06243325024843216, "rewards/margins": 0.10167159885168076, "rewards/rejected": -0.16410483419895172, "step": 8500 }, { "epoch": 0.56, "eval_logits/chosen": -2.235217571258545, "eval_logits/rejected": -2.0510294437408447, "eval_logps/chosen": -244.1342315673828, "eval_logps/rejected": -241.17881774902344, "eval_loss": 0.024714848026633263, "eval_rewards/accuracies": 0.6589999794960022, "eval_rewards/chosen": -0.06064639613032341, "eval_rewards/margins": 0.08718820661306381, "eval_rewards/rejected": -0.14783459901809692, "eval_runtime": 714.7546, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 8500 }, { "epoch": 0.56, "learning_rate": 2.4409079321084543e-06, "logits/chosen": -2.1872916221618652, "logits/rejected": -2.2446069717407227, "logps/chosen": -222.677490234375, "logps/rejected": -270.8872985839844, "loss": 0.0329, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03778903931379318, "rewards/margins": 0.09135451167821884, "rewards/rejected": -0.12914356589317322, "step": 8510 }, { "epoch": 0.56, "learning_rate": 2.4351997761712184e-06, "logits/chosen": -2.450037956237793, "logits/rejected": -1.995489478111267, "logps/chosen": -255.88497924804688, "logps/rejected": -208.86666870117188, "loss": 0.0117, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05248330906033516, "rewards/margins": 0.09843796491622925, "rewards/rejected": -0.1509212702512741, "step": 8520 }, { "epoch": 0.56, "learning_rate": 2.4294919582636933e-06, "logits/chosen": -2.240788221359253, "logits/rejected": -2.096522092819214, "logps/chosen": -218.12100219726562, "logps/rejected": -224.65902709960938, "loss": 0.0286, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.035092391073703766, "rewards/margins": 0.09188707917928696, "rewards/rejected": -0.12697947025299072, "step": 8530 }, { "epoch": 0.56, "learning_rate": 2.423784508160652e-06, "logits/chosen": -2.3209753036499023, "logits/rejected": -2.0700995922088623, "logps/chosen": -269.3289489746094, "logps/rejected": -236.85446166992188, "loss": 0.0144, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07049113512039185, "rewards/margins": 0.07681427896022797, "rewards/rejected": -0.14730539917945862, "step": 8540 }, { "epoch": 0.56, "learning_rate": 2.418077455634951e-06, "logits/chosen": -2.1461005210876465, "logits/rejected": -2.193398952484131, "logps/chosen": -231.21365356445312, "logps/rejected": -268.70916748046875, "loss": 0.0164, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07321284711360931, "rewards/margins": 0.049691587686538696, "rewards/rejected": -0.12290443480014801, "step": 8550 }, { "epoch": 0.56, "learning_rate": 2.4123708304573714e-06, "logits/chosen": -2.339146852493286, "logits/rejected": -2.191551446914673, "logps/chosen": -300.6154479980469, "logps/rejected": -300.11444091796875, "loss": 0.034, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05424055457115173, "rewards/margins": 0.08098848909139633, "rewards/rejected": -0.13522902131080627, "step": 8560 }, { "epoch": 0.56, "learning_rate": 2.406664662396465e-06, "logits/chosen": -2.1146233081817627, "logits/rejected": -1.964341163635254, "logps/chosen": -203.61959838867188, "logps/rejected": -201.922607421875, "loss": 0.0154, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09618864208459854, "rewards/margins": 0.06992589682340622, "rewards/rejected": -0.16611452400684357, "step": 8570 }, { "epoch": 0.56, "learning_rate": 2.4009589812184012e-06, "logits/chosen": -2.2772903442382812, "logits/rejected": -1.8964955806732178, "logps/chosen": -215.86569213867188, "logps/rejected": -179.8876953125, "loss": 0.0142, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06322894990444183, "rewards/margins": 0.08566492795944214, "rewards/rejected": -0.14889389276504517, "step": 8580 }, { "epoch": 0.56, "learning_rate": 2.3952538166868073e-06, "logits/chosen": -2.0397861003875732, "logits/rejected": -2.1017701625823975, "logps/chosen": -232.49710083007812, "logps/rejected": -240.0894317626953, "loss": 0.0268, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07475374639034271, "rewards/margins": 0.12092749774456024, "rewards/rejected": -0.19568124413490295, "step": 8590 }, { "epoch": 0.56, "learning_rate": 2.389549198562616e-06, "logits/chosen": -2.240582227706909, "logits/rejected": -1.814883828163147, "logps/chosen": -238.9700469970703, "logps/rejected": -230.742919921875, "loss": 0.0178, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06444202363491058, "rewards/margins": 0.10876522958278656, "rewards/rejected": -0.17320728302001953, "step": 8600 }, { "epoch": 0.56, "eval_logits/chosen": -2.245410203933716, "eval_logits/rejected": -2.0606565475463867, "eval_logps/chosen": -245.94483947753906, "eval_logps/rejected": -243.05995178222656, "eval_loss": 0.024543337523937225, "eval_rewards/accuracies": 0.6610000133514404, "eval_rewards/chosen": -0.06969940662384033, "eval_rewards/margins": 0.08754073828458786, "eval_rewards/rejected": -0.1572401374578476, "eval_runtime": 712.5003, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.404, "step": 8600 }, { "epoch": 0.56, "learning_rate": 2.3838451566039098e-06, "logits/chosen": -2.270934581756592, "logits/rejected": -2.096938133239746, "logps/chosen": -255.7738037109375, "logps/rejected": -253.7856903076172, "loss": 0.0296, "rewards/accuracies": 0.625, "rewards/chosen": -0.08631912618875504, "rewards/margins": 0.040729813277721405, "rewards/rejected": -0.12704893946647644, "step": 8610 }, { "epoch": 0.56, "learning_rate": 2.3781417205657662e-06, "logits/chosen": -2.2661519050598145, "logits/rejected": -1.9691057205200195, "logps/chosen": -211.8663330078125, "logps/rejected": -189.75233459472656, "loss": 0.048, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.07054928690195084, "rewards/margins": 0.08046600222587585, "rewards/rejected": -0.1510152816772461, "step": 8620 }, { "epoch": 0.56, "learning_rate": 2.3724389202001006e-06, "logits/chosen": -2.2923483848571777, "logits/rejected": -2.0269789695739746, "logps/chosen": -218.217529296875, "logps/rejected": -207.7074737548828, "loss": 0.0202, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07947049289941788, "rewards/margins": 0.05593295767903328, "rewards/rejected": -0.13540346920490265, "step": 8630 }, { "epoch": 0.57, "learning_rate": 2.366736785255514e-06, "logits/chosen": -2.1759963035583496, "logits/rejected": -2.105229139328003, "logps/chosen": -216.8457489013672, "logps/rejected": -219.7010498046875, "loss": 0.0184, "rewards/accuracies": 0.625, "rewards/chosen": -0.0905846655368805, "rewards/margins": 0.0703764483332634, "rewards/rejected": -0.1609611064195633, "step": 8640 }, { "epoch": 0.57, "learning_rate": 2.3610353454771355e-06, "logits/chosen": -2.075446605682373, "logits/rejected": -2.004032850265503, "logps/chosen": -204.26815795898438, "logps/rejected": -202.31051635742188, "loss": 0.0396, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.07160364091396332, "rewards/margins": 0.0716845691204071, "rewards/rejected": -0.14328821003437042, "step": 8650 }, { "epoch": 0.57, "learning_rate": 2.355334630606467e-06, "logits/chosen": -2.4558193683624268, "logits/rejected": -1.9807491302490234, "logps/chosen": -254.63613891601562, "logps/rejected": -205.7122039794922, "loss": 0.0128, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07172928750514984, "rewards/margins": 0.07310833036899567, "rewards/rejected": -0.1448376178741455, "step": 8660 }, { "epoch": 0.57, "learning_rate": 2.349634670381231e-06, "logits/chosen": -2.062732696533203, "logits/rejected": -2.0113823413848877, "logps/chosen": -223.168701171875, "logps/rejected": -247.4897918701172, "loss": 0.0395, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0798480361700058, "rewards/margins": 0.07261226326227188, "rewards/rejected": -0.15246029198169708, "step": 8670 }, { "epoch": 0.57, "learning_rate": 2.3439354945352104e-06, "logits/chosen": -2.3090953826904297, "logits/rejected": -2.245356559753418, "logps/chosen": -259.2715148925781, "logps/rejected": -221.3633575439453, "loss": 0.066, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.07873961329460144, "rewards/margins": 0.032447971403598785, "rewards/rejected": -0.11118757724761963, "step": 8680 }, { "epoch": 0.57, "learning_rate": 2.3382371327981e-06, "logits/chosen": -2.170595645904541, "logits/rejected": -2.1648993492126465, "logps/chosen": -243.6365203857422, "logps/rejected": -248.4460906982422, "loss": 0.0252, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06080128625035286, "rewards/margins": 0.0955219715833664, "rewards/rejected": -0.15632323920726776, "step": 8690 }, { "epoch": 0.57, "learning_rate": 2.3325396148953456e-06, "logits/chosen": -2.034379482269287, "logits/rejected": -2.143871784210205, "logps/chosen": -190.2666473388672, "logps/rejected": -265.8991394042969, "loss": 0.0473, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09482596069574356, "rewards/margins": 0.10751118510961533, "rewards/rejected": -0.2023371458053589, "step": 8700 }, { "epoch": 0.57, "eval_logits/chosen": -2.2517881393432617, "eval_logits/rejected": -2.0663349628448486, "eval_logps/chosen": -245.9043426513672, "eval_logps/rejected": -242.30227661132812, "eval_loss": 0.024720149114727974, "eval_rewards/accuracies": 0.656499981880188, "eval_rewards/chosen": -0.06949705630540848, "eval_rewards/margins": 0.08395478129386902, "eval_rewards/rejected": -0.1534518301486969, "eval_runtime": 712.8179, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 8700 }, { "epoch": 0.57, "learning_rate": 2.3268429705479915e-06, "logits/chosen": -2.4342236518859863, "logits/rejected": -2.078758716583252, "logps/chosen": -238.2794189453125, "logps/rejected": -214.2716064453125, "loss": 0.0192, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06733395159244537, "rewards/margins": 0.08655620366334915, "rewards/rejected": -0.15389014780521393, "step": 8710 }, { "epoch": 0.57, "learning_rate": 2.3211472294725248e-06, "logits/chosen": -2.279585599899292, "logits/rejected": -2.1437907218933105, "logps/chosen": -225.87057495117188, "logps/rejected": -233.3922882080078, "loss": 0.0289, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04727703332901001, "rewards/margins": 0.1009441465139389, "rewards/rejected": -0.1482211798429489, "step": 8720 }, { "epoch": 0.57, "learning_rate": 2.315452421380721e-06, "logits/chosen": -2.160231113433838, "logits/rejected": -1.7129781246185303, "logps/chosen": -268.67547607421875, "logps/rejected": -238.1331024169922, "loss": 0.0218, "rewards/accuracies": 0.625, "rewards/chosen": -0.068655826151371, "rewards/margins": 0.08725351095199585, "rewards/rejected": -0.15590932965278625, "step": 8730 }, { "epoch": 0.57, "learning_rate": 2.3097585759794886e-06, "logits/chosen": -2.252720832824707, "logits/rejected": -1.8722463846206665, "logps/chosen": -263.8703308105469, "logps/rejected": -228.2633056640625, "loss": 0.0213, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04764527827501297, "rewards/margins": 0.1389247328042984, "rewards/rejected": -0.18657000362873077, "step": 8740 }, { "epoch": 0.57, "learning_rate": 2.3040657229707155e-06, "logits/chosen": -2.262620210647583, "logits/rejected": -2.1514811515808105, "logps/chosen": -183.5465850830078, "logps/rejected": -213.42861938476562, "loss": 0.0202, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0574948713183403, "rewards/margins": 0.09163203835487366, "rewards/rejected": -0.14912688732147217, "step": 8750 }, { "epoch": 0.57, "learning_rate": 2.2983738920511104e-06, "logits/chosen": -2.420673370361328, "logits/rejected": -1.949000597000122, "logps/chosen": -275.28204345703125, "logps/rejected": -241.54721069335938, "loss": 0.0235, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.027445774525403976, "rewards/margins": 0.08283614367246628, "rewards/rejected": -0.11028194427490234, "step": 8760 }, { "epoch": 0.57, "learning_rate": 2.2926831129120523e-06, "logits/chosen": -2.072657346725464, "logits/rejected": -2.0243425369262695, "logps/chosen": -245.273193359375, "logps/rejected": -228.0929718017578, "loss": 0.0221, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04740776866674423, "rewards/margins": 0.055877458304166794, "rewards/rejected": -0.10328521579504013, "step": 8770 }, { "epoch": 0.57, "learning_rate": 2.2869934152394323e-06, "logits/chosen": -2.268575668334961, "logits/rejected": -2.009955883026123, "logps/chosen": -283.28582763671875, "logps/rejected": -244.081787109375, "loss": 0.0299, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.07160186767578125, "rewards/margins": 0.08199025690555573, "rewards/rejected": -0.15359210968017578, "step": 8780 }, { "epoch": 0.58, "learning_rate": 2.281304828713501e-06, "logits/chosen": -2.165152072906494, "logits/rejected": -2.090395212173462, "logps/chosen": -247.3802947998047, "logps/rejected": -253.34701538085938, "loss": 0.0257, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0719650387763977, "rewards/margins": 0.06734617799520493, "rewards/rejected": -0.13931122422218323, "step": 8790 }, { "epoch": 0.58, "learning_rate": 2.275617383008711e-06, "logits/chosen": -2.2487900257110596, "logits/rejected": -2.1492092609405518, "logps/chosen": -248.94287109375, "logps/rejected": -257.15435791015625, "loss": 0.0302, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06727709621191025, "rewards/margins": 0.05439624935388565, "rewards/rejected": -0.1216733306646347, "step": 8800 }, { "epoch": 0.58, "eval_logits/chosen": -2.244839906692505, "eval_logits/rejected": -2.059295177459717, "eval_logps/chosen": -241.635009765625, "eval_logps/rejected": -237.9780731201172, "eval_loss": 0.024881817400455475, "eval_rewards/accuracies": 0.6610000133514404, "eval_rewards/chosen": -0.04815037548542023, "eval_rewards/margins": 0.08368047326803207, "eval_rewards/rejected": -0.1318308562040329, "eval_runtime": 714.7964, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 8800 }, { "epoch": 0.58, "learning_rate": 2.269931107793567e-06, "logits/chosen": -2.1685874462127686, "logits/rejected": -2.078900098800659, "logps/chosen": -219.8336944580078, "logps/rejected": -240.2872772216797, "loss": 0.0254, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03776669502258301, "rewards/margins": 0.06180558353662491, "rewards/rejected": -0.09957227855920792, "step": 8810 }, { "epoch": 0.58, "learning_rate": 2.2642460327304655e-06, "logits/chosen": -2.1021993160247803, "logits/rejected": -2.151242256164551, "logps/chosen": -254.8282928466797, "logps/rejected": -253.4233856201172, "loss": 0.0213, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.05134277418255806, "rewards/margins": 0.0722130537033081, "rewards/rejected": -0.12355582416057587, "step": 8820 }, { "epoch": 0.58, "learning_rate": 2.258562187475543e-06, "logits/chosen": -2.0955007076263428, "logits/rejected": -2.0419440269470215, "logps/chosen": -237.06796264648438, "logps/rejected": -215.91860961914062, "loss": 0.0155, "rewards/accuracies": 0.625, "rewards/chosen": -0.05060536786913872, "rewards/margins": 0.08181245625019073, "rewards/rejected": -0.13241782784461975, "step": 8830 }, { "epoch": 0.58, "learning_rate": 2.2528796016785196e-06, "logits/chosen": -2.134673595428467, "logits/rejected": -1.9623810052871704, "logps/chosen": -200.1417694091797, "logps/rejected": -232.51611328125, "loss": 0.0249, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.051119185984134674, "rewards/margins": 0.11354148387908936, "rewards/rejected": -0.16466066241264343, "step": 8840 }, { "epoch": 0.58, "learning_rate": 2.247198304982548e-06, "logits/chosen": -2.17331600189209, "logits/rejected": -1.9789447784423828, "logps/chosen": -168.8157501220703, "logps/rejected": -181.06729125976562, "loss": 0.0251, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.034221209585666656, "rewards/margins": 0.08100482821464539, "rewards/rejected": -0.11522604525089264, "step": 8850 }, { "epoch": 0.58, "learning_rate": 2.2415183270240533e-06, "logits/chosen": -2.458310604095459, "logits/rejected": -2.2174506187438965, "logps/chosen": -208.83425903320312, "logps/rejected": -231.552001953125, "loss": 0.0448, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04752310365438461, "rewards/margins": 0.0920606404542923, "rewards/rejected": -0.1395837366580963, "step": 8860 }, { "epoch": 0.58, "learning_rate": 2.2358396974325837e-06, "logits/chosen": -2.2556686401367188, "logits/rejected": -2.0556740760803223, "logps/chosen": -250.03115844726562, "logps/rejected": -243.87911987304688, "loss": 0.035, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03881791979074478, "rewards/margins": 0.10057507455348969, "rewards/rejected": -0.13939300179481506, "step": 8870 }, { "epoch": 0.58, "learning_rate": 2.2301624458306525e-06, "logits/chosen": -2.3523645401000977, "logits/rejected": -2.0720772743225098, "logps/chosen": -273.26007080078125, "logps/rejected": -234.77783203125, "loss": 0.0221, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06955992430448532, "rewards/margins": 0.06429468095302582, "rewards/rejected": -0.13385462760925293, "step": 8880 }, { "epoch": 0.58, "learning_rate": 2.2244866018335855e-06, "logits/chosen": -2.220266819000244, "logits/rejected": -2.207763671875, "logps/chosen": -227.5157928466797, "logps/rejected": -256.3835144042969, "loss": 0.0308, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05229061841964722, "rewards/margins": 0.07230345159769058, "rewards/rejected": -0.1245940700173378, "step": 8890 }, { "epoch": 0.58, "learning_rate": 2.2188121950493648e-06, "logits/chosen": -2.357114791870117, "logits/rejected": -1.9857616424560547, "logps/chosen": -233.6726837158203, "logps/rejected": -167.13748168945312, "loss": 0.0391, "rewards/accuracies": 0.625, "rewards/chosen": -0.07464625686407089, "rewards/margins": 0.05858578532934189, "rewards/rejected": -0.13323204219341278, "step": 8900 }, { "epoch": 0.58, "eval_logits/chosen": -2.252225399017334, "eval_logits/rejected": -2.0657691955566406, "eval_logps/chosen": -244.75286865234375, "eval_logps/rejected": -242.57672119140625, "eval_loss": 0.02482656203210354, "eval_rewards/accuracies": 0.6620000004768372, "eval_rewards/chosen": -0.06373953819274902, "eval_rewards/margins": 0.09108465164899826, "eval_rewards/rejected": -0.15482419729232788, "eval_runtime": 715.6883, "eval_samples_per_second": 2.795, "eval_steps_per_second": 1.397, "step": 8900 }, { "epoch": 0.58, "learning_rate": 2.2131392550784766e-06, "logits/chosen": -2.3761825561523438, "logits/rejected": -1.8096641302108765, "logps/chosen": -298.76287841796875, "logps/rejected": -220.29977416992188, "loss": 0.0231, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.06942112743854523, "rewards/margins": 0.08550569415092468, "rewards/rejected": -0.1549268215894699, "step": 8910 }, { "epoch": 0.58, "learning_rate": 2.2074678115137533e-06, "logits/chosen": -2.0481929779052734, "logits/rejected": -1.9484479427337646, "logps/chosen": -207.98080444335938, "logps/rejected": -239.3912353515625, "loss": 0.0285, "rewards/accuracies": 0.75, "rewards/chosen": -0.06631931662559509, "rewards/margins": 0.13246922194957733, "rewards/rejected": -0.19878853857517242, "step": 8920 }, { "epoch": 0.58, "learning_rate": 2.201797893940224e-06, "logits/chosen": -2.143235921859741, "logits/rejected": -1.944832444190979, "logps/chosen": -245.1650390625, "logps/rejected": -282.40557861328125, "loss": 0.0125, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.059938348829746246, "rewards/margins": 0.08444986492395401, "rewards/rejected": -0.14438821375370026, "step": 8930 }, { "epoch": 0.58, "learning_rate": 2.196129531934956e-06, "logits/chosen": -2.189138889312744, "logits/rejected": -1.9247684478759766, "logps/chosen": -246.6875, "logps/rejected": -246.45431518554688, "loss": 0.0124, "rewards/accuracies": 0.625, "rewards/chosen": -0.04386736825108528, "rewards/margins": 0.08914701640605927, "rewards/rejected": -0.13301438093185425, "step": 8940 }, { "epoch": 0.59, "learning_rate": 2.190462755066902e-06, "logits/chosen": -2.207003593444824, "logits/rejected": -1.9766992330551147, "logps/chosen": -278.06781005859375, "logps/rejected": -265.1041564941406, "loss": 0.0133, "rewards/accuracies": 0.75, "rewards/chosen": -0.07833322882652283, "rewards/margins": 0.0626354068517685, "rewards/rejected": -0.14096863567829132, "step": 8950 }, { "epoch": 0.59, "learning_rate": 2.184797592896746e-06, "logits/chosen": -2.334012985229492, "logits/rejected": -2.29227876663208, "logps/chosen": -243.9264373779297, "logps/rejected": -234.7732391357422, "loss": 0.0115, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04745422303676605, "rewards/margins": 0.08689974248409271, "rewards/rejected": -0.13435396552085876, "step": 8960 }, { "epoch": 0.59, "learning_rate": 2.17913407497675e-06, "logits/chosen": -2.2696456909179688, "logits/rejected": -2.334486961364746, "logps/chosen": -184.894287109375, "logps/rejected": -233.4813690185547, "loss": 0.0531, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.03256151080131531, "rewards/margins": 0.07317936420440674, "rewards/rejected": -0.10574086755514145, "step": 8970 }, { "epoch": 0.59, "learning_rate": 2.173472230850596e-06, "logits/chosen": -2.382445812225342, "logits/rejected": -2.188955307006836, "logps/chosen": -213.95181274414062, "logps/rejected": -181.55081176757812, "loss": 0.0555, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.06844629347324371, "rewards/margins": 0.04021351411938667, "rewards/rejected": -0.10865980386734009, "step": 8980 }, { "epoch": 0.59, "learning_rate": 2.1678120900532375e-06, "logits/chosen": -2.349627733230591, "logits/rejected": -2.056666851043701, "logps/chosen": -248.0489501953125, "logps/rejected": -237.8482666015625, "loss": 0.0268, "rewards/accuracies": 0.75, "rewards/chosen": -0.06067051738500595, "rewards/margins": 0.09529812633991241, "rewards/rejected": -0.15596863627433777, "step": 8990 }, { "epoch": 0.59, "learning_rate": 2.1621536821107412e-06, "logits/chosen": -2.244640827178955, "logits/rejected": -2.11474609375, "logps/chosen": -210.2344512939453, "logps/rejected": -187.39842224121094, "loss": 0.0377, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03832307457923889, "rewards/margins": 0.09598144143819809, "rewards/rejected": -0.13430452346801758, "step": 9000 }, { "epoch": 0.59, "eval_logits/chosen": -2.261298418045044, "eval_logits/rejected": -2.074450731277466, "eval_logps/chosen": -239.097412109375, "eval_logps/rejected": -235.3852996826172, "eval_loss": 0.024586597457528114, "eval_rewards/accuracies": 0.6575000286102295, "eval_rewards/chosen": -0.03546227514743805, "eval_rewards/margins": 0.08340466767549515, "eval_rewards/rejected": -0.11886695772409439, "eval_runtime": 710.489, "eval_samples_per_second": 2.815, "eval_steps_per_second": 1.407, "step": 9000 }, { "epoch": 0.59, "learning_rate": 2.1564970365401346e-06, "logits/chosen": -2.3016772270202637, "logits/rejected": -2.036717653274536, "logps/chosen": -191.85366821289062, "logps/rejected": -171.2881317138672, "loss": 0.0267, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.038512926548719406, "rewards/margins": 0.08721283823251724, "rewards/rejected": -0.12572576105594635, "step": 9010 }, { "epoch": 0.59, "learning_rate": 2.1508421828492527e-06, "logits/chosen": -2.4234752655029297, "logits/rejected": -2.0773463249206543, "logps/chosen": -228.48855590820312, "logps/rejected": -175.48695373535156, "loss": 0.0359, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.016479194164276123, "rewards/margins": 0.07619353383779526, "rewards/rejected": -0.09267272800207138, "step": 9020 }, { "epoch": 0.59, "learning_rate": 2.145189150536582e-06, "logits/chosen": -2.104311466217041, "logits/rejected": -2.0074868202209473, "logps/chosen": -226.5256805419922, "logps/rejected": -189.82669067382812, "loss": 0.0397, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0217808298766613, "rewards/margins": 0.06351622194051743, "rewards/rejected": -0.08529704809188843, "step": 9030 }, { "epoch": 0.59, "learning_rate": 2.139537969091107e-06, "logits/chosen": -2.132573366165161, "logits/rejected": -2.097846508026123, "logps/chosen": -273.43206787109375, "logps/rejected": -218.9208526611328, "loss": 0.0275, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03924193233251572, "rewards/margins": 0.038296617567539215, "rewards/rejected": -0.07753854990005493, "step": 9040 }, { "epoch": 0.59, "learning_rate": 2.1338886679921603e-06, "logits/chosen": -2.211646795272827, "logits/rejected": -2.1421194076538086, "logps/chosen": -247.41549682617188, "logps/rejected": -239.46170043945312, "loss": 0.0306, "rewards/accuracies": 0.625, "rewards/chosen": -0.029362160712480545, "rewards/margins": 0.06228286027908325, "rewards/rejected": -0.0916450172662735, "step": 9050 }, { "epoch": 0.59, "learning_rate": 2.128241276709263e-06, "logits/chosen": -2.2989306449890137, "logits/rejected": -2.2443785667419434, "logps/chosen": -204.4984588623047, "logps/rejected": -238.19253540039062, "loss": 0.0304, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.006183523219078779, "rewards/margins": 0.068283312022686, "rewards/rejected": -0.07446683943271637, "step": 9060 }, { "epoch": 0.59, "learning_rate": 2.1225958247019746e-06, "logits/chosen": -2.3331146240234375, "logits/rejected": -2.462541103363037, "logps/chosen": -191.1440887451172, "logps/rejected": -221.80014038085938, "loss": 0.0138, "rewards/accuracies": 0.625, "rewards/chosen": -0.02001136541366577, "rewards/margins": 0.05258213356137276, "rewards/rejected": -0.07259351015090942, "step": 9070 }, { "epoch": 0.59, "learning_rate": 2.1169523414197383e-06, "logits/chosen": -2.11511492729187, "logits/rejected": -2.1142194271087646, "logps/chosen": -203.34683227539062, "logps/rejected": -236.80703735351562, "loss": 0.0143, "rewards/accuracies": 0.5, "rewards/chosen": -0.022705694660544395, "rewards/margins": 0.05069264769554138, "rewards/rejected": -0.07339833676815033, "step": 9080 }, { "epoch": 0.59, "learning_rate": 2.1113108563017267e-06, "logits/chosen": -2.1969635486602783, "logits/rejected": -1.9869884252548218, "logps/chosen": -224.9974822998047, "logps/rejected": -210.8538055419922, "loss": 0.0251, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.061071284115314484, "rewards/margins": 0.0824274942278862, "rewards/rejected": -0.14349877834320068, "step": 9090 }, { "epoch": 0.6, "learning_rate": 2.1056713987766905e-06, "logits/chosen": -2.407088041305542, "logits/rejected": -2.0729117393493652, "logps/chosen": -222.61288452148438, "logps/rejected": -190.90902709960938, "loss": 0.0296, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03706289082765579, "rewards/margins": 0.08505786955356598, "rewards/rejected": -0.12212076038122177, "step": 9100 }, { "epoch": 0.6, "eval_logits/chosen": -2.2747437953948975, "eval_logits/rejected": -2.087127208709717, "eval_logps/chosen": -239.7537078857422, "eval_logps/rejected": -234.9412078857422, "eval_loss": 0.024893444031476974, "eval_rewards/accuracies": 0.6549999713897705, "eval_rewards/chosen": -0.03874371945858002, "eval_rewards/margins": 0.07790277898311615, "eval_rewards/rejected": -0.11664648354053497, "eval_runtime": 713.9364, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.401, "step": 9100 }, { "epoch": 0.6, "learning_rate": 2.1000339982628022e-06, "logits/chosen": -2.072683334350586, "logits/rejected": -2.154127597808838, "logps/chosen": -262.1419372558594, "logps/rejected": -240.17245483398438, "loss": 0.0208, "rewards/accuracies": 0.625, "rewards/chosen": -0.05434552580118179, "rewards/margins": 0.06175314635038376, "rewards/rejected": -0.11609866470098495, "step": 9110 }, { "epoch": 0.6, "learning_rate": 2.0943986841675043e-06, "logits/chosen": -2.290806293487549, "logits/rejected": -2.060344696044922, "logps/chosen": -209.92916870117188, "logps/rejected": -210.94656372070312, "loss": 0.012, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03359084203839302, "rewards/margins": 0.1017860397696495, "rewards/rejected": -0.13537687063217163, "step": 9120 }, { "epoch": 0.6, "learning_rate": 2.088765485887356e-06, "logits/chosen": -2.2724077701568604, "logits/rejected": -2.0708746910095215, "logps/chosen": -252.2576141357422, "logps/rejected": -218.6079559326172, "loss": 0.0253, "rewards/accuracies": 0.5, "rewards/chosen": -0.03771350532770157, "rewards/margins": 0.039878927171230316, "rewards/rejected": -0.07759243249893188, "step": 9130 }, { "epoch": 0.6, "learning_rate": 2.083134432807879e-06, "logits/chosen": -2.2039096355438232, "logits/rejected": -2.1305642127990723, "logps/chosen": -206.875244140625, "logps/rejected": -245.89584350585938, "loss": 0.0208, "rewards/accuracies": 0.625, "rewards/chosen": -0.06301550567150116, "rewards/margins": 0.08751269429922104, "rewards/rejected": -0.1505282074213028, "step": 9140 }, { "epoch": 0.6, "learning_rate": 2.077505554303404e-06, "logits/chosen": -2.2766964435577393, "logits/rejected": -2.2406208515167236, "logps/chosen": -176.80477905273438, "logps/rejected": -193.82846069335938, "loss": 0.0159, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.011263975873589516, "rewards/margins": 0.06328533589839935, "rewards/rejected": -0.07454931735992432, "step": 9150 }, { "epoch": 0.6, "learning_rate": 2.071878879736918e-06, "logits/chosen": -2.2843353748321533, "logits/rejected": -2.0918726921081543, "logps/chosen": -257.02337646484375, "logps/rejected": -349.31024169921875, "loss": 0.0241, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05091879516839981, "rewards/margins": 0.07001566886901855, "rewards/rejected": -0.12093446403741837, "step": 9160 }, { "epoch": 0.6, "learning_rate": 2.0662544384599136e-06, "logits/chosen": -2.1885151863098145, "logits/rejected": -2.1158032417297363, "logps/chosen": -206.37759399414062, "logps/rejected": -206.095703125, "loss": 0.0314, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.021354788914322853, "rewards/margins": 0.0810171440243721, "rewards/rejected": -0.1023719310760498, "step": 9170 }, { "epoch": 0.6, "learning_rate": 2.0606322598122314e-06, "logits/chosen": -2.1867973804473877, "logits/rejected": -2.303119421005249, "logps/chosen": -194.89300537109375, "logps/rejected": -221.6018829345703, "loss": 0.0151, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.041897259652614594, "rewards/margins": 0.035239990800619125, "rewards/rejected": -0.07713725417852402, "step": 9180 }, { "epoch": 0.6, "learning_rate": 2.0550123731219085e-06, "logits/chosen": -2.4753687381744385, "logits/rejected": -2.300013303756714, "logps/chosen": -258.02777099609375, "logps/rejected": -230.7709197998047, "loss": 0.0252, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.024239610880613327, "rewards/margins": 0.06944013386964798, "rewards/rejected": -0.09367974102497101, "step": 9190 }, { "epoch": 0.6, "learning_rate": 2.0493948077050267e-06, "logits/chosen": -2.128877639770508, "logits/rejected": -1.9453260898590088, "logps/chosen": -199.78512573242188, "logps/rejected": -198.4648895263672, "loss": 0.0241, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.03264569118618965, "rewards/margins": 0.08384671807289124, "rewards/rejected": -0.11649241298437119, "step": 9200 }, { "epoch": 0.6, "eval_logits/chosen": -2.2942728996276855, "eval_logits/rejected": -2.1060142517089844, "eval_logps/chosen": -239.1661376953125, "eval_logps/rejected": -233.8347930908203, "eval_loss": 0.025179192423820496, "eval_rewards/accuracies": 0.6575000286102295, "eval_rewards/chosen": -0.03580596297979355, "eval_rewards/margins": 0.07530846446752548, "eval_rewards/rejected": -0.11111443489789963, "eval_runtime": 713.7289, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 9200 }, { "epoch": 0.6, "learning_rate": 2.0437795928655596e-06, "logits/chosen": -2.318690538406372, "logits/rejected": -2.2952334880828857, "logps/chosen": -288.6143493652344, "logps/rejected": -281.8822326660156, "loss": 0.0261, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03450857847929001, "rewards/margins": 0.05725806951522827, "rewards/rejected": -0.09176664799451828, "step": 9210 }, { "epoch": 0.6, "learning_rate": 2.0381667578952184e-06, "logits/chosen": -2.395092010498047, "logits/rejected": -2.1757171154022217, "logps/chosen": -219.50790405273438, "logps/rejected": -246.2615966796875, "loss": 0.0494, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04317527264356613, "rewards/margins": 0.10568326711654663, "rewards/rejected": -0.14885856211185455, "step": 9220 }, { "epoch": 0.6, "learning_rate": 2.0325563320732995e-06, "logits/chosen": -2.4702086448669434, "logits/rejected": -2.082486629486084, "logps/chosen": -271.30841064453125, "logps/rejected": -244.59213256835938, "loss": 0.0197, "rewards/accuracies": 0.625, "rewards/chosen": -0.03985026478767395, "rewards/margins": 0.08650810271501541, "rewards/rejected": -0.12635836005210876, "step": 9230 }, { "epoch": 0.6, "learning_rate": 2.026948344666532e-06, "logits/chosen": -2.2059807777404785, "logits/rejected": -2.1636924743652344, "logps/chosen": -211.00961303710938, "logps/rejected": -226.2769012451172, "loss": 0.0311, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.05941852927207947, "rewards/margins": 0.07855961471796036, "rewards/rejected": -0.13797815144062042, "step": 9240 }, { "epoch": 0.61, "learning_rate": 2.0213428249289257e-06, "logits/chosen": -2.1782264709472656, "logits/rejected": -2.0983102321624756, "logps/chosen": -207.1298828125, "logps/rejected": -227.71847534179688, "loss": 0.0269, "rewards/accuracies": 0.625, "rewards/chosen": -0.042946770787239075, "rewards/margins": 0.09390541166067123, "rewards/rejected": -0.1368521898984909, "step": 9250 }, { "epoch": 0.61, "learning_rate": 2.0157398021016175e-06, "logits/chosen": -2.191451072692871, "logits/rejected": -2.1529908180236816, "logps/chosen": -153.64662170410156, "logps/rejected": -216.68057250976562, "loss": 0.0254, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.030556291341781616, "rewards/margins": 0.08414594829082489, "rewards/rejected": -0.1147022470831871, "step": 9260 }, { "epoch": 0.61, "learning_rate": 2.010139305412719e-06, "logits/chosen": -2.468810558319092, "logits/rejected": -2.2477877140045166, "logps/chosen": -286.96978759765625, "logps/rejected": -258.82525634765625, "loss": 0.0128, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.051870543509721756, "rewards/margins": 0.07109765708446503, "rewards/rejected": -0.12296819686889648, "step": 9270 }, { "epoch": 0.61, "learning_rate": 2.0045413640771644e-06, "logits/chosen": -2.197082042694092, "logits/rejected": -2.3372139930725098, "logps/chosen": -265.10992431640625, "logps/rejected": -287.07794189453125, "loss": 0.0286, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04972299933433533, "rewards/margins": 0.09356241673231125, "rewards/rejected": -0.14328542351722717, "step": 9280 }, { "epoch": 0.61, "learning_rate": 1.998946007296558e-06, "logits/chosen": -2.4369912147521973, "logits/rejected": -2.100095510482788, "logps/chosen": -317.42645263671875, "logps/rejected": -271.50347900390625, "loss": 0.0171, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04412918910384178, "rewards/margins": 0.08756975829601288, "rewards/rejected": -0.13169893622398376, "step": 9290 }, { "epoch": 0.61, "learning_rate": 1.9933532642590215e-06, "logits/chosen": -2.1721298694610596, "logits/rejected": -1.797800064086914, "logps/chosen": -192.38026428222656, "logps/rejected": -159.61685180664062, "loss": 0.019, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.016109880059957504, "rewards/margins": 0.08698701858520508, "rewards/rejected": -0.10309691727161407, "step": 9300 }, { "epoch": 0.61, "eval_logits/chosen": -2.2889318466186523, "eval_logits/rejected": -2.1003968715667725, "eval_logps/chosen": -242.31744384765625, "eval_logps/rejected": -239.0792694091797, "eval_loss": 0.0250336192548275, "eval_rewards/accuracies": 0.6579999923706055, "eval_rewards/chosen": -0.051562484353780746, "eval_rewards/margins": 0.08577432483434677, "eval_rewards/rejected": -0.13733680546283722, "eval_runtime": 714.1938, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 9300 }, { "epoch": 0.61, "learning_rate": 1.987763164139042e-06, "logits/chosen": -2.322814464569092, "logits/rejected": -2.1172091960906982, "logps/chosen": -214.98190307617188, "logps/rejected": -233.5045928955078, "loss": 0.0172, "rewards/accuracies": 0.625, "rewards/chosen": -0.048634134232997894, "rewards/margins": 0.08134166896343231, "rewards/rejected": -0.1299758106470108, "step": 9310 }, { "epoch": 0.61, "learning_rate": 1.982175736097321e-06, "logits/chosen": -2.013521194458008, "logits/rejected": -2.0271811485290527, "logps/chosen": -287.46978759765625, "logps/rejected": -321.9363708496094, "loss": 0.0165, "rewards/accuracies": 0.625, "rewards/chosen": -0.06464368104934692, "rewards/margins": 0.08723381161689758, "rewards/rejected": -0.1518774926662445, "step": 9320 }, { "epoch": 0.61, "learning_rate": 1.9765910092806196e-06, "logits/chosen": -2.212627410888672, "logits/rejected": -2.1153512001037598, "logps/chosen": -184.89788818359375, "logps/rejected": -178.43759155273438, "loss": 0.0383, "rewards/accuracies": 0.5, "rewards/chosen": -0.034111760556697845, "rewards/margins": 0.06598981469869614, "rewards/rejected": -0.10010156780481339, "step": 9330 }, { "epoch": 0.61, "learning_rate": 1.9710090128216083e-06, "logits/chosen": -2.266669750213623, "logits/rejected": -2.1711018085479736, "logps/chosen": -234.8772430419922, "logps/rejected": -242.46630859375, "loss": 0.0254, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.06073601171374321, "rewards/margins": 0.11876089870929718, "rewards/rejected": -0.1794969141483307, "step": 9340 }, { "epoch": 0.61, "learning_rate": 1.9654297758387155e-06, "logits/chosen": -2.1124844551086426, "logits/rejected": -2.0897462368011475, "logps/chosen": -177.40499877929688, "logps/rejected": -213.63632202148438, "loss": 0.0304, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08267485350370407, "rewards/margins": 0.07752931863069534, "rewards/rejected": -0.1602041870355606, "step": 9350 }, { "epoch": 0.61, "learning_rate": 1.9598533274359736e-06, "logits/chosen": -2.28397798538208, "logits/rejected": -2.193634510040283, "logps/chosen": -256.9334411621094, "logps/rejected": -270.81170654296875, "loss": 0.0267, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.08626577258110046, "rewards/margins": 0.03258334472775459, "rewards/rejected": -0.11884911358356476, "step": 9360 }, { "epoch": 0.61, "learning_rate": 1.9542796967028697e-06, "logits/chosen": -2.295401096343994, "logits/rejected": -2.1766135692596436, "logps/chosen": -232.0633087158203, "logps/rejected": -225.03836059570312, "loss": 0.02, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06911368668079376, "rewards/margins": 0.056089501827955246, "rewards/rejected": -0.1252032071352005, "step": 9370 }, { "epoch": 0.61, "learning_rate": 1.948708912714192e-06, "logits/chosen": -2.192821979522705, "logits/rejected": -1.9560225009918213, "logps/chosen": -269.24444580078125, "logps/rejected": -250.17758178710938, "loss": 0.0272, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.11149577796459198, "rewards/margins": 0.05934471637010574, "rewards/rejected": -0.17084050178527832, "step": 9380 }, { "epoch": 0.61, "learning_rate": 1.9431410045298786e-06, "logits/chosen": -2.0476009845733643, "logits/rejected": -1.9805431365966797, "logps/chosen": -228.1685333251953, "logps/rejected": -242.92599487304688, "loss": 0.0206, "rewards/accuracies": 0.75, "rewards/chosen": -0.05521588400006294, "rewards/margins": 0.07866604626178741, "rewards/rejected": -0.13388192653656006, "step": 9390 }, { "epoch": 0.62, "learning_rate": 1.9375760011948654e-06, "logits/chosen": -2.3774194717407227, "logits/rejected": -2.2332215309143066, "logps/chosen": -211.9001007080078, "logps/rejected": -255.4772186279297, "loss": 0.0247, "rewards/accuracies": 0.75, "rewards/chosen": -0.061110854148864746, "rewards/margins": 0.10628316551446915, "rewards/rejected": -0.1673940122127533, "step": 9400 }, { "epoch": 0.62, "eval_logits/chosen": -2.29257869720459, "eval_logits/rejected": -2.1041393280029297, "eval_logps/chosen": -246.23622131347656, "eval_logps/rejected": -241.68353271484375, "eval_loss": 0.025083504617214203, "eval_rewards/accuracies": 0.6545000076293945, "eval_rewards/chosen": -0.07115628570318222, "eval_rewards/margins": 0.07920186221599579, "eval_rewards/rejected": -0.1503581553697586, "eval_runtime": 715.2109, "eval_samples_per_second": 2.796, "eval_steps_per_second": 1.398, "step": 9400 }, { "epoch": 0.62, "learning_rate": 1.932013931738937e-06, "logits/chosen": -2.2685647010803223, "logits/rejected": -2.040989398956299, "logps/chosen": -219.1533203125, "logps/rejected": -258.0494079589844, "loss": 0.0346, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07775808870792389, "rewards/margins": 0.13007166981697083, "rewards/rejected": -0.20782975852489471, "step": 9410 }, { "epoch": 0.62, "learning_rate": 1.9264548251765717e-06, "logits/chosen": -2.3763561248779297, "logits/rejected": -2.1895689964294434, "logps/chosen": -215.4586181640625, "logps/rejected": -227.1342315673828, "loss": 0.0165, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.058279722929000854, "rewards/margins": 0.0739368349313736, "rewards/rejected": -0.13221655786037445, "step": 9420 }, { "epoch": 0.62, "learning_rate": 1.9208987105067924e-06, "logits/chosen": -2.1715502738952637, "logits/rejected": -2.031933307647705, "logps/chosen": -226.62173461914062, "logps/rejected": -214.856689453125, "loss": 0.028, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.060323454439640045, "rewards/margins": 0.060612984001636505, "rewards/rejected": -0.12093643844127655, "step": 9430 }, { "epoch": 0.62, "learning_rate": 1.9153456167130154e-06, "logits/chosen": -2.281136989593506, "logits/rejected": -2.2747669219970703, "logps/chosen": -216.7677001953125, "logps/rejected": -256.7870178222656, "loss": 0.0444, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06348533928394318, "rewards/margins": 0.0691031813621521, "rewards/rejected": -0.13258852064609528, "step": 9440 }, { "epoch": 0.62, "learning_rate": 1.9097955727628975e-06, "logits/chosen": -2.309915065765381, "logits/rejected": -2.3080644607543945, "logps/chosen": -202.22084045410156, "logps/rejected": -229.6215057373047, "loss": 0.0363, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.031176622956991196, "rewards/margins": 0.07732857763767242, "rewards/rejected": -0.10850518941879272, "step": 9450 }, { "epoch": 0.62, "learning_rate": 1.904248607608187e-06, "logits/chosen": -2.2073731422424316, "logits/rejected": -2.2413322925567627, "logps/chosen": -264.6854248046875, "logps/rejected": -228.81912231445312, "loss": 0.0209, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03732733801007271, "rewards/margins": 0.05620206519961357, "rewards/rejected": -0.09352940320968628, "step": 9460 }, { "epoch": 0.62, "learning_rate": 1.8987047501845714e-06, "logits/chosen": -2.2785770893096924, "logits/rejected": -2.2424912452697754, "logps/chosen": -174.671142578125, "logps/rejected": -187.09288024902344, "loss": 0.0343, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03873337432742119, "rewards/margins": 0.09333629906177521, "rewards/rejected": -0.1320696771144867, "step": 9470 }, { "epoch": 0.62, "learning_rate": 1.8931640294115267e-06, "logits/chosen": -2.0822432041168213, "logits/rejected": -1.9636636972427368, "logps/chosen": -201.2562713623047, "logps/rejected": -207.4918212890625, "loss": 0.0389, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.03625180199742317, "rewards/margins": 0.10882870852947235, "rewards/rejected": -0.14508050680160522, "step": 9480 }, { "epoch": 0.62, "learning_rate": 1.8876264741921662e-06, "logits/chosen": -2.0588245391845703, "logits/rejected": -2.0789387226104736, "logps/chosen": -196.6965789794922, "logps/rejected": -215.16513061523438, "loss": 0.0208, "rewards/accuracies": 0.75, "rewards/chosen": -0.03285546973347664, "rewards/margins": 0.11952035129070282, "rewards/rejected": -0.15237581729888916, "step": 9490 }, { "epoch": 0.62, "learning_rate": 1.8820921134130912e-06, "logits/chosen": -2.2807486057281494, "logits/rejected": -1.9099798202514648, "logps/chosen": -239.4491729736328, "logps/rejected": -223.02163696289062, "loss": 0.0161, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.044685035943984985, "rewards/margins": 0.14240868389606476, "rewards/rejected": -0.18709370493888855, "step": 9500 }, { "epoch": 0.62, "eval_logits/chosen": -2.2826719284057617, "eval_logits/rejected": -2.094907283782959, "eval_logps/chosen": -242.37461853027344, "eval_logps/rejected": -238.37704467773438, "eval_loss": 0.024934230372309685, "eval_rewards/accuracies": 0.6485000252723694, "eval_rewards/chosen": -0.05184837058186531, "eval_rewards/margins": 0.08197740465402603, "eval_rewards/rejected": -0.13382577896118164, "eval_runtime": 711.7958, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 9500 }, { "epoch": 0.62, "learning_rate": 1.8765609759442378e-06, "logits/chosen": -2.140984058380127, "logits/rejected": -2.035247325897217, "logps/chosen": -250.96426391601562, "logps/rejected": -253.80502319335938, "loss": 0.0223, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04821477085351944, "rewards/margins": 0.06826470792293549, "rewards/rejected": -0.11647947877645493, "step": 9510 }, { "epoch": 0.62, "learning_rate": 1.8710330906387288e-06, "logits/chosen": -2.348546266555786, "logits/rejected": -2.2972640991210938, "logps/chosen": -249.5334014892578, "logps/rejected": -290.72796630859375, "loss": 0.0291, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05522875860333443, "rewards/margins": 0.08802361786365509, "rewards/rejected": -0.1432523876428604, "step": 9520 }, { "epoch": 0.62, "learning_rate": 1.8655084863327222e-06, "logits/chosen": -2.2527644634246826, "logits/rejected": -2.2713284492492676, "logps/chosen": -190.42855834960938, "logps/rejected": -210.0547332763672, "loss": 0.0261, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.03485274314880371, "rewards/margins": 0.06732039153575897, "rewards/rejected": -0.10217314958572388, "step": 9530 }, { "epoch": 0.62, "learning_rate": 1.8599871918452603e-06, "logits/chosen": -2.1004343032836914, "logits/rejected": -2.093557357788086, "logps/chosen": -229.6087188720703, "logps/rejected": -261.7853088378906, "loss": 0.0093, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03258442506194115, "rewards/margins": 0.09463578462600708, "rewards/rejected": -0.12722022831439972, "step": 9540 }, { "epoch": 0.62, "learning_rate": 1.8544692359781192e-06, "logits/chosen": -2.2808165550231934, "logits/rejected": -2.0491530895233154, "logps/chosen": -191.7283477783203, "logps/rejected": -175.3466033935547, "loss": 0.0365, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.02071845903992653, "rewards/margins": 0.07282758504152298, "rewards/rejected": -0.09354604780673981, "step": 9550 }, { "epoch": 0.63, "learning_rate": 1.8489546475156602e-06, "logits/chosen": -2.4583964347839355, "logits/rejected": -2.2367022037506104, "logps/chosen": -223.090576171875, "logps/rejected": -219.71420288085938, "loss": 0.0158, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0179886631667614, "rewards/margins": 0.07395794242620468, "rewards/rejected": -0.09194660931825638, "step": 9560 }, { "epoch": 0.63, "learning_rate": 1.8434434552246778e-06, "logits/chosen": -2.090735673904419, "logits/rejected": -2.021360397338867, "logps/chosen": -215.08010864257812, "logps/rejected": -218.2127685546875, "loss": 0.0179, "rewards/accuracies": 0.625, "rewards/chosen": -0.02724291943013668, "rewards/margins": 0.07197809219360352, "rewards/rejected": -0.09922101348638535, "step": 9570 }, { "epoch": 0.63, "learning_rate": 1.837935687854251e-06, "logits/chosen": -2.3042380809783936, "logits/rejected": -2.060258150100708, "logps/chosen": -225.2897186279297, "logps/rejected": -210.74374389648438, "loss": 0.0322, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.018714962527155876, "rewards/margins": 0.08627069741487503, "rewards/rejected": -0.10498566925525665, "step": 9580 }, { "epoch": 0.63, "learning_rate": 1.832431374135592e-06, "logits/chosen": -2.4167397022247314, "logits/rejected": -2.029613494873047, "logps/chosen": -256.81829833984375, "logps/rejected": -262.15032958984375, "loss": 0.0161, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.041914623230695724, "rewards/margins": 0.12102575600147247, "rewards/rejected": -0.1629403680562973, "step": 9590 }, { "epoch": 0.63, "learning_rate": 1.8269305427818977e-06, "logits/chosen": -2.4077861309051514, "logits/rejected": -2.265671730041504, "logps/chosen": -214.59957885742188, "logps/rejected": -200.29623413085938, "loss": 0.0198, "rewards/accuracies": 0.625, "rewards/chosen": -0.029916757717728615, "rewards/margins": 0.06680215895175934, "rewards/rejected": -0.0967189222574234, "step": 9600 }, { "epoch": 0.63, "eval_logits/chosen": -2.2786946296691895, "eval_logits/rejected": -2.0912680625915527, "eval_logps/chosen": -237.63516235351562, "eval_logps/rejected": -234.0897979736328, "eval_loss": 0.0249630156904459, "eval_rewards/accuracies": 0.6499999761581421, "eval_rewards/chosen": -0.02815098501741886, "eval_rewards/margins": 0.08423858880996704, "eval_rewards/rejected": -0.11238957196474075, "eval_runtime": 711.979, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.405, "step": 9600 }, { "epoch": 0.63, "learning_rate": 1.821433222488199e-06, "logits/chosen": -2.272484302520752, "logits/rejected": -1.9709657430648804, "logps/chosen": -228.2349395751953, "logps/rejected": -217.74484252929688, "loss": 0.0118, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.013100971467792988, "rewards/margins": 0.0862874835729599, "rewards/rejected": -0.09938845038414001, "step": 9610 }, { "epoch": 0.63, "learning_rate": 1.8159394419312112e-06, "logits/chosen": -2.3230252265930176, "logits/rejected": -2.130493640899658, "logps/chosen": -263.906982421875, "logps/rejected": -244.85977172851562, "loss": 0.0284, "rewards/accuracies": 0.75, "rewards/chosen": -0.025884822010993958, "rewards/margins": 0.13745743036270142, "rewards/rejected": -0.16334223747253418, "step": 9620 }, { "epoch": 0.63, "learning_rate": 1.8104492297691845e-06, "logits/chosen": -2.2625083923339844, "logits/rejected": -2.043485641479492, "logps/chosen": -241.1596221923828, "logps/rejected": -231.29623413085938, "loss": 0.0347, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07318250834941864, "rewards/margins": 0.07124580442905426, "rewards/rejected": -0.1444282978773117, "step": 9630 }, { "epoch": 0.63, "learning_rate": 1.8049626146417562e-06, "logits/chosen": -2.0654587745666504, "logits/rejected": -1.9282382726669312, "logps/chosen": -165.26133728027344, "logps/rejected": -181.92007446289062, "loss": 0.0504, "rewards/accuracies": 0.625, "rewards/chosen": -0.05092727392911911, "rewards/margins": 0.10185831785202026, "rewards/rejected": -0.15278561413288116, "step": 9640 }, { "epoch": 0.63, "learning_rate": 1.7994796251697983e-06, "logits/chosen": -2.1830663681030273, "logits/rejected": -2.0419445037841797, "logps/chosen": -210.02487182617188, "logps/rejected": -271.4522705078125, "loss": 0.0159, "rewards/accuracies": 0.75, "rewards/chosen": -0.0899299904704094, "rewards/margins": 0.10931304842233658, "rewards/rejected": -0.19924303889274597, "step": 9650 }, { "epoch": 0.63, "learning_rate": 1.794000289955269e-06, "logits/chosen": -2.229628801345825, "logits/rejected": -2.061565399169922, "logps/chosen": -282.14886474609375, "logps/rejected": -268.85076904296875, "loss": 0.0393, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.07544299960136414, "rewards/margins": 0.09074367582798004, "rewards/rejected": -0.16618667542934418, "step": 9660 }, { "epoch": 0.63, "learning_rate": 1.7885246375810646e-06, "logits/chosen": -2.2054381370544434, "logits/rejected": -1.9357036352157593, "logps/chosen": -233.9512481689453, "logps/rejected": -245.37796020507812, "loss": 0.0188, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04838669300079346, "rewards/margins": 0.07594360411167145, "rewards/rejected": -0.1243302971124649, "step": 9670 }, { "epoch": 0.63, "learning_rate": 1.7830526966108713e-06, "logits/chosen": -2.0536415576934814, "logits/rejected": -1.858930230140686, "logps/chosen": -205.92813110351562, "logps/rejected": -206.12521362304688, "loss": 0.0399, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.09727603942155838, "rewards/margins": 0.14404307305812836, "rewards/rejected": -0.24131910502910614, "step": 9680 }, { "epoch": 0.63, "learning_rate": 1.7775844955890129e-06, "logits/chosen": -2.209892988204956, "logits/rejected": -2.049481153488159, "logps/chosen": -220.50900268554688, "logps/rejected": -229.58203125, "loss": 0.0224, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04481334984302521, "rewards/margins": 0.10324843972921371, "rewards/rejected": -0.1480618268251419, "step": 9690 }, { "epoch": 0.63, "learning_rate": 1.7721200630403046e-06, "logits/chosen": -2.283764600753784, "logits/rejected": -2.0978660583496094, "logps/chosen": -206.02914428710938, "logps/rejected": -245.3820343017578, "loss": 0.0368, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.05133398622274399, "rewards/margins": 0.06903479993343353, "rewards/rejected": -0.12036879360675812, "step": 9700 }, { "epoch": 0.63, "eval_logits/chosen": -2.278688430786133, "eval_logits/rejected": -2.0913586616516113, "eval_logps/chosen": -243.37106323242188, "eval_logps/rejected": -239.7048797607422, "eval_loss": 0.024813618510961533, "eval_rewards/accuracies": 0.6585000157356262, "eval_rewards/chosen": -0.056830476969480515, "eval_rewards/margins": 0.08363436907529831, "eval_rewards/rejected": -0.14046484231948853, "eval_runtime": 713.2782, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 9700 }, { "epoch": 0.64, "learning_rate": 1.7666594274699037e-06, "logits/chosen": -2.1993210315704346, "logits/rejected": -2.048921585083008, "logps/chosen": -261.8725891113281, "logps/rejected": -245.17886352539062, "loss": 0.0156, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04591141268610954, "rewards/margins": 0.10755202919244766, "rewards/rejected": -0.1534634530544281, "step": 9710 }, { "epoch": 0.64, "learning_rate": 1.76120261736316e-06, "logits/chosen": -2.2551050186157227, "logits/rejected": -1.8844871520996094, "logps/chosen": -239.8577423095703, "logps/rejected": -237.2745819091797, "loss": 0.0279, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.06571348011493683, "rewards/margins": 0.12280043214559555, "rewards/rejected": -0.18851391971111298, "step": 9720 }, { "epoch": 0.64, "learning_rate": 1.755749661185468e-06, "logits/chosen": -2.3268203735351562, "logits/rejected": -1.8979631662368774, "logps/chosen": -301.92840576171875, "logps/rejected": -262.7877197265625, "loss": 0.0206, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03739435225725174, "rewards/margins": 0.09846501052379608, "rewards/rejected": -0.13585934042930603, "step": 9730 }, { "epoch": 0.64, "learning_rate": 1.7503005873821183e-06, "logits/chosen": -2.265444755554199, "logits/rejected": -2.209855556488037, "logps/chosen": -169.6016082763672, "logps/rejected": -213.1735076904297, "loss": 0.0187, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04586641117930412, "rewards/margins": 0.08653993904590607, "rewards/rejected": -0.1324063390493393, "step": 9740 }, { "epoch": 0.64, "learning_rate": 1.744855424378148e-06, "logits/chosen": -2.1263904571533203, "logits/rejected": -2.1451613903045654, "logps/chosen": -199.63528442382812, "logps/rejected": -240.84548950195312, "loss": 0.019, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05504153296351433, "rewards/margins": 0.08882343024015427, "rewards/rejected": -0.1438649594783783, "step": 9750 }, { "epoch": 0.64, "learning_rate": 1.7394142005781973e-06, "logits/chosen": -2.0734992027282715, "logits/rejected": -2.1553051471710205, "logps/chosen": -273.6174621582031, "logps/rejected": -289.822998046875, "loss": 0.012, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05953211337327957, "rewards/margins": 0.06317280232906342, "rewards/rejected": -0.12270492315292358, "step": 9760 }, { "epoch": 0.64, "learning_rate": 1.7339769443663528e-06, "logits/chosen": -2.2552011013031006, "logits/rejected": -2.1124913692474365, "logps/chosen": -155.26698303222656, "logps/rejected": -174.4816131591797, "loss": 0.0283, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.061623912304639816, "rewards/margins": 0.09255679696798325, "rewards/rejected": -0.15418072044849396, "step": 9770 }, { "epoch": 0.64, "learning_rate": 1.7285436841060078e-06, "logits/chosen": -2.4462904930114746, "logits/rejected": -2.161907196044922, "logps/chosen": -286.5582275390625, "logps/rejected": -260.4649353027344, "loss": 0.0235, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05214942619204521, "rewards/margins": 0.08430466800928116, "rewards/rejected": -0.13645410537719727, "step": 9780 }, { "epoch": 0.64, "learning_rate": 1.7231144481397083e-06, "logits/chosen": -2.34212589263916, "logits/rejected": -2.265117883682251, "logps/chosen": -231.6746826171875, "logps/rejected": -217.1168975830078, "loss": 0.0143, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05638168007135391, "rewards/margins": 0.05440463870763779, "rewards/rejected": -0.1107863038778305, "step": 9790 }, { "epoch": 0.64, "learning_rate": 1.7176892647890092e-06, "logits/chosen": -2.4070024490356445, "logits/rejected": -2.165473222732544, "logps/chosen": -246.8561553955078, "logps/rejected": -219.91323852539062, "loss": 0.0214, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.055950719863176346, "rewards/margins": 0.05969247967004776, "rewards/rejected": -0.1156432032585144, "step": 9800 }, { "epoch": 0.64, "eval_logits/chosen": -2.2843563556671143, "eval_logits/rejected": -2.09706974029541, "eval_logps/chosen": -243.19447326660156, "eval_logps/rejected": -239.02984619140625, "eval_loss": 0.024816662073135376, "eval_rewards/accuracies": 0.6570000052452087, "eval_rewards/chosen": -0.05594758316874504, "eval_rewards/margins": 0.08114214986562729, "eval_rewards/rejected": -0.13708974421024323, "eval_runtime": 715.6997, "eval_samples_per_second": 2.794, "eval_steps_per_second": 1.397, "step": 9800 }, { "epoch": 0.64, "learning_rate": 1.7122681623543239e-06, "logits/chosen": -2.426173448562622, "logits/rejected": -2.166084051132202, "logps/chosen": -255.25790405273438, "logps/rejected": -265.0026550292969, "loss": 0.0188, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0413464792072773, "rewards/margins": 0.1047930121421814, "rewards/rejected": -0.1461394876241684, "step": 9810 }, { "epoch": 0.64, "learning_rate": 1.7068511691147788e-06, "logits/chosen": -2.168544292449951, "logits/rejected": -2.208672046661377, "logps/chosen": -202.48780822753906, "logps/rejected": -227.08316040039062, "loss": 0.0091, "rewards/accuracies": 0.625, "rewards/chosen": -0.041783712804317474, "rewards/margins": 0.08112876862287521, "rewards/rejected": -0.12291248887777328, "step": 9820 }, { "epoch": 0.64, "learning_rate": 1.7014383133280636e-06, "logits/chosen": -2.394834041595459, "logits/rejected": -2.033219575881958, "logps/chosen": -264.05914306640625, "logps/rejected": -227.944580078125, "loss": 0.0281, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.07712364196777344, "rewards/margins": 0.08639691770076752, "rewards/rejected": -0.16352055966854095, "step": 9830 }, { "epoch": 0.64, "learning_rate": 1.696029623230286e-06, "logits/chosen": -2.366926670074463, "logits/rejected": -2.284317970275879, "logps/chosen": -265.00750732421875, "logps/rejected": -303.00238037109375, "loss": 0.0209, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04542611166834831, "rewards/margins": 0.09607888758182526, "rewards/rejected": -0.14150500297546387, "step": 9840 }, { "epoch": 0.64, "learning_rate": 1.6906251270358229e-06, "logits/chosen": -2.3569698333740234, "logits/rejected": -2.2085394859313965, "logps/chosen": -274.58624267578125, "logps/rejected": -237.61727905273438, "loss": 0.0156, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05865948647260666, "rewards/margins": 0.06386444717645645, "rewards/rejected": -0.12252392619848251, "step": 9850 }, { "epoch": 0.65, "learning_rate": 1.685224852937174e-06, "logits/chosen": -2.1372501850128174, "logits/rejected": -2.017017364501953, "logps/chosen": -203.86383056640625, "logps/rejected": -277.4493103027344, "loss": 0.0382, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.02749522402882576, "rewards/margins": 0.1586325615644455, "rewards/rejected": -0.18612778186798096, "step": 9860 }, { "epoch": 0.65, "learning_rate": 1.6798288291048136e-06, "logits/chosen": -2.1029751300811768, "logits/rejected": -2.0041463375091553, "logps/chosen": -235.9657745361328, "logps/rejected": -231.2330322265625, "loss": 0.025, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06362083554267883, "rewards/margins": 0.11986134946346283, "rewards/rejected": -0.18348219990730286, "step": 9870 }, { "epoch": 0.65, "learning_rate": 1.6744370836870466e-06, "logits/chosen": -2.5070695877075195, "logits/rejected": -2.2133679389953613, "logps/chosen": -342.8887634277344, "logps/rejected": -282.80853271484375, "loss": 0.017, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04097612574696541, "rewards/margins": 0.11718785762786865, "rewards/rejected": -0.15816399455070496, "step": 9880 }, { "epoch": 0.65, "learning_rate": 1.6690496448098576e-06, "logits/chosen": -2.178298234939575, "logits/rejected": -1.882962942123413, "logps/chosen": -241.05026245117188, "logps/rejected": -235.6141815185547, "loss": 0.0223, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.060070209205150604, "rewards/margins": 0.07711504399776459, "rewards/rejected": -0.1371852457523346, "step": 9890 }, { "epoch": 0.65, "learning_rate": 1.6636665405767666e-06, "logits/chosen": -2.278411865234375, "logits/rejected": -2.1138100624084473, "logps/chosen": -241.71383666992188, "logps/rejected": -236.0366973876953, "loss": 0.0331, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.01971452124416828, "rewards/margins": 0.0660644918680191, "rewards/rejected": -0.08577899634838104, "step": 9900 }, { "epoch": 0.65, "eval_logits/chosen": -2.27319598197937, "eval_logits/rejected": -2.0866928100585938, "eval_logps/chosen": -240.8263397216797, "eval_logps/rejected": -238.18746948242188, "eval_loss": 0.024608775973320007, "eval_rewards/accuracies": 0.6600000262260437, "eval_rewards/chosen": -0.044106971472501755, "eval_rewards/margins": 0.08877087384462357, "eval_rewards/rejected": -0.13287784159183502, "eval_runtime": 714.6039, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.399, "step": 9900 }, { "epoch": 0.65, "learning_rate": 1.6582877990686827e-06, "logits/chosen": -2.2707982063293457, "logits/rejected": -2.2193493843078613, "logps/chosen": -129.47329711914062, "logps/rejected": -174.6560516357422, "loss": 0.0281, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.040894515812397, "rewards/margins": 0.11425725370645523, "rewards/rejected": -0.15515175461769104, "step": 9910 }, { "epoch": 0.65, "learning_rate": 1.6529134483437562e-06, "logits/chosen": -2.2776689529418945, "logits/rejected": -2.1015264987945557, "logps/chosen": -216.85537719726562, "logps/rejected": -200.73300170898438, "loss": 0.0362, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.05640562251210213, "rewards/margins": 0.11665663868188858, "rewards/rejected": -0.17306223511695862, "step": 9920 }, { "epoch": 0.65, "learning_rate": 1.647543516437233e-06, "logits/chosen": -2.2426798343658447, "logits/rejected": -2.1841890811920166, "logps/chosen": -213.09963989257812, "logps/rejected": -249.3842010498047, "loss": 0.0388, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.07030726969242096, "rewards/margins": 0.07799828052520752, "rewards/rejected": -0.14830553531646729, "step": 9930 }, { "epoch": 0.65, "learning_rate": 1.6421780313613088e-06, "logits/chosen": -2.3773422241210938, "logits/rejected": -1.9746917486190796, "logps/chosen": -214.04379272460938, "logps/rejected": -201.2644805908203, "loss": 0.0338, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04651091247797012, "rewards/margins": 0.10205264389514923, "rewards/rejected": -0.14856356382369995, "step": 9940 }, { "epoch": 0.65, "learning_rate": 1.6368170211049816e-06, "logits/chosen": -2.294351816177368, "logits/rejected": -1.810516357421875, "logps/chosen": -294.4578552246094, "logps/rejected": -256.480224609375, "loss": 0.0149, "rewards/accuracies": 0.625, "rewards/chosen": -0.05328863114118576, "rewards/margins": 0.11066905409097672, "rewards/rejected": -0.16395768523216248, "step": 9950 }, { "epoch": 0.65, "learning_rate": 1.6314605136339074e-06, "logits/chosen": -2.2938685417175293, "logits/rejected": -2.132032871246338, "logps/chosen": -211.07138061523438, "logps/rejected": -208.8017120361328, "loss": 0.0433, "rewards/accuracies": 0.625, "rewards/chosen": -0.0774233490228653, "rewards/margins": 0.07814882695674896, "rewards/rejected": -0.15557217597961426, "step": 9960 }, { "epoch": 0.65, "learning_rate": 1.6261085368902526e-06, "logits/chosen": -2.499953269958496, "logits/rejected": -2.15836501121521, "logps/chosen": -275.83831787109375, "logps/rejected": -247.0941619873047, "loss": 0.0218, "rewards/accuracies": 0.625, "rewards/chosen": -0.05448717996478081, "rewards/margins": 0.07442667335271835, "rewards/rejected": -0.12891386449337006, "step": 9970 }, { "epoch": 0.65, "learning_rate": 1.6207611187925503e-06, "logits/chosen": -2.1789181232452393, "logits/rejected": -2.1919662952423096, "logps/chosen": -219.1823272705078, "logps/rejected": -284.10052490234375, "loss": 0.0307, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.051777105778455734, "rewards/margins": 0.09300215542316437, "rewards/rejected": -0.1447792649269104, "step": 9980 }, { "epoch": 0.65, "learning_rate": 1.6154182872355512e-06, "logits/chosen": -2.222287654876709, "logits/rejected": -2.273768186569214, "logps/chosen": -178.19265747070312, "logps/rejected": -206.7621307373047, "loss": 0.052, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07187598198652267, "rewards/margins": 0.07074934989213943, "rewards/rejected": -0.1426253318786621, "step": 9990 }, { "epoch": 0.65, "learning_rate": 1.610080070090084e-06, "logits/chosen": -2.2096710205078125, "logits/rejected": -2.1410071849823, "logps/chosen": -199.0048370361328, "logps/rejected": -210.6748046875, "loss": 0.0316, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09041772037744522, "rewards/margins": 0.10349156707525253, "rewards/rejected": -0.19390928745269775, "step": 10000 }, { "epoch": 0.65, "eval_logits/chosen": -2.263366460800171, "eval_logits/rejected": -2.076998233795166, "eval_logps/chosen": -243.4642333984375, "eval_logps/rejected": -241.0921630859375, "eval_loss": 0.024637000635266304, "eval_rewards/accuracies": 0.6579999923706055, "eval_rewards/chosen": -0.05729633569717407, "eval_rewards/margins": 0.09010498225688934, "eval_rewards/rejected": -0.14740131795406342, "eval_runtime": 714.5264, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.4, "step": 10000 }, { "epoch": 0.65, "learning_rate": 1.6047464952029034e-06, "logits/chosen": -2.402103900909424, "logits/rejected": -2.2708446979522705, "logps/chosen": -263.43438720703125, "logps/rejected": -293.1377258300781, "loss": 0.0134, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.045734953135252, "rewards/margins": 0.10738639533519745, "rewards/rejected": -0.15312136709690094, "step": 10010 }, { "epoch": 0.66, "learning_rate": 1.5994175903965486e-06, "logits/chosen": -2.1264870166778564, "logits/rejected": -2.0037424564361572, "logps/chosen": -266.4388427734375, "logps/rejected": -290.78729248046875, "loss": 0.0353, "rewards/accuracies": 0.75, "rewards/chosen": -0.08054221421480179, "rewards/margins": 0.11296994984149933, "rewards/rejected": -0.19351215660572052, "step": 10020 }, { "epoch": 0.66, "learning_rate": 1.5940933834691977e-06, "logits/chosen": -2.572889804840088, "logits/rejected": -1.9311988353729248, "logps/chosen": -317.31982421875, "logps/rejected": -226.02621459960938, "loss": 0.0231, "rewards/accuracies": 0.75, "rewards/chosen": -0.06026948243379593, "rewards/margins": 0.08301849663257599, "rewards/rejected": -0.14328798651695251, "step": 10030 }, { "epoch": 0.66, "learning_rate": 1.588773902194522e-06, "logits/chosen": -2.0572285652160645, "logits/rejected": -1.7748944759368896, "logps/chosen": -220.39501953125, "logps/rejected": -247.3108367919922, "loss": 0.0168, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08157656341791153, "rewards/margins": 0.1557811051607132, "rewards/rejected": -0.23735766112804413, "step": 10040 }, { "epoch": 0.66, "learning_rate": 1.583459174321541e-06, "logits/chosen": -2.05017352104187, "logits/rejected": -1.8998372554779053, "logps/chosen": -228.8952178955078, "logps/rejected": -221.0664825439453, "loss": 0.0329, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0963895320892334, "rewards/margins": 0.0963265672326088, "rewards/rejected": -0.1927160918712616, "step": 10050 }, { "epoch": 0.66, "learning_rate": 1.5781492275744797e-06, "logits/chosen": -2.4671225547790527, "logits/rejected": -2.0863845348358154, "logps/chosen": -306.43975830078125, "logps/rejected": -306.3621520996094, "loss": 0.0237, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.0609763041138649, "rewards/margins": 0.13070151209831238, "rewards/rejected": -0.19167783856391907, "step": 10060 }, { "epoch": 0.66, "learning_rate": 1.5728440896526215e-06, "logits/chosen": -2.184709310531616, "logits/rejected": -2.00661563873291, "logps/chosen": -288.0798645019531, "logps/rejected": -258.2320251464844, "loss": 0.0128, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0733894407749176, "rewards/margins": 0.08177514374256134, "rewards/rejected": -0.15516456961631775, "step": 10070 }, { "epoch": 0.66, "learning_rate": 1.5675437882301633e-06, "logits/chosen": -2.263230323791504, "logits/rejected": -2.0684688091278076, "logps/chosen": -237.2273406982422, "logps/rejected": -208.65243530273438, "loss": 0.039, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09091582894325256, "rewards/margins": 0.031104255467653275, "rewards/rejected": -0.12202008813619614, "step": 10080 }, { "epoch": 0.66, "learning_rate": 1.5622483509560748e-06, "logits/chosen": -2.1492769718170166, "logits/rejected": -2.160562753677368, "logps/chosen": -185.2451629638672, "logps/rejected": -235.5522003173828, "loss": 0.0333, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.07258006930351257, "rewards/margins": 0.1006237119436264, "rewards/rejected": -0.17320378124713898, "step": 10090 }, { "epoch": 0.66, "learning_rate": 1.5569578054539506e-06, "logits/chosen": -2.2144834995269775, "logits/rejected": -1.87839674949646, "logps/chosen": -292.9901428222656, "logps/rejected": -237.5506134033203, "loss": 0.0181, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.06896514445543289, "rewards/margins": 0.14211763441562653, "rewards/rejected": -0.21108278632164001, "step": 10100 }, { "epoch": 0.66, "eval_logits/chosen": -2.2660727500915527, "eval_logits/rejected": -2.080104351043701, "eval_logps/chosen": -247.1387176513672, "eval_logps/rejected": -243.84608459472656, "eval_loss": 0.024765770882368088, "eval_rewards/accuracies": 0.6669999957084656, "eval_rewards/chosen": -0.07566884905099869, "eval_rewards/margins": 0.08550204336643219, "eval_rewards/rejected": -0.16117088496685028, "eval_runtime": 713.5274, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.401, "step": 10100 }, { "epoch": 0.66, "learning_rate": 1.551672179321867e-06, "logits/chosen": -2.1911749839782715, "logits/rejected": -2.258227825164795, "logps/chosen": -229.6168975830078, "logps/rejected": -224.55361938476562, "loss": 0.0168, "rewards/accuracies": 0.625, "rewards/chosen": -0.0687999278306961, "rewards/margins": 0.08053916692733765, "rewards/rejected": -0.14933909475803375, "step": 10110 }, { "epoch": 0.66, "learning_rate": 1.5463915001322398e-06, "logits/chosen": -2.22826886177063, "logits/rejected": -2.0819613933563232, "logps/chosen": -263.48443603515625, "logps/rejected": -267.28863525390625, "loss": 0.0479, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08914048969745636, "rewards/margins": 0.0908593013882637, "rewards/rejected": -0.17999979853630066, "step": 10120 }, { "epoch": 0.66, "learning_rate": 1.5411157954316784e-06, "logits/chosen": -2.2317793369293213, "logits/rejected": -2.131615161895752, "logps/chosen": -214.57431030273438, "logps/rejected": -216.745849609375, "loss": 0.0158, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0634838342666626, "rewards/margins": 0.07930682599544525, "rewards/rejected": -0.14279064536094666, "step": 10130 }, { "epoch": 0.66, "learning_rate": 1.535845092740843e-06, "logits/chosen": -2.3938117027282715, "logits/rejected": -2.235452175140381, "logps/chosen": -248.4803466796875, "logps/rejected": -271.53656005859375, "loss": 0.0297, "rewards/accuracies": 0.5, "rewards/chosen": -0.05579571798443794, "rewards/margins": 0.0533018484711647, "rewards/rejected": -0.10909757763147354, "step": 10140 }, { "epoch": 0.66, "learning_rate": 1.5305794195543005e-06, "logits/chosen": -2.250174045562744, "logits/rejected": -2.3086235523223877, "logps/chosen": -221.4529571533203, "logps/rejected": -223.0041961669922, "loss": 0.0335, "rewards/accuracies": 0.625, "rewards/chosen": -0.07136930525302887, "rewards/margins": 0.0924132838845253, "rewards/rejected": -0.16378256678581238, "step": 10150 }, { "epoch": 0.66, "learning_rate": 1.5253188033403816e-06, "logits/chosen": -2.2710773944854736, "logits/rejected": -2.3633055686950684, "logps/chosen": -183.15744018554688, "logps/rejected": -208.3464813232422, "loss": 0.0237, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.06211277097463608, "rewards/margins": 0.0426534041762352, "rewards/rejected": -0.10476617515087128, "step": 10160 }, { "epoch": 0.67, "learning_rate": 1.520063271541037e-06, "logits/chosen": -2.240269660949707, "logits/rejected": -2.1465556621551514, "logps/chosen": -193.79080200195312, "logps/rejected": -208.65560913085938, "loss": 0.0198, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07643021643161774, "rewards/margins": 0.13083912432193756, "rewards/rejected": -0.2072693407535553, "step": 10170 }, { "epoch": 0.67, "learning_rate": 1.5148128515716954e-06, "logits/chosen": -2.4676265716552734, "logits/rejected": -1.8373692035675049, "logps/chosen": -275.50396728515625, "logps/rejected": -226.16683959960938, "loss": 0.0294, "rewards/accuracies": 0.75, "rewards/chosen": -0.044873807579278946, "rewards/margins": 0.11518532037734985, "rewards/rejected": -0.1600591242313385, "step": 10180 }, { "epoch": 0.67, "learning_rate": 1.5095675708211197e-06, "logits/chosen": -2.286296844482422, "logits/rejected": -2.276829719543457, "logps/chosen": -211.50582885742188, "logps/rejected": -243.75173950195312, "loss": 0.0499, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.10172464698553085, "rewards/margins": 0.04451145976781845, "rewards/rejected": -0.1462361216545105, "step": 10190 }, { "epoch": 0.67, "learning_rate": 1.504327456651263e-06, "logits/chosen": -2.2106895446777344, "logits/rejected": -2.136573076248169, "logps/chosen": -275.1043701171875, "logps/rejected": -267.5289306640625, "loss": 0.0159, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.06850683689117432, "rewards/margins": 0.09694625437259674, "rewards/rejected": -0.16545307636260986, "step": 10200 }, { "epoch": 0.67, "eval_logits/chosen": -2.2463204860687256, "eval_logits/rejected": -2.0610575675964355, "eval_logps/chosen": -244.7626495361328, "eval_logps/rejected": -242.60560607910156, "eval_loss": 0.02451149746775627, "eval_rewards/accuracies": 0.6610000133514404, "eval_rewards/chosen": -0.06378858536481857, "eval_rewards/margins": 0.09117982536554337, "eval_rewards/rejected": -0.15496839582920074, "eval_runtime": 715.3275, "eval_samples_per_second": 2.796, "eval_steps_per_second": 1.398, "step": 10200 }, { "epoch": 0.67, "learning_rate": 1.4990925363971284e-06, "logits/chosen": -2.304161310195923, "logits/rejected": -1.8916089534759521, "logps/chosen": -304.38824462890625, "logps/rejected": -271.1502685546875, "loss": 0.0352, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07232534885406494, "rewards/margins": 0.176255464553833, "rewards/rejected": -0.24858078360557556, "step": 10210 }, { "epoch": 0.67, "learning_rate": 1.4938628373666236e-06, "logits/chosen": -2.175797939300537, "logits/rejected": -2.2267374992370605, "logps/chosen": -186.3314666748047, "logps/rejected": -202.7518768310547, "loss": 0.0369, "rewards/accuracies": 0.625, "rewards/chosen": -0.06876092404127121, "rewards/margins": 0.0727389007806778, "rewards/rejected": -0.1414998173713684, "step": 10220 }, { "epoch": 0.67, "learning_rate": 1.4886383868404203e-06, "logits/chosen": -2.0863027572631836, "logits/rejected": -1.9565706253051758, "logps/chosen": -175.46356201171875, "logps/rejected": -183.82199096679688, "loss": 0.0168, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.07120905071496964, "rewards/margins": 0.09194178879261017, "rewards/rejected": -0.1631508320569992, "step": 10230 }, { "epoch": 0.67, "learning_rate": 1.483419212071813e-06, "logits/chosen": -2.034952163696289, "logits/rejected": -1.865007996559143, "logps/chosen": -202.6958770751953, "logps/rejected": -209.85952758789062, "loss": 0.035, "rewards/accuracies": 0.625, "rewards/chosen": -0.07115866988897324, "rewards/margins": 0.07172514498233795, "rewards/rejected": -0.1428838074207306, "step": 10240 }, { "epoch": 0.67, "learning_rate": 1.478205340286573e-06, "logits/chosen": -2.1753857135772705, "logits/rejected": -2.151221990585327, "logps/chosen": -227.4921417236328, "logps/rejected": -222.22811889648438, "loss": 0.0441, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.11941616237163544, "rewards/margins": 0.0637040063738823, "rewards/rejected": -0.18312017619609833, "step": 10250 }, { "epoch": 0.67, "learning_rate": 1.4729967986828104e-06, "logits/chosen": -2.3345413208007812, "logits/rejected": -2.1017842292785645, "logps/chosen": -332.4571533203125, "logps/rejected": -302.4991760253906, "loss": 0.0283, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05236706882715225, "rewards/margins": 0.09005019068717957, "rewards/rejected": -0.14241725206375122, "step": 10260 }, { "epoch": 0.67, "learning_rate": 1.4677936144308286e-06, "logits/chosen": -2.3077831268310547, "logits/rejected": -2.01301646232605, "logps/chosen": -235.05453491210938, "logps/rejected": -231.1743621826172, "loss": 0.0282, "rewards/accuracies": 0.75, "rewards/chosen": -0.054049067199230194, "rewards/margins": 0.12091416120529175, "rewards/rejected": -0.17496320605278015, "step": 10270 }, { "epoch": 0.67, "learning_rate": 1.4625958146729864e-06, "logits/chosen": -2.3322410583496094, "logits/rejected": -2.154740571975708, "logps/chosen": -232.1531982421875, "logps/rejected": -233.8138427734375, "loss": 0.021, "rewards/accuracies": 0.625, "rewards/chosen": -0.059446774423122406, "rewards/margins": 0.08804331719875336, "rewards/rejected": -0.14749008417129517, "step": 10280 }, { "epoch": 0.67, "learning_rate": 1.4574034265235523e-06, "logits/chosen": -2.413435459136963, "logits/rejected": -1.8607642650604248, "logps/chosen": -266.5216979980469, "logps/rejected": -190.62388610839844, "loss": 0.0369, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0534658320248127, "rewards/margins": 0.10932525247335434, "rewards/rejected": -0.16279107332229614, "step": 10290 }, { "epoch": 0.67, "learning_rate": 1.452216477068568e-06, "logits/chosen": -2.2776665687561035, "logits/rejected": -1.7751963138580322, "logps/chosen": -241.0727996826172, "logps/rejected": -170.58828735351562, "loss": 0.018, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.055139027535915375, "rewards/margins": 0.11215144395828247, "rewards/rejected": -0.16729044914245605, "step": 10300 }, { "epoch": 0.67, "eval_logits/chosen": -2.255398988723755, "eval_logits/rejected": -2.0697996616363525, "eval_logps/chosen": -243.80836486816406, "eval_logps/rejected": -240.55059814453125, "eval_loss": 0.024353496730327606, "eval_rewards/accuracies": 0.6614999771118164, "eval_rewards/chosen": -0.05901704356074333, "eval_rewards/margins": 0.08567636460065842, "eval_rewards/rejected": -0.14469340443611145, "eval_runtime": 714.1324, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.4, "step": 10300 }, { "epoch": 0.67, "learning_rate": 1.4470349933657004e-06, "logits/chosen": -2.4610915184020996, "logits/rejected": -2.2262465953826904, "logps/chosen": -233.5832977294922, "logps/rejected": -225.6370849609375, "loss": 0.0275, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06426848471164703, "rewards/margins": 0.08016308397054672, "rewards/rejected": -0.14443157613277435, "step": 10310 }, { "epoch": 0.68, "learning_rate": 1.4418590024441096e-06, "logits/chosen": -2.3426105976104736, "logits/rejected": -1.9475975036621094, "logps/chosen": -261.2298278808594, "logps/rejected": -212.2733917236328, "loss": 0.0216, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0388479009270668, "rewards/margins": 0.08764450252056122, "rewards/rejected": -0.12649241089820862, "step": 10320 }, { "epoch": 0.68, "learning_rate": 1.436688531304297e-06, "logits/chosen": -2.340837001800537, "logits/rejected": -2.0119991302490234, "logps/chosen": -226.4020538330078, "logps/rejected": -240.67123413085938, "loss": 0.026, "rewards/accuracies": 0.625, "rewards/chosen": -0.040372543036937714, "rewards/margins": 0.09879190474748611, "rewards/rejected": -0.13916444778442383, "step": 10330 }, { "epoch": 0.68, "learning_rate": 1.431523606917974e-06, "logits/chosen": -2.163691282272339, "logits/rejected": -2.15777850151062, "logps/chosen": -223.59619140625, "logps/rejected": -248.87796020507812, "loss": 0.0276, "rewards/accuracies": 0.625, "rewards/chosen": -0.09248442202806473, "rewards/margins": 0.09333528578281403, "rewards/rejected": -0.18581970036029816, "step": 10340 }, { "epoch": 0.68, "learning_rate": 1.4263642562279162e-06, "logits/chosen": -1.9505666494369507, "logits/rejected": -1.9221442937850952, "logps/chosen": -263.3883361816406, "logps/rejected": -291.3192138671875, "loss": 0.0182, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06950507313013077, "rewards/margins": 0.09720407426357269, "rewards/rejected": -0.16670915484428406, "step": 10350 }, { "epoch": 0.68, "learning_rate": 1.4212105061478257e-06, "logits/chosen": -2.009002447128296, "logits/rejected": -2.0214123725891113, "logps/chosen": -245.10855102539062, "logps/rejected": -272.1604309082031, "loss": 0.0218, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07609008252620697, "rewards/margins": 0.09614210575819016, "rewards/rejected": -0.17223218083381653, "step": 10360 }, { "epoch": 0.68, "learning_rate": 1.4160623835621848e-06, "logits/chosen": -2.3645236492156982, "logits/rejected": -2.2268776893615723, "logps/chosen": -243.3690948486328, "logps/rejected": -249.6556854248047, "loss": 0.0173, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03028956614434719, "rewards/margins": 0.08882386982440948, "rewards/rejected": -0.11911344528198242, "step": 10370 }, { "epoch": 0.68, "learning_rate": 1.4109199153261249e-06, "logits/chosen": -2.1503076553344727, "logits/rejected": -2.042171001434326, "logps/chosen": -286.06243896484375, "logps/rejected": -275.43499755859375, "loss": 0.0148, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05033920332789421, "rewards/margins": 0.09843063354492188, "rewards/rejected": -0.14876984059810638, "step": 10380 }, { "epoch": 0.68, "learning_rate": 1.405783128265278e-06, "logits/chosen": -2.221115827560425, "logits/rejected": -2.198319673538208, "logps/chosen": -217.111328125, "logps/rejected": -228.8067169189453, "loss": 0.0177, "rewards/accuracies": 0.625, "rewards/chosen": -0.06488905847072601, "rewards/margins": 0.07480791211128235, "rewards/rejected": -0.13969698548316956, "step": 10390 }, { "epoch": 0.68, "learning_rate": 1.4006520491756427e-06, "logits/chosen": -2.3132808208465576, "logits/rejected": -2.080735683441162, "logps/chosen": -205.646484375, "logps/rejected": -161.129638671875, "loss": 0.0144, "rewards/accuracies": 0.625, "rewards/chosen": -0.04711119830608368, "rewards/margins": 0.10062043368816376, "rewards/rejected": -0.14773163199424744, "step": 10400 }, { "epoch": 0.68, "eval_logits/chosen": -2.2488584518432617, "eval_logits/rejected": -2.0630223751068115, "eval_logps/chosen": -239.70639038085938, "eval_logps/rejected": -236.77072143554688, "eval_loss": 0.024509282782673836, "eval_rewards/accuracies": 0.6604999899864197, "eval_rewards/chosen": -0.03850714862346649, "eval_rewards/margins": 0.08728697150945663, "eval_rewards/rejected": -0.12579411268234253, "eval_runtime": 714.6697, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 10400 }, { "epoch": 0.68, "learning_rate": 1.39552670482344e-06, "logits/chosen": -2.1409950256347656, "logits/rejected": -2.213547706604004, "logps/chosen": -184.26515197753906, "logps/rejected": -197.73403930664062, "loss": 0.0188, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.048327818512916565, "rewards/margins": 0.06788711249828339, "rewards/rejected": -0.11621493101119995, "step": 10410 }, { "epoch": 0.68, "learning_rate": 1.3904071219449776e-06, "logits/chosen": -2.226022243499756, "logits/rejected": -1.7965023517608643, "logps/chosen": -202.78274536132812, "logps/rejected": -136.0773162841797, "loss": 0.0232, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.014405476860702038, "rewards/margins": 0.08977358788251877, "rewards/rejected": -0.10417908430099487, "step": 10420 }, { "epoch": 0.68, "learning_rate": 1.3852933272465068e-06, "logits/chosen": -2.3943543434143066, "logits/rejected": -2.1661648750305176, "logps/chosen": -241.92822265625, "logps/rejected": -220.4378662109375, "loss": 0.0254, "rewards/accuracies": 0.5, "rewards/chosen": -0.020340552553534508, "rewards/margins": 0.06866742670536041, "rewards/rejected": -0.08900798857212067, "step": 10430 }, { "epoch": 0.68, "learning_rate": 1.3801853474040873e-06, "logits/chosen": -2.1801600456237793, "logits/rejected": -2.1125564575195312, "logps/chosen": -246.2391357421875, "logps/rejected": -248.90744018554688, "loss": 0.0248, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03858952969312668, "rewards/margins": 0.09564275294542313, "rewards/rejected": -0.1342322826385498, "step": 10440 }, { "epoch": 0.68, "learning_rate": 1.3750832090634417e-06, "logits/chosen": -2.3373451232910156, "logits/rejected": -2.034323215484619, "logps/chosen": -191.03738403320312, "logps/rejected": -193.2030487060547, "loss": 0.0102, "rewards/accuracies": 0.625, "rewards/chosen": -0.024323513731360435, "rewards/margins": 0.08408524841070175, "rewards/rejected": -0.10840876400470734, "step": 10450 }, { "epoch": 0.68, "learning_rate": 1.3699869388398245e-06, "logits/chosen": -2.199007034301758, "logits/rejected": -2.0577285289764404, "logps/chosen": -220.73062133789062, "logps/rejected": -220.3302459716797, "loss": 0.0181, "rewards/accuracies": 0.625, "rewards/chosen": -0.03854639083147049, "rewards/margins": 0.09026548266410828, "rewards/rejected": -0.12881188094615936, "step": 10460 }, { "epoch": 0.69, "learning_rate": 1.3648965633178772e-06, "logits/chosen": -2.2158031463623047, "logits/rejected": -2.126948118209839, "logps/chosen": -209.24462890625, "logps/rejected": -239.54849243164062, "loss": 0.0313, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04256223887205124, "rewards/margins": 0.09446152299642563, "rewards/rejected": -0.13702376186847687, "step": 10470 }, { "epoch": 0.69, "learning_rate": 1.3598121090514938e-06, "logits/chosen": -2.203119993209839, "logits/rejected": -2.0843684673309326, "logps/chosen": -190.83518981933594, "logps/rejected": -183.73692321777344, "loss": 0.0274, "rewards/accuracies": 0.625, "rewards/chosen": -0.03913657367229462, "rewards/margins": 0.08512511104345322, "rewards/rejected": -0.12426167726516724, "step": 10480 }, { "epoch": 0.69, "learning_rate": 1.3547336025636753e-06, "logits/chosen": -2.131678342819214, "logits/rejected": -1.9140796661376953, "logps/chosen": -285.03436279296875, "logps/rejected": -258.30731201171875, "loss": 0.0166, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04868927597999573, "rewards/margins": 0.07400977611541748, "rewards/rejected": -0.1226990595459938, "step": 10490 }, { "epoch": 0.69, "learning_rate": 1.3496610703464022e-06, "logits/chosen": -2.2886691093444824, "logits/rejected": -2.0513675212860107, "logps/chosen": -238.70556640625, "logps/rejected": -210.661376953125, "loss": 0.0273, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.047455064952373505, "rewards/margins": 0.08738575875759125, "rewards/rejected": -0.13484081625938416, "step": 10500 }, { "epoch": 0.69, "eval_logits/chosen": -2.2537121772766113, "eval_logits/rejected": -2.067767858505249, "eval_logps/chosen": -240.62744140625, "eval_logps/rejected": -237.07449340820312, "eval_loss": 0.024398881942033768, "eval_rewards/accuracies": 0.656499981880188, "eval_rewards/chosen": -0.043112535029649734, "eval_rewards/margins": 0.084200419485569, "eval_rewards/rejected": -0.12731294333934784, "eval_runtime": 713.601, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.401, "step": 10500 }, { "epoch": 0.69, "learning_rate": 1.3445945388604848e-06, "logits/chosen": -2.1545567512512207, "logits/rejected": -1.904229760169983, "logps/chosen": -255.2687530517578, "logps/rejected": -235.4828338623047, "loss": 0.0265, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08522634953260422, "rewards/margins": 0.11687376350164413, "rewards/rejected": -0.20210011303424835, "step": 10510 }, { "epoch": 0.69, "learning_rate": 1.3395340345354358e-06, "logits/chosen": -2.1851556301116943, "logits/rejected": -2.310978412628174, "logps/chosen": -235.08981323242188, "logps/rejected": -270.3477478027344, "loss": 0.0253, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05695996806025505, "rewards/margins": 0.09005177766084671, "rewards/rejected": -0.14701174199581146, "step": 10520 }, { "epoch": 0.69, "learning_rate": 1.334479583769322e-06, "logits/chosen": -2.4132397174835205, "logits/rejected": -2.111905336380005, "logps/chosen": -263.5672912597656, "logps/rejected": -223.38528442382812, "loss": 0.0273, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04364245384931564, "rewards/margins": 0.048938460648059845, "rewards/rejected": -0.09258091449737549, "step": 10530 }, { "epoch": 0.69, "learning_rate": 1.3294312129286366e-06, "logits/chosen": -2.2596263885498047, "logits/rejected": -2.141056537628174, "logps/chosen": -272.45751953125, "logps/rejected": -269.8136291503906, "loss": 0.0186, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.028644507750868797, "rewards/margins": 0.05513488128781319, "rewards/rejected": -0.08377937972545624, "step": 10540 }, { "epoch": 0.69, "learning_rate": 1.324388948348153e-06, "logits/chosen": -2.411508560180664, "logits/rejected": -2.021793842315674, "logps/chosen": -291.7307434082031, "logps/rejected": -228.37838745117188, "loss": 0.025, "rewards/accuracies": 0.625, "rewards/chosen": -0.03246372565627098, "rewards/margins": 0.09007063508033752, "rewards/rejected": -0.1225343719124794, "step": 10550 }, { "epoch": 0.69, "learning_rate": 1.319352816330796e-06, "logits/chosen": -2.5005040168762207, "logits/rejected": -1.985607385635376, "logps/chosen": -291.51397705078125, "logps/rejected": -204.1352081298828, "loss": 0.0218, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04158360883593559, "rewards/margins": 0.0953831896185875, "rewards/rejected": -0.13696682453155518, "step": 10560 }, { "epoch": 0.69, "learning_rate": 1.314322843147494e-06, "logits/chosen": -2.103975296020508, "logits/rejected": -2.1999995708465576, "logps/chosen": -186.9462432861328, "logps/rejected": -256.3164978027344, "loss": 0.0222, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08310200273990631, "rewards/margins": 0.07251256704330444, "rewards/rejected": -0.15561453998088837, "step": 10570 }, { "epoch": 0.69, "learning_rate": 1.3092990550370526e-06, "logits/chosen": -2.3676490783691406, "logits/rejected": -2.032197952270508, "logps/chosen": -357.0168151855469, "logps/rejected": -287.89605712890625, "loss": 0.0162, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05629213899374008, "rewards/margins": 0.08115239441394806, "rewards/rejected": -0.13744454085826874, "step": 10580 }, { "epoch": 0.69, "learning_rate": 1.3042814782060131e-06, "logits/chosen": -2.2966275215148926, "logits/rejected": -2.005599021911621, "logps/chosen": -182.35781860351562, "logps/rejected": -181.06105041503906, "loss": 0.0119, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.004274829290807247, "rewards/margins": 0.11309751123189926, "rewards/rejected": -0.11737234890460968, "step": 10590 }, { "epoch": 0.69, "learning_rate": 1.2992701388285112e-06, "logits/chosen": -2.3099424839019775, "logits/rejected": -2.059408664703369, "logps/chosen": -274.67364501953125, "logps/rejected": -245.584228515625, "loss": 0.0194, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.026622802019119263, "rewards/margins": 0.0769127830862999, "rewards/rejected": -0.10353559255599976, "step": 10600 }, { "epoch": 0.69, "eval_logits/chosen": -2.2543232440948486, "eval_logits/rejected": -2.0683767795562744, "eval_logps/chosen": -240.60284423828125, "eval_logps/rejected": -237.0673370361328, "eval_loss": 0.024293892085552216, "eval_rewards/accuracies": 0.6635000109672546, "eval_rewards/chosen": -0.04298943653702736, "eval_rewards/margins": 0.08428782224655151, "eval_rewards/rejected": -0.12727726995944977, "eval_runtime": 714.2376, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 10600 }, { "epoch": 0.69, "learning_rate": 1.29426506304615e-06, "logits/chosen": -2.1544384956359863, "logits/rejected": -2.087916374206543, "logps/chosen": -239.1622314453125, "logps/rejected": -227.13082885742188, "loss": 0.0438, "rewards/accuracies": 0.625, "rewards/chosen": -0.08219398558139801, "rewards/margins": 0.03723324462771416, "rewards/rejected": -0.11942724138498306, "step": 10610 }, { "epoch": 0.69, "learning_rate": 1.289266276967855e-06, "logits/chosen": -2.313107967376709, "logits/rejected": -2.1951651573181152, "logps/chosen": -346.13580322265625, "logps/rejected": -270.1081848144531, "loss": 0.0207, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.037769950926303864, "rewards/margins": 0.07065282016992569, "rewards/rejected": -0.10842277109622955, "step": 10620 }, { "epoch": 0.7, "learning_rate": 1.284273806669745e-06, "logits/chosen": -2.264178514480591, "logits/rejected": -2.0449130535125732, "logps/chosen": -255.4210968017578, "logps/rejected": -291.5980224609375, "loss": 0.0198, "rewards/accuracies": 0.625, "rewards/chosen": -0.0804806649684906, "rewards/margins": 0.09318158775568008, "rewards/rejected": -0.17366227507591248, "step": 10630 }, { "epoch": 0.7, "learning_rate": 1.2792876781949884e-06, "logits/chosen": -2.0060455799102783, "logits/rejected": -1.7587082386016846, "logps/chosen": -216.16738891601562, "logps/rejected": -214.92654418945312, "loss": 0.0319, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.050473641604185104, "rewards/margins": 0.09038424491882324, "rewards/rejected": -0.14085790514945984, "step": 10640 }, { "epoch": 0.7, "learning_rate": 1.274307917553676e-06, "logits/chosen": -2.256579637527466, "logits/rejected": -2.1760025024414062, "logps/chosen": -207.838623046875, "logps/rejected": -258.51043701171875, "loss": 0.0265, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.06089919060468674, "rewards/margins": 0.13036146759986877, "rewards/rejected": -0.1912606656551361, "step": 10650 }, { "epoch": 0.7, "learning_rate": 1.2693345507226767e-06, "logits/chosen": -2.0505454540252686, "logits/rejected": -2.1315135955810547, "logps/chosen": -245.26171875, "logps/rejected": -271.7505798339844, "loss": 0.0171, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07635726779699326, "rewards/margins": 0.12187687307596207, "rewards/rejected": -0.19823415577411652, "step": 10660 }, { "epoch": 0.7, "learning_rate": 1.2643676036455099e-06, "logits/chosen": -2.3000295162200928, "logits/rejected": -2.206305742263794, "logps/chosen": -299.7503967285156, "logps/rejected": -259.10772705078125, "loss": 0.0212, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.059489428997039795, "rewards/margins": 0.05049192160367966, "rewards/rejected": -0.10998135805130005, "step": 10670 }, { "epoch": 0.7, "learning_rate": 1.259407102232203e-06, "logits/chosen": -2.3993871212005615, "logits/rejected": -2.0211315155029297, "logps/chosen": -288.9398498535156, "logps/rejected": -235.0491180419922, "loss": 0.0156, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.05652741715312004, "rewards/margins": 0.09526638686656952, "rewards/rejected": -0.15179380774497986, "step": 10680 }, { "epoch": 0.7, "learning_rate": 1.254453072359163e-06, "logits/chosen": -2.3295562267303467, "logits/rejected": -2.1101839542388916, "logps/chosen": -242.13516235351562, "logps/rejected": -235.0244903564453, "loss": 0.0192, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04057559370994568, "rewards/margins": 0.07377767562866211, "rewards/rejected": -0.11435327678918839, "step": 10690 }, { "epoch": 0.7, "learning_rate": 1.2495055398690337e-06, "logits/chosen": -2.418180465698242, "logits/rejected": -2.1741397380828857, "logps/chosen": -237.8803253173828, "logps/rejected": -240.59848022460938, "loss": 0.0199, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.04012968763709068, "rewards/margins": 0.06000664830207825, "rewards/rejected": -0.10013633966445923, "step": 10700 }, { "epoch": 0.7, "eval_logits/chosen": -2.2555792331695557, "eval_logits/rejected": -2.069556713104248, "eval_logps/chosen": -240.78065490722656, "eval_logps/rejected": -236.7907257080078, "eval_loss": 0.02439829520881176, "eval_rewards/accuracies": 0.659500002861023, "eval_rewards/chosen": -0.04387851804494858, "eval_rewards/margins": 0.08201548457145691, "eval_rewards/rejected": -0.1258939951658249, "eval_runtime": 716.3713, "eval_samples_per_second": 2.792, "eval_steps_per_second": 1.396, "step": 10700 }, { "epoch": 0.7, "learning_rate": 1.2445645305705718e-06, "logits/chosen": -2.4278831481933594, "logits/rejected": -2.1002821922302246, "logps/chosen": -218.99618530273438, "logps/rejected": -210.4351806640625, "loss": 0.0272, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06337618827819824, "rewards/margins": 0.06040935590863228, "rewards/rejected": -0.12378554046154022, "step": 10710 }, { "epoch": 0.7, "learning_rate": 1.2396300702384995e-06, "logits/chosen": -2.4178760051727295, "logits/rejected": -2.1771225929260254, "logps/chosen": -267.5671081542969, "logps/rejected": -238.9056396484375, "loss": 0.0135, "rewards/accuracies": 0.625, "rewards/chosen": -0.04879522696137428, "rewards/margins": 0.045019179582595825, "rewards/rejected": -0.09381439536809921, "step": 10720 }, { "epoch": 0.7, "learning_rate": 1.234702184613381e-06, "logits/chosen": -2.0416836738586426, "logits/rejected": -2.115546226501465, "logps/chosen": -224.7384490966797, "logps/rejected": -239.72744750976562, "loss": 0.0159, "rewards/accuracies": 0.625, "rewards/chosen": -0.046280622482299805, "rewards/margins": 0.07662250101566315, "rewards/rejected": -0.12290313094854355, "step": 10730 }, { "epoch": 0.7, "learning_rate": 1.2297808994014793e-06, "logits/chosen": -2.3566012382507324, "logits/rejected": -2.110050678253174, "logps/chosen": -292.0399475097656, "logps/rejected": -270.0953369140625, "loss": 0.0192, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.030525337904691696, "rewards/margins": 0.0647398829460144, "rewards/rejected": -0.0952652245759964, "step": 10740 }, { "epoch": 0.7, "learning_rate": 1.2248662402746314e-06, "logits/chosen": -2.190117359161377, "logits/rejected": -2.0638766288757324, "logps/chosen": -206.7563934326172, "logps/rejected": -214.2916259765625, "loss": 0.0326, "rewards/accuracies": 0.625, "rewards/chosen": -0.08419866859912872, "rewards/margins": 0.06825531274080276, "rewards/rejected": -0.1524539738893509, "step": 10750 }, { "epoch": 0.7, "learning_rate": 1.2199582328701045e-06, "logits/chosen": -2.356628179550171, "logits/rejected": -1.8749473094940186, "logps/chosen": -301.6092529296875, "logps/rejected": -279.6309509277344, "loss": 0.0247, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05487877130508423, "rewards/margins": 0.09156282991170883, "rewards/rejected": -0.14644160866737366, "step": 10760 }, { "epoch": 0.7, "learning_rate": 1.2150569027904712e-06, "logits/chosen": -2.2430715560913086, "logits/rejected": -2.1345303058624268, "logps/chosen": -255.2360076904297, "logps/rejected": -265.4273681640625, "loss": 0.0386, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.046262722462415695, "rewards/margins": 0.08247127383947372, "rewards/rejected": -0.12873399257659912, "step": 10770 }, { "epoch": 0.71, "learning_rate": 1.2101622756034688e-06, "logits/chosen": -2.2396297454833984, "logits/rejected": -2.224565029144287, "logps/chosen": -230.2205810546875, "logps/rejected": -216.84671020507812, "loss": 0.031, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.03733275085687637, "rewards/margins": 0.08495677262544632, "rewards/rejected": -0.1222895160317421, "step": 10780 }, { "epoch": 0.71, "learning_rate": 1.2052743768418715e-06, "logits/chosen": -2.295220136642456, "logits/rejected": -2.0792322158813477, "logps/chosen": -252.6884002685547, "logps/rejected": -236.13229370117188, "loss": 0.0098, "rewards/accuracies": 0.625, "rewards/chosen": -0.024396592751145363, "rewards/margins": 0.08725408464670181, "rewards/rejected": -0.11165066808462143, "step": 10790 }, { "epoch": 0.71, "learning_rate": 1.2003932320033523e-06, "logits/chosen": -2.3979673385620117, "logits/rejected": -2.0977725982666016, "logps/chosen": -231.81423950195312, "logps/rejected": -254.93875122070312, "loss": 0.0349, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.02655757963657379, "rewards/margins": 0.11246329545974731, "rewards/rejected": -0.1390208601951599, "step": 10800 }, { "epoch": 0.71, "eval_logits/chosen": -2.2533140182495117, "eval_logits/rejected": -2.0673305988311768, "eval_logps/chosen": -239.88389587402344, "eval_logps/rejected": -236.12091064453125, "eval_loss": 0.02448507584631443, "eval_rewards/accuracies": 0.6585000157356262, "eval_rewards/chosen": -0.03939465060830116, "eval_rewards/margins": 0.08315033465623856, "eval_rewards/rejected": -0.12254498898983002, "eval_runtime": 714.098, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.4, "step": 10800 }, { "epoch": 0.71, "learning_rate": 1.1955188665503553e-06, "logits/chosen": -2.064089059829712, "logits/rejected": -2.1178905963897705, "logps/chosen": -217.57968139648438, "logps/rejected": -211.15603637695312, "loss": 0.0306, "rewards/accuracies": 0.625, "rewards/chosen": -0.06227996200323105, "rewards/margins": 0.07183237373828888, "rewards/rejected": -0.13411234319210052, "step": 10810 }, { "epoch": 0.71, "learning_rate": 1.1906513059099566e-06, "logits/chosen": -2.2473361492156982, "logits/rejected": -1.9340236186981201, "logps/chosen": -237.6453857421875, "logps/rejected": -251.6685028076172, "loss": 0.0182, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.030781585723161697, "rewards/margins": 0.11381824314594269, "rewards/rejected": -0.1445998251438141, "step": 10820 }, { "epoch": 0.71, "learning_rate": 1.185790575473738e-06, "logits/chosen": -2.1922922134399414, "logits/rejected": -1.9812322854995728, "logps/chosen": -237.2852325439453, "logps/rejected": -210.5954132080078, "loss": 0.0278, "rewards/accuracies": 0.625, "rewards/chosen": -0.040759555995464325, "rewards/margins": 0.1029219776391983, "rewards/rejected": -0.14368152618408203, "step": 10830 }, { "epoch": 0.71, "learning_rate": 1.1809367005976516e-06, "logits/chosen": -2.258755683898926, "logits/rejected": -2.0178275108337402, "logps/chosen": -290.83380126953125, "logps/rejected": -230.97933959960938, "loss": 0.0348, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.041695039719343185, "rewards/margins": 0.05596474930644035, "rewards/rejected": -0.09765978157520294, "step": 10840 }, { "epoch": 0.71, "learning_rate": 1.1760897066018842e-06, "logits/chosen": -2.1914420127868652, "logits/rejected": -2.0563783645629883, "logps/chosen": -229.926513671875, "logps/rejected": -243.2219696044922, "loss": 0.0136, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03442706912755966, "rewards/margins": 0.09646196663379669, "rewards/rejected": -0.13088904321193695, "step": 10850 }, { "epoch": 0.71, "learning_rate": 1.1712496187707327e-06, "logits/chosen": -2.219277858734131, "logits/rejected": -1.9796173572540283, "logps/chosen": -254.57894897460938, "logps/rejected": -267.26800537109375, "loss": 0.0609, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07137357443571091, "rewards/margins": 0.1337740272283554, "rewards/rejected": -0.20514757931232452, "step": 10860 }, { "epoch": 0.71, "learning_rate": 1.1664164623524646e-06, "logits/chosen": -2.1844263076782227, "logits/rejected": -2.0241284370422363, "logps/chosen": -224.78298950195312, "logps/rejected": -212.51327514648438, "loss": 0.0336, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03203979507088661, "rewards/margins": 0.07993793487548828, "rewards/rejected": -0.11197773367166519, "step": 10870 }, { "epoch": 0.71, "learning_rate": 1.1615902625591926e-06, "logits/chosen": -2.1608195304870605, "logits/rejected": -2.0687127113342285, "logps/chosen": -240.38119506835938, "logps/rejected": -244.77163696289062, "loss": 0.0303, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07579996436834335, "rewards/margins": 0.06902037560939789, "rewards/rejected": -0.14482033252716064, "step": 10880 }, { "epoch": 0.71, "learning_rate": 1.156771044566738e-06, "logits/chosen": -2.2513537406921387, "logits/rejected": -2.177926540374756, "logps/chosen": -276.24200439453125, "logps/rejected": -247.43820190429688, "loss": 0.0151, "rewards/accuracies": 0.625, "rewards/chosen": -0.05813673138618469, "rewards/margins": 0.0720682367682457, "rewards/rejected": -0.1302049607038498, "step": 10890 }, { "epoch": 0.71, "learning_rate": 1.1519588335145037e-06, "logits/chosen": -2.1998701095581055, "logits/rejected": -2.3404908180236816, "logps/chosen": -214.55245971679688, "logps/rejected": -241.935302734375, "loss": 0.0294, "rewards/accuracies": 0.625, "rewards/chosen": -0.03486829251050949, "rewards/margins": 0.04341721534729004, "rewards/rejected": -0.07828550785779953, "step": 10900 }, { "epoch": 0.71, "eval_logits/chosen": -2.255387783050537, "eval_logits/rejected": -2.0696256160736084, "eval_logps/chosen": -241.1903839111328, "eval_logps/rejected": -236.889892578125, "eval_loss": 0.02456137165427208, "eval_rewards/accuracies": 0.6614999771118164, "eval_rewards/chosen": -0.04592716321349144, "eval_rewards/margins": 0.0804627314209938, "eval_rewards/rejected": -0.12638989090919495, "eval_runtime": 712.0708, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.404, "step": 10900 }, { "epoch": 0.71, "learning_rate": 1.1471536545053382e-06, "logits/chosen": -2.291710138320923, "logits/rejected": -2.275481700897217, "logps/chosen": -214.15664672851562, "logps/rejected": -240.5742950439453, "loss": 0.0439, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.025608647614717484, "rewards/margins": 0.094327412545681, "rewards/rejected": -0.11993608623743057, "step": 10910 }, { "epoch": 0.71, "learning_rate": 1.1423555326054112e-06, "logits/chosen": -2.1900248527526855, "logits/rejected": -1.9168975353240967, "logps/chosen": -289.61859130859375, "logps/rejected": -248.3450927734375, "loss": 0.018, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.03392020985484123, "rewards/margins": 0.1608126163482666, "rewards/rejected": -0.19473282992839813, "step": 10920 }, { "epoch": 0.72, "learning_rate": 1.1375644928440743e-06, "logits/chosen": -2.3279836177825928, "logits/rejected": -1.9061031341552734, "logps/chosen": -244.189208984375, "logps/rejected": -197.33164978027344, "loss": 0.0143, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.037529680877923965, "rewards/margins": 0.10323164612054825, "rewards/rejected": -0.14076131582260132, "step": 10930 }, { "epoch": 0.72, "learning_rate": 1.1327805602137396e-06, "logits/chosen": -2.2742772102355957, "logits/rejected": -2.094223976135254, "logps/chosen": -272.142333984375, "logps/rejected": -235.36026000976562, "loss": 0.0187, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0669911801815033, "rewards/margins": 0.0809796154499054, "rewards/rejected": -0.1479707956314087, "step": 10940 }, { "epoch": 0.72, "learning_rate": 1.1280037596697426e-06, "logits/chosen": -2.121037006378174, "logits/rejected": -2.0819196701049805, "logps/chosen": -233.88143920898438, "logps/rejected": -297.5550231933594, "loss": 0.0321, "rewards/accuracies": 0.75, "rewards/chosen": -0.06499499827623367, "rewards/margins": 0.1337338089942932, "rewards/rejected": -0.1987287998199463, "step": 10950 }, { "epoch": 0.72, "learning_rate": 1.123234116130216e-06, "logits/chosen": -2.1778290271759033, "logits/rejected": -2.141162872314453, "logps/chosen": -196.35324096679688, "logps/rejected": -225.81494140625, "loss": 0.0302, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05232428386807442, "rewards/margins": 0.11703711748123169, "rewards/rejected": -0.16936138272285461, "step": 10960 }, { "epoch": 0.72, "learning_rate": 1.1184716544759553e-06, "logits/chosen": -2.033923625946045, "logits/rejected": -2.009354591369629, "logps/chosen": -174.05067443847656, "logps/rejected": -196.1552734375, "loss": 0.0388, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.043432433158159256, "rewards/margins": 0.04845009371638298, "rewards/rejected": -0.09188252687454224, "step": 10970 }, { "epoch": 0.72, "learning_rate": 1.1137163995502948e-06, "logits/chosen": -2.4753432273864746, "logits/rejected": -2.243244171142578, "logps/chosen": -228.891357421875, "logps/rejected": -211.28121948242188, "loss": 0.0177, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0367816723883152, "rewards/margins": 0.07545877248048782, "rewards/rejected": -0.11224043369293213, "step": 10980 }, { "epoch": 0.72, "learning_rate": 1.1089683761589717e-06, "logits/chosen": -2.096439838409424, "logits/rejected": -1.9385840892791748, "logps/chosen": -244.000732421875, "logps/rejected": -249.7571258544922, "loss": 0.0134, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.01897195540368557, "rewards/margins": 0.12335314601659775, "rewards/rejected": -0.14232513308525085, "step": 10990 }, { "epoch": 0.72, "learning_rate": 1.1042276090700044e-06, "logits/chosen": -2.251804828643799, "logits/rejected": -2.199323892593384, "logps/chosen": -226.4453582763672, "logps/rejected": -269.26055908203125, "loss": 0.0493, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08071614801883698, "rewards/margins": 0.06322924047708511, "rewards/rejected": -0.1439453810453415, "step": 11000 }, { "epoch": 0.72, "eval_logits/chosen": -2.257098436355591, "eval_logits/rejected": -2.0713884830474854, "eval_logps/chosen": -240.1348876953125, "eval_logps/rejected": -235.5289306640625, "eval_loss": 0.02467404119670391, "eval_rewards/accuracies": 0.6554999947547913, "eval_rewards/chosen": -0.04064975306391716, "eval_rewards/margins": 0.07893543690443039, "eval_rewards/rejected": -0.11958518624305725, "eval_runtime": 712.198, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 11000 }, { "epoch": 0.72, "learning_rate": 1.0994941230135536e-06, "logits/chosen": -2.217406749725342, "logits/rejected": -1.925784707069397, "logps/chosen": -237.1549835205078, "logps/rejected": -228.53414916992188, "loss": 0.0112, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.02152491733431816, "rewards/margins": 0.1371946781873703, "rewards/rejected": -0.15871959924697876, "step": 11010 }, { "epoch": 0.72, "learning_rate": 1.094767942681804e-06, "logits/chosen": -2.495387554168701, "logits/rejected": -2.0764713287353516, "logps/chosen": -265.5306701660156, "logps/rejected": -244.12820434570312, "loss": 0.0315, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08830820024013519, "rewards/margins": 0.08899056166410446, "rewards/rejected": -0.17729876935482025, "step": 11020 }, { "epoch": 0.72, "learning_rate": 1.0900490927288248e-06, "logits/chosen": -2.0295703411102295, "logits/rejected": -1.984675645828247, "logps/chosen": -272.8847351074219, "logps/rejected": -235.7385711669922, "loss": 0.0242, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06208237260580063, "rewards/margins": 0.06165067106485367, "rewards/rejected": -0.1237330436706543, "step": 11030 }, { "epoch": 0.72, "learning_rate": 1.0853375977704511e-06, "logits/chosen": -2.2647392749786377, "logits/rejected": -2.0776801109313965, "logps/chosen": -249.8405303955078, "logps/rejected": -199.8957061767578, "loss": 0.0273, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.047079846262931824, "rewards/margins": 0.07032942026853561, "rewards/rejected": -0.11740926653146744, "step": 11040 }, { "epoch": 0.72, "learning_rate": 1.0806334823841466e-06, "logits/chosen": -2.110968589782715, "logits/rejected": -2.217334747314453, "logps/chosen": -254.81399536132812, "logps/rejected": -295.74554443359375, "loss": 0.0395, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.07599040865898132, "rewards/margins": 0.05764209106564522, "rewards/rejected": -0.13363249599933624, "step": 11050 }, { "epoch": 0.72, "learning_rate": 1.0759367711088825e-06, "logits/chosen": -2.1156725883483887, "logits/rejected": -2.253272294998169, "logps/chosen": -192.5499725341797, "logps/rejected": -236.18276977539062, "loss": 0.0224, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.043587371706962585, "rewards/margins": 0.05077790096402168, "rewards/rejected": -0.09436526894569397, "step": 11060 }, { "epoch": 0.72, "learning_rate": 1.0712474884450056e-06, "logits/chosen": -2.2297728061676025, "logits/rejected": -2.032270908355713, "logps/chosen": -209.87161254882812, "logps/rejected": -199.3758544921875, "loss": 0.0533, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.035191889852285385, "rewards/margins": 0.09145514667034149, "rewards/rejected": -0.12664702534675598, "step": 11070 }, { "epoch": 0.72, "learning_rate": 1.066565658854112e-06, "logits/chosen": -2.174771547317505, "logits/rejected": -2.1367924213409424, "logps/chosen": -139.6210479736328, "logps/rejected": -152.4537811279297, "loss": 0.0249, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.058945734053850174, "rewards/margins": 0.0803261548280716, "rewards/rejected": -0.13927188515663147, "step": 11080 }, { "epoch": 0.73, "learning_rate": 1.0618913067589165e-06, "logits/chosen": -2.283958673477173, "logits/rejected": -2.0666098594665527, "logps/chosen": -222.7408905029297, "logps/rejected": -204.1822509765625, "loss": 0.0378, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.034630995243787766, "rewards/margins": 0.09450625628232956, "rewards/rejected": -0.12913724780082703, "step": 11090 }, { "epoch": 0.73, "learning_rate": 1.0572244565431313e-06, "logits/chosen": -2.13905668258667, "logits/rejected": -2.020878553390503, "logps/chosen": -157.81869506835938, "logps/rejected": -179.3304901123047, "loss": 0.0186, "rewards/accuracies": 0.75, "rewards/chosen": -0.06200949102640152, "rewards/margins": 0.08175288140773773, "rewards/rejected": -0.14376236498355865, "step": 11100 }, { "epoch": 0.73, "eval_logits/chosen": -2.2600183486938477, "eval_logits/rejected": -2.0741000175476074, "eval_logps/chosen": -239.24647521972656, "eval_logps/rejected": -235.19859313964844, "eval_loss": 0.02463115192949772, "eval_rewards/accuracies": 0.6604999899864197, "eval_rewards/chosen": -0.036207519471645355, "eval_rewards/margins": 0.08172591030597687, "eval_rewards/rejected": -0.11793343722820282, "eval_runtime": 712.2662, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 11100 }, { "epoch": 0.73, "learning_rate": 1.0525651325513317e-06, "logits/chosen": -2.2700681686401367, "logits/rejected": -2.2490804195404053, "logps/chosen": -337.8574523925781, "logps/rejected": -326.71588134765625, "loss": 0.0254, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.03293894976377487, "rewards/margins": 0.05006919056177139, "rewards/rejected": -0.08300813287496567, "step": 11110 }, { "epoch": 0.73, "learning_rate": 1.0479133590888351e-06, "logits/chosen": -2.3007349967956543, "logits/rejected": -2.011467695236206, "logps/chosen": -262.3280334472656, "logps/rejected": -254.04037475585938, "loss": 0.0181, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03436540812253952, "rewards/margins": 0.09342274814844131, "rewards/rejected": -0.12778815627098083, "step": 11120 }, { "epoch": 0.73, "learning_rate": 1.0432691604215695e-06, "logits/chosen": -2.1729531288146973, "logits/rejected": -2.0845744609832764, "logps/chosen": -236.9325714111328, "logps/rejected": -216.92691040039062, "loss": 0.0292, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.00942798238247633, "rewards/margins": 0.058537401258945465, "rewards/rejected": -0.06796539574861526, "step": 11130 }, { "epoch": 0.73, "learning_rate": 1.0386325607759515e-06, "logits/chosen": -2.188417911529541, "logits/rejected": -2.1244304180145264, "logps/chosen": -193.04457092285156, "logps/rejected": -190.75962829589844, "loss": 0.0263, "rewards/accuracies": 0.625, "rewards/chosen": -0.008272209204733372, "rewards/margins": 0.08892752230167389, "rewards/rejected": -0.09719973802566528, "step": 11140 }, { "epoch": 0.73, "learning_rate": 1.0340035843387544e-06, "logits/chosen": -2.2836594581604004, "logits/rejected": -1.9597225189208984, "logps/chosen": -187.1991729736328, "logps/rejected": -174.34954833984375, "loss": 0.0164, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.037318091839551926, "rewards/margins": 0.07366606593132019, "rewards/rejected": -0.11098414659500122, "step": 11150 }, { "epoch": 0.73, "learning_rate": 1.0293822552569887e-06, "logits/chosen": -2.401132106781006, "logits/rejected": -2.1063313484191895, "logps/chosen": -257.7044372558594, "logps/rejected": -222.4474639892578, "loss": 0.0185, "rewards/accuracies": 0.625, "rewards/chosen": -0.02910296991467476, "rewards/margins": 0.10286752879619598, "rewards/rejected": -0.13197049498558044, "step": 11160 }, { "epoch": 0.73, "learning_rate": 1.0247685976377688e-06, "logits/chosen": -2.1574559211730957, "logits/rejected": -1.9853312969207764, "logps/chosen": -191.28790283203125, "logps/rejected": -168.7615966796875, "loss": 0.0258, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04801440238952637, "rewards/margins": 0.08152531087398529, "rewards/rejected": -0.12953971326351166, "step": 11170 }, { "epoch": 0.73, "learning_rate": 1.0201626355481939e-06, "logits/chosen": -2.3552398681640625, "logits/rejected": -2.0675008296966553, "logps/chosen": -225.4108428955078, "logps/rejected": -189.56910705566406, "loss": 0.0137, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04645168036222458, "rewards/margins": 0.08859079331159592, "rewards/rejected": -0.1350424736738205, "step": 11180 }, { "epoch": 0.73, "learning_rate": 1.0155643930152192e-06, "logits/chosen": -2.370053768157959, "logits/rejected": -2.282047986984253, "logps/chosen": -281.9950866699219, "logps/rejected": -237.9798126220703, "loss": 0.0163, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04693884029984474, "rewards/margins": 0.05315285921096802, "rewards/rejected": -0.10009171068668365, "step": 11190 }, { "epoch": 0.73, "learning_rate": 1.0109738940255286e-06, "logits/chosen": -2.197000503540039, "logits/rejected": -1.9323844909667969, "logps/chosen": -222.72396850585938, "logps/rejected": -200.8384246826172, "loss": 0.0233, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.028707217425107956, "rewards/margins": 0.07697827368974686, "rewards/rejected": -0.10568549484014511, "step": 11200 }, { "epoch": 0.73, "eval_logits/chosen": -2.260996103286743, "eval_logits/rejected": -2.0750019550323486, "eval_logps/chosen": -237.5009002685547, "eval_logps/rejected": -233.3054962158203, "eval_loss": 0.02466612309217453, "eval_rewards/accuracies": 0.6585000157356262, "eval_rewards/chosen": -0.027479784563183784, "eval_rewards/margins": 0.08098819851875305, "eval_rewards/rejected": -0.10846797376871109, "eval_runtime": 713.1565, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 11200 }, { "epoch": 0.73, "learning_rate": 1.0063911625254155e-06, "logits/chosen": -2.264378070831299, "logits/rejected": -2.126863718032837, "logps/chosen": -234.0228729248047, "logps/rejected": -250.18081665039062, "loss": 0.0234, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.009498958475887775, "rewards/margins": 0.07764261215925217, "rewards/rejected": -0.06814365088939667, "step": 11210 }, { "epoch": 0.73, "learning_rate": 1.0018162224206502e-06, "logits/chosen": -2.174161434173584, "logits/rejected": -2.084477663040161, "logps/chosen": -183.6377410888672, "logps/rejected": -209.85507202148438, "loss": 0.0199, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.045814670622348785, "rewards/margins": 0.11253632605075836, "rewards/rejected": -0.15835098922252655, "step": 11220 }, { "epoch": 0.73, "learning_rate": 9.97249097576363e-07, "logits/chosen": -2.424260377883911, "logits/rejected": -2.1379024982452393, "logps/chosen": -238.74502563476562, "logps/rejected": -215.4193878173828, "loss": 0.0288, "rewards/accuracies": 0.75, "rewards/chosen": -0.028442194685339928, "rewards/margins": 0.10083095729351044, "rewards/rejected": -0.12927314639091492, "step": 11230 }, { "epoch": 0.74, "learning_rate": 9.92689811816913e-07, "logits/chosen": -2.290501594543457, "logits/rejected": -2.0555148124694824, "logps/chosen": -225.33627319335938, "logps/rejected": -193.66452026367188, "loss": 0.0369, "rewards/accuracies": 0.625, "rewards/chosen": -0.06407545506954193, "rewards/margins": 0.07865617424249649, "rewards/rejected": -0.14273162186145782, "step": 11240 }, { "epoch": 0.74, "learning_rate": 9.881383889257691e-07, "logits/chosen": -2.2328104972839355, "logits/rejected": -2.2730355262756348, "logps/chosen": -174.77767944335938, "logps/rejected": -251.2799530029297, "loss": 0.0145, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.027403950691223145, "rewards/margins": 0.06211910769343376, "rewards/rejected": -0.0895230621099472, "step": 11250 }, { "epoch": 0.74, "learning_rate": 9.835948526453817e-07, "logits/chosen": -2.0998029708862305, "logits/rejected": -2.2713968753814697, "logps/chosen": -190.5878448486328, "logps/rejected": -244.7119598388672, "loss": 0.0368, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05079245567321777, "rewards/margins": 0.052261579781770706, "rewards/rejected": -0.10305403172969818, "step": 11260 }, { "epoch": 0.74, "learning_rate": 9.790592266770633e-07, "logits/chosen": -2.4601409435272217, "logits/rejected": -2.1775612831115723, "logps/chosen": -276.0139465332031, "logps/rejected": -262.95062255859375, "loss": 0.0285, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03334607556462288, "rewards/margins": 0.07587692886590958, "rewards/rejected": -0.10922299325466156, "step": 11270 }, { "epoch": 0.74, "learning_rate": 9.745315346808584e-07, "logits/chosen": -2.1334099769592285, "logits/rejected": -1.9849653244018555, "logps/chosen": -223.7794952392578, "logps/rejected": -214.8672637939453, "loss": 0.0326, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.02352016046643257, "rewards/margins": 0.0673544630408287, "rewards/rejected": -0.09087462723255157, "step": 11280 }, { "epoch": 0.74, "learning_rate": 9.70011800275428e-07, "logits/chosen": -2.1849100589752197, "logits/rejected": -2.1144683361053467, "logps/chosen": -245.4765625, "logps/rejected": -275.33837890625, "loss": 0.022, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03975829482078552, "rewards/margins": 0.09207911789417267, "rewards/rejected": -0.1318374127149582, "step": 11290 }, { "epoch": 0.74, "learning_rate": 9.655000470379206e-07, "logits/chosen": -2.104613780975342, "logits/rejected": -2.0167109966278076, "logps/chosen": -219.29104614257812, "logps/rejected": -230.323486328125, "loss": 0.0218, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04471781104803085, "rewards/margins": 0.10857494175434113, "rewards/rejected": -0.15329274535179138, "step": 11300 }, { "epoch": 0.74, "eval_logits/chosen": -2.262881278991699, "eval_logits/rejected": -2.0764498710632324, "eval_logps/chosen": -239.4001007080078, "eval_logps/rejected": -235.6196746826172, "eval_loss": 0.02440127171576023, "eval_rewards/accuracies": 0.6575000286102295, "eval_rewards/chosen": -0.036975663155317307, "eval_rewards/margins": 0.08306314796209335, "eval_rewards/rejected": -0.12003880739212036, "eval_runtime": 713.3374, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 11300 }, { "epoch": 0.74, "learning_rate": 9.609962985038517e-07, "logits/chosen": -2.3760733604431152, "logits/rejected": -1.9836667776107788, "logps/chosen": -223.32772827148438, "logps/rejected": -223.2099609375, "loss": 0.0285, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03983340784907341, "rewards/margins": 0.1250358521938324, "rewards/rejected": -0.1648692637681961, "step": 11310 }, { "epoch": 0.74, "learning_rate": 9.565005781669786e-07, "logits/chosen": -2.4255800247192383, "logits/rejected": -2.0649266242980957, "logps/chosen": -265.18695068359375, "logps/rejected": -232.25265502929688, "loss": 0.0244, "rewards/accuracies": 0.75, "rewards/chosen": -0.022981833666563034, "rewards/margins": 0.10189278423786163, "rewards/rejected": -0.12487462908029556, "step": 11320 }, { "epoch": 0.74, "learning_rate": 9.520129094791822e-07, "logits/chosen": -2.2258224487304688, "logits/rejected": -2.0585074424743652, "logps/chosen": -191.6188507080078, "logps/rejected": -201.65322875976562, "loss": 0.0376, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.06061561033129692, "rewards/margins": 0.10118802636861801, "rewards/rejected": -0.16180363297462463, "step": 11330 }, { "epoch": 0.74, "learning_rate": 9.475333158503389e-07, "logits/chosen": -2.216360569000244, "logits/rejected": -1.9243495464324951, "logps/chosen": -221.27688598632812, "logps/rejected": -187.52163696289062, "loss": 0.0248, "rewards/accuracies": 0.625, "rewards/chosen": -0.017273029312491417, "rewards/margins": 0.057016678154468536, "rewards/rejected": -0.0742897093296051, "step": 11340 }, { "epoch": 0.74, "learning_rate": 9.430618206482053e-07, "logits/chosen": -2.187056064605713, "logits/rejected": -2.1244726181030273, "logps/chosen": -144.8751220703125, "logps/rejected": -155.0688018798828, "loss": 0.0175, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.022261008620262146, "rewards/margins": 0.047990910708904266, "rewards/rejected": -0.07025192677974701, "step": 11350 }, { "epoch": 0.74, "learning_rate": 9.385984471982892e-07, "logits/chosen": -2.168158531188965, "logits/rejected": -1.8052467107772827, "logps/chosen": -222.80551147460938, "logps/rejected": -199.8382568359375, "loss": 0.0185, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03989826887845993, "rewards/margins": 0.1388367861509323, "rewards/rejected": -0.17873504757881165, "step": 11360 }, { "epoch": 0.74, "learning_rate": 9.341432187837343e-07, "logits/chosen": -2.2368786334991455, "logits/rejected": -2.144381046295166, "logps/chosen": -196.49130249023438, "logps/rejected": -232.60446166992188, "loss": 0.0365, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.030937695875763893, "rewards/margins": 0.1062239557504654, "rewards/rejected": -0.13716165721416473, "step": 11370 }, { "epoch": 0.74, "learning_rate": 9.29696158645193e-07, "logits/chosen": -2.1705503463745117, "logits/rejected": -2.2710890769958496, "logps/chosen": -225.888427734375, "logps/rejected": -282.481201171875, "loss": 0.0152, "rewards/accuracies": 0.75, "rewards/chosen": -0.023930717259645462, "rewards/margins": 0.13801470398902893, "rewards/rejected": -0.1619454324245453, "step": 11380 }, { "epoch": 0.75, "learning_rate": 9.252572899807111e-07, "logits/chosen": -2.2330169677734375, "logits/rejected": -2.245069742202759, "logps/chosen": -278.10125732421875, "logps/rejected": -275.082763671875, "loss": 0.0087, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.03750836104154587, "rewards/margins": 0.11584211885929108, "rewards/rejected": -0.15335044264793396, "step": 11390 }, { "epoch": 0.75, "learning_rate": 9.208266359456003e-07, "logits/chosen": -2.317948818206787, "logits/rejected": -2.0604913234710693, "logps/chosen": -209.72067260742188, "logps/rejected": -233.62484741210938, "loss": 0.0365, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.01957458071410656, "rewards/margins": 0.07166720926761627, "rewards/rejected": -0.09124179929494858, "step": 11400 }, { "epoch": 0.75, "eval_logits/chosen": -2.2583706378936768, "eval_logits/rejected": -2.0719101428985596, "eval_logps/chosen": -239.11155700683594, "eval_logps/rejected": -236.07205200195312, "eval_loss": 0.024521106854081154, "eval_rewards/accuracies": 0.6579999923706055, "eval_rewards/chosen": -0.03553308546543121, "eval_rewards/margins": 0.08676765114068985, "eval_rewards/rejected": -0.12230074405670166, "eval_runtime": 715.1436, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.398, "step": 11400 }, { "epoch": 0.75, "learning_rate": 9.164042196523229e-07, "logits/chosen": -2.4343605041503906, "logits/rejected": -2.129931688308716, "logps/chosen": -198.1632843017578, "logps/rejected": -204.4161834716797, "loss": 0.0197, "rewards/accuracies": 0.625, "rewards/chosen": -0.043811190873384476, "rewards/margins": 0.09885282069444656, "rewards/rejected": -0.14266401529312134, "step": 11410 }, { "epoch": 0.75, "learning_rate": 9.119900641703696e-07, "logits/chosen": -2.3990368843078613, "logits/rejected": -2.1311213970184326, "logps/chosen": -223.1060028076172, "logps/rejected": -191.94911193847656, "loss": 0.0274, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04829835519194603, "rewards/margins": 0.07589010894298553, "rewards/rejected": -0.12418848276138306, "step": 11420 }, { "epoch": 0.75, "learning_rate": 9.075841925261364e-07, "logits/chosen": -2.5026679039001465, "logits/rejected": -2.263340711593628, "logps/chosen": -242.7382049560547, "logps/rejected": -243.7560577392578, "loss": 0.047, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.02409534528851509, "rewards/margins": 0.07723324745893478, "rewards/rejected": -0.10132858902215958, "step": 11430 }, { "epoch": 0.75, "learning_rate": 9.031866277028093e-07, "logits/chosen": -2.1907057762145996, "logits/rejected": -2.2167811393737793, "logps/chosen": -199.39405822753906, "logps/rejected": -242.85537719726562, "loss": 0.0152, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.041596420109272, "rewards/margins": 0.083258256316185, "rewards/rejected": -0.1248546615242958, "step": 11440 }, { "epoch": 0.75, "learning_rate": 8.987973926402391e-07, "logits/chosen": -2.1543126106262207, "logits/rejected": -2.1936657428741455, "logps/chosen": -219.5368194580078, "logps/rejected": -235.53662109375, "loss": 0.0358, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.035986222326755524, "rewards/margins": 0.08293595165014267, "rewards/rejected": -0.1189221516251564, "step": 11450 }, { "epoch": 0.75, "learning_rate": 8.944165102348273e-07, "logits/chosen": -2.370535373687744, "logits/rejected": -2.220362424850464, "logps/chosen": -157.20303344726562, "logps/rejected": -187.8885498046875, "loss": 0.0548, "rewards/accuracies": 0.625, "rewards/chosen": -0.0012795614311471581, "rewards/margins": 0.11714081466197968, "rewards/rejected": -0.11842037737369537, "step": 11460 }, { "epoch": 0.75, "learning_rate": 8.900440033394018e-07, "logits/chosen": -2.189187526702881, "logits/rejected": -2.238858461380005, "logps/chosen": -187.5149383544922, "logps/rejected": -192.39752197265625, "loss": 0.0235, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.024993527680635452, "rewards/margins": 0.07084139436483383, "rewards/rejected": -0.09583492577075958, "step": 11470 }, { "epoch": 0.75, "learning_rate": 8.856798947631009e-07, "logits/chosen": -2.2251715660095215, "logits/rejected": -2.2451891899108887, "logps/chosen": -195.27099609375, "logps/rejected": -234.95126342773438, "loss": 0.0211, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.021976452320814133, "rewards/margins": 0.1136414036154747, "rewards/rejected": -0.13561786711215973, "step": 11480 }, { "epoch": 0.75, "learning_rate": 8.813242072712519e-07, "logits/chosen": -1.963451623916626, "logits/rejected": -1.8482892513275146, "logps/chosen": -173.51779174804688, "logps/rejected": -187.3588409423828, "loss": 0.0343, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.05827740579843521, "rewards/margins": 0.06352510303258896, "rewards/rejected": -0.12180250883102417, "step": 11490 }, { "epoch": 0.75, "learning_rate": 8.769769635852557e-07, "logits/chosen": -2.191650867462158, "logits/rejected": -2.2480854988098145, "logps/chosen": -219.7031707763672, "logps/rejected": -196.14064025878906, "loss": 0.0199, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.021976929157972336, "rewards/margins": 0.06597265601158142, "rewards/rejected": -0.08794957399368286, "step": 11500 }, { "epoch": 0.75, "eval_logits/chosen": -2.26953387260437, "eval_logits/rejected": -2.0827267169952393, "eval_logps/chosen": -238.35740661621094, "eval_logps/rejected": -233.97021484375, "eval_loss": 0.02457558736205101, "eval_rewards/accuracies": 0.6589999794960022, "eval_rewards/chosen": -0.03176227957010269, "eval_rewards/margins": 0.08002925664186478, "eval_rewards/rejected": -0.11179153621196747, "eval_runtime": 714.8856, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 11500 }, { "epoch": 0.75, "learning_rate": 8.726381863824635e-07, "logits/chosen": -2.4292683601379395, "logits/rejected": -2.1044631004333496, "logps/chosen": -289.0771789550781, "logps/rejected": -236.4252166748047, "loss": 0.0179, "rewards/accuracies": 0.625, "rewards/chosen": -0.020000828430056572, "rewards/margins": 0.07377125322818756, "rewards/rejected": -0.09377209842205048, "step": 11510 }, { "epoch": 0.75, "learning_rate": 8.683078982960638e-07, "logits/chosen": -2.181670665740967, "logits/rejected": -1.8756259679794312, "logps/chosen": -236.1549072265625, "logps/rejected": -207.24911499023438, "loss": 0.0372, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05734957009553909, "rewards/margins": 0.10592573881149292, "rewards/rejected": -0.1632753163576126, "step": 11520 }, { "epoch": 0.75, "learning_rate": 8.639861219149584e-07, "logits/chosen": -2.033862352371216, "logits/rejected": -2.08524751663208, "logps/chosen": -274.317626953125, "logps/rejected": -250.8245849609375, "loss": 0.0265, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05167793110013008, "rewards/margins": 0.09715747833251953, "rewards/rejected": -0.14883539080619812, "step": 11530 }, { "epoch": 0.76, "learning_rate": 8.596728797836532e-07, "logits/chosen": -2.1555728912353516, "logits/rejected": -2.002525806427002, "logps/chosen": -220.53543090820312, "logps/rejected": -275.8230285644531, "loss": 0.0266, "rewards/accuracies": 0.75, "rewards/chosen": -0.026133406907320023, "rewards/margins": 0.1284731775522232, "rewards/rejected": -0.15460659563541412, "step": 11540 }, { "epoch": 0.76, "learning_rate": 8.553681944021294e-07, "logits/chosen": -2.2165935039520264, "logits/rejected": -2.242164134979248, "logps/chosen": -244.7801513671875, "logps/rejected": -243.2750701904297, "loss": 0.0177, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.028480708599090576, "rewards/margins": 0.08921106159687042, "rewards/rejected": -0.117691770195961, "step": 11550 }, { "epoch": 0.76, "learning_rate": 8.510720882257365e-07, "logits/chosen": -1.984035849571228, "logits/rejected": -2.1254215240478516, "logps/chosen": -166.98123168945312, "logps/rejected": -233.5638885498047, "loss": 0.0191, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.028403252363204956, "rewards/margins": 0.12675362825393677, "rewards/rejected": -0.15515688061714172, "step": 11560 }, { "epoch": 0.76, "learning_rate": 8.467845836650667e-07, "logits/chosen": -1.8751825094223022, "logits/rejected": -1.9234832525253296, "logps/chosen": -218.1288299560547, "logps/rejected": -242.38735961914062, "loss": 0.0294, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04931117966771126, "rewards/margins": 0.08851714432239532, "rewards/rejected": -0.13782832026481628, "step": 11570 }, { "epoch": 0.76, "learning_rate": 8.425057030858461e-07, "logits/chosen": -2.068479537963867, "logits/rejected": -1.9056812524795532, "logps/chosen": -162.3206329345703, "logps/rejected": -210.03085327148438, "loss": 0.0166, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03132368624210358, "rewards/margins": 0.09343663603067398, "rewards/rejected": -0.12476031482219696, "step": 11580 }, { "epoch": 0.76, "learning_rate": 8.382354688088098e-07, "logits/chosen": -2.2166271209716797, "logits/rejected": -2.080953359603882, "logps/chosen": -167.2935791015625, "logps/rejected": -197.07273864746094, "loss": 0.0335, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.04569109529256821, "rewards/margins": 0.07178109884262085, "rewards/rejected": -0.11747218668460846, "step": 11590 }, { "epoch": 0.76, "learning_rate": 8.33973903109594e-07, "logits/chosen": -2.3416852951049805, "logits/rejected": -2.0910236835479736, "logps/chosen": -227.5818328857422, "logps/rejected": -217.0053253173828, "loss": 0.0296, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05492377281188965, "rewards/margins": 0.10492531955242157, "rewards/rejected": -0.15984909236431122, "step": 11600 }, { "epoch": 0.76, "eval_logits/chosen": -2.2632830142974854, "eval_logits/rejected": -2.0765445232391357, "eval_logps/chosen": -240.4170684814453, "eval_logps/rejected": -237.5938262939453, "eval_loss": 0.024378182366490364, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -0.042060501873493195, "eval_rewards/margins": 0.08784911781549454, "eval_rewards/rejected": -0.12990963459014893, "eval_runtime": 712.6642, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 11600 }, { "epoch": 0.76, "learning_rate": 8.297210282186102e-07, "logits/chosen": -2.1106295585632324, "logits/rejected": -2.084667682647705, "logps/chosen": -246.1740264892578, "logps/rejected": -282.34100341796875, "loss": 0.0178, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08078263700008392, "rewards/margins": 0.07400176674127579, "rewards/rejected": -0.1547844111919403, "step": 11610 }, { "epoch": 0.76, "learning_rate": 8.254768663209397e-07, "logits/chosen": -2.20991849899292, "logits/rejected": -2.009641170501709, "logps/chosen": -286.65081787109375, "logps/rejected": -235.0749969482422, "loss": 0.0329, "rewards/accuracies": 0.625, "rewards/chosen": -0.040355004370212555, "rewards/margins": 0.05447888374328613, "rewards/rejected": -0.09483388066291809, "step": 11620 }, { "epoch": 0.76, "learning_rate": 8.212414395562079e-07, "logits/chosen": -2.0545597076416016, "logits/rejected": -2.1798338890075684, "logps/chosen": -242.8502655029297, "logps/rejected": -279.1524353027344, "loss": 0.0339, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.06318067014217377, "rewards/margins": 0.06190643832087517, "rewards/rejected": -0.12508711218833923, "step": 11630 }, { "epoch": 0.76, "learning_rate": 8.170147700184775e-07, "logits/chosen": -2.2550766468048096, "logits/rejected": -2.138669490814209, "logps/chosen": -262.5296325683594, "logps/rejected": -272.13238525390625, "loss": 0.0237, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.030391475185751915, "rewards/margins": 0.09267780184745789, "rewards/rejected": -0.12306930124759674, "step": 11640 }, { "epoch": 0.76, "learning_rate": 8.127968797561242e-07, "logits/chosen": -2.3019351959228516, "logits/rejected": -2.041527271270752, "logps/chosen": -235.93344116210938, "logps/rejected": -237.5359344482422, "loss": 0.0236, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.05981367081403732, "rewards/margins": 0.12012721598148346, "rewards/rejected": -0.17994089424610138, "step": 11650 }, { "epoch": 0.76, "learning_rate": 8.085877907717338e-07, "logits/chosen": -2.1951727867126465, "logits/rejected": -2.1553750038146973, "logps/chosen": -228.0797576904297, "logps/rejected": -237.4129638671875, "loss": 0.0148, "rewards/accuracies": 0.625, "rewards/chosen": -0.03584109619259834, "rewards/margins": 0.10506229102611542, "rewards/rejected": -0.14090339839458466, "step": 11660 }, { "epoch": 0.76, "learning_rate": 8.043875250219732e-07, "logits/chosen": -2.1565158367156982, "logits/rejected": -2.0909366607666016, "logps/chosen": -242.97103881835938, "logps/rejected": -230.5181427001953, "loss": 0.039, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0678316205739975, "rewards/margins": 0.04307982325553894, "rewards/rejected": -0.11091144382953644, "step": 11670 }, { "epoch": 0.76, "learning_rate": 8.001961044174881e-07, "logits/chosen": -2.3242721557617188, "logits/rejected": -2.117072343826294, "logps/chosen": -240.4780731201172, "logps/rejected": -192.73492431640625, "loss": 0.0309, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.06006825715303421, "rewards/margins": 0.047224875539541245, "rewards/rejected": -0.10729314386844635, "step": 11680 }, { "epoch": 0.76, "learning_rate": 7.960135508227795e-07, "logits/chosen": -2.3332180976867676, "logits/rejected": -1.9855926036834717, "logps/chosen": -301.40838623046875, "logps/rejected": -253.2394256591797, "loss": 0.0265, "rewards/accuracies": 0.625, "rewards/chosen": -0.03769667446613312, "rewards/margins": 0.08073518425226212, "rewards/rejected": -0.11843186616897583, "step": 11690 }, { "epoch": 0.77, "learning_rate": 7.91839886056098e-07, "logits/chosen": -2.3475003242492676, "logits/rejected": -2.0893478393554688, "logps/chosen": -295.53472900390625, "logps/rejected": -281.19122314453125, "loss": 0.015, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.058862458914518356, "rewards/margins": 0.08113612979650497, "rewards/rejected": -0.13999858498573303, "step": 11700 }, { "epoch": 0.77, "eval_logits/chosen": -2.2643589973449707, "eval_logits/rejected": -2.0778892040252686, "eval_logps/chosen": -241.74781799316406, "eval_logps/rejected": -237.93856811523438, "eval_loss": 0.02438133768737316, "eval_rewards/accuracies": 0.6600000262260437, "eval_rewards/chosen": -0.048714205622673035, "eval_rewards/margins": 0.0829191654920578, "eval_rewards/rejected": -0.13163337111473083, "eval_runtime": 713.7867, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 11700 }, { "epoch": 0.77, "learning_rate": 7.876751318893217e-07, "logits/chosen": -2.1693975925445557, "logits/rejected": -1.9223251342773438, "logps/chosen": -247.9962615966797, "logps/rejected": -241.5628662109375, "loss": 0.0229, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05065304785966873, "rewards/margins": 0.07577352225780487, "rewards/rejected": -0.1264265775680542, "step": 11710 }, { "epoch": 0.77, "learning_rate": 7.8351931004785e-07, "logits/chosen": -2.1445133686065674, "logits/rejected": -1.7985641956329346, "logps/chosen": -218.8597412109375, "logps/rejected": -211.9443817138672, "loss": 0.0214, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.051157396286726, "rewards/margins": 0.08983974158763885, "rewards/rejected": -0.14099714159965515, "step": 11720 }, { "epoch": 0.77, "learning_rate": 7.793724422104834e-07, "logits/chosen": -2.0111899375915527, "logits/rejected": -2.158306121826172, "logps/chosen": -218.9979705810547, "logps/rejected": -309.1783447265625, "loss": 0.0314, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0508684441447258, "rewards/margins": 0.10351568460464478, "rewards/rejected": -0.15438412129878998, "step": 11730 }, { "epoch": 0.77, "learning_rate": 7.752345500093184e-07, "logits/chosen": -2.3120059967041016, "logits/rejected": -2.278259038925171, "logps/chosen": -227.6975555419922, "logps/rejected": -209.49868774414062, "loss": 0.046, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08771263062953949, "rewards/margins": 0.057827867567539215, "rewards/rejected": -0.14554047584533691, "step": 11740 }, { "epoch": 0.77, "learning_rate": 7.711056550296253e-07, "logits/chosen": -2.363168239593506, "logits/rejected": -2.191441535949707, "logps/chosen": -253.12173461914062, "logps/rejected": -236.3367156982422, "loss": 0.0425, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.03163955360651016, "rewards/margins": 0.10120024532079697, "rewards/rejected": -0.13283979892730713, "step": 11750 }, { "epoch": 0.77, "learning_rate": 7.669857788097445e-07, "logits/chosen": -2.0963737964630127, "logits/rejected": -1.8543342351913452, "logps/chosen": -176.55654907226562, "logps/rejected": -223.7884063720703, "loss": 0.0171, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07495290786027908, "rewards/margins": 0.10303473472595215, "rewards/rejected": -0.17798765003681183, "step": 11760 }, { "epoch": 0.77, "learning_rate": 7.628749428409676e-07, "logits/chosen": -2.360349178314209, "logits/rejected": -1.9492822885513306, "logps/chosen": -251.5398406982422, "logps/rejected": -211.39425659179688, "loss": 0.0438, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.07326027005910873, "rewards/margins": 0.09302968531847, "rewards/rejected": -0.16628995537757874, "step": 11770 }, { "epoch": 0.77, "learning_rate": 7.587731685674288e-07, "logits/chosen": -2.24495792388916, "logits/rejected": -2.2814254760742188, "logps/chosen": -283.14324951171875, "logps/rejected": -308.06915283203125, "loss": 0.0081, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04777499660849571, "rewards/margins": 0.08544452488422394, "rewards/rejected": -0.13321951031684875, "step": 11780 }, { "epoch": 0.77, "learning_rate": 7.546804773859931e-07, "logits/chosen": -2.340010166168213, "logits/rejected": -2.1141135692596436, "logps/chosen": -238.205322265625, "logps/rejected": -242.54702758789062, "loss": 0.0164, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05582636594772339, "rewards/margins": 0.12138841301202774, "rewards/rejected": -0.17721477150917053, "step": 11790 }, { "epoch": 0.77, "learning_rate": 7.505968906461409e-07, "logits/chosen": -2.241865634918213, "logits/rejected": -2.102313280105591, "logps/chosen": -255.6861114501953, "logps/rejected": -248.14163208007812, "loss": 0.0127, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07373024523258209, "rewards/margins": 0.08300880342721939, "rewards/rejected": -0.1567390412092209, "step": 11800 }, { "epoch": 0.77, "eval_logits/chosen": -2.2653133869171143, "eval_logits/rejected": -2.0786798000335693, "eval_logps/chosen": -243.9700927734375, "eval_logps/rejected": -240.0970916748047, "eval_loss": 0.024359513074159622, "eval_rewards/accuracies": 0.6579999923706055, "eval_rewards/chosen": -0.05982571840286255, "eval_rewards/margins": 0.08260022103786469, "eval_rewards/rejected": -0.14242593944072723, "eval_runtime": 711.8556, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 11800 }, { "epoch": 0.77, "learning_rate": 7.465224296498627e-07, "logits/chosen": -2.3098983764648438, "logits/rejected": -1.9284107685089111, "logps/chosen": -245.07510375976562, "logps/rejected": -218.8394317626953, "loss": 0.0285, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.060580063611269, "rewards/margins": 0.07446667551994324, "rewards/rejected": -0.13504675030708313, "step": 11810 }, { "epoch": 0.77, "learning_rate": 7.424571156515412e-07, "logits/chosen": -2.1790266036987305, "logits/rejected": -2.16428542137146, "logps/chosen": -188.22915649414062, "logps/rejected": -232.13473510742188, "loss": 0.0285, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.052619803696870804, "rewards/margins": 0.10614506900310516, "rewards/rejected": -0.15876488387584686, "step": 11820 }, { "epoch": 0.77, "learning_rate": 7.38400969857847e-07, "logits/chosen": -2.130056858062744, "logits/rejected": -1.9207099676132202, "logps/chosen": -205.91537475585938, "logps/rejected": -237.07766723632812, "loss": 0.0335, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11386485397815704, "rewards/margins": 0.14005056023597717, "rewards/rejected": -0.253915399312973, "step": 11830 }, { "epoch": 0.77, "learning_rate": 7.343540134276225e-07, "logits/chosen": -2.262645721435547, "logits/rejected": -2.1876039505004883, "logps/chosen": -178.30035400390625, "logps/rejected": -195.77682495117188, "loss": 0.0253, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03794369101524353, "rewards/margins": 0.07790088653564453, "rewards/rejected": -0.11584459245204926, "step": 11840 }, { "epoch": 0.78, "learning_rate": 7.303162674717762e-07, "logits/chosen": -2.230045795440674, "logits/rejected": -1.846605658531189, "logps/chosen": -228.3832550048828, "logps/rejected": -187.96006774902344, "loss": 0.0383, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09077741205692291, "rewards/margins": 0.07706119865179062, "rewards/rejected": -0.16783861815929413, "step": 11850 }, { "epoch": 0.78, "learning_rate": 7.26287753053167e-07, "logits/chosen": -2.1979362964630127, "logits/rejected": -2.1252353191375732, "logps/chosen": -278.9451904296875, "logps/rejected": -292.9638671875, "loss": 0.0257, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06984533369541168, "rewards/margins": 0.07643438875675201, "rewards/rejected": -0.1462797075510025, "step": 11860 }, { "epoch": 0.78, "learning_rate": 7.222684911865013e-07, "logits/chosen": -2.305039644241333, "logits/rejected": -2.314790964126587, "logps/chosen": -218.77285766601562, "logps/rejected": -262.61029052734375, "loss": 0.0322, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05802080035209656, "rewards/margins": 0.1107601672410965, "rewards/rejected": -0.16878096759319305, "step": 11870 }, { "epoch": 0.78, "learning_rate": 7.182585028382166e-07, "logits/chosen": -2.3521711826324463, "logits/rejected": -2.054400682449341, "logps/chosen": -286.86614990234375, "logps/rejected": -272.48370361328125, "loss": 0.0308, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.053572773933410645, "rewards/margins": 0.09033741056919098, "rewards/rejected": -0.14391018450260162, "step": 11880 }, { "epoch": 0.78, "learning_rate": 7.142578089263769e-07, "logits/chosen": -2.3823986053466797, "logits/rejected": -2.061488389968872, "logps/chosen": -330.8357849121094, "logps/rejected": -289.43865966796875, "loss": 0.028, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06203612685203552, "rewards/margins": 0.08701418340206146, "rewards/rejected": -0.14905031025409698, "step": 11890 }, { "epoch": 0.78, "learning_rate": 7.102664303205611e-07, "logits/chosen": -2.2598845958709717, "logits/rejected": -2.024601697921753, "logps/chosen": -233.4888458251953, "logps/rejected": -232.8012237548828, "loss": 0.0199, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06463171541690826, "rewards/margins": 0.08594464510679245, "rewards/rejected": -0.1505763679742813, "step": 11900 }, { "epoch": 0.78, "eval_logits/chosen": -2.2626161575317383, "eval_logits/rejected": -2.075838565826416, "eval_logps/chosen": -243.8325653076172, "eval_logps/rejected": -240.6167755126953, "eval_loss": 0.02434389479458332, "eval_rewards/accuracies": 0.6604999899864197, "eval_rewards/chosen": -0.05913807079195976, "eval_rewards/margins": 0.08588622510433197, "eval_rewards/rejected": -0.14502428472042084, "eval_runtime": 716.2778, "eval_samples_per_second": 2.792, "eval_steps_per_second": 1.396, "step": 11900 }, { "epoch": 0.78, "learning_rate": 7.062843878417566e-07, "logits/chosen": -2.3879013061523438, "logits/rejected": -2.250087261199951, "logps/chosen": -231.5170440673828, "logps/rejected": -212.97030639648438, "loss": 0.0212, "rewards/accuracies": 0.625, "rewards/chosen": -0.040422223508358, "rewards/margins": 0.07248730212450027, "rewards/rejected": -0.11290953308343887, "step": 11910 }, { "epoch": 0.78, "learning_rate": 7.023117022622458e-07, "logits/chosen": -2.3014559745788574, "logits/rejected": -1.9482473134994507, "logps/chosen": -256.7848815917969, "logps/rejected": -249.9860382080078, "loss": 0.0267, "rewards/accuracies": 0.625, "rewards/chosen": -0.09134788066148758, "rewards/margins": 0.08234737813472748, "rewards/rejected": -0.17369526624679565, "step": 11920 }, { "epoch": 0.78, "learning_rate": 6.983483943055042e-07, "logits/chosen": -2.205634593963623, "logits/rejected": -2.0224173069000244, "logps/chosen": -293.17437744140625, "logps/rejected": -252.63803100585938, "loss": 0.0272, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06353868544101715, "rewards/margins": 0.0811837837100029, "rewards/rejected": -0.14472243189811707, "step": 11930 }, { "epoch": 0.78, "learning_rate": 6.943944846460859e-07, "logits/chosen": -2.2417654991149902, "logits/rejected": -2.1794562339782715, "logps/chosen": -228.4346923828125, "logps/rejected": -194.443603515625, "loss": 0.028, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.048401739448308945, "rewards/margins": 0.06375519186258316, "rewards/rejected": -0.112156942486763, "step": 11940 }, { "epoch": 0.78, "learning_rate": 6.904499939095225e-07, "logits/chosen": -2.2047362327575684, "logits/rejected": -2.151808977127075, "logps/chosen": -233.1191864013672, "logps/rejected": -239.6602020263672, "loss": 0.0145, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.053860194981098175, "rewards/margins": 0.10934630781412125, "rewards/rejected": -0.16320650279521942, "step": 11950 }, { "epoch": 0.78, "learning_rate": 6.865149426722079e-07, "logits/chosen": -2.176809310913086, "logits/rejected": -2.12095308303833, "logps/chosen": -288.0912170410156, "logps/rejected": -276.6849670410156, "loss": 0.0138, "rewards/accuracies": 0.625, "rewards/chosen": -0.08453039824962616, "rewards/margins": 0.09643884003162384, "rewards/rejected": -0.18096923828125, "step": 11960 }, { "epoch": 0.78, "learning_rate": 6.825893514612985e-07, "logits/chosen": -2.0197577476501465, "logits/rejected": -2.115384578704834, "logps/chosen": -246.4369659423828, "logps/rejected": -265.3404235839844, "loss": 0.0381, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04935074225068092, "rewards/margins": 0.1054350957274437, "rewards/rejected": -0.15478582680225372, "step": 11970 }, { "epoch": 0.78, "learning_rate": 6.786732407546001e-07, "logits/chosen": -2.0275187492370605, "logits/rejected": -1.975950837135315, "logps/chosen": -213.6422882080078, "logps/rejected": -191.46072387695312, "loss": 0.0284, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06380544602870941, "rewards/margins": 0.0876699760556221, "rewards/rejected": -0.1514754295349121, "step": 11980 }, { "epoch": 0.78, "learning_rate": 6.747666309804654e-07, "logits/chosen": -2.4276633262634277, "logits/rejected": -2.1066055297851562, "logps/chosen": -299.38726806640625, "logps/rejected": -234.7521514892578, "loss": 0.0215, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.05855029821395874, "rewards/margins": 0.07450170814990997, "rewards/rejected": -0.1330520212650299, "step": 11990 }, { "epoch": 0.79, "learning_rate": 6.708695425176831e-07, "logits/chosen": -2.0516610145568848, "logits/rejected": -2.04587984085083, "logps/chosen": -183.84193420410156, "logps/rejected": -225.9083251953125, "loss": 0.0313, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0740157812833786, "rewards/margins": 0.10655520856380463, "rewards/rejected": -0.18057098984718323, "step": 12000 }, { "epoch": 0.79, "eval_logits/chosen": -2.2668938636779785, "eval_logits/rejected": -2.079688549041748, "eval_logps/chosen": -243.17733764648438, "eval_logps/rejected": -240.09140014648438, "eval_loss": 0.024385279044508934, "eval_rewards/accuracies": 0.6604999899864197, "eval_rewards/chosen": -0.0558619387447834, "eval_rewards/margins": 0.08653547614812851, "eval_rewards/rejected": -0.14239740371704102, "eval_runtime": 714.7901, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 12000 }, { "epoch": 0.79, "learning_rate": 6.669819956953768e-07, "logits/chosen": -2.0975677967071533, "logits/rejected": -2.0333313941955566, "logps/chosen": -180.8198699951172, "logps/rejected": -205.4145965576172, "loss": 0.0106, "rewards/accuracies": 0.625, "rewards/chosen": -0.05244321748614311, "rewards/margins": 0.08027370274066925, "rewards/rejected": -0.13271690905094147, "step": 12010 }, { "epoch": 0.79, "learning_rate": 6.631040107928957e-07, "logits/chosen": -2.4497742652893066, "logits/rejected": -2.1184353828430176, "logps/chosen": -280.5262451171875, "logps/rejected": -200.26541137695312, "loss": 0.0333, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06170976907014847, "rewards/margins": 0.08018840849399567, "rewards/rejected": -0.14189818501472473, "step": 12020 }, { "epoch": 0.79, "learning_rate": 6.592356080397072e-07, "logits/chosen": -2.3427226543426514, "logits/rejected": -1.7892353534698486, "logps/chosen": -238.2996826171875, "logps/rejected": -200.78785705566406, "loss": 0.0259, "rewards/accuracies": 0.625, "rewards/chosen": -0.05005241557955742, "rewards/margins": 0.09475790709257126, "rewards/rejected": -0.14481033384799957, "step": 12030 }, { "epoch": 0.79, "learning_rate": 6.553768076152963e-07, "logits/chosen": -2.22847843170166, "logits/rejected": -2.327822208404541, "logps/chosen": -169.56468200683594, "logps/rejected": -220.78173828125, "loss": 0.0524, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.057335417717695236, "rewards/margins": 0.1205652505159378, "rewards/rejected": -0.17790067195892334, "step": 12040 }, { "epoch": 0.79, "learning_rate": 6.51527629649055e-07, "logits/chosen": -2.374026298522949, "logits/rejected": -2.2255825996398926, "logps/chosen": -269.08404541015625, "logps/rejected": -251.5164031982422, "loss": 0.0149, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.07750485837459564, "rewards/margins": 0.057897042483091354, "rewards/rejected": -0.1354019045829773, "step": 12050 }, { "epoch": 0.79, "learning_rate": 6.476880942201824e-07, "logits/chosen": -2.507349967956543, "logits/rejected": -2.1045010089874268, "logps/chosen": -246.28945922851562, "logps/rejected": -207.04495239257812, "loss": 0.011, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.024758975952863693, "rewards/margins": 0.09689836204051971, "rewards/rejected": -0.1216573491692543, "step": 12060 }, { "epoch": 0.79, "learning_rate": 6.438582213575748e-07, "logits/chosen": -2.197597026824951, "logits/rejected": -2.168886184692383, "logps/chosen": -239.6921844482422, "logps/rejected": -266.37591552734375, "loss": 0.0355, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.048878345638513565, "rewards/margins": 0.07769103348255157, "rewards/rejected": -0.12656937539577484, "step": 12070 }, { "epoch": 0.79, "learning_rate": 6.400380310397267e-07, "logits/chosen": -2.1425106525421143, "logits/rejected": -2.1414434909820557, "logps/chosen": -243.58932495117188, "logps/rejected": -286.09259033203125, "loss": 0.015, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.057394880801439285, "rewards/margins": 0.055457305163145065, "rewards/rejected": -0.11285219341516495, "step": 12080 }, { "epoch": 0.79, "learning_rate": 6.362275431946202e-07, "logits/chosen": -2.117353916168213, "logits/rejected": -2.1524546146392822, "logps/chosen": -244.9851837158203, "logps/rejected": -263.6101379394531, "loss": 0.0416, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04315786436200142, "rewards/margins": 0.06288377195596695, "rewards/rejected": -0.10604163259267807, "step": 12090 }, { "epoch": 0.79, "learning_rate": 6.324267776996285e-07, "logits/chosen": -2.3388924598693848, "logits/rejected": -1.9531021118164062, "logps/chosen": -386.77874755859375, "logps/rejected": -292.1388244628906, "loss": 0.0102, "rewards/accuracies": 0.75, "rewards/chosen": -0.06099815294146538, "rewards/margins": 0.1367029845714569, "rewards/rejected": -0.19770114123821259, "step": 12100 }, { "epoch": 0.79, "eval_logits/chosen": -2.2704570293426514, "eval_logits/rejected": -2.082979440689087, "eval_logps/chosen": -242.26406860351562, "eval_logps/rejected": -238.70458984375, "eval_loss": 0.02435409463942051, "eval_rewards/accuracies": 0.656000018119812, "eval_rewards/chosen": -0.051295530050992966, "eval_rewards/margins": 0.08416783064603806, "eval_rewards/rejected": -0.13546337187290192, "eval_runtime": 713.3114, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 12100 }, { "epoch": 0.79, "learning_rate": 6.286357543814045e-07, "logits/chosen": -2.193878650665283, "logits/rejected": -2.1076154708862305, "logps/chosen": -203.20327758789062, "logps/rejected": -295.43634033203125, "loss": 0.0416, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04836731776595116, "rewards/margins": 0.12110915035009384, "rewards/rejected": -0.1694764643907547, "step": 12110 }, { "epoch": 0.79, "learning_rate": 6.248544930157838e-07, "logits/chosen": -2.3076460361480713, "logits/rejected": -2.06266188621521, "logps/chosen": -194.84645080566406, "logps/rejected": -201.7028350830078, "loss": 0.0299, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05380439758300781, "rewards/margins": 0.12318424880504608, "rewards/rejected": -0.17698867619037628, "step": 12120 }, { "epoch": 0.79, "learning_rate": 6.21083013327678e-07, "logits/chosen": -2.2582828998565674, "logits/rejected": -2.157015323638916, "logps/chosen": -309.3568115234375, "logps/rejected": -267.8647155761719, "loss": 0.0184, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.025580257177352905, "rewards/margins": 0.07216853648424149, "rewards/rejected": -0.09774880111217499, "step": 12130 }, { "epoch": 0.79, "learning_rate": 6.17321334990973e-07, "logits/chosen": -2.2270803451538086, "logits/rejected": -2.139096260070801, "logps/chosen": -213.1139373779297, "logps/rejected": -190.55184936523438, "loss": 0.0136, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.061547160148620605, "rewards/margins": 0.06337620317935944, "rewards/rejected": -0.12492338567972183, "step": 12140 }, { "epoch": 0.79, "learning_rate": 6.135694776284243e-07, "logits/chosen": -2.384162425994873, "logits/rejected": -2.108898401260376, "logps/chosen": -279.59844970703125, "logps/rejected": -237.87149047851562, "loss": 0.0252, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04403727129101753, "rewards/margins": 0.11522600799798965, "rewards/rejected": -0.15926328301429749, "step": 12150 }, { "epoch": 0.8, "learning_rate": 6.098274608115595e-07, "logits/chosen": -2.1649386882781982, "logits/rejected": -2.027029037475586, "logps/chosen": -215.2659149169922, "logps/rejected": -193.32310485839844, "loss": 0.0672, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.04807636886835098, "rewards/margins": 0.03344811871647835, "rewards/rejected": -0.08152447640895844, "step": 12160 }, { "epoch": 0.8, "learning_rate": 6.060953040605697e-07, "logits/chosen": -2.397775650024414, "logits/rejected": -1.8723652362823486, "logps/chosen": -342.2147216796875, "logps/rejected": -294.21893310546875, "loss": 0.009, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.015425214543938637, "rewards/margins": 0.10949543863534927, "rewards/rejected": -0.12492065131664276, "step": 12170 }, { "epoch": 0.8, "learning_rate": 6.023730268442144e-07, "logits/chosen": -2.186662197113037, "logits/rejected": -2.0057554244995117, "logps/chosen": -212.6865692138672, "logps/rejected": -209.54025268554688, "loss": 0.0094, "rewards/accuracies": 0.75, "rewards/chosen": -0.03932885453104973, "rewards/margins": 0.12649241089820862, "rewards/rejected": -0.16582126915454865, "step": 12180 }, { "epoch": 0.8, "learning_rate": 5.986606485797131e-07, "logits/chosen": -2.182776689529419, "logits/rejected": -1.9772993326187134, "logps/chosen": -209.5937957763672, "logps/rejected": -228.5242462158203, "loss": 0.0295, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04851619526743889, "rewards/margins": 0.06932185590267181, "rewards/rejected": -0.1178380697965622, "step": 12190 }, { "epoch": 0.8, "learning_rate": 5.949581886326511e-07, "logits/chosen": -2.303760051727295, "logits/rejected": -2.287932872772217, "logps/chosen": -303.95550537109375, "logps/rejected": -286.0728759765625, "loss": 0.0325, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03081701323390007, "rewards/margins": 0.05515850707888603, "rewards/rejected": -0.0859755203127861, "step": 12200 }, { "epoch": 0.8, "eval_logits/chosen": -2.2709171772003174, "eval_logits/rejected": -2.0834968090057373, "eval_logps/chosen": -241.12908935546875, "eval_logps/rejected": -237.43380737304688, "eval_loss": 0.02426832541823387, "eval_rewards/accuracies": 0.6600000262260437, "eval_rewards/chosen": -0.04562075808644295, "eval_rewards/margins": 0.08348869532346725, "eval_rewards/rejected": -0.1291094571352005, "eval_runtime": 713.0091, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.403, "step": 12200 }, { "epoch": 0.8, "learning_rate": 5.912656663168717e-07, "logits/chosen": -2.3682117462158203, "logits/rejected": -2.309563636779785, "logps/chosen": -230.9191131591797, "logps/rejected": -236.99209594726562, "loss": 0.0195, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03454895317554474, "rewards/margins": 0.062227051705121994, "rewards/rejected": -0.09677600860595703, "step": 12210 }, { "epoch": 0.8, "learning_rate": 5.875831008943817e-07, "logits/chosen": -2.103549003601074, "logits/rejected": -2.0876801013946533, "logps/chosen": -187.82420349121094, "logps/rejected": -176.9785614013672, "loss": 0.0261, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05090073496103287, "rewards/margins": 0.05877278000116348, "rewards/rejected": -0.10967351496219635, "step": 12220 }, { "epoch": 0.8, "learning_rate": 5.839105115752442e-07, "logits/chosen": -2.2246978282928467, "logits/rejected": -2.0403006076812744, "logps/chosen": -238.2069854736328, "logps/rejected": -216.3524932861328, "loss": 0.0205, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.07614725083112717, "rewards/margins": 0.09076641499996185, "rewards/rejected": -0.16691365838050842, "step": 12230 }, { "epoch": 0.8, "learning_rate": 5.802479175174855e-07, "logits/chosen": -2.2168078422546387, "logits/rejected": -2.068084716796875, "logps/chosen": -174.10366821289062, "logps/rejected": -198.13929748535156, "loss": 0.0125, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04054706171154976, "rewards/margins": 0.09054260700941086, "rewards/rejected": -0.13108967244625092, "step": 12240 }, { "epoch": 0.8, "learning_rate": 5.765953378269901e-07, "logits/chosen": -2.135885715484619, "logits/rejected": -2.073364734649658, "logps/chosen": -218.061767578125, "logps/rejected": -272.62738037109375, "loss": 0.0271, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05534181743860245, "rewards/margins": 0.11760924011468887, "rewards/rejected": -0.17295105755329132, "step": 12250 }, { "epoch": 0.8, "learning_rate": 5.729527915574037e-07, "logits/chosen": -2.2846944332122803, "logits/rejected": -2.1489596366882324, "logps/chosen": -230.0850372314453, "logps/rejected": -247.8024139404297, "loss": 0.0273, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04833231121301651, "rewards/margins": 0.08689139783382416, "rewards/rejected": -0.13522370159626007, "step": 12260 }, { "epoch": 0.8, "learning_rate": 5.693202977100304e-07, "logits/chosen": -2.291938304901123, "logits/rejected": -2.0095741748809814, "logps/chosen": -179.6736602783203, "logps/rejected": -192.34451293945312, "loss": 0.0234, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04764264076948166, "rewards/margins": 0.08003589510917664, "rewards/rejected": -0.1276785284280777, "step": 12270 }, { "epoch": 0.8, "learning_rate": 5.656978752337389e-07, "logits/chosen": -2.310103178024292, "logits/rejected": -2.1109793186187744, "logps/chosen": -213.84716796875, "logps/rejected": -236.92471313476562, "loss": 0.0331, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.07096020877361298, "rewards/margins": 0.10838906466960907, "rewards/rejected": -0.17934927344322205, "step": 12280 }, { "epoch": 0.8, "learning_rate": 5.620855430248581e-07, "logits/chosen": -2.210860013961792, "logits/rejected": -2.074171781539917, "logps/chosen": -166.60595703125, "logps/rejected": -186.58425903320312, "loss": 0.0289, "rewards/accuracies": 0.625, "rewards/chosen": -0.0230544600635767, "rewards/margins": 0.1107834130525589, "rewards/rejected": -0.13383787870407104, "step": 12290 }, { "epoch": 0.8, "learning_rate": 5.584833199270837e-07, "logits/chosen": -2.2861437797546387, "logits/rejected": -2.133598804473877, "logps/chosen": -240.87899780273438, "logps/rejected": -249.25509643554688, "loss": 0.028, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0612337663769722, "rewards/margins": 0.08034642040729523, "rewards/rejected": -0.14158019423484802, "step": 12300 }, { "epoch": 0.8, "eval_logits/chosen": -2.2695388793945312, "eval_logits/rejected": -2.0820517539978027, "eval_logps/chosen": -241.85560607910156, "eval_logps/rejected": -238.8947296142578, "eval_loss": 0.024259360507130623, "eval_rewards/accuracies": 0.6585000157356262, "eval_rewards/chosen": -0.049253277480602264, "eval_rewards/margins": 0.08716095238924026, "eval_rewards/rejected": -0.13641421496868134, "eval_runtime": 713.9946, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.401, "step": 12300 }, { "epoch": 0.81, "learning_rate": 5.548912247313742e-07, "logits/chosen": -2.500735282897949, "logits/rejected": -2.072622299194336, "logps/chosen": -298.7041320800781, "logps/rejected": -262.83197021484375, "loss": 0.0205, "rewards/accuracies": 0.625, "rewards/chosen": -0.07452087104320526, "rewards/margins": 0.05851732939481735, "rewards/rejected": -0.13303819298744202, "step": 12310 }, { "epoch": 0.81, "learning_rate": 5.513092761758596e-07, "logits/chosen": -2.3174662590026855, "logits/rejected": -2.1213202476501465, "logps/chosen": -284.2948913574219, "logps/rejected": -226.7404022216797, "loss": 0.0245, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06542503833770752, "rewards/margins": 0.04713209718465805, "rewards/rejected": -0.11255712807178497, "step": 12320 }, { "epoch": 0.81, "learning_rate": 5.477374929457363e-07, "logits/chosen": -2.235321521759033, "logits/rejected": -2.2302331924438477, "logps/chosen": -215.40139770507812, "logps/rejected": -209.98681640625, "loss": 0.0146, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.06670118123292923, "rewards/margins": 0.06878657639026642, "rewards/rejected": -0.13548775017261505, "step": 12330 }, { "epoch": 0.81, "learning_rate": 5.441758936731772e-07, "logits/chosen": -2.25117826461792, "logits/rejected": -2.1371283531188965, "logps/chosen": -245.52734375, "logps/rejected": -244.70254516601562, "loss": 0.0211, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0509103462100029, "rewards/margins": 0.08482729643583298, "rewards/rejected": -0.13573762774467468, "step": 12340 }, { "epoch": 0.81, "learning_rate": 5.406244969372273e-07, "logits/chosen": -2.17441987991333, "logits/rejected": -2.010354518890381, "logps/chosen": -211.4480438232422, "logps/rejected": -247.693603515625, "loss": 0.0226, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05862605571746826, "rewards/margins": 0.14871786534786224, "rewards/rejected": -0.2073439359664917, "step": 12350 }, { "epoch": 0.81, "learning_rate": 5.370833212637122e-07, "logits/chosen": -2.2330093383789062, "logits/rejected": -1.9421203136444092, "logps/chosen": -229.41799926757812, "logps/rejected": -234.67514038085938, "loss": 0.0232, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.05453511327505112, "rewards/margins": 0.09521164000034332, "rewards/rejected": -0.14974676072597504, "step": 12360 }, { "epoch": 0.81, "learning_rate": 5.335523851251392e-07, "logits/chosen": -2.1823971271514893, "logits/rejected": -2.107938528060913, "logps/chosen": -219.23770141601562, "logps/rejected": -219.7248077392578, "loss": 0.0329, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.06551903486251831, "rewards/margins": 0.1064266562461853, "rewards/rejected": -0.1719456911087036, "step": 12370 }, { "epoch": 0.81, "learning_rate": 5.300317069406003e-07, "logits/chosen": -2.1719276905059814, "logits/rejected": -2.142944812774658, "logps/chosen": -168.9110565185547, "logps/rejected": -194.3233642578125, "loss": 0.0116, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04082585498690605, "rewards/margins": 0.1082276701927185, "rewards/rejected": -0.14905352890491486, "step": 12380 }, { "epoch": 0.81, "learning_rate": 5.265213050756782e-07, "logits/chosen": -2.406087636947632, "logits/rejected": -2.2311203479766846, "logps/chosen": -239.39688110351562, "logps/rejected": -255.44509887695312, "loss": 0.0249, "rewards/accuracies": 0.625, "rewards/chosen": -0.03672807663679123, "rewards/margins": 0.0908966213464737, "rewards/rejected": -0.12762470543384552, "step": 12390 }, { "epoch": 0.81, "learning_rate": 5.230211978423477e-07, "logits/chosen": -2.3066985607147217, "logits/rejected": -2.1874680519104004, "logps/chosen": -232.8973846435547, "logps/rejected": -230.87380981445312, "loss": 0.0278, "rewards/accuracies": 0.625, "rewards/chosen": -0.07048575580120087, "rewards/margins": 0.06572605669498444, "rewards/rejected": -0.1362117975950241, "step": 12400 }, { "epoch": 0.81, "eval_logits/chosen": -2.2793402671813965, "eval_logits/rejected": -2.091266393661499, "eval_logps/chosen": -242.20643615722656, "eval_logps/rejected": -238.47528076171875, "eval_loss": 0.02414710633456707, "eval_rewards/accuracies": 0.6600000262260437, "eval_rewards/chosen": -0.051007479429244995, "eval_rewards/margins": 0.08330940455198288, "eval_rewards/rejected": -0.13431687653064728, "eval_runtime": 713.7293, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 12400 }, { "epoch": 0.81, "learning_rate": 5.195314034988835e-07, "logits/chosen": -2.437074661254883, "logits/rejected": -2.1730642318725586, "logps/chosen": -226.95419311523438, "logps/rejected": -178.89450073242188, "loss": 0.0388, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.038335494697093964, "rewards/margins": 0.09489767253398895, "rewards/rejected": -0.1332331895828247, "step": 12410 }, { "epoch": 0.81, "learning_rate": 5.160519402497616e-07, "logits/chosen": -2.3210339546203613, "logits/rejected": -2.1709866523742676, "logps/chosen": -243.803466796875, "logps/rejected": -257.8756103515625, "loss": 0.0414, "rewards/accuracies": 0.625, "rewards/chosen": -0.06874729692935944, "rewards/margins": 0.09066031873226166, "rewards/rejected": -0.1594076305627823, "step": 12420 }, { "epoch": 0.81, "learning_rate": 5.125828262455679e-07, "logits/chosen": -2.212090492248535, "logits/rejected": -2.011735200881958, "logps/chosen": -266.16326904296875, "logps/rejected": -254.65283203125, "loss": 0.0203, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05225072428584099, "rewards/margins": 0.09414578974246979, "rewards/rejected": -0.14639653265476227, "step": 12430 }, { "epoch": 0.81, "learning_rate": 5.091240795828992e-07, "logits/chosen": -1.9795331954956055, "logits/rejected": -2.1605002880096436, "logps/chosen": -212.42431640625, "logps/rejected": -253.1348114013672, "loss": 0.0483, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04966943711042404, "rewards/margins": 0.10439908504486084, "rewards/rejected": -0.15406851470470428, "step": 12440 }, { "epoch": 0.81, "learning_rate": 5.056757183042732e-07, "logits/chosen": -2.2055981159210205, "logits/rejected": -2.1182687282562256, "logps/chosen": -246.42037963867188, "logps/rejected": -246.04562377929688, "loss": 0.0121, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06432102620601654, "rewards/margins": 0.09751377999782562, "rewards/rejected": -0.16183480620384216, "step": 12450 }, { "epoch": 0.82, "learning_rate": 5.022377603980308e-07, "logits/chosen": -2.368230104446411, "logits/rejected": -2.0332324504852295, "logps/chosen": -262.3285217285156, "logps/rejected": -214.49362182617188, "loss": 0.0213, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06521899998188019, "rewards/margins": 0.09318248927593231, "rewards/rejected": -0.1584015190601349, "step": 12460 }, { "epoch": 0.82, "learning_rate": 4.988102237982454e-07, "logits/chosen": -2.319570779800415, "logits/rejected": -2.243185520172119, "logps/chosen": -237.8197479248047, "logps/rejected": -209.4499969482422, "loss": 0.0186, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.07819648832082748, "rewards/margins": 0.06102749705314636, "rewards/rejected": -0.13922397792339325, "step": 12470 }, { "epoch": 0.82, "learning_rate": 4.953931263846251e-07, "logits/chosen": -2.303527355194092, "logits/rejected": -2.0281221866607666, "logps/chosen": -276.46331787109375, "logps/rejected": -253.54183959960938, "loss": 0.0356, "rewards/accuracies": 0.75, "rewards/chosen": -0.07377218455076218, "rewards/margins": 0.10519599914550781, "rewards/rejected": -0.1789681762456894, "step": 12480 }, { "epoch": 0.82, "learning_rate": 4.919864859824266e-07, "logits/chosen": -2.2470602989196777, "logits/rejected": -2.104475736618042, "logps/chosen": -254.1764678955078, "logps/rejected": -225.07070922851562, "loss": 0.0339, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0837114155292511, "rewards/margins": 0.0796470046043396, "rewards/rejected": -0.1633584201335907, "step": 12490 }, { "epoch": 0.82, "learning_rate": 4.885903203623532e-07, "logits/chosen": -2.4405932426452637, "logits/rejected": -2.0412323474884033, "logps/chosen": -292.0880126953125, "logps/rejected": -247.72152709960938, "loss": 0.0142, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03537056967616081, "rewards/margins": 0.09703455865383148, "rewards/rejected": -0.1324051171541214, "step": 12500 }, { "epoch": 0.82, "eval_logits/chosen": -2.2793068885803223, "eval_logits/rejected": -2.0913002490997314, "eval_logps/chosen": -242.81405639648438, "eval_logps/rejected": -239.04124450683594, "eval_loss": 0.02413610927760601, "eval_rewards/accuracies": 0.6570000052452087, "eval_rewards/chosen": -0.05404556915163994, "eval_rewards/margins": 0.08310119062662125, "eval_rewards/rejected": -0.1371467560529709, "eval_runtime": 713.8604, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 12500 }, { "epoch": 0.82, "learning_rate": 4.852046472404695e-07, "logits/chosen": -2.4357948303222656, "logits/rejected": -1.7031605243682861, "logps/chosen": -301.8102722167969, "logps/rejected": -193.1060333251953, "loss": 0.0331, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.019607767462730408, "rewards/margins": 0.08484580367803574, "rewards/rejected": -0.10445356369018555, "step": 12510 }, { "epoch": 0.82, "learning_rate": 4.818294842781035e-07, "logits/chosen": -2.3425495624542236, "logits/rejected": -2.1587905883789062, "logps/chosen": -232.7345428466797, "logps/rejected": -205.90792846679688, "loss": 0.0275, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03263060376048088, "rewards/margins": 0.12765631079673767, "rewards/rejected": -0.16028691828250885, "step": 12520 }, { "epoch": 0.82, "learning_rate": 4.784648490817601e-07, "logits/chosen": -2.3454043865203857, "logits/rejected": -2.053494930267334, "logps/chosen": -229.67822265625, "logps/rejected": -200.66259765625, "loss": 0.0315, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.047857291996479034, "rewards/margins": 0.07121441513299942, "rewards/rejected": -0.11907169967889786, "step": 12530 }, { "epoch": 0.82, "learning_rate": 4.751107592030235e-07, "logits/chosen": -2.360136032104492, "logits/rejected": -2.0764052867889404, "logps/chosen": -178.2238006591797, "logps/rejected": -184.0672149658203, "loss": 0.0251, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04186008870601654, "rewards/margins": 0.11471670866012573, "rewards/rejected": -0.15657678246498108, "step": 12540 }, { "epoch": 0.82, "learning_rate": 4.717672321384703e-07, "logits/chosen": -2.2550435066223145, "logits/rejected": -2.0041213035583496, "logps/chosen": -229.6531524658203, "logps/rejected": -207.37255859375, "loss": 0.0208, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03203721344470978, "rewards/margins": 0.09957583248615265, "rewards/rejected": -0.13161304593086243, "step": 12550 }, { "epoch": 0.82, "learning_rate": 4.684342853295748e-07, "logits/chosen": -2.2040514945983887, "logits/rejected": -2.081937313079834, "logps/chosen": -196.20738220214844, "logps/rejected": -212.50997924804688, "loss": 0.0245, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04255301505327225, "rewards/margins": 0.1011330857872963, "rewards/rejected": -0.14368610084056854, "step": 12560 }, { "epoch": 0.82, "learning_rate": 4.651119361626213e-07, "logits/chosen": -2.504356861114502, "logits/rejected": -2.1664860248565674, "logps/chosen": -247.20285034179688, "logps/rejected": -214.8421630859375, "loss": 0.0218, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.030421242117881775, "rewards/margins": 0.06934089213609695, "rewards/rejected": -0.09976212680339813, "step": 12570 }, { "epoch": 0.82, "learning_rate": 4.618002019686091e-07, "logits/chosen": -2.263784885406494, "logits/rejected": -2.087101697921753, "logps/chosen": -290.5827941894531, "logps/rejected": -250.25985717773438, "loss": 0.0219, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.05685209110379219, "rewards/margins": 0.08156587183475494, "rewards/rejected": -0.13841792941093445, "step": 12580 }, { "epoch": 0.82, "learning_rate": 4.5849910002316757e-07, "logits/chosen": -2.327174425125122, "logits/rejected": -1.988965630531311, "logps/chosen": -197.8284149169922, "logps/rejected": -183.67318725585938, "loss": 0.0349, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0865812748670578, "rewards/margins": 0.09554515033960342, "rewards/rejected": -0.18212643265724182, "step": 12590 }, { "epoch": 0.82, "learning_rate": 4.5520864754645984e-07, "logits/chosen": -2.384525775909424, "logits/rejected": -2.2449145317077637, "logps/chosen": -291.3929138183594, "logps/rejected": -260.1684875488281, "loss": 0.0177, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.05021563917398453, "rewards/margins": 0.06300269067287445, "rewards/rejected": -0.11321830749511719, "step": 12600 }, { "epoch": 0.82, "eval_logits/chosen": -2.2796547412872314, "eval_logits/rejected": -2.0916662216186523, "eval_logps/chosen": -243.12290954589844, "eval_logps/rejected": -239.190185546875, "eval_loss": 0.024224113672971725, "eval_rewards/accuracies": 0.6579999923706055, "eval_rewards/chosen": -0.055589765310287476, "eval_rewards/margins": 0.08230166882276535, "eval_rewards/rejected": -0.13789144158363342, "eval_runtime": 713.353, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 12600 }, { "epoch": 0.83, "learning_rate": 4.5192886170309896e-07, "logits/chosen": -2.2043235301971436, "logits/rejected": -2.105273962020874, "logps/chosen": -202.4031982421875, "logps/rejected": -213.1467742919922, "loss": 0.0194, "rewards/accuracies": 0.625, "rewards/chosen": -0.05817372351884842, "rewards/margins": 0.05279749631881714, "rewards/rejected": -0.11097122728824615, "step": 12610 }, { "epoch": 0.83, "learning_rate": 4.486597596020548e-07, "logits/chosen": -2.2953429222106934, "logits/rejected": -2.0294137001037598, "logps/chosen": -233.8562774658203, "logps/rejected": -210.57388305664062, "loss": 0.0192, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07137481123209, "rewards/margins": 0.07519672065973282, "rewards/rejected": -0.14657153189182281, "step": 12620 }, { "epoch": 0.83, "learning_rate": 4.454013582965644e-07, "logits/chosen": -2.227466106414795, "logits/rejected": -1.818380355834961, "logps/chosen": -275.13970947265625, "logps/rejected": -237.52182006835938, "loss": 0.0191, "rewards/accuracies": 0.625, "rewards/chosen": -0.058415018022060394, "rewards/margins": 0.06412716954946518, "rewards/rejected": -0.12254220247268677, "step": 12630 }, { "epoch": 0.83, "learning_rate": 4.4215367478404605e-07, "logits/chosen": -2.0815138816833496, "logits/rejected": -2.054581642150879, "logps/chosen": -285.30023193359375, "logps/rejected": -324.38555908203125, "loss": 0.0431, "rewards/accuracies": 0.625, "rewards/chosen": -0.07270147651433945, "rewards/margins": 0.07356799393892288, "rewards/rejected": -0.14626947045326233, "step": 12640 }, { "epoch": 0.83, "learning_rate": 4.389167260060068e-07, "logits/chosen": -2.3553760051727295, "logits/rejected": -2.1189093589782715, "logps/chosen": -214.73532104492188, "logps/rejected": -200.194580078125, "loss": 0.0171, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03022538684308529, "rewards/margins": 0.11816896498203278, "rewards/rejected": -0.14839434623718262, "step": 12650 }, { "epoch": 0.83, "learning_rate": 4.356905288479579e-07, "logits/chosen": -2.223787784576416, "logits/rejected": -1.9934899806976318, "logps/chosen": -238.44442749023438, "logps/rejected": -238.2301483154297, "loss": 0.023, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.059618107974529266, "rewards/margins": 0.14274819195270538, "rewards/rejected": -0.20236630737781525, "step": 12660 }, { "epoch": 0.83, "learning_rate": 4.3247510013932377e-07, "logits/chosen": -2.165524482727051, "logits/rejected": -2.005889654159546, "logps/chosen": -264.05902099609375, "logps/rejected": -284.2886047363281, "loss": 0.038, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05935274809598923, "rewards/margins": 0.10204823315143585, "rewards/rejected": -0.16140100359916687, "step": 12670 }, { "epoch": 0.83, "learning_rate": 4.2927045665335594e-07, "logits/chosen": -1.8666915893554688, "logits/rejected": -1.7911564111709595, "logps/chosen": -181.5066375732422, "logps/rejected": -196.82656860351562, "loss": 0.0176, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08314456045627594, "rewards/margins": 0.09437574446201324, "rewards/rejected": -0.17752030491828918, "step": 12680 }, { "epoch": 0.83, "learning_rate": 4.260766151070439e-07, "logits/chosen": -2.090467929840088, "logits/rejected": -2.1346335411071777, "logps/chosen": -235.2054901123047, "logps/rejected": -242.34439086914062, "loss": 0.0274, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05385993793606758, "rewards/margins": 0.09199769794940948, "rewards/rejected": -0.14585763216018677, "step": 12690 }, { "epoch": 0.83, "learning_rate": 4.228935921610308e-07, "logits/chosen": -2.323847770690918, "logits/rejected": -1.9647403955459595, "logps/chosen": -271.04962158203125, "logps/rejected": -221.23440551757812, "loss": 0.0133, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03665490821003914, "rewards/margins": 0.07118140161037445, "rewards/rejected": -0.10783632099628448, "step": 12700 }, { "epoch": 0.83, "eval_logits/chosen": -2.281400680541992, "eval_logits/rejected": -2.09328293800354, "eval_logps/chosen": -241.91531372070312, "eval_logps/rejected": -237.8956298828125, "eval_loss": 0.024218622595071793, "eval_rewards/accuracies": 0.6575000286102295, "eval_rewards/chosen": -0.0495518334209919, "eval_rewards/margins": 0.0818667784333229, "eval_rewards/rejected": -0.1314186155796051, "eval_runtime": 713.0872, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.402, "step": 12700 }, { "epoch": 0.83, "learning_rate": 4.1972140441952246e-07, "logits/chosen": -2.1343767642974854, "logits/rejected": -2.1033663749694824, "logps/chosen": -246.02841186523438, "logps/rejected": -262.2856140136719, "loss": 0.0525, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.032225728034973145, "rewards/margins": 0.07124023139476776, "rewards/rejected": -0.10346595197916031, "step": 12710 }, { "epoch": 0.83, "learning_rate": 4.165600684302046e-07, "logits/chosen": -2.245701789855957, "logits/rejected": -2.2973880767822266, "logps/chosen": -182.59765625, "logps/rejected": -210.4281463623047, "loss": 0.0206, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.034068286418914795, "rewards/margins": 0.08936282247304916, "rewards/rejected": -0.12343110144138336, "step": 12720 }, { "epoch": 0.83, "learning_rate": 4.13409600684154e-07, "logits/chosen": -2.3500359058380127, "logits/rejected": -2.065136671066284, "logps/chosen": -224.13363647460938, "logps/rejected": -213.9673309326172, "loss": 0.0515, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.045570097863674164, "rewards/margins": 0.0950850322842598, "rewards/rejected": -0.14065513014793396, "step": 12730 }, { "epoch": 0.83, "learning_rate": 4.102700176157548e-07, "logits/chosen": -2.415928602218628, "logits/rejected": -2.047894239425659, "logps/chosen": -336.18682861328125, "logps/rejected": -254.1941375732422, "loss": 0.0254, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05715783312916756, "rewards/margins": 0.07896944135427475, "rewards/rejected": -0.1361272633075714, "step": 12740 }, { "epoch": 0.83, "learning_rate": 4.0714133560260884e-07, "logits/chosen": -2.3049137592315674, "logits/rejected": -2.1440796852111816, "logps/chosen": -270.9903259277344, "logps/rejected": -225.3958282470703, "loss": 0.0283, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.057286281138658524, "rewards/margins": 0.06318188458681107, "rewards/rejected": -0.12046817690134048, "step": 12750 }, { "epoch": 0.83, "learning_rate": 4.0402357096545527e-07, "logits/chosen": -2.180220365524292, "logits/rejected": -2.14140248298645, "logps/chosen": -260.32342529296875, "logps/rejected": -265.9286193847656, "loss": 0.0184, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04280921444296837, "rewards/margins": 0.07888636738061905, "rewards/rejected": -0.12169557809829712, "step": 12760 }, { "epoch": 0.84, "learning_rate": 4.0091673996808025e-07, "logits/chosen": -2.389176845550537, "logits/rejected": -2.1885383129119873, "logps/chosen": -211.50894165039062, "logps/rejected": -203.1997833251953, "loss": 0.0251, "rewards/accuracies": 0.625, "rewards/chosen": -0.07224427908658981, "rewards/margins": 0.08159011602401733, "rewards/rejected": -0.15383440256118774, "step": 12770 }, { "epoch": 0.84, "learning_rate": 3.9782085881723776e-07, "logits/chosen": -2.207456588745117, "logits/rejected": -2.061331272125244, "logps/chosen": -176.98324584960938, "logps/rejected": -208.9066619873047, "loss": 0.0298, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.05692816898226738, "rewards/margins": 0.09978387504816055, "rewards/rejected": -0.15671204030513763, "step": 12780 }, { "epoch": 0.84, "learning_rate": 3.947359436625592e-07, "logits/chosen": -2.215158462524414, "logits/rejected": -2.1081624031066895, "logps/chosen": -241.7562713623047, "logps/rejected": -225.2100830078125, "loss": 0.0107, "rewards/accuracies": 0.75, "rewards/chosen": -0.03546663373708725, "rewards/margins": 0.10508982837200165, "rewards/rejected": -0.1405564546585083, "step": 12790 }, { "epoch": 0.84, "learning_rate": 3.9166201059647386e-07, "logits/chosen": -2.3521649837493896, "logits/rejected": -2.221381664276123, "logps/chosen": -268.37359619140625, "logps/rejected": -241.0518341064453, "loss": 0.0186, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03261677920818329, "rewards/margins": 0.04804609343409538, "rewards/rejected": -0.08066286146640778, "step": 12800 }, { "epoch": 0.84, "eval_logits/chosen": -2.2818355560302734, "eval_logits/rejected": -2.0935537815093994, "eval_logps/chosen": -241.017578125, "eval_logps/rejected": -237.06175231933594, "eval_loss": 0.02415802702307701, "eval_rewards/accuracies": 0.656499981880188, "eval_rewards/chosen": -0.04506318271160126, "eval_rewards/margins": 0.08218610286712646, "eval_rewards/rejected": -0.12724927067756653, "eval_runtime": 712.6346, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 12800 }, { "epoch": 0.84, "learning_rate": 3.8859907565412194e-07, "logits/chosen": -2.1575679779052734, "logits/rejected": -2.242316722869873, "logps/chosen": -191.49639892578125, "logps/rejected": -207.98928833007812, "loss": 0.0485, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05923575907945633, "rewards/margins": 0.08535636961460114, "rewards/rejected": -0.14459213614463806, "step": 12810 }, { "epoch": 0.84, "learning_rate": 3.8554715481327303e-07, "logits/chosen": -2.3164939880371094, "logits/rejected": -1.9328874349594116, "logps/chosen": -246.2336883544922, "logps/rejected": -230.98385620117188, "loss": 0.0374, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.0648733526468277, "rewards/margins": 0.10073963552713394, "rewards/rejected": -0.16561298072338104, "step": 12820 }, { "epoch": 0.84, "learning_rate": 3.8250626399424007e-07, "logits/chosen": -2.3338048458099365, "logits/rejected": -2.086073875427246, "logps/chosen": -261.89605712890625, "logps/rejected": -259.2681579589844, "loss": 0.0293, "rewards/accuracies": 0.625, "rewards/chosen": -0.05208975076675415, "rewards/margins": 0.08111140131950378, "rewards/rejected": -0.13320115208625793, "step": 12830 }, { "epoch": 0.84, "learning_rate": 3.7947641905980104e-07, "logits/chosen": -2.155761957168579, "logits/rejected": -2.1673035621643066, "logps/chosen": -214.59146118164062, "logps/rejected": -195.30322265625, "loss": 0.0337, "rewards/accuracies": 0.625, "rewards/chosen": -0.03636503964662552, "rewards/margins": 0.07696239650249481, "rewards/rejected": -0.11332742869853973, "step": 12840 }, { "epoch": 0.84, "learning_rate": 3.764576358151098e-07, "logits/chosen": -2.19984769821167, "logits/rejected": -2.1929872035980225, "logps/chosen": -182.03663635253906, "logps/rejected": -184.73660278320312, "loss": 0.0103, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.036097969859838486, "rewards/margins": 0.07272133976221085, "rewards/rejected": -0.10881930589675903, "step": 12850 }, { "epoch": 0.84, "learning_rate": 3.7344993000761944e-07, "logits/chosen": -2.3004448413848877, "logits/rejected": -2.1955597400665283, "logps/chosen": -191.7588348388672, "logps/rejected": -252.80300903320312, "loss": 0.0137, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07221857458353043, "rewards/margins": 0.09036187827587128, "rewards/rejected": -0.1625804454088211, "step": 12860 }, { "epoch": 0.84, "learning_rate": 3.7045331732699585e-07, "logits/chosen": -2.3142223358154297, "logits/rejected": -2.1267404556274414, "logps/chosen": -213.4777374267578, "logps/rejected": -203.80746459960938, "loss": 0.0338, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.041398413479328156, "rewards/margins": 0.13014577329158783, "rewards/rejected": -0.17154419422149658, "step": 12870 }, { "epoch": 0.84, "learning_rate": 3.6746781340503993e-07, "logits/chosen": -2.1561598777770996, "logits/rejected": -2.0527725219726562, "logps/chosen": -239.81692504882812, "logps/rejected": -250.43899536132812, "loss": 0.0233, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.027005767449736595, "rewards/margins": 0.09324527531862259, "rewards/rejected": -0.12025104463100433, "step": 12880 }, { "epoch": 0.84, "learning_rate": 3.6449343381560116e-07, "logits/chosen": -2.251657247543335, "logits/rejected": -2.0255634784698486, "logps/chosen": -246.94400024414062, "logps/rejected": -256.09893798828125, "loss": 0.034, "rewards/accuracies": 0.75, "rewards/chosen": -0.07759173959493637, "rewards/margins": 0.11272220313549042, "rewards/rejected": -0.1903139352798462, "step": 12890 }, { "epoch": 0.84, "learning_rate": 3.615301940745017e-07, "logits/chosen": -2.516042947769165, "logits/rejected": -1.9202125072479248, "logps/chosen": -325.527099609375, "logps/rejected": -234.82717895507812, "loss": 0.0117, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.041659772396087646, "rewards/margins": 0.07238514721393585, "rewards/rejected": -0.11404494196176529, "step": 12900 }, { "epoch": 0.84, "eval_logits/chosen": -2.2790322303771973, "eval_logits/rejected": -2.0908236503601074, "eval_logps/chosen": -239.9394989013672, "eval_logps/rejected": -236.24349975585938, "eval_loss": 0.02411588840186596, "eval_rewards/accuracies": 0.6579999923706055, "eval_rewards/chosen": -0.03967278078198433, "eval_rewards/margins": 0.0834852010011673, "eval_rewards/rejected": -0.12315797060728073, "eval_runtime": 710.5468, "eval_samples_per_second": 2.815, "eval_steps_per_second": 1.407, "step": 12900 }, { "epoch": 0.84, "learning_rate": 3.5857810963945084e-07, "logits/chosen": -2.1284141540527344, "logits/rejected": -1.9132626056671143, "logps/chosen": -225.8367462158203, "logps/rejected": -226.9214630126953, "loss": 0.0443, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.056638143956661224, "rewards/margins": 0.07953803241252899, "rewards/rejected": -0.1361761838197708, "step": 12910 }, { "epoch": 0.85, "learning_rate": 3.556371959099678e-07, "logits/chosen": -2.348480463027954, "logits/rejected": -2.103860855102539, "logps/chosen": -302.9031066894531, "logps/rejected": -277.6312255859375, "loss": 0.0125, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.028643876314163208, "rewards/margins": 0.07349316030740738, "rewards/rejected": -0.10213702917098999, "step": 12920 }, { "epoch": 0.85, "learning_rate": 3.5270746822729797e-07, "logits/chosen": -2.2369742393493652, "logits/rejected": -2.157525062561035, "logps/chosen": -255.8109588623047, "logps/rejected": -288.16217041015625, "loss": 0.0357, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04164459556341171, "rewards/margins": 0.09887997806072235, "rewards/rejected": -0.14052458107471466, "step": 12930 }, { "epoch": 0.85, "learning_rate": 3.4978894187433746e-07, "logits/chosen": -2.316485643386841, "logits/rejected": -2.1782500743865967, "logps/chosen": -163.74954223632812, "logps/rejected": -163.9324493408203, "loss": 0.0443, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.0633717030286789, "rewards/margins": 0.05545664578676224, "rewards/rejected": -0.11882835626602173, "step": 12940 }, { "epoch": 0.85, "learning_rate": 3.468816320755486e-07, "logits/chosen": -2.104240894317627, "logits/rejected": -1.9286854267120361, "logps/chosen": -227.17626953125, "logps/rejected": -201.1591033935547, "loss": 0.0121, "rewards/accuracies": 0.625, "rewards/chosen": -0.016242554411292076, "rewards/margins": 0.06796287000179291, "rewards/rejected": -0.08420543372631073, "step": 12950 }, { "epoch": 0.85, "learning_rate": 3.4398555399688336e-07, "logits/chosen": -2.3788199424743652, "logits/rejected": -2.011913537979126, "logps/chosen": -224.4126739501953, "logps/rejected": -210.57296752929688, "loss": 0.0248, "rewards/accuracies": 0.625, "rewards/chosen": -0.06531089544296265, "rewards/margins": 0.03412250801920891, "rewards/rejected": -0.09943340718746185, "step": 12960 }, { "epoch": 0.85, "learning_rate": 3.411007227457047e-07, "logits/chosen": -2.2853686809539795, "logits/rejected": -2.221477508544922, "logps/chosen": -254.6948699951172, "logps/rejected": -243.5530548095703, "loss": 0.0206, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.034668006002902985, "rewards/margins": 0.10566465556621552, "rewards/rejected": -0.1403326839208603, "step": 12970 }, { "epoch": 0.85, "learning_rate": 3.382271533707043e-07, "logits/chosen": -2.1889026165008545, "logits/rejected": -2.1711583137512207, "logps/chosen": -198.1396026611328, "logps/rejected": -187.36752319335938, "loss": 0.0256, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03267192095518112, "rewards/margins": 0.06055806949734688, "rewards/rejected": -0.0932299867272377, "step": 12980 }, { "epoch": 0.85, "learning_rate": 3.353648608618287e-07, "logits/chosen": -2.2680234909057617, "logits/rejected": -2.0193073749542236, "logps/chosen": -176.73532104492188, "logps/rejected": -182.29981994628906, "loss": 0.0289, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04432029277086258, "rewards/margins": 0.07767603546380997, "rewards/rejected": -0.12199632823467255, "step": 12990 }, { "epoch": 0.85, "learning_rate": 3.3251386015019676e-07, "logits/chosen": -2.3049163818359375, "logits/rejected": -2.0893776416778564, "logps/chosen": -213.1085968017578, "logps/rejected": -196.01004028320312, "loss": 0.0116, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.051745809614658356, "rewards/margins": 0.08322995156049728, "rewards/rejected": -0.13497576117515564, "step": 13000 }, { "epoch": 0.85, "eval_logits/chosen": -2.2780840396881104, "eval_logits/rejected": -2.089902639389038, "eval_logps/chosen": -240.3864288330078, "eval_logps/rejected": -237.061279296875, "eval_loss": 0.0241058599203825, "eval_rewards/accuracies": 0.6579999923706055, "eval_rewards/chosen": -0.04190727323293686, "eval_rewards/margins": 0.0853395164012909, "eval_rewards/rejected": -0.12724678218364716, "eval_runtime": 712.6776, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 13000 }, { "epoch": 0.85, "learning_rate": 3.296741661080255e-07, "logits/chosen": -2.229576349258423, "logits/rejected": -2.135002613067627, "logps/chosen": -243.9384002685547, "logps/rejected": -258.056884765625, "loss": 0.0153, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.054000191390514374, "rewards/margins": 0.10329830646514893, "rewards/rejected": -0.1572985053062439, "step": 13010 }, { "epoch": 0.85, "learning_rate": 3.2684579354854974e-07, "logits/chosen": -2.3377318382263184, "logits/rejected": -2.229548454284668, "logps/chosen": -306.2275390625, "logps/rejected": -335.21124267578125, "loss": 0.0297, "rewards/accuracies": 0.75, "rewards/chosen": -0.06884465366601944, "rewards/margins": 0.09362609684467316, "rewards/rejected": -0.1624707579612732, "step": 13020 }, { "epoch": 0.85, "learning_rate": 3.2402875722594653e-07, "logits/chosen": -2.3580403327941895, "logits/rejected": -2.0886740684509277, "logps/chosen": -174.5337677001953, "logps/rejected": -198.30404663085938, "loss": 0.0152, "rewards/accuracies": 0.625, "rewards/chosen": -0.027441659942269325, "rewards/margins": 0.08809362351894379, "rewards/rejected": -0.11553528159856796, "step": 13030 }, { "epoch": 0.85, "learning_rate": 3.212230718352566e-07, "logits/chosen": -2.2204318046569824, "logits/rejected": -2.235112190246582, "logps/chosen": -235.35092163085938, "logps/rejected": -175.29856872558594, "loss": 0.0313, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05150535702705383, "rewards/margins": 0.018223298713564873, "rewards/rejected": -0.06972865760326385, "step": 13040 }, { "epoch": 0.85, "learning_rate": 3.1842875201231025e-07, "logits/chosen": -2.278357744216919, "logits/rejected": -1.9919675588607788, "logps/chosen": -228.9661102294922, "logps/rejected": -218.33358764648438, "loss": 0.0262, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.036130357533693314, "rewards/margins": 0.07571340352296829, "rewards/rejected": -0.1118437647819519, "step": 13050 }, { "epoch": 0.85, "learning_rate": 3.156458123336478e-07, "logits/chosen": -2.1235363483428955, "logits/rejected": -1.9482349157333374, "logps/chosen": -164.78628540039062, "logps/rejected": -178.83383178710938, "loss": 0.0237, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03137214109301567, "rewards/margins": 0.12098171561956406, "rewards/rejected": -0.15235385298728943, "step": 13060 }, { "epoch": 0.86, "learning_rate": 3.128742673164459e-07, "logits/chosen": -2.3608107566833496, "logits/rejected": -1.9896495342254639, "logps/chosen": -290.57684326171875, "logps/rejected": -264.00274658203125, "loss": 0.0079, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.038247980177402496, "rewards/margins": 0.08970911800861359, "rewards/rejected": -0.12795709073543549, "step": 13070 }, { "epoch": 0.86, "learning_rate": 3.101141314184414e-07, "logits/chosen": -2.4731571674346924, "logits/rejected": -2.226579189300537, "logps/chosen": -212.8175506591797, "logps/rejected": -215.1665802001953, "loss": 0.0264, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.03716491162776947, "rewards/margins": 0.05780158191919327, "rewards/rejected": -0.09496650099754333, "step": 13080 }, { "epoch": 0.86, "learning_rate": 3.0736541903785526e-07, "logits/chosen": -2.121049165725708, "logits/rejected": -2.087754487991333, "logps/chosen": -217.7719268798828, "logps/rejected": -284.319091796875, "loss": 0.0207, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04378505051136017, "rewards/margins": 0.0854378491640091, "rewards/rejected": -0.12922288477420807, "step": 13090 }, { "epoch": 0.86, "learning_rate": 3.0462814451331704e-07, "logits/chosen": -2.177192211151123, "logits/rejected": -2.042564630508423, "logps/chosen": -241.98666381835938, "logps/rejected": -251.2749481201172, "loss": 0.0338, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.056551218032836914, "rewards/margins": 0.04691624268889427, "rewards/rejected": -0.10346746444702148, "step": 13100 }, { "epoch": 0.86, "eval_logits/chosen": -2.282399892807007, "eval_logits/rejected": -2.094139337539673, "eval_logps/chosen": -240.08843994140625, "eval_logps/rejected": -236.25448608398438, "eval_loss": 0.02407999336719513, "eval_rewards/accuracies": 0.656499981880188, "eval_rewards/chosen": -0.04041757434606552, "eval_rewards/margins": 0.08279527723789215, "eval_rewards/rejected": -0.12321285903453827, "eval_runtime": 712.8135, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 13100 }, { "epoch": 0.86, "learning_rate": 3.019023221237927e-07, "logits/chosen": -2.239502429962158, "logits/rejected": -2.0651755332946777, "logps/chosen": -246.2601318359375, "logps/rejected": -205.74697875976562, "loss": 0.0222, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04763896390795708, "rewards/margins": 0.08527128398418427, "rewards/rejected": -0.13291025161743164, "step": 13110 }, { "epoch": 0.86, "learning_rate": 2.991879660885058e-07, "logits/chosen": -2.4021129608154297, "logits/rejected": -2.1448426246643066, "logps/chosen": -270.3570861816406, "logps/rejected": -270.1576843261719, "loss": 0.0313, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03405073657631874, "rewards/margins": 0.08080430328845978, "rewards/rejected": -0.11485502868890762, "step": 13120 }, { "epoch": 0.86, "learning_rate": 2.9648509056686786e-07, "logits/chosen": -2.305415630340576, "logits/rejected": -2.1397616863250732, "logps/chosen": -183.93411254882812, "logps/rejected": -175.04684448242188, "loss": 0.0292, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.03496173024177551, "rewards/margins": 0.08344221115112305, "rewards/rejected": -0.11840394884347916, "step": 13130 }, { "epoch": 0.86, "learning_rate": 2.937937096584012e-07, "logits/chosen": -2.29146146774292, "logits/rejected": -2.0643632411956787, "logps/chosen": -297.84332275390625, "logps/rejected": -251.4933624267578, "loss": 0.0271, "rewards/accuracies": 0.625, "rewards/chosen": -0.037211813032627106, "rewards/margins": 0.0706702172756195, "rewards/rejected": -0.10788202285766602, "step": 13140 }, { "epoch": 0.86, "learning_rate": 2.9111383740266756e-07, "logits/chosen": -2.090592622756958, "logits/rejected": -1.9605131149291992, "logps/chosen": -244.744140625, "logps/rejected": -249.68655395507812, "loss": 0.0227, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04479987546801567, "rewards/margins": 0.053652308881282806, "rewards/rejected": -0.09845219552516937, "step": 13150 }, { "epoch": 0.86, "learning_rate": 2.8844548777919255e-07, "logits/chosen": -2.3207592964172363, "logits/rejected": -2.0424532890319824, "logps/chosen": -210.184814453125, "logps/rejected": -202.9254150390625, "loss": 0.0247, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03150486573576927, "rewards/margins": 0.07510420680046082, "rewards/rejected": -0.10660906881093979, "step": 13160 }, { "epoch": 0.86, "learning_rate": 2.8578867470739594e-07, "logits/chosen": -2.156432628631592, "logits/rejected": -2.0161538124084473, "logps/chosen": -197.3763427734375, "logps/rejected": -185.967041015625, "loss": 0.0517, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.06443998962640762, "rewards/margins": 0.10006703436374664, "rewards/rejected": -0.16450701653957367, "step": 13170 }, { "epoch": 0.86, "learning_rate": 2.8314341204651484e-07, "logits/chosen": -2.3970794677734375, "logits/rejected": -2.125443458557129, "logps/chosen": -281.5930480957031, "logps/rejected": -231.6305694580078, "loss": 0.0152, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.027769574895501137, "rewards/margins": 0.11410045623779297, "rewards/rejected": -0.14187002182006836, "step": 13180 }, { "epoch": 0.86, "learning_rate": 2.805097135955362e-07, "logits/chosen": -2.310490131378174, "logits/rejected": -2.1002745628356934, "logps/chosen": -219.2947235107422, "logps/rejected": -205.0678253173828, "loss": 0.0379, "rewards/accuracies": 0.5, "rewards/chosen": -0.03385355696082115, "rewards/margins": 0.10349483788013458, "rewards/rejected": -0.13734838366508484, "step": 13190 }, { "epoch": 0.86, "learning_rate": 2.778875930931213e-07, "logits/chosen": -2.306821346282959, "logits/rejected": -1.9826276302337646, "logps/chosen": -242.89950561523438, "logps/rejected": -245.67117309570312, "loss": 0.0206, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03890315443277359, "rewards/margins": 0.10066906362771988, "rewards/rejected": -0.13957220315933228, "step": 13200 }, { "epoch": 0.86, "eval_logits/chosen": -2.2772462368011475, "eval_logits/rejected": -2.089167833328247, "eval_logps/chosen": -240.58746337890625, "eval_logps/rejected": -237.21768188476562, "eval_loss": 0.024047939106822014, "eval_rewards/accuracies": 0.6589999794960022, "eval_rewards/chosen": -0.042912621051073074, "eval_rewards/margins": 0.08511631935834885, "eval_rewards/rejected": -0.12802892923355103, "eval_runtime": 714.205, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 13200 }, { "epoch": 0.86, "learning_rate": 2.7527706421753426e-07, "logits/chosen": -2.278181791305542, "logits/rejected": -2.2129273414611816, "logps/chosen": -208.23129272460938, "logps/rejected": -224.854736328125, "loss": 0.0276, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04915057495236397, "rewards/margins": 0.060673534870147705, "rewards/rejected": -0.10982410609722137, "step": 13210 }, { "epoch": 0.86, "learning_rate": 2.726781405865736e-07, "logits/chosen": -2.3695473670959473, "logits/rejected": -1.8260514736175537, "logps/chosen": -309.7107849121094, "logps/rejected": -211.94125366210938, "loss": 0.0176, "rewards/accuracies": 0.625, "rewards/chosen": -0.03522497043013573, "rewards/margins": 0.08999510854482651, "rewards/rejected": -0.12522009015083313, "step": 13220 }, { "epoch": 0.87, "learning_rate": 2.7009083575749687e-07, "logits/chosen": -2.2610929012298584, "logits/rejected": -2.1646924018859863, "logps/chosen": -252.8301544189453, "logps/rejected": -263.22113037109375, "loss": 0.0156, "rewards/accuracies": 0.625, "rewards/chosen": -0.044849202036857605, "rewards/margins": 0.06013251468539238, "rewards/rejected": -0.10498170554637909, "step": 13230 }, { "epoch": 0.87, "learning_rate": 2.6751516322695457e-07, "logits/chosen": -2.3331539630889893, "logits/rejected": -2.2672178745269775, "logps/chosen": -198.41842651367188, "logps/rejected": -202.80697631835938, "loss": 0.0251, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04087433964014053, "rewards/margins": 0.05214228481054306, "rewards/rejected": -0.09301663935184479, "step": 13240 }, { "epoch": 0.87, "learning_rate": 2.649511364309154e-07, "logits/chosen": -2.27077317237854, "logits/rejected": -2.239917278289795, "logps/chosen": -210.52523803710938, "logps/rejected": -208.5736083984375, "loss": 0.0057, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04117783159017563, "rewards/margins": 0.09464763104915619, "rewards/rejected": -0.13582547008991241, "step": 13250 }, { "epoch": 0.87, "learning_rate": 2.6239876874460003e-07, "logits/chosen": -2.3841896057128906, "logits/rejected": -2.257284641265869, "logps/chosen": -290.65576171875, "logps/rejected": -285.47900390625, "loss": 0.0196, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.031202878803014755, "rewards/margins": 0.12689706683158875, "rewards/rejected": -0.158099964261055, "step": 13260 }, { "epoch": 0.87, "learning_rate": 2.5985807348240744e-07, "logits/chosen": -2.424506187438965, "logits/rejected": -1.9588611125946045, "logps/chosen": -238.301025390625, "logps/rejected": -216.34521484375, "loss": 0.0144, "rewards/accuracies": 0.75, "rewards/chosen": -0.02978738211095333, "rewards/margins": 0.12995637953281403, "rewards/rejected": -0.1597437560558319, "step": 13270 }, { "epoch": 0.87, "learning_rate": 2.5732906389785014e-07, "logits/chosen": -2.312986373901367, "logits/rejected": -2.1459386348724365, "logps/chosen": -282.345703125, "logps/rejected": -268.94635009765625, "loss": 0.0139, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.023198014125227928, "rewards/margins": 0.12373118102550507, "rewards/rejected": -0.14692921936511993, "step": 13280 }, { "epoch": 0.87, "learning_rate": 2.5481175318347956e-07, "logits/chosen": -2.174574375152588, "logits/rejected": -2.2210919857025146, "logps/chosen": -233.5167999267578, "logps/rejected": -270.05572509765625, "loss": 0.0204, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.030095994472503662, "rewards/margins": 0.09321445971727371, "rewards/rejected": -0.12331046164035797, "step": 13290 }, { "epoch": 0.87, "learning_rate": 2.5230615447082246e-07, "logits/chosen": -2.273481845855713, "logits/rejected": -1.9318599700927734, "logps/chosen": -260.14398193359375, "logps/rejected": -260.80560302734375, "loss": 0.018, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04413662105798721, "rewards/margins": 0.07723399251699448, "rewards/rejected": -0.1213705986738205, "step": 13300 }, { "epoch": 0.87, "eval_logits/chosen": -2.277225971221924, "eval_logits/rejected": -2.0891315937042236, "eval_logps/chosen": -240.14222717285156, "eval_logps/rejected": -236.75955200195312, "eval_loss": 0.02404077909886837, "eval_rewards/accuracies": 0.6600000262260437, "eval_rewards/chosen": -0.04068637639284134, "eval_rewards/margins": 0.08505190163850784, "eval_rewards/rejected": -0.12573827803134918, "eval_runtime": 714.5476, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.399, "step": 13300 }, { "epoch": 0.87, "learning_rate": 2.49812280830308e-07, "logits/chosen": -2.300873279571533, "logits/rejected": -1.8354320526123047, "logps/chosen": -232.4656219482422, "logps/rejected": -232.950927734375, "loss": 0.0207, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.03714224696159363, "rewards/margins": 0.17670652270317078, "rewards/rejected": -0.2138487547636032, "step": 13310 }, { "epoch": 0.87, "learning_rate": 2.4733014527120457e-07, "logits/chosen": -2.1679153442382812, "logits/rejected": -1.9986671209335327, "logps/chosen": -221.49765014648438, "logps/rejected": -219.9002685546875, "loss": 0.0274, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0954136997461319, "rewards/margins": 0.10647950321435928, "rewards/rejected": -0.20189321041107178, "step": 13320 }, { "epoch": 0.87, "learning_rate": 2.4485976074154565e-07, "logits/chosen": -2.2446699142456055, "logits/rejected": -2.301520824432373, "logps/chosen": -219.3531036376953, "logps/rejected": -253.1564483642578, "loss": 0.0207, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.05177539587020874, "rewards/margins": 0.018800964578986168, "rewards/rejected": -0.07057635486125946, "step": 13330 }, { "epoch": 0.87, "learning_rate": 2.4240114012806763e-07, "logits/chosen": -2.2601559162139893, "logits/rejected": -2.2380449771881104, "logps/chosen": -212.53067016601562, "logps/rejected": -209.5550994873047, "loss": 0.0197, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.023662324994802475, "rewards/margins": 0.07047148048877716, "rewards/rejected": -0.09413380920886993, "step": 13340 }, { "epoch": 0.87, "learning_rate": 2.399542962561399e-07, "logits/chosen": -2.1661453247070312, "logits/rejected": -2.0010826587677, "logps/chosen": -230.67544555664062, "logps/rejected": -205.7180938720703, "loss": 0.0274, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.02609751746058464, "rewards/margins": 0.11225984990596771, "rewards/rejected": -0.13835737109184265, "step": 13350 }, { "epoch": 0.87, "learning_rate": 2.3751924188969876e-07, "logits/chosen": -2.2163939476013184, "logits/rejected": -2.0882577896118164, "logps/chosen": -256.12921142578125, "logps/rejected": -258.23223876953125, "loss": 0.0171, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.021953338757157326, "rewards/margins": 0.10968828201293945, "rewards/rejected": -0.13164159655570984, "step": 13360 }, { "epoch": 0.87, "learning_rate": 2.3509598973118024e-07, "logits/chosen": -2.409304141998291, "logits/rejected": -2.2119812965393066, "logps/chosen": -228.95950317382812, "logps/rejected": -176.398193359375, "loss": 0.015, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.039770763367414474, "rewards/margins": 0.05113250017166138, "rewards/rejected": -0.09090326726436615, "step": 13370 }, { "epoch": 0.88, "learning_rate": 2.326845524214555e-07, "logits/chosen": -2.0703043937683105, "logits/rejected": -2.148268938064575, "logps/chosen": -245.6635284423828, "logps/rejected": -221.56747436523438, "loss": 0.034, "rewards/accuracies": 0.5, "rewards/chosen": -0.053046077489852905, "rewards/margins": 0.010143243707716465, "rewards/rejected": -0.0631893128156662, "step": 13380 }, { "epoch": 0.88, "learning_rate": 2.3028494253976158e-07, "logits/chosen": -2.3399901390075684, "logits/rejected": -2.1317601203918457, "logps/chosen": -353.18402099609375, "logps/rejected": -301.57366943359375, "loss": 0.0209, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.059592921286821365, "rewards/margins": 0.06485487520694733, "rewards/rejected": -0.12444780021905899, "step": 13390 }, { "epoch": 0.88, "learning_rate": 2.2789717260364026e-07, "logits/chosen": -2.3331074714660645, "logits/rejected": -2.1523356437683105, "logps/chosen": -172.91854858398438, "logps/rejected": -164.48684692382812, "loss": 0.0275, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03906229883432388, "rewards/margins": 0.0664801225066185, "rewards/rejected": -0.10554243624210358, "step": 13400 }, { "epoch": 0.88, "eval_logits/chosen": -2.278562545776367, "eval_logits/rejected": -2.0904037952423096, "eval_logps/chosen": -239.84494018554688, "eval_logps/rejected": -236.29257202148438, "eval_loss": 0.02403143234550953, "eval_rewards/accuracies": 0.6585000157356262, "eval_rewards/chosen": -0.039199963212013245, "eval_rewards/margins": 0.08420341461896896, "eval_rewards/rejected": -0.12340336292982101, "eval_runtime": 713.6647, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 13400 }, { "epoch": 0.88, "learning_rate": 2.255212550688682e-07, "logits/chosen": -2.233186721801758, "logits/rejected": -2.340898036956787, "logps/chosen": -227.12271118164062, "logps/rejected": -309.49151611328125, "loss": 0.0252, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0458383783698082, "rewards/margins": 0.09091077744960785, "rewards/rejected": -0.13674917817115784, "step": 13410 }, { "epoch": 0.88, "learning_rate": 2.2315720232939598e-07, "logits/chosen": -2.600893497467041, "logits/rejected": -2.138629913330078, "logps/chosen": -265.4658203125, "logps/rejected": -198.0146026611328, "loss": 0.0157, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.016760537400841713, "rewards/margins": 0.11075560748577118, "rewards/rejected": -0.12751615047454834, "step": 13420 }, { "epoch": 0.88, "learning_rate": 2.2080502671727956e-07, "logits/chosen": -2.3849570751190186, "logits/rejected": -2.043117046356201, "logps/chosen": -224.8113555908203, "logps/rejected": -219.0714874267578, "loss": 0.0282, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.02364751696586609, "rewards/margins": 0.08043906092643738, "rewards/rejected": -0.10408657789230347, "step": 13430 }, { "epoch": 0.88, "learning_rate": 2.1846474050262078e-07, "logits/chosen": -2.350308656692505, "logits/rejected": -2.2130496501922607, "logps/chosen": -252.57907104492188, "logps/rejected": -198.87066650390625, "loss": 0.0115, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.030091604217886925, "rewards/margins": 0.06044364720582962, "rewards/rejected": -0.09053526073694229, "step": 13440 }, { "epoch": 0.88, "learning_rate": 2.1613635589349756e-07, "logits/chosen": -1.924780249595642, "logits/rejected": -2.009662628173828, "logps/chosen": -202.04293823242188, "logps/rejected": -251.4788055419922, "loss": 0.0402, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.024398323148489, "rewards/margins": 0.10957573354244232, "rewards/rejected": -0.13397404551506042, "step": 13450 }, { "epoch": 0.88, "learning_rate": 2.1381988503590578e-07, "logits/chosen": -2.011870861053467, "logits/rejected": -2.077077627182007, "logps/chosen": -223.63961791992188, "logps/rejected": -238.5012664794922, "loss": 0.0164, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03883880376815796, "rewards/margins": 0.10505743324756622, "rewards/rejected": -0.14389625191688538, "step": 13460 }, { "epoch": 0.88, "learning_rate": 2.11515340013691e-07, "logits/chosen": -2.3660213947296143, "logits/rejected": -2.3374831676483154, "logps/chosen": -238.0471954345703, "logps/rejected": -241.2670440673828, "loss": 0.0171, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03437312692403793, "rewards/margins": 0.12400054931640625, "rewards/rejected": -0.1583736687898636, "step": 13470 }, { "epoch": 0.88, "learning_rate": 2.092227328484897e-07, "logits/chosen": -2.1448683738708496, "logits/rejected": -2.1051604747772217, "logps/chosen": -211.29458618164062, "logps/rejected": -262.7167663574219, "loss": 0.0152, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03642933443188667, "rewards/margins": 0.1038234680891037, "rewards/rejected": -0.14025278389453888, "step": 13480 }, { "epoch": 0.88, "learning_rate": 2.0694207549966345e-07, "logits/chosen": -2.1631648540496826, "logits/rejected": -2.0374045372009277, "logps/chosen": -221.1369171142578, "logps/rejected": -214.2423095703125, "loss": 0.0332, "rewards/accuracies": 0.625, "rewards/chosen": -0.061142198741436005, "rewards/margins": 0.04987434297800064, "rewards/rejected": -0.11101654917001724, "step": 13490 }, { "epoch": 0.88, "learning_rate": 2.0467337986423864e-07, "logits/chosen": -2.4196977615356445, "logits/rejected": -2.1247169971466064, "logps/chosen": -310.6087341308594, "logps/rejected": -289.87506103515625, "loss": 0.0177, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.0244259275496006, "rewards/margins": 0.062395643442869186, "rewards/rejected": -0.08682157099246979, "step": 13500 }, { "epoch": 0.88, "eval_logits/chosen": -2.2791824340820312, "eval_logits/rejected": -2.0911219120025635, "eval_logps/chosen": -239.38253784179688, "eval_logps/rejected": -235.622314453125, "eval_loss": 0.024019325152039528, "eval_rewards/accuracies": 0.6579999923706055, "eval_rewards/chosen": -0.03688788786530495, "eval_rewards/margins": 0.08316419273614883, "eval_rewards/rejected": -0.12005206942558289, "eval_runtime": 713.7035, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 13500 }, { "epoch": 0.88, "learning_rate": 2.0241665777684272e-07, "logits/chosen": -2.3379337787628174, "logits/rejected": -2.2434744834899902, "logps/chosen": -273.8184509277344, "logps/rejected": -255.4578094482422, "loss": 0.0209, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.027627144008874893, "rewards/margins": 0.12627311050891876, "rewards/rejected": -0.15390026569366455, "step": 13510 }, { "epoch": 0.88, "learning_rate": 2.0017192100964366e-07, "logits/chosen": -1.9843534231185913, "logits/rejected": -2.065035820007324, "logps/chosen": -211.2195587158203, "logps/rejected": -229.91024780273438, "loss": 0.0204, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05631772428750992, "rewards/margins": 0.08286778628826141, "rewards/rejected": -0.13918551802635193, "step": 13520 }, { "epoch": 0.89, "learning_rate": 1.9793918127228777e-07, "logits/chosen": -2.366363048553467, "logits/rejected": -1.9975332021713257, "logps/chosen": -326.14263916015625, "logps/rejected": -289.92840576171875, "loss": 0.0234, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04921586066484451, "rewards/margins": 0.08933909237384796, "rewards/rejected": -0.13855496048927307, "step": 13530 }, { "epoch": 0.89, "learning_rate": 1.9571845021184005e-07, "logits/chosen": -2.1216652393341064, "logits/rejected": -2.031754732131958, "logps/chosen": -243.87771606445312, "logps/rejected": -265.47723388671875, "loss": 0.026, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.06445915251970291, "rewards/margins": 0.08740756660699844, "rewards/rejected": -0.15186671912670135, "step": 13540 }, { "epoch": 0.89, "learning_rate": 1.9350973941272027e-07, "logits/chosen": -2.26531720161438, "logits/rejected": -2.232865810394287, "logps/chosen": -214.000732421875, "logps/rejected": -210.4711456298828, "loss": 0.033, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05112849920988083, "rewards/margins": 0.08485585451126099, "rewards/rejected": -0.1359843611717224, "step": 13550 }, { "epoch": 0.89, "learning_rate": 1.9131306039664676e-07, "logits/chosen": -2.1346435546875, "logits/rejected": -2.0782511234283447, "logps/chosen": -207.88265991210938, "logps/rejected": -251.60629272460938, "loss": 0.0484, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.039892952889204025, "rewards/margins": 0.09683619439601898, "rewards/rejected": -0.1367291510105133, "step": 13560 }, { "epoch": 0.89, "learning_rate": 1.8912842462257358e-07, "logits/chosen": -2.170205593109131, "logits/rejected": -2.098627805709839, "logps/chosen": -228.48764038085938, "logps/rejected": -234.42172241210938, "loss": 0.0362, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04159203916788101, "rewards/margins": 0.1146048903465271, "rewards/rejected": -0.1561969369649887, "step": 13570 }, { "epoch": 0.89, "learning_rate": 1.869558434866303e-07, "logits/chosen": -2.241560459136963, "logits/rejected": -2.308928966522217, "logps/chosen": -191.30880737304688, "logps/rejected": -229.52975463867188, "loss": 0.0319, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05109493061900139, "rewards/margins": 0.10095541179180145, "rewards/rejected": -0.15205034613609314, "step": 13580 }, { "epoch": 0.89, "learning_rate": 1.847953283220652e-07, "logits/chosen": -2.41326642036438, "logits/rejected": -2.0905160903930664, "logps/chosen": -262.574951171875, "logps/rejected": -211.3037567138672, "loss": 0.0165, "rewards/accuracies": 0.75, "rewards/chosen": -0.032326146960258484, "rewards/margins": 0.13682501018047333, "rewards/rejected": -0.1691511571407318, "step": 13590 }, { "epoch": 0.89, "learning_rate": 1.8264689039918265e-07, "logits/chosen": -2.3743739128112793, "logits/rejected": -2.0387189388275146, "logps/chosen": -269.67041015625, "logps/rejected": -256.69189453125, "loss": 0.0225, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04920024052262306, "rewards/margins": 0.07223823666572571, "rewards/rejected": -0.12143848091363907, "step": 13600 }, { "epoch": 0.89, "eval_logits/chosen": -2.2793779373168945, "eval_logits/rejected": -2.091268539428711, "eval_logps/chosen": -240.11476135253906, "eval_logps/rejected": -236.71998596191406, "eval_loss": 0.0239734910428524, "eval_rewards/accuracies": 0.6579999923706055, "eval_rewards/chosen": -0.04054900258779526, "eval_rewards/margins": 0.08499140292406082, "eval_rewards/rejected": -0.12554040551185608, "eval_runtime": 714.3747, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 13600 }, { "epoch": 0.89, "learning_rate": 1.8051054092528857e-07, "logits/chosen": -2.308885097503662, "logits/rejected": -2.1212146282196045, "logps/chosen": -265.5684509277344, "logps/rejected": -278.5885925292969, "loss": 0.021, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.022335032001137733, "rewards/margins": 0.1139020100235939, "rewards/rejected": -0.13623705506324768, "step": 13610 }, { "epoch": 0.89, "learning_rate": 1.783862910446271e-07, "logits/chosen": -1.9275972843170166, "logits/rejected": -2.0645546913146973, "logps/chosen": -182.4210205078125, "logps/rejected": -196.45352172851562, "loss": 0.0311, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04783863574266434, "rewards/margins": 0.11673679202795029, "rewards/rejected": -0.16457542777061462, "step": 13620 }, { "epoch": 0.89, "learning_rate": 1.762741518383271e-07, "logits/chosen": -2.326813220977783, "logits/rejected": -2.162519693374634, "logps/chosen": -230.1579132080078, "logps/rejected": -220.95681762695312, "loss": 0.0211, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.041627801954746246, "rewards/margins": 0.09231483936309814, "rewards/rejected": -0.133942648768425, "step": 13630 }, { "epoch": 0.89, "learning_rate": 1.7417413432434082e-07, "logits/chosen": -2.386035680770874, "logits/rejected": -1.9975239038467407, "logps/chosen": -263.8116149902344, "logps/rejected": -224.295166015625, "loss": 0.0433, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05575961619615555, "rewards/margins": 0.06625467538833618, "rewards/rejected": -0.12201429903507233, "step": 13640 }, { "epoch": 0.89, "learning_rate": 1.7208624945738855e-07, "logits/chosen": -2.3942575454711914, "logits/rejected": -2.2442939281463623, "logps/chosen": -226.2176513671875, "logps/rejected": -243.75204467773438, "loss": 0.0193, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0406666025519371, "rewards/margins": 0.05048510432243347, "rewards/rejected": -0.09115169942378998, "step": 13650 }, { "epoch": 0.89, "learning_rate": 1.7001050812889995e-07, "logits/chosen": -2.3620524406433105, "logits/rejected": -2.045550584793091, "logps/chosen": -268.92852783203125, "logps/rejected": -246.66116333007812, "loss": 0.018, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0669785588979721, "rewards/margins": 0.08991777151823044, "rewards/rejected": -0.15689633786678314, "step": 13660 }, { "epoch": 0.89, "learning_rate": 1.679469211669596e-07, "logits/chosen": -2.283552646636963, "logits/rejected": -2.1251068115234375, "logps/chosen": -234.9169158935547, "logps/rejected": -205.94332885742188, "loss": 0.02, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04985884949564934, "rewards/margins": 0.11315397918224335, "rewards/rejected": -0.16301283240318298, "step": 13670 }, { "epoch": 0.9, "learning_rate": 1.6589549933624715e-07, "logits/chosen": -2.273036479949951, "logits/rejected": -2.088663101196289, "logps/chosen": -244.7873077392578, "logps/rejected": -223.2783203125, "loss": 0.0129, "rewards/accuracies": 0.75, "rewards/chosen": -0.022901328280568123, "rewards/margins": 0.14988216757774353, "rewards/rejected": -0.1727834939956665, "step": 13680 }, { "epoch": 0.9, "learning_rate": 1.638562533379845e-07, "logits/chosen": -2.278625726699829, "logits/rejected": -2.108764886856079, "logps/chosen": -261.9084167480469, "logps/rejected": -212.7818603515625, "loss": 0.0259, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.031029045581817627, "rewards/margins": 0.06570714712142944, "rewards/rejected": -0.09673619270324707, "step": 13690 }, { "epoch": 0.9, "learning_rate": 1.6182919380987676e-07, "logits/chosen": -2.323634386062622, "logits/rejected": -2.239675521850586, "logps/chosen": -234.4893798828125, "logps/rejected": -231.24038696289062, "loss": 0.0223, "rewards/accuracies": 0.625, "rewards/chosen": -0.043856892734766006, "rewards/margins": 0.060640860348939896, "rewards/rejected": -0.1044977456331253, "step": 13700 }, { "epoch": 0.9, "eval_logits/chosen": -2.280345916748047, "eval_logits/rejected": -2.0922813415527344, "eval_logps/chosen": -240.45132446289062, "eval_logps/rejected": -236.97463989257812, "eval_loss": 0.02395368367433548, "eval_rewards/accuracies": 0.659500002861023, "eval_rewards/chosen": -0.042231932282447815, "eval_rewards/margins": 0.08458175510168076, "eval_rewards/rejected": -0.12681369483470917, "eval_runtime": 713.0169, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.402, "step": 13700 }, { "epoch": 0.9, "learning_rate": 1.598143313260603e-07, "logits/chosen": -2.233161687850952, "logits/rejected": -2.139432907104492, "logps/chosen": -195.6060333251953, "logps/rejected": -198.994873046875, "loss": 0.0342, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03776133805513382, "rewards/margins": 0.0770169347524643, "rewards/rejected": -0.11477828025817871, "step": 13710 }, { "epoch": 0.9, "learning_rate": 1.5781167639704415e-07, "logits/chosen": -2.469926357269287, "logits/rejected": -1.927030324935913, "logps/chosen": -338.1827697753906, "logps/rejected": -227.13156127929688, "loss": 0.0243, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.030080029740929604, "rewards/margins": 0.06596101820468903, "rewards/rejected": -0.09604103863239288, "step": 13720 }, { "epoch": 0.9, "learning_rate": 1.5582123946965787e-07, "logits/chosen": -2.1400372982025146, "logits/rejected": -2.0245399475097656, "logps/chosen": -241.9009552001953, "logps/rejected": -268.08343505859375, "loss": 0.035, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.037529777735471725, "rewards/margins": 0.08017268031835556, "rewards/rejected": -0.11770244687795639, "step": 13730 }, { "epoch": 0.9, "learning_rate": 1.5384303092699504e-07, "logits/chosen": -2.3461155891418457, "logits/rejected": -2.112164258956909, "logps/chosen": -294.508056640625, "logps/rejected": -317.8929138183594, "loss": 0.0107, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.037617024034261703, "rewards/margins": 0.12533004581928253, "rewards/rejected": -0.16294705867767334, "step": 13740 }, { "epoch": 0.9, "learning_rate": 1.518770610883613e-07, "logits/chosen": -2.215355396270752, "logits/rejected": -1.9471629858016968, "logps/chosen": -228.7562255859375, "logps/rejected": -224.67568969726562, "loss": 0.0184, "rewards/accuracies": 0.75, "rewards/chosen": -0.07108782231807709, "rewards/margins": 0.12666651606559753, "rewards/rejected": -0.19775435328483582, "step": 13750 }, { "epoch": 0.9, "learning_rate": 1.4992334020921735e-07, "logits/chosen": -2.228881359100342, "logits/rejected": -2.126504898071289, "logps/chosen": -177.85769653320312, "logps/rejected": -172.0975341796875, "loss": 0.022, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.02442639134824276, "rewards/margins": 0.11926829814910889, "rewards/rejected": -0.1436946839094162, "step": 13760 }, { "epoch": 0.9, "learning_rate": 1.4798187848112905e-07, "logits/chosen": -2.1605820655822754, "logits/rejected": -2.1533656120300293, "logps/chosen": -240.3361358642578, "logps/rejected": -220.5252227783203, "loss": 0.0203, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08682699501514435, "rewards/margins": 0.10523271560668945, "rewards/rejected": -0.1920597106218338, "step": 13770 }, { "epoch": 0.9, "learning_rate": 1.460526860317113e-07, "logits/chosen": -2.348702907562256, "logits/rejected": -2.2805120944976807, "logps/chosen": -182.96865844726562, "logps/rejected": -242.5914306640625, "loss": 0.0231, "rewards/accuracies": 0.75, "rewards/chosen": -0.048420049250125885, "rewards/margins": 0.1393798291683197, "rewards/rejected": -0.187799870967865, "step": 13780 }, { "epoch": 0.9, "learning_rate": 1.441357729245771e-07, "logits/chosen": -2.472022533416748, "logits/rejected": -1.9027433395385742, "logps/chosen": -261.91046142578125, "logps/rejected": -224.39181518554688, "loss": 0.0166, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06959304213523865, "rewards/margins": 0.09234293550252914, "rewards/rejected": -0.1619359850883484, "step": 13790 }, { "epoch": 0.9, "learning_rate": 1.4223114915928482e-07, "logits/chosen": -2.1173558235168457, "logits/rejected": -1.8709170818328857, "logps/chosen": -236.102294921875, "logps/rejected": -256.97296142578125, "loss": 0.0302, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05089854449033737, "rewards/margins": 0.07051734626293182, "rewards/rejected": -0.1214158907532692, "step": 13800 }, { "epoch": 0.9, "eval_logits/chosen": -2.2779958248138428, "eval_logits/rejected": -2.0899648666381836, "eval_logps/chosen": -240.32012939453125, "eval_logps/rejected": -237.0576629638672, "eval_loss": 0.023951876908540726, "eval_rewards/accuracies": 0.6575000286102295, "eval_rewards/chosen": -0.04157585650682449, "eval_rewards/margins": 0.08565285801887512, "eval_rewards/rejected": -0.12722869217395782, "eval_runtime": 713.4223, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.402, "step": 13800 }, { "epoch": 0.9, "learning_rate": 1.403388246712842e-07, "logits/chosen": -2.1812937259674072, "logits/rejected": -1.9446184635162354, "logps/chosen": -176.65940856933594, "logps/rejected": -180.36642456054688, "loss": 0.0253, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.062041789293289185, "rewards/margins": 0.054293982684612274, "rewards/rejected": -0.11633577197790146, "step": 13810 }, { "epoch": 0.9, "learning_rate": 1.3845880933186757e-07, "logits/chosen": -2.4402499198913574, "logits/rejected": -2.179344654083252, "logps/chosen": -248.98678588867188, "logps/rejected": -221.80526733398438, "loss": 0.0273, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.05124008655548096, "rewards/margins": 0.03527190536260605, "rewards/rejected": -0.08651198446750641, "step": 13820 }, { "epoch": 0.9, "learning_rate": 1.3659111294811457e-07, "logits/chosen": -2.282447099685669, "logits/rejected": -2.141117572784424, "logps/chosen": -205.77249145507812, "logps/rejected": -201.51483154296875, "loss": 0.0285, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06964652985334396, "rewards/margins": 0.06819276511669159, "rewards/rejected": -0.13783928751945496, "step": 13830 }, { "epoch": 0.91, "learning_rate": 1.347357452628459e-07, "logits/chosen": -2.437429666519165, "logits/rejected": -2.303067207336426, "logps/chosen": -251.4400634765625, "logps/rejected": -259.0691223144531, "loss": 0.0325, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.030712375417351723, "rewards/margins": 0.07565386593341827, "rewards/rejected": -0.10636623948812485, "step": 13840 }, { "epoch": 0.91, "learning_rate": 1.3289271595456732e-07, "logits/chosen": -2.1983304023742676, "logits/rejected": -2.010070323944092, "logps/chosen": -218.0074005126953, "logps/rejected": -214.27383422851562, "loss": 0.0156, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.06839106976985931, "rewards/margins": 0.11030055582523346, "rewards/rejected": -0.17869162559509277, "step": 13850 }, { "epoch": 0.91, "learning_rate": 1.310620346374228e-07, "logits/chosen": -2.110769271850586, "logits/rejected": -1.9821794033050537, "logps/chosen": -241.5878143310547, "logps/rejected": -227.92190551757812, "loss": 0.0163, "rewards/accuracies": 0.625, "rewards/chosen": -0.046829283237457275, "rewards/margins": 0.11568622291088104, "rewards/rejected": -0.1625155210494995, "step": 13860 }, { "epoch": 0.91, "learning_rate": 1.2924371086114274e-07, "logits/chosen": -2.171700954437256, "logits/rejected": -1.9256553649902344, "logps/chosen": -246.22705078125, "logps/rejected": -256.12603759765625, "loss": 0.0128, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.050827883183956146, "rewards/margins": 0.07962900400161743, "rewards/rejected": -0.13045687973499298, "step": 13870 }, { "epoch": 0.91, "learning_rate": 1.274377541109953e-07, "logits/chosen": -2.12762451171875, "logits/rejected": -2.201578140258789, "logps/chosen": -174.74855041503906, "logps/rejected": -267.5423889160156, "loss": 0.0203, "rewards/accuracies": 0.5, "rewards/chosen": -0.05199012905359268, "rewards/margins": 0.07435055077075958, "rewards/rejected": -0.12634067237377167, "step": 13880 }, { "epoch": 0.91, "learning_rate": 1.2564417380773435e-07, "logits/chosen": -2.055483102798462, "logits/rejected": -1.917754888534546, "logps/chosen": -188.5877227783203, "logps/rejected": -231.0465545654297, "loss": 0.0292, "rewards/accuracies": 0.625, "rewards/chosen": -0.050453364849090576, "rewards/margins": 0.09988425672054291, "rewards/rejected": -0.1503376066684723, "step": 13890 }, { "epoch": 0.91, "learning_rate": 1.2386297930755436e-07, "logits/chosen": -2.2837576866149902, "logits/rejected": -2.2889015674591064, "logps/chosen": -264.87750244140625, "logps/rejected": -278.1776123046875, "loss": 0.0213, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08495865762233734, "rewards/margins": 0.09063265472650528, "rewards/rejected": -0.17559130489826202, "step": 13900 }, { "epoch": 0.91, "eval_logits/chosen": -2.276700258255005, "eval_logits/rejected": -2.0887749195098877, "eval_logps/chosen": -240.15423583984375, "eval_logps/rejected": -236.94261169433594, "eval_loss": 0.02391652762889862, "eval_rewards/accuracies": 0.6604999899864197, "eval_rewards/chosen": -0.040746383368968964, "eval_rewards/margins": 0.08590715378522873, "eval_rewards/rejected": -0.1266535371541977, "eval_runtime": 712.968, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.403, "step": 13900 }, { "epoch": 0.91, "learning_rate": 1.220941799020378e-07, "logits/chosen": -2.0645687580108643, "logits/rejected": -1.9768037796020508, "logps/chosen": -226.5297393798828, "logps/rejected": -222.92886352539062, "loss": 0.0194, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03724323958158493, "rewards/margins": 0.09375981986522675, "rewards/rejected": -0.13100305199623108, "step": 13910 }, { "epoch": 0.91, "learning_rate": 1.2033778481810975e-07, "logits/chosen": -2.334246873855591, "logits/rejected": -2.0699946880340576, "logps/chosen": -225.9147186279297, "logps/rejected": -209.2530975341797, "loss": 0.0348, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.02960597537457943, "rewards/margins": 0.10342290252447128, "rewards/rejected": -0.13302887976169586, "step": 13920 }, { "epoch": 0.91, "learning_rate": 1.1859380321798591e-07, "logits/chosen": -2.2709996700286865, "logits/rejected": -2.342074394226074, "logps/chosen": -212.2975616455078, "logps/rejected": -241.0948486328125, "loss": 0.0208, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.048681434243917465, "rewards/margins": 0.07026176899671555, "rewards/rejected": -0.11894319206476212, "step": 13930 }, { "epoch": 0.91, "learning_rate": 1.1686224419912989e-07, "logits/chosen": -2.1778564453125, "logits/rejected": -1.9656540155410767, "logps/chosen": -263.8016662597656, "logps/rejected": -259.34735107421875, "loss": 0.0151, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.05464515835046768, "rewards/margins": 0.12418242543935776, "rewards/rejected": -0.17882758378982544, "step": 13940 }, { "epoch": 0.91, "learning_rate": 1.1514311679420104e-07, "logits/chosen": -1.9906097650527954, "logits/rejected": -2.0624940395355225, "logps/chosen": -166.02975463867188, "logps/rejected": -243.86508178710938, "loss": 0.0254, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0544060580432415, "rewards/margins": 0.11007817834615707, "rewards/rejected": -0.16448423266410828, "step": 13950 }, { "epoch": 0.91, "learning_rate": 1.1343642997101029e-07, "logits/chosen": -2.269257068634033, "logits/rejected": -2.173360824584961, "logps/chosen": -209.3166961669922, "logps/rejected": -215.50198364257812, "loss": 0.0273, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0348169282078743, "rewards/margins": 0.09703563153743744, "rewards/rejected": -0.13185256719589233, "step": 13960 }, { "epoch": 0.91, "learning_rate": 1.1174219263247188e-07, "logits/chosen": -2.010667324066162, "logits/rejected": -1.900843620300293, "logps/chosen": -209.64718627929688, "logps/rejected": -211.0970001220703, "loss": 0.0188, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.054148148745298386, "rewards/margins": 0.094904825091362, "rewards/rejected": -0.1490529626607895, "step": 13970 }, { "epoch": 0.91, "learning_rate": 1.1006041361655839e-07, "logits/chosen": -2.4445457458496094, "logits/rejected": -1.978326439857483, "logps/chosen": -219.8248748779297, "logps/rejected": -193.75340270996094, "loss": 0.0262, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.05693792551755905, "rewards/margins": 0.06933034956455231, "rewards/rejected": -0.12626829743385315, "step": 13980 }, { "epoch": 0.92, "learning_rate": 1.0839110169625189e-07, "logits/chosen": -2.005495548248291, "logits/rejected": -2.297020435333252, "logps/chosen": -217.3983917236328, "logps/rejected": -237.1815643310547, "loss": 0.0231, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.052780430763959885, "rewards/margins": 0.1356925070285797, "rewards/rejected": -0.1884729266166687, "step": 13990 }, { "epoch": 0.92, "learning_rate": 1.06734265579502e-07, "logits/chosen": -2.2928407192230225, "logits/rejected": -1.987210988998413, "logps/chosen": -273.75787353515625, "logps/rejected": -230.35916137695312, "loss": 0.0221, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.06120569631457329, "rewards/margins": 0.11553256213665009, "rewards/rejected": -0.17673827707767487, "step": 14000 }, { "epoch": 0.92, "eval_logits/chosen": -2.277212381362915, "eval_logits/rejected": -2.089181900024414, "eval_logps/chosen": -240.49688720703125, "eval_logps/rejected": -237.35060119628906, "eval_loss": 0.023917585611343384, "eval_rewards/accuracies": 0.659500002861023, "eval_rewards/chosen": -0.042459722608327866, "eval_rewards/margins": 0.08623373508453369, "eval_rewards/rejected": -0.12869346141815186, "eval_runtime": 714.107, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.4, "step": 14000 }, { "epoch": 0.92, "learning_rate": 1.050899139091771e-07, "logits/chosen": -2.370375394821167, "logits/rejected": -2.018911838531494, "logps/chosen": -289.21685791015625, "logps/rejected": -260.20703125, "loss": 0.0266, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05294713377952576, "rewards/margins": 0.08754151314496994, "rewards/rejected": -0.1404886543750763, "step": 14010 }, { "epoch": 0.92, "learning_rate": 1.0345805526302072e-07, "logits/chosen": -2.194415807723999, "logits/rejected": -2.2927064895629883, "logps/chosen": -210.08584594726562, "logps/rejected": -221.80447387695312, "loss": 0.0167, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.044617511332035065, "rewards/margins": 0.09478165954351425, "rewards/rejected": -0.1393991857767105, "step": 14020 }, { "epoch": 0.92, "learning_rate": 1.0183869815360764e-07, "logits/chosen": -2.188875913619995, "logits/rejected": -2.306763172149658, "logps/chosen": -199.89414978027344, "logps/rejected": -245.0305938720703, "loss": 0.0175, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.039903342723846436, "rewards/margins": 0.06248168274760246, "rewards/rejected": -0.1023850217461586, "step": 14030 }, { "epoch": 0.92, "learning_rate": 1.0023185102829763e-07, "logits/chosen": -1.9965900182724, "logits/rejected": -2.2122974395751953, "logps/chosen": -239.11306762695312, "logps/rejected": -262.6435546875, "loss": 0.0188, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0414542555809021, "rewards/margins": 0.0967288389801979, "rewards/rejected": -0.1381830871105194, "step": 14040 }, { "epoch": 0.92, "learning_rate": 9.863752226919182e-08, "logits/chosen": -2.1965866088867188, "logits/rejected": -1.7160733938217163, "logps/chosen": -244.0505828857422, "logps/rejected": -205.0543212890625, "loss": 0.0302, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03159191086888313, "rewards/margins": 0.13291195034980774, "rewards/rejected": -0.16450384259223938, "step": 14050 }, { "epoch": 0.92, "learning_rate": 9.705572019309107e-08, "logits/chosen": -2.124981164932251, "logits/rejected": -2.1576197147369385, "logps/chosen": -277.95843505859375, "logps/rejected": -269.6822509765625, "loss": 0.0196, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.049655042588710785, "rewards/margins": 0.11603889614343643, "rewards/rejected": -0.16569393873214722, "step": 14060 }, { "epoch": 0.92, "learning_rate": 9.548645305144849e-08, "logits/chosen": -2.3364295959472656, "logits/rejected": -2.1991865634918213, "logps/chosen": -180.5296173095703, "logps/rejected": -202.05340576171875, "loss": 0.0254, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.03517225757241249, "rewards/margins": 0.08775045722723007, "rewards/rejected": -0.12292270362377167, "step": 14070 }, { "epoch": 0.92, "learning_rate": 9.392972903033149e-08, "logits/chosen": -2.2558159828186035, "logits/rejected": -2.1181063652038574, "logps/chosen": -231.4455108642578, "logps/rejected": -232.4448699951172, "loss": 0.0158, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04157125949859619, "rewards/margins": 0.04271433874964714, "rewards/rejected": -0.08428559452295303, "step": 14080 }, { "epoch": 0.92, "learning_rate": 9.238555625037449e-08, "logits/chosen": -2.2797014713287354, "logits/rejected": -2.06451153755188, "logps/chosen": -200.78018188476562, "logps/rejected": -184.2545928955078, "loss": 0.0155, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.05035996437072754, "rewards/margins": 0.06578092277050018, "rewards/rejected": -0.11614088714122772, "step": 14090 }, { "epoch": 0.92, "learning_rate": 9.085394276673903e-08, "logits/chosen": -2.2840874195098877, "logits/rejected": -1.9881312847137451, "logps/chosen": -278.541015625, "logps/rejected": -282.0232849121094, "loss": 0.0259, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05187432840466499, "rewards/margins": 0.08965910971164703, "rewards/rejected": -0.14153344929218292, "step": 14100 }, { "epoch": 0.92, "eval_logits/chosen": -2.2771763801574707, "eval_logits/rejected": -2.0892348289489746, "eval_logps/chosen": -240.22537231445312, "eval_logps/rejected": -236.93740844726562, "eval_loss": 0.023912038654088974, "eval_rewards/accuracies": 0.6585000157356262, "eval_rewards/chosen": -0.04110207408666611, "eval_rewards/margins": 0.08552539348602295, "eval_rewards/rejected": -0.12662747502326965, "eval_runtime": 710.9583, "eval_samples_per_second": 2.813, "eval_steps_per_second": 1.407, "step": 14100 }, { "epoch": 0.92, "learning_rate": 8.933489656907157e-08, "logits/chosen": -2.263106107711792, "logits/rejected": -2.1606152057647705, "logps/chosen": -227.7388153076172, "logps/rejected": -264.6552429199219, "loss": 0.027, "rewards/accuracies": 0.625, "rewards/chosen": -0.05201687291264534, "rewards/margins": 0.05988026782870293, "rewards/rejected": -0.11189714819192886, "step": 14110 }, { "epoch": 0.92, "learning_rate": 8.782842558146127e-08, "logits/chosen": -2.3026959896087646, "logits/rejected": -2.231685161590576, "logps/chosen": -166.2057647705078, "logps/rejected": -186.4104766845703, "loss": 0.0333, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0216091126203537, "rewards/margins": 0.10955610126256943, "rewards/rejected": -0.13116520643234253, "step": 14120 }, { "epoch": 0.92, "learning_rate": 8.633453766239836e-08, "logits/chosen": -2.3711798191070557, "logits/rejected": -2.1423401832580566, "logps/chosen": -238.1058349609375, "logps/rejected": -218.2222442626953, "loss": 0.0117, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.014923456124961376, "rewards/margins": 0.05985826998949051, "rewards/rejected": -0.07478172332048416, "step": 14130 }, { "epoch": 0.93, "learning_rate": 8.485324060473448e-08, "logits/chosen": -2.2005109786987305, "logits/rejected": -2.0876071453094482, "logps/chosen": -246.9003448486328, "logps/rejected": -247.547119140625, "loss": 0.0137, "rewards/accuracies": 0.625, "rewards/chosen": -0.03309129923582077, "rewards/margins": 0.06518785655498505, "rewards/rejected": -0.09827915579080582, "step": 14140 }, { "epoch": 0.93, "learning_rate": 8.338454213564052e-08, "logits/chosen": -2.246474266052246, "logits/rejected": -1.9718784093856812, "logps/chosen": -241.40493774414062, "logps/rejected": -243.92190551757812, "loss": 0.0272, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.05253978818655014, "rewards/margins": 0.09855607897043228, "rewards/rejected": -0.15109586715698242, "step": 14150 }, { "epoch": 0.93, "learning_rate": 8.192844991656679e-08, "logits/chosen": -2.235311985015869, "logits/rejected": -1.9973386526107788, "logps/chosen": -249.0086669921875, "logps/rejected": -228.0577392578125, "loss": 0.0238, "rewards/accuracies": 0.625, "rewards/chosen": -0.0514599084854126, "rewards/margins": 0.07728545367717743, "rewards/rejected": -0.12874536216259003, "step": 14160 }, { "epoch": 0.93, "learning_rate": 8.048497154320434e-08, "logits/chosen": -2.2661285400390625, "logits/rejected": -2.312032699584961, "logps/chosen": -142.10989379882812, "logps/rejected": -165.83572387695312, "loss": 0.0229, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.06280183792114258, "rewards/margins": 0.07314186543226242, "rewards/rejected": -0.1359437108039856, "step": 14170 }, { "epoch": 0.93, "learning_rate": 7.905411454544265e-08, "logits/chosen": -2.281566619873047, "logits/rejected": -2.133256196975708, "logps/chosen": -247.7585906982422, "logps/rejected": -265.89141845703125, "loss": 0.0337, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05627186968922615, "rewards/margins": 0.06695263832807541, "rewards/rejected": -0.12322449684143066, "step": 14180 }, { "epoch": 0.93, "learning_rate": 7.763588638733332e-08, "logits/chosen": -2.294487714767456, "logits/rejected": -2.249011516571045, "logps/chosen": -269.75885009765625, "logps/rejected": -266.6964416503906, "loss": 0.0229, "rewards/accuracies": 0.625, "rewards/chosen": -0.03669353947043419, "rewards/margins": 0.08380848169326782, "rewards/rejected": -0.12050201743841171, "step": 14190 }, { "epoch": 0.93, "learning_rate": 7.623029446704899e-08, "logits/chosen": -2.154623508453369, "logits/rejected": -2.328273296356201, "logps/chosen": -317.51373291015625, "logps/rejected": -303.58099365234375, "loss": 0.0156, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.03036128357052803, "rewards/margins": 0.114809550344944, "rewards/rejected": -0.14517082273960114, "step": 14200 }, { "epoch": 0.93, "eval_logits/chosen": -2.2770776748657227, "eval_logits/rejected": -2.089095115661621, "eval_logps/chosen": -240.37933349609375, "eval_logps/rejected": -237.17074584960938, "eval_loss": 0.023934001103043556, "eval_rewards/accuracies": 0.6614999771118164, "eval_rewards/chosen": -0.04187189042568207, "eval_rewards/margins": 0.08592244237661362, "eval_rewards/rejected": -0.1277943253517151, "eval_runtime": 715.6797, "eval_samples_per_second": 2.795, "eval_steps_per_second": 1.397, "step": 14200 }, { "epoch": 0.93, "learning_rate": 7.483734611684557e-08, "logits/chosen": -2.0783333778381348, "logits/rejected": -1.9120200872421265, "logps/chosen": -264.9306640625, "logps/rejected": -227.62576293945312, "loss": 0.0429, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04022422060370445, "rewards/margins": 0.08428023010492325, "rewards/rejected": -0.12450442463159561, "step": 14210 }, { "epoch": 0.93, "learning_rate": 7.345704860302366e-08, "logits/chosen": -2.3603944778442383, "logits/rejected": -2.3407979011535645, "logps/chosen": -257.5492858886719, "logps/rejected": -275.9205322265625, "loss": 0.0135, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04648297280073166, "rewards/margins": 0.09524835646152496, "rewards/rejected": -0.14173133671283722, "step": 14220 }, { "epoch": 0.93, "learning_rate": 7.208940912589224e-08, "logits/chosen": -2.283950090408325, "logits/rejected": -1.9505077600479126, "logps/chosen": -223.5438995361328, "logps/rejected": -212.1008758544922, "loss": 0.0269, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.06978797167539597, "rewards/margins": 0.12967699766159058, "rewards/rejected": -0.19946496188640594, "step": 14230 }, { "epoch": 0.93, "learning_rate": 7.073443481972753e-08, "logits/chosen": -2.0981688499450684, "logits/rejected": -2.0736212730407715, "logps/chosen": -194.62655639648438, "logps/rejected": -231.03488159179688, "loss": 0.01, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.052353955805301666, "rewards/margins": 0.08453324437141418, "rewards/rejected": -0.13688719272613525, "step": 14240 }, { "epoch": 0.93, "learning_rate": 6.939213275274027e-08, "logits/chosen": -2.2323665618896484, "logits/rejected": -2.1960151195526123, "logps/chosen": -250.4298858642578, "logps/rejected": -243.5245819091797, "loss": 0.0232, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05588046833872795, "rewards/margins": 0.06348704546689987, "rewards/rejected": -0.11936751753091812, "step": 14250 }, { "epoch": 0.93, "learning_rate": 6.806250992703461e-08, "logits/chosen": -2.259444236755371, "logits/rejected": -2.1075503826141357, "logps/chosen": -224.3917694091797, "logps/rejected": -212.67379760742188, "loss": 0.026, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.0384858064353466, "rewards/margins": 0.07246101647615433, "rewards/rejected": -0.11094681918621063, "step": 14260 }, { "epoch": 0.93, "learning_rate": 6.674557327857572e-08, "logits/chosen": -2.253323793411255, "logits/rejected": -2.262890100479126, "logps/chosen": -261.03839111328125, "logps/rejected": -276.0207214355469, "loss": 0.0235, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.033838655799627304, "rewards/margins": 0.1203516274690628, "rewards/rejected": -0.1541902720928192, "step": 14270 }, { "epoch": 0.93, "learning_rate": 6.544132967714917e-08, "logits/chosen": -2.0000040531158447, "logits/rejected": -2.0072243213653564, "logps/chosen": -259.1932678222656, "logps/rejected": -270.4208068847656, "loss": 0.0185, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.061666447669267654, "rewards/margins": 0.12469998747110367, "rewards/rejected": -0.18636643886566162, "step": 14280 }, { "epoch": 0.93, "learning_rate": 6.414978592632932e-08, "logits/chosen": -2.3282947540283203, "logits/rejected": -1.893397331237793, "logps/chosen": -271.5886535644531, "logps/rejected": -242.21109008789062, "loss": 0.0127, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.052446603775024414, "rewards/margins": 0.08538929373025894, "rewards/rejected": -0.13783589005470276, "step": 14290 }, { "epoch": 0.94, "learning_rate": 6.287094876344046e-08, "logits/chosen": -2.2830162048339844, "logits/rejected": -2.3124501705169678, "logps/chosen": -175.96054077148438, "logps/rejected": -198.17027282714844, "loss": 0.0158, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.017701296135783195, "rewards/margins": 0.07785354554653168, "rewards/rejected": -0.09555485099554062, "step": 14300 }, { "epoch": 0.94, "eval_logits/chosen": -2.2765443325042725, "eval_logits/rejected": -2.0886929035186768, "eval_logps/chosen": -240.28904724121094, "eval_logps/rejected": -237.00119018554688, "eval_loss": 0.023930862545967102, "eval_rewards/accuracies": 0.6600000262260437, "eval_rewards/chosen": -0.04142040014266968, "eval_rewards/margins": 0.0855260118842125, "eval_rewards/rejected": -0.12694638967514038, "eval_runtime": 713.3157, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 14300 }, { "epoch": 0.94, "learning_rate": 6.160482485952413e-08, "logits/chosen": -2.423222303390503, "logits/rejected": -2.1630914211273193, "logps/chosen": -248.218994140625, "logps/rejected": -230.79067993164062, "loss": 0.025, "rewards/accuracies": 0.625, "rewards/chosen": -0.05884624272584915, "rewards/margins": 0.07183768600225449, "rewards/rejected": -0.13068392872810364, "step": 14310 }, { "epoch": 0.94, "learning_rate": 6.035142081930234e-08, "logits/chosen": -2.288935661315918, "logits/rejected": -1.9192240238189697, "logps/chosen": -274.0880432128906, "logps/rejected": -213.8658447265625, "loss": 0.0178, "rewards/accuracies": 0.75, "rewards/chosen": -0.05645836517214775, "rewards/margins": 0.08397369086742401, "rewards/rejected": -0.14043205976486206, "step": 14320 }, { "epoch": 0.94, "learning_rate": 5.911074318114496e-08, "logits/chosen": -2.117149591445923, "logits/rejected": -2.226193428039551, "logps/chosen": -212.69583129882812, "logps/rejected": -271.21246337890625, "loss": 0.0138, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04553840309381485, "rewards/margins": 0.0819590762257576, "rewards/rejected": -0.12749749422073364, "step": 14330 }, { "epoch": 0.94, "learning_rate": 5.788279841703381e-08, "logits/chosen": -2.3232810497283936, "logits/rejected": -2.0692296028137207, "logps/chosen": -191.612060546875, "logps/rejected": -203.80642700195312, "loss": 0.024, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03851230815052986, "rewards/margins": 0.09930738061666489, "rewards/rejected": -0.13781967759132385, "step": 14340 }, { "epoch": 0.94, "learning_rate": 5.66675929325311e-08, "logits/chosen": -2.321681499481201, "logits/rejected": -2.07851505279541, "logps/chosen": -229.58871459960938, "logps/rejected": -227.32705688476562, "loss": 0.0155, "rewards/accuracies": 0.625, "rewards/chosen": -0.04462137073278427, "rewards/margins": 0.04543551802635193, "rewards/rejected": -0.0900568813085556, "step": 14350 }, { "epoch": 0.94, "learning_rate": 5.546513306674301e-08, "logits/chosen": -2.2515957355499268, "logits/rejected": -1.8618234395980835, "logps/chosen": -287.59844970703125, "logps/rejected": -232.39102172851562, "loss": 0.0239, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.045409008860588074, "rewards/margins": 0.09937725961208344, "rewards/rejected": -0.1447862684726715, "step": 14360 }, { "epoch": 0.94, "learning_rate": 5.4275425092290004e-08, "logits/chosen": -2.382500410079956, "logits/rejected": -2.3284032344818115, "logps/chosen": -260.4015197753906, "logps/rejected": -260.8076171875, "loss": 0.0243, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.02802688255906105, "rewards/margins": 0.09424374997615814, "rewards/rejected": -0.1222706288099289, "step": 14370 }, { "epoch": 0.94, "learning_rate": 5.309847521527078e-08, "logits/chosen": -2.2202110290527344, "logits/rejected": -1.856729507446289, "logps/chosen": -293.5809631347656, "logps/rejected": -266.0841369628906, "loss": 0.0229, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04503798484802246, "rewards/margins": 0.07715752720832825, "rewards/rejected": -0.12219550460577011, "step": 14380 }, { "epoch": 0.94, "learning_rate": 5.1934289575233385e-08, "logits/chosen": -2.094172954559326, "logits/rejected": -1.7569259405136108, "logps/chosen": -250.232421875, "logps/rejected": -235.7849578857422, "loss": 0.0272, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05351518467068672, "rewards/margins": 0.09959940612316132, "rewards/rejected": -0.15311458706855774, "step": 14390 }, { "epoch": 0.94, "learning_rate": 5.078287424513994e-08, "logits/chosen": -2.34496808052063, "logits/rejected": -2.232339382171631, "logps/chosen": -280.80596923828125, "logps/rejected": -227.66921997070312, "loss": 0.0216, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05379108712077141, "rewards/margins": 0.10846780240535736, "rewards/rejected": -0.16225889325141907, "step": 14400 }, { "epoch": 0.94, "eval_logits/chosen": -2.2774136066436768, "eval_logits/rejected": -2.0895285606384277, "eval_logps/chosen": -240.2555694580078, "eval_logps/rejected": -236.98165893554688, "eval_loss": 0.02393474243581295, "eval_rewards/accuracies": 0.6620000004768372, "eval_rewards/chosen": -0.04125319421291351, "eval_rewards/margins": 0.08559557795524597, "eval_rewards/rejected": -0.12684877216815948, "eval_runtime": 713.2387, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 14400 }, { "epoch": 0.94, "learning_rate": 4.964423523133671e-08, "logits/chosen": -2.353637456893921, "logits/rejected": -2.108694314956665, "logps/chosen": -223.6641082763672, "logps/rejected": -203.7133026123047, "loss": 0.0262, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.02966316044330597, "rewards/margins": 0.07199189066886902, "rewards/rejected": -0.10165505111217499, "step": 14410 }, { "epoch": 0.94, "learning_rate": 4.8518378473522976e-08, "logits/chosen": -2.210888624191284, "logits/rejected": -2.0540719032287598, "logps/chosen": -260.373291015625, "logps/rejected": -270.9510192871094, "loss": 0.0279, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.049317214637994766, "rewards/margins": 0.08652471750974655, "rewards/rejected": -0.13584193587303162, "step": 14420 }, { "epoch": 0.94, "learning_rate": 4.7405309844718584e-08, "logits/chosen": -2.1370275020599365, "logits/rejected": -2.032277822494507, "logps/chosen": -204.3740692138672, "logps/rejected": -238.6953582763672, "loss": 0.0243, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.059375204145908356, "rewards/margins": 0.12562525272369385, "rewards/rejected": -0.1850004643201828, "step": 14430 }, { "epoch": 0.94, "learning_rate": 4.630503515123508e-08, "logits/chosen": -2.3820743560791016, "logits/rejected": -2.08414888381958, "logps/chosen": -211.52529907226562, "logps/rejected": -179.07192993164062, "loss": 0.0283, "rewards/accuracies": 0.625, "rewards/chosen": -0.0532965362071991, "rewards/margins": 0.08706490695476532, "rewards/rejected": -0.14036144316196442, "step": 14440 }, { "epoch": 0.95, "learning_rate": 4.5217560132644056e-08, "logits/chosen": -2.2008249759674072, "logits/rejected": -2.1269989013671875, "logps/chosen": -159.01589965820312, "logps/rejected": -192.65785217285156, "loss": 0.0564, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.041981421411037445, "rewards/margins": 0.07620520889759064, "rewards/rejected": -0.11818663775920868, "step": 14450 }, { "epoch": 0.95, "learning_rate": 4.41428904617483e-08, "logits/chosen": -2.236302614212036, "logits/rejected": -2.231376886367798, "logps/chosen": -187.8321533203125, "logps/rejected": -207.3106689453125, "loss": 0.0367, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.05361725017428398, "rewards/margins": 0.07409012317657471, "rewards/rejected": -0.12770739197731018, "step": 14460 }, { "epoch": 0.95, "learning_rate": 4.3081031744550696e-08, "logits/chosen": -2.3412978649139404, "logits/rejected": -2.2665255069732666, "logps/chosen": -258.1219482421875, "logps/rejected": -254.7415313720703, "loss": 0.0316, "rewards/accuracies": 0.625, "rewards/chosen": -0.024883154779672623, "rewards/margins": 0.09473639726638794, "rewards/rejected": -0.11961954832077026, "step": 14470 }, { "epoch": 0.95, "learning_rate": 4.2031989520227025e-08, "logits/chosen": -2.316408634185791, "logits/rejected": -2.113938808441162, "logps/chosen": -235.3325653076172, "logps/rejected": -227.225830078125, "loss": 0.0082, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03509070724248886, "rewards/margins": 0.0689038410782814, "rewards/rejected": -0.10399456322193146, "step": 14480 }, { "epoch": 0.95, "learning_rate": 4.099576926109461e-08, "logits/chosen": -2.3996028900146484, "logits/rejected": -1.8967199325561523, "logps/chosen": -246.0100555419922, "logps/rejected": -184.26046752929688, "loss": 0.0251, "rewards/accuracies": 0.625, "rewards/chosen": -0.03114110231399536, "rewards/margins": 0.0870242789387703, "rewards/rejected": -0.11816537380218506, "step": 14490 }, { "epoch": 0.95, "learning_rate": 3.997237637258705e-08, "logits/chosen": -2.230151891708374, "logits/rejected": -2.2772998809814453, "logps/chosen": -321.3664855957031, "logps/rejected": -295.66204833984375, "loss": 0.0126, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.020565873011946678, "rewards/margins": 0.08819369971752167, "rewards/rejected": -0.10875958204269409, "step": 14500 }, { "epoch": 0.95, "eval_logits/chosen": -2.277437925338745, "eval_logits/rejected": -2.0894877910614014, "eval_logps/chosen": -240.2699432373047, "eval_logps/rejected": -237.0005340576172, "eval_loss": 0.023945190012454987, "eval_rewards/accuracies": 0.6604999899864197, "eval_rewards/chosen": -0.04132496565580368, "eval_rewards/margins": 0.08561818301677704, "eval_rewards/rejected": -0.12694314122200012, "eval_runtime": 712.8613, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 14500 }, { "epoch": 0.95, "learning_rate": 3.8961816193222035e-08, "logits/chosen": -2.370445966720581, "logits/rejected": -2.1521902084350586, "logps/chosen": -251.1434326171875, "logps/rejected": -203.28396606445312, "loss": 0.0278, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.07646572589874268, "rewards/margins": 0.04736895114183426, "rewards/rejected": -0.12383468449115753, "step": 14510 }, { "epoch": 0.95, "learning_rate": 3.79640939945769e-08, "logits/chosen": -2.332946300506592, "logits/rejected": -2.2011606693267822, "logps/chosen": -288.9212341308594, "logps/rejected": -211.8104248046875, "loss": 0.0074, "rewards/accuracies": 0.5, "rewards/chosen": -0.013501740992069244, "rewards/margins": 0.04576627165079117, "rewards/rejected": -0.05926801636815071, "step": 14520 }, { "epoch": 0.95, "learning_rate": 3.697921498125895e-08, "logits/chosen": -2.0662643909454346, "logits/rejected": -2.1631338596343994, "logps/chosen": -224.1737060546875, "logps/rejected": -247.8341522216797, "loss": 0.0171, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.07110340148210526, "rewards/margins": 0.08890338987112045, "rewards/rejected": -0.1600067913532257, "step": 14530 }, { "epoch": 0.95, "learning_rate": 3.6007184290880456e-08, "logits/chosen": -2.2737791538238525, "logits/rejected": -2.1759960651397705, "logps/chosen": -224.28488159179688, "logps/rejected": -222.465576171875, "loss": 0.0443, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.061559468507766724, "rewards/margins": 0.07422170042991638, "rewards/rejected": -0.1357811540365219, "step": 14540 }, { "epoch": 0.95, "learning_rate": 3.504800699402872e-08, "logits/chosen": -2.5146634578704834, "logits/rejected": -2.2180233001708984, "logps/chosen": -351.6441345214844, "logps/rejected": -287.64080810546875, "loss": 0.0109, "rewards/accuracies": 0.625, "rewards/chosen": -0.029949486255645752, "rewards/margins": 0.04681790992617607, "rewards/rejected": -0.07676739990711212, "step": 14550 }, { "epoch": 0.95, "learning_rate": 3.4101688094242967e-08, "logits/chosen": -2.218144178390503, "logits/rejected": -2.109200954437256, "logps/chosen": -285.6780090332031, "logps/rejected": -283.11676025390625, "loss": 0.0429, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08058507740497589, "rewards/margins": 0.11307475715875626, "rewards/rejected": -0.19365984201431274, "step": 14560 }, { "epoch": 0.95, "learning_rate": 3.3168232527985564e-08, "logits/chosen": -2.1541683673858643, "logits/rejected": -1.8619887828826904, "logps/chosen": -252.0712432861328, "logps/rejected": -214.01773071289062, "loss": 0.0285, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05226341634988785, "rewards/margins": 0.08348184078931808, "rewards/rejected": -0.13574525713920593, "step": 14570 }, { "epoch": 0.95, "learning_rate": 3.224764516461892e-08, "logits/chosen": -2.2693839073181152, "logits/rejected": -2.088949680328369, "logps/chosen": -259.0687561035156, "logps/rejected": -251.11636352539062, "loss": 0.0169, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.02034943364560604, "rewards/margins": 0.11398156732320786, "rewards/rejected": -0.13433101773262024, "step": 14580 }, { "epoch": 0.95, "learning_rate": 3.133993080637665e-08, "logits/chosen": -2.2507710456848145, "logits/rejected": -2.0576610565185547, "logps/chosen": -212.4413299560547, "logps/rejected": -220.51016235351562, "loss": 0.022, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.047802168875932693, "rewards/margins": 0.10239864885807037, "rewards/rejected": -0.15020082890987396, "step": 14590 }, { "epoch": 0.96, "learning_rate": 3.0445094188342186e-08, "logits/chosen": -2.0587873458862305, "logits/rejected": -1.7677072286605835, "logps/chosen": -262.41070556640625, "logps/rejected": -200.29734802246094, "loss": 0.0346, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04467066004872322, "rewards/margins": 0.09075422585010529, "rewards/rejected": -0.1354248821735382, "step": 14600 }, { "epoch": 0.96, "eval_logits/chosen": -2.278109312057495, "eval_logits/rejected": -2.0901198387145996, "eval_logps/chosen": -240.32408142089844, "eval_logps/rejected": -236.98968505859375, "eval_loss": 0.02390436641871929, "eval_rewards/accuracies": 0.6589999794960022, "eval_rewards/chosen": -0.04159563407301903, "eval_rewards/margins": 0.08529327809810638, "eval_rewards/rejected": -0.1268889158964157, "eval_runtime": 711.7023, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 14600 }, { "epoch": 0.96, "learning_rate": 2.9563139978421028e-08, "logits/chosen": -2.1933608055114746, "logits/rejected": -2.2148003578186035, "logps/chosen": -232.57373046875, "logps/rejected": -236.6164093017578, "loss": 0.0102, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.033509157598018646, "rewards/margins": 0.059135984629392624, "rewards/rejected": -0.09264513850212097, "step": 14610 }, { "epoch": 0.96, "learning_rate": 2.869407277731939e-08, "logits/chosen": -2.1482250690460205, "logits/rejected": -2.0788064002990723, "logps/chosen": -192.88778686523438, "logps/rejected": -185.89356994628906, "loss": 0.0167, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.038153309375047684, "rewards/margins": 0.08087687939405441, "rewards/rejected": -0.11903019994497299, "step": 14620 }, { "epoch": 0.96, "learning_rate": 2.783789711851642e-08, "logits/chosen": -2.28196120262146, "logits/rejected": -2.0531458854675293, "logps/chosen": -167.2568359375, "logps/rejected": -168.91690063476562, "loss": 0.0185, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.027029190212488174, "rewards/margins": 0.12046368420124054, "rewards/rejected": -0.14749285578727722, "step": 14630 }, { "epoch": 0.96, "learning_rate": 2.6994617468244778e-08, "logits/chosen": -2.3830745220184326, "logits/rejected": -1.9102243185043335, "logps/chosen": -220.4351348876953, "logps/rejected": -178.19236755371094, "loss": 0.0223, "rewards/accuracies": 0.75, "rewards/chosen": -0.03707236796617508, "rewards/margins": 0.10837974399328232, "rewards/rejected": -0.1454521119594574, "step": 14640 }, { "epoch": 0.96, "learning_rate": 2.6164238225463155e-08, "logits/chosen": -2.2158422470092773, "logits/rejected": -1.8621547222137451, "logps/chosen": -291.20098876953125, "logps/rejected": -232.3519744873047, "loss": 0.0387, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04386935755610466, "rewards/margins": 0.09829467535018921, "rewards/rejected": -0.14216403663158417, "step": 14650 }, { "epoch": 0.96, "learning_rate": 2.534676372183742e-08, "logits/chosen": -2.2373509407043457, "logits/rejected": -2.0849971771240234, "logps/chosen": -293.45819091796875, "logps/rejected": -254.5859832763672, "loss": 0.0315, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.047519437968730927, "rewards/margins": 0.06727181375026703, "rewards/rejected": -0.11479125171899796, "step": 14660 }, { "epoch": 0.96, "learning_rate": 2.4542198221714218e-08, "logits/chosen": -2.1163833141326904, "logits/rejected": -1.8461744785308838, "logps/chosen": -146.51657104492188, "logps/rejected": -166.13450622558594, "loss": 0.025, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.042048048228025436, "rewards/margins": 0.09695516526699066, "rewards/rejected": -0.1390032321214676, "step": 14670 }, { "epoch": 0.96, "learning_rate": 2.3750545922101854e-08, "logits/chosen": -2.5506093502044678, "logits/rejected": -2.1625025272369385, "logps/chosen": -318.39263916015625, "logps/rejected": -267.0853576660156, "loss": 0.0337, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.045007385313510895, "rewards/margins": 0.08543379604816437, "rewards/rejected": -0.13044118881225586, "step": 14680 }, { "epoch": 0.96, "learning_rate": 2.2971810952646112e-08, "logits/chosen": -2.264488458633423, "logits/rejected": -2.1718125343322754, "logps/chosen": -268.1809997558594, "logps/rejected": -230.80789184570312, "loss": 0.0309, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.04909127205610275, "rewards/margins": 0.051663417369127274, "rewards/rejected": -0.10075469315052032, "step": 14690 }, { "epoch": 0.96, "learning_rate": 2.2205997375610576e-08, "logits/chosen": -2.094313383102417, "logits/rejected": -2.0372653007507324, "logps/chosen": -189.8638458251953, "logps/rejected": -215.89511108398438, "loss": 0.0225, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.021789545193314552, "rewards/margins": 0.1043560728430748, "rewards/rejected": -0.126145601272583, "step": 14700 }, { "epoch": 0.96, "eval_logits/chosen": -2.2773990631103516, "eval_logits/rejected": -2.089484453201294, "eval_logps/chosen": -240.30162048339844, "eval_logps/rejected": -236.9473114013672, "eval_loss": 0.023914767429232597, "eval_rewards/accuracies": 0.6604999899864197, "eval_rewards/chosen": -0.041483353823423386, "eval_rewards/margins": 0.08519367128610611, "eval_rewards/rejected": -0.126677006483078, "eval_runtime": 714.1783, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 14700 }, { "epoch": 0.96, "learning_rate": 2.1453109185853304e-08, "logits/chosen": -2.3149261474609375, "logits/rejected": -2.2594079971313477, "logps/chosen": -207.83242797851562, "logps/rejected": -227.0337371826172, "loss": 0.0423, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.02876967191696167, "rewards/margins": 0.08727029711008072, "rewards/rejected": -0.11603996902704239, "step": 14710 }, { "epoch": 0.96, "learning_rate": 2.0713150310808784e-08, "logits/chosen": -2.0834505558013916, "logits/rejected": -2.402132272720337, "logps/chosen": -238.4170684814453, "logps/rejected": -250.8238067626953, "loss": 0.0205, "rewards/accuracies": 0.5, "rewards/chosen": -0.06464622169733047, "rewards/margins": 0.0332169234752655, "rewards/rejected": -0.09786313772201538, "step": 14720 }, { "epoch": 0.96, "learning_rate": 1.9986124610464064e-08, "logits/chosen": -2.1681363582611084, "logits/rejected": -1.8642688989639282, "logps/chosen": -296.47052001953125, "logps/rejected": -253.28335571289062, "loss": 0.0169, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.046201933175325394, "rewards/margins": 0.13312575221061707, "rewards/rejected": -0.17932769656181335, "step": 14730 }, { "epoch": 0.96, "learning_rate": 1.927203587734211e-08, "logits/chosen": -2.1540091037750244, "logits/rejected": -1.766482949256897, "logps/chosen": -259.76141357421875, "logps/rejected": -228.9178924560547, "loss": 0.0347, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03384867683053017, "rewards/margins": 0.08841460943222046, "rewards/rejected": -0.12226328998804092, "step": 14740 }, { "epoch": 0.97, "learning_rate": 1.8570887836479034e-08, "logits/chosen": -2.269257068634033, "logits/rejected": -2.0660524368286133, "logps/chosen": -206.35147094726562, "logps/rejected": -273.08447265625, "loss": 0.0287, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06364154815673828, "rewards/margins": 0.06958835572004318, "rewards/rejected": -0.13322989642620087, "step": 14750 }, { "epoch": 0.97, "learning_rate": 1.7882684145406616e-08, "logits/chosen": -2.339402198791504, "logits/rejected": -2.27685284614563, "logps/chosen": -301.135009765625, "logps/rejected": -314.385986328125, "loss": 0.0238, "rewards/accuracies": 0.625, "rewards/chosen": -0.023571869358420372, "rewards/margins": 0.07569929957389832, "rewards/rejected": -0.09927116334438324, "step": 14760 }, { "epoch": 0.97, "learning_rate": 1.7207428394132865e-08, "logits/chosen": -2.4952762126922607, "logits/rejected": -2.0110652446746826, "logps/chosen": -277.0386962890625, "logps/rejected": -247.00454711914062, "loss": 0.0123, "rewards/accuracies": 0.75, "rewards/chosen": -0.03629208356142044, "rewards/margins": 0.12326530367136002, "rewards/rejected": -0.15955738723278046, "step": 14770 }, { "epoch": 0.97, "learning_rate": 1.654512410512177e-08, "logits/chosen": -2.2198612689971924, "logits/rejected": -1.9765468835830688, "logps/chosen": -262.82794189453125, "logps/rejected": -211.95394897460938, "loss": 0.0392, "rewards/accuracies": 0.625, "rewards/chosen": -0.04625507816672325, "rewards/margins": 0.05757225677371025, "rewards/rejected": -0.1038273423910141, "step": 14780 }, { "epoch": 0.97, "learning_rate": 1.5895774733277468e-08, "logits/chosen": -2.2779715061187744, "logits/rejected": -1.9392287731170654, "logps/chosen": -285.97137451171875, "logps/rejected": -254.766845703125, "loss": 0.0127, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.030772119760513306, "rewards/margins": 0.09515996277332306, "rewards/rejected": -0.12593206763267517, "step": 14790 }, { "epoch": 0.97, "learning_rate": 1.5259383665924e-08, "logits/chosen": -2.547119140625, "logits/rejected": -2.155965805053711, "logps/chosen": -345.37567138671875, "logps/rejected": -267.6927490234375, "loss": 0.0099, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.016418835148215294, "rewards/margins": 0.08162996172904968, "rewards/rejected": -0.09804878383874893, "step": 14800 }, { "epoch": 0.97, "eval_logits/chosen": -2.2770659923553467, "eval_logits/rejected": -2.089148759841919, "eval_logps/chosen": -240.30921936035156, "eval_logps/rejected": -236.97500610351562, "eval_loss": 0.023923002183437347, "eval_rewards/accuracies": 0.659500002861023, "eval_rewards/chosen": -0.041521333158016205, "eval_rewards/margins": 0.0852942168712616, "eval_rewards/rejected": -0.1268155574798584, "eval_runtime": 712.7523, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 14800 }, { "epoch": 0.97, "learning_rate": 1.4635954222789461e-08, "logits/chosen": -2.228029251098633, "logits/rejected": -2.14042592048645, "logps/chosen": -222.356201171875, "logps/rejected": -244.2379608154297, "loss": 0.0156, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.026243016123771667, "rewards/margins": 0.0838693231344223, "rewards/rejected": -0.11011233180761337, "step": 14810 }, { "epoch": 0.97, "learning_rate": 1.402548965598688e-08, "logits/chosen": -2.1659653186798096, "logits/rejected": -2.252767562866211, "logps/chosen": -214.77322387695312, "logps/rejected": -222.8150634765625, "loss": 0.0124, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05072982236742973, "rewards/margins": 0.056291453540325165, "rewards/rejected": -0.1070212721824646, "step": 14820 }, { "epoch": 0.97, "learning_rate": 1.3427993149998375e-08, "logits/chosen": -2.4160420894622803, "logits/rejected": -2.1670732498168945, "logps/chosen": -248.33438110351562, "logps/rejected": -213.2601776123047, "loss": 0.0298, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.031486038118600845, "rewards/margins": 0.10147968679666519, "rewards/rejected": -0.13296571373939514, "step": 14830 }, { "epoch": 0.97, "learning_rate": 1.2843467821658518e-08, "logits/chosen": -2.3670287132263184, "logits/rejected": -2.2997143268585205, "logps/chosen": -229.97329711914062, "logps/rejected": -248.75820922851562, "loss": 0.017, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.018752047792077065, "rewards/margins": 0.09576685726642609, "rewards/rejected": -0.114518903195858, "step": 14840 }, { "epoch": 0.97, "learning_rate": 1.2271916720137666e-08, "logits/chosen": -2.486077308654785, "logits/rejected": -2.167175531387329, "logps/chosen": -296.70526123046875, "logps/rejected": -261.83966064453125, "loss": 0.0365, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.054678551852703094, "rewards/margins": 0.05634712427854538, "rewards/rejected": -0.11102566868066788, "step": 14850 }, { "epoch": 0.97, "learning_rate": 1.171334282692671e-08, "logits/chosen": -2.3231124877929688, "logits/rejected": -2.24255108833313, "logps/chosen": -291.93927001953125, "logps/rejected": -279.7203674316406, "loss": 0.0141, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04670420661568642, "rewards/margins": 0.09206493943929672, "rewards/rejected": -0.13876914978027344, "step": 14860 }, { "epoch": 0.97, "learning_rate": 1.116774905582041e-08, "logits/chosen": -2.3719723224639893, "logits/rejected": -2.0442380905151367, "logps/chosen": -192.50331115722656, "logps/rejected": -192.2175750732422, "loss": 0.0125, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.03842391073703766, "rewards/margins": 0.06208761781454086, "rewards/rejected": -0.10051152855157852, "step": 14870 }, { "epoch": 0.97, "learning_rate": 1.0635138252902966e-08, "logits/chosen": -2.4592127799987793, "logits/rejected": -2.2050700187683105, "logps/chosen": -238.54061889648438, "logps/rejected": -235.62460327148438, "loss": 0.0263, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.040633201599121094, "rewards/margins": 0.09606233984231949, "rewards/rejected": -0.13669554889202118, "step": 14880 }, { "epoch": 0.97, "learning_rate": 1.0115513196533589e-08, "logits/chosen": -2.2937681674957275, "logits/rejected": -2.1239376068115234, "logps/chosen": -268.3608093261719, "logps/rejected": -265.96392822265625, "loss": 0.0121, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.02875324711203575, "rewards/margins": 0.07455660402774811, "rewards/rejected": -0.10330984741449356, "step": 14890 }, { "epoch": 0.97, "learning_rate": 9.608876597330952e-09, "logits/chosen": -2.3046488761901855, "logits/rejected": -1.9809150695800781, "logps/chosen": -301.057861328125, "logps/rejected": -309.0120849609375, "loss": 0.0235, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07217559963464737, "rewards/margins": 0.09819747507572174, "rewards/rejected": -0.1703730821609497, "step": 14900 }, { "epoch": 0.97, "eval_logits/chosen": -2.277737855911255, "eval_logits/rejected": -2.089785575866699, "eval_logps/chosen": -240.29910278320312, "eval_logps/rejected": -236.97601318359375, "eval_loss": 0.023935021832585335, "eval_rewards/accuracies": 0.6585000157356262, "eval_rewards/chosen": -0.041470736265182495, "eval_rewards/margins": 0.08534979820251465, "eval_rewards/rejected": -0.12682053446769714, "eval_runtime": 710.892, "eval_samples_per_second": 2.813, "eval_steps_per_second": 1.407, "step": 14900 }, { "epoch": 0.98, "learning_rate": 9.115231098159594e-09, "logits/chosen": -2.3617255687713623, "logits/rejected": -2.250319004058838, "logps/chosen": -262.0185852050781, "logps/rejected": -255.55947875976562, "loss": 0.0242, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04352415353059769, "rewards/margins": 0.06921429187059402, "rewards/rejected": -0.11273844540119171, "step": 14910 }, { "epoch": 0.98, "learning_rate": 8.634579274116317e-09, "logits/chosen": -2.211763858795166, "logits/rejected": -2.14729642868042, "logps/chosen": -196.95347595214844, "logps/rejected": -237.3124237060547, "loss": 0.0317, "rewards/accuracies": 0.625, "rewards/chosen": -0.03447120264172554, "rewards/margins": 0.09874703735113144, "rewards/rejected": -0.13321822881698608, "step": 14920 }, { "epoch": 0.98, "learning_rate": 8.166923632516865e-09, "logits/chosen": -2.392099618911743, "logits/rejected": -2.104140281677246, "logps/chosen": -235.0437774658203, "logps/rejected": -304.4234619140625, "loss": 0.0161, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.0365629717707634, "rewards/margins": 0.1587628573179245, "rewards/rejected": -0.1953258067369461, "step": 14930 }, { "epoch": 0.98, "learning_rate": 7.712266612881492e-09, "logits/chosen": -2.130908250808716, "logits/rejected": -2.0000076293945312, "logps/chosen": -189.84274291992188, "logps/rejected": -202.4415740966797, "loss": 0.0312, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.023030346259474754, "rewards/margins": 0.08737320452928543, "rewards/rejected": -0.11040355265140533, "step": 14940 }, { "epoch": 0.98, "learning_rate": 7.270610586924687e-09, "logits/chosen": -2.409262180328369, "logits/rejected": -2.1712772846221924, "logps/chosen": -268.30853271484375, "logps/rejected": -239.8023223876953, "loss": 0.009, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.016716131940484047, "rewards/margins": 0.07787027209997177, "rewards/rejected": -0.09458640217781067, "step": 14950 }, { "epoch": 0.98, "learning_rate": 6.841957858539916e-09, "logits/chosen": -2.2161965370178223, "logits/rejected": -2.0737485885620117, "logps/chosen": -181.96450805664062, "logps/rejected": -205.32528686523438, "loss": 0.0355, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.07654479146003723, "rewards/margins": 0.06255006790161133, "rewards/rejected": -0.13909485936164856, "step": 14960 }, { "epoch": 0.98, "learning_rate": 6.426310663790181e-09, "logits/chosen": -2.1482691764831543, "logits/rejected": -2.04679536819458, "logps/chosen": -246.98974609375, "logps/rejected": -229.2218017578125, "loss": 0.0138, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.050069890916347504, "rewards/margins": 0.06850225478410721, "rewards/rejected": -0.11857215315103531, "step": 14970 }, { "epoch": 0.98, "learning_rate": 6.023671170894696e-09, "logits/chosen": -2.4767868518829346, "logits/rejected": -1.8887195587158203, "logps/chosen": -295.3521728515625, "logps/rejected": -234.2387237548828, "loss": 0.017, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.015118017792701721, "rewards/margins": 0.128330796957016, "rewards/rejected": -0.14344879984855652, "step": 14980 }, { "epoch": 0.98, "learning_rate": 5.634041480218344e-09, "logits/chosen": -2.4271140098571777, "logits/rejected": -2.0972952842712402, "logps/chosen": -259.35394287109375, "logps/rejected": -271.25103759765625, "loss": 0.0117, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.031401630491018295, "rewards/margins": 0.08168746531009674, "rewards/rejected": -0.11308909952640533, "step": 14990 }, { "epoch": 0.98, "learning_rate": 5.257423624260849e-09, "logits/chosen": -2.4722533226013184, "logits/rejected": -2.0414023399353027, "logps/chosen": -270.02227783203125, "logps/rejected": -240.05807495117188, "loss": 0.019, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03349440172314644, "rewards/margins": 0.06639524549245834, "rewards/rejected": -0.09988965094089508, "step": 15000 }, { "epoch": 0.98, "eval_logits/chosen": -2.2778244018554688, "eval_logits/rejected": -2.089890718460083, "eval_logps/chosen": -240.30604553222656, "eval_logps/rejected": -236.9526824951172, "eval_loss": 0.023917993530631065, "eval_rewards/accuracies": 0.6610000133514404, "eval_rewards/chosen": -0.04150532931089401, "eval_rewards/margins": 0.08519868552684784, "eval_rewards/rejected": -0.12670400738716125, "eval_runtime": 712.9633, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.403, "step": 15000 }, { "epoch": 0.98, "learning_rate": 4.893819567644564e-09, "logits/chosen": -2.157071352005005, "logits/rejected": -2.131338596343994, "logps/chosen": -197.52670288085938, "logps/rejected": -218.3302001953125, "loss": 0.0298, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0476498007774353, "rewards/margins": 0.0531373992562294, "rewards/rejected": -0.1007872000336647, "step": 15010 }, { "epoch": 0.98, "learning_rate": 4.543231207107257e-09, "logits/chosen": -2.2290079593658447, "logits/rejected": -2.018141746520996, "logps/chosen": -269.5322265625, "logps/rejected": -258.697998046875, "loss": 0.0416, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05855320021510124, "rewards/margins": 0.06387249380350113, "rewards/rejected": -0.12242569029331207, "step": 15020 }, { "epoch": 0.98, "learning_rate": 4.205660371488785e-09, "logits/chosen": -2.532921075820923, "logits/rejected": -2.17484712600708, "logps/chosen": -292.44708251953125, "logps/rejected": -261.38031005859375, "loss": 0.0184, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.041468605399131775, "rewards/margins": 0.053616128861904144, "rewards/rejected": -0.09508474171161652, "step": 15030 }, { "epoch": 0.98, "learning_rate": 3.88110882172471e-09, "logits/chosen": -2.1984848976135254, "logits/rejected": -2.1399903297424316, "logps/chosen": -232.79849243164062, "logps/rejected": -237.8025665283203, "loss": 0.0276, "rewards/accuracies": 0.625, "rewards/chosen": -0.06050034239888191, "rewards/margins": 0.04820340871810913, "rewards/rejected": -0.10870374739170074, "step": 15040 }, { "epoch": 0.98, "learning_rate": 3.569578250834371e-09, "logits/chosen": -2.2971742153167725, "logits/rejected": -2.0346813201904297, "logps/chosen": -307.75616455078125, "logps/rejected": -291.34820556640625, "loss": 0.0228, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.028891805559396744, "rewards/margins": 0.10677383095026016, "rewards/rejected": -0.135665625333786, "step": 15050 }, { "epoch": 0.99, "learning_rate": 3.2710702839139353e-09, "logits/chosen": -2.3282997608184814, "logits/rejected": -2.1847825050354004, "logps/chosen": -211.9817352294922, "logps/rejected": -229.8556671142578, "loss": 0.0211, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03691229224205017, "rewards/margins": 0.043003737926483154, "rewards/rejected": -0.07991602271795273, "step": 15060 }, { "epoch": 0.99, "learning_rate": 2.9855864781272448e-09, "logits/chosen": -2.294987440109253, "logits/rejected": -2.332965612411499, "logps/chosen": -219.7119140625, "logps/rejected": -267.57525634765625, "loss": 0.0142, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04019111022353172, "rewards/margins": 0.06534648686647415, "rewards/rejected": -0.10553759336471558, "step": 15070 }, { "epoch": 0.99, "learning_rate": 2.7131283226977665e-09, "logits/chosen": -2.299408197402954, "logits/rejected": -2.3635406494140625, "logps/chosen": -227.406982421875, "logps/rejected": -257.7666931152344, "loss": 0.0119, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.039793360978364944, "rewards/margins": 0.08254800736904144, "rewards/rejected": -0.12234137207269669, "step": 15080 }, { "epoch": 0.99, "learning_rate": 2.4536972389008205e-09, "logits/chosen": -2.236517906188965, "logits/rejected": -2.0118248462677, "logps/chosen": -242.9285430908203, "logps/rejected": -228.8876190185547, "loss": 0.0266, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.028967713937163353, "rewards/margins": 0.118250273168087, "rewards/rejected": -0.1472179889678955, "step": 15090 }, { "epoch": 0.99, "learning_rate": 2.20729458005553e-09, "logits/chosen": -2.152559757232666, "logits/rejected": -1.9877811670303345, "logps/chosen": -200.26695251464844, "logps/rejected": -202.02871704101562, "loss": 0.0368, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.029288524761795998, "rewards/margins": 0.11981719732284546, "rewards/rejected": -0.1491057127714157, "step": 15100 }, { "epoch": 0.99, "eval_logits/chosen": -2.2784409523010254, "eval_logits/rejected": -2.0904457569122314, "eval_logps/chosen": -240.2960662841797, "eval_logps/rejected": -236.9457550048828, "eval_loss": 0.023891154676675797, "eval_rewards/accuracies": 0.6604999899864197, "eval_rewards/chosen": -0.04145561903715134, "eval_rewards/margins": 0.08521371334791183, "eval_rewards/rejected": -0.12666933238506317, "eval_runtime": 716.4143, "eval_samples_per_second": 2.792, "eval_steps_per_second": 1.396, "step": 15100 }, { "epoch": 0.99, "learning_rate": 1.9739216315192712e-09, "logits/chosen": -2.263357162475586, "logits/rejected": -2.052135944366455, "logps/chosen": -236.76602172851562, "logps/rejected": -223.97885131835938, "loss": 0.0216, "rewards/accuracies": 0.625, "rewards/chosen": -0.037286076694726944, "rewards/margins": 0.054551173001527786, "rewards/rejected": -0.09183724969625473, "step": 15110 }, { "epoch": 0.99, "learning_rate": 1.7535796106796231e-09, "logits/chosen": -2.345848321914673, "logits/rejected": -2.0252323150634766, "logps/chosen": -286.8771057128906, "logps/rejected": -220.3518524169922, "loss": 0.0156, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.056703440845012665, "rewards/margins": 0.06902850419282913, "rewards/rejected": -0.1257319450378418, "step": 15120 }, { "epoch": 0.99, "learning_rate": 1.5462696669482636e-09, "logits/chosen": -2.315427303314209, "logits/rejected": -2.1944830417633057, "logps/chosen": -226.43545532226562, "logps/rejected": -250.79995727539062, "loss": 0.009, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.025966573506593704, "rewards/margins": 0.08360234647989273, "rewards/rejected": -0.10956891626119614, "step": 15130 }, { "epoch": 0.99, "learning_rate": 1.3519928817556927e-09, "logits/chosen": -2.167985677719116, "logits/rejected": -2.1198575496673584, "logps/chosen": -179.43984985351562, "logps/rejected": -195.4428253173828, "loss": 0.0315, "rewards/accuracies": 0.625, "rewards/chosen": -0.030231673270463943, "rewards/margins": 0.07728022336959839, "rewards/rejected": -0.10751190036535263, "step": 15140 }, { "epoch": 0.99, "learning_rate": 1.1707502685448512e-09, "logits/chosen": -2.3988311290740967, "logits/rejected": -2.062964677810669, "logps/chosen": -233.6556396484375, "logps/rejected": -200.08004760742188, "loss": 0.0161, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0569281280040741, "rewards/margins": 0.12306445837020874, "rewards/rejected": -0.17999258637428284, "step": 15150 }, { "epoch": 0.99, "learning_rate": 1.002542772765569e-09, "logits/chosen": -2.228262186050415, "logits/rejected": -1.9481480121612549, "logps/chosen": -196.65823364257812, "logps/rejected": -177.3425750732422, "loss": 0.0274, "rewards/accuracies": 0.625, "rewards/chosen": -0.04027201980352402, "rewards/margins": 0.09495989978313446, "rewards/rejected": -0.13523191213607788, "step": 15160 }, { "epoch": 0.99, "learning_rate": 8.473712718709559e-10, "logits/chosen": -2.0948386192321777, "logits/rejected": -2.1079623699188232, "logps/chosen": -205.24417114257812, "logps/rejected": -202.10992431640625, "loss": 0.0276, "rewards/accuracies": 0.5, "rewards/chosen": -0.05749630928039551, "rewards/margins": 0.038219258189201355, "rewards/rejected": -0.09571556746959686, "step": 15170 }, { "epoch": 0.99, "learning_rate": 7.052365753112966e-10, "logits/chosen": -2.1221511363983154, "logits/rejected": -1.8346284627914429, "logps/chosen": -246.028564453125, "logps/rejected": -247.57980346679688, "loss": 0.0285, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05356248468160629, "rewards/margins": 0.10163445770740509, "rewards/rejected": -0.15519694983959198, "step": 15180 }, { "epoch": 0.99, "learning_rate": 5.761394245307195e-10, "logits/chosen": -2.105289936065674, "logits/rejected": -2.1355576515197754, "logps/chosen": -244.42221069335938, "logps/rejected": -252.21102905273438, "loss": 0.0072, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03670011833310127, "rewards/margins": 0.0450015589594841, "rewards/rejected": -0.08170167356729507, "step": 15190 }, { "epoch": 0.99, "learning_rate": 4.6008049296358826e-10, "logits/chosen": -2.1782045364379883, "logits/rejected": -2.0728297233581543, "logps/chosen": -195.4044952392578, "logps/rejected": -183.70651245117188, "loss": 0.0267, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04690961167216301, "rewards/margins": 0.08272843062877655, "rewards/rejected": -0.12963804602622986, "step": 15200 }, { "epoch": 0.99, "eval_logits/chosen": -2.2778265476226807, "eval_logits/rejected": -2.0898988246917725, "eval_logps/chosen": -240.2912139892578, "eval_logps/rejected": -236.9213409423828, "eval_loss": 0.02391921915113926, "eval_rewards/accuracies": 0.6579999923706055, "eval_rewards/chosen": -0.041431326419115067, "eval_rewards/margins": 0.08511585742235184, "eval_rewards/rejected": -0.1265471875667572, "eval_runtime": 712.7473, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 15200 }, { "epoch": 1.0, "learning_rate": 3.5706038603006146e-10, "logits/chosen": -2.3718504905700684, "logits/rejected": -2.3244967460632324, "logps/chosen": -290.2176818847656, "logps/rejected": -294.3794860839844, "loss": 0.02, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0030365826096385717, "rewards/margins": 0.09202475100755692, "rewards/rejected": -0.09506133943796158, "step": 15210 }, { "epoch": 1.0, "learning_rate": 2.670796411333165e-10, "logits/chosen": -2.547684907913208, "logits/rejected": -2.255117893218994, "logps/chosen": -230.6437530517578, "logps/rejected": -235.52615356445312, "loss": 0.0224, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05416148155927658, "rewards/margins": 0.08594264090061188, "rewards/rejected": -0.14010414481163025, "step": 15220 }, { "epoch": 1.0, "learning_rate": 1.9013872765677455e-10, "logits/chosen": -2.2683866024017334, "logits/rejected": -2.062041759490967, "logps/chosen": -222.8066864013672, "logps/rejected": -219.8751220703125, "loss": 0.0143, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.01646781899034977, "rewards/margins": 0.04801513999700546, "rewards/rejected": -0.06448295712471008, "step": 15230 }, { "epoch": 1.0, "learning_rate": 1.262380469624347e-10, "logits/chosen": -2.24546480178833, "logits/rejected": -2.0749285221099854, "logps/chosen": -211.616455078125, "logps/rejected": -201.745361328125, "loss": 0.0339, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.038047708570957184, "rewards/margins": 0.0651930570602417, "rewards/rejected": -0.10324076563119888, "step": 15240 }, { "epoch": 1.0, "learning_rate": 7.53779323872661e-11, "logits/chosen": -2.1421215534210205, "logits/rejected": -2.240142345428467, "logps/chosen": -200.442138671875, "logps/rejected": -227.1268310546875, "loss": 0.0214, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.042893700301647186, "rewards/margins": 0.10320397466421127, "rewards/rejected": -0.14609768986701965, "step": 15250 }, { "epoch": 1.0, "learning_rate": 3.7558649242652734e-11, "logits/chosen": -2.455829381942749, "logits/rejected": -2.216055393218994, "logps/chosen": -404.4881591796875, "logps/rejected": -340.13348388671875, "loss": 0.0209, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.039021652191877365, "rewards/margins": 0.07608093321323395, "rewards/rejected": -0.11510257422924042, "step": 15260 }, { "epoch": 1.0, "learning_rate": 1.2780394812450526e-11, "logits/chosen": -2.086707592010498, "logits/rejected": -2.040337562561035, "logps/chosen": -244.7919921875, "logps/rejected": -264.2740478515625, "loss": 0.0164, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0632411390542984, "rewards/margins": 0.09782315790653229, "rewards/rejected": -0.1610642969608307, "step": 15270 }, { "epoch": 1.0, "learning_rate": 1.0432983521546646e-12, "logits/chosen": -2.1208715438842773, "logits/rejected": -2.0309219360351562, "logps/chosen": -194.59828186035156, "logps/rejected": -233.06478881835938, "loss": 0.0202, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05566044896841049, "rewards/margins": 0.09810435026884079, "rewards/rejected": -0.15376481413841248, "step": 15280 }, { "epoch": 1.0, "step": 15284, "total_flos": 0.0, "train_loss": 0.028345070170466266, "train_runtime": 172193.3354, "train_samples_per_second": 0.355, "train_steps_per_second": 0.089 } ], "logging_steps": 10, "max_steps": 15284, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }