diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,9 +1,9 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 17.954430379746835, + "epoch": 35.95443037974684, "eval_steps": 100, - "global_step": 3546, + "global_step": 7092, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -5334,19 +5334,5344 @@ "step": 3540 }, { - "epoch": 17.954430379746835, - "step": 3546, + "epoch": 18.020253164556962, + "grad_norm": 633377.3531549113, + "learning_rate": 2.774992165465371e-07, + "logits/chosen": 0.778042197227478, + "logits/rejected": 0.4570779800415039, + "logps/chosen": -30.87795639038086, + "logps/rejected": -562.3123168945312, + "loss": 14823.5117, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19000156223773956, + "rewards/margins": 0.5307614803314209, + "rewards/rejected": -0.34075987339019775, + "step": 3550 + }, + { + "epoch": 18.070886075949367, + "grad_norm": 536501.4949459385, + "learning_rate": 2.767157630836728e-07, + "logits/chosen": -1.453107476234436, + "logits/rejected": -1.1603299379348755, + "logps/chosen": -48.59767532348633, + "logps/rejected": -607.5595703125, + "loss": 14268.2156, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19779345393180847, + "rewards/margins": 0.5535213351249695, + "rewards/rejected": -0.3557279109954834, + "step": 3560 + }, + { + "epoch": 18.121518987341773, + "grad_norm": 613929.4964505757, + "learning_rate": 2.7593230962080847e-07, + "logits/chosen": -1.0204923152923584, + "logits/rejected": -1.006306529045105, + "logps/chosen": -40.379703521728516, + "logps/rejected": -586.8853759765625, + "loss": 14124.1406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18828515708446503, + "rewards/margins": 0.5416163206100464, + "rewards/rejected": -0.35333114862442017, + "step": 3570 + }, + { + "epoch": 18.172151898734178, + "grad_norm": 453188.0208924516, + "learning_rate": 2.751488561579442e-07, + "logits/chosen": 0.978573203086853, + "logits/rejected": 1.6422239542007446, + "logps/chosen": -40.75902557373047, + "logps/rejected": -571.6940307617188, + "loss": 14028.8266, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19008655846118927, + "rewards/margins": 0.5349593758583069, + "rewards/rejected": -0.34487277269363403, + "step": 3580 + }, + { + "epoch": 18.222784810126583, + "grad_norm": 470617.1864493106, + "learning_rate": 2.743654026950799e-07, + "logits/chosen": 0.612755298614502, + "logits/rejected": 1.586531639099121, + "logps/chosen": -47.43413162231445, + "logps/rejected": -567.2514038085938, + "loss": 14305.0953, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.18671520054340363, + "rewards/margins": 0.5188931226730347, + "rewards/rejected": -0.33217787742614746, + "step": 3590 + }, + { + "epoch": 18.27341772151899, + "grad_norm": 568328.2123455897, + "learning_rate": 2.7358194923221564e-07, + "logits/chosen": 2.5831315517425537, + "logits/rejected": 2.3743977546691895, + "logps/chosen": -36.72047805786133, + "logps/rejected": -561.4580688476562, + "loss": 14931.7812, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.18578791618347168, + "rewards/margins": 0.5219975113868713, + "rewards/rejected": -0.33620959520339966, + "step": 3600 + }, + { + "epoch": 18.324050632911394, + "grad_norm": 258649.85824251673, + "learning_rate": 2.727984957693513e-07, + "logits/chosen": -0.6456964612007141, + "logits/rejected": 0.10119187831878662, + "logps/chosen": -45.66813659667969, + "logps/rejected": -584.33984375, + "loss": 13962.2891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19075247645378113, + "rewards/margins": 0.5430020093917847, + "rewards/rejected": -0.35224950313568115, + "step": 3610 + }, + { + "epoch": 18.374683544303796, + "grad_norm": 523823.39531677734, + "learning_rate": 2.72015042306487e-07, + "logits/chosen": -0.1337634027004242, + "logits/rejected": 0.3194190561771393, + "logps/chosen": -43.2452278137207, + "logps/rejected": -576.6324462890625, + "loss": 14478.6656, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19283099472522736, + "rewards/margins": 0.5422399640083313, + "rewards/rejected": -0.34940892457962036, + "step": 3620 + }, + { + "epoch": 18.4253164556962, + "grad_norm": 369527.7483340646, + "learning_rate": 2.712315888436227e-07, + "logits/chosen": -0.5704905390739441, + "logits/rejected": -0.24132680892944336, + "logps/chosen": -39.81604766845703, + "logps/rejected": -579.3060302734375, + "loss": 14853.9188, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1893097311258316, + "rewards/margins": 0.5385677218437195, + "rewards/rejected": -0.3492580056190491, + "step": 3630 + }, + { + "epoch": 18.475949367088607, + "grad_norm": 487722.91173438437, + "learning_rate": 2.704481353807584e-07, + "logits/chosen": 0.30203062295913696, + "logits/rejected": 1.367623209953308, + "logps/chosen": -43.79780578613281, + "logps/rejected": -575.3096313476562, + "loss": 14337.7125, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.18831488490104675, + "rewards/margins": 0.5326144099235535, + "rewards/rejected": -0.3442995548248291, + "step": 3640 + }, + { + "epoch": 18.526582278481012, + "grad_norm": 769147.1132735502, + "learning_rate": 2.6966468191789406e-07, + "logits/chosen": 0.5818338990211487, + "logits/rejected": 0.8189504742622375, + "logps/chosen": -40.80295944213867, + "logps/rejected": -569.6201171875, + "loss": 14414.5, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19092252850532532, + "rewards/margins": 0.5284001231193542, + "rewards/rejected": -0.3374776244163513, + "step": 3650 + }, + { + "epoch": 18.577215189873417, + "grad_norm": 423741.6615039136, + "learning_rate": 2.6888122845502977e-07, + "logits/chosen": -2.1757419109344482, + "logits/rejected": -1.7465986013412476, + "logps/chosen": -33.543739318847656, + "logps/rejected": -566.9044189453125, + "loss": 13990.4406, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.18767623603343964, + "rewards/margins": 0.5356841683387756, + "rewards/rejected": -0.3480078876018524, + "step": 3660 + }, + { + "epoch": 18.627848101265823, + "grad_norm": 405282.2937016151, + "learning_rate": 2.680977749921655e-07, + "logits/chosen": -0.054244786500930786, + "logits/rejected": 0.9029023051261902, + "logps/chosen": -49.31962966918945, + "logps/rejected": -585.48779296875, + "loss": 14779.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19979842007160187, + "rewards/margins": 0.5441454648971558, + "rewards/rejected": -0.3443470597267151, + "step": 3670 + }, + { + "epoch": 18.678481012658228, + "grad_norm": 468937.7683958159, + "learning_rate": 2.673143215293012e-07, + "logits/chosen": -0.046643782407045364, + "logits/rejected": -0.1421128809452057, + "logps/chosen": -40.85643768310547, + "logps/rejected": -577.6583862304688, + "loss": 14531.35, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.195206418633461, + "rewards/margins": 0.5361508131027222, + "rewards/rejected": -0.3409443199634552, + "step": 3680 + }, + { + "epoch": 18.729113924050633, + "grad_norm": 627917.5959141933, + "learning_rate": 2.6653086806643683e-07, + "logits/chosen": 1.5284700393676758, + "logits/rejected": 1.2886362075805664, + "logps/chosen": -48.694664001464844, + "logps/rejected": -579.7990112304688, + "loss": 15195.4844, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.1851346641778946, + "rewards/margins": 0.531388521194458, + "rewards/rejected": -0.346253901720047, + "step": 3690 + }, + { + "epoch": 18.77974683544304, + "grad_norm": 511207.857422736, + "learning_rate": 2.6574741460357254e-07, + "logits/chosen": 0.03928997367620468, + "logits/rejected": 0.5418666005134583, + "logps/chosen": -50.31745529174805, + "logps/rejected": -593.0169677734375, + "loss": 14929.1469, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19712677597999573, + "rewards/margins": 0.5447811484336853, + "rewards/rejected": -0.3476543724536896, + "step": 3700 + }, + { + "epoch": 18.830379746835444, + "grad_norm": 568133.4282182837, + "learning_rate": 2.6496396114070825e-07, + "logits/chosen": -0.7848063707351685, + "logits/rejected": -0.8312255144119263, + "logps/chosen": -39.726234436035156, + "logps/rejected": -566.0286254882812, + "loss": 14112.2844, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.18878208100795746, + "rewards/margins": 0.5250921249389648, + "rewards/rejected": -0.3363099992275238, + "step": 3710 + }, + { + "epoch": 18.88101265822785, + "grad_norm": 293062.3175283677, + "learning_rate": 2.6418050767784395e-07, + "logits/chosen": -0.22776488959789276, + "logits/rejected": -0.043119143694639206, + "logps/chosen": -47.83971405029297, + "logps/rejected": -575.6166381835938, + "loss": 14345.3813, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19344884157180786, + "rewards/margins": 0.5261351466178894, + "rewards/rejected": -0.33268633484840393, + "step": 3720 + }, + { + "epoch": 18.931645569620255, + "grad_norm": 369584.46121245134, + "learning_rate": 2.633970542149796e-07, + "logits/chosen": 0.6460098028182983, + "logits/rejected": 0.6165057420730591, + "logps/chosen": -53.0880126953125, + "logps/rejected": -602.0147705078125, + "loss": 14143.9609, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19915179908275604, + "rewards/margins": 0.5483053922653198, + "rewards/rejected": -0.3491537272930145, + "step": 3730 + }, + { + "epoch": 18.98227848101266, + "grad_norm": 328959.5337312854, + "learning_rate": 2.626136007521153e-07, + "logits/chosen": 0.25958794355392456, + "logits/rejected": 0.5823850631713867, + "logps/chosen": -49.16436004638672, + "logps/rejected": -584.5070190429688, + "loss": 14187.4844, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19965310394763947, + "rewards/margins": 0.5432143211364746, + "rewards/rejected": -0.34356123208999634, + "step": 3740 + }, + { + "epoch": 19.03291139240506, + "grad_norm": 1626740.8696455131, + "learning_rate": 2.61830147289251e-07, + "logits/chosen": -0.6601130366325378, + "logits/rejected": -0.8405634164810181, + "logps/chosen": -46.10778045654297, + "logps/rejected": -587.3377685546875, + "loss": 14051.6469, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.1946493685245514, + "rewards/margins": 0.54271399974823, + "rewards/rejected": -0.34806469082832336, + "step": 3750 + }, + { + "epoch": 19.083544303797467, + "grad_norm": 786920.4959477714, + "learning_rate": 2.610466938263867e-07, + "logits/chosen": 0.280475914478302, + "logits/rejected": 1.5355632305145264, + "logps/chosen": -40.83550262451172, + "logps/rejected": -576.2233276367188, + "loss": 14524.6172, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19496676325798035, + "rewards/margins": 0.5368971228599548, + "rewards/rejected": -0.3419303297996521, + "step": 3760 + }, + { + "epoch": 19.134177215189872, + "grad_norm": 670222.9584254185, + "learning_rate": 2.602632403635224e-07, + "logits/chosen": 1.6073856353759766, + "logits/rejected": 2.1679255962371826, + "logps/chosen": -48.07741928100586, + "logps/rejected": -568.386962890625, + "loss": 16064.1922, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19028018414974213, + "rewards/margins": 0.5232519507408142, + "rewards/rejected": -0.3329717516899109, + "step": 3770 + }, + { + "epoch": 19.184810126582278, + "grad_norm": 779401.4265683588, + "learning_rate": 2.594797869006581e-07, + "logits/chosen": -1.2690767049789429, + "logits/rejected": -0.7741214036941528, + "logps/chosen": -35.147666931152344, + "logps/rejected": -588.05810546875, + "loss": 14594.675, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20034465193748474, + "rewards/margins": 0.5537833571434021, + "rewards/rejected": -0.3534386456012726, + "step": 3780 + }, + { + "epoch": 19.235443037974683, + "grad_norm": 677896.0436831466, + "learning_rate": 2.586963334377938e-07, + "logits/chosen": 0.381600558757782, + "logits/rejected": 0.3627360761165619, + "logps/chosen": -47.129329681396484, + "logps/rejected": -583.4297485351562, + "loss": 14673.1125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19634023308753967, + "rewards/margins": 0.5409786105155945, + "rewards/rejected": -0.3446383774280548, + "step": 3790 + }, + { + "epoch": 19.28607594936709, + "grad_norm": 1708590.8406628803, + "learning_rate": 2.579128799749295e-07, + "logits/chosen": -0.6463128924369812, + "logits/rejected": -0.1966671198606491, + "logps/chosen": -51.58148193359375, + "logps/rejected": -571.8802490234375, + "loss": 14855.8094, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.1947019398212433, + "rewards/margins": 0.5256696343421936, + "rewards/rejected": -0.3309677243232727, + "step": 3800 + }, + { + "epoch": 19.336708860759494, + "grad_norm": 906394.5199246205, + "learning_rate": 2.5712942651206515e-07, + "logits/chosen": 0.6537224054336548, + "logits/rejected": 1.356911301612854, + "logps/chosen": -37.791786193847656, + "logps/rejected": -541.84912109375, + "loss": 14494.675, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.18946874141693115, + "rewards/margins": 0.5098165273666382, + "rewards/rejected": -0.3203478455543518, + "step": 3810 + }, + { + "epoch": 19.3873417721519, + "grad_norm": 1248788.3894635146, + "learning_rate": 2.5634597304920085e-07, + "logits/chosen": -1.4148962497711182, + "logits/rejected": -0.616938591003418, + "logps/chosen": -39.15003204345703, + "logps/rejected": -567.9779052734375, + "loss": 14511.9828, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19468382000923157, + "rewards/margins": 0.5306459665298462, + "rewards/rejected": -0.33596211671829224, + "step": 3820 + }, + { + "epoch": 19.437974683544304, + "grad_norm": 699507.4776687805, + "learning_rate": 2.5556251958633656e-07, + "logits/chosen": -0.786666214466095, + "logits/rejected": -0.8524150848388672, + "logps/chosen": -37.31165313720703, + "logps/rejected": -559.5811767578125, + "loss": 15226.8953, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.18610945343971252, + "rewards/margins": 0.5259476900100708, + "rewards/rejected": -0.3398382067680359, + "step": 3830 + }, + { + "epoch": 19.48860759493671, + "grad_norm": 750946.845865734, + "learning_rate": 2.5477906612347227e-07, + "logits/chosen": -0.5914249420166016, + "logits/rejected": -0.1790940761566162, + "logps/chosen": -41.875919342041016, + "logps/rejected": -580.2433471679688, + "loss": 15077.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1914350688457489, + "rewards/margins": 0.5402361154556274, + "rewards/rejected": -0.34880098700523376, + "step": 3840 + }, + { + "epoch": 19.539240506329115, + "grad_norm": 1438213.362152031, + "learning_rate": 2.539956126606079e-07, + "logits/chosen": -1.4764426946640015, + "logits/rejected": -1.0852867364883423, + "logps/chosen": -45.64619064331055, + "logps/rejected": -574.2334594726562, + "loss": 15001.6922, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19227740168571472, + "rewards/margins": 0.527544379234314, + "rewards/rejected": -0.33526697754859924, + "step": 3850 + }, + { + "epoch": 19.58987341772152, + "grad_norm": 1015656.6585732017, + "learning_rate": 2.532121591977436e-07, + "logits/chosen": 0.0265532024204731, + "logits/rejected": 0.4305901527404785, + "logps/chosen": -40.221275329589844, + "logps/rejected": -580.7332763671875, + "loss": 15005.4062, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.1900371015071869, + "rewards/margins": 0.5353468656539917, + "rewards/rejected": -0.3453097939491272, + "step": 3860 + }, + { + "epoch": 19.640506329113926, + "grad_norm": 1480021.6334817603, + "learning_rate": 2.5242870573487933e-07, + "logits/chosen": -2.3115265369415283, + "logits/rejected": -1.9450628757476807, + "logps/chosen": -43.31880187988281, + "logps/rejected": -592.5906372070312, + "loss": 14681.3906, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20574085414409637, + "rewards/margins": 0.5489095449447632, + "rewards/rejected": -0.3431686758995056, + "step": 3870 + }, + { + "epoch": 19.691139240506327, + "grad_norm": 652464.7916313735, + "learning_rate": 2.5164525227201504e-07, + "logits/chosen": 0.63951176404953, + "logits/rejected": 1.3804535865783691, + "logps/chosen": -33.52408981323242, + "logps/rejected": -556.9231567382812, + "loss": 15124.1328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1876874566078186, + "rewards/margins": 0.5311328172683716, + "rewards/rejected": -0.343445360660553, + "step": 3880 + }, + { + "epoch": 19.741772151898733, + "grad_norm": 697435.0328174214, + "learning_rate": 2.508617988091507e-07, + "logits/chosen": -1.7422069311141968, + "logits/rejected": -1.3413903713226318, + "logps/chosen": -42.224788665771484, + "logps/rejected": -584.3877563476562, + "loss": 15307.875, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2013184279203415, + "rewards/margins": 0.5388418436050415, + "rewards/rejected": -0.3375234305858612, + "step": 3890 + }, + { + "epoch": 19.792405063291138, + "grad_norm": 680395.3386900029, + "learning_rate": 2.500783453462864e-07, + "logits/chosen": 0.6034026741981506, + "logits/rejected": 1.1066893339157104, + "logps/chosen": -39.37774658203125, + "logps/rejected": -585.9308471679688, + "loss": 15434.6031, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20041151344776154, + "rewards/margins": 0.545585036277771, + "rewards/rejected": -0.34517353773117065, + "step": 3900 + }, + { + "epoch": 19.843037974683543, + "grad_norm": 1036480.9072027011, + "learning_rate": 2.492948918834221e-07, + "logits/chosen": -0.37202078104019165, + "logits/rejected": -0.6633853316307068, + "logps/chosen": -50.845218658447266, + "logps/rejected": -565.1788330078125, + "loss": 14732.9813, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.1860923022031784, + "rewards/margins": 0.5141801834106445, + "rewards/rejected": -0.32808783650398254, + "step": 3910 + }, + { + "epoch": 19.89367088607595, + "grad_norm": 960769.0916438915, + "learning_rate": 2.485114384205578e-07, + "logits/chosen": -2.4451048374176025, + "logits/rejected": -1.8602224588394165, + "logps/chosen": -49.44445037841797, + "logps/rejected": -588.972900390625, + "loss": 14954.0281, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20443923771381378, + "rewards/margins": 0.5393208265304565, + "rewards/rejected": -0.33488157391548157, + "step": 3920 + }, + { + "epoch": 19.944303797468354, + "grad_norm": 637831.6626185304, + "learning_rate": 2.477279849576935e-07, + "logits/chosen": -1.1525195837020874, + "logits/rejected": -0.6883751153945923, + "logps/chosen": -37.11662673950195, + "logps/rejected": -576.5172729492188, + "loss": 14910.0938, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20432814955711365, + "rewards/margins": 0.5393826961517334, + "rewards/rejected": -0.33505457639694214, + "step": 3930 + }, + { + "epoch": 19.99493670886076, + "grad_norm": 926025.3002487151, + "learning_rate": 2.4694453149482917e-07, + "logits/chosen": 0.10631950944662094, + "logits/rejected": 0.8977824449539185, + "logps/chosen": -43.12347412109375, + "logps/rejected": -561.4703369140625, + "loss": 15041.1219, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.18227019906044006, + "rewards/margins": 0.5218333005905151, + "rewards/rejected": -0.3395631015300751, + "step": 3940 + }, + { + "epoch": 20.045569620253165, + "grad_norm": 585485.6374993185, + "learning_rate": 2.461610780319649e-07, + "logits/chosen": -0.45847588777542114, + "logits/rejected": -0.7163432836532593, + "logps/chosen": -52.16509246826172, + "logps/rejected": -574.7341918945312, + "loss": 14472.725, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.18465857207775116, + "rewards/margins": 0.5204340219497681, + "rewards/rejected": -0.3357754647731781, + "step": 3950 + }, + { + "epoch": 20.09620253164557, + "grad_norm": 732893.4730793714, + "learning_rate": 2.453776245691006e-07, + "logits/chosen": -0.8532247543334961, + "logits/rejected": -1.0177998542785645, + "logps/chosen": -34.215248107910156, + "logps/rejected": -545.2357788085938, + "loss": 14258.1938, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.1821087896823883, + "rewards/margins": 0.5106922388076782, + "rewards/rejected": -0.3285834789276123, + "step": 3960 + }, + { + "epoch": 20.146835443037975, + "grad_norm": 566993.8027250646, + "learning_rate": 2.445941711062363e-07, + "logits/chosen": 0.08969805389642715, + "logits/rejected": 0.7759960889816284, + "logps/chosen": -44.99492645263672, + "logps/rejected": -555.2744140625, + "loss": 13412.7156, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.1912485808134079, + "rewards/margins": 0.5124030113220215, + "rewards/rejected": -0.3211544454097748, + "step": 3970 + }, + { + "epoch": 20.19746835443038, + "grad_norm": 694029.5528637858, + "learning_rate": 2.4381071764337194e-07, + "logits/chosen": -1.096225380897522, + "logits/rejected": -0.07990212738513947, + "logps/chosen": -50.788089752197266, + "logps/rejected": -593.2210083007812, + "loss": 14393.8781, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20465774834156036, + "rewards/margins": 0.5500935316085815, + "rewards/rejected": -0.34543576836586, + "step": 3980 + }, + { + "epoch": 20.248101265822786, + "grad_norm": 732062.5147915592, + "learning_rate": 2.430272641805077e-07, + "logits/chosen": 2.356168270111084, + "logits/rejected": 2.851551055908203, + "logps/chosen": -47.21477508544922, + "logps/rejected": -550.4632568359375, + "loss": 14544.5953, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.1797908991575241, + "rewards/margins": 0.5041374564170837, + "rewards/rejected": -0.3243466317653656, + "step": 3990 + }, + { + "epoch": 20.29873417721519, + "grad_norm": 679068.526799364, + "learning_rate": 2.4224381071764335e-07, + "logits/chosen": -0.03650767728686333, + "logits/rejected": 0.4142078459262848, + "logps/chosen": -56.95387649536133, + "logps/rejected": -573.4332885742188, + "loss": 14252.7656, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19543471932411194, + "rewards/margins": 0.5279492139816284, + "rewards/rejected": -0.3325144648551941, + "step": 4000 + }, + { + "epoch": 20.349367088607593, + "grad_norm": 654544.0953281225, + "learning_rate": 2.4146035725477906e-07, + "logits/chosen": -1.5134330987930298, + "logits/rejected": -1.2428243160247803, + "logps/chosen": -38.626304626464844, + "logps/rejected": -569.9034423828125, + "loss": 14105.2687, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19262897968292236, + "rewards/margins": 0.5324395298957825, + "rewards/rejected": -0.3398105204105377, + "step": 4010 + }, + { + "epoch": 20.4, + "grad_norm": 497489.50957681873, + "learning_rate": 2.4067690379191476e-07, + "logits/chosen": -0.28921595215797424, + "logits/rejected": 0.4628073573112488, + "logps/chosen": -35.01650619506836, + "logps/rejected": -566.645263671875, + "loss": 14669.4656, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19555968046188354, + "rewards/margins": 0.5380457639694214, + "rewards/rejected": -0.3424859941005707, + "step": 4020 + }, + { + "epoch": 20.450632911392404, + "grad_norm": 813543.9506414626, + "learning_rate": 2.3989345032905047e-07, + "logits/chosen": 0.39950722455978394, + "logits/rejected": 0.7022187113761902, + "logps/chosen": -34.654640197753906, + "logps/rejected": -572.8896484375, + "loss": 14513.1688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19765284657478333, + "rewards/margins": 0.5385235548019409, + "rewards/rejected": -0.34087073802948, + "step": 4030 + }, + { + "epoch": 20.50126582278481, + "grad_norm": 492881.9600409883, + "learning_rate": 2.391099968661861e-07, + "logits/chosen": 0.8350554704666138, + "logits/rejected": 1.3902348279953003, + "logps/chosen": -55.87943649291992, + "logps/rejected": -570.520751953125, + "loss": 14474.9922, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.19199691712856293, + "rewards/margins": 0.5237919092178345, + "rewards/rejected": -0.33179494738578796, + "step": 4040 + }, + { + "epoch": 20.551898734177215, + "grad_norm": 550156.3808352741, + "learning_rate": 2.3832654340332183e-07, + "logits/chosen": 0.10426521301269531, + "logits/rejected": -0.0343349352478981, + "logps/chosen": -41.53984451293945, + "logps/rejected": -587.2073974609375, + "loss": 14114.8438, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20071008801460266, + "rewards/margins": 0.5482142567634583, + "rewards/rejected": -0.3475041091442108, + "step": 4050 + }, + { + "epoch": 20.60253164556962, + "grad_norm": 442438.75618485885, + "learning_rate": 2.375430899404575e-07, + "logits/chosen": 0.6523551344871521, + "logits/rejected": 1.2190606594085693, + "logps/chosen": -46.92839431762695, + "logps/rejected": -584.9151000976562, + "loss": 14072.4, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19940249621868134, + "rewards/margins": 0.5458526611328125, + "rewards/rejected": -0.3464500606060028, + "step": 4060 + }, + { + "epoch": 20.653164556962025, + "grad_norm": 1289102.185567105, + "learning_rate": 2.3675963647759321e-07, + "logits/chosen": -0.5604708790779114, + "logits/rejected": -0.4172240197658539, + "logps/chosen": -41.39154815673828, + "logps/rejected": -568.0210571289062, + "loss": 14125.9813, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.18824639916419983, + "rewards/margins": 0.5325849652290344, + "rewards/rejected": -0.3443385362625122, + "step": 4070 + }, + { + "epoch": 20.70379746835443, + "grad_norm": 796999.4756244586, + "learning_rate": 2.3597618301472892e-07, + "logits/chosen": -0.1419040858745575, + "logits/rejected": 0.26791125535964966, + "logps/chosen": -37.67776870727539, + "logps/rejected": -578.685302734375, + "loss": 14390.0187, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1924736052751541, + "rewards/margins": 0.5395643711090088, + "rewards/rejected": -0.34709078073501587, + "step": 4080 + }, + { + "epoch": 20.754430379746836, + "grad_norm": 730146.0659684967, + "learning_rate": 2.3519272955186463e-07, + "logits/chosen": -2.1902015209198, + "logits/rejected": -1.9143873453140259, + "logps/chosen": -49.27891159057617, + "logps/rejected": -583.9379272460938, + "loss": 14559.9, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19109120965003967, + "rewards/margins": 0.5332453846931458, + "rewards/rejected": -0.3421540856361389, + "step": 4090 + }, + { + "epoch": 20.80506329113924, + "grad_norm": 722476.022933799, + "learning_rate": 2.344092760890003e-07, + "logits/chosen": -2.4936270713806152, + "logits/rejected": -2.54288649559021, + "logps/chosen": -38.10564041137695, + "logps/rejected": -595.3966064453125, + "loss": 14070.8406, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20286759734153748, + "rewards/margins": 0.5558962821960449, + "rewards/rejected": -0.35302871465682983, + "step": 4100 + }, + { + "epoch": 20.855696202531647, + "grad_norm": 569046.2654479475, + "learning_rate": 2.33625822626136e-07, + "logits/chosen": -1.9905935525894165, + "logits/rejected": -1.920189619064331, + "logps/chosen": -34.30987548828125, + "logps/rejected": -561.2472534179688, + "loss": 14892.8141, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.18726976215839386, + "rewards/margins": 0.5297659039497375, + "rewards/rejected": -0.3424961268901825, + "step": 4110 + }, + { + "epoch": 20.906329113924052, + "grad_norm": 428009.1713524517, + "learning_rate": 2.328423691632717e-07, + "logits/chosen": -1.5145037174224854, + "logits/rejected": -1.5116671323776245, + "logps/chosen": -45.871498107910156, + "logps/rejected": -565.3566284179688, + "loss": 13932.3125, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.1972968429327011, + "rewards/margins": 0.5235085487365723, + "rewards/rejected": -0.3262116611003876, + "step": 4120 + }, + { + "epoch": 20.956962025316457, + "grad_norm": 648441.2224283866, + "learning_rate": 2.320589157004074e-07, + "logits/chosen": -0.6003355383872986, + "logits/rejected": 0.15587857365608215, + "logps/chosen": -56.75775146484375, + "logps/rejected": -600.1107177734375, + "loss": 14185.1125, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.20023982226848602, + "rewards/margins": 0.5419777631759644, + "rewards/rejected": -0.34173792600631714, + "step": 4130 + }, + { + "epoch": 21.00759493670886, + "grad_norm": 610764.2416650191, + "learning_rate": 2.3127546223754308e-07, + "logits/chosen": -0.7396095395088196, + "logits/rejected": -0.635381281375885, + "logps/chosen": -43.1786994934082, + "logps/rejected": -567.4010620117188, + "loss": 13604.9312, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19861166179180145, + "rewards/margins": 0.5279361605644226, + "rewards/rejected": -0.32932454347610474, + "step": 4140 + }, + { + "epoch": 21.058227848101264, + "grad_norm": 475061.8763801183, + "learning_rate": 2.3049200877467878e-07, + "logits/chosen": -0.8914744257926941, + "logits/rejected": -0.421735942363739, + "logps/chosen": -35.262672424316406, + "logps/rejected": -577.42919921875, + "loss": 14494.0125, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.1971307098865509, + "rewards/margins": 0.5426384210586548, + "rewards/rejected": -0.3455076515674591, + "step": 4150 + }, + { + "epoch": 21.10886075949367, + "grad_norm": 645277.3437759997, + "learning_rate": 2.2970855531181446e-07, + "logits/chosen": -0.8478671908378601, + "logits/rejected": -0.030678223818540573, + "logps/chosen": -41.63772964477539, + "logps/rejected": -578.0343017578125, + "loss": 14695.0688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2051258385181427, + "rewards/margins": 0.5436097979545593, + "rewards/rejected": -0.338483989238739, + "step": 4160 + }, + { + "epoch": 21.159493670886075, + "grad_norm": 585994.7230623597, + "learning_rate": 2.2892510184895017e-07, + "logits/chosen": -0.41463834047317505, + "logits/rejected": 0.02628953382372856, + "logps/chosen": -42.1683464050293, + "logps/rejected": -583.2857055664062, + "loss": 14024.5219, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19314590096473694, + "rewards/margins": 0.542751669883728, + "rewards/rejected": -0.3496057987213135, + "step": 4170 + }, + { + "epoch": 21.21012658227848, + "grad_norm": 484376.19543389586, + "learning_rate": 2.2814164838608585e-07, + "logits/chosen": -1.1306734085083008, + "logits/rejected": -1.8836634159088135, + "logps/chosen": -37.82279968261719, + "logps/rejected": -602.1395874023438, + "loss": 14403.9312, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20070350170135498, + "rewards/margins": 0.561983048915863, + "rewards/rejected": -0.36127954721450806, + "step": 4180 + }, + { + "epoch": 21.260759493670886, + "grad_norm": 1094104.6320160846, + "learning_rate": 2.2735819492322155e-07, + "logits/chosen": -1.173640251159668, + "logits/rejected": -1.2174631357192993, + "logps/chosen": -41.484169006347656, + "logps/rejected": -603.4227294921875, + "loss": 13658.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20206312835216522, + "rewards/margins": 0.5591098070144653, + "rewards/rejected": -0.3570466637611389, + "step": 4190 + }, + { + "epoch": 21.31139240506329, + "grad_norm": 479054.6529765657, + "learning_rate": 2.2657474146035723e-07, + "logits/chosen": 0.22174246609210968, + "logits/rejected": 0.7805773615837097, + "logps/chosen": -36.59177780151367, + "logps/rejected": -585.4400634765625, + "loss": 13599.2641, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20017173886299133, + "rewards/margins": 0.5513723492622375, + "rewards/rejected": -0.3512006402015686, + "step": 4200 + }, + { + "epoch": 21.362025316455696, + "grad_norm": 397662.76305916836, + "learning_rate": 2.2579128799749294e-07, + "logits/chosen": -0.08221355825662613, + "logits/rejected": 0.6834055185317993, + "logps/chosen": -34.845909118652344, + "logps/rejected": -582.8208618164062, + "loss": 13710.1328, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19616791605949402, + "rewards/margins": 0.548448920249939, + "rewards/rejected": -0.35228094458580017, + "step": 4210 + }, + { + "epoch": 21.4126582278481, + "grad_norm": 558368.8389175668, + "learning_rate": 2.2500783453462862e-07, + "logits/chosen": -1.6244313716888428, + "logits/rejected": -1.1929272413253784, + "logps/chosen": -40.19694900512695, + "logps/rejected": -582.0238647460938, + "loss": 13864.4125, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20411738753318787, + "rewards/margins": 0.5427777767181396, + "rewards/rejected": -0.3386603891849518, + "step": 4220 + }, + { + "epoch": 21.463291139240507, + "grad_norm": 677734.2581549523, + "learning_rate": 2.2422438107176433e-07, + "logits/chosen": -1.662096381187439, + "logits/rejected": -1.0047109127044678, + "logps/chosen": -35.72515106201172, + "logps/rejected": -602.2333374023438, + "loss": 14032.5453, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2048303186893463, + "rewards/margins": 0.5686389207839966, + "rewards/rejected": -0.36380860209465027, + "step": 4230 + }, + { + "epoch": 21.513924050632912, + "grad_norm": 1168191.91590756, + "learning_rate": 2.234409276089e-07, + "logits/chosen": -1.4097559452056885, + "logits/rejected": -1.5273138284683228, + "logps/chosen": -41.850120544433594, + "logps/rejected": -581.159912109375, + "loss": 14159.3531, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19850656390190125, + "rewards/margins": 0.5419961810112, + "rewards/rejected": -0.3434896469116211, + "step": 4240 + }, + { + "epoch": 21.564556962025318, + "grad_norm": 458950.46774975123, + "learning_rate": 2.226574741460357e-07, + "logits/chosen": 0.21833649277687073, + "logits/rejected": 0.5494757294654846, + "logps/chosen": -34.96293258666992, + "logps/rejected": -556.4089965820312, + "loss": 14176.0922, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.189616397023201, + "rewards/margins": 0.5252028107643127, + "rewards/rejected": -0.33558645844459534, + "step": 4250 + }, + { + "epoch": 21.615189873417723, + "grad_norm": 421239.1758950125, + "learning_rate": 2.218740206831714e-07, + "logits/chosen": -1.4874672889709473, + "logits/rejected": -0.608989417552948, + "logps/chosen": -41.988407135009766, + "logps/rejected": -571.3652954101562, + "loss": 13889.2, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19781097769737244, + "rewards/margins": 0.534324586391449, + "rewards/rejected": -0.33651357889175415, + "step": 4260 + }, + { + "epoch": 21.665822784810125, + "grad_norm": 454253.5294425473, + "learning_rate": 2.2109056722030712e-07, + "logits/chosen": -2.9821505546569824, + "logits/rejected": -2.786928415298462, + "logps/chosen": -34.64727020263672, + "logps/rejected": -584.9675903320312, + "loss": 13704.1344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20551709830760956, + "rewards/margins": 0.5548884868621826, + "rewards/rejected": -0.3493713140487671, + "step": 4270 + }, + { + "epoch": 21.71645569620253, + "grad_norm": 452082.6783455646, + "learning_rate": 2.203071137574428e-07, + "logits/chosen": 0.3382144868373871, + "logits/rejected": 0.4318400025367737, + "logps/chosen": -45.613182067871094, + "logps/rejected": -586.7680053710938, + "loss": 13852.6578, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.20163491368293762, + "rewards/margins": 0.5407856702804565, + "rewards/rejected": -0.3391507565975189, + "step": 4280 + }, + { + "epoch": 21.767088607594935, + "grad_norm": 514094.8538962621, + "learning_rate": 2.195236602945785e-07, + "logits/chosen": -0.4016874432563782, + "logits/rejected": 0.6381920576095581, + "logps/chosen": -43.911903381347656, + "logps/rejected": -575.604736328125, + "loss": 13636.45, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19898727536201477, + "rewards/margins": 0.5374493598937988, + "rewards/rejected": -0.33846214413642883, + "step": 4290 + }, + { + "epoch": 21.81772151898734, + "grad_norm": 566656.255024603, + "learning_rate": 2.187402068317142e-07, + "logits/chosen": 1.3056986331939697, + "logits/rejected": 1.5420660972595215, + "logps/chosen": -26.3211669921875, + "logps/rejected": -560.0467529296875, + "loss": 13776.3141, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19464771449565887, + "rewards/margins": 0.5358431935310364, + "rewards/rejected": -0.3411955237388611, + "step": 4300 + }, + { + "epoch": 21.868354430379746, + "grad_norm": 615988.2584437894, + "learning_rate": 2.179567533688499e-07, + "logits/chosen": -1.382976770401001, + "logits/rejected": -1.09381902217865, + "logps/chosen": -39.40836715698242, + "logps/rejected": -589.1639404296875, + "loss": 14209.4484, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20153430104255676, + "rewards/margins": 0.5491803884506226, + "rewards/rejected": -0.3476461172103882, + "step": 4310 + }, + { + "epoch": 21.91898734177215, + "grad_norm": 537804.0354341647, + "learning_rate": 2.1717329990598557e-07, + "logits/chosen": -0.1827346831560135, + "logits/rejected": 0.2996447682380676, + "logps/chosen": -45.03169250488281, + "logps/rejected": -602.3880615234375, + "loss": 13524.8656, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20245595276355743, + "rewards/margins": 0.5563360452651978, + "rewards/rejected": -0.3538800776004791, + "step": 4320 + }, + { + "epoch": 21.969620253164557, + "grad_norm": 1296539.8068217032, + "learning_rate": 2.1638984644312128e-07, + "logits/chosen": -1.9990384578704834, + "logits/rejected": -2.4059531688690186, + "logps/chosen": -43.4345588684082, + "logps/rejected": -604.287841796875, + "loss": 13463.9719, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21004056930541992, + "rewards/margins": 0.5605840086936951, + "rewards/rejected": -0.35054340958595276, + "step": 4330 + }, + { + "epoch": 22.020253164556962, + "grad_norm": 844000.7864919893, + "learning_rate": 2.1560639298025696e-07, + "logits/chosen": -0.2103087455034256, + "logits/rejected": 0.07530391216278076, + "logps/chosen": -30.565990447998047, + "logps/rejected": -547.0203857421875, + "loss": 14383.6906, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1880597323179245, + "rewards/margins": 0.5152319073677063, + "rewards/rejected": -0.3271721601486206, + "step": 4340 + }, + { + "epoch": 22.070886075949367, + "grad_norm": 597784.613899612, + "learning_rate": 2.1482293951739267e-07, + "logits/chosen": -0.721124529838562, + "logits/rejected": -0.21510323882102966, + "logps/chosen": -37.94996643066406, + "logps/rejected": -587.461181640625, + "loss": 13822.8656, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19957685470581055, + "rewards/margins": 0.5496448278427124, + "rewards/rejected": -0.3500679135322571, + "step": 4350 + }, + { + "epoch": 22.121518987341773, + "grad_norm": 468430.91971069, + "learning_rate": 2.1403948605452835e-07, + "logits/chosen": -1.418505072593689, + "logits/rejected": -0.8604210019111633, + "logps/chosen": -38.54343795776367, + "logps/rejected": -585.6038818359375, + "loss": 13499.3969, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.2021259367465973, + "rewards/margins": 0.5458530187606812, + "rewards/rejected": -0.34372708201408386, + "step": 4360 + }, + { + "epoch": 22.172151898734178, + "grad_norm": 838303.6265575557, + "learning_rate": 2.1325603259166405e-07, + "logits/chosen": -0.013787698931992054, + "logits/rejected": -0.22224357724189758, + "logps/chosen": -33.32988357543945, + "logps/rejected": -576.55224609375, + "loss": 13816.5812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1937212496995926, + "rewards/margins": 0.5373150110244751, + "rewards/rejected": -0.3435937762260437, + "step": 4370 + }, + { + "epoch": 22.222784810126583, + "grad_norm": 524213.07765733794, + "learning_rate": 2.1247257912879973e-07, + "logits/chosen": 0.3687540888786316, + "logits/rejected": 0.8078397512435913, + "logps/chosen": -38.822872161865234, + "logps/rejected": -553.8123168945312, + "loss": 12435.0875, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.18458959460258484, + "rewards/margins": 0.5130779147148132, + "rewards/rejected": -0.32848840951919556, + "step": 4380 + }, + { + "epoch": 22.27341772151899, + "grad_norm": 476932.283051178, + "learning_rate": 2.1168912566593544e-07, + "logits/chosen": 0.6524232029914856, + "logits/rejected": 0.6763177514076233, + "logps/chosen": -41.4456901550293, + "logps/rejected": -586.055419921875, + "loss": 14132.7062, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.20029637217521667, + "rewards/margins": 0.5451359152793884, + "rewards/rejected": -0.34483957290649414, + "step": 4390 + }, + { + "epoch": 22.324050632911394, + "grad_norm": 568972.1382617814, + "learning_rate": 2.1090567220307112e-07, + "logits/chosen": -0.3675435781478882, + "logits/rejected": 0.2508888840675354, + "logps/chosen": -37.127281188964844, + "logps/rejected": -571.7310180664062, + "loss": 13226.8641, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19992589950561523, + "rewards/margins": 0.5378109812736511, + "rewards/rejected": -0.33788514137268066, + "step": 4400 + }, + { + "epoch": 22.374683544303796, + "grad_norm": 549953.3378298564, + "learning_rate": 2.1012221874020682e-07, + "logits/chosen": -0.3316110372543335, + "logits/rejected": 0.12318412959575653, + "logps/chosen": -45.176429748535156, + "logps/rejected": -601.1099243164062, + "loss": 13357.3594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2056044340133667, + "rewards/margins": 0.5584502220153809, + "rewards/rejected": -0.35284581780433655, + "step": 4410 + }, + { + "epoch": 22.4253164556962, + "grad_norm": 487398.89046152594, + "learning_rate": 2.093387652773425e-07, + "logits/chosen": -1.0198824405670166, + "logits/rejected": -0.21292218565940857, + "logps/chosen": -36.835960388183594, + "logps/rejected": -577.4632568359375, + "loss": 13915.3031, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20356829464435577, + "rewards/margins": 0.543707013130188, + "rewards/rejected": -0.3401387631893158, + "step": 4420 + }, + { + "epoch": 22.475949367088607, + "grad_norm": 477361.2573301333, + "learning_rate": 2.085553118144782e-07, + "logits/chosen": 0.3704206943511963, + "logits/rejected": 0.693733811378479, + "logps/chosen": -46.64609146118164, + "logps/rejected": -594.5731811523438, + "loss": 13106.9359, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20661070942878723, + "rewards/margins": 0.5531338453292847, + "rewards/rejected": -0.34652310609817505, + "step": 4430 + }, + { + "epoch": 22.526582278481012, + "grad_norm": 597606.9724370906, + "learning_rate": 2.077718583516139e-07, + "logits/chosen": -0.6012102365493774, + "logits/rejected": -0.6212292909622192, + "logps/chosen": -36.24720001220703, + "logps/rejected": -570.0081787109375, + "loss": 13390.8625, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20318233966827393, + "rewards/margins": 0.5344886779785156, + "rewards/rejected": -0.3313063085079193, + "step": 4440 + }, + { + "epoch": 22.577215189873417, + "grad_norm": 469529.248927815, + "learning_rate": 2.069884048887496e-07, + "logits/chosen": -0.041382573544979095, + "logits/rejected": 0.7878470420837402, + "logps/chosen": -43.38654708862305, + "logps/rejected": -568.7279052734375, + "loss": 13333.4188, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19029700756072998, + "rewards/margins": 0.5311275124549866, + "rewards/rejected": -0.3408304750919342, + "step": 4450 + }, + { + "epoch": 22.627848101265823, + "grad_norm": 402623.00766789017, + "learning_rate": 2.0620495142588527e-07, + "logits/chosen": -0.8500850796699524, + "logits/rejected": 0.10065221786499023, + "logps/chosen": -31.52435302734375, + "logps/rejected": -562.478271484375, + "loss": 13787.8797, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.1997881382703781, + "rewards/margins": 0.5369755029678345, + "rewards/rejected": -0.33718740940093994, + "step": 4460 + }, + { + "epoch": 22.678481012658228, + "grad_norm": 373755.1797101064, + "learning_rate": 2.05421497963021e-07, + "logits/chosen": -1.3291960954666138, + "logits/rejected": -1.2023630142211914, + "logps/chosen": -34.12505340576172, + "logps/rejected": -600.3700561523438, + "loss": 13297.9406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20627860724925995, + "rewards/margins": 0.5676389336585999, + "rewards/rejected": -0.3613602817058563, + "step": 4470 + }, + { + "epoch": 22.729113924050633, + "grad_norm": 402761.92027776636, + "learning_rate": 2.0463804450015669e-07, + "logits/chosen": -1.5893421173095703, + "logits/rejected": -1.3823096752166748, + "logps/chosen": -30.61318588256836, + "logps/rejected": -584.5267944335938, + "loss": 14326.1875, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20079275965690613, + "rewards/margins": 0.5540488958358765, + "rewards/rejected": -0.35325610637664795, + "step": 4480 + }, + { + "epoch": 22.77974683544304, + "grad_norm": 547067.5872175789, + "learning_rate": 2.038545910372924e-07, + "logits/chosen": 0.4800703525543213, + "logits/rejected": 1.4792516231536865, + "logps/chosen": -27.049495697021484, + "logps/rejected": -563.0673217773438, + "loss": 14447.3891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19703736901283264, + "rewards/margins": 0.5409034490585327, + "rewards/rejected": -0.3438660502433777, + "step": 4490 + }, + { + "epoch": 22.830379746835444, + "grad_norm": 672757.480231201, + "learning_rate": 2.0307113757442807e-07, + "logits/chosen": 0.13832028210163116, + "logits/rejected": 0.6534411907196045, + "logps/chosen": -48.682350158691406, + "logps/rejected": -608.1444091796875, + "loss": 13146.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20029571652412415, + "rewards/margins": 0.5563368797302246, + "rewards/rejected": -0.35604116320610046, + "step": 4500 + }, + { + "epoch": 22.88101265822785, + "grad_norm": 369986.02432868385, + "learning_rate": 2.0228768411156378e-07, + "logits/chosen": -1.8307338953018188, + "logits/rejected": -1.2095929384231567, + "logps/chosen": -45.19769287109375, + "logps/rejected": -578.0750122070312, + "loss": 14329.1656, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1950627863407135, + "rewards/margins": 0.5377144813537598, + "rewards/rejected": -0.3426516652107239, + "step": 4510 + }, + { + "epoch": 22.931645569620255, + "grad_norm": 699107.7543808775, + "learning_rate": 2.0150423064869946e-07, + "logits/chosen": -0.08685462176799774, + "logits/rejected": 0.9019424319267273, + "logps/chosen": -45.735721588134766, + "logps/rejected": -577.2432861328125, + "loss": 13698.3734, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19573049247264862, + "rewards/margins": 0.5354525446891785, + "rewards/rejected": -0.33972200751304626, + "step": 4520 + }, + { + "epoch": 22.98227848101266, + "grad_norm": 406293.788277243, + "learning_rate": 2.0072077718583516e-07, + "logits/chosen": -0.8599483370780945, + "logits/rejected": 0.11351003497838974, + "logps/chosen": -26.267419815063477, + "logps/rejected": -555.6368408203125, + "loss": 13659.9219, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19417151808738708, + "rewards/margins": 0.5340765714645386, + "rewards/rejected": -0.3399050235748291, + "step": 4530 + }, + { + "epoch": 23.03291139240506, + "grad_norm": 260712.73606555417, + "learning_rate": 1.9993732372297084e-07, + "logits/chosen": -0.3511297106742859, + "logits/rejected": 0.3242552876472473, + "logps/chosen": -36.45670700073242, + "logps/rejected": -593.9841918945312, + "loss": 13041.3578, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20808692276477814, + "rewards/margins": 0.5588291883468628, + "rewards/rejected": -0.3507421910762787, + "step": 4540 + }, + { + "epoch": 23.083544303797467, + "grad_norm": 814422.569024492, + "learning_rate": 1.9915387026010655e-07, + "logits/chosen": -1.235445499420166, + "logits/rejected": -1.077894926071167, + "logps/chosen": -35.420509338378906, + "logps/rejected": -552.5203857421875, + "loss": 13824.6812, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19372673332691193, + "rewards/margins": 0.5206524729728699, + "rewards/rejected": -0.32692575454711914, + "step": 4550 + }, + { + "epoch": 23.134177215189872, + "grad_norm": 403075.73058182886, + "learning_rate": 1.9837041679724223e-07, + "logits/chosen": -1.2545719146728516, + "logits/rejected": -0.1615985631942749, + "logps/chosen": -39.13911056518555, + "logps/rejected": -572.5064697265625, + "loss": 13053.8422, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.20484952628612518, + "rewards/margins": 0.5411498546600342, + "rewards/rejected": -0.3363003432750702, + "step": 4560 + }, + { + "epoch": 23.184810126582278, + "grad_norm": 410323.6766664692, + "learning_rate": 1.9758696333437793e-07, + "logits/chosen": 0.23864197731018066, + "logits/rejected": 0.84992915391922, + "logps/chosen": -27.040796279907227, + "logps/rejected": -566.3431396484375, + "loss": 13967.5844, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.1973811388015747, + "rewards/margins": 0.5440403819084167, + "rewards/rejected": -0.34665924310684204, + "step": 4570 + }, + { + "epoch": 23.235443037974683, + "grad_norm": 1678425.4580624043, + "learning_rate": 1.9680350987151361e-07, + "logits/chosen": -1.6769014596939087, + "logits/rejected": -1.3361175060272217, + "logps/chosen": -41.234596252441406, + "logps/rejected": -571.8701782226562, + "loss": 14487.3484, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19792771339416504, + "rewards/margins": 0.5364667177200317, + "rewards/rejected": -0.3385389745235443, + "step": 4580 + }, + { + "epoch": 23.28607594936709, + "grad_norm": 449840.24063538713, + "learning_rate": 1.9602005640864932e-07, + "logits/chosen": 0.17428772151470184, + "logits/rejected": 0.36653703451156616, + "logps/chosen": -42.2408561706543, + "logps/rejected": -559.9259033203125, + "loss": 13708.7656, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.18780556321144104, + "rewards/margins": 0.5175566673278809, + "rewards/rejected": -0.3297511339187622, + "step": 4590 + }, + { + "epoch": 23.336708860759494, + "grad_norm": 679657.3800551172, + "learning_rate": 1.95236602945785e-07, + "logits/chosen": -1.7364814281463623, + "logits/rejected": -0.4961363673210144, + "logps/chosen": -38.69525909423828, + "logps/rejected": -572.1868896484375, + "loss": 13593.8281, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.19368386268615723, + "rewards/margins": 0.5372828245162964, + "rewards/rejected": -0.3435989320278168, + "step": 4600 + }, + { + "epoch": 23.3873417721519, + "grad_norm": 494252.98244154564, + "learning_rate": 1.944531494829207e-07, + "logits/chosen": -1.2254236936569214, + "logits/rejected": -0.549937903881073, + "logps/chosen": -48.976341247558594, + "logps/rejected": -586.7686767578125, + "loss": 13090.5094, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.2046699821949005, + "rewards/margins": 0.5475128889083862, + "rewards/rejected": -0.34284287691116333, + "step": 4610 + }, + { + "epoch": 23.437974683544304, + "grad_norm": 465753.7874146901, + "learning_rate": 1.9366969602005639e-07, + "logits/chosen": -0.32544824481010437, + "logits/rejected": 0.21593210101127625, + "logps/chosen": -44.010986328125, + "logps/rejected": -600.7352905273438, + "loss": 13414.7938, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19957385957241058, + "rewards/margins": 0.5576717257499695, + "rewards/rejected": -0.3580978512763977, + "step": 4620 + }, + { + "epoch": 23.48860759493671, + "grad_norm": 481484.43193068507, + "learning_rate": 1.928862425571921e-07, + "logits/chosen": 1.6490085124969482, + "logits/rejected": 2.4715371131896973, + "logps/chosen": -33.72826385498047, + "logps/rejected": -570.8497314453125, + "loss": 12998.5312, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20028643310070038, + "rewards/margins": 0.542258083820343, + "rewards/rejected": -0.34197166562080383, + "step": 4630 + }, + { + "epoch": 23.539240506329115, + "grad_norm": 332996.8100920917, + "learning_rate": 1.9210278909432777e-07, + "logits/chosen": 0.4085458219051361, + "logits/rejected": 0.9639450907707214, + "logps/chosen": -34.42941665649414, + "logps/rejected": -557.9277954101562, + "loss": 13038.3531, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19136182963848114, + "rewards/margins": 0.5279586911201477, + "rewards/rejected": -0.33659690618515015, + "step": 4640 + }, + { + "epoch": 23.58987341772152, + "grad_norm": 417794.5245281789, + "learning_rate": 1.913193356314635e-07, + "logits/chosen": 0.9038169980049133, + "logits/rejected": 1.2894458770751953, + "logps/chosen": -36.74773406982422, + "logps/rejected": -562.9734497070312, + "loss": 13522.8828, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19096803665161133, + "rewards/margins": 0.528801679611206, + "rewards/rejected": -0.3378336727619171, + "step": 4650 + }, + { + "epoch": 23.640506329113926, + "grad_norm": 676256.0388850861, + "learning_rate": 1.9053588216859918e-07, + "logits/chosen": -1.2860909700393677, + "logits/rejected": -0.9226953387260437, + "logps/chosen": -38.81789016723633, + "logps/rejected": -582.3718872070312, + "loss": 12973.8531, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19840100407600403, + "rewards/margins": 0.5444675087928772, + "rewards/rejected": -0.34606653451919556, + "step": 4660 + }, + { + "epoch": 23.691139240506327, + "grad_norm": 554334.5417021438, + "learning_rate": 1.897524287057349e-07, + "logits/chosen": -0.9978511929512024, + "logits/rejected": -1.0807132720947266, + "logps/chosen": -37.58687210083008, + "logps/rejected": -592.8406982421875, + "loss": 13047.7078, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21029341220855713, + "rewards/margins": 0.5588668584823608, + "rewards/rejected": -0.34857338666915894, + "step": 4670 + }, + { + "epoch": 23.741772151898733, + "grad_norm": 369259.1253714009, + "learning_rate": 1.8896897524287057e-07, + "logits/chosen": -0.2619388997554779, + "logits/rejected": 0.5041999816894531, + "logps/chosen": -30.875507354736328, + "logps/rejected": -593.271484375, + "loss": 13440.1, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20856580138206482, + "rewards/margins": 0.5670603513717651, + "rewards/rejected": -0.3584945499897003, + "step": 4680 + }, + { + "epoch": 23.792405063291138, + "grad_norm": 521963.4531232378, + "learning_rate": 1.8818552178000628e-07, + "logits/chosen": 1.4849811792373657, + "logits/rejected": 2.5439536571502686, + "logps/chosen": -48.661781311035156, + "logps/rejected": -584.5633544921875, + "loss": 12945.0281, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.2017756700515747, + "rewards/margins": 0.5462977886199951, + "rewards/rejected": -0.34452205896377563, + "step": 4690 + }, + { + "epoch": 23.843037974683543, + "grad_norm": 268733.66014746006, + "learning_rate": 1.8740206831714195e-07, + "logits/chosen": -3.035019636154175, + "logits/rejected": -2.2716848850250244, + "logps/chosen": -37.333274841308594, + "logps/rejected": -573.26806640625, + "loss": 12908.0664, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19613531231880188, + "rewards/margins": 0.5401136875152588, + "rewards/rejected": -0.34397831559181213, + "step": 4700 + }, + { + "epoch": 23.89367088607595, + "grad_norm": 408014.58113424503, + "learning_rate": 1.8661861485427766e-07, + "logits/chosen": -1.863054871559143, + "logits/rejected": -1.9231150150299072, + "logps/chosen": -31.946712493896484, + "logps/rejected": -559.3107299804688, + "loss": 13674.8922, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19874636828899384, + "rewards/margins": 0.5276685357093811, + "rewards/rejected": -0.32892221212387085, + "step": 4710 + }, + { + "epoch": 23.944303797468354, + "grad_norm": 642355.0358722768, + "learning_rate": 1.8583516139141334e-07, + "logits/chosen": 0.2536182999610901, + "logits/rejected": 0.7465096712112427, + "logps/chosen": -30.321950912475586, + "logps/rejected": -569.6714477539062, + "loss": 13267.9422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19373981654644012, + "rewards/margins": 0.5428478717803955, + "rewards/rejected": -0.3491080403327942, + "step": 4720 + }, + { + "epoch": 23.99493670886076, + "grad_norm": 342588.7538586878, + "learning_rate": 1.8505170792854905e-07, + "logits/chosen": 0.4239419400691986, + "logits/rejected": 1.2003661394119263, + "logps/chosen": -37.75706100463867, + "logps/rejected": -594.6668701171875, + "loss": 12895.3687, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.2065034806728363, + "rewards/margins": 0.5577437877655029, + "rewards/rejected": -0.3512403070926666, + "step": 4730 + }, + { + "epoch": 24.045569620253165, + "grad_norm": 627241.9523500776, + "learning_rate": 1.8426825446568473e-07, + "logits/chosen": -0.28766584396362305, + "logits/rejected": -0.6269916296005249, + "logps/chosen": -44.65516662597656, + "logps/rejected": -577.6954345703125, + "loss": 12950.232, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20441746711730957, + "rewards/margins": 0.5374675393104553, + "rewards/rejected": -0.33305004239082336, + "step": 4740 + }, + { + "epoch": 24.09620253164557, + "grad_norm": 347781.48168387014, + "learning_rate": 1.8348480100282043e-07, + "logits/chosen": -0.7881828546524048, + "logits/rejected": 0.06337795406579971, + "logps/chosen": -27.82383155822754, + "logps/rejected": -579.8468017578125, + "loss": 12986.6266, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2079242467880249, + "rewards/margins": 0.5530039668083191, + "rewards/rejected": -0.3450797498226166, + "step": 4750 + }, + { + "epoch": 24.146835443037975, + "grad_norm": 263636.0742414822, + "learning_rate": 1.827013475399561e-07, + "logits/chosen": -1.2877452373504639, + "logits/rejected": -0.24622194468975067, + "logps/chosen": -30.940113067626953, + "logps/rejected": -566.517333984375, + "loss": 12716.3281, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20519797503948212, + "rewards/margins": 0.5418139696121216, + "rewards/rejected": -0.33661606907844543, + "step": 4760 + }, + { + "epoch": 24.19746835443038, + "grad_norm": 343217.0970891512, + "learning_rate": 1.8191789407709182e-07, + "logits/chosen": -1.0294177532196045, + "logits/rejected": -0.4815802574157715, + "logps/chosen": -31.3253231048584, + "logps/rejected": -560.1129150390625, + "loss": 13408.0094, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19244466722011566, + "rewards/margins": 0.5293484926223755, + "rewards/rejected": -0.3369038701057434, + "step": 4770 + }, + { + "epoch": 24.248101265822786, + "grad_norm": 504214.9301429895, + "learning_rate": 1.811344406142275e-07, + "logits/chosen": -0.604789137840271, + "logits/rejected": 0.22590136528015137, + "logps/chosen": -41.899864196777344, + "logps/rejected": -555.2015380859375, + "loss": 13531.9625, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.190764918923378, + "rewards/margins": 0.5197011232376099, + "rewards/rejected": -0.32893624901771545, + "step": 4780 + }, + { + "epoch": 24.29873417721519, + "grad_norm": 376293.61129873747, + "learning_rate": 1.803509871513632e-07, + "logits/chosen": -0.3547658324241638, + "logits/rejected": -0.13969659805297852, + "logps/chosen": -28.85129737854004, + "logps/rejected": -577.5880126953125, + "loss": 12898.8125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19991345703601837, + "rewards/margins": 0.5475735664367676, + "rewards/rejected": -0.3476601243019104, + "step": 4790 + }, + { + "epoch": 24.349367088607593, + "grad_norm": 414135.66871189536, + "learning_rate": 1.7956753368849888e-07, + "logits/chosen": 0.09798486530780792, + "logits/rejected": 0.8055311441421509, + "logps/chosen": -34.26675796508789, + "logps/rejected": -584.9435424804688, + "loss": 12507.7328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20852184295654297, + "rewards/margins": 0.5590152144432068, + "rewards/rejected": -0.3504934012889862, + "step": 4800 + }, + { + "epoch": 24.4, + "grad_norm": 343912.9213203651, + "learning_rate": 1.787840802256346e-07, + "logits/chosen": -0.12874791026115417, + "logits/rejected": 0.15529172122478485, + "logps/chosen": -32.1009635925293, + "logps/rejected": -574.7623291015625, + "loss": 13372.5594, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.1985681653022766, + "rewards/margins": 0.546955943107605, + "rewards/rejected": -0.34838777780532837, + "step": 4810 + }, + { + "epoch": 24.450632911392404, + "grad_norm": 697134.5088322331, + "learning_rate": 1.7800062676277027e-07, + "logits/chosen": 1.3246450424194336, + "logits/rejected": 1.760595679283142, + "logps/chosen": -32.29949188232422, + "logps/rejected": -575.516357421875, + "loss": 12415.9672, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19931714236736298, + "rewards/margins": 0.5466721057891846, + "rewards/rejected": -0.3473549485206604, + "step": 4820 + }, + { + "epoch": 24.50126582278481, + "grad_norm": 389676.90431780973, + "learning_rate": 1.7721717329990597e-07, + "logits/chosen": -1.3381322622299194, + "logits/rejected": -0.6404735445976257, + "logps/chosen": -34.635719299316406, + "logps/rejected": -587.3080444335938, + "loss": 13101.6227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20362886786460876, + "rewards/margins": 0.5546956658363342, + "rewards/rejected": -0.3510667383670807, + "step": 4830 + }, + { + "epoch": 24.551898734177215, + "grad_norm": 314600.5637340995, + "learning_rate": 1.7643371983704165e-07, + "logits/chosen": 0.9953921437263489, + "logits/rejected": 0.9643779993057251, + "logps/chosen": -30.646331787109375, + "logps/rejected": -570.855712890625, + "loss": 12974.9633, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19645583629608154, + "rewards/margins": 0.5343093872070312, + "rewards/rejected": -0.3378535211086273, + "step": 4840 + }, + { + "epoch": 24.60253164556962, + "grad_norm": 327013.6839426029, + "learning_rate": 1.7565026637417739e-07, + "logits/chosen": -0.7217426300048828, + "logits/rejected": -0.7290517091751099, + "logps/chosen": -37.666648864746094, + "logps/rejected": -563.5865478515625, + "loss": 13273.0266, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.197954460978508, + "rewards/margins": 0.5282526612281799, + "rewards/rejected": -0.33029812574386597, + "step": 4850 + }, + { + "epoch": 24.653164556962025, + "grad_norm": 425662.97201424866, + "learning_rate": 1.7486681291131307e-07, + "logits/chosen": -0.32900291681289673, + "logits/rejected": 0.18864622712135315, + "logps/chosen": -32.36582565307617, + "logps/rejected": -566.5547485351562, + "loss": 13350.5594, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19610336422920227, + "rewards/margins": 0.5321984887123108, + "rewards/rejected": -0.3360951244831085, + "step": 4860 + }, + { + "epoch": 24.70379746835443, + "grad_norm": 402351.7657170625, + "learning_rate": 1.7408335944844877e-07, + "logits/chosen": -2.112635850906372, + "logits/rejected": -1.4337832927703857, + "logps/chosen": -36.27765655517578, + "logps/rejected": -588.708740234375, + "loss": 13690.0375, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20721419155597687, + "rewards/margins": 0.5532296299934387, + "rewards/rejected": -0.34601545333862305, + "step": 4870 + }, + { + "epoch": 24.754430379746836, + "grad_norm": 357871.7969812476, + "learning_rate": 1.7329990598558445e-07, + "logits/chosen": -0.34599849581718445, + "logits/rejected": 0.0005200624582357705, + "logps/chosen": -28.50592041015625, + "logps/rejected": -560.6458740234375, + "loss": 13133.4, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.1971847414970398, + "rewards/margins": 0.5326961278915405, + "rewards/rejected": -0.33551135659217834, + "step": 4880 + }, + { + "epoch": 24.80506329113924, + "grad_norm": 610016.4055058825, + "learning_rate": 1.7251645252272016e-07, + "logits/chosen": -1.225339651107788, + "logits/rejected": -0.8243592977523804, + "logps/chosen": -34.17732620239258, + "logps/rejected": -581.1436767578125, + "loss": 12571.1, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20419716835021973, + "rewards/margins": 0.5520066022872925, + "rewards/rejected": -0.347809374332428, + "step": 4890 + }, + { + "epoch": 24.855696202531647, + "grad_norm": 379732.159808034, + "learning_rate": 1.7173299905985584e-07, + "logits/chosen": -1.817439317703247, + "logits/rejected": -1.4895861148834229, + "logps/chosen": -30.775598526000977, + "logps/rejected": -570.484130859375, + "loss": 13212.3781, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.20220620930194855, + "rewards/margins": 0.5392990112304688, + "rewards/rejected": -0.3370928466320038, + "step": 4900 + }, + { + "epoch": 24.906329113924052, + "grad_norm": 324902.93726753094, + "learning_rate": 1.7094954559699154e-07, + "logits/chosen": -0.6947388648986816, + "logits/rejected": -0.4560522437095642, + "logps/chosen": -40.327796936035156, + "logps/rejected": -581.7532348632812, + "loss": 12891.9016, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20357458293437958, + "rewards/margins": 0.5383836030960083, + "rewards/rejected": -0.33480900526046753, + "step": 4910 + }, + { + "epoch": 24.956962025316457, + "grad_norm": 294429.7500390618, + "learning_rate": 1.7016609213412722e-07, + "logits/chosen": -0.9121583104133606, + "logits/rejected": 0.40684938430786133, + "logps/chosen": -28.22664451599121, + "logps/rejected": -580.9795532226562, + "loss": 13184.5812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20078356564044952, + "rewards/margins": 0.5547462701797485, + "rewards/rejected": -0.3539626896381378, + "step": 4920 + }, + { + "epoch": 25.00759493670886, + "grad_norm": 308388.08709269366, + "learning_rate": 1.6938263867126293e-07, + "logits/chosen": -1.6532137393951416, + "logits/rejected": -1.572850227355957, + "logps/chosen": -41.12345886230469, + "logps/rejected": -613.5958862304688, + "loss": 12755.7383, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.21259479224681854, + "rewards/margins": 0.5684026479721069, + "rewards/rejected": -0.3558078408241272, + "step": 4930 + }, + { + "epoch": 25.058227848101264, + "grad_norm": 320761.03886897856, + "learning_rate": 1.685991852083986e-07, + "logits/chosen": -0.11034099757671356, + "logits/rejected": -0.06293153762817383, + "logps/chosen": -34.010704040527344, + "logps/rejected": -583.318359375, + "loss": 13300.3922, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19742931425571442, + "rewards/margins": 0.5516862273216248, + "rewards/rejected": -0.35425692796707153, + "step": 4940 + }, + { + "epoch": 25.10886075949367, + "grad_norm": 282559.397671993, + "learning_rate": 1.6781573174553431e-07, + "logits/chosen": 0.5274404883384705, + "logits/rejected": 1.2507613897323608, + "logps/chosen": -29.299930572509766, + "logps/rejected": -554.8450927734375, + "loss": 12685.2523, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.19330081343650818, + "rewards/margins": 0.5271843671798706, + "rewards/rejected": -0.3338836431503296, + "step": 4950 + }, + { + "epoch": 25.159493670886075, + "grad_norm": 248533.31024359175, + "learning_rate": 1.6703227828267e-07, + "logits/chosen": -1.2484452724456787, + "logits/rejected": -0.5531445741653442, + "logps/chosen": -42.44970703125, + "logps/rejected": -591.9672241210938, + "loss": 12525.2, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20557789504528046, + "rewards/margins": 0.5516069531440735, + "rewards/rejected": -0.34602901339530945, + "step": 4960 + }, + { + "epoch": 25.21012658227848, + "grad_norm": 365840.3682606488, + "learning_rate": 1.662488248198057e-07, + "logits/chosen": -1.5047721862792969, + "logits/rejected": -1.5158735513687134, + "logps/chosen": -31.838958740234375, + "logps/rejected": -581.0045166015625, + "loss": 13041.882, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.201541468501091, + "rewards/margins": 0.5492666959762573, + "rewards/rejected": -0.3477252125740051, + "step": 4970 + }, + { + "epoch": 25.260759493670886, + "grad_norm": 364119.66442401055, + "learning_rate": 1.6546537135694138e-07, + "logits/chosen": -2.0333914756774902, + "logits/rejected": -2.0420191287994385, + "logps/chosen": -33.426788330078125, + "logps/rejected": -577.18212890625, + "loss": 13218.8875, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20231468975543976, + "rewards/margins": 0.5456961989402771, + "rewards/rejected": -0.3433815836906433, + "step": 4980 + }, + { + "epoch": 25.31139240506329, + "grad_norm": 434691.5380135347, + "learning_rate": 1.6468191789407709e-07, + "logits/chosen": -0.23437795042991638, + "logits/rejected": -0.03313719108700752, + "logps/chosen": -33.025386810302734, + "logps/rejected": -587.5833740234375, + "loss": 12003.9711, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19747456908226013, + "rewards/margins": 0.553167998790741, + "rewards/rejected": -0.3556934595108032, + "step": 4990 + }, + { + "epoch": 25.362025316455696, + "grad_norm": 257881.6224659914, + "learning_rate": 1.6389846443121277e-07, + "logits/chosen": 1.229998230934143, + "logits/rejected": 1.8426265716552734, + "logps/chosen": -31.151538848876953, + "logps/rejected": -575.4852905273438, + "loss": 13412.7078, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.1954251229763031, + "rewards/margins": 0.5429095029830933, + "rewards/rejected": -0.34748440980911255, + "step": 5000 + }, + { + "epoch": 25.4126582278481, + "grad_norm": 425285.73032920854, + "learning_rate": 1.6311501096834847e-07, + "logits/chosen": -1.241003155708313, + "logits/rejected": -0.7176898121833801, + "logps/chosen": -31.115795135498047, + "logps/rejected": -558.19873046875, + "loss": 13301.7094, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.19612053036689758, + "rewards/margins": 0.5247890949249268, + "rewards/rejected": -0.3286685347557068, + "step": 5010 + }, + { + "epoch": 25.463291139240507, + "grad_norm": 372695.4381119174, + "learning_rate": 1.6233155750548415e-07, + "logits/chosen": -1.8982555866241455, + "logits/rejected": -1.494901180267334, + "logps/chosen": -28.403858184814453, + "logps/rejected": -562.348388671875, + "loss": 13093.6797, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.203691765666008, + "rewards/margins": 0.5354448556900024, + "rewards/rejected": -0.33175310492515564, + "step": 5020 + }, + { + "epoch": 25.513924050632912, + "grad_norm": 291137.30920257524, + "learning_rate": 1.6154810404261986e-07, + "logits/chosen": -0.2861802577972412, + "logits/rejected": -0.4479186534881592, + "logps/chosen": -23.825702667236328, + "logps/rejected": -559.0096435546875, + "loss": 12589.4609, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.1913156658411026, + "rewards/margins": 0.5378258228302002, + "rewards/rejected": -0.346510112285614, + "step": 5030 + }, + { + "epoch": 25.564556962025318, + "grad_norm": 273297.2570355529, + "learning_rate": 1.6076465057975556e-07, + "logits/chosen": -2.0077948570251465, + "logits/rejected": -1.546903371810913, + "logps/chosen": -34.178993225097656, + "logps/rejected": -599.1771240234375, + "loss": 12277.0906, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20620949566364288, + "rewards/margins": 0.5666217803955078, + "rewards/rejected": -0.36041226983070374, + "step": 5040 + }, + { + "epoch": 25.615189873417723, + "grad_norm": 287331.7702661688, + "learning_rate": 1.5998119711689127e-07, + "logits/chosen": -0.9829635620117188, + "logits/rejected": -0.3811960220336914, + "logps/chosen": -32.14269256591797, + "logps/rejected": -580.4415283203125, + "loss": 12507.3219, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20907440781593323, + "rewards/margins": 0.5523373484611511, + "rewards/rejected": -0.3432629406452179, + "step": 5050 + }, + { + "epoch": 25.665822784810125, + "grad_norm": 896554.0294317787, + "learning_rate": 1.5919774365402695e-07, + "logits/chosen": -1.3259598016738892, + "logits/rejected": -0.9525947570800781, + "logps/chosen": -25.666656494140625, + "logps/rejected": -573.1832885742188, + "loss": 12955.9469, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19553272426128387, + "rewards/margins": 0.5421277284622192, + "rewards/rejected": -0.3465949594974518, + "step": 5060 + }, + { + "epoch": 25.71645569620253, + "grad_norm": 360559.08966435614, + "learning_rate": 1.5841429019116266e-07, + "logits/chosen": -2.50518536567688, + "logits/rejected": -2.6326870918273926, + "logps/chosen": -40.73974609375, + "logps/rejected": -598.9993896484375, + "loss": 13192.7609, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2110958993434906, + "rewards/margins": 0.559829592704773, + "rewards/rejected": -0.34873366355895996, + "step": 5070 + }, + { + "epoch": 25.767088607594935, + "grad_norm": 354200.8480985467, + "learning_rate": 1.5763083672829833e-07, + "logits/chosen": 0.24985246360301971, + "logits/rejected": 0.11640717834234238, + "logps/chosen": -30.384597778320312, + "logps/rejected": -595.9378662109375, + "loss": 13357.8156, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20246371626853943, + "rewards/margins": 0.5666370391845703, + "rewards/rejected": -0.3641732633113861, + "step": 5080 + }, + { + "epoch": 25.81772151898734, + "grad_norm": 419630.4907858681, + "learning_rate": 1.5684738326543404e-07, + "logits/chosen": -2.382422924041748, + "logits/rejected": -1.6780860424041748, + "logps/chosen": -32.89704132080078, + "logps/rejected": -596.2845458984375, + "loss": 13075.125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20516617596149445, + "rewards/margins": 0.5647061467170715, + "rewards/rejected": -0.3595399558544159, + "step": 5090 + }, + { + "epoch": 25.868354430379746, + "grad_norm": 239893.19190802056, + "learning_rate": 1.5606392980256972e-07, + "logits/chosen": -1.5904518365859985, + "logits/rejected": -1.162544846534729, + "logps/chosen": -29.703998565673828, + "logps/rejected": -562.6304321289062, + "loss": 12907.7898, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1967582404613495, + "rewards/margins": 0.5360093116760254, + "rewards/rejected": -0.3392511010169983, + "step": 5100 + }, + { + "epoch": 25.91898734177215, + "grad_norm": 2769163.91672907, + "learning_rate": 1.5528047633970543e-07, + "logits/chosen": -0.4542008936405182, + "logits/rejected": 0.3750479519367218, + "logps/chosen": -40.263450622558594, + "logps/rejected": -569.8021240234375, + "loss": 12356.1203, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19747862219810486, + "rewards/margins": 0.5331242680549622, + "rewards/rejected": -0.3356456160545349, + "step": 5110 + }, + { + "epoch": 25.969620253164557, + "grad_norm": 414959.45582905615, + "learning_rate": 1.544970228768411e-07, + "logits/chosen": -2.780273914337158, + "logits/rejected": -2.477725028991699, + "logps/chosen": -34.733909606933594, + "logps/rejected": -598.5794677734375, + "loss": 12866.1969, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22083155810832977, + "rewards/margins": 0.5664650797843933, + "rewards/rejected": -0.3456335663795471, + "step": 5120 + }, + { + "epoch": 26.020253164556962, + "grad_norm": 437459.9001544906, + "learning_rate": 1.537135694139768e-07, + "logits/chosen": -1.4238073825836182, + "logits/rejected": -1.5467934608459473, + "logps/chosen": -32.6416015625, + "logps/rejected": -585.3292236328125, + "loss": 12902.432, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.2033694088459015, + "rewards/margins": 0.5521097779273987, + "rewards/rejected": -0.3487403094768524, + "step": 5130 + }, + { + "epoch": 26.070886075949367, + "grad_norm": 461726.55326627713, + "learning_rate": 1.529301159511125e-07, + "logits/chosen": -1.0017569065093994, + "logits/rejected": -0.677699089050293, + "logps/chosen": -33.68021011352539, + "logps/rejected": -586.3854370117188, + "loss": 12206.9266, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20325596630573273, + "rewards/margins": 0.5559948682785034, + "rewards/rejected": -0.35273900628089905, + "step": 5140 + }, + { + "epoch": 26.121518987341773, + "grad_norm": 223445.63437535468, + "learning_rate": 1.521466624882482e-07, + "logits/chosen": -1.4141124486923218, + "logits/rejected": -0.6017986536026001, + "logps/chosen": -29.84651756286621, + "logps/rejected": -580.4603271484375, + "loss": 12104.9586, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20142440497875214, + "rewards/margins": 0.5523154139518738, + "rewards/rejected": -0.35089102387428284, + "step": 5150 + }, + { + "epoch": 26.172151898734178, + "grad_norm": 232119.6879833388, + "learning_rate": 1.5136320902538388e-07, + "logits/chosen": -0.7647647857666016, + "logits/rejected": -0.6229702830314636, + "logps/chosen": -34.456138610839844, + "logps/rejected": -574.6653442382812, + "loss": 12524.6094, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.2094695270061493, + "rewards/margins": 0.539394199848175, + "rewards/rejected": -0.32992464303970337, + "step": 5160 + }, + { + "epoch": 26.222784810126583, + "grad_norm": 478076.71264027077, + "learning_rate": 1.5057975556251958e-07, + "logits/chosen": -2.230821132659912, + "logits/rejected": -2.297372579574585, + "logps/chosen": -29.98971176147461, + "logps/rejected": -588.6803588867188, + "loss": 12188.2758, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21123230457305908, + "rewards/margins": 0.5585904121398926, + "rewards/rejected": -0.3473580479621887, + "step": 5170 + }, + { + "epoch": 26.27341772151899, + "grad_norm": 287477.38205394626, + "learning_rate": 1.4979630209965526e-07, + "logits/chosen": 0.2648393511772156, + "logits/rejected": 1.2140284776687622, + "logps/chosen": -26.058353424072266, + "logps/rejected": -575.137939453125, + "loss": 13004.7297, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20219556987285614, + "rewards/margins": 0.5560811758041382, + "rewards/rejected": -0.35388559103012085, + "step": 5180 + }, + { + "epoch": 26.324050632911394, + "grad_norm": 448400.95809014083, + "learning_rate": 1.4901284863679097e-07, + "logits/chosen": -0.47244685888290405, + "logits/rejected": 0.34987983107566833, + "logps/chosen": -47.41560745239258, + "logps/rejected": -588.7420654296875, + "loss": 12302.9594, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20305314660072327, + "rewards/margins": 0.5488015413284302, + "rewards/rejected": -0.3457483947277069, + "step": 5190 + }, + { + "epoch": 26.374683544303796, + "grad_norm": 290914.6200870196, + "learning_rate": 1.4822939517392665e-07, + "logits/chosen": -1.5243618488311768, + "logits/rejected": -0.6017967462539673, + "logps/chosen": -33.99588394165039, + "logps/rejected": -590.6222534179688, + "loss": 12878.768, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20388083159923553, + "rewards/margins": 0.5592610836029053, + "rewards/rejected": -0.35538023710250854, + "step": 5200 + }, + { + "epoch": 26.4253164556962, + "grad_norm": 715122.6528862711, + "learning_rate": 1.4744594171106235e-07, + "logits/chosen": -1.3308216333389282, + "logits/rejected": -1.0356947183609009, + "logps/chosen": -29.031147003173828, + "logps/rejected": -595.821533203125, + "loss": 12466.6016, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.212965726852417, + "rewards/margins": 0.5688080191612244, + "rewards/rejected": -0.3558422923088074, + "step": 5210 + }, + { + "epoch": 26.475949367088607, + "grad_norm": 266006.16874610144, + "learning_rate": 1.4666248824819803e-07, + "logits/chosen": -0.2248738706111908, + "logits/rejected": 0.37806427478790283, + "logps/chosen": -35.31621551513672, + "logps/rejected": -578.3462524414062, + "loss": 12517.1375, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20571331679821014, + "rewards/margins": 0.5493656396865845, + "rewards/rejected": -0.3436523675918579, + "step": 5220 + }, + { + "epoch": 26.526582278481012, + "grad_norm": 296131.0633758982, + "learning_rate": 1.4587903478533377e-07, + "logits/chosen": -3.198024272918701, + "logits/rejected": -2.1562371253967285, + "logps/chosen": -24.365009307861328, + "logps/rejected": -589.4319458007812, + "loss": 12258.343, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2150738686323166, + "rewards/margins": 0.5671111345291138, + "rewards/rejected": -0.352037250995636, + "step": 5230 + }, + { + "epoch": 26.577215189873417, + "grad_norm": 310894.2430575026, + "learning_rate": 1.4509558132246945e-07, + "logits/chosen": 1.5686824321746826, + "logits/rejected": 1.7765287160873413, + "logps/chosen": -25.171403884887695, + "logps/rejected": -559.4937744140625, + "loss": 13451.6969, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.18740372359752655, + "rewards/margins": 0.5345771312713623, + "rewards/rejected": -0.3471735119819641, + "step": 5240 + }, + { + "epoch": 26.627848101265823, + "grad_norm": 273385.34239455353, + "learning_rate": 1.4431212785960515e-07, + "logits/chosen": 0.779743492603302, + "logits/rejected": 0.5761479139328003, + "logps/chosen": -24.774072647094727, + "logps/rejected": -552.44775390625, + "loss": 13444.4844, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.1935535967350006, + "rewards/margins": 0.5272840857505798, + "rewards/rejected": -0.3337305188179016, + "step": 5250 + }, + { + "epoch": 26.678481012658228, + "grad_norm": 292701.1225306038, + "learning_rate": 1.4352867439674083e-07, + "logits/chosen": -2.055417060852051, + "logits/rejected": -1.5558016300201416, + "logps/chosen": -34.77043151855469, + "logps/rejected": -578.9781494140625, + "loss": 12698.0, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20298035442829132, + "rewards/margins": 0.543838381767273, + "rewards/rejected": -0.3408580422401428, + "step": 5260 + }, + { + "epoch": 26.729113924050633, + "grad_norm": 274251.20733361214, + "learning_rate": 1.4274522093387654e-07, + "logits/chosen": -0.7560523152351379, + "logits/rejected": -0.4179345667362213, + "logps/chosen": -35.23884201049805, + "logps/rejected": -578.6085815429688, + "loss": 12311.3711, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.19854024052619934, + "rewards/margins": 0.5389177799224854, + "rewards/rejected": -0.3403775095939636, + "step": 5270 + }, + { + "epoch": 26.77974683544304, + "grad_norm": 540941.0207588519, + "learning_rate": 1.4196176747101222e-07, + "logits/chosen": -2.318772792816162, + "logits/rejected": -2.123133420944214, + "logps/chosen": -32.09846878051758, + "logps/rejected": -575.356201171875, + "loss": 12401.8453, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.2020426243543625, + "rewards/margins": 0.5411572456359863, + "rewards/rejected": -0.33911454677581787, + "step": 5280 + }, + { + "epoch": 26.830379746835444, + "grad_norm": 441696.9404493494, + "learning_rate": 1.4117831400814792e-07, + "logits/chosen": -2.1685147285461426, + "logits/rejected": -1.5242393016815186, + "logps/chosen": -22.024688720703125, + "logps/rejected": -543.53076171875, + "loss": 13786.8516, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.1891135424375534, + "rewards/margins": 0.523938775062561, + "rewards/rejected": -0.33482515811920166, + "step": 5290 + }, + { + "epoch": 26.88101265822785, + "grad_norm": 328168.9416709712, + "learning_rate": 1.403948605452836e-07, + "logits/chosen": -2.390831708908081, + "logits/rejected": -1.6773532629013062, + "logps/chosen": -37.75607681274414, + "logps/rejected": -572.0828247070312, + "loss": 13110.5859, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.20247995853424072, + "rewards/margins": 0.539789617061615, + "rewards/rejected": -0.33730968832969666, + "step": 5300 + }, + { + "epoch": 26.931645569620255, + "grad_norm": 342604.3694047161, + "learning_rate": 1.396114070824193e-07, + "logits/chosen": -0.8812211751937866, + "logits/rejected": -0.7407415509223938, + "logps/chosen": -31.870285034179688, + "logps/rejected": -576.1883544921875, + "loss": 12753.4875, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2047419548034668, + "rewards/margins": 0.5459688901901245, + "rewards/rejected": -0.3412269353866577, + "step": 5310 + }, + { + "epoch": 26.98227848101266, + "grad_norm": 327636.2077886267, + "learning_rate": 1.38827953619555e-07, + "logits/chosen": -1.1729528903961182, + "logits/rejected": -0.7522214651107788, + "logps/chosen": -41.73408508300781, + "logps/rejected": -603.9337158203125, + "loss": 11920.0102, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.22109094262123108, + "rewards/margins": 0.5666243433952332, + "rewards/rejected": -0.3455334007740021, + "step": 5320 + }, + { + "epoch": 27.03291139240506, + "grad_norm": 306486.1159229183, + "learning_rate": 1.380445001566907e-07, + "logits/chosen": -0.2942148447036743, + "logits/rejected": 0.29008275270462036, + "logps/chosen": -28.0673770904541, + "logps/rejected": -591.0224609375, + "loss": 12393.8094, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21129322052001953, + "rewards/margins": 0.562667965888977, + "rewards/rejected": -0.35137468576431274, + "step": 5330 + }, + { + "epoch": 27.083544303797467, + "grad_norm": 291301.0379049935, + "learning_rate": 1.3726104669382637e-07, + "logits/chosen": -0.04897233098745346, + "logits/rejected": 0.2625051736831665, + "logps/chosen": -28.288782119750977, + "logps/rejected": -600.4498291015625, + "loss": 12295.5109, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20883643627166748, + "rewards/margins": 0.5699074864387512, + "rewards/rejected": -0.36107105016708374, + "step": 5340 + }, + { + "epoch": 27.134177215189872, + "grad_norm": 336826.5711799587, + "learning_rate": 1.3647759323096208e-07, + "logits/chosen": -3.574831008911133, + "logits/rejected": -3.1615543365478516, + "logps/chosen": -28.667476654052734, + "logps/rejected": -610.046630859375, + "loss": 12205.2984, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21462281048297882, + "rewards/margins": 0.5811195373535156, + "rewards/rejected": -0.3664968013763428, + "step": 5350 + }, + { + "epoch": 27.184810126582278, + "grad_norm": 253108.22870561373, + "learning_rate": 1.3569413976809776e-07, + "logits/chosen": -1.275773048400879, + "logits/rejected": -0.2816539406776428, + "logps/chosen": -27.488027572631836, + "logps/rejected": -576.04443359375, + "loss": 12752.6922, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20755627751350403, + "rewards/margins": 0.5584858059883118, + "rewards/rejected": -0.35092949867248535, + "step": 5360 + }, + { + "epoch": 27.235443037974683, + "grad_norm": 378986.1297500305, + "learning_rate": 1.3491068630523347e-07, + "logits/chosen": -0.7276864051818848, + "logits/rejected": -0.2372014820575714, + "logps/chosen": -27.652713775634766, + "logps/rejected": -574.1866455078125, + "loss": 12491.7875, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19472074508666992, + "rewards/margins": 0.5458452701568604, + "rewards/rejected": -0.35112449526786804, + "step": 5370 + }, + { + "epoch": 27.28607594936709, + "grad_norm": 355029.2404666128, + "learning_rate": 1.3412723284236915e-07, + "logits/chosen": 0.03503293916583061, + "logits/rejected": 0.09463844448328018, + "logps/chosen": -20.01060676574707, + "logps/rejected": -571.03369140625, + "loss": 12906.6961, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1995813399553299, + "rewards/margins": 0.5492128133773804, + "rewards/rejected": -0.3496314287185669, + "step": 5380 + }, + { + "epoch": 27.336708860759494, + "grad_norm": 174005.9141855672, + "learning_rate": 1.3334377937950485e-07, + "logits/chosen": -1.0307856798171997, + "logits/rejected": -0.8787088394165039, + "logps/chosen": -27.538768768310547, + "logps/rejected": -584.1838989257812, + "loss": 12431.2086, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20768491923809052, + "rewards/margins": 0.558754026889801, + "rewards/rejected": -0.3510691225528717, + "step": 5390 + }, + { + "epoch": 27.3873417721519, + "grad_norm": 333107.0988957162, + "learning_rate": 1.3256032591664053e-07, + "logits/chosen": 0.49966010451316833, + "logits/rejected": 1.4367059469223022, + "logps/chosen": -22.20120620727539, + "logps/rejected": -573.7286987304688, + "loss": 12624.7586, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19961531460285187, + "rewards/margins": 0.5520228743553162, + "rewards/rejected": -0.35240763425827026, + "step": 5400 + }, + { + "epoch": 27.437974683544304, + "grad_norm": 189125.20245582235, + "learning_rate": 1.3177687245377624e-07, + "logits/chosen": -0.491058886051178, + "logits/rejected": -0.4180983603000641, + "logps/chosen": -24.668697357177734, + "logps/rejected": -574.7880249023438, + "loss": 12818.4906, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.203078955411911, + "rewards/margins": 0.5475345849990845, + "rewards/rejected": -0.34445568919181824, + "step": 5410 + }, + { + "epoch": 27.48860759493671, + "grad_norm": 255453.1741505276, + "learning_rate": 1.3099341899091192e-07, + "logits/chosen": -1.3983430862426758, + "logits/rejected": -1.0761035680770874, + "logps/chosen": -28.14908790588379, + "logps/rejected": -567.4022216796875, + "loss": 12266.7156, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19684790074825287, + "rewards/margins": 0.5404728055000305, + "rewards/rejected": -0.34362491965293884, + "step": 5420 + }, + { + "epoch": 27.539240506329115, + "grad_norm": 199249.17490991156, + "learning_rate": 1.3020996552804765e-07, + "logits/chosen": -1.3831968307495117, + "logits/rejected": -0.9957733154296875, + "logps/chosen": -34.38856887817383, + "logps/rejected": -583.5364379882812, + "loss": 12353.943, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20319747924804688, + "rewards/margins": 0.5525364875793457, + "rewards/rejected": -0.34933900833129883, + "step": 5430 + }, + { + "epoch": 27.58987341772152, + "grad_norm": 372801.7448533588, + "learning_rate": 1.2942651206518333e-07, + "logits/chosen": 0.7253493070602417, + "logits/rejected": 0.6416251063346863, + "logps/chosen": -36.44821548461914, + "logps/rejected": -557.3819580078125, + "loss": 12762.9742, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19811172783374786, + "rewards/margins": 0.5241624116897583, + "rewards/rejected": -0.32605066895484924, + "step": 5440 + }, + { + "epoch": 27.640506329113926, + "grad_norm": 250437.30987597498, + "learning_rate": 1.2864305860231904e-07, + "logits/chosen": -1.6367158889770508, + "logits/rejected": -0.9662375450134277, + "logps/chosen": -32.858455657958984, + "logps/rejected": -566.0185546875, + "loss": 13013.8914, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20383331179618835, + "rewards/margins": 0.5383815169334412, + "rewards/rejected": -0.3345482349395752, + "step": 5450 + }, + { + "epoch": 27.691139240506327, + "grad_norm": 395640.3468149828, + "learning_rate": 1.2785960513945471e-07, + "logits/chosen": -1.0696049928665161, + "logits/rejected": -0.7029746770858765, + "logps/chosen": -27.334697723388672, + "logps/rejected": -572.7042846679688, + "loss": 12608.9328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2039167582988739, + "rewards/margins": 0.5491318106651306, + "rewards/rejected": -0.3452150225639343, + "step": 5460 + }, + { + "epoch": 27.741772151898733, + "grad_norm": 737045.8738711793, + "learning_rate": 1.2707615167659042e-07, + "logits/chosen": -1.4398880004882812, + "logits/rejected": -0.3085852265357971, + "logps/chosen": -20.763835906982422, + "logps/rejected": -557.7874755859375, + "loss": 12662.9484, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20201142132282257, + "rewards/margins": 0.5384231209754944, + "rewards/rejected": -0.336411714553833, + "step": 5470 + }, + { + "epoch": 27.792405063291138, + "grad_norm": 286929.61277431983, + "learning_rate": 1.262926982137261e-07, + "logits/chosen": -0.6262455582618713, + "logits/rejected": -0.4802684783935547, + "logps/chosen": -23.99846076965332, + "logps/rejected": -575.8038940429688, + "loss": 12141.5641, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19609448313713074, + "rewards/margins": 0.551897406578064, + "rewards/rejected": -0.355802983045578, + "step": 5480 + }, + { + "epoch": 27.843037974683543, + "grad_norm": 749583.0814867924, + "learning_rate": 1.255092447508618e-07, + "logits/chosen": -1.7006927728652954, + "logits/rejected": -1.0466101169586182, + "logps/chosen": -29.710596084594727, + "logps/rejected": -591.6888427734375, + "loss": 12764.4375, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20634475350379944, + "rewards/margins": 0.5570266842842102, + "rewards/rejected": -0.3506819009780884, + "step": 5490 + }, + { + "epoch": 27.89367088607595, + "grad_norm": 380933.42642122327, + "learning_rate": 1.2472579128799749e-07, + "logits/chosen": -1.4751110076904297, + "logits/rejected": -0.9937122464179993, + "logps/chosen": -34.70015335083008, + "logps/rejected": -571.9727172851562, + "loss": 12012.7281, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.2022305727005005, + "rewards/margins": 0.5379850268363953, + "rewards/rejected": -0.3357544541358948, + "step": 5500 + }, + { + "epoch": 27.944303797468354, + "grad_norm": 258509.47313842815, + "learning_rate": 1.2394233782513317e-07, + "logits/chosen": -1.372650384902954, + "logits/rejected": -1.0075037479400635, + "logps/chosen": -35.64197540283203, + "logps/rejected": -593.0923461914062, + "loss": 11889.8898, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2117808610200882, + "rewards/margins": 0.5591001510620117, + "rewards/rejected": -0.34731921553611755, + "step": 5510 + }, + { + "epoch": 27.99493670886076, + "grad_norm": 208938.2840249938, + "learning_rate": 1.2315888436226887e-07, + "logits/chosen": -2.1264805793762207, + "logits/rejected": -1.4703245162963867, + "logps/chosen": -32.981266021728516, + "logps/rejected": -597.6434936523438, + "loss": 12257.6922, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.21187356114387512, + "rewards/margins": 0.5627579689025879, + "rewards/rejected": -0.3508843779563904, + "step": 5520 + }, + { + "epoch": 28.045569620253165, + "grad_norm": 221196.22582529782, + "learning_rate": 1.2237543089940458e-07, + "logits/chosen": -2.0594754219055176, + "logits/rejected": -0.8701013326644897, + "logps/chosen": -27.682659149169922, + "logps/rejected": -595.9862060546875, + "loss": 12458.1609, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21054939925670624, + "rewards/margins": 0.5732256174087524, + "rewards/rejected": -0.362676203250885, + "step": 5530 + }, + { + "epoch": 28.09620253164557, + "grad_norm": 270569.55775822, + "learning_rate": 1.2159197743654026e-07, + "logits/chosen": -1.7393264770507812, + "logits/rejected": -1.1281194686889648, + "logps/chosen": -24.0075626373291, + "logps/rejected": -583.3763427734375, + "loss": 12198.0859, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20713207125663757, + "rewards/margins": 0.5608252286911011, + "rewards/rejected": -0.35369327664375305, + "step": 5540 + }, + { + "epoch": 28.146835443037975, + "grad_norm": 294202.9420634267, + "learning_rate": 1.2080852397367596e-07, + "logits/chosen": -1.1881496906280518, + "logits/rejected": -1.9830278158187866, + "logps/chosen": -27.52215003967285, + "logps/rejected": -571.5521240234375, + "loss": 12035.3297, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.198753222823143, + "rewards/margins": 0.5402897596359253, + "rewards/rejected": -0.34153658151626587, + "step": 5550 + }, + { + "epoch": 28.19746835443038, + "grad_norm": 250256.82251298483, + "learning_rate": 1.2002507051081164e-07, + "logits/chosen": -2.069603443145752, + "logits/rejected": -1.1806148290634155, + "logps/chosen": -32.80065155029297, + "logps/rejected": -584.74169921875, + "loss": 12394.9164, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21380552649497986, + "rewards/margins": 0.5591468811035156, + "rewards/rejected": -0.345341295003891, + "step": 5560 + }, + { + "epoch": 28.248101265822786, + "grad_norm": 295332.47087175207, + "learning_rate": 1.1924161704794735e-07, + "logits/chosen": -1.0471255779266357, + "logits/rejected": -0.4857943654060364, + "logps/chosen": -21.058979034423828, + "logps/rejected": -572.4304809570312, + "loss": 12514.1922, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.1976846158504486, + "rewards/margins": 0.548615574836731, + "rewards/rejected": -0.35093095898628235, + "step": 5570 + }, + { + "epoch": 28.29873417721519, + "grad_norm": 206143.0104728106, + "learning_rate": 1.1845816358508304e-07, + "logits/chosen": -2.556028366088867, + "logits/rejected": -1.9551620483398438, + "logps/chosen": -35.4798698425293, + "logps/rejected": -586.085693359375, + "loss": 12969.0906, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20620682835578918, + "rewards/margins": 0.5480517148971558, + "rewards/rejected": -0.34184494614601135, + "step": 5580 + }, + { + "epoch": 28.349367088607593, + "grad_norm": 264961.57508088043, + "learning_rate": 1.1767471012221873e-07, + "logits/chosen": -1.3203023672103882, + "logits/rejected": -0.41819173097610474, + "logps/chosen": -35.34379959106445, + "logps/rejected": -582.7780151367188, + "loss": 11659.4484, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.21271836757659912, + "rewards/margins": 0.5540838837623596, + "rewards/rejected": -0.3413654863834381, + "step": 5590 + }, + { + "epoch": 28.4, + "grad_norm": 475056.8558156038, + "learning_rate": 1.1689125665935443e-07, + "logits/chosen": 0.2806483507156372, + "logits/rejected": 0.8025129437446594, + "logps/chosen": -32.23934555053711, + "logps/rejected": -571.2871704101562, + "loss": 12715.6078, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20215098559856415, + "rewards/margins": 0.5371149778366089, + "rewards/rejected": -0.33496397733688354, + "step": 5600 + }, + { + "epoch": 28.450632911392404, + "grad_norm": 197134.08463352287, + "learning_rate": 1.1610780319649012e-07, + "logits/chosen": -0.42449599504470825, + "logits/rejected": 0.20755800604820251, + "logps/chosen": -30.612218856811523, + "logps/rejected": -578.2881469726562, + "loss": 12352.6281, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.2100987732410431, + "rewards/margins": 0.5525861978530884, + "rewards/rejected": -0.34248748421669006, + "step": 5610 + }, + { + "epoch": 28.50126582278481, + "grad_norm": 345771.22083863505, + "learning_rate": 1.1532434973362581e-07, + "logits/chosen": -1.5660457611083984, + "logits/rejected": -0.7327693700790405, + "logps/chosen": -22.58321762084961, + "logps/rejected": -566.7658081054688, + "loss": 11851.4547, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19859129190444946, + "rewards/margins": 0.5416545271873474, + "rewards/rejected": -0.3430632948875427, + "step": 5620 + }, + { + "epoch": 28.551898734177215, + "grad_norm": 168585.12783774585, + "learning_rate": 1.145408962707615e-07, + "logits/chosen": -0.2972283363342285, + "logits/rejected": -0.3674158453941345, + "logps/chosen": -29.131546020507812, + "logps/rejected": -597.9761962890625, + "loss": 11512.6836, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21312180161476135, + "rewards/margins": 0.5691269040107727, + "rewards/rejected": -0.35600510239601135, + "step": 5630 + }, + { + "epoch": 28.60253164556962, + "grad_norm": 208909.02036407648, + "learning_rate": 1.137574428078972e-07, + "logits/chosen": -1.0681560039520264, + "logits/rejected": -0.39094457030296326, + "logps/chosen": -36.012596130371094, + "logps/rejected": -586.0516357421875, + "loss": 12808.1297, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20493356883525848, + "rewards/margins": 0.5560083389282227, + "rewards/rejected": -0.3510746955871582, + "step": 5640 + }, + { + "epoch": 28.653164556962025, + "grad_norm": 311846.42372199957, + "learning_rate": 1.1297398934503289e-07, + "logits/chosen": 0.5105953216552734, + "logits/rejected": 1.009169101715088, + "logps/chosen": -27.007156372070312, + "logps/rejected": -599.7515258789062, + "loss": 11991.1812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21386167407035828, + "rewards/margins": 0.572094202041626, + "rewards/rejected": -0.3582325577735901, + "step": 5650 + }, + { + "epoch": 28.70379746835443, + "grad_norm": 268717.3291778612, + "learning_rate": 1.1219053588216858e-07, + "logits/chosen": -0.8255645036697388, + "logits/rejected": -0.8527682423591614, + "logps/chosen": -21.579692840576172, + "logps/rejected": -585.7736206054688, + "loss": 12543.1672, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2072766274213791, + "rewards/margins": 0.5643941760063171, + "rewards/rejected": -0.35711759328842163, + "step": 5660 + }, + { + "epoch": 28.754430379746836, + "grad_norm": 251846.43966430755, + "learning_rate": 1.1140708241930429e-07, + "logits/chosen": -1.6031732559204102, + "logits/rejected": -0.6178330779075623, + "logps/chosen": -30.777385711669922, + "logps/rejected": -578.4786376953125, + "loss": 12538.3711, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20804066956043243, + "rewards/margins": 0.5539838075637817, + "rewards/rejected": -0.3459431827068329, + "step": 5670 + }, + { + "epoch": 28.80506329113924, + "grad_norm": 362284.87054599967, + "learning_rate": 1.1062362895643998e-07, + "logits/chosen": -0.6538245677947998, + "logits/rejected": -0.3702305555343628, + "logps/chosen": -26.261932373046875, + "logps/rejected": -577.5454711914062, + "loss": 13020.9297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20354709029197693, + "rewards/margins": 0.5529359579086304, + "rewards/rejected": -0.34938886761665344, + "step": 5680 + }, + { + "epoch": 28.855696202531647, + "grad_norm": 205761.31564795828, + "learning_rate": 1.0984017549357568e-07, + "logits/chosen": -2.4821999073028564, + "logits/rejected": -2.7491514682769775, + "logps/chosen": -33.985538482666016, + "logps/rejected": -588.4347534179688, + "loss": 12498.5609, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2070481777191162, + "rewards/margins": 0.5542899370193481, + "rewards/rejected": -0.34724172949790955, + "step": 5690 + }, + { + "epoch": 28.906329113924052, + "grad_norm": 269249.39255954395, + "learning_rate": 1.0905672203071137e-07, + "logits/chosen": 0.24643035233020782, + "logits/rejected": 0.39688020944595337, + "logps/chosen": -22.365093231201172, + "logps/rejected": -572.4009399414062, + "loss": 12145.2008, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20018497109413147, + "rewards/margins": 0.5481060147285461, + "rewards/rejected": -0.3479210138320923, + "step": 5700 + }, + { + "epoch": 28.956962025316457, + "grad_norm": 274939.7399900873, + "learning_rate": 1.0827326856784706e-07, + "logits/chosen": 0.24328431487083435, + "logits/rejected": 0.056040357798337936, + "logps/chosen": -27.859899520874023, + "logps/rejected": -582.8697509765625, + "loss": 12038.6203, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20646706223487854, + "rewards/margins": 0.5581387281417847, + "rewards/rejected": -0.35167163610458374, + "step": 5710 + }, + { + "epoch": 29.00759493670886, + "grad_norm": 177867.60188083298, + "learning_rate": 1.0748981510498275e-07, + "logits/chosen": -2.0902795791625977, + "logits/rejected": -1.2426658868789673, + "logps/chosen": -25.984241485595703, + "logps/rejected": -595.5320434570312, + "loss": 12101.3188, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21571488678455353, + "rewards/margins": 0.5755189061164856, + "rewards/rejected": -0.3598039150238037, + "step": 5720 + }, + { + "epoch": 29.058227848101264, + "grad_norm": 175055.77768040166, + "learning_rate": 1.0670636164211845e-07, + "logits/chosen": -3.0874876976013184, + "logits/rejected": -1.9259151220321655, + "logps/chosen": -30.317163467407227, + "logps/rejected": -582.4378662109375, + "loss": 12058.957, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20789256691932678, + "rewards/margins": 0.5540691018104553, + "rewards/rejected": -0.3461765944957733, + "step": 5730 + }, + { + "epoch": 29.10886075949367, + "grad_norm": 330095.71026448795, + "learning_rate": 1.0592290817925414e-07, + "logits/chosen": -0.40818461775779724, + "logits/rejected": -0.17450471222400665, + "logps/chosen": -37.967308044433594, + "logps/rejected": -574.5567626953125, + "loss": 12163.9234, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.199508398771286, + "rewards/margins": 0.5361432433128357, + "rewards/rejected": -0.3366348147392273, + "step": 5740 + }, + { + "epoch": 29.159493670886075, + "grad_norm": 207868.2185307626, + "learning_rate": 1.0513945471638983e-07, + "logits/chosen": -1.1228978633880615, + "logits/rejected": -0.8512986302375793, + "logps/chosen": -36.19347381591797, + "logps/rejected": -572.5546264648438, + "loss": 12217.475, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.20709916949272156, + "rewards/margins": 0.5452824234962463, + "rewards/rejected": -0.3381832540035248, + "step": 5750 + }, + { + "epoch": 29.21012658227848, + "grad_norm": 180300.955366917, + "learning_rate": 1.0435600125352554e-07, + "logits/chosen": -2.1935715675354004, + "logits/rejected": -1.450584888458252, + "logps/chosen": -41.38114547729492, + "logps/rejected": -551.9308471679688, + "loss": 11531.2219, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": 0.19797861576080322, + "rewards/margins": 0.5134168267250061, + "rewards/rejected": -0.3154382109642029, + "step": 5760 + }, + { + "epoch": 29.260759493670886, + "grad_norm": 230065.76491246693, + "learning_rate": 1.0357254779066123e-07, + "logits/chosen": -2.1162705421447754, + "logits/rejected": -1.343379020690918, + "logps/chosen": -26.30475425720215, + "logps/rejected": -584.0765380859375, + "loss": 12178.225, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21349939703941345, + "rewards/margins": 0.5615987181663513, + "rewards/rejected": -0.34809932112693787, + "step": 5770 + }, + { + "epoch": 29.31139240506329, + "grad_norm": 150891.5620522627, + "learning_rate": 1.0278909432779692e-07, + "logits/chosen": -0.6437171101570129, + "logits/rejected": -0.06186608225107193, + "logps/chosen": -32.27136993408203, + "logps/rejected": -575.0911865234375, + "loss": 12350.1367, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.20223280787467957, + "rewards/margins": 0.5462868213653564, + "rewards/rejected": -0.3440539240837097, + "step": 5780 + }, + { + "epoch": 29.362025316455696, + "grad_norm": 268215.91577526846, + "learning_rate": 1.0200564086493262e-07, + "logits/chosen": -2.4000306129455566, + "logits/rejected": -1.5239673852920532, + "logps/chosen": -44.228759765625, + "logps/rejected": -603.037109375, + "loss": 11602.7789, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.21766121685504913, + "rewards/margins": 0.5693429112434387, + "rewards/rejected": -0.3516816794872284, + "step": 5790 + }, + { + "epoch": 29.4126582278481, + "grad_norm": 153754.6030127712, + "learning_rate": 1.0122218740206831e-07, + "logits/chosen": 1.1010842323303223, + "logits/rejected": 1.6098358631134033, + "logps/chosen": -25.794830322265625, + "logps/rejected": -580.6827392578125, + "loss": 12135.457, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20671968162059784, + "rewards/margins": 0.5531316995620728, + "rewards/rejected": -0.3464120328426361, + "step": 5800 + }, + { + "epoch": 29.463291139240507, + "grad_norm": 237857.15032498536, + "learning_rate": 1.00438733939204e-07, + "logits/chosen": -2.2038140296936035, + "logits/rejected": -1.9258426427841187, + "logps/chosen": -24.270652770996094, + "logps/rejected": -592.76806640625, + "loss": 12368.1, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20859424769878387, + "rewards/margins": 0.5708917379379272, + "rewards/rejected": -0.3622974455356598, + "step": 5810 + }, + { + "epoch": 29.513924050632912, + "grad_norm": 229363.27347544604, + "learning_rate": 9.96552804763397e-08, + "logits/chosen": -1.733412742614746, + "logits/rejected": -1.8426891565322876, + "logps/chosen": -27.749902725219727, + "logps/rejected": -591.9719848632812, + "loss": 12434.6094, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20853643119335175, + "rewards/margins": 0.559594452381134, + "rewards/rejected": -0.3510579764842987, + "step": 5820 + }, + { + "epoch": 29.564556962025318, + "grad_norm": 204423.82729459935, + "learning_rate": 9.887182701347539e-08, + "logits/chosen": -0.8372312784194946, + "logits/rejected": -0.9436752200126648, + "logps/chosen": -23.713529586791992, + "logps/rejected": -551.91748046875, + "loss": 12191.0797, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.1975078582763672, + "rewards/margins": 0.5233575105667114, + "rewards/rejected": -0.32584962248802185, + "step": 5830 + }, + { + "epoch": 29.615189873417723, + "grad_norm": 196500.42803475718, + "learning_rate": 9.808837355061108e-08, + "logits/chosen": -0.07084647566080093, + "logits/rejected": 0.9050701856613159, + "logps/chosen": -29.59817886352539, + "logps/rejected": -567.6174926757812, + "loss": 12194.2234, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.20393919944763184, + "rewards/margins": 0.5439929366111755, + "rewards/rejected": -0.3400537371635437, + "step": 5840 + }, + { + "epoch": 29.665822784810125, + "grad_norm": 226455.28104673527, + "learning_rate": 9.730492008774677e-08, + "logits/chosen": -3.320272445678711, + "logits/rejected": -3.3560733795166016, + "logps/chosen": -28.402095794677734, + "logps/rejected": -602.0023193359375, + "loss": 12657.2406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2187713086605072, + "rewards/margins": 0.5724385976791382, + "rewards/rejected": -0.35366731882095337, + "step": 5850 + }, + { + "epoch": 29.71645569620253, + "grad_norm": 162035.60177504522, + "learning_rate": 9.652146662488248e-08, + "logits/chosen": -1.8201286792755127, + "logits/rejected": -1.7938740253448486, + "logps/chosen": -35.96394348144531, + "logps/rejected": -611.4141845703125, + "loss": 12011.9406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21413405239582062, + "rewards/margins": 0.5712839365005493, + "rewards/rejected": -0.3571499288082123, + "step": 5860 + }, + { + "epoch": 29.767088607594935, + "grad_norm": 162090.09030278528, + "learning_rate": 9.573801316201817e-08, + "logits/chosen": -0.6652274131774902, + "logits/rejected": -0.600281834602356, + "logps/chosen": -24.422576904296875, + "logps/rejected": -566.0366821289062, + "loss": 12593.6359, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.1961481273174286, + "rewards/margins": 0.5393214821815491, + "rewards/rejected": -0.3431733250617981, + "step": 5870 + }, + { + "epoch": 29.81772151898734, + "grad_norm": 365229.93961962714, + "learning_rate": 9.495455969915387e-08, + "logits/chosen": -2.613847017288208, + "logits/rejected": -2.108478546142578, + "logps/chosen": -29.573253631591797, + "logps/rejected": -577.60546875, + "loss": 12424.4891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20539173483848572, + "rewards/margins": 0.5470829010009766, + "rewards/rejected": -0.34169113636016846, + "step": 5880 + }, + { + "epoch": 29.868354430379746, + "grad_norm": 173325.82955161307, + "learning_rate": 9.417110623628956e-08, + "logits/chosen": -1.4006824493408203, + "logits/rejected": -0.5856371521949768, + "logps/chosen": -27.345510482788086, + "logps/rejected": -584.8424072265625, + "loss": 12358.3133, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.2079104632139206, + "rewards/margins": 0.5603929758071899, + "rewards/rejected": -0.35248249769210815, + "step": 5890 + }, + { + "epoch": 29.91898734177215, + "grad_norm": 287432.0969704827, + "learning_rate": 9.338765277342525e-08, + "logits/chosen": -0.21508927643299103, + "logits/rejected": -0.1394989937543869, + "logps/chosen": -30.839313507080078, + "logps/rejected": -594.2600708007812, + "loss": 11980.4219, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21064691245555878, + "rewards/margins": 0.5655493140220642, + "rewards/rejected": -0.354902446269989, + "step": 5900 + }, + { + "epoch": 29.969620253164557, + "grad_norm": 365207.2969153869, + "learning_rate": 9.260419931056094e-08, + "logits/chosen": -0.40759915113449097, + "logits/rejected": 0.3133270740509033, + "logps/chosen": -25.633676528930664, + "logps/rejected": -578.2957763671875, + "loss": 12223.2844, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20843228697776794, + "rewards/margins": 0.5534237027168274, + "rewards/rejected": -0.34499144554138184, + "step": 5910 + }, + { + "epoch": 30.020253164556962, + "grad_norm": 218071.18905642498, + "learning_rate": 9.182074584769664e-08, + "logits/chosen": -0.20139971375465393, + "logits/rejected": 0.6374796628952026, + "logps/chosen": -36.04420852661133, + "logps/rejected": -585.3655395507812, + "loss": 12139.8164, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20962996780872345, + "rewards/margins": 0.5547569394111633, + "rewards/rejected": -0.3451269865036011, + "step": 5920 + }, + { + "epoch": 30.070886075949367, + "grad_norm": 199502.22634833233, + "learning_rate": 9.103729238483233e-08, + "logits/chosen": -0.5093935132026672, + "logits/rejected": -0.9036226272583008, + "logps/chosen": -32.292423248291016, + "logps/rejected": -584.6748046875, + "loss": 11463.5555, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20751234889030457, + "rewards/margins": 0.5532687902450562, + "rewards/rejected": -0.345756471157074, + "step": 5930 + }, + { + "epoch": 30.121518987341773, + "grad_norm": 164683.94241544002, + "learning_rate": 9.025383892196802e-08, + "logits/chosen": -1.2027417421340942, + "logits/rejected": -0.21418258547782898, + "logps/chosen": -38.00572967529297, + "logps/rejected": -542.5321044921875, + "loss": 12248.6938, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.1922551691532135, + "rewards/margins": 0.5097079277038574, + "rewards/rejected": -0.3174527585506439, + "step": 5940 + }, + { + "epoch": 30.172151898734178, + "grad_norm": 209885.7696817789, + "learning_rate": 8.947038545910373e-08, + "logits/chosen": -0.5836046934127808, + "logits/rejected": -0.049278389662504196, + "logps/chosen": -26.44875144958496, + "logps/rejected": -577.2633056640625, + "loss": 11882.8156, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2028985321521759, + "rewards/margins": 0.548152506351471, + "rewards/rejected": -0.34525397419929504, + "step": 5950 + }, + { + "epoch": 30.222784810126583, + "grad_norm": 116064.20956709805, + "learning_rate": 8.868693199623942e-08, + "logits/chosen": -0.3441212773323059, + "logits/rejected": 0.3469446897506714, + "logps/chosen": -29.866031646728516, + "logps/rejected": -576.8469848632812, + "loss": 11899.9906, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20657262206077576, + "rewards/margins": 0.5527979731559753, + "rewards/rejected": -0.3462253212928772, + "step": 5960 + }, + { + "epoch": 30.27341772151899, + "grad_norm": 213446.8577722312, + "learning_rate": 8.790347853337511e-08, + "logits/chosen": -1.195245623588562, + "logits/rejected": -1.5595389604568481, + "logps/chosen": -26.48971939086914, + "logps/rejected": -562.9793090820312, + "loss": 12288.4188, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.19797027111053467, + "rewards/margins": 0.5365854501724243, + "rewards/rejected": -0.3386152386665344, + "step": 5970 + }, + { + "epoch": 30.324050632911394, + "grad_norm": 150392.6550831942, + "learning_rate": 8.712002507051081e-08, + "logits/chosen": -0.636971116065979, + "logits/rejected": -0.8326961398124695, + "logps/chosen": -31.82355308532715, + "logps/rejected": -572.6309814453125, + "loss": 11735.9594, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.20741339027881622, + "rewards/margins": 0.5429075360298157, + "rewards/rejected": -0.33549413084983826, + "step": 5980 + }, + { + "epoch": 30.374683544303796, + "grad_norm": 248873.00017903763, + "learning_rate": 8.63365716076465e-08, + "logits/chosen": -0.8763412237167358, + "logits/rejected": -0.38471752405166626, + "logps/chosen": -33.753509521484375, + "logps/rejected": -577.4928588867188, + "loss": 11981.0336, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.20298103988170624, + "rewards/margins": 0.5445905923843384, + "rewards/rejected": -0.34160953760147095, + "step": 5990 + }, + { + "epoch": 30.4253164556962, + "grad_norm": 247123.70966936232, + "learning_rate": 8.555311814478219e-08, + "logits/chosen": -1.4638581275939941, + "logits/rejected": -1.6560137271881104, + "logps/chosen": -27.02420425415039, + "logps/rejected": -579.2672119140625, + "loss": 12743.5711, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20767991244792938, + "rewards/margins": 0.5485936403274536, + "rewards/rejected": -0.3409137427806854, + "step": 6000 + }, + { + "epoch": 30.475949367088607, + "grad_norm": 152247.200364489, + "learning_rate": 8.476966468191789e-08, + "logits/chosen": -1.2626516819000244, + "logits/rejected": -1.3656198978424072, + "logps/chosen": -30.586597442626953, + "logps/rejected": -564.9951782226562, + "loss": 12138.7328, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19354796409606934, + "rewards/margins": 0.5365390181541443, + "rewards/rejected": -0.34299105405807495, + "step": 6010 + }, + { + "epoch": 30.526582278481012, + "grad_norm": 153551.3953399981, + "learning_rate": 8.398621121905358e-08, + "logits/chosen": -0.8625293970108032, + "logits/rejected": -1.6173267364501953, + "logps/chosen": -23.908416748046875, + "logps/rejected": -591.5709228515625, + "loss": 12247.5672, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2063622921705246, + "rewards/margins": 0.5622067451477051, + "rewards/rejected": -0.3558444678783417, + "step": 6020 + }, + { + "epoch": 30.577215189873417, + "grad_norm": 247558.34356145174, + "learning_rate": 8.320275775618927e-08, + "logits/chosen": -0.6456829309463501, + "logits/rejected": -0.25254157185554504, + "logps/chosen": -30.65958023071289, + "logps/rejected": -572.8914794921875, + "loss": 11907.7406, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20363232493400574, + "rewards/margins": 0.5450500845909119, + "rewards/rejected": -0.34141772985458374, + "step": 6030 + }, + { + "epoch": 30.627848101265823, + "grad_norm": 162045.80468301394, + "learning_rate": 8.241930429332496e-08, + "logits/chosen": 0.5193571448326111, + "logits/rejected": 1.0150249004364014, + "logps/chosen": -21.961605072021484, + "logps/rejected": -586.28369140625, + "loss": 11870.2594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20082764327526093, + "rewards/margins": 0.5631116032600403, + "rewards/rejected": -0.36228394508361816, + "step": 6040 + }, + { + "epoch": 30.678481012658228, + "grad_norm": 183677.73161043233, + "learning_rate": 8.163585083046067e-08, + "logits/chosen": -2.2690348625183105, + "logits/rejected": -1.8725353479385376, + "logps/chosen": -34.20100021362305, + "logps/rejected": -571.948486328125, + "loss": 11952.7477, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.2089938223361969, + "rewards/margins": 0.542193591594696, + "rewards/rejected": -0.33319979906082153, + "step": 6050 + }, + { + "epoch": 30.729113924050633, + "grad_norm": 206299.51509471133, + "learning_rate": 8.085239736759636e-08, + "logits/chosen": -1.8886245489120483, + "logits/rejected": -1.428289532661438, + "logps/chosen": -33.442771911621094, + "logps/rejected": -577.7710571289062, + "loss": 12094.3477, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.20627331733703613, + "rewards/margins": 0.5446707606315613, + "rewards/rejected": -0.3383975028991699, + "step": 6060 + }, + { + "epoch": 30.77974683544304, + "grad_norm": 178455.47052363763, + "learning_rate": 8.006894390473206e-08, + "logits/chosen": -0.9159374237060547, + "logits/rejected": -0.5700797438621521, + "logps/chosen": -22.003402709960938, + "logps/rejected": -588.1246948242188, + "loss": 12967.7109, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20624502003192902, + "rewards/margins": 0.5602100491523743, + "rewards/rejected": -0.35396507382392883, + "step": 6070 + }, + { + "epoch": 30.830379746835444, + "grad_norm": 188994.549896938, + "learning_rate": 7.928549044186775e-08, + "logits/chosen": -2.218046188354492, + "logits/rejected": -2.298725128173828, + "logps/chosen": -36.601036071777344, + "logps/rejected": -578.4909057617188, + "loss": 11942.907, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20413891971111298, + "rewards/margins": 0.5424162149429321, + "rewards/rejected": -0.33827728033065796, + "step": 6080 + }, + { + "epoch": 30.88101265822785, + "grad_norm": 226618.0916543629, + "learning_rate": 7.850203697900344e-08, + "logits/chosen": -0.8958581686019897, + "logits/rejected": -0.3350396454334259, + "logps/chosen": -27.914409637451172, + "logps/rejected": -584.2742919921875, + "loss": 12020.4094, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.2059321403503418, + "rewards/margins": 0.5559757947921753, + "rewards/rejected": -0.3500436246395111, + "step": 6090 + }, + { + "epoch": 30.931645569620255, + "grad_norm": 193720.76624447017, + "learning_rate": 7.771858351613913e-08, + "logits/chosen": -0.13203875720500946, + "logits/rejected": -0.22968029975891113, + "logps/chosen": -25.164508819580078, + "logps/rejected": -573.2855224609375, + "loss": 12096.6344, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.1958034336566925, + "rewards/margins": 0.5465744137763977, + "rewards/rejected": -0.3507709503173828, + "step": 6100 + }, + { + "epoch": 30.98227848101266, + "grad_norm": 177238.22636435836, + "learning_rate": 7.693513005327483e-08, + "logits/chosen": -2.0759382247924805, + "logits/rejected": -1.4708411693572998, + "logps/chosen": -28.522485733032227, + "logps/rejected": -573.73681640625, + "loss": 12017.775, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20944443345069885, + "rewards/margins": 0.5498504042625427, + "rewards/rejected": -0.34040600061416626, + "step": 6110 + }, + { + "epoch": 31.03291139240506, + "grad_norm": 111295.73044950665, + "learning_rate": 7.615167659041052e-08, + "logits/chosen": -0.7748550772666931, + "logits/rejected": -0.973538875579834, + "logps/chosen": -31.6827335357666, + "logps/rejected": -594.8641357421875, + "loss": 11721.4203, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20987281203269958, + "rewards/margins": 0.5614258050918579, + "rewards/rejected": -0.3515530228614807, + "step": 6120 + }, + { + "epoch": 31.083544303797467, + "grad_norm": 132943.3056647964, + "learning_rate": 7.536822312754621e-08, + "logits/chosen": -2.017181396484375, + "logits/rejected": -1.8383163213729858, + "logps/chosen": -32.51802062988281, + "logps/rejected": -609.6942138671875, + "loss": 12392.7875, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22407253086566925, + "rewards/margins": 0.582473874092102, + "rewards/rejected": -0.3584012985229492, + "step": 6130 + }, + { + "epoch": 31.134177215189872, + "grad_norm": 174931.96319021285, + "learning_rate": 7.45847696646819e-08, + "logits/chosen": -0.5535727143287659, + "logits/rejected": 0.6218046545982361, + "logps/chosen": -26.1910457611084, + "logps/rejected": -551.5840454101562, + "loss": 11699.3109, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20296287536621094, + "rewards/margins": 0.5298973917961121, + "rewards/rejected": -0.3269345760345459, + "step": 6140 + }, + { + "epoch": 31.184810126582278, + "grad_norm": 168688.32644125135, + "learning_rate": 7.380131620181761e-08, + "logits/chosen": -1.008988618850708, + "logits/rejected": -0.2778696119785309, + "logps/chosen": -33.33096694946289, + "logps/rejected": -607.976806640625, + "loss": 11916.4016, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.21690383553504944, + "rewards/margins": 0.5754967331886292, + "rewards/rejected": -0.3585929274559021, + "step": 6150 + }, + { + "epoch": 31.235443037974683, + "grad_norm": 94661.132576451, + "learning_rate": 7.30178627389533e-08, + "logits/chosen": -3.0997250080108643, + "logits/rejected": -2.1219401359558105, + "logps/chosen": -27.209686279296875, + "logps/rejected": -589.7662353515625, + "loss": 12111.9188, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21759450435638428, + "rewards/margins": 0.5674911737442017, + "rewards/rejected": -0.3498966693878174, + "step": 6160 + }, + { + "epoch": 31.28607594936709, + "grad_norm": 129537.98682999605, + "learning_rate": 7.2234409276089e-08, + "logits/chosen": -2.1777210235595703, + "logits/rejected": -2.1664652824401855, + "logps/chosen": -29.21515464782715, + "logps/rejected": -575.5145263671875, + "loss": 12396.4562, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20168697834014893, + "rewards/margins": 0.5468615293502808, + "rewards/rejected": -0.3451746106147766, + "step": 6170 + }, + { + "epoch": 31.336708860759494, + "grad_norm": 146320.37748909468, + "learning_rate": 7.145095581322469e-08, + "logits/chosen": -0.37119048833847046, + "logits/rejected": -0.12678974866867065, + "logps/chosen": -27.464313507080078, + "logps/rejected": -583.199462890625, + "loss": 12035.1789, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20687448978424072, + "rewards/margins": 0.5559764504432678, + "rewards/rejected": -0.3491020202636719, + "step": 6180 + }, + { + "epoch": 31.3873417721519, + "grad_norm": 123464.43072965978, + "learning_rate": 7.066750235036038e-08, + "logits/chosen": -1.114485740661621, + "logits/rejected": -0.36546590924263, + "logps/chosen": -24.96463394165039, + "logps/rejected": -573.1627197265625, + "loss": 12102.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2025957852602005, + "rewards/margins": 0.5483575463294983, + "rewards/rejected": -0.3457617163658142, + "step": 6190 + }, + { + "epoch": 31.437974683544304, + "grad_norm": 182155.23164206932, + "learning_rate": 6.988404888749608e-08, + "logits/chosen": -1.7520939111709595, + "logits/rejected": -1.4854246377944946, + "logps/chosen": -29.002777099609375, + "logps/rejected": -592.4381713867188, + "loss": 11423.6828, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2187313735485077, + "rewards/margins": 0.5657260417938232, + "rewards/rejected": -0.34699463844299316, + "step": 6200 + }, + { + "epoch": 31.48860759493671, + "grad_norm": 148737.16455364344, + "learning_rate": 6.910059542463177e-08, + "logits/chosen": 0.025389552116394043, + "logits/rejected": -0.27969443798065186, + "logps/chosen": -17.67035675048828, + "logps/rejected": -546.9998168945312, + "loss": 11498.325, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19227565824985504, + "rewards/margins": 0.5237180590629578, + "rewards/rejected": -0.33144229650497437, + "step": 6210 + }, + { + "epoch": 31.539240506329115, + "grad_norm": 186784.06647045226, + "learning_rate": 6.831714196176746e-08, + "logits/chosen": -3.0769848823547363, + "logits/rejected": -2.87144136428833, + "logps/chosen": -25.640066146850586, + "logps/rejected": -605.6832885742188, + "loss": 11701.2086, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21926145255565643, + "rewards/margins": 0.5798953771591187, + "rewards/rejected": -0.3606340289115906, + "step": 6220 + }, + { + "epoch": 31.58987341772152, + "grad_norm": 108314.28535819704, + "learning_rate": 6.753368849890315e-08, + "logits/chosen": -0.5384847521781921, + "logits/rejected": -0.6974294781684875, + "logps/chosen": -26.830814361572266, + "logps/rejected": -587.5255126953125, + "loss": 11231.8016, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.20746394991874695, + "rewards/margins": 0.557998776435852, + "rewards/rejected": -0.3505348265171051, + "step": 6230 + }, + { + "epoch": 31.640506329113926, + "grad_norm": 197387.20948770002, + "learning_rate": 6.675023503603886e-08, + "logits/chosen": -0.6654781103134155, + "logits/rejected": -1.1572941541671753, + "logps/chosen": -27.918231964111328, + "logps/rejected": -592.8441162109375, + "loss": 11850.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21056847274303436, + "rewards/margins": 0.5671868920326233, + "rewards/rejected": -0.3566184341907501, + "step": 6240 + }, + { + "epoch": 31.691139240506327, + "grad_norm": 178129.00858003844, + "learning_rate": 6.596678157317455e-08, + "logits/chosen": 0.17990253865718842, + "logits/rejected": 0.15132752060890198, + "logps/chosen": -26.486125946044922, + "logps/rejected": -577.5296020507812, + "loss": 12025.9992, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.1961621642112732, + "rewards/margins": 0.5460348725318909, + "rewards/rejected": -0.34987273812294006, + "step": 6250 + }, + { + "epoch": 31.741772151898733, + "grad_norm": 113204.1607298857, + "learning_rate": 6.518332811031025e-08, + "logits/chosen": -0.7701491117477417, + "logits/rejected": -0.5652084946632385, + "logps/chosen": -30.580230712890625, + "logps/rejected": -575.9344482421875, + "loss": 12611.7422, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20266905426979065, + "rewards/margins": 0.5443531274795532, + "rewards/rejected": -0.34168410301208496, + "step": 6260 + }, + { + "epoch": 31.792405063291138, + "grad_norm": 170084.77349090017, + "learning_rate": 6.439987464744594e-08, + "logits/chosen": 0.8593052625656128, + "logits/rejected": 1.1197197437286377, + "logps/chosen": -26.577016830444336, + "logps/rejected": -555.6820068359375, + "loss": 12234.5422, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19716337323188782, + "rewards/margins": 0.5328342318534851, + "rewards/rejected": -0.33567091822624207, + "step": 6270 + }, + { + "epoch": 31.843037974683543, + "grad_norm": 235274.58346107465, + "learning_rate": 6.361642118458163e-08, + "logits/chosen": -1.7307960987091064, + "logits/rejected": -1.3535115718841553, + "logps/chosen": -23.92806625366211, + "logps/rejected": -565.2352294921875, + "loss": 12517.5156, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.2007029801607132, + "rewards/margins": 0.5428507924079895, + "rewards/rejected": -0.3421478271484375, + "step": 6280 + }, + { + "epoch": 31.89367088607595, + "grad_norm": 190203.888446938, + "learning_rate": 6.283296772171732e-08, + "logits/chosen": -0.9662951231002808, + "logits/rejected": -0.45983943343162537, + "logps/chosen": -26.488794326782227, + "logps/rejected": -565.1602783203125, + "loss": 12050.4156, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20456723868846893, + "rewards/margins": 0.5392366051673889, + "rewards/rejected": -0.3346693515777588, + "step": 6290 + }, + { + "epoch": 31.944303797468354, + "grad_norm": 169175.47682307824, + "learning_rate": 6.204951425885302e-08, + "logits/chosen": -1.9982364177703857, + "logits/rejected": -1.282958745956421, + "logps/chosen": -25.263113021850586, + "logps/rejected": -584.7576293945312, + "loss": 11806.3297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20914848148822784, + "rewards/margins": 0.5613822937011719, + "rewards/rejected": -0.35223376750946045, + "step": 6300 + }, + { + "epoch": 31.99493670886076, + "grad_norm": 142938.702725119, + "learning_rate": 6.126606079598871e-08, + "logits/chosen": -2.084618091583252, + "logits/rejected": -1.6745023727416992, + "logps/chosen": -24.918956756591797, + "logps/rejected": -603.4859619140625, + "loss": 12022.5133, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21575181186199188, + "rewards/margins": 0.5819977521896362, + "rewards/rejected": -0.36624595522880554, + "step": 6310 + }, + { + "epoch": 32.04556962025316, + "grad_norm": 146925.77007874168, + "learning_rate": 6.04826073331244e-08, + "logits/chosen": -1.0771139860153198, + "logits/rejected": -0.38963261246681213, + "logps/chosen": -25.353687286376953, + "logps/rejected": -599.3104248046875, + "loss": 11649.7609, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21451549232006073, + "rewards/margins": 0.5766840577125549, + "rewards/rejected": -0.3621685206890106, + "step": 6320 + }, + { + "epoch": 32.09620253164557, + "grad_norm": 94333.82344683389, + "learning_rate": 5.96991538702601e-08, + "logits/chosen": -2.162341356277466, + "logits/rejected": -1.5530678033828735, + "logps/chosen": -36.120880126953125, + "logps/rejected": -594.9260864257812, + "loss": 11919.4, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.2106127291917801, + "rewards/margins": 0.558625340461731, + "rewards/rejected": -0.34801262617111206, + "step": 6330 + }, + { + "epoch": 32.14683544303797, + "grad_norm": 144438.33677050017, + "learning_rate": 5.8915700407395795e-08, + "logits/chosen": -0.8229999542236328, + "logits/rejected": -0.037537313997745514, + "logps/chosen": -25.43358612060547, + "logps/rejected": -557.636474609375, + "loss": 11297.6063, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.1968574970960617, + "rewards/margins": 0.532370388507843, + "rewards/rejected": -0.33551284670829773, + "step": 6340 + }, + { + "epoch": 32.19746835443038, + "grad_norm": 109693.94525690017, + "learning_rate": 5.813224694453149e-08, + "logits/chosen": -3.077913761138916, + "logits/rejected": -2.4543375968933105, + "logps/chosen": -26.92588233947754, + "logps/rejected": -583.3746337890625, + "loss": 12147.5016, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.21152964234352112, + "rewards/margins": 0.5570891499519348, + "rewards/rejected": -0.3455595374107361, + "step": 6350 + }, + { + "epoch": 32.24810126582278, + "grad_norm": 94464.04824246689, + "learning_rate": 5.734879348166719e-08, + "logits/chosen": -0.08146251738071442, + "logits/rejected": -0.1943734884262085, + "logps/chosen": -38.933929443359375, + "logps/rejected": -599.4444580078125, + "loss": 11706.7859, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.21242408454418182, + "rewards/margins": 0.5596734881401062, + "rewards/rejected": -0.34724941849708557, + "step": 6360 + }, + { + "epoch": 32.29873417721519, + "grad_norm": 93779.41167523999, + "learning_rate": 5.656534001880288e-08, + "logits/chosen": 0.4058389663696289, + "logits/rejected": 0.994676947593689, + "logps/chosen": -21.240737915039062, + "logps/rejected": -573.2392578125, + "loss": 12153.6359, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19539888203144073, + "rewards/margins": 0.550510048866272, + "rewards/rejected": -0.35511118173599243, + "step": 6370 + }, + { + "epoch": 32.34936708860759, + "grad_norm": 215459.26677533987, + "learning_rate": 5.5781886555938573e-08, + "logits/chosen": -1.0755536556243896, + "logits/rejected": -0.2684146761894226, + "logps/chosen": -25.781116485595703, + "logps/rejected": -580.9659423828125, + "loss": 11508.8133, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.21089033782482147, + "rewards/margins": 0.5592586994171143, + "rewards/rejected": -0.34836840629577637, + "step": 6380 + }, + { + "epoch": 32.4, + "grad_norm": 164612.93717131627, + "learning_rate": 5.4998433093074266e-08, + "logits/chosen": -2.730407238006592, + "logits/rejected": -2.2623066902160645, + "logps/chosen": -38.27416229248047, + "logps/rejected": -612.3323364257812, + "loss": 10969.9328, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.22319836914539337, + "rewards/margins": 0.573035478591919, + "rewards/rejected": -0.34983712434768677, + "step": 6390 + }, + { + "epoch": 32.450632911392404, + "grad_norm": 140032.81053392185, + "learning_rate": 5.421497963020996e-08, + "logits/chosen": -0.6492301821708679, + "logits/rejected": -0.778862476348877, + "logps/chosen": -28.754650115966797, + "logps/rejected": -591.8221435546875, + "loss": 12521.7703, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21057042479515076, + "rewards/margins": 0.5634862780570984, + "rewards/rejected": -0.35291582345962524, + "step": 6400 + }, + { + "epoch": 32.50126582278481, + "grad_norm": 102205.70485715618, + "learning_rate": 5.343152616734566e-08, + "logits/chosen": -0.9864907264709473, + "logits/rejected": -0.19051684439182281, + "logps/chosen": -29.4318904876709, + "logps/rejected": -605.131103515625, + "loss": 11591.8508, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2185964584350586, + "rewards/margins": 0.579878032207489, + "rewards/rejected": -0.3612816333770752, + "step": 6410 + }, + { + "epoch": 32.551898734177215, + "grad_norm": 103047.13529668628, + "learning_rate": 5.264807270448135e-08, + "logits/chosen": -2.3946361541748047, + "logits/rejected": -1.8663170337677002, + "logps/chosen": -22.362850189208984, + "logps/rejected": -582.4278564453125, + "loss": 11901.1398, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21427400410175323, + "rewards/margins": 0.5642385482788086, + "rewards/rejected": -0.34996455907821655, + "step": 6420 + }, + { + "epoch": 32.60253164556962, + "grad_norm": 86074.947460872, + "learning_rate": 5.1864619241617044e-08, + "logits/chosen": 0.2598368227481842, + "logits/rejected": 0.16884984076023102, + "logps/chosen": -22.76316261291504, + "logps/rejected": -594.866455078125, + "loss": 12333.5344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2121623456478119, + "rewards/margins": 0.5697360038757324, + "rewards/rejected": -0.35757365822792053, + "step": 6430 + }, + { + "epoch": 32.653164556962025, + "grad_norm": 137970.73954909868, + "learning_rate": 5.108116577875274e-08, + "logits/chosen": -0.11699090898036957, + "logits/rejected": 0.11212899535894394, + "logps/chosen": -29.464065551757812, + "logps/rejected": -573.3801879882812, + "loss": 11953.9641, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.21537606418132782, + "rewards/margins": 0.5438817739486694, + "rewards/rejected": -0.3285056948661804, + "step": 6440 + }, + { + "epoch": 32.70379746835443, + "grad_norm": 460796.64629538235, + "learning_rate": 5.029771231588843e-08, + "logits/chosen": -1.4031693935394287, + "logits/rejected": -2.1060502529144287, + "logps/chosen": -23.794132232666016, + "logps/rejected": -581.7036743164062, + "loss": 12159.9719, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20509609580039978, + "rewards/margins": 0.5560418367385864, + "rewards/rejected": -0.35094568133354187, + "step": 6450 + }, + { + "epoch": 32.754430379746836, + "grad_norm": 88571.49642806537, + "learning_rate": 4.951425885302413e-08, + "logits/chosen": -0.29163846373558044, + "logits/rejected": 0.15456560254096985, + "logps/chosen": -19.800487518310547, + "logps/rejected": -562.6231689453125, + "loss": 11758.9578, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2048061192035675, + "rewards/margins": 0.5433157682418823, + "rewards/rejected": -0.33850961923599243, + "step": 6460 + }, + { + "epoch": 32.80506329113924, + "grad_norm": 166818.40028028333, + "learning_rate": 4.873080539015982e-08, + "logits/chosen": 0.3278934061527252, + "logits/rejected": 0.6011670827865601, + "logps/chosen": -33.445350646972656, + "logps/rejected": -590.470703125, + "loss": 11395.1164, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.2123481035232544, + "rewards/margins": 0.5555016994476318, + "rewards/rejected": -0.34315359592437744, + "step": 6470 + }, + { + "epoch": 32.85569620253165, + "grad_norm": 80619.8591659213, + "learning_rate": 4.7947351927295515e-08, + "logits/chosen": -1.3291213512420654, + "logits/rejected": -1.6056814193725586, + "logps/chosen": -29.16250228881836, + "logps/rejected": -598.3140869140625, + "loss": 11908.6562, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21245749294757843, + "rewards/margins": 0.5684391856193542, + "rewards/rejected": -0.3559816777706146, + "step": 6480 + }, + { + "epoch": 32.90632911392405, + "grad_norm": 109452.38261580766, + "learning_rate": 4.716389846443121e-08, + "logits/chosen": -2.2227654457092285, + "logits/rejected": -2.1318516731262207, + "logps/chosen": -27.57879638671875, + "logps/rejected": -593.1817626953125, + "loss": 11900.8148, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2101704627275467, + "rewards/margins": 0.565523624420166, + "rewards/rejected": -0.35535311698913574, + "step": 6490 + }, + { + "epoch": 32.95696202531646, + "grad_norm": 146037.74057243837, + "learning_rate": 4.63804450015669e-08, + "logits/chosen": -0.4855597913265228, + "logits/rejected": -0.07905157655477524, + "logps/chosen": -32.26173782348633, + "logps/rejected": -582.983154296875, + "loss": 12785.9484, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.2102789580821991, + "rewards/margins": 0.5541440844535828, + "rewards/rejected": -0.34386518597602844, + "step": 6500 + }, + { + "epoch": 33.00759493670886, + "grad_norm": 80554.44381289573, + "learning_rate": 4.55969915387026e-08, + "logits/chosen": -1.16013503074646, + "logits/rejected": -1.237755537033081, + "logps/chosen": -22.434879302978516, + "logps/rejected": -572.4281005859375, + "loss": 11892.3344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2072155922651291, + "rewards/margins": 0.54491126537323, + "rewards/rejected": -0.3376956880092621, + "step": 6510 + }, + { + "epoch": 33.05822784810127, + "grad_norm": 128557.62032643631, + "learning_rate": 4.481353807583829e-08, + "logits/chosen": -0.2354935109615326, + "logits/rejected": 0.728766143321991, + "logps/chosen": -29.432445526123047, + "logps/rejected": -585.3494262695312, + "loss": 11835.0961, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.2072407454252243, + "rewards/margins": 0.5606441497802734, + "rewards/rejected": -0.35340338945388794, + "step": 6520 + }, + { + "epoch": 33.10886075949367, + "grad_norm": 91776.99508964189, + "learning_rate": 4.4030084612973985e-08, + "logits/chosen": -1.175462007522583, + "logits/rejected": -1.1933832168579102, + "logps/chosen": -21.900630950927734, + "logps/rejected": -574.4762573242188, + "loss": 12157.9109, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20543113350868225, + "rewards/margins": 0.5517674684524536, + "rewards/rejected": -0.346336305141449, + "step": 6530 + }, + { + "epoch": 33.15949367088608, + "grad_norm": 89893.29258028018, + "learning_rate": 4.324663115010968e-08, + "logits/chosen": -0.7350924015045166, + "logits/rejected": -0.16997528076171875, + "logps/chosen": -23.83113670349121, + "logps/rejected": -575.5424194335938, + "loss": 11686.9375, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20410069823265076, + "rewards/margins": 0.5512816309928894, + "rewards/rejected": -0.34718090295791626, + "step": 6540 + }, + { + "epoch": 33.210126582278484, + "grad_norm": 120975.35903766478, + "learning_rate": 4.246317768724538e-08, + "logits/chosen": -0.08163319528102875, + "logits/rejected": 0.07650710642337799, + "logps/chosen": -27.332035064697266, + "logps/rejected": -579.8117065429688, + "loss": 11339.9297, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.2052970826625824, + "rewards/margins": 0.5518554449081421, + "rewards/rejected": -0.3465583324432373, + "step": 6550 + }, + { + "epoch": 33.26075949367089, + "grad_norm": 180391.18731890293, + "learning_rate": 4.167972422438107e-08, + "logits/chosen": -0.8266963958740234, + "logits/rejected": 1.0672438144683838, + "logps/chosen": -23.287370681762695, + "logps/rejected": -572.2568969726562, + "loss": 11743.5586, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.19874341785907745, + "rewards/margins": 0.5572081804275513, + "rewards/rejected": -0.35846468806266785, + "step": 6560 + }, + { + "epoch": 33.311392405063295, + "grad_norm": 84282.72341083131, + "learning_rate": 4.0896270761516763e-08, + "logits/chosen": -1.91861093044281, + "logits/rejected": -1.3766604661941528, + "logps/chosen": -24.914443969726562, + "logps/rejected": -581.4729614257812, + "loss": 11078.6969, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21130716800689697, + "rewards/margins": 0.5577182769775391, + "rewards/rejected": -0.34641116857528687, + "step": 6570 + }, + { + "epoch": 33.36202531645569, + "grad_norm": 199903.347381946, + "learning_rate": 4.0112817298652456e-08, + "logits/chosen": -1.2995800971984863, + "logits/rejected": -1.6440702676773071, + "logps/chosen": -22.356828689575195, + "logps/rejected": -591.6265869140625, + "loss": 11937.0477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2072306126356125, + "rewards/margins": 0.5668342709541321, + "rewards/rejected": -0.3596035838127136, + "step": 6580 + }, + { + "epoch": 33.4126582278481, + "grad_norm": 138603.96487037002, + "learning_rate": 3.932936383578815e-08, + "logits/chosen": 0.8098524212837219, + "logits/rejected": 1.2947828769683838, + "logps/chosen": -26.31606674194336, + "logps/rejected": -584.9072265625, + "loss": 11177.5336, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20386937260627747, + "rewards/margins": 0.5589767694473267, + "rewards/rejected": -0.3551073968410492, + "step": 6590 + }, + { + "epoch": 33.4632911392405, + "grad_norm": 123948.78500072335, + "learning_rate": 3.854591037292385e-08, + "logits/chosen": -2.16947603225708, + "logits/rejected": -1.0904394388198853, + "logps/chosen": -42.8673095703125, + "logps/rejected": -585.2350463867188, + "loss": 11894.6641, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2195717990398407, + "rewards/margins": 0.5601873397827148, + "rewards/rejected": -0.34061557054519653, + "step": 6600 + }, + { + "epoch": 33.51392405063291, + "grad_norm": 113327.62874252205, + "learning_rate": 3.776245691005954e-08, + "logits/chosen": -1.375249981880188, + "logits/rejected": -0.7785667181015015, + "logps/chosen": -29.649211883544922, + "logps/rejected": -602.9840698242188, + "loss": 12210.0344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22011515498161316, + "rewards/margins": 0.5793704390525818, + "rewards/rejected": -0.35925528407096863, + "step": 6610 + }, + { + "epoch": 33.564556962025314, + "grad_norm": 79524.96422723045, + "learning_rate": 3.6979003447195234e-08, + "logits/chosen": -0.7508550882339478, + "logits/rejected": -0.23799777030944824, + "logps/chosen": -17.09669303894043, + "logps/rejected": -572.3134155273438, + "loss": 12138.4203, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2047223150730133, + "rewards/margins": 0.5538768768310547, + "rewards/rejected": -0.34915462136268616, + "step": 6620 + }, + { + "epoch": 33.61518987341772, + "grad_norm": 80597.64263401506, + "learning_rate": 3.619554998433093e-08, + "logits/chosen": -1.7500404119491577, + "logits/rejected": -1.4937622547149658, + "logps/chosen": -24.847320556640625, + "logps/rejected": -594.1591796875, + "loss": 12270.6344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21394848823547363, + "rewards/margins": 0.5700836181640625, + "rewards/rejected": -0.35613518953323364, + "step": 6630 + }, + { + "epoch": 33.665822784810125, + "grad_norm": 100669.75725024722, + "learning_rate": 3.541209652146662e-08, + "logits/chosen": -0.4524414539337158, + "logits/rejected": -0.5694657564163208, + "logps/chosen": -25.72067642211914, + "logps/rejected": -572.9901123046875, + "loss": 11448.4047, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.20151250064373016, + "rewards/margins": 0.5470980405807495, + "rewards/rejected": -0.345585435628891, + "step": 6640 + }, + { + "epoch": 33.71645569620253, + "grad_norm": 136734.1372891588, + "learning_rate": 3.462864305860232e-08, + "logits/chosen": -0.10392338037490845, + "logits/rejected": 0.025324154645204544, + "logps/chosen": -23.138744354248047, + "logps/rejected": -578.2369995117188, + "loss": 11719.0234, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.2065146416425705, + "rewards/margins": 0.5536417365074158, + "rewards/rejected": -0.3471270501613617, + "step": 6650 + }, + { + "epoch": 33.767088607594935, + "grad_norm": 96060.1935775592, + "learning_rate": 3.384518959573801e-08, + "logits/chosen": -1.5298357009887695, + "logits/rejected": -1.111659049987793, + "logps/chosen": -36.602691650390625, + "logps/rejected": -594.2269287109375, + "loss": 11903.4828, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.21466748416423798, + "rewards/margins": 0.5613253116607666, + "rewards/rejected": -0.34665781259536743, + "step": 6660 + }, + { + "epoch": 33.81772151898734, + "grad_norm": 82308.39144839271, + "learning_rate": 3.3061736132873705e-08, + "logits/chosen": -1.9629747867584229, + "logits/rejected": -1.8584734201431274, + "logps/chosen": -17.865947723388672, + "logps/rejected": -566.314453125, + "loss": 12147.5891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2041165828704834, + "rewards/margins": 0.5491331219673157, + "rewards/rejected": -0.3450164496898651, + "step": 6670 + }, + { + "epoch": 33.868354430379746, + "grad_norm": 132433.76933098322, + "learning_rate": 3.22782826700094e-08, + "logits/chosen": -0.10643855482339859, + "logits/rejected": 0.1565506011247635, + "logps/chosen": -23.206607818603516, + "logps/rejected": -565.3855590820312, + "loss": 11928.0656, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19701281189918518, + "rewards/margins": 0.5372076630592346, + "rewards/rejected": -0.34019485116004944, + "step": 6680 + }, + { + "epoch": 33.91898734177215, + "grad_norm": 99524.21425394616, + "learning_rate": 3.149482920714509e-08, + "logits/chosen": 0.7746875286102295, + "logits/rejected": 1.4906342029571533, + "logps/chosen": -28.62857437133789, + "logps/rejected": -569.8626708984375, + "loss": 11616.475, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.20620207488536835, + "rewards/margins": 0.548004686832428, + "rewards/rejected": -0.3418026268482208, + "step": 6690 + }, + { + "epoch": 33.96962025316456, + "grad_norm": 72753.16066899289, + "learning_rate": 3.071137574428079e-08, + "logits/chosen": 0.6492331624031067, + "logits/rejected": 0.7617141604423523, + "logps/chosen": -25.677988052368164, + "logps/rejected": -560.1131591796875, + "loss": 12074.9086, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19937190413475037, + "rewards/margins": 0.5361818075180054, + "rewards/rejected": -0.33680984377861023, + "step": 6700 + }, + { + "epoch": 34.02025316455696, + "grad_norm": 69602.29112012005, + "learning_rate": 2.992792228141648e-08, + "logits/chosen": -1.09294855594635, + "logits/rejected": -0.46724218130111694, + "logps/chosen": -27.281131744384766, + "logps/rejected": -564.8081665039062, + "loss": 11690.2227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20867964625358582, + "rewards/margins": 0.5397676825523376, + "rewards/rejected": -0.3310880661010742, + "step": 6710 + }, + { + "epoch": 34.07088607594937, + "grad_norm": 124920.949317699, + "learning_rate": 2.9144468818552176e-08, + "logits/chosen": -0.5232747197151184, + "logits/rejected": 0.22049197554588318, + "logps/chosen": -28.443017959594727, + "logps/rejected": -575.3606567382812, + "loss": 11532.3461, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20501036942005157, + "rewards/margins": 0.5482991933822632, + "rewards/rejected": -0.34328892827033997, + "step": 6720 + }, + { + "epoch": 34.12151898734177, + "grad_norm": 83544.836801972, + "learning_rate": 2.836101535568787e-08, + "logits/chosen": -1.3320372104644775, + "logits/rejected": -1.3442357778549194, + "logps/chosen": -22.64333724975586, + "logps/rejected": -572.712890625, + "loss": 12274.693, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20690365135669708, + "rewards/margins": 0.550982654094696, + "rewards/rejected": -0.34407907724380493, + "step": 6730 + }, + { + "epoch": 34.17215189873418, + "grad_norm": 59570.622166337576, + "learning_rate": 2.7577561892823564e-08, + "logits/chosen": -0.21290139853954315, + "logits/rejected": -0.059629153460264206, + "logps/chosen": -25.701587677001953, + "logps/rejected": -593.871826171875, + "loss": 11275.0086, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.21512384712696075, + "rewards/margins": 0.5740803480148315, + "rewards/rejected": -0.358956515789032, + "step": 6740 + }, + { + "epoch": 34.22278481012658, + "grad_norm": 65440.143207260466, + "learning_rate": 2.6794108429959257e-08, + "logits/chosen": 0.7750476598739624, + "logits/rejected": 0.7844541668891907, + "logps/chosen": -20.633939743041992, + "logps/rejected": -552.0474853515625, + "loss": 11428.4266, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19471852481365204, + "rewards/margins": 0.5300661325454712, + "rewards/rejected": -0.33534759283065796, + "step": 6750 + }, + { + "epoch": 34.27341772151899, + "grad_norm": 68525.87135636444, + "learning_rate": 2.6010654967094953e-08, + "logits/chosen": -0.38150349259376526, + "logits/rejected": -0.23816132545471191, + "logps/chosen": -22.66727066040039, + "logps/rejected": -573.2194213867188, + "loss": 11908.0906, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21106655895709991, + "rewards/margins": 0.551810622215271, + "rewards/rejected": -0.3407440483570099, + "step": 6760 + }, + { + "epoch": 34.324050632911394, + "grad_norm": 106626.98105878729, + "learning_rate": 2.5227201504230646e-08, + "logits/chosen": -1.4471355676651, + "logits/rejected": -1.1485611200332642, + "logps/chosen": -30.057971954345703, + "logps/rejected": -575.90185546875, + "loss": 12232.9586, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21069788932800293, + "rewards/margins": 0.5553726553916931, + "rewards/rejected": -0.3446747958660126, + "step": 6770 + }, + { + "epoch": 34.3746835443038, + "grad_norm": 82201.6406165968, + "learning_rate": 2.4443748041366342e-08, + "logits/chosen": -0.32743334770202637, + "logits/rejected": -0.1376263052225113, + "logps/chosen": -21.2067928314209, + "logps/rejected": -579.6965942382812, + "loss": 12257.0297, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.1995442807674408, + "rewards/margins": 0.5562165975570679, + "rewards/rejected": -0.35667237639427185, + "step": 6780 + }, + { + "epoch": 34.425316455696205, + "grad_norm": 101176.94346633952, + "learning_rate": 2.3660294578502035e-08, + "logits/chosen": -0.027527982369065285, + "logits/rejected": 0.533041775226593, + "logps/chosen": -26.760761260986328, + "logps/rejected": -598.9354248046875, + "loss": 11819.9969, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21746928989887238, + "rewards/margins": 0.5736481547355652, + "rewards/rejected": -0.356178879737854, + "step": 6790 + }, + { + "epoch": 34.47594936708861, + "grad_norm": 121876.45413951192, + "learning_rate": 2.2876841115637728e-08, + "logits/chosen": 0.13078555464744568, + "logits/rejected": 0.333305299282074, + "logps/chosen": -21.788782119750977, + "logps/rejected": -576.6356201171875, + "loss": 11798.6219, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20838093757629395, + "rewards/margins": 0.5560372471809387, + "rewards/rejected": -0.34765633940696716, + "step": 6800 + }, + { + "epoch": 34.526582278481015, + "grad_norm": 82394.08815026373, + "learning_rate": 2.2093387652773424e-08, + "logits/chosen": -1.4721665382385254, + "logits/rejected": -0.7360283732414246, + "logps/chosen": -25.797271728515625, + "logps/rejected": -575.2802734375, + "loss": 11867.5641, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21113534271717072, + "rewards/margins": 0.5519530177116394, + "rewards/rejected": -0.34081774950027466, + "step": 6810 + }, + { + "epoch": 34.57721518987342, + "grad_norm": 95810.78866413939, + "learning_rate": 2.1309934189909117e-08, + "logits/chosen": -1.161169409751892, + "logits/rejected": -0.91253662109375, + "logps/chosen": -27.88307762145996, + "logps/rejected": -587.9513549804688, + "loss": 12214.8375, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20574505627155304, + "rewards/margins": 0.5562767386436462, + "rewards/rejected": -0.3505316376686096, + "step": 6820 + }, + { + "epoch": 34.627848101265826, + "grad_norm": 172219.54727864428, + "learning_rate": 2.0526480727044813e-08, + "logits/chosen": -0.3230019509792328, + "logits/rejected": 0.021614838391542435, + "logps/chosen": -25.339590072631836, + "logps/rejected": -562.7479858398438, + "loss": 12054.6734, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.19978457689285278, + "rewards/margins": 0.539312481880188, + "rewards/rejected": -0.3395279347896576, + "step": 6830 + }, + { + "epoch": 34.678481012658224, + "grad_norm": 97553.39405702737, + "learning_rate": 1.9743027264180506e-08, + "logits/chosen": 0.4619535505771637, + "logits/rejected": 1.0029349327087402, + "logps/chosen": -25.021116256713867, + "logps/rejected": -548.2736206054688, + "loss": 11655.4656, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.19646362960338593, + "rewards/margins": 0.5229059457778931, + "rewards/rejected": -0.32644230127334595, + "step": 6840 + }, + { + "epoch": 34.72911392405063, + "grad_norm": 85932.39441572773, + "learning_rate": 1.8959573801316202e-08, + "logits/chosen": -1.6316754817962646, + "logits/rejected": -1.3359291553497314, + "logps/chosen": -29.495471954345703, + "logps/rejected": -592.9409790039062, + "loss": 11807.9469, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.21343278884887695, + "rewards/margins": 0.5635305643081665, + "rewards/rejected": -0.3500978350639343, + "step": 6850 + }, + { + "epoch": 34.779746835443035, + "grad_norm": 111046.4046992094, + "learning_rate": 1.8176120338451895e-08, + "logits/chosen": -1.9672811031341553, + "logits/rejected": -1.9657671451568604, + "logps/chosen": -28.83864402770996, + "logps/rejected": -585.9581298828125, + "loss": 11170.3703, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.20914232730865479, + "rewards/margins": 0.5588939189910889, + "rewards/rejected": -0.3497515618801117, + "step": 6860 + }, + { + "epoch": 34.83037974683544, + "grad_norm": 85983.71784945966, + "learning_rate": 1.7392666875587588e-08, + "logits/chosen": -0.45147451758384705, + "logits/rejected": -0.5098736882209778, + "logps/chosen": -27.473459243774414, + "logps/rejected": -575.5574340820312, + "loss": 10970.9664, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.20568005740642548, + "rewards/margins": 0.5446674823760986, + "rewards/rejected": -0.33898741006851196, + "step": 6870 + }, + { + "epoch": 34.881012658227846, + "grad_norm": 65511.20912205873, + "learning_rate": 1.6609213412723284e-08, + "logits/chosen": -0.6330152750015259, + "logits/rejected": 0.2800363004207611, + "logps/chosen": -35.236671447753906, + "logps/rejected": -596.6030883789062, + "loss": 11516.7508, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.21417097747325897, + "rewards/margins": 0.5598101019859314, + "rewards/rejected": -0.3456391394138336, + "step": 6880 + }, + { + "epoch": 34.93164556962025, + "grad_norm": 92014.0392742902, + "learning_rate": 1.5825759949858977e-08, + "logits/chosen": 0.384327232837677, + "logits/rejected": 0.5035692453384399, + "logps/chosen": -30.62520408630371, + "logps/rejected": -565.2235107421875, + "loss": 11703.7977, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.2038690596818924, + "rewards/margins": 0.5329573750495911, + "rewards/rejected": -0.3290882706642151, + "step": 6890 + }, + { + "epoch": 34.982278481012656, + "grad_norm": 66423.21573889554, + "learning_rate": 1.5042306486994673e-08, + "logits/chosen": 0.34197521209716797, + "logits/rejected": 0.8052785992622375, + "logps/chosen": -28.992361068725586, + "logps/rejected": -575.634033203125, + "loss": 11720.9023, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.20474262535572052, + "rewards/margins": 0.5458477735519409, + "rewards/rejected": -0.3411051332950592, + "step": 6900 + }, + { + "epoch": 35.03291139240506, + "grad_norm": 131437.33515286856, + "learning_rate": 1.4258853024130366e-08, + "logits/chosen": -1.0964847803115845, + "logits/rejected": -0.780900239944458, + "logps/chosen": -32.114646911621094, + "logps/rejected": -596.6611328125, + "loss": 11782.3414, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.2081715166568756, + "rewards/margins": 0.5639813542366028, + "rewards/rejected": -0.3558098375797272, + "step": 6910 + }, + { + "epoch": 35.08354430379747, + "grad_norm": 56330.643408705866, + "learning_rate": 1.347539956126606e-08, + "logits/chosen": -0.7748197913169861, + "logits/rejected": -0.39359089732170105, + "logps/chosen": -25.1397705078125, + "logps/rejected": -592.2284545898438, + "loss": 12438.4016, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.21363647282123566, + "rewards/margins": 0.5692847371101379, + "rewards/rejected": -0.35564830899238586, + "step": 6920 + }, + { + "epoch": 35.13417721518987, + "grad_norm": 61670.67939940539, + "learning_rate": 1.2691946098401754e-08, + "logits/chosen": -0.4548005163669586, + "logits/rejected": -0.3663405776023865, + "logps/chosen": -30.274677276611328, + "logps/rejected": -566.1094970703125, + "loss": 12029.2031, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.19851429760456085, + "rewards/margins": 0.5346571803092957, + "rewards/rejected": -0.3361428678035736, + "step": 6930 + }, + { + "epoch": 35.18481012658228, + "grad_norm": 58623.275250495586, + "learning_rate": 1.1908492635537449e-08, + "logits/chosen": -0.15851683914661407, + "logits/rejected": 0.04081523418426514, + "logps/chosen": -29.98187828063965, + "logps/rejected": -592.0548095703125, + "loss": 11041.75, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21129722893238068, + "rewards/margins": 0.5635300874710083, + "rewards/rejected": -0.3522329032421112, + "step": 6940 + }, + { + "epoch": 35.23544303797468, + "grad_norm": 64412.181909102495, + "learning_rate": 1.1125039172673142e-08, + "logits/chosen": -1.656226396560669, + "logits/rejected": -0.20408448576927185, + "logps/chosen": -33.12029266357422, + "logps/rejected": -594.759033203125, + "loss": 11519.8148, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21017961204051971, + "rewards/margins": 0.5670366883277893, + "rewards/rejected": -0.356857031583786, + "step": 6950 + }, + { + "epoch": 35.28607594936709, + "grad_norm": 56297.660589582665, + "learning_rate": 1.0341585709808836e-08, + "logits/chosen": -1.547500491142273, + "logits/rejected": -0.6769775152206421, + "logps/chosen": -24.704757690429688, + "logps/rejected": -576.3121337890625, + "loss": 11605.0844, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21090254187583923, + "rewards/margins": 0.5502737760543823, + "rewards/rejected": -0.3393712043762207, + "step": 6960 + }, + { + "epoch": 35.336708860759494, + "grad_norm": 56909.53482731003, + "learning_rate": 9.55813224694453e-09, + "logits/chosen": -1.0313692092895508, + "logits/rejected": -0.7449108362197876, + "logps/chosen": -26.623498916625977, + "logps/rejected": -587.3609008789062, + "loss": 11753.7047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21617145836353302, + "rewards/margins": 0.5669609308242798, + "rewards/rejected": -0.35078948736190796, + "step": 6970 + }, + { + "epoch": 35.3873417721519, + "grad_norm": 178835.84410749955, + "learning_rate": 8.774678784080225e-09, + "logits/chosen": -1.1763582229614258, + "logits/rejected": -1.1659915447235107, + "logps/chosen": -31.361690521240234, + "logps/rejected": -579.3533325195312, + "loss": 11790.1078, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.20984821021556854, + "rewards/margins": 0.5475795269012451, + "rewards/rejected": -0.337731271982193, + "step": 6980 + }, + { + "epoch": 35.437974683544304, + "grad_norm": 71703.60906538504, + "learning_rate": 7.99122532121592e-09, + "logits/chosen": -0.38636288046836853, + "logits/rejected": -0.6006811857223511, + "logps/chosen": -34.191776275634766, + "logps/rejected": -590.3701171875, + "loss": 11606.5477, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.21669046580791473, + "rewards/margins": 0.5592674016952515, + "rewards/rejected": -0.34257692098617554, + "step": 6990 + }, + { + "epoch": 35.48860759493671, + "grad_norm": 63272.25835527106, + "learning_rate": 7.207771858351613e-09, + "logits/chosen": 2.0330252647399902, + "logits/rejected": 1.6279929876327515, + "logps/chosen": -23.355932235717773, + "logps/rejected": -561.3161010742188, + "loss": 11705.225, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20284771919250488, + "rewards/margins": 0.5360475778579712, + "rewards/rejected": -0.3331999182701111, + "step": 7000 + }, + { + "epoch": 35.539240506329115, + "grad_norm": 61249.27413263123, + "learning_rate": 6.424318395487308e-09, + "logits/chosen": -0.4422365128993988, + "logits/rejected": 0.3416946828365326, + "logps/chosen": -23.512128829956055, + "logps/rejected": -589.8380126953125, + "loss": 12052.432, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21374277770519257, + "rewards/margins": 0.5721359252929688, + "rewards/rejected": -0.358393132686615, + "step": 7010 + }, + { + "epoch": 35.58987341772152, + "grad_norm": 71404.84613231423, + "learning_rate": 5.6408649326230014e-09, + "logits/chosen": -0.8322502374649048, + "logits/rejected": -0.6665211319923401, + "logps/chosen": -27.32610511779785, + "logps/rejected": -572.2705078125, + "loss": 11414.2812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20042335987091064, + "rewards/margins": 0.5454776287078857, + "rewards/rejected": -0.3450542390346527, + "step": 7020 + }, + { + "epoch": 35.640506329113926, + "grad_norm": 59338.91271881947, + "learning_rate": 4.857411469758696e-09, + "logits/chosen": -0.6001558303833008, + "logits/rejected": -0.34640446305274963, + "logps/chosen": -27.738061904907227, + "logps/rejected": -566.6168212890625, + "loss": 11256.2984, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.20105385780334473, + "rewards/margins": 0.5371016263961792, + "rewards/rejected": -0.33604779839515686, + "step": 7030 + }, + { + "epoch": 35.69113924050633, + "grad_norm": 58656.86338579773, + "learning_rate": 4.07395800689439e-09, + "logits/chosen": -1.311295509338379, + "logits/rejected": -0.9886859655380249, + "logps/chosen": -34.54024124145508, + "logps/rejected": -594.0494384765625, + "loss": 11898.4797, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20786690711975098, + "rewards/margins": 0.5536772608757019, + "rewards/rejected": -0.3458103537559509, + "step": 7040 + }, + { + "epoch": 35.741772151898736, + "grad_norm": 63433.605932250306, + "learning_rate": 3.2905045440300845e-09, + "logits/chosen": -2.9923501014709473, + "logits/rejected": -2.3592472076416016, + "logps/chosen": -27.545886993408203, + "logps/rejected": -589.269775390625, + "loss": 11235.3266, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.21022014319896698, + "rewards/margins": 0.5570772886276245, + "rewards/rejected": -0.34685713052749634, + "step": 7050 + }, + { + "epoch": 35.79240506329114, + "grad_norm": 134377.54958033512, + "learning_rate": 2.5070510811657785e-09, + "logits/chosen": -1.720534324645996, + "logits/rejected": -1.3578100204467773, + "logps/chosen": -29.213916778564453, + "logps/rejected": -567.0501708984375, + "loss": 11589.5594, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.20267781615257263, + "rewards/margins": 0.537537693977356, + "rewards/rejected": -0.3348599672317505, + "step": 7060 + }, + { + "epoch": 35.84303797468355, + "grad_norm": 67914.02766915208, + "learning_rate": 1.7235976183014728e-09, + "logits/chosen": -0.6159377098083496, + "logits/rejected": 0.34991899132728577, + "logps/chosen": -21.085241317749023, + "logps/rejected": -550.9933471679688, + "loss": 11967.5938, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.199566051363945, + "rewards/margins": 0.5368520021438599, + "rewards/rejected": -0.33728593587875366, + "step": 7070 + }, + { + "epoch": 35.89367088607595, + "grad_norm": 59297.02701129836, + "learning_rate": 9.40144155437167e-10, + "logits/chosen": 0.5505496859550476, + "logits/rejected": 1.5590342283248901, + "logps/chosen": -19.077686309814453, + "logps/rejected": -584.4923095703125, + "loss": 11927.0484, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20645365118980408, + "rewards/margins": 0.5670816898345947, + "rewards/rejected": -0.36062803864479065, + "step": 7080 + }, + { + "epoch": 35.94430379746836, + "grad_norm": 57292.383516847614, + "learning_rate": 1.5669069257286116e-10, + "logits/chosen": -1.5784406661987305, + "logits/rejected": -1.423514485359192, + "logps/chosen": -30.030658721923828, + "logps/rejected": -591.372314453125, + "loss": 11925.8789, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.2148711234331131, + "rewards/margins": 0.5598932504653931, + "rewards/rejected": -0.3450221121311188, + "step": 7090 + }, + { + "epoch": 35.95443037974684, + "step": 7092, "total_flos": 0.0, - "train_loss": 44832.452316430485, - "train_runtime": 5475.4345, - "train_samples_per_second": 41.5, - "train_steps_per_second": 0.648 + "train_loss": 6426.156043451248, + "train_runtime": 5724.5218, + "train_samples_per_second": 79.389, + "train_steps_per_second": 1.239 } ], "logging_steps": 10, - "max_steps": 3546, + "max_steps": 7092, "num_input_tokens_seen": 0, - "num_train_epochs": 18, + "num_train_epochs": 36, "save_steps": 500, "stateful_callbacks": { "TrainerControl": {