{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999940148073666, "eval_steps": 3000, "global_step": 83539, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.985156811108451e-10, "logits/chosen": -3.1595897674560547, "logits/rejected": -3.1600539684295654, "logps/chosen": -29.751483917236328, "logps/rejected": -32.18402862548828, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 5.9851568111084515e-09, "logits/chosen": -3.0476577281951904, "logits/rejected": -3.0387895107269287, "logps/chosen": -50.96046447753906, "logps/rejected": -39.55612564086914, "loss": 0.6933, "rewards/accuracies": 0.3888888955116272, "rewards/chosen": 0.0001933934836415574, "rewards/margins": -0.00028623687103390694, "rewards/rejected": 0.0004796303401235491, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.1970313622216903e-08, "logits/chosen": -3.0577995777130127, "logits/rejected": -2.9819416999816895, "logps/chosen": -60.881004333496094, "logps/rejected": -41.831336975097656, "loss": 0.6928, "rewards/accuracies": 0.75, "rewards/chosen": 0.00023711964604444802, "rewards/margins": 0.0005375709151849151, "rewards/rejected": -0.0003004512400366366, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.7955470433325355e-08, "logits/chosen": -3.0252952575683594, "logits/rejected": -2.993367910385132, "logps/chosen": -45.432579040527344, "logps/rejected": -39.98278045654297, "loss": 0.6932, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.00046413802192546427, "rewards/margins": -0.0002343616506550461, "rewards/rejected": -0.0002297763421665877, "step": 30 }, { "epoch": 0.0, "learning_rate": 2.3940627244433806e-08, "logits/chosen": -3.0437285900115967, "logits/rejected": -3.027071952819824, "logps/chosen": -48.441734313964844, "logps/rejected": -40.9954948425293, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": -0.00048252014676108956, "rewards/margins": 9.43575068959035e-05, "rewards/rejected": -0.000576877617277205, "step": 40 }, { "epoch": 0.0, "learning_rate": 2.9925784055542257e-08, "logits/chosen": -3.0695409774780273, "logits/rejected": -3.043973207473755, "logps/chosen": -44.363525390625, "logps/rejected": -38.23174285888672, "loss": 0.6931, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 0.000403587386244908, "rewards/margins": -0.0003355264780111611, "rewards/rejected": 0.0007391137769445777, "step": 50 }, { "epoch": 0.0, "learning_rate": 3.591094086665071e-08, "logits/chosen": -3.078068256378174, "logits/rejected": -3.0241925716400146, "logps/chosen": -61.52326202392578, "logps/rejected": -40.074649810791016, "loss": 0.6933, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.0002323064545635134, "rewards/margins": -0.0002680778852663934, "rewards/rejected": 0.0005003843107260764, "step": 60 }, { "epoch": 0.0, "learning_rate": 4.1896097677759165e-08, "logits/chosen": -3.0115699768066406, "logits/rejected": -2.966271162033081, "logps/chosen": -50.087276458740234, "logps/rejected": -38.335906982421875, "loss": 0.693, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.00023007395793683827, "rewards/margins": 0.00015357972006313503, "rewards/rejected": -0.0003836536197923124, "step": 70 }, { "epoch": 0.0, "learning_rate": 4.788125448886761e-08, "logits/chosen": -3.04616117477417, "logits/rejected": -3.0014028549194336, "logps/chosen": -45.337974548339844, "logps/rejected": -37.777408599853516, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": 0.0006883688038215041, "rewards/margins": 0.0006181478966027498, "rewards/rejected": 7.022095815045759e-05, "step": 80 }, { "epoch": 0.0, "learning_rate": 5.386641129997606e-08, "logits/chosen": -3.057298183441162, "logits/rejected": -3.039503574371338, "logps/chosen": -43.8466682434082, "logps/rejected": -37.9150276184082, "loss": 0.6927, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.001003199489787221, "rewards/margins": 0.0013734169770032167, "rewards/rejected": -0.00037021731259301305, "step": 90 }, { "epoch": 0.0, "learning_rate": 5.985156811108451e-08, "logits/chosen": -3.083526134490967, "logits/rejected": -3.0499730110168457, "logps/chosen": -57.14862060546875, "logps/rejected": -38.714866638183594, "loss": 0.6929, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0004728373605757952, "rewards/margins": 0.0003573312424123287, "rewards/rejected": 0.00011550616181921214, "step": 100 }, { "epoch": 0.0, "learning_rate": 6.583672492219297e-08, "logits/chosen": -3.0298755168914795, "logits/rejected": -2.9800007343292236, "logps/chosen": -48.36798095703125, "logps/rejected": -38.29817581176758, "loss": 0.6931, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.00023258686996996403, "rewards/margins": 0.0005147275514900684, "rewards/rejected": -0.0002821407688315958, "step": 110 }, { "epoch": 0.0, "learning_rate": 7.182188173330142e-08, "logits/chosen": -3.043761730194092, "logits/rejected": -3.033459424972534, "logps/chosen": -42.07256317138672, "logps/rejected": -38.03791809082031, "loss": 0.693, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0005167398485355079, "rewards/margins": 0.0004588412994053215, "rewards/rejected": 5.7898541854228824e-05, "step": 120 }, { "epoch": 0.0, "learning_rate": 7.780703854440987e-08, "logits/chosen": -3.0815553665161133, "logits/rejected": -3.055021286010742, "logps/chosen": -42.96208953857422, "logps/rejected": -38.19512939453125, "loss": 0.6927, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.00030453395447693765, "rewards/margins": 0.0005212526302784681, "rewards/rejected": -0.0008257866138592362, "step": 130 }, { "epoch": 0.0, "learning_rate": 8.379219535551833e-08, "logits/chosen": -3.0571625232696533, "logits/rejected": -3.0482616424560547, "logps/chosen": -35.88307571411133, "logps/rejected": -36.79515838623047, "loss": 0.6927, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 3.2189363992074504e-05, "rewards/margins": 0.0017573032528162003, "rewards/rejected": -0.0017251137178391218, "step": 140 }, { "epoch": 0.0, "learning_rate": 8.977735216662676e-08, "logits/chosen": -3.0584263801574707, "logits/rejected": -3.0264885425567627, "logps/chosen": -41.21123123168945, "logps/rejected": -38.426239013671875, "loss": 0.6929, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0011655932758003473, "rewards/margins": -1.7296779333264567e-05, "rewards/rejected": -0.0011482962872833014, "step": 150 }, { "epoch": 0.0, "learning_rate": 9.576250897773522e-08, "logits/chosen": -3.087806224822998, "logits/rejected": -3.0528178215026855, "logps/chosen": -53.61150360107422, "logps/rejected": -39.902442932128906, "loss": 0.6927, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0002896928635891527, "rewards/margins": 0.0006838254630565643, "rewards/rejected": -0.0009735182975418866, "step": 160 }, { "epoch": 0.0, "learning_rate": 1.0174766578884368e-07, "logits/chosen": -3.0734519958496094, "logits/rejected": -3.0308375358581543, "logps/chosen": -56.85178756713867, "logps/rejected": -39.09946823120117, "loss": 0.6926, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -8.751298446441069e-05, "rewards/margins": 0.0003826694446615875, "rewards/rejected": -0.000470182450953871, "step": 170 }, { "epoch": 0.0, "learning_rate": 1.0773282259995212e-07, "logits/chosen": -3.0535778999328613, "logits/rejected": -3.0448131561279297, "logps/chosen": -43.86568069458008, "logps/rejected": -38.038578033447266, "loss": 0.6921, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0001562252000439912, "rewards/margins": 0.0021981659810990095, "rewards/rejected": -0.0023543909192085266, "step": 180 }, { "epoch": 0.0, "learning_rate": 1.1371797941106058e-07, "logits/chosen": -3.060574531555176, "logits/rejected": -3.0336670875549316, "logps/chosen": -47.913780212402344, "logps/rejected": -40.20252227783203, "loss": 0.6927, "rewards/accuracies": 0.5, "rewards/chosen": -0.0006726656574755907, "rewards/margins": -4.3593463487923145e-05, "rewards/rejected": -0.0006290721939876676, "step": 190 }, { "epoch": 0.0, "learning_rate": 1.1970313622216903e-07, "logits/chosen": -3.0766069889068604, "logits/rejected": -3.056199550628662, "logps/chosen": -53.416534423828125, "logps/rejected": -39.05076217651367, "loss": 0.6922, "rewards/accuracies": 0.75, "rewards/chosen": -0.0009806986199691892, "rewards/margins": 0.0019876775331795216, "rewards/rejected": -0.002968376036733389, "step": 200 }, { "epoch": 0.0, "learning_rate": 1.2568829303327747e-07, "logits/chosen": -3.0950303077697754, "logits/rejected": -3.041083335876465, "logps/chosen": -48.25699234008789, "logps/rejected": -40.36809539794922, "loss": 0.6913, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.00016922382928896695, "rewards/margins": 0.0038205154705792665, "rewards/rejected": -0.003989739343523979, "step": 210 }, { "epoch": 0.0, "learning_rate": 1.3167344984438595e-07, "logits/chosen": -3.0802948474884033, "logits/rejected": -3.052696704864502, "logps/chosen": -52.03981399536133, "logps/rejected": -39.190093994140625, "loss": 0.6915, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.00042807860882021487, "rewards/margins": 0.002897277008742094, "rewards/rejected": -0.0033253554720431566, "step": 220 }, { "epoch": 0.0, "learning_rate": 1.376586066554944e-07, "logits/chosen": -3.069141387939453, "logits/rejected": -3.05415940284729, "logps/chosen": -41.10334014892578, "logps/rejected": -39.70377731323242, "loss": 0.6911, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0009990587132051587, "rewards/margins": 0.003824639366939664, "rewards/rejected": -0.004823697730898857, "step": 230 }, { "epoch": 0.0, "learning_rate": 1.4364376346660284e-07, "logits/chosen": -3.0693342685699463, "logits/rejected": -3.0305442810058594, "logps/chosen": -43.16814041137695, "logps/rejected": -38.5555305480957, "loss": 0.6904, "rewards/accuracies": 1.0, "rewards/chosen": -5.438993684947491e-05, "rewards/margins": 0.005840381141752005, "rewards/rejected": -0.005894770845770836, "step": 240 }, { "epoch": 0.0, "learning_rate": 1.496289202777113e-07, "logits/chosen": -3.067919969558716, "logits/rejected": -3.0440850257873535, "logps/chosen": -40.2978515625, "logps/rejected": -38.00495147705078, "loss": 0.6914, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.000789571728091687, "rewards/margins": 0.004220838658511639, "rewards/rejected": -0.005010410211980343, "step": 250 }, { "epoch": 0.0, "learning_rate": 1.5561407708881974e-07, "logits/chosen": -3.06196928024292, "logits/rejected": -3.031461715698242, "logps/chosen": -47.11362838745117, "logps/rejected": -41.57375717163086, "loss": 0.6902, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0001270122593268752, "rewards/margins": 0.005970128811895847, "rewards/rejected": -0.006097140721976757, "step": 260 }, { "epoch": 0.0, "learning_rate": 1.615992338999282e-07, "logits/chosen": -3.025503635406494, "logits/rejected": -2.9929351806640625, "logps/chosen": -46.321903228759766, "logps/rejected": -39.075592041015625, "loss": 0.6897, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0005372153245843947, "rewards/margins": 0.007370346691459417, "rewards/rejected": -0.007907562889158726, "step": 270 }, { "epoch": 0.0, "learning_rate": 1.6758439071103666e-07, "logits/chosen": -3.0878207683563232, "logits/rejected": -3.037193775177002, "logps/chosen": -57.6303596496582, "logps/rejected": -43.522804260253906, "loss": 0.6903, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.0007219409453682601, "rewards/margins": 0.0054283542558550835, "rewards/rejected": -0.006150295492261648, "step": 280 }, { "epoch": 0.0, "learning_rate": 1.7356954752214508e-07, "logits/chosen": -3.062349557876587, "logits/rejected": -3.051316022872925, "logps/chosen": -45.62682342529297, "logps/rejected": -38.98369216918945, "loss": 0.6893, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0008811626466922462, "rewards/margins": 0.007509407587349415, "rewards/rejected": -0.008390570059418678, "step": 290 }, { "epoch": 0.0, "learning_rate": 1.7955470433325353e-07, "logits/chosen": -3.0713284015655518, "logits/rejected": -3.0194857120513916, "logps/chosen": -46.86389923095703, "logps/rejected": -39.8828125, "loss": 0.6888, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.0007131118327379227, "rewards/margins": 0.007476303726434708, "rewards/rejected": -0.00818941555917263, "step": 300 }, { "epoch": 0.0, "learning_rate": 1.85539861144362e-07, "logits/chosen": -3.029153347015381, "logits/rejected": -2.971494197845459, "logps/chosen": -46.42670440673828, "logps/rejected": -41.88848876953125, "loss": 0.6882, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.307934068492614e-05, "rewards/margins": 0.010388645343482494, "rewards/rejected": -0.010431724600493908, "step": 310 }, { "epoch": 0.0, "learning_rate": 1.9152501795547045e-07, "logits/chosen": -3.0696699619293213, "logits/rejected": -3.0218729972839355, "logps/chosen": -52.293304443359375, "logps/rejected": -41.09632110595703, "loss": 0.6875, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0006218260969035327, "rewards/margins": 0.010064353235065937, "rewards/rejected": -0.010686179623007774, "step": 320 }, { "epoch": 0.0, "learning_rate": 1.9751017476657892e-07, "logits/chosen": -3.0646018981933594, "logits/rejected": -3.021247148513794, "logps/chosen": -44.57966995239258, "logps/rejected": -39.92510223388672, "loss": 0.6879, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.0014626074116677046, "rewards/margins": 0.009476480074226856, "rewards/rejected": -0.010939085856080055, "step": 330 }, { "epoch": 0.0, "learning_rate": 2.0349533157768737e-07, "logits/chosen": -3.0439929962158203, "logits/rejected": -3.0266940593719482, "logps/chosen": -44.40955352783203, "logps/rejected": -39.89966583251953, "loss": 0.6865, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.001486309920437634, "rewards/margins": 0.011990653350949287, "rewards/rejected": -0.013476962223649025, "step": 340 }, { "epoch": 0.0, "learning_rate": 2.094804883887958e-07, "logits/chosen": -3.03920316696167, "logits/rejected": -3.0073885917663574, "logps/chosen": -48.57918930053711, "logps/rejected": -40.43183517456055, "loss": 0.6864, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0006895284168422222, "rewards/margins": 0.015030157752335072, "rewards/rejected": -0.015719685703516006, "step": 350 }, { "epoch": 0.0, "learning_rate": 2.1546564519990424e-07, "logits/chosen": -3.0667834281921387, "logits/rejected": -3.027765989303589, "logps/chosen": -47.616943359375, "logps/rejected": -39.41626739501953, "loss": 0.6865, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.001487168250605464, "rewards/margins": 0.01337848138064146, "rewards/rejected": -0.014865649864077568, "step": 360 }, { "epoch": 0.0, "learning_rate": 2.214508020110127e-07, "logits/chosen": -3.0808329582214355, "logits/rejected": -3.0574917793273926, "logps/chosen": -45.94842529296875, "logps/rejected": -39.347312927246094, "loss": 0.6835, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0007894297013990581, "rewards/margins": 0.018994133919477463, "rewards/rejected": -0.019783565774559975, "step": 370 }, { "epoch": 0.0, "learning_rate": 2.2743595882212116e-07, "logits/chosen": -3.0505480766296387, "logits/rejected": -3.0201103687286377, "logps/chosen": -52.86272430419922, "logps/rejected": -43.470436096191406, "loss": 0.6848, "rewards/accuracies": 1.0, "rewards/chosen": 0.0007727337069809437, "rewards/margins": 0.01653381437063217, "rewards/rejected": -0.015761079266667366, "step": 380 }, { "epoch": 0.0, "learning_rate": 2.334211156332296e-07, "logits/chosen": -3.050389051437378, "logits/rejected": -3.0223817825317383, "logps/chosen": -43.48157501220703, "logps/rejected": -39.71511459350586, "loss": 0.6826, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.000983349746093154, "rewards/margins": 0.01970702037215233, "rewards/rejected": -0.020690370351076126, "step": 390 }, { "epoch": 0.0, "learning_rate": 2.3940627244433805e-07, "logits/chosen": -3.050997018814087, "logits/rejected": -3.043797254562378, "logps/chosen": -44.695518493652344, "logps/rejected": -41.81352233886719, "loss": 0.6823, "rewards/accuracies": 1.0, "rewards/chosen": -5.669577603839571e-06, "rewards/margins": 0.026016298681497574, "rewards/rejected": -0.026021966710686684, "step": 400 }, { "epoch": 0.0, "learning_rate": 2.453914292554465e-07, "logits/chosen": -3.0696067810058594, "logits/rejected": -3.0326802730560303, "logps/chosen": -44.31527328491211, "logps/rejected": -41.38368606567383, "loss": 0.6829, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.0020145520102232695, "rewards/margins": 0.015384973958134651, "rewards/rejected": -0.01739952526986599, "step": 410 }, { "epoch": 0.01, "learning_rate": 2.5137658606655495e-07, "logits/chosen": -3.0291175842285156, "logits/rejected": -2.98738694190979, "logps/chosen": -53.09520721435547, "logps/rejected": -41.37700271606445, "loss": 0.68, "rewards/accuracies": 1.0, "rewards/chosen": 0.00026987079763785005, "rewards/margins": 0.02875000238418579, "rewards/rejected": -0.028480133041739464, "step": 420 }, { "epoch": 0.01, "learning_rate": 2.573617428776634e-07, "logits/chosen": -3.0701637268066406, "logits/rejected": -3.055311441421509, "logps/chosen": -44.47490310668945, "logps/rejected": -40.76319122314453, "loss": 0.6788, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.146776700508781e-05, "rewards/margins": 0.028928378596901894, "rewards/rejected": -0.028969844803214073, "step": 430 }, { "epoch": 0.01, "learning_rate": 2.633468996887719e-07, "logits/chosen": -3.1091842651367188, "logits/rejected": -3.066499710083008, "logps/chosen": -48.33821105957031, "logps/rejected": -40.83446502685547, "loss": 0.6781, "rewards/accuracies": 1.0, "rewards/chosen": 0.002302915556356311, "rewards/margins": 0.029928486794233322, "rewards/rejected": -0.027625570073723793, "step": 440 }, { "epoch": 0.01, "learning_rate": 2.6933205649988034e-07, "logits/chosen": -3.014129638671875, "logits/rejected": -3.0047144889831543, "logps/chosen": -40.64393615722656, "logps/rejected": -42.63142395019531, "loss": 0.6768, "rewards/accuracies": 1.0, "rewards/chosen": 0.002194702159613371, "rewards/margins": 0.03960654139518738, "rewards/rejected": -0.03741184622049332, "step": 450 }, { "epoch": 0.01, "learning_rate": 2.753172133109888e-07, "logits/chosen": -3.0689311027526855, "logits/rejected": -3.0351548194885254, "logps/chosen": -45.153297424316406, "logps/rejected": -40.23430633544922, "loss": 0.678, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.0007803267799317837, "rewards/margins": 0.025532295927405357, "rewards/rejected": -0.024751970544457436, "step": 460 }, { "epoch": 0.01, "learning_rate": 2.813023701220972e-07, "logits/chosen": -3.1038196086883545, "logits/rejected": -3.0602777004241943, "logps/chosen": -45.98931121826172, "logps/rejected": -39.04833984375, "loss": 0.6773, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.0008036975050345063, "rewards/margins": 0.02913764677941799, "rewards/rejected": -0.029941344633698463, "step": 470 }, { "epoch": 0.01, "learning_rate": 2.872875269332057e-07, "logits/chosen": -3.080728054046631, "logits/rejected": -3.037168025970459, "logps/chosen": -51.12994384765625, "logps/rejected": -43.45133590698242, "loss": 0.6745, "rewards/accuracies": 1.0, "rewards/chosen": 0.0050366963259875774, "rewards/margins": 0.03726188465952873, "rewards/rejected": -0.03222518786787987, "step": 480 }, { "epoch": 0.01, "learning_rate": 2.9327268374431413e-07, "logits/chosen": -3.016801357269287, "logits/rejected": -3.0151047706604004, "logps/chosen": -38.281272888183594, "logps/rejected": -40.763519287109375, "loss": 0.6711, "rewards/accuracies": 1.0, "rewards/chosen": 0.008629046380519867, "rewards/margins": 0.05651744082570076, "rewards/rejected": -0.047888390719890594, "step": 490 }, { "epoch": 0.01, "learning_rate": 2.992578405554226e-07, "logits/chosen": -3.0363667011260986, "logits/rejected": -2.998591423034668, "logps/chosen": -39.72534942626953, "logps/rejected": -44.690093994140625, "loss": 0.6676, "rewards/accuracies": 1.0, "rewards/chosen": 0.007590795401483774, "rewards/margins": 0.052827585488557816, "rewards/rejected": -0.04523678869009018, "step": 500 }, { "epoch": 0.01, "learning_rate": 3.0524299736653103e-07, "logits/chosen": -3.0311942100524902, "logits/rejected": -2.9783129692077637, "logps/chosen": -54.08019256591797, "logps/rejected": -44.83513259887695, "loss": 0.6686, "rewards/accuracies": 1.0, "rewards/chosen": 0.009198513813316822, "rewards/margins": 0.046367499977350235, "rewards/rejected": -0.03716898709535599, "step": 510 }, { "epoch": 0.01, "learning_rate": 3.112281541776395e-07, "logits/chosen": -3.03159761428833, "logits/rejected": -3.0085768699645996, "logps/chosen": -44.25, "logps/rejected": -43.622440338134766, "loss": 0.6679, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.008906746283173561, "rewards/margins": 0.04097289592027664, "rewards/rejected": -0.03206615149974823, "step": 520 }, { "epoch": 0.01, "learning_rate": 3.172133109887479e-07, "logits/chosen": -3.0714640617370605, "logits/rejected": -3.03821063041687, "logps/chosen": -53.8411750793457, "logps/rejected": -44.737648010253906, "loss": 0.6652, "rewards/accuracies": 1.0, "rewards/chosen": 0.01527607161551714, "rewards/margins": 0.05278782919049263, "rewards/rejected": -0.037511758506298065, "step": 530 }, { "epoch": 0.01, "learning_rate": 3.231984677998564e-07, "logits/chosen": -3.042795181274414, "logits/rejected": -3.0434913635253906, "logps/chosen": -29.37396240234375, "logps/rejected": -42.14738082885742, "loss": 0.6536, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.025913873687386513, "rewards/margins": 0.09100977331399918, "rewards/rejected": -0.06509589403867722, "step": 540 }, { "epoch": 0.01, "learning_rate": 3.291836246109648e-07, "logits/chosen": -3.076864719390869, "logits/rejected": -3.0458080768585205, "logps/chosen": -46.40869903564453, "logps/rejected": -45.399864196777344, "loss": 0.6651, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.022049803286790848, "rewards/margins": 0.0622982494533062, "rewards/rejected": -0.04024844616651535, "step": 550 }, { "epoch": 0.01, "learning_rate": 3.351687814220733e-07, "logits/chosen": -3.0634658336639404, "logits/rejected": -3.040860652923584, "logps/chosen": -51.73223876953125, "logps/rejected": -42.33665084838867, "loss": 0.6593, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.02389584481716156, "rewards/margins": 0.059576939791440964, "rewards/rejected": -0.035681094974279404, "step": 560 }, { "epoch": 0.01, "learning_rate": 3.4115393823318176e-07, "logits/chosen": -3.0846035480499268, "logits/rejected": -3.064783811569214, "logps/chosen": -40.86759567260742, "logps/rejected": -45.03015899658203, "loss": 0.6574, "rewards/accuracies": 1.0, "rewards/chosen": 0.0329059436917305, "rewards/margins": 0.08609328418970108, "rewards/rejected": -0.05318734049797058, "step": 570 }, { "epoch": 0.01, "learning_rate": 3.4713909504429016e-07, "logits/chosen": -3.0569701194763184, "logits/rejected": -3.0224270820617676, "logps/chosen": -36.889869689941406, "logps/rejected": -41.86185073852539, "loss": 0.6477, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.04657677561044693, "rewards/margins": 0.09229361265897751, "rewards/rejected": -0.04571682959794998, "step": 580 }, { "epoch": 0.01, "learning_rate": 3.5312425185539866e-07, "logits/chosen": -3.0904345512390137, "logits/rejected": -3.0370583534240723, "logps/chosen": -55.892059326171875, "logps/rejected": -43.979827880859375, "loss": 0.6569, "rewards/accuracies": 1.0, "rewards/chosen": 0.030034217983484268, "rewards/margins": 0.06258118897676468, "rewards/rejected": -0.03254697471857071, "step": 590 }, { "epoch": 0.01, "learning_rate": 3.5910940866650705e-07, "logits/chosen": -3.0653438568115234, "logits/rejected": -3.0586094856262207, "logps/chosen": -42.05280303955078, "logps/rejected": -42.39287567138672, "loss": 0.6461, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.04196373000741005, "rewards/margins": 0.08177045732736588, "rewards/rejected": -0.03980673477053642, "step": 600 }, { "epoch": 0.01, "learning_rate": 3.6509456547761555e-07, "logits/chosen": -3.1048097610473633, "logits/rejected": -3.059645414352417, "logps/chosen": -39.402549743652344, "logps/rejected": -44.28131866455078, "loss": 0.6477, "rewards/accuracies": 1.0, "rewards/chosen": 0.053274523466825485, "rewards/margins": 0.097306028008461, "rewards/rejected": -0.04403150454163551, "step": 610 }, { "epoch": 0.01, "learning_rate": 3.71079722288724e-07, "logits/chosen": -3.068524122238159, "logits/rejected": -3.040530204772949, "logps/chosen": -40.382450103759766, "logps/rejected": -42.73887252807617, "loss": 0.6418, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.056703340262174606, "rewards/margins": 0.10160742700099945, "rewards/rejected": -0.044904083013534546, "step": 620 }, { "epoch": 0.01, "learning_rate": 3.770648790998324e-07, "logits/chosen": -3.070568799972534, "logits/rejected": -3.0271835327148438, "logps/chosen": -42.476524353027344, "logps/rejected": -43.338348388671875, "loss": 0.6364, "rewards/accuracies": 1.0, "rewards/chosen": 0.06348707526922226, "rewards/margins": 0.11451394855976105, "rewards/rejected": -0.05102687329053879, "step": 630 }, { "epoch": 0.01, "learning_rate": 3.830500359109409e-07, "logits/chosen": -3.03696870803833, "logits/rejected": -3.0228168964385986, "logps/chosen": -39.56981658935547, "logps/rejected": -45.16889953613281, "loss": 0.6379, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.06646004319190979, "rewards/margins": 0.12737946212291718, "rewards/rejected": -0.06091942638158798, "step": 640 }, { "epoch": 0.01, "learning_rate": 3.8903519272204934e-07, "logits/chosen": -3.0467474460601807, "logits/rejected": -2.9965434074401855, "logps/chosen": -42.86086654663086, "logps/rejected": -45.5021858215332, "loss": 0.6266, "rewards/accuracies": 1.0, "rewards/chosen": 0.06629970669746399, "rewards/margins": 0.13144299387931824, "rewards/rejected": -0.06514328718185425, "step": 650 }, { "epoch": 0.01, "learning_rate": 3.9502034953315784e-07, "logits/chosen": -3.051576852798462, "logits/rejected": -3.031867027282715, "logps/chosen": -38.29902267456055, "logps/rejected": -43.266822814941406, "loss": 0.6467, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.06907321512699127, "rewards/margins": 0.12275753170251846, "rewards/rejected": -0.05368432402610779, "step": 660 }, { "epoch": 0.01, "learning_rate": 4.0100550634426624e-07, "logits/chosen": -3.058424711227417, "logits/rejected": -3.0298609733581543, "logps/chosen": -37.182701110839844, "logps/rejected": -46.32708740234375, "loss": 0.6223, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.07571291923522949, "rewards/margins": 0.15400275588035583, "rewards/rejected": -0.07828985154628754, "step": 670 }, { "epoch": 0.01, "learning_rate": 4.0699066315537474e-07, "logits/chosen": -3.059101104736328, "logits/rejected": -3.0390915870666504, "logps/chosen": -36.58732604980469, "logps/rejected": -47.50157165527344, "loss": 0.6348, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.06443629413843155, "rewards/margins": 0.13398975133895874, "rewards/rejected": -0.06955345720052719, "step": 680 }, { "epoch": 0.01, "learning_rate": 4.1297581996648313e-07, "logits/chosen": -3.0290112495422363, "logits/rejected": -3.014650583267212, "logps/chosen": -33.617591857910156, "logps/rejected": -45.96084976196289, "loss": 0.6195, "rewards/accuracies": 1.0, "rewards/chosen": 0.08976264297962189, "rewards/margins": 0.17355456948280334, "rewards/rejected": -0.08379192650318146, "step": 690 }, { "epoch": 0.01, "learning_rate": 4.189609767775916e-07, "logits/chosen": -3.09535551071167, "logits/rejected": -3.0826432704925537, "logps/chosen": -45.53787612915039, "logps/rejected": -47.43280792236328, "loss": 0.6248, "rewards/accuracies": 1.0, "rewards/chosen": 0.07640780508518219, "rewards/margins": 0.15367943048477173, "rewards/rejected": -0.07727161049842834, "step": 700 }, { "epoch": 0.01, "learning_rate": 4.249461335887001e-07, "logits/chosen": -3.0496697425842285, "logits/rejected": -3.0431792736053467, "logps/chosen": -34.719356536865234, "logps/rejected": -47.756683349609375, "loss": 0.6169, "rewards/accuracies": 1.0, "rewards/chosen": 0.09151086211204529, "rewards/margins": 0.18983399868011475, "rewards/rejected": -0.09832314401865005, "step": 710 }, { "epoch": 0.01, "learning_rate": 4.309312903998085e-07, "logits/chosen": -3.069301128387451, "logits/rejected": -3.033290147781372, "logps/chosen": -28.983383178710938, "logps/rejected": -46.136253356933594, "loss": 0.6254, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.07928386330604553, "rewards/margins": 0.16535212099552155, "rewards/rejected": -0.08606826514005661, "step": 720 }, { "epoch": 0.01, "learning_rate": 4.36916447210917e-07, "logits/chosen": -3.040623903274536, "logits/rejected": -3.025052309036255, "logps/chosen": -42.511756896972656, "logps/rejected": -44.51036071777344, "loss": 0.6219, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.06081690639257431, "rewards/margins": 0.12814059853553772, "rewards/rejected": -0.0673237070441246, "step": 730 }, { "epoch": 0.01, "learning_rate": 4.429016040220254e-07, "logits/chosen": -3.063878297805786, "logits/rejected": -3.024207353591919, "logps/chosen": -49.46342468261719, "logps/rejected": -50.362979888916016, "loss": 0.6205, "rewards/accuracies": 1.0, "rewards/chosen": 0.07308027148246765, "rewards/margins": 0.15449559688568115, "rewards/rejected": -0.0814153254032135, "step": 740 }, { "epoch": 0.01, "learning_rate": 4.488867608331338e-07, "logits/chosen": -3.0555903911590576, "logits/rejected": -3.018319606781006, "logps/chosen": -39.66136932373047, "logps/rejected": -48.467132568359375, "loss": 0.6249, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.06573520600795746, "rewards/margins": 0.14521418511867523, "rewards/rejected": -0.07947897166013718, "step": 750 }, { "epoch": 0.01, "learning_rate": 4.548719176442423e-07, "logits/chosen": -3.056149959564209, "logits/rejected": -3.025991916656494, "logps/chosen": -36.454368591308594, "logps/rejected": -51.024208068847656, "loss": 0.5957, "rewards/accuracies": 1.0, "rewards/chosen": 0.10081852972507477, "rewards/margins": 0.2159457951784134, "rewards/rejected": -0.11512728780508041, "step": 760 }, { "epoch": 0.01, "learning_rate": 4.6085707445535076e-07, "logits/chosen": -3.0364956855773926, "logits/rejected": -3.022693157196045, "logps/chosen": -43.356956481933594, "logps/rejected": -46.77887725830078, "loss": 0.6265, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.06869597733020782, "rewards/margins": 0.13210973143577576, "rewards/rejected": -0.06341375410556793, "step": 770 }, { "epoch": 0.01, "learning_rate": 4.668422312664592e-07, "logits/chosen": -3.040861129760742, "logits/rejected": -3.0171220302581787, "logps/chosen": -34.19493865966797, "logps/rejected": -46.00983810424805, "loss": 0.6148, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.09458598494529724, "rewards/margins": 0.18443696200847626, "rewards/rejected": -0.08985097706317902, "step": 780 }, { "epoch": 0.01, "learning_rate": 4.7282738807756766e-07, "logits/chosen": -3.0573570728302, "logits/rejected": -3.0035769939422607, "logps/chosen": -42.471824645996094, "logps/rejected": -49.69591522216797, "loss": 0.6105, "rewards/accuracies": 1.0, "rewards/chosen": 0.07773848623037338, "rewards/margins": 0.17234459519386292, "rewards/rejected": -0.09460610151290894, "step": 790 }, { "epoch": 0.01, "learning_rate": 4.788125448886761e-07, "logits/chosen": -3.0655667781829834, "logits/rejected": -3.0600199699401855, "logps/chosen": -23.766862869262695, "logps/rejected": -50.514991760253906, "loss": 0.5888, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1253340095281601, "rewards/margins": 0.2686927914619446, "rewards/rejected": -0.14335878193378448, "step": 800 }, { "epoch": 0.01, "learning_rate": 4.847977016997846e-07, "logits/chosen": -3.0906805992126465, "logits/rejected": -3.0569374561309814, "logps/chosen": -41.77620315551758, "logps/rejected": -48.903446197509766, "loss": 0.6006, "rewards/accuracies": 1.0, "rewards/chosen": 0.08543531596660614, "rewards/margins": 0.17225661873817444, "rewards/rejected": -0.0868213102221489, "step": 810 }, { "epoch": 0.01, "learning_rate": 4.90782858510893e-07, "logits/chosen": -3.070127010345459, "logits/rejected": -3.024625778198242, "logps/chosen": -30.268680572509766, "logps/rejected": -47.705284118652344, "loss": 0.5969, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.10624829679727554, "rewards/margins": 0.21394133567810059, "rewards/rejected": -0.10769303143024445, "step": 820 }, { "epoch": 0.01, "learning_rate": 4.967680153220014e-07, "logits/chosen": -3.049079418182373, "logits/rejected": -3.025026798248291, "logps/chosen": -36.48975372314453, "logps/rejected": -51.082008361816406, "loss": 0.6003, "rewards/accuracies": 1.0, "rewards/chosen": 0.10956071317195892, "rewards/margins": 0.23226508498191833, "rewards/rejected": -0.12270437180995941, "step": 830 }, { "epoch": 0.01, "learning_rate": 5.027531721331099e-07, "logits/chosen": -3.0775182247161865, "logits/rejected": -3.0177299976348877, "logps/chosen": -46.856170654296875, "logps/rejected": -50.55313491821289, "loss": 0.5892, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.10196463018655777, "rewards/margins": 0.20978756248950958, "rewards/rejected": -0.10782293975353241, "step": 840 }, { "epoch": 0.01, "learning_rate": 5.087383289442184e-07, "logits/chosen": -3.060534954071045, "logits/rejected": -3.029937267303467, "logps/chosen": -29.695724487304688, "logps/rejected": -50.79944610595703, "loss": 0.5761, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.13228335976600647, "rewards/margins": 0.27330833673477173, "rewards/rejected": -0.14102497696876526, "step": 850 }, { "epoch": 0.01, "learning_rate": 5.147234857553268e-07, "logits/chosen": -3.0415148735046387, "logits/rejected": -3.010655641555786, "logps/chosen": -37.47358322143555, "logps/rejected": -54.069496154785156, "loss": 0.5816, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.12295367568731308, "rewards/margins": 0.2581068277359009, "rewards/rejected": -0.1351531744003296, "step": 860 }, { "epoch": 0.01, "learning_rate": 5.207086425664353e-07, "logits/chosen": -3.0554792881011963, "logits/rejected": -3.017611265182495, "logps/chosen": -45.94888687133789, "logps/rejected": -53.275718688964844, "loss": 0.5878, "rewards/accuracies": 1.0, "rewards/chosen": 0.12386427819728851, "rewards/margins": 0.2650128901004791, "rewards/rejected": -0.1411486119031906, "step": 870 }, { "epoch": 0.01, "learning_rate": 5.266937993775438e-07, "logits/chosen": -3.0688371658325195, "logits/rejected": -3.0445075035095215, "logps/chosen": -30.9885311126709, "logps/rejected": -54.10710906982422, "loss": 0.5648, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.12948963046073914, "rewards/margins": 0.28235018253326416, "rewards/rejected": -0.15286055207252502, "step": 880 }, { "epoch": 0.01, "learning_rate": 5.326789561886521e-07, "logits/chosen": -3.0565788745880127, "logits/rejected": -3.0510525703430176, "logps/chosen": -30.35544776916504, "logps/rejected": -53.6165885925293, "loss": 0.5448, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.1446937620639801, "rewards/margins": 0.2961755394935608, "rewards/rejected": -0.1514817476272583, "step": 890 }, { "epoch": 0.01, "learning_rate": 5.386641129997607e-07, "logits/chosen": -3.0517234802246094, "logits/rejected": -3.0086326599121094, "logps/chosen": -35.527503967285156, "logps/rejected": -55.96788787841797, "loss": 0.5444, "rewards/accuracies": 1.0, "rewards/chosen": 0.15287363529205322, "rewards/margins": 0.31541678309440613, "rewards/rejected": -0.1625431478023529, "step": 900 }, { "epoch": 0.01, "learning_rate": 5.44649269810869e-07, "logits/chosen": -3.0665361881256104, "logits/rejected": -3.0546364784240723, "logps/chosen": -38.30365753173828, "logps/rejected": -54.97893142700195, "loss": 0.5378, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1300448477268219, "rewards/margins": 0.27761873602867126, "rewards/rejected": -0.14757387340068817, "step": 910 }, { "epoch": 0.01, "learning_rate": 5.506344266219776e-07, "logits/chosen": -3.0814459323883057, "logits/rejected": -3.0684499740600586, "logps/chosen": -35.77130126953125, "logps/rejected": -52.33100509643555, "loss": 0.5427, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.14069262146949768, "rewards/margins": 0.2707459330558777, "rewards/rejected": -0.13005331158638, "step": 920 }, { "epoch": 0.01, "learning_rate": 5.56619583433086e-07, "logits/chosen": -3.0720372200012207, "logits/rejected": -3.042334794998169, "logps/chosen": -26.791576385498047, "logps/rejected": -60.46002197265625, "loss": 0.5306, "rewards/accuracies": 1.0, "rewards/chosen": 0.2022971361875534, "rewards/margins": 0.43015336990356445, "rewards/rejected": -0.22785624861717224, "step": 930 }, { "epoch": 0.01, "learning_rate": 5.626047402441944e-07, "logits/chosen": -3.064345598220825, "logits/rejected": -3.0358078479766846, "logps/chosen": -38.13947296142578, "logps/rejected": -53.9814338684082, "loss": 0.5697, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.12011130154132843, "rewards/margins": 0.25653791427612305, "rewards/rejected": -0.13642659783363342, "step": 940 }, { "epoch": 0.01, "learning_rate": 5.685898970553029e-07, "logits/chosen": -3.0750555992126465, "logits/rejected": -3.0118043422698975, "logps/chosen": -52.866912841796875, "logps/rejected": -54.222076416015625, "loss": 0.5712, "rewards/accuracies": 1.0, "rewards/chosen": 0.1489093005657196, "rewards/margins": 0.29169610142707825, "rewards/rejected": -0.14278678596019745, "step": 950 }, { "epoch": 0.01, "learning_rate": 5.745750538664114e-07, "logits/chosen": -3.0563018321990967, "logits/rejected": -2.9834182262420654, "logps/chosen": -41.90294647216797, "logps/rejected": -57.78328323364258, "loss": 0.5682, "rewards/accuracies": 1.0, "rewards/chosen": 0.13170316815376282, "rewards/margins": 0.2958912253379822, "rewards/rejected": -0.16418805718421936, "step": 960 }, { "epoch": 0.01, "learning_rate": 5.805602106775198e-07, "logits/chosen": -3.0611228942871094, "logits/rejected": -3.015380859375, "logps/chosen": -32.2266731262207, "logps/rejected": -58.47663497924805, "loss": 0.5418, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1526782214641571, "rewards/margins": 0.3395887017250061, "rewards/rejected": -0.186910480260849, "step": 970 }, { "epoch": 0.01, "learning_rate": 5.865453674886283e-07, "logits/chosen": -3.0702977180480957, "logits/rejected": -3.0488345623016357, "logps/chosen": -33.09114074707031, "logps/rejected": -57.282958984375, "loss": 0.5443, "rewards/accuracies": 1.0, "rewards/chosen": 0.15677312016487122, "rewards/margins": 0.3453297019004822, "rewards/rejected": -0.18855655193328857, "step": 980 }, { "epoch": 0.01, "learning_rate": 5.925305242997367e-07, "logits/chosen": -3.0452651977539062, "logits/rejected": -3.0180792808532715, "logps/chosen": -23.83493423461914, "logps/rejected": -60.44020462036133, "loss": 0.5189, "rewards/accuracies": 1.0, "rewards/chosen": 0.20305263996124268, "rewards/margins": 0.42455458641052246, "rewards/rejected": -0.22150194644927979, "step": 990 }, { "epoch": 0.01, "learning_rate": 5.985156811108452e-07, "logits/chosen": -3.078676223754883, "logits/rejected": -3.038454294204712, "logps/chosen": -23.274982452392578, "logps/rejected": -61.5517463684082, "loss": 0.5198, "rewards/accuracies": 1.0, "rewards/chosen": 0.20741672813892365, "rewards/margins": 0.4474361538887024, "rewards/rejected": -0.24001941084861755, "step": 1000 }, { "epoch": 0.01, "learning_rate": 6.045008379219536e-07, "logits/chosen": -3.0745961666107178, "logits/rejected": -3.0633013248443604, "logps/chosen": -31.140085220336914, "logps/rejected": -62.353538513183594, "loss": 0.536, "rewards/accuracies": 1.0, "rewards/chosen": 0.17957982420921326, "rewards/margins": 0.40785127878189087, "rewards/rejected": -0.228271484375, "step": 1010 }, { "epoch": 0.01, "learning_rate": 6.104859947330621e-07, "logits/chosen": -3.0601108074188232, "logits/rejected": -3.040628671646118, "logps/chosen": -33.3699951171875, "logps/rejected": -63.12241744995117, "loss": 0.541, "rewards/accuracies": 1.0, "rewards/chosen": 0.16759657859802246, "rewards/margins": 0.3988789916038513, "rewards/rejected": -0.23128239810466766, "step": 1020 }, { "epoch": 0.01, "learning_rate": 6.164711515441705e-07, "logits/chosen": -3.0620529651641846, "logits/rejected": -3.009213447570801, "logps/chosen": -39.624900817871094, "logps/rejected": -61.946380615234375, "loss": 0.5379, "rewards/accuracies": 1.0, "rewards/chosen": 0.1616121381521225, "rewards/margins": 0.37719935178756714, "rewards/rejected": -0.21558721363544464, "step": 1030 }, { "epoch": 0.01, "learning_rate": 6.22456308355279e-07, "logits/chosen": -3.0600571632385254, "logits/rejected": -3.0184173583984375, "logps/chosen": -32.427467346191406, "logps/rejected": -63.000823974609375, "loss": 0.5038, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.19694915413856506, "rewards/margins": 0.44695335626602173, "rewards/rejected": -0.2500041425228119, "step": 1040 }, { "epoch": 0.01, "learning_rate": 6.284414651663874e-07, "logits/chosen": -3.058791160583496, "logits/rejected": -2.9884583950042725, "logps/chosen": -45.42835998535156, "logps/rejected": -56.75873565673828, "loss": 0.5462, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.12898126244544983, "rewards/margins": 0.2802557945251465, "rewards/rejected": -0.15127453207969666, "step": 1050 }, { "epoch": 0.01, "learning_rate": 6.344266219774958e-07, "logits/chosen": -3.038623809814453, "logits/rejected": -3.024627685546875, "logps/chosen": -35.500282287597656, "logps/rejected": -57.94489288330078, "loss": 0.5366, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1646728366613388, "rewards/margins": 0.3697344958782196, "rewards/rejected": -0.205061674118042, "step": 1060 }, { "epoch": 0.01, "learning_rate": 6.404117787886043e-07, "logits/chosen": -3.094062566757202, "logits/rejected": -3.0636918544769287, "logps/chosen": -37.601341247558594, "logps/rejected": -57.50353240966797, "loss": 0.5432, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.1301889419555664, "rewards/margins": 0.31970101594924927, "rewards/rejected": -0.18951205909252167, "step": 1070 }, { "epoch": 0.01, "learning_rate": 6.463969355997128e-07, "logits/chosen": -3.0838518142700195, "logits/rejected": -3.0547595024108887, "logps/chosen": -27.672195434570312, "logps/rejected": -66.54283142089844, "loss": 0.5201, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.18061983585357666, "rewards/margins": 0.4491749703884125, "rewards/rejected": -0.2685551643371582, "step": 1080 }, { "epoch": 0.01, "learning_rate": 6.523820924108212e-07, "logits/chosen": -3.0706613063812256, "logits/rejected": -3.02168345451355, "logps/chosen": -39.81911087036133, "logps/rejected": -61.09581756591797, "loss": 0.519, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.14906267821788788, "rewards/margins": 0.37282615900039673, "rewards/rejected": -0.22376349568367004, "step": 1090 }, { "epoch": 0.01, "learning_rate": 6.583672492219296e-07, "logits/chosen": -3.032510280609131, "logits/rejected": -3.0121381282806396, "logps/chosen": -23.061138153076172, "logps/rejected": -66.96807861328125, "loss": 0.4887, "rewards/accuracies": 1.0, "rewards/chosen": 0.2246336191892624, "rewards/margins": 0.5164796113967896, "rewards/rejected": -0.29184603691101074, "step": 1100 }, { "epoch": 0.01, "learning_rate": 6.643524060330381e-07, "logits/chosen": -3.0553297996520996, "logits/rejected": -3.0501956939697266, "logps/chosen": -27.63873863220215, "logps/rejected": -68.8206558227539, "loss": 0.5044, "rewards/accuracies": 1.0, "rewards/chosen": 0.1900712251663208, "rewards/margins": 0.4634842872619629, "rewards/rejected": -0.2734130620956421, "step": 1110 }, { "epoch": 0.01, "learning_rate": 6.703375628441466e-07, "logits/chosen": -3.0761725902557373, "logits/rejected": -3.075741767883301, "logps/chosen": -13.794528007507324, "logps/rejected": -71.43467712402344, "loss": 0.4679, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.22197505831718445, "rewards/margins": 0.5640290379524231, "rewards/rejected": -0.34205397963523865, "step": 1120 }, { "epoch": 0.01, "learning_rate": 6.763227196552551e-07, "logits/chosen": -3.03470778465271, "logits/rejected": -3.0037264823913574, "logps/chosen": -29.696575164794922, "logps/rejected": -67.93940734863281, "loss": 0.4839, "rewards/accuracies": 1.0, "rewards/chosen": 0.18978071212768555, "rewards/margins": 0.46764445304870605, "rewards/rejected": -0.2778637707233429, "step": 1130 }, { "epoch": 0.01, "learning_rate": 6.823078764663635e-07, "logits/chosen": -3.0582242012023926, "logits/rejected": -3.0273473262786865, "logps/chosen": -17.898174285888672, "logps/rejected": -71.94837188720703, "loss": 0.4614, "rewards/accuracies": 1.0, "rewards/chosen": 0.22596609592437744, "rewards/margins": 0.566737949848175, "rewards/rejected": -0.34077176451683044, "step": 1140 }, { "epoch": 0.01, "learning_rate": 6.882930332774719e-07, "logits/chosen": -3.077049732208252, "logits/rejected": -3.0282790660858154, "logps/chosen": -33.447593688964844, "logps/rejected": -71.80250549316406, "loss": 0.4779, "rewards/accuracies": 1.0, "rewards/chosen": 0.1845531016588211, "rewards/margins": 0.4910987913608551, "rewards/rejected": -0.30654576420783997, "step": 1150 }, { "epoch": 0.01, "learning_rate": 6.942781900885803e-07, "logits/chosen": -3.0560574531555176, "logits/rejected": -3.0242104530334473, "logps/chosen": -29.094074249267578, "logps/rejected": -71.3777847290039, "loss": 0.4778, "rewards/accuracies": 1.0, "rewards/chosen": 0.20712968707084656, "rewards/margins": 0.5297130346298218, "rewards/rejected": -0.32258328795433044, "step": 1160 }, { "epoch": 0.01, "learning_rate": 7.002633468996889e-07, "logits/chosen": -3.0437588691711426, "logits/rejected": -2.9880194664001465, "logps/chosen": -35.666419982910156, "logps/rejected": -68.55256652832031, "loss": 0.4987, "rewards/accuracies": 1.0, "rewards/chosen": 0.17023703455924988, "rewards/margins": 0.43436169624328613, "rewards/rejected": -0.26412469148635864, "step": 1170 }, { "epoch": 0.01, "learning_rate": 7.062485037107973e-07, "logits/chosen": -3.0988271236419678, "logits/rejected": -3.084030866622925, "logps/chosen": -24.17633819580078, "logps/rejected": -75.7172622680664, "loss": 0.4384, "rewards/accuracies": 1.0, "rewards/chosen": 0.21935348212718964, "rewards/margins": 0.5856934785842896, "rewards/rejected": -0.3663399815559387, "step": 1180 }, { "epoch": 0.01, "learning_rate": 7.122336605219058e-07, "logits/chosen": -3.038661479949951, "logits/rejected": -3.0142147541046143, "logps/chosen": -23.507816314697266, "logps/rejected": -71.63668823242188, "loss": 0.4836, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.19414828717708588, "rewards/margins": 0.5257659554481506, "rewards/rejected": -0.33161771297454834, "step": 1190 }, { "epoch": 0.01, "learning_rate": 7.182188173330141e-07, "logits/chosen": -3.0395917892456055, "logits/rejected": -3.0183262825012207, "logps/chosen": -16.773101806640625, "logps/rejected": -76.88715362548828, "loss": 0.4467, "rewards/accuracies": 1.0, "rewards/chosen": 0.22862152755260468, "rewards/margins": 0.6243677139282227, "rewards/rejected": -0.3957461714744568, "step": 1200 }, { "epoch": 0.01, "learning_rate": 7.242039741441226e-07, "logits/chosen": -3.105476140975952, "logits/rejected": -3.0567874908447266, "logps/chosen": -24.28872299194336, "logps/rejected": -72.94276428222656, "loss": 0.4607, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.18843722343444824, "rewards/margins": 0.529924213886261, "rewards/rejected": -0.34148696064949036, "step": 1210 }, { "epoch": 0.01, "learning_rate": 7.301891309552311e-07, "logits/chosen": -3.0624749660491943, "logits/rejected": -3.049339532852173, "logps/chosen": -21.105308532714844, "logps/rejected": -81.85830688476562, "loss": 0.4396, "rewards/accuracies": 1.0, "rewards/chosen": 0.24601995944976807, "rewards/margins": 0.674644947052002, "rewards/rejected": -0.4286249577999115, "step": 1220 }, { "epoch": 0.01, "learning_rate": 7.361742877663396e-07, "logits/chosen": -3.0737435817718506, "logits/rejected": -3.0243449211120605, "logps/chosen": -26.553878784179688, "logps/rejected": -77.88673400878906, "loss": 0.4695, "rewards/accuracies": 1.0, "rewards/chosen": 0.21822018921375275, "rewards/margins": 0.5865466594696045, "rewards/rejected": -0.36832648515701294, "step": 1230 }, { "epoch": 0.01, "learning_rate": 7.42159444577448e-07, "logits/chosen": -3.0579981803894043, "logits/rejected": -3.050585985183716, "logps/chosen": -25.138347625732422, "logps/rejected": -75.10093688964844, "loss": 0.4474, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20116576552391052, "rewards/margins": 0.5709176659584045, "rewards/rejected": -0.369751900434494, "step": 1240 }, { "epoch": 0.01, "learning_rate": 7.481446013885564e-07, "logits/chosen": -3.084301710128784, "logits/rejected": -3.0710525512695312, "logps/chosen": -27.728384017944336, "logps/rejected": -76.25328063964844, "loss": 0.4494, "rewards/accuracies": 1.0, "rewards/chosen": 0.21652591228485107, "rewards/margins": 0.5925289988517761, "rewards/rejected": -0.3760031759738922, "step": 1250 }, { "epoch": 0.02, "learning_rate": 7.541297581996648e-07, "logits/chosen": -3.0339765548706055, "logits/rejected": -2.9944610595703125, "logps/chosen": -36.65898895263672, "logps/rejected": -74.37141418457031, "loss": 0.4565, "rewards/accuracies": 1.0, "rewards/chosen": 0.16826283931732178, "rewards/margins": 0.5037393569946289, "rewards/rejected": -0.33547645807266235, "step": 1260 }, { "epoch": 0.02, "learning_rate": 7.601149150107733e-07, "logits/chosen": -3.0486741065979004, "logits/rejected": -3.0183072090148926, "logps/chosen": -22.53329086303711, "logps/rejected": -80.27749633789062, "loss": 0.4304, "rewards/accuracies": 1.0, "rewards/chosen": 0.23287267982959747, "rewards/margins": 0.636967658996582, "rewards/rejected": -0.40409499406814575, "step": 1270 }, { "epoch": 0.02, "learning_rate": 7.661000718218818e-07, "logits/chosen": -3.082063913345337, "logits/rejected": -3.0477092266082764, "logps/chosen": -27.154373168945312, "logps/rejected": -78.62904357910156, "loss": 0.4634, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.21126461029052734, "rewards/margins": 0.6057683229446411, "rewards/rejected": -0.394503653049469, "step": 1280 }, { "epoch": 0.02, "learning_rate": 7.720852286329902e-07, "logits/chosen": -3.0408999919891357, "logits/rejected": -3.0246918201446533, "logps/chosen": -21.97327995300293, "logps/rejected": -82.97834777832031, "loss": 0.4163, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.21320024132728577, "rewards/margins": 0.6483854055404663, "rewards/rejected": -0.4351852536201477, "step": 1290 }, { "epoch": 0.02, "learning_rate": 7.780703854440987e-07, "logits/chosen": -3.056278944015503, "logits/rejected": -3.0048394203186035, "logps/chosen": -25.793066024780273, "logps/rejected": -79.08477020263672, "loss": 0.4562, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20355430245399475, "rewards/margins": 0.611092209815979, "rewards/rejected": -0.40753793716430664, "step": 1300 }, { "epoch": 0.02, "learning_rate": 7.840555422552072e-07, "logits/chosen": -3.0660247802734375, "logits/rejected": -3.0502898693084717, "logps/chosen": -19.514293670654297, "logps/rejected": -81.77676391601562, "loss": 0.4382, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1997596025466919, "rewards/margins": 0.6385673880577087, "rewards/rejected": -0.43880778551101685, "step": 1310 }, { "epoch": 0.02, "learning_rate": 7.900406990663157e-07, "logits/chosen": -3.059462308883667, "logits/rejected": -3.0456323623657227, "logps/chosen": -31.745630264282227, "logps/rejected": -75.96055603027344, "loss": 0.4601, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.16689220070838928, "rewards/margins": 0.5335757732391357, "rewards/rejected": -0.36668357253074646, "step": 1320 }, { "epoch": 0.02, "learning_rate": 7.96025855877424e-07, "logits/chosen": -3.058964967727661, "logits/rejected": -3.047744035720825, "logps/chosen": -31.67373275756836, "logps/rejected": -74.43034362792969, "loss": 0.4756, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.15765920281410217, "rewards/margins": 0.5029540657997131, "rewards/rejected": -0.3452948033809662, "step": 1330 }, { "epoch": 0.02, "learning_rate": 8.020110126885325e-07, "logits/chosen": -3.0725326538085938, "logits/rejected": -3.0541329383850098, "logps/chosen": -26.21136474609375, "logps/rejected": -80.04096984863281, "loss": 0.4573, "rewards/accuracies": 1.0, "rewards/chosen": 0.19743946194648743, "rewards/margins": 0.5987585783004761, "rewards/rejected": -0.40131911635398865, "step": 1340 }, { "epoch": 0.02, "learning_rate": 8.079961694996409e-07, "logits/chosen": -3.0699779987335205, "logits/rejected": -3.032844305038452, "logps/chosen": -29.024166107177734, "logps/rejected": -76.8138198852539, "loss": 0.452, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.18268047273159027, "rewards/margins": 0.5563156604766846, "rewards/rejected": -0.3736351728439331, "step": 1350 }, { "epoch": 0.02, "learning_rate": 8.139813263107495e-07, "logits/chosen": -3.085879325866699, "logits/rejected": -3.015942096710205, "logps/chosen": -35.27076721191406, "logps/rejected": -81.09175109863281, "loss": 0.4115, "rewards/accuracies": 1.0, "rewards/chosen": 0.1916121542453766, "rewards/margins": 0.5989283323287964, "rewards/rejected": -0.4073162078857422, "step": 1360 }, { "epoch": 0.02, "learning_rate": 8.199664831218579e-07, "logits/chosen": -3.074350118637085, "logits/rejected": -3.059009075164795, "logps/chosen": -22.05868911743164, "logps/rejected": -89.38922882080078, "loss": 0.4111, "rewards/accuracies": 1.0, "rewards/chosen": 0.24976086616516113, "rewards/margins": 0.7527107000350952, "rewards/rejected": -0.5029498338699341, "step": 1370 }, { "epoch": 0.02, "learning_rate": 8.259516399329663e-07, "logits/chosen": -2.997366428375244, "logits/rejected": -2.977353811264038, "logps/chosen": -22.73345375061035, "logps/rejected": -89.5788345336914, "loss": 0.4525, "rewards/accuracies": 1.0, "rewards/chosen": 0.2447294294834137, "rewards/margins": 0.749183177947998, "rewards/rejected": -0.5044537782669067, "step": 1380 }, { "epoch": 0.02, "learning_rate": 8.319367967440747e-07, "logits/chosen": -3.077810764312744, "logits/rejected": -3.055509090423584, "logps/chosen": -27.951274871826172, "logps/rejected": -83.46776580810547, "loss": 0.4294, "rewards/accuracies": 1.0, "rewards/chosen": 0.20826418697834015, "rewards/margins": 0.6546152830123901, "rewards/rejected": -0.4463511109352112, "step": 1390 }, { "epoch": 0.02, "learning_rate": 8.379219535551832e-07, "logits/chosen": -3.095288038253784, "logits/rejected": -3.08059024810791, "logps/chosen": -22.12644386291504, "logps/rejected": -91.1815414428711, "loss": 0.4039, "rewards/accuracies": 1.0, "rewards/chosen": 0.2514724135398865, "rewards/margins": 0.7832176089286804, "rewards/rejected": -0.5317451357841492, "step": 1400 }, { "epoch": 0.02, "learning_rate": 8.439071103662917e-07, "logits/chosen": -3.0529251098632812, "logits/rejected": -3.019284725189209, "logps/chosen": -33.89453125, "logps/rejected": -76.35501861572266, "loss": 0.4416, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.12188174575567245, "rewards/margins": 0.4993898272514343, "rewards/rejected": -0.37750810384750366, "step": 1410 }, { "epoch": 0.02, "learning_rate": 8.498922671774002e-07, "logits/chosen": -3.062037944793701, "logits/rejected": -3.0402629375457764, "logps/chosen": -25.591445922851562, "logps/rejected": -80.3615493774414, "loss": 0.4553, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.1598699539899826, "rewards/margins": 0.5789488554000854, "rewards/rejected": -0.41907891631126404, "step": 1420 }, { "epoch": 0.02, "learning_rate": 8.558774239885086e-07, "logits/chosen": -3.0710034370422363, "logits/rejected": -3.041943073272705, "logps/chosen": -21.975296020507812, "logps/rejected": -85.08016204833984, "loss": 0.4259, "rewards/accuracies": 1.0, "rewards/chosen": 0.21767492592334747, "rewards/margins": 0.6902450323104858, "rewards/rejected": -0.4725700914859772, "step": 1430 }, { "epoch": 0.02, "learning_rate": 8.61862580799617e-07, "logits/chosen": -3.044780731201172, "logits/rejected": -3.009598731994629, "logps/chosen": -28.65957260131836, "logps/rejected": -85.58129119873047, "loss": 0.4261, "rewards/accuracies": 1.0, "rewards/chosen": 0.21638774871826172, "rewards/margins": 0.6730076670646667, "rewards/rejected": -0.4566200375556946, "step": 1440 }, { "epoch": 0.02, "learning_rate": 8.678477376107254e-07, "logits/chosen": -3.038510799407959, "logits/rejected": -2.9948325157165527, "logps/chosen": -17.524328231811523, "logps/rejected": -91.0429916381836, "loss": 0.4065, "rewards/accuracies": 1.0, "rewards/chosen": 0.23911242187023163, "rewards/margins": 0.7605185508728027, "rewards/rejected": -0.5214060544967651, "step": 1450 }, { "epoch": 0.02, "learning_rate": 8.73832894421834e-07, "logits/chosen": -3.0338149070739746, "logits/rejected": -3.031109571456909, "logps/chosen": -17.5502986907959, "logps/rejected": -95.25648498535156, "loss": 0.4134, "rewards/accuracies": 1.0, "rewards/chosen": 0.2455560714006424, "rewards/margins": 0.8212604522705078, "rewards/rejected": -0.5757043957710266, "step": 1460 }, { "epoch": 0.02, "learning_rate": 8.798180512329424e-07, "logits/chosen": -3.0423848628997803, "logits/rejected": -3.031738758087158, "logps/chosen": -34.313148498535156, "logps/rejected": -87.35386657714844, "loss": 0.46, "rewards/accuracies": 1.0, "rewards/chosen": 0.20063567161560059, "rewards/margins": 0.6668441891670227, "rewards/rejected": -0.4662085175514221, "step": 1470 }, { "epoch": 0.02, "learning_rate": 8.858032080440508e-07, "logits/chosen": -3.0766005516052246, "logits/rejected": -3.0702157020568848, "logps/chosen": -29.333484649658203, "logps/rejected": -86.22199249267578, "loss": 0.4736, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.19772347807884216, "rewards/margins": 0.6752236485481262, "rewards/rejected": -0.47750020027160645, "step": 1480 }, { "epoch": 0.02, "learning_rate": 8.917883648551593e-07, "logits/chosen": -3.074652671813965, "logits/rejected": -3.055098056793213, "logps/chosen": -37.93566131591797, "logps/rejected": -86.84456634521484, "loss": 0.4464, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1917753517627716, "rewards/margins": 0.6420084238052368, "rewards/rejected": -0.4502330422401428, "step": 1490 }, { "epoch": 0.02, "learning_rate": 8.977735216662676e-07, "logits/chosen": -3.039066791534424, "logits/rejected": -3.0136799812316895, "logps/chosen": -18.53877830505371, "logps/rejected": -92.4996566772461, "loss": 0.3959, "rewards/accuracies": 1.0, "rewards/chosen": 0.24218884110450745, "rewards/margins": 0.7891825437545776, "rewards/rejected": -0.5469937920570374, "step": 1500 }, { "epoch": 0.02, "learning_rate": 9.037586784773762e-07, "logits/chosen": -3.036360263824463, "logits/rejected": -3.004317045211792, "logps/chosen": -27.008377075195312, "logps/rejected": -89.93092346191406, "loss": 0.4106, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1998627483844757, "rewards/margins": 0.7135249376296997, "rewards/rejected": -0.5136622190475464, "step": 1510 }, { "epoch": 0.02, "learning_rate": 9.097438352884846e-07, "logits/chosen": -3.0320258140563965, "logits/rejected": -3.015932559967041, "logps/chosen": -27.528011322021484, "logps/rejected": -88.68113708496094, "loss": 0.4342, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.19425277411937714, "rewards/margins": 0.682503879070282, "rewards/rejected": -0.48825111985206604, "step": 1520 }, { "epoch": 0.02, "learning_rate": 9.157289920995931e-07, "logits/chosen": -3.070667266845703, "logits/rejected": -3.026207447052002, "logps/chosen": -33.00151824951172, "logps/rejected": -76.77093505859375, "loss": 0.4261, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.16069857776165009, "rewards/margins": 0.5364342927932739, "rewards/rejected": -0.37573570013046265, "step": 1530 }, { "epoch": 0.02, "learning_rate": 9.217141489107015e-07, "logits/chosen": -2.9935555458068848, "logits/rejected": -2.9893722534179688, "logps/chosen": -25.28873062133789, "logps/rejected": -85.91699981689453, "loss": 0.4286, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.19454850256443024, "rewards/margins": 0.6828798055648804, "rewards/rejected": -0.48833122849464417, "step": 1540 }, { "epoch": 0.02, "learning_rate": 9.276993057218101e-07, "logits/chosen": -3.0648818016052246, "logits/rejected": -3.031414270401001, "logps/chosen": -24.372018814086914, "logps/rejected": -84.48405456542969, "loss": 0.4151, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.1820710152387619, "rewards/margins": 0.6554104089736938, "rewards/rejected": -0.47333937883377075, "step": 1550 }, { "epoch": 0.02, "learning_rate": 9.336844625329184e-07, "logits/chosen": -3.0667128562927246, "logits/rejected": -3.0152626037597656, "logps/chosen": -30.3253173828125, "logps/rejected": -93.14845275878906, "loss": 0.4029, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20698711276054382, "rewards/margins": 0.7401949167251587, "rewards/rejected": -0.5332077741622925, "step": 1560 }, { "epoch": 0.02, "learning_rate": 9.396696193440269e-07, "logits/chosen": -3.0339066982269287, "logits/rejected": -2.9849324226379395, "logps/chosen": -23.181880950927734, "logps/rejected": -94.22126007080078, "loss": 0.391, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.19581110775470734, "rewards/margins": 0.752532958984375, "rewards/rejected": -0.5567218065261841, "step": 1570 }, { "epoch": 0.02, "learning_rate": 9.456547761551353e-07, "logits/chosen": -3.0278725624084473, "logits/rejected": -3.0128400325775146, "logps/chosen": -25.80999183654785, "logps/rejected": -93.93496704101562, "loss": 0.4116, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20285029709339142, "rewards/margins": 0.7407376170158386, "rewards/rejected": -0.5378873348236084, "step": 1580 }, { "epoch": 0.02, "learning_rate": 9.516399329662438e-07, "logits/chosen": -3.088174819946289, "logits/rejected": -3.052522659301758, "logps/chosen": -34.72909927368164, "logps/rejected": -94.1514892578125, "loss": 0.3837, "rewards/accuracies": 1.0, "rewards/chosen": 0.22904245555400848, "rewards/margins": 0.7604613900184631, "rewards/rejected": -0.5314189195632935, "step": 1590 }, { "epoch": 0.02, "learning_rate": 9.576250897773522e-07, "logits/chosen": -3.0314674377441406, "logits/rejected": -3.0236897468566895, "logps/chosen": -19.907955169677734, "logps/rejected": -96.0357894897461, "loss": 0.4118, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2083805352449417, "rewards/margins": 0.767540693283081, "rewards/rejected": -0.5591601729393005, "step": 1600 }, { "epoch": 0.02, "learning_rate": 9.636102465884608e-07, "logits/chosen": -3.0803561210632324, "logits/rejected": -3.0518598556518555, "logps/chosen": -23.31368064880371, "logps/rejected": -86.34699249267578, "loss": 0.4256, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.19201955199241638, "rewards/margins": 0.6728291511535645, "rewards/rejected": -0.48080962896347046, "step": 1610 }, { "epoch": 0.02, "learning_rate": 9.69595403399569e-07, "logits/chosen": -3.0629701614379883, "logits/rejected": -3.0480713844299316, "logps/chosen": -19.568239212036133, "logps/rejected": -97.77247619628906, "loss": 0.3957, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.22637870907783508, "rewards/margins": 0.8255943059921265, "rewards/rejected": -0.5992156267166138, "step": 1620 }, { "epoch": 0.02, "learning_rate": 9.755805602106777e-07, "logits/chosen": -2.9994561672210693, "logits/rejected": -2.984468460083008, "logps/chosen": -24.79628562927246, "logps/rejected": -96.98651123046875, "loss": 0.4247, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.22454023361206055, "rewards/margins": 0.8205493688583374, "rewards/rejected": -0.5960091948509216, "step": 1630 }, { "epoch": 0.02, "learning_rate": 9.81565717021786e-07, "logits/chosen": -3.0787339210510254, "logits/rejected": -3.0445311069488525, "logps/chosen": -24.656810760498047, "logps/rejected": -94.51274871826172, "loss": 0.3838, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20954406261444092, "rewards/margins": 0.761633574962616, "rewards/rejected": -0.552089512348175, "step": 1640 }, { "epoch": 0.02, "learning_rate": 9.875508738328946e-07, "logits/chosen": -3.052903413772583, "logits/rejected": -3.0320382118225098, "logps/chosen": -17.534473419189453, "logps/rejected": -98.86961364746094, "loss": 0.4018, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.22470669448375702, "rewards/margins": 0.8353234529495239, "rewards/rejected": -0.6106167435646057, "step": 1650 }, { "epoch": 0.02, "learning_rate": 9.93536030644003e-07, "logits/chosen": -3.0549728870391846, "logits/rejected": -3.0253515243530273, "logps/chosen": -30.83590316772461, "logps/rejected": -86.78987884521484, "loss": 0.4521, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.15888604521751404, "rewards/margins": 0.6329306960105896, "rewards/rejected": -0.47404471039772034, "step": 1660 }, { "epoch": 0.02, "learning_rate": 9.995211874551114e-07, "logits/chosen": -3.0157947540283203, "logits/rejected": -2.972538709640503, "logps/chosen": -21.3707275390625, "logps/rejected": -93.74849700927734, "loss": 0.4194, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.22093422710895538, "rewards/margins": 0.7784726023674011, "rewards/rejected": -0.5575383305549622, "step": 1670 }, { "epoch": 0.02, "learning_rate": 1.0055063442662198e-06, "logits/chosen": -3.0601017475128174, "logits/rejected": -3.0297532081604004, "logps/chosen": -24.426631927490234, "logps/rejected": -99.90489959716797, "loss": 0.3921, "rewards/accuracies": 1.0, "rewards/chosen": 0.22196133434772491, "rewards/margins": 0.8248566389083862, "rewards/rejected": -0.6028953790664673, "step": 1680 }, { "epoch": 0.02, "learning_rate": 1.0114915010773283e-06, "logits/chosen": -3.046182155609131, "logits/rejected": -3.0158352851867676, "logps/chosen": -30.039623260498047, "logps/rejected": -92.89549255371094, "loss": 0.3878, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20356445014476776, "rewards/margins": 0.7528842687606812, "rewards/rejected": -0.549319863319397, "step": 1690 }, { "epoch": 0.02, "learning_rate": 1.017476657888437e-06, "logits/chosen": -3.075737476348877, "logits/rejected": -3.0379233360290527, "logps/chosen": -34.684932708740234, "logps/rejected": -95.45805358886719, "loss": 0.4162, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.19182758033275604, "rewards/margins": 0.750237226486206, "rewards/rejected": -0.5584096908569336, "step": 1700 }, { "epoch": 0.02, "learning_rate": 1.0234618146995452e-06, "logits/chosen": -3.0337846279144287, "logits/rejected": -3.023956775665283, "logps/chosen": -5.4689040184021, "logps/rejected": -107.86357116699219, "loss": 0.3446, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.27694958448410034, "rewards/margins": 0.9918878674507141, "rewards/rejected": -0.7149381637573242, "step": 1710 }, { "epoch": 0.02, "learning_rate": 1.0294469715106536e-06, "logits/chosen": -3.0816197395324707, "logits/rejected": -3.032064914703369, "logps/chosen": -30.746231079101562, "logps/rejected": -95.66383361816406, "loss": 0.4037, "rewards/accuracies": 1.0, "rewards/chosen": 0.1999312788248062, "rewards/margins": 0.7510215640068054, "rewards/rejected": -0.5510902404785156, "step": 1720 }, { "epoch": 0.02, "learning_rate": 1.0354321283217621e-06, "logits/chosen": -3.0286502838134766, "logits/rejected": -3.0236494541168213, "logps/chosen": -17.419750213623047, "logps/rejected": -108.24430084228516, "loss": 0.362, "rewards/accuracies": 1.0, "rewards/chosen": 0.2583833932876587, "rewards/margins": 0.9465440511703491, "rewards/rejected": -0.6881605982780457, "step": 1730 }, { "epoch": 0.02, "learning_rate": 1.0414172851328707e-06, "logits/chosen": -3.0527920722961426, "logits/rejected": -3.0151448249816895, "logps/chosen": -32.03759002685547, "logps/rejected": -99.93470764160156, "loss": 0.443, "rewards/accuracies": 1.0, "rewards/chosen": 0.2171463519334793, "rewards/margins": 0.810742974281311, "rewards/rejected": -0.5935965776443481, "step": 1740 }, { "epoch": 0.02, "learning_rate": 1.047402441943979e-06, "logits/chosen": -3.0239462852478027, "logits/rejected": -3.00567626953125, "logps/chosen": -19.300662994384766, "logps/rejected": -101.30989074707031, "loss": 0.3884, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.23620867729187012, "rewards/margins": 0.8580083847045898, "rewards/rejected": -0.621799647808075, "step": 1750 }, { "epoch": 0.02, "learning_rate": 1.0533875987550876e-06, "logits/chosen": -3.029175281524658, "logits/rejected": -2.998206377029419, "logps/chosen": -37.968994140625, "logps/rejected": -90.18030548095703, "loss": 0.409, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.1429453194141388, "rewards/margins": 0.6532583832740784, "rewards/rejected": -0.5103130340576172, "step": 1760 }, { "epoch": 0.02, "learning_rate": 1.059372755566196e-06, "logits/chosen": -3.0812530517578125, "logits/rejected": -3.0359902381896973, "logps/chosen": -27.739160537719727, "logps/rejected": -96.46995544433594, "loss": 0.3568, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.21095947921276093, "rewards/margins": 0.7793315649032593, "rewards/rejected": -0.5683721303939819, "step": 1770 }, { "epoch": 0.02, "learning_rate": 1.0653579123773043e-06, "logits/chosen": -3.038149356842041, "logits/rejected": -3.0060665607452393, "logps/chosen": -28.50927734375, "logps/rejected": -105.81211853027344, "loss": 0.3591, "rewards/accuracies": 1.0, "rewards/chosen": 0.22674600780010223, "rewards/margins": 0.8696784973144531, "rewards/rejected": -0.6429325342178345, "step": 1780 }, { "epoch": 0.02, "learning_rate": 1.0713430691884128e-06, "logits/chosen": -3.0340778827667236, "logits/rejected": -3.001209020614624, "logps/chosen": -21.165346145629883, "logps/rejected": -106.3609848022461, "loss": 0.3584, "rewards/accuracies": 1.0, "rewards/chosen": 0.2475188970565796, "rewards/margins": 0.9274236559867859, "rewards/rejected": -0.6799048185348511, "step": 1790 }, { "epoch": 0.02, "learning_rate": 1.0773282259995214e-06, "logits/chosen": -3.039644718170166, "logits/rejected": -3.031904458999634, "logps/chosen": -22.48516845703125, "logps/rejected": -92.93486022949219, "loss": 0.3892, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.16963309049606323, "rewards/margins": 0.714742124080658, "rewards/rejected": -0.5451090335845947, "step": 1800 }, { "epoch": 0.02, "learning_rate": 1.0833133828106297e-06, "logits/chosen": -3.048530101776123, "logits/rejected": -3.025182008743286, "logps/chosen": -34.41590118408203, "logps/rejected": -101.9411392211914, "loss": 0.3695, "rewards/accuracies": 1.0, "rewards/chosen": 0.228490948677063, "rewards/margins": 0.8425586819648743, "rewards/rejected": -0.6140677332878113, "step": 1810 }, { "epoch": 0.02, "learning_rate": 1.089298539621738e-06, "logits/chosen": -3.0751147270202637, "logits/rejected": -3.0614430904388428, "logps/chosen": -28.94723129272461, "logps/rejected": -97.96755981445312, "loss": 0.3879, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.20447519421577454, "rewards/margins": 0.7945129871368408, "rewards/rejected": -0.5900377035140991, "step": 1820 }, { "epoch": 0.02, "learning_rate": 1.0952836964328466e-06, "logits/chosen": -3.0690650939941406, "logits/rejected": -3.0488154888153076, "logps/chosen": -23.02591323852539, "logps/rejected": -100.18788146972656, "loss": 0.3799, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.22454464435577393, "rewards/margins": 0.8347294926643372, "rewards/rejected": -0.6101848483085632, "step": 1830 }, { "epoch": 0.02, "learning_rate": 1.1012688532439552e-06, "logits/chosen": -3.0778839588165283, "logits/rejected": -3.001502752304077, "logps/chosen": -34.99867248535156, "logps/rejected": -90.38529968261719, "loss": 0.3891, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.16199402511119843, "rewards/margins": 0.656631350517273, "rewards/rejected": -0.49463725090026855, "step": 1840 }, { "epoch": 0.02, "learning_rate": 1.1072540100550635e-06, "logits/chosen": -3.0535783767700195, "logits/rejected": -3.0502991676330566, "logps/chosen": -22.597461700439453, "logps/rejected": -112.3560562133789, "loss": 0.3376, "rewards/accuracies": 1.0, "rewards/chosen": 0.26196879148483276, "rewards/margins": 0.9957496523857117, "rewards/rejected": -0.7337807416915894, "step": 1850 }, { "epoch": 0.02, "learning_rate": 1.113239166866172e-06, "logits/chosen": -3.0663063526153564, "logits/rejected": -3.0376486778259277, "logps/chosen": -25.91437339782715, "logps/rejected": -100.04304504394531, "loss": 0.4112, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.18641403317451477, "rewards/margins": 0.806198239326477, "rewards/rejected": -0.6197842359542847, "step": 1860 }, { "epoch": 0.02, "learning_rate": 1.1192243236772804e-06, "logits/chosen": -3.0338149070739746, "logits/rejected": -2.980869770050049, "logps/chosen": -44.7831916809082, "logps/rejected": -87.73886108398438, "loss": 0.4209, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.18647965788841248, "rewards/margins": 0.6607432961463928, "rewards/rejected": -0.47426357865333557, "step": 1870 }, { "epoch": 0.02, "learning_rate": 1.1252094804883887e-06, "logits/chosen": -3.0013375282287598, "logits/rejected": -2.9704842567443848, "logps/chosen": -26.716787338256836, "logps/rejected": -100.2330551147461, "loss": 0.3942, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.22286434471607208, "rewards/margins": 0.8290780186653137, "rewards/rejected": -0.6062137484550476, "step": 1880 }, { "epoch": 0.02, "learning_rate": 1.1311946372994973e-06, "logits/chosen": -3.0317091941833496, "logits/rejected": -2.9903485774993896, "logps/chosen": -31.294723510742188, "logps/rejected": -95.71357727050781, "loss": 0.4114, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.16174888610839844, "rewards/margins": 0.7310864925384521, "rewards/rejected": -0.5693376660346985, "step": 1890 }, { "epoch": 0.02, "learning_rate": 1.1371797941106058e-06, "logits/chosen": -3.045593500137329, "logits/rejected": -3.016968250274658, "logps/chosen": -18.497907638549805, "logps/rejected": -102.15985107421875, "loss": 0.3433, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2534613013267517, "rewards/margins": 0.9168069958686829, "rewards/rejected": -0.6633457541465759, "step": 1900 }, { "epoch": 0.02, "learning_rate": 1.1431649509217142e-06, "logits/chosen": -3.0326380729675293, "logits/rejected": -3.0087876319885254, "logps/chosen": -17.846771240234375, "logps/rejected": -111.08796691894531, "loss": 0.3322, "rewards/accuracies": 1.0, "rewards/chosen": 0.2703303396701813, "rewards/margins": 1.004606008529663, "rewards/rejected": -0.7342756986618042, "step": 1910 }, { "epoch": 0.02, "learning_rate": 1.1491501077328227e-06, "logits/chosen": -3.077045202255249, "logits/rejected": -3.0324783325195312, "logps/chosen": -31.732818603515625, "logps/rejected": -101.97720336914062, "loss": 0.3922, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.21225902438163757, "rewards/margins": 0.8292399644851685, "rewards/rejected": -0.6169809103012085, "step": 1920 }, { "epoch": 0.02, "learning_rate": 1.1551352645439313e-06, "logits/chosen": -3.0256524085998535, "logits/rejected": -2.9996790885925293, "logps/chosen": -28.242727279663086, "logps/rejected": -105.71932220458984, "loss": 0.3551, "rewards/accuracies": 1.0, "rewards/chosen": 0.22901558876037598, "rewards/margins": 0.8862431645393372, "rewards/rejected": -0.657227635383606, "step": 1930 }, { "epoch": 0.02, "learning_rate": 1.1611204213550396e-06, "logits/chosen": -3.008603096008301, "logits/rejected": -2.996995210647583, "logps/chosen": -10.28693962097168, "logps/rejected": -117.1180191040039, "loss": 0.3219, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.25569120049476624, "rewards/margins": 1.0615663528442383, "rewards/rejected": -0.8058750033378601, "step": 1940 }, { "epoch": 0.02, "learning_rate": 1.167105578166148e-06, "logits/chosen": -3.0322351455688477, "logits/rejected": -3.0128438472747803, "logps/chosen": -26.596385955810547, "logps/rejected": -103.83489990234375, "loss": 0.3802, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.1980123519897461, "rewards/margins": 0.8491395115852356, "rewards/rejected": -0.6511272192001343, "step": 1950 }, { "epoch": 0.02, "learning_rate": 1.1730907349772565e-06, "logits/chosen": -3.0822834968566895, "logits/rejected": -3.0485854148864746, "logps/chosen": -28.111251831054688, "logps/rejected": -105.1113052368164, "loss": 0.3855, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20057637989521027, "rewards/margins": 0.8618819117546082, "rewards/rejected": -0.6613055467605591, "step": 1960 }, { "epoch": 0.02, "learning_rate": 1.1790758917883649e-06, "logits/chosen": -3.0761499404907227, "logits/rejected": -3.014012098312378, "logps/chosen": -24.382158279418945, "logps/rejected": -105.1419677734375, "loss": 0.367, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.22964176535606384, "rewards/margins": 0.8935451507568359, "rewards/rejected": -0.6639034152030945, "step": 1970 }, { "epoch": 0.02, "learning_rate": 1.1850610485994734e-06, "logits/chosen": -3.021304130554199, "logits/rejected": -3.004340410232544, "logps/chosen": -25.147167205810547, "logps/rejected": -106.95475006103516, "loss": 0.3474, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.23467917740345, "rewards/margins": 0.9143525958061218, "rewards/rejected": -0.6796733736991882, "step": 1980 }, { "epoch": 0.02, "learning_rate": 1.191046205410582e-06, "logits/chosen": -3.029210329055786, "logits/rejected": -2.9921560287475586, "logps/chosen": -37.67336654663086, "logps/rejected": -97.21675109863281, "loss": 0.3904, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.17565186321735382, "rewards/margins": 0.7465911507606506, "rewards/rejected": -0.5709392428398132, "step": 1990 }, { "epoch": 0.02, "learning_rate": 1.1970313622216903e-06, "logits/chosen": -3.0214591026306152, "logits/rejected": -3.01017427444458, "logps/chosen": -16.855743408203125, "logps/rejected": -113.9713363647461, "loss": 0.3416, "rewards/accuracies": 1.0, "rewards/chosen": 0.2583279609680176, "rewards/margins": 1.008257269859314, "rewards/rejected": -0.7499293684959412, "step": 2000 }, { "epoch": 0.02, "learning_rate": 1.2030165190327987e-06, "logits/chosen": -3.037456512451172, "logits/rejected": -2.987666606903076, "logps/chosen": -34.9297981262207, "logps/rejected": -98.2209243774414, "loss": 0.3655, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.1968078911304474, "rewards/margins": 0.7739081978797913, "rewards/rejected": -0.577100396156311, "step": 2010 }, { "epoch": 0.02, "learning_rate": 1.2090016758439072e-06, "logits/chosen": -3.0725739002227783, "logits/rejected": -3.0403919219970703, "logps/chosen": -27.548166275024414, "logps/rejected": -109.0919189453125, "loss": 0.3326, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.23044660687446594, "rewards/margins": 0.9421889185905457, "rewards/rejected": -0.7117422819137573, "step": 2020 }, { "epoch": 0.02, "learning_rate": 1.2149868326550158e-06, "logits/chosen": -3.0674338340759277, "logits/rejected": -3.033252239227295, "logps/chosen": -23.8726863861084, "logps/rejected": -115.0726547241211, "loss": 0.3419, "rewards/accuracies": 1.0, "rewards/chosen": 0.2621733248233795, "rewards/margins": 1.0135886669158936, "rewards/rejected": -0.7514153122901917, "step": 2030 }, { "epoch": 0.02, "learning_rate": 1.2209719894661241e-06, "logits/chosen": -3.021969795227051, "logits/rejected": -2.9872264862060547, "logps/chosen": -32.81024932861328, "logps/rejected": -100.0850830078125, "loss": 0.399, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.18705210089683533, "rewards/margins": 0.7772096395492554, "rewards/rejected": -0.5901575088500977, "step": 2040 }, { "epoch": 0.02, "learning_rate": 1.2269571462772327e-06, "logits/chosen": -3.0017781257629395, "logits/rejected": -2.985680103302002, "logps/chosen": -8.788549423217773, "logps/rejected": -114.4310073852539, "loss": 0.3423, "rewards/accuracies": 1.0, "rewards/chosen": 0.2762053608894348, "rewards/margins": 1.0463227033615112, "rewards/rejected": -0.7701172828674316, "step": 2050 }, { "epoch": 0.02, "learning_rate": 1.232942303088341e-06, "logits/chosen": -3.0493738651275635, "logits/rejected": -3.0129666328430176, "logps/chosen": -20.041427612304688, "logps/rejected": -117.65519714355469, "loss": 0.3193, "rewards/accuracies": 1.0, "rewards/chosen": 0.25972166657447815, "rewards/margins": 1.0424728393554688, "rewards/rejected": -0.7827510833740234, "step": 2060 }, { "epoch": 0.02, "learning_rate": 1.2389274598994493e-06, "logits/chosen": -3.0529932975769043, "logits/rejected": -3.0006532669067383, "logps/chosen": -35.069129943847656, "logps/rejected": -96.95951080322266, "loss": 0.4019, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.156316876411438, "rewards/margins": 0.7354639768600464, "rewards/rejected": -0.5791471600532532, "step": 2070 }, { "epoch": 0.02, "learning_rate": 1.244912616710558e-06, "logits/chosen": -3.0767500400543213, "logits/rejected": -3.0054843425750732, "logps/chosen": -29.4412784576416, "logps/rejected": -111.0905990600586, "loss": 0.3305, "rewards/accuracies": 1.0, "rewards/chosen": 0.2256404161453247, "rewards/margins": 0.921422004699707, "rewards/rejected": -0.6957815289497375, "step": 2080 }, { "epoch": 0.03, "learning_rate": 1.2508977735216662e-06, "logits/chosen": -3.040009021759033, "logits/rejected": -3.0124306678771973, "logps/chosen": -23.597400665283203, "logps/rejected": -106.9177017211914, "loss": 0.3561, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.21061408519744873, "rewards/margins": 0.8802881240844727, "rewards/rejected": -0.6696740388870239, "step": 2090 }, { "epoch": 0.03, "learning_rate": 1.2568829303327748e-06, "logits/chosen": -3.0096030235290527, "logits/rejected": -2.9883840084075928, "logps/chosen": -12.264450073242188, "logps/rejected": -119.95513916015625, "loss": 0.3631, "rewards/accuracies": 1.0, "rewards/chosen": 0.28072091937065125, "rewards/margins": 1.1035842895507812, "rewards/rejected": -0.8228633999824524, "step": 2100 }, { "epoch": 0.03, "learning_rate": 1.2628680871438833e-06, "logits/chosen": -3.060791254043579, "logits/rejected": -3.012761354446411, "logps/chosen": -23.483449935913086, "logps/rejected": -114.30963134765625, "loss": 0.338, "rewards/accuracies": 1.0, "rewards/chosen": 0.2526419758796692, "rewards/margins": 1.0126031637191772, "rewards/rejected": -0.7599611878395081, "step": 2110 }, { "epoch": 0.03, "learning_rate": 1.2688532439549917e-06, "logits/chosen": -3.063999652862549, "logits/rejected": -3.0168423652648926, "logps/chosen": -26.648204803466797, "logps/rejected": -111.08613586425781, "loss": 0.37, "rewards/accuracies": 1.0, "rewards/chosen": 0.23511680960655212, "rewards/margins": 0.9436699151992798, "rewards/rejected": -0.7085530757904053, "step": 2120 }, { "epoch": 0.03, "learning_rate": 1.2748384007661002e-06, "logits/chosen": -3.0234036445617676, "logits/rejected": -2.992269277572632, "logps/chosen": -21.6507625579834, "logps/rejected": -108.46159362792969, "loss": 0.3277, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.20670779049396515, "rewards/margins": 0.9129247665405273, "rewards/rejected": -0.7062169313430786, "step": 2130 }, { "epoch": 0.03, "learning_rate": 1.2808235575772086e-06, "logits/chosen": -3.020585298538208, "logits/rejected": -2.9846584796905518, "logps/chosen": -23.1008243560791, "logps/rejected": -113.43614196777344, "loss": 0.3549, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.23313398659229279, "rewards/margins": 0.9856659173965454, "rewards/rejected": -0.752531886100769, "step": 2140 }, { "epoch": 0.03, "learning_rate": 1.2868087143883171e-06, "logits/chosen": -3.076878786087036, "logits/rejected": -3.065533399581909, "logps/chosen": -30.77134132385254, "logps/rejected": -107.0406494140625, "loss": 0.3511, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.22537048161029816, "rewards/margins": 0.9070711135864258, "rewards/rejected": -0.6817006468772888, "step": 2150 }, { "epoch": 0.03, "learning_rate": 1.2927938711994257e-06, "logits/chosen": -3.041832685470581, "logits/rejected": -3.005432605743408, "logps/chosen": -27.448150634765625, "logps/rejected": -113.26502990722656, "loss": 0.348, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1924225389957428, "rewards/margins": 0.9324980974197388, "rewards/rejected": -0.7400754690170288, "step": 2160 }, { "epoch": 0.03, "learning_rate": 1.2987790280105338e-06, "logits/chosen": -3.0202672481536865, "logits/rejected": -2.9799911975860596, "logps/chosen": -42.70472717285156, "logps/rejected": -93.97052001953125, "loss": 0.3867, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.1254398673772812, "rewards/margins": 0.6671620607376099, "rewards/rejected": -0.5417221188545227, "step": 2170 }, { "epoch": 0.03, "learning_rate": 1.3047641848216424e-06, "logits/chosen": -3.0143682956695557, "logits/rejected": -2.9815850257873535, "logps/chosen": -34.25292205810547, "logps/rejected": -102.34700775146484, "loss": 0.3749, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.181281179189682, "rewards/margins": 0.8182290196418762, "rewards/rejected": -0.636947751045227, "step": 2180 }, { "epoch": 0.03, "learning_rate": 1.3107493416327507e-06, "logits/chosen": -3.059507369995117, "logits/rejected": -3.0402252674102783, "logps/chosen": -23.722026824951172, "logps/rejected": -114.10566711425781, "loss": 0.336, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2246413677930832, "rewards/margins": 0.9764682650566101, "rewards/rejected": -0.7518268823623657, "step": 2190 }, { "epoch": 0.03, "learning_rate": 1.3167344984438593e-06, "logits/chosen": -3.008615016937256, "logits/rejected": -2.985729694366455, "logps/chosen": -14.570306777954102, "logps/rejected": -123.7719955444336, "loss": 0.3139, "rewards/accuracies": 1.0, "rewards/chosen": 0.2764149010181427, "rewards/margins": 1.1212085485458374, "rewards/rejected": -0.8447936773300171, "step": 2200 }, { "epoch": 0.03, "learning_rate": 1.3227196552549678e-06, "logits/chosen": -3.0359675884246826, "logits/rejected": -2.9722607135772705, "logps/chosen": -29.461315155029297, "logps/rejected": -116.5088882446289, "loss": 0.319, "rewards/accuracies": 1.0, "rewards/chosen": 0.2638457715511322, "rewards/margins": 1.0150976181030273, "rewards/rejected": -0.7512518167495728, "step": 2210 }, { "epoch": 0.03, "learning_rate": 1.3287048120660762e-06, "logits/chosen": -3.0347485542297363, "logits/rejected": -2.9940810203552246, "logps/chosen": -31.282794952392578, "logps/rejected": -109.78730773925781, "loss": 0.368, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.19272592663764954, "rewards/margins": 0.8860715627670288, "rewards/rejected": -0.6933457255363464, "step": 2220 }, { "epoch": 0.03, "learning_rate": 1.3346899688771847e-06, "logits/chosen": -3.0467560291290283, "logits/rejected": -3.014968156814575, "logps/chosen": -19.510929107666016, "logps/rejected": -117.1983642578125, "loss": 0.3305, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.23928621411323547, "rewards/margins": 1.0237858295440674, "rewards/rejected": -0.7844996452331543, "step": 2230 }, { "epoch": 0.03, "learning_rate": 1.3406751256882933e-06, "logits/chosen": -2.9990344047546387, "logits/rejected": -2.986015796661377, "logps/chosen": -15.017468452453613, "logps/rejected": -116.31953430175781, "loss": 0.3299, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.24245119094848633, "rewards/margins": 1.03984534740448, "rewards/rejected": -0.7973941564559937, "step": 2240 }, { "epoch": 0.03, "learning_rate": 1.3466602824994016e-06, "logits/chosen": -3.066215991973877, "logits/rejected": -3.0166802406311035, "logps/chosen": -26.773921966552734, "logps/rejected": -110.45039367675781, "loss": 0.3692, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.22474458813667297, "rewards/margins": 0.9170758128166199, "rewards/rejected": -0.6923312544822693, "step": 2250 }, { "epoch": 0.03, "learning_rate": 1.3526454393105102e-06, "logits/chosen": -3.0212929248809814, "logits/rejected": -2.9792604446411133, "logps/chosen": -14.66914176940918, "logps/rejected": -126.24751281738281, "loss": 0.3317, "rewards/accuracies": 1.0, "rewards/chosen": 0.284410685300827, "rewards/margins": 1.1660798788070679, "rewards/rejected": -0.8816690444946289, "step": 2260 }, { "epoch": 0.03, "learning_rate": 1.3586305961216185e-06, "logits/chosen": -3.011422634124756, "logits/rejected": -2.970122814178467, "logps/chosen": -21.477542877197266, "logps/rejected": -119.2179946899414, "loss": 0.3375, "rewards/accuracies": 1.0, "rewards/chosen": 0.26992878317832947, "rewards/margins": 1.0722101926803589, "rewards/rejected": -0.802281379699707, "step": 2270 }, { "epoch": 0.03, "learning_rate": 1.364615752932727e-06, "logits/chosen": -3.025453805923462, "logits/rejected": -2.979205846786499, "logps/chosen": -23.512014389038086, "logps/rejected": -119.1420669555664, "loss": 0.3184, "rewards/accuracies": 1.0, "rewards/chosen": 0.25236400961875916, "rewards/margins": 1.0547195672988892, "rewards/rejected": -0.8023554086685181, "step": 2280 }, { "epoch": 0.03, "learning_rate": 1.3706009097438356e-06, "logits/chosen": -3.015718460083008, "logits/rejected": -2.970726728439331, "logps/chosen": -26.410680770874023, "logps/rejected": -110.9147720336914, "loss": 0.3753, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.23425312340259552, "rewards/margins": 0.9444621801376343, "rewards/rejected": -0.7102091908454895, "step": 2290 }, { "epoch": 0.03, "learning_rate": 1.3765860665549437e-06, "logits/chosen": -3.0484557151794434, "logits/rejected": -2.9836013317108154, "logps/chosen": -29.445226669311523, "logps/rejected": -115.12088775634766, "loss": 0.3438, "rewards/accuracies": 1.0, "rewards/chosen": 0.2417299747467041, "rewards/margins": 0.983445942401886, "rewards/rejected": -0.7417159676551819, "step": 2300 }, { "epoch": 0.03, "learning_rate": 1.3825712233660523e-06, "logits/chosen": -3.0371689796447754, "logits/rejected": -2.996868133544922, "logps/chosen": -18.874420166015625, "logps/rejected": -116.43782806396484, "loss": 0.3456, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.24984140694141388, "rewards/margins": 1.0296016931533813, "rewards/rejected": -0.7797603011131287, "step": 2310 }, { "epoch": 0.03, "learning_rate": 1.3885563801771606e-06, "logits/chosen": -3.0525708198547363, "logits/rejected": -3.0195345878601074, "logps/chosen": -28.6385555267334, "logps/rejected": -106.53062438964844, "loss": 0.3854, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.18472465872764587, "rewards/margins": 0.8398686647415161, "rewards/rejected": -0.6551438570022583, "step": 2320 }, { "epoch": 0.03, "learning_rate": 1.3945415369882692e-06, "logits/chosen": -3.0130271911621094, "logits/rejected": -2.9698328971862793, "logps/chosen": -24.159759521484375, "logps/rejected": -122.55133056640625, "loss": 0.3413, "rewards/accuracies": 1.0, "rewards/chosen": 0.26966676115989685, "rewards/margins": 1.0995564460754395, "rewards/rejected": -0.8298897743225098, "step": 2330 }, { "epoch": 0.03, "learning_rate": 1.4005266937993777e-06, "logits/chosen": -3.060502290725708, "logits/rejected": -3.0210022926330566, "logps/chosen": -24.506053924560547, "logps/rejected": -120.4146499633789, "loss": 0.3425, "rewards/accuracies": 1.0, "rewards/chosen": 0.2483993023633957, "rewards/margins": 1.042006254196167, "rewards/rejected": -0.7936070561408997, "step": 2340 }, { "epoch": 0.03, "learning_rate": 1.406511850610486e-06, "logits/chosen": -3.0521087646484375, "logits/rejected": -3.0172061920166016, "logps/chosen": -19.18907928466797, "logps/rejected": -119.1736831665039, "loss": 0.3457, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.23405346274375916, "rewards/margins": 1.0464626550674438, "rewards/rejected": -0.8124092221260071, "step": 2350 }, { "epoch": 0.03, "learning_rate": 1.4124970074215946e-06, "logits/chosen": -3.0358567237854004, "logits/rejected": -2.972255229949951, "logps/chosen": -22.0893497467041, "logps/rejected": -126.5716781616211, "loss": 0.2996, "rewards/accuracies": 1.0, "rewards/chosen": 0.26630324125289917, "rewards/margins": 1.1418797969818115, "rewards/rejected": -0.875576376914978, "step": 2360 }, { "epoch": 0.03, "learning_rate": 1.418482164232703e-06, "logits/chosen": -3.0162880420684814, "logits/rejected": -2.9899685382843018, "logps/chosen": -22.02424430847168, "logps/rejected": -117.54454040527344, "loss": 0.316, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.22200560569763184, "rewards/margins": 1.0070116519927979, "rewards/rejected": -0.785006046295166, "step": 2370 }, { "epoch": 0.03, "learning_rate": 1.4244673210438115e-06, "logits/chosen": -3.0216033458709717, "logits/rejected": -3.006716251373291, "logps/chosen": -30.319103240966797, "logps/rejected": -120.79178619384766, "loss": 0.3425, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.21175046265125275, "rewards/margins": 1.0379470586776733, "rewards/rejected": -0.826196551322937, "step": 2380 }, { "epoch": 0.03, "learning_rate": 1.43045247785492e-06, "logits/chosen": -3.0094058513641357, "logits/rejected": -2.989738941192627, "logps/chosen": -20.98488998413086, "logps/rejected": -125.59368896484375, "loss": 0.3515, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2543037533760071, "rewards/margins": 1.127861738204956, "rewards/rejected": -0.8735581636428833, "step": 2390 }, { "epoch": 0.03, "learning_rate": 1.4364376346660282e-06, "logits/chosen": -3.0422003269195557, "logits/rejected": -3.0078141689300537, "logps/chosen": -15.964128494262695, "logps/rejected": -126.29820251464844, "loss": 0.3077, "rewards/accuracies": 1.0, "rewards/chosen": 0.2651030421257019, "rewards/margins": 1.1397387981414795, "rewards/rejected": -0.8746356964111328, "step": 2400 }, { "epoch": 0.03, "learning_rate": 1.442422791477137e-06, "logits/chosen": -3.0208027362823486, "logits/rejected": -2.9981157779693604, "logps/chosen": -24.307659149169922, "logps/rejected": -127.73692321777344, "loss": 0.3776, "rewards/accuracies": 1.0, "rewards/chosen": 0.24792936444282532, "rewards/margins": 1.1215101480484009, "rewards/rejected": -0.8735807538032532, "step": 2410 }, { "epoch": 0.03, "learning_rate": 1.448407948288245e-06, "logits/chosen": -3.074977159500122, "logits/rejected": -3.0415890216827393, "logps/chosen": -23.237279891967773, "logps/rejected": -120.3862075805664, "loss": 0.2999, "rewards/accuracies": 1.0, "rewards/chosen": 0.2634797692298889, "rewards/margins": 1.0817368030548096, "rewards/rejected": -0.8182570338249207, "step": 2420 }, { "epoch": 0.03, "learning_rate": 1.4543931050993537e-06, "logits/chosen": -3.0221617221832275, "logits/rejected": -2.9886207580566406, "logps/chosen": -18.518606185913086, "logps/rejected": -124.3810806274414, "loss": 0.3104, "rewards/accuracies": 1.0, "rewards/chosen": 0.256295382976532, "rewards/margins": 1.104503870010376, "rewards/rejected": -0.8482083082199097, "step": 2430 }, { "epoch": 0.03, "learning_rate": 1.4603782619104622e-06, "logits/chosen": -3.028630018234253, "logits/rejected": -2.9760310649871826, "logps/chosen": -22.277748107910156, "logps/rejected": -126.94224548339844, "loss": 0.3102, "rewards/accuracies": 1.0, "rewards/chosen": 0.2673611044883728, "rewards/margins": 1.1387555599212646, "rewards/rejected": -0.8713944554328918, "step": 2440 }, { "epoch": 0.03, "learning_rate": 1.4663634187215706e-06, "logits/chosen": -3.0286381244659424, "logits/rejected": -2.9863643646240234, "logps/chosen": -21.376850128173828, "logps/rejected": -117.0934066772461, "loss": 0.3411, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.2063603401184082, "rewards/margins": 0.9978941082954407, "rewards/rejected": -0.7915337085723877, "step": 2450 }, { "epoch": 0.03, "learning_rate": 1.4723485755326791e-06, "logits/chosen": -3.0402672290802, "logits/rejected": -3.0033152103424072, "logps/chosen": -16.705171585083008, "logps/rejected": -117.33650970458984, "loss": 0.3417, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.24842579662799835, "rewards/margins": 1.0467751026153564, "rewards/rejected": -0.7983493208885193, "step": 2460 }, { "epoch": 0.03, "learning_rate": 1.4783337323437874e-06, "logits/chosen": -3.032562255859375, "logits/rejected": -3.0004844665527344, "logps/chosen": -21.081031799316406, "logps/rejected": -126.4988021850586, "loss": 0.3088, "rewards/accuracies": 1.0, "rewards/chosen": 0.2742374539375305, "rewards/margins": 1.1435496807098389, "rewards/rejected": -0.8693121671676636, "step": 2470 }, { "epoch": 0.03, "learning_rate": 1.484318889154896e-06, "logits/chosen": -3.0033910274505615, "logits/rejected": -2.962822675704956, "logps/chosen": -20.251924514770508, "logps/rejected": -118.087158203125, "loss": 0.3228, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.24191895127296448, "rewards/margins": 1.034372091293335, "rewards/rejected": -0.7924532890319824, "step": 2480 }, { "epoch": 0.03, "learning_rate": 1.4903040459660046e-06, "logits/chosen": -3.055464267730713, "logits/rejected": -3.0194051265716553, "logps/chosen": -25.43471908569336, "logps/rejected": -116.54893493652344, "loss": 0.3543, "rewards/accuracies": 1.0, "rewards/chosen": 0.252069890499115, "rewards/margins": 1.0131888389587402, "rewards/rejected": -0.7611188292503357, "step": 2490 }, { "epoch": 0.03, "learning_rate": 1.496289202777113e-06, "logits/chosen": -3.0456156730651855, "logits/rejected": -3.0183253288269043, "logps/chosen": -19.012582778930664, "logps/rejected": -129.33253479003906, "loss": 0.2879, "rewards/accuracies": 1.0, "rewards/chosen": 0.2537485659122467, "rewards/margins": 1.1541041135787964, "rewards/rejected": -0.9003555178642273, "step": 2500 }, { "epoch": 0.03, "learning_rate": 1.5022743595882214e-06, "logits/chosen": -3.0463125705718994, "logits/rejected": -3.0108799934387207, "logps/chosen": -20.139902114868164, "logps/rejected": -123.0846176147461, "loss": 0.3389, "rewards/accuracies": 1.0, "rewards/chosen": 0.2612239420413971, "rewards/margins": 1.1004663705825806, "rewards/rejected": -0.839242160320282, "step": 2510 }, { "epoch": 0.03, "learning_rate": 1.5082595163993296e-06, "logits/chosen": -2.9996981620788574, "logits/rejected": -2.9727325439453125, "logps/chosen": -13.115266799926758, "logps/rejected": -137.2884979248047, "loss": 0.2651, "rewards/accuracies": 1.0, "rewards/chosen": 0.27862411737442017, "rewards/margins": 1.274902105331421, "rewards/rejected": -0.996277928352356, "step": 2520 }, { "epoch": 0.03, "learning_rate": 1.5142446732104381e-06, "logits/chosen": -3.0206916332244873, "logits/rejected": -2.989065647125244, "logps/chosen": -19.295032501220703, "logps/rejected": -126.18377685546875, "loss": 0.3008, "rewards/accuracies": 1.0, "rewards/chosen": 0.2634628117084503, "rewards/margins": 1.1381051540374756, "rewards/rejected": -0.8746424913406372, "step": 2530 }, { "epoch": 0.03, "learning_rate": 1.5202298300215467e-06, "logits/chosen": -3.0032050609588623, "logits/rejected": -2.9883739948272705, "logps/chosen": -14.441792488098145, "logps/rejected": -138.69747924804688, "loss": 0.2606, "rewards/accuracies": 1.0, "rewards/chosen": 0.2637493312358856, "rewards/margins": 1.2523080110549927, "rewards/rejected": -0.9885585904121399, "step": 2540 }, { "epoch": 0.03, "learning_rate": 1.526214986832655e-06, "logits/chosen": -3.0412521362304688, "logits/rejected": -3.0113587379455566, "logps/chosen": -20.725841522216797, "logps/rejected": -135.65057373046875, "loss": 0.2884, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.24639959633350372, "rewards/margins": 1.2241579294204712, "rewards/rejected": -0.9777582883834839, "step": 2550 }, { "epoch": 0.03, "learning_rate": 1.5322001436437636e-06, "logits/chosen": -3.0213279724121094, "logits/rejected": -2.974670886993408, "logps/chosen": -30.691631317138672, "logps/rejected": -123.3857192993164, "loss": 0.3022, "rewards/accuracies": 1.0, "rewards/chosen": 0.26487603783607483, "rewards/margins": 1.0952110290527344, "rewards/rejected": -0.8303348422050476, "step": 2560 }, { "epoch": 0.03, "learning_rate": 1.538185300454872e-06, "logits/chosen": -3.065622329711914, "logits/rejected": -3.021813154220581, "logps/chosen": -26.023656845092773, "logps/rejected": -127.1125259399414, "loss": 0.3281, "rewards/accuracies": 1.0, "rewards/chosen": 0.24323716759681702, "rewards/margins": 1.1050208806991577, "rewards/rejected": -0.8617838025093079, "step": 2570 }, { "epoch": 0.03, "learning_rate": 1.5441704572659805e-06, "logits/chosen": -2.953381299972534, "logits/rejected": -2.9141573905944824, "logps/chosen": -11.511762619018555, "logps/rejected": -140.37905883789062, "loss": 0.2842, "rewards/accuracies": 1.0, "rewards/chosen": 0.2809494137763977, "rewards/margins": 1.3021657466888428, "rewards/rejected": -1.0212162733078003, "step": 2580 }, { "epoch": 0.03, "learning_rate": 1.550155614077089e-06, "logits/chosen": -2.996351718902588, "logits/rejected": -2.970388412475586, "logps/chosen": -16.35747718811035, "logps/rejected": -131.7394256591797, "loss": 0.2819, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2537541389465332, "rewards/margins": 1.1856986284255981, "rewards/rejected": -0.9319443702697754, "step": 2590 }, { "epoch": 0.03, "learning_rate": 1.5561407708881974e-06, "logits/chosen": -3.0321152210235596, "logits/rejected": -3.0173726081848145, "logps/chosen": -15.540788650512695, "logps/rejected": -134.63296508789062, "loss": 0.2744, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2552971839904785, "rewards/margins": 1.2274082899093628, "rewards/rejected": -0.9721112251281738, "step": 2600 }, { "epoch": 0.03, "learning_rate": 1.562125927699306e-06, "logits/chosen": -3.0271284580230713, "logits/rejected": -2.9880056381225586, "logps/chosen": -39.278385162353516, "logps/rejected": -122.33724212646484, "loss": 0.2896, "rewards/accuracies": 1.0, "rewards/chosen": 0.2531677484512329, "rewards/margins": 1.0736925601959229, "rewards/rejected": -0.8205248117446899, "step": 2610 }, { "epoch": 0.03, "learning_rate": 1.5681110845104145e-06, "logits/chosen": -2.9998464584350586, "logits/rejected": -2.9709644317626953, "logps/chosen": -26.327098846435547, "logps/rejected": -131.97006225585938, "loss": 0.3009, "rewards/accuracies": 1.0, "rewards/chosen": 0.2491854876279831, "rewards/margins": 1.1799864768981934, "rewards/rejected": -0.9308007955551147, "step": 2620 }, { "epoch": 0.03, "learning_rate": 1.5740962413215228e-06, "logits/chosen": -3.0085442066192627, "logits/rejected": -2.9830596446990967, "logps/chosen": -20.752553939819336, "logps/rejected": -129.6886444091797, "loss": 0.3003, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.23603525757789612, "rewards/margins": 1.1493254899978638, "rewards/rejected": -0.9132903814315796, "step": 2630 }, { "epoch": 0.03, "learning_rate": 1.5800813981326314e-06, "logits/chosen": -3.058830738067627, "logits/rejected": -3.0269086360931396, "logps/chosen": -27.915191650390625, "logps/rejected": -129.21664428710938, "loss": 0.2923, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2469487190246582, "rewards/margins": 1.1481273174285889, "rewards/rejected": -0.9011784791946411, "step": 2640 }, { "epoch": 0.03, "learning_rate": 1.5860665549437395e-06, "logits/chosen": -3.0091500282287598, "logits/rejected": -2.9950075149536133, "logps/chosen": -30.977718353271484, "logps/rejected": -118.20306396484375, "loss": 0.3402, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.09617109596729279, "rewards/margins": 0.8946582078933716, "rewards/rejected": -0.7984870672225952, "step": 2650 }, { "epoch": 0.03, "learning_rate": 1.592051711754848e-06, "logits/chosen": -3.0251338481903076, "logits/rejected": -2.9606285095214844, "logps/chosen": -34.73695755004883, "logps/rejected": -117.763671875, "loss": 0.3598, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2212434709072113, "rewards/margins": 0.9869757890701294, "rewards/rejected": -0.7657322883605957, "step": 2660 }, { "epoch": 0.03, "learning_rate": 1.5980368685659566e-06, "logits/chosen": -3.05200457572937, "logits/rejected": -2.986396312713623, "logps/chosen": -30.668338775634766, "logps/rejected": -118.41645812988281, "loss": 0.3093, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.23941627144813538, "rewards/margins": 1.0332177877426147, "rewards/rejected": -0.7938016653060913, "step": 2670 }, { "epoch": 0.03, "learning_rate": 1.604022025377065e-06, "logits/chosen": -3.0125041007995605, "logits/rejected": -2.9539074897766113, "logps/chosen": -24.34776496887207, "logps/rejected": -124.12614440917969, "loss": 0.3085, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.2083745002746582, "rewards/margins": 1.060196876525879, "rewards/rejected": -0.8518223762512207, "step": 2680 }, { "epoch": 0.03, "learning_rate": 1.6100071821881735e-06, "logits/chosen": -3.011554718017578, "logits/rejected": -2.98128604888916, "logps/chosen": -39.933006286621094, "logps/rejected": -121.67118072509766, "loss": 0.3318, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.1563471406698227, "rewards/margins": 0.9792704582214355, "rewards/rejected": -0.8229233622550964, "step": 2690 }, { "epoch": 0.03, "learning_rate": 1.6159923389992818e-06, "logits/chosen": -3.024693250656128, "logits/rejected": -2.989534378051758, "logps/chosen": -22.058780670166016, "logps/rejected": -128.2741241455078, "loss": 0.3135, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2422667294740677, "rewards/margins": 1.1506534814834595, "rewards/rejected": -0.9083865880966187, "step": 2700 }, { "epoch": 0.03, "learning_rate": 1.6219774958103904e-06, "logits/chosen": -3.045319080352783, "logits/rejected": -2.9901633262634277, "logps/chosen": -20.95357322692871, "logps/rejected": -135.87709045410156, "loss": 0.2858, "rewards/accuracies": 1.0, "rewards/chosen": 0.2800377905368805, "rewards/margins": 1.237973928451538, "rewards/rejected": -0.9579361081123352, "step": 2710 }, { "epoch": 0.03, "learning_rate": 1.627962652621499e-06, "logits/chosen": -3.0634186267852783, "logits/rejected": -3.0353214740753174, "logps/chosen": -27.83573341369629, "logps/rejected": -127.72843933105469, "loss": 0.2975, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2134408950805664, "rewards/margins": 1.0854547023773193, "rewards/rejected": -0.8720137476921082, "step": 2720 }, { "epoch": 0.03, "learning_rate": 1.6339478094326073e-06, "logits/chosen": -3.0291786193847656, "logits/rejected": -3.0080113410949707, "logps/chosen": -16.622154235839844, "logps/rejected": -135.3357391357422, "loss": 0.284, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.24048760533332825, "rewards/margins": 1.2090933322906494, "rewards/rejected": -0.9686056971549988, "step": 2730 }, { "epoch": 0.03, "learning_rate": 1.6399329662437158e-06, "logits/chosen": -3.027923822402954, "logits/rejected": -3.0083470344543457, "logps/chosen": -18.38250732421875, "logps/rejected": -138.54794311523438, "loss": 0.2731, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2479892522096634, "rewards/margins": 1.2658799886703491, "rewards/rejected": -1.0178906917572021, "step": 2740 }, { "epoch": 0.03, "learning_rate": 1.645918123054824e-06, "logits/chosen": -2.9476230144500732, "logits/rejected": -2.9042487144470215, "logps/chosen": -24.54041290283203, "logps/rejected": -129.34945678710938, "loss": 0.3005, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2200092375278473, "rewards/margins": 1.1296135187149048, "rewards/rejected": -0.9096041917800903, "step": 2750 }, { "epoch": 0.03, "learning_rate": 1.6519032798659325e-06, "logits/chosen": -3.039025068283081, "logits/rejected": -2.996687412261963, "logps/chosen": -20.145885467529297, "logps/rejected": -132.4969024658203, "loss": 0.3293, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2504836916923523, "rewards/margins": 1.181280255317688, "rewards/rejected": -0.9307964444160461, "step": 2760 }, { "epoch": 0.03, "learning_rate": 1.6578884366770413e-06, "logits/chosen": -2.9903275966644287, "logits/rejected": -2.9687676429748535, "logps/chosen": -17.837120056152344, "logps/rejected": -139.8753662109375, "loss": 0.2702, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.25948816537857056, "rewards/margins": 1.2716140747070312, "rewards/rejected": -1.012126088142395, "step": 2770 }, { "epoch": 0.03, "learning_rate": 1.6638735934881494e-06, "logits/chosen": -3.017615795135498, "logits/rejected": -2.9658305644989014, "logps/chosen": -14.785379409790039, "logps/rejected": -140.3435821533203, "loss": 0.3109, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.238705113530159, "rewards/margins": 1.2612287998199463, "rewards/rejected": -1.0225236415863037, "step": 2780 }, { "epoch": 0.03, "learning_rate": 1.669858750299258e-06, "logits/chosen": -3.053385019302368, "logits/rejected": -2.9809672832489014, "logps/chosen": -21.347448348999023, "logps/rejected": -137.5646514892578, "loss": 0.3258, "rewards/accuracies": 1.0, "rewards/chosen": 0.2833128571510315, "rewards/margins": 1.2608550786972046, "rewards/rejected": -0.9775424003601074, "step": 2790 }, { "epoch": 0.03, "learning_rate": 1.6758439071103663e-06, "logits/chosen": -3.0182814598083496, "logits/rejected": -2.9799036979675293, "logps/chosen": -24.691192626953125, "logps/rejected": -137.3525390625, "loss": 0.3081, "rewards/accuracies": 1.0, "rewards/chosen": 0.25468170642852783, "rewards/margins": 1.2279119491577148, "rewards/rejected": -0.9732301831245422, "step": 2800 }, { "epoch": 0.03, "learning_rate": 1.6818290639214749e-06, "logits/chosen": -3.0290439128875732, "logits/rejected": -2.978914976119995, "logps/chosen": -27.688854217529297, "logps/rejected": -132.77125549316406, "loss": 0.2664, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.24808374047279358, "rewards/margins": 1.1636348962783813, "rewards/rejected": -0.9155510663986206, "step": 2810 }, { "epoch": 0.03, "learning_rate": 1.6878142207325834e-06, "logits/chosen": -3.034538745880127, "logits/rejected": -2.988266706466675, "logps/chosen": -21.057140350341797, "logps/rejected": -136.82391357421875, "loss": 0.2809, "rewards/accuracies": 1.0, "rewards/chosen": 0.2479582577943802, "rewards/margins": 1.2295602560043335, "rewards/rejected": -0.9816020727157593, "step": 2820 }, { "epoch": 0.03, "learning_rate": 1.6937993775436918e-06, "logits/chosen": -3.0259366035461426, "logits/rejected": -2.9686293601989746, "logps/chosen": -24.30469512939453, "logps/rejected": -131.84791564941406, "loss": 0.3083, "rewards/accuracies": 1.0, "rewards/chosen": 0.25996461510658264, "rewards/margins": 1.1597585678100586, "rewards/rejected": -0.8997939825057983, "step": 2830 }, { "epoch": 0.03, "learning_rate": 1.6997845343548003e-06, "logits/chosen": -2.9899816513061523, "logits/rejected": -2.9617831707000732, "logps/chosen": -27.32953453063965, "logps/rejected": -129.8206787109375, "loss": 0.3266, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.11940853297710419, "rewards/margins": 1.0448278188705444, "rewards/rejected": -0.9254193305969238, "step": 2840 }, { "epoch": 0.03, "learning_rate": 1.7057696911659087e-06, "logits/chosen": -3.0335898399353027, "logits/rejected": -2.9889001846313477, "logps/chosen": -16.77950668334961, "logps/rejected": -139.6163787841797, "loss": 0.3112, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2515026926994324, "rewards/margins": 1.2593716382980347, "rewards/rejected": -1.007869005203247, "step": 2850 }, { "epoch": 0.03, "learning_rate": 1.7117548479770172e-06, "logits/chosen": -2.9907844066619873, "logits/rejected": -2.9430716037750244, "logps/chosen": -21.677602767944336, "logps/rejected": -131.51095581054688, "loss": 0.3655, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2367161065340042, "rewards/margins": 1.1770168542861938, "rewards/rejected": -0.9403006434440613, "step": 2860 }, { "epoch": 0.03, "learning_rate": 1.7177400047881258e-06, "logits/chosen": -3.046492099761963, "logits/rejected": -3.011545181274414, "logps/chosen": -17.20138168334961, "logps/rejected": -141.84921264648438, "loss": 0.274, "rewards/accuracies": 1.0, "rewards/chosen": 0.27613699436187744, "rewards/margins": 1.2908488512039185, "rewards/rejected": -1.0147117376327515, "step": 2870 }, { "epoch": 0.03, "learning_rate": 1.723725161599234e-06, "logits/chosen": -3.040621757507324, "logits/rejected": -2.98626708984375, "logps/chosen": -36.44704055786133, "logps/rejected": -124.92594146728516, "loss": 0.2977, "rewards/accuracies": 1.0, "rewards/chosen": 0.2676721215248108, "rewards/margins": 1.0898289680480957, "rewards/rejected": -0.8221567273139954, "step": 2880 }, { "epoch": 0.03, "learning_rate": 1.7297103184103424e-06, "logits/chosen": -3.0358710289001465, "logits/rejected": -3.0098049640655518, "logps/chosen": -20.863231658935547, "logps/rejected": -133.98898315429688, "loss": 0.2573, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.25037452578544617, "rewards/margins": 1.2184934616088867, "rewards/rejected": -0.9681186676025391, "step": 2890 }, { "epoch": 0.03, "learning_rate": 1.7356954752214508e-06, "logits/chosen": -3.0406250953674316, "logits/rejected": -2.9657351970672607, "logps/chosen": -25.191341400146484, "logps/rejected": -131.3732147216797, "loss": 0.2718, "rewards/accuracies": 1.0, "rewards/chosen": 0.2552691102027893, "rewards/margins": 1.1702955961227417, "rewards/rejected": -0.9150264859199524, "step": 2900 }, { "epoch": 0.03, "learning_rate": 1.7416806320325593e-06, "logits/chosen": -3.0419692993164062, "logits/rejected": -3.0190365314483643, "logps/chosen": -18.14461898803711, "logps/rejected": -135.91172790527344, "loss": 0.2544, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.25036001205444336, "rewards/margins": 1.2348895072937012, "rewards/rejected": -0.9845294952392578, "step": 2910 }, { "epoch": 0.03, "learning_rate": 1.747665788843668e-06, "logits/chosen": -3.0287258625030518, "logits/rejected": -2.9810025691986084, "logps/chosen": -23.95147132873535, "logps/rejected": -135.60862731933594, "loss": 0.3146, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.21915681660175323, "rewards/margins": 1.1960879564285278, "rewards/rejected": -0.976931095123291, "step": 2920 }, { "epoch": 0.04, "learning_rate": 1.7536509456547762e-06, "logits/chosen": -3.0045688152313232, "logits/rejected": -2.964189052581787, "logps/chosen": -23.588953018188477, "logps/rejected": -141.66812133789062, "loss": 0.2712, "rewards/accuracies": 1.0, "rewards/chosen": 0.2707541584968567, "rewards/margins": 1.2904949188232422, "rewards/rejected": -1.0197408199310303, "step": 2930 }, { "epoch": 0.04, "learning_rate": 1.7596361024658848e-06, "logits/chosen": -3.0433499813079834, "logits/rejected": -3.0138468742370605, "logps/chosen": -13.291979789733887, "logps/rejected": -146.87535095214844, "loss": 0.283, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2608865201473236, "rewards/margins": 1.3465021848678589, "rewards/rejected": -1.0856157541275024, "step": 2940 }, { "epoch": 0.04, "learning_rate": 1.7656212592769931e-06, "logits/chosen": -3.029113292694092, "logits/rejected": -3.006244659423828, "logps/chosen": -27.9549503326416, "logps/rejected": -131.60964965820312, "loss": 0.2902, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.22395876049995422, "rewards/margins": 1.1571725606918335, "rewards/rejected": -0.9332138895988464, "step": 2950 }, { "epoch": 0.04, "learning_rate": 1.7716064160881017e-06, "logits/chosen": -3.0633862018585205, "logits/rejected": -3.0148215293884277, "logps/chosen": -32.29344177246094, "logps/rejected": -125.63896179199219, "loss": 0.3042, "rewards/accuracies": 1.0, "rewards/chosen": 0.25539228320121765, "rewards/margins": 1.096310019493103, "rewards/rejected": -0.8409177660942078, "step": 2960 }, { "epoch": 0.04, "learning_rate": 1.7775915728992102e-06, "logits/chosen": -3.025073528289795, "logits/rejected": -2.999619483947754, "logps/chosen": -29.34783363342285, "logps/rejected": -135.06634521484375, "loss": 0.3392, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20635969936847687, "rewards/margins": 1.1641494035720825, "rewards/rejected": -0.9577897191047668, "step": 2970 }, { "epoch": 0.04, "learning_rate": 1.7835767297103186e-06, "logits/chosen": -3.0508906841278076, "logits/rejected": -3.0127193927764893, "logps/chosen": -16.935970306396484, "logps/rejected": -140.4209442138672, "loss": 0.2898, "rewards/accuracies": 1.0, "rewards/chosen": 0.2758306562900543, "rewards/margins": 1.2999484539031982, "rewards/rejected": -1.0241178274154663, "step": 2980 }, { "epoch": 0.04, "learning_rate": 1.7895618865214271e-06, "logits/chosen": -3.017277240753174, "logits/rejected": -2.990163803100586, "logps/chosen": -12.317989349365234, "logps/rejected": -149.06263732910156, "loss": 0.2596, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2697729170322418, "rewards/margins": 1.3771603107452393, "rewards/rejected": -1.1073875427246094, "step": 2990 }, { "epoch": 0.04, "learning_rate": 1.7955470433325353e-06, "logits/chosen": -2.9774117469787598, "logits/rejected": -2.957369565963745, "logps/chosen": -13.104202270507812, "logps/rejected": -133.17393493652344, "loss": 0.2697, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.23500242829322815, "rewards/margins": 1.1851067543029785, "rewards/rejected": -0.9501043558120728, "step": 3000 }, { "epoch": 0.04, "eval_logits/chosen": -3.0861971378326416, "eval_logits/rejected": -3.0277702808380127, "eval_logps/chosen": -39.051849365234375, "eval_logps/rejected": -110.587646484375, "eval_loss": 0.33959078788757324, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.2212841510772705, "eval_rewards/margins": 0.8599068522453308, "eval_rewards/rejected": -0.6386227011680603, "eval_runtime": 1.2172, "eval_samples_per_second": 4.108, "eval_steps_per_second": 2.465, "step": 3000 }, { "epoch": 0.04, "learning_rate": 1.8015322001436438e-06, "logits/chosen": -3.0162038803100586, "logits/rejected": -2.9681029319763184, "logps/chosen": -14.130146980285645, "logps/rejected": -150.9864501953125, "loss": 0.2729, "rewards/accuracies": 1.0, "rewards/chosen": 0.29334306716918945, "rewards/margins": 1.4079563617706299, "rewards/rejected": -1.1146132946014404, "step": 3010 }, { "epoch": 0.04, "learning_rate": 1.8075173569547524e-06, "logits/chosen": -3.035358190536499, "logits/rejected": -2.9881043434143066, "logps/chosen": -34.47675704956055, "logps/rejected": -124.9987564086914, "loss": 0.3113, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20647363364696503, "rewards/margins": 1.061599850654602, "rewards/rejected": -0.8551262617111206, "step": 3020 }, { "epoch": 0.04, "learning_rate": 1.8135025137658607e-06, "logits/chosen": -3.063291311264038, "logits/rejected": -3.019822120666504, "logps/chosen": -20.469263076782227, "logps/rejected": -142.25071716308594, "loss": 0.2454, "rewards/accuracies": 1.0, "rewards/chosen": 0.28239917755126953, "rewards/margins": 1.3241589069366455, "rewards/rejected": -1.0417596101760864, "step": 3030 }, { "epoch": 0.04, "learning_rate": 1.8194876705769693e-06, "logits/chosen": -3.0360867977142334, "logits/rejected": -3.020159959793091, "logps/chosen": -3.798058271408081, "logps/rejected": -164.03082275390625, "loss": 0.2362, "rewards/accuracies": 1.0, "rewards/chosen": 0.30777621269226074, "rewards/margins": 1.5870437622070312, "rewards/rejected": -1.2792675495147705, "step": 3040 }, { "epoch": 0.04, "learning_rate": 1.8254728273880778e-06, "logits/chosen": -3.0070364475250244, "logits/rejected": -2.954711437225342, "logps/chosen": -30.136978149414062, "logps/rejected": -127.52079772949219, "loss": 0.3014, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.17139805853366852, "rewards/margins": 1.0673145055770874, "rewards/rejected": -0.8959164619445801, "step": 3050 }, { "epoch": 0.04, "learning_rate": 1.8314579841991862e-06, "logits/chosen": -3.051201343536377, "logits/rejected": -3.0026323795318604, "logps/chosen": -27.983875274658203, "logps/rejected": -142.89077758789062, "loss": 0.2558, "rewards/accuracies": 1.0, "rewards/chosen": 0.2732890248298645, "rewards/margins": 1.292039155960083, "rewards/rejected": -1.0187500715255737, "step": 3060 }, { "epoch": 0.04, "learning_rate": 1.8374431410102947e-06, "logits/chosen": -3.0450668334960938, "logits/rejected": -2.9902503490448, "logps/chosen": -32.1643180847168, "logps/rejected": -129.66085815429688, "loss": 0.3524, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.18661770224571228, "rewards/margins": 1.0795884132385254, "rewards/rejected": -0.8929705619812012, "step": 3070 }, { "epoch": 0.04, "learning_rate": 1.843428297821403e-06, "logits/chosen": -3.0315749645233154, "logits/rejected": -2.9933390617370605, "logps/chosen": -16.968990325927734, "logps/rejected": -151.94583129882812, "loss": 0.2297, "rewards/accuracies": 1.0, "rewards/chosen": 0.26194459199905396, "rewards/margins": 1.3891077041625977, "rewards/rejected": -1.1271631717681885, "step": 3080 }, { "epoch": 0.04, "learning_rate": 1.8494134546325116e-06, "logits/chosen": -3.0032825469970703, "logits/rejected": -2.9529318809509277, "logps/chosen": -21.6673641204834, "logps/rejected": -142.65957641601562, "loss": 0.2546, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2100980281829834, "rewards/margins": 1.2629717588424683, "rewards/rejected": -1.0528737306594849, "step": 3090 }, { "epoch": 0.04, "learning_rate": 1.8553986114436202e-06, "logits/chosen": -3.016291856765747, "logits/rejected": -2.983593225479126, "logps/chosen": -16.172954559326172, "logps/rejected": -141.650146484375, "loss": 0.2514, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.27831003069877625, "rewards/margins": 1.293750286102295, "rewards/rejected": -1.0154402256011963, "step": 3100 }, { "epoch": 0.04, "learning_rate": 1.8613837682547283e-06, "logits/chosen": -3.0376248359680176, "logits/rejected": -2.988473415374756, "logps/chosen": -23.512630462646484, "logps/rejected": -145.74951171875, "loss": 0.2461, "rewards/accuracies": 1.0, "rewards/chosen": 0.2760933041572571, "rewards/margins": 1.337992548942566, "rewards/rejected": -1.0618994235992432, "step": 3110 }, { "epoch": 0.04, "learning_rate": 1.8673689250658368e-06, "logits/chosen": -3.033390522003174, "logits/rejected": -2.9839603900909424, "logps/chosen": -14.573641777038574, "logps/rejected": -159.07675170898438, "loss": 0.2324, "rewards/accuracies": 1.0, "rewards/chosen": 0.27487507462501526, "rewards/margins": 1.4928557872772217, "rewards/rejected": -1.2179806232452393, "step": 3120 }, { "epoch": 0.04, "learning_rate": 1.8733540818769452e-06, "logits/chosen": -3.040773391723633, "logits/rejected": -2.9960408210754395, "logps/chosen": -15.569560050964355, "logps/rejected": -151.63555908203125, "loss": 0.2462, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.26086193323135376, "rewards/margins": 1.3978636264801025, "rewards/rejected": -1.137001633644104, "step": 3130 }, { "epoch": 0.04, "learning_rate": 1.8793392386880537e-06, "logits/chosen": -2.97284197807312, "logits/rejected": -2.936224937438965, "logps/chosen": -20.04423713684082, "logps/rejected": -139.63992309570312, "loss": 0.3068, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.20792222023010254, "rewards/margins": 1.2224562168121338, "rewards/rejected": -1.0145339965820312, "step": 3140 }, { "epoch": 0.04, "learning_rate": 1.8853243954991623e-06, "logits/chosen": -3.014707088470459, "logits/rejected": -2.9821250438690186, "logps/chosen": -15.3587646484375, "logps/rejected": -156.46388244628906, "loss": 0.2181, "rewards/accuracies": 1.0, "rewards/chosen": 0.2731040418148041, "rewards/margins": 1.4708775281906128, "rewards/rejected": -1.1977735757827759, "step": 3150 }, { "epoch": 0.04, "learning_rate": 1.8913095523102706e-06, "logits/chosen": -3.015742063522339, "logits/rejected": -2.9702348709106445, "logps/chosen": -25.295137405395508, "logps/rejected": -151.01974487304688, "loss": 0.2292, "rewards/accuracies": 1.0, "rewards/chosen": 0.28239375352859497, "rewards/margins": 1.407570719718933, "rewards/rejected": -1.1251771450042725, "step": 3160 }, { "epoch": 0.04, "learning_rate": 1.8972947091213792e-06, "logits/chosen": -3.0013880729675293, "logits/rejected": -2.980152130126953, "logps/chosen": -24.074310302734375, "logps/rejected": -144.8268280029297, "loss": 0.2955, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.22853462398052216, "rewards/margins": 1.2824945449829102, "rewards/rejected": -1.0539599657058716, "step": 3170 }, { "epoch": 0.04, "learning_rate": 1.9032798659324875e-06, "logits/chosen": -3.018916606903076, "logits/rejected": -2.955052375793457, "logps/chosen": -17.454999923706055, "logps/rejected": -148.5893096923828, "loss": 0.2495, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.26234984397888184, "rewards/margins": 1.3472236394882202, "rewards/rejected": -1.084873914718628, "step": 3180 }, { "epoch": 0.04, "learning_rate": 1.909265022743596e-06, "logits/chosen": -2.983947992324829, "logits/rejected": -2.9652061462402344, "logps/chosen": -25.329174041748047, "logps/rejected": -147.1453399658203, "loss": 0.2562, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.1984163522720337, "rewards/margins": 1.280822992324829, "rewards/rejected": -1.0824064016342163, "step": 3190 }, { "epoch": 0.04, "learning_rate": 1.9152501795547044e-06, "logits/chosen": -3.007519245147705, "logits/rejected": -2.9754788875579834, "logps/chosen": -13.368730545043945, "logps/rejected": -153.67245483398438, "loss": 0.2473, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.26853078603744507, "rewards/margins": 1.4232908487319946, "rewards/rejected": -1.1547601222991943, "step": 3200 }, { "epoch": 0.04, "learning_rate": 1.921235336365813e-06, "logits/chosen": -3.0141613483428955, "logits/rejected": -2.982008457183838, "logps/chosen": -19.087390899658203, "logps/rejected": -158.61386108398438, "loss": 0.2167, "rewards/accuracies": 1.0, "rewards/chosen": 0.290615439414978, "rewards/margins": 1.4953975677490234, "rewards/rejected": -1.2047820091247559, "step": 3210 }, { "epoch": 0.04, "learning_rate": 1.9272204931769215e-06, "logits/chosen": -2.992335796356201, "logits/rejected": -2.9725708961486816, "logps/chosen": -13.750102043151855, "logps/rejected": -156.14862060546875, "loss": 0.2398, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.21633939445018768, "rewards/margins": 1.4038242101669312, "rewards/rejected": -1.187484860420227, "step": 3220 }, { "epoch": 0.04, "learning_rate": 1.9332056499880297e-06, "logits/chosen": -3.005185127258301, "logits/rejected": -2.943711042404175, "logps/chosen": -29.43896484375, "logps/rejected": -137.00021362304688, "loss": 0.2779, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.23182733356952667, "rewards/margins": 1.220719337463379, "rewards/rejected": -0.9888919591903687, "step": 3230 }, { "epoch": 0.04, "learning_rate": 1.939190806799138e-06, "logits/chosen": -2.9659595489501953, "logits/rejected": -2.945765972137451, "logps/chosen": -18.631893157958984, "logps/rejected": -159.7412872314453, "loss": 0.2388, "rewards/accuracies": 1.0, "rewards/chosen": 0.24409201741218567, "rewards/margins": 1.4625142812728882, "rewards/rejected": -1.2184221744537354, "step": 3240 }, { "epoch": 0.04, "learning_rate": 1.9451759636102468e-06, "logits/chosen": -3.0047128200531006, "logits/rejected": -2.9717369079589844, "logps/chosen": -14.842188835144043, "logps/rejected": -163.6923828125, "loss": 0.249, "rewards/accuracies": 1.0, "rewards/chosen": 0.28926950693130493, "rewards/margins": 1.5513780117034912, "rewards/rejected": -1.262108564376831, "step": 3250 }, { "epoch": 0.04, "learning_rate": 1.9511611204213553e-06, "logits/chosen": -2.998831272125244, "logits/rejected": -2.9653353691101074, "logps/chosen": -18.617218017578125, "logps/rejected": -151.1740264892578, "loss": 0.2648, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.23024198412895203, "rewards/margins": 1.3574345111846924, "rewards/rejected": -1.127192497253418, "step": 3260 }, { "epoch": 0.04, "learning_rate": 1.957146277232464e-06, "logits/chosen": -3.0139849185943604, "logits/rejected": -2.9899184703826904, "logps/chosen": -13.159208297729492, "logps/rejected": -163.72042846679688, "loss": 0.216, "rewards/accuracies": 1.0, "rewards/chosen": 0.2761247456073761, "rewards/margins": 1.5177448987960815, "rewards/rejected": -1.2416203022003174, "step": 3270 }, { "epoch": 0.04, "learning_rate": 1.963131434043572e-06, "logits/chosen": -3.0243027210235596, "logits/rejected": -2.9672622680664062, "logps/chosen": -29.3509578704834, "logps/rejected": -154.52328491210938, "loss": 0.2477, "rewards/accuracies": 1.0, "rewards/chosen": 0.24281072616577148, "rewards/margins": 1.3863180875778198, "rewards/rejected": -1.1435073614120483, "step": 3280 }, { "epoch": 0.04, "learning_rate": 1.9691165908546806e-06, "logits/chosen": -3.0014846324920654, "logits/rejected": -2.9397830963134766, "logps/chosen": -27.796253204345703, "logps/rejected": -148.82615661621094, "loss": 0.2529, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20377317070960999, "rewards/margins": 1.291565179824829, "rewards/rejected": -1.0877920389175415, "step": 3290 }, { "epoch": 0.04, "learning_rate": 1.975101747665789e-06, "logits/chosen": -3.0269291400909424, "logits/rejected": -3.001868486404419, "logps/chosen": -17.91554069519043, "logps/rejected": -164.6310577392578, "loss": 0.2211, "rewards/accuracies": 1.0, "rewards/chosen": 0.28850993514060974, "rewards/margins": 1.5575488805770874, "rewards/rejected": -1.2690390348434448, "step": 3300 }, { "epoch": 0.04, "learning_rate": 1.9810869044768972e-06, "logits/chosen": -3.0562236309051514, "logits/rejected": -2.99239444732666, "logps/chosen": -32.0618782043457, "logps/rejected": -142.27743530273438, "loss": 0.2849, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.21689040958881378, "rewards/margins": 1.2460203170776367, "rewards/rejected": -1.029129981994629, "step": 3310 }, { "epoch": 0.04, "learning_rate": 1.987072061288006e-06, "logits/chosen": -3.0205283164978027, "logits/rejected": -2.955393075942993, "logps/chosen": -28.476476669311523, "logps/rejected": -155.41619873046875, "loss": 0.2954, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2336074411869049, "rewards/margins": 1.3813351392745972, "rewards/rejected": -1.147727608680725, "step": 3320 }, { "epoch": 0.04, "learning_rate": 1.9930572180991143e-06, "logits/chosen": -3.036764144897461, "logits/rejected": -2.99306583404541, "logps/chosen": -27.387109756469727, "logps/rejected": -154.47900390625, "loss": 0.2472, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.26181426644325256, "rewards/margins": 1.4060195684432983, "rewards/rejected": -1.1442053318023682, "step": 3330 }, { "epoch": 0.04, "learning_rate": 1.999042374910223e-06, "logits/chosen": -3.020017385482788, "logits/rejected": -2.9710402488708496, "logps/chosen": -26.039026260375977, "logps/rejected": -159.49510192871094, "loss": 0.1982, "rewards/accuracies": 1.0, "rewards/chosen": 0.26378577947616577, "rewards/margins": 1.4673207998275757, "rewards/rejected": -1.2035350799560547, "step": 3340 }, { "epoch": 0.04, "learning_rate": 2.0050275317213314e-06, "logits/chosen": -3.031615734100342, "logits/rejected": -2.979780912399292, "logps/chosen": -31.497173309326172, "logps/rejected": -167.32443237304688, "loss": 0.2299, "rewards/accuracies": 1.0, "rewards/chosen": 0.2479274570941925, "rewards/margins": 1.5177937746047974, "rewards/rejected": -1.2698662281036377, "step": 3350 }, { "epoch": 0.04, "learning_rate": 2.0110126885324396e-06, "logits/chosen": -3.0151429176330566, "logits/rejected": -2.976285934448242, "logps/chosen": -28.081857681274414, "logps/rejected": -162.0555419921875, "loss": 0.2176, "rewards/accuracies": 1.0, "rewards/chosen": 0.22215652465820312, "rewards/margins": 1.4553195238113403, "rewards/rejected": -1.2331628799438477, "step": 3360 }, { "epoch": 0.04, "learning_rate": 2.016997845343548e-06, "logits/chosen": -3.0264556407928467, "logits/rejected": -3.000546932220459, "logps/chosen": -24.136730194091797, "logps/rejected": -153.43258666992188, "loss": 0.2518, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.16773387789726257, "rewards/margins": 1.3264983892440796, "rewards/rejected": -1.1587644815444946, "step": 3370 }, { "epoch": 0.04, "learning_rate": 2.0229830021546567e-06, "logits/chosen": -3.0379977226257324, "logits/rejected": -2.991224527359009, "logps/chosen": -16.710529327392578, "logps/rejected": -158.4412384033203, "loss": 0.2467, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.26632508635520935, "rewards/margins": 1.4567108154296875, "rewards/rejected": -1.1903856992721558, "step": 3380 }, { "epoch": 0.04, "learning_rate": 2.0289681589657652e-06, "logits/chosen": -3.0193066596984863, "logits/rejected": -2.974607229232788, "logps/chosen": -22.588560104370117, "logps/rejected": -144.5328826904297, "loss": 0.2615, "rewards/accuracies": 1.0, "rewards/chosen": 0.23261824250221252, "rewards/margins": 1.3010348081588745, "rewards/rejected": -1.0684165954589844, "step": 3390 }, { "epoch": 0.04, "learning_rate": 2.034953315776874e-06, "logits/chosen": -2.995605945587158, "logits/rejected": -2.961679697036743, "logps/chosen": -16.814189910888672, "logps/rejected": -169.32089233398438, "loss": 0.2172, "rewards/accuracies": 1.0, "rewards/chosen": 0.2683697044849396, "rewards/margins": 1.589723825454712, "rewards/rejected": -1.3213541507720947, "step": 3400 }, { "epoch": 0.04, "learning_rate": 2.040938472587982e-06, "logits/chosen": -3.02864146232605, "logits/rejected": -3.012575626373291, "logps/chosen": -12.215537071228027, "logps/rejected": -175.76791381835938, "loss": 0.2882, "rewards/accuracies": 1.0, "rewards/chosen": 0.28346338868141174, "rewards/margins": 1.6784999370574951, "rewards/rejected": -1.3950364589691162, "step": 3410 }, { "epoch": 0.04, "learning_rate": 2.0469236293990905e-06, "logits/chosen": -3.015115261077881, "logits/rejected": -2.973571538925171, "logps/chosen": -18.395980834960938, "logps/rejected": -161.10682678222656, "loss": 0.2101, "rewards/accuracies": 1.0, "rewards/chosen": 0.27193260192871094, "rewards/margins": 1.494916558265686, "rewards/rejected": -1.2229838371276855, "step": 3420 }, { "epoch": 0.04, "learning_rate": 2.052908786210199e-06, "logits/chosen": -3.0430216789245605, "logits/rejected": -3.0198442935943604, "logps/chosen": -34.727691650390625, "logps/rejected": -140.4332733154297, "loss": 0.308, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.08825097233057022, "rewards/margins": 1.1134380102157593, "rewards/rejected": -1.0251870155334473, "step": 3430 }, { "epoch": 0.04, "learning_rate": 2.058893943021307e-06, "logits/chosen": -3.013122797012329, "logits/rejected": -2.9487171173095703, "logps/chosen": -26.908700942993164, "logps/rejected": -163.8384246826172, "loss": 0.2056, "rewards/accuracies": 1.0, "rewards/chosen": 0.2431071251630783, "rewards/margins": 1.4825162887573242, "rewards/rejected": -1.2394092082977295, "step": 3440 }, { "epoch": 0.04, "learning_rate": 2.0648790998324157e-06, "logits/chosen": -3.0386853218078613, "logits/rejected": -3.008478879928589, "logps/chosen": -41.251548767089844, "logps/rejected": -162.99423217773438, "loss": 0.2514, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.121795654296875, "rewards/margins": 1.3650665283203125, "rewards/rejected": -1.2432708740234375, "step": 3450 }, { "epoch": 0.04, "learning_rate": 2.0708642566435243e-06, "logits/chosen": -3.036235809326172, "logits/rejected": -2.974853992462158, "logps/chosen": -30.288055419921875, "logps/rejected": -150.36175537109375, "loss": 0.2695, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.14448951184749603, "rewards/margins": 1.2475007772445679, "rewards/rejected": -1.1030112504959106, "step": 3460 }, { "epoch": 0.04, "learning_rate": 2.076849413454633e-06, "logits/chosen": -3.0113046169281006, "logits/rejected": -2.9377517700195312, "logps/chosen": -18.4032039642334, "logps/rejected": -169.35643005371094, "loss": 0.2184, "rewards/accuracies": 1.0, "rewards/chosen": 0.2429407835006714, "rewards/margins": 1.5650510787963867, "rewards/rejected": -1.3221101760864258, "step": 3470 }, { "epoch": 0.04, "learning_rate": 2.0828345702657414e-06, "logits/chosen": -3.018512010574341, "logits/rejected": -2.9477105140686035, "logps/chosen": -35.876365661621094, "logps/rejected": -166.98863220214844, "loss": 0.2078, "rewards/accuracies": 1.0, "rewards/chosen": 0.2630768418312073, "rewards/margins": 1.5009419918060303, "rewards/rejected": -1.2378652095794678, "step": 3480 }, { "epoch": 0.04, "learning_rate": 2.0888197270768495e-06, "logits/chosen": -3.006671190261841, "logits/rejected": -2.9804506301879883, "logps/chosen": -19.809988021850586, "logps/rejected": -172.72036743164062, "loss": 0.1854, "rewards/accuracies": 1.0, "rewards/chosen": 0.2847282290458679, "rewards/margins": 1.63052499294281, "rewards/rejected": -1.345796823501587, "step": 3490 }, { "epoch": 0.04, "learning_rate": 2.094804883887958e-06, "logits/chosen": -3.01723051071167, "logits/rejected": -2.9871342182159424, "logps/chosen": -30.3463134765625, "logps/rejected": -170.92990112304688, "loss": 0.1905, "rewards/accuracies": 1.0, "rewards/chosen": 0.20323574542999268, "rewards/margins": 1.5289607048034668, "rewards/rejected": -1.3257248401641846, "step": 3500 }, { "epoch": 0.04, "learning_rate": 2.100790040699066e-06, "logits/chosen": -2.9806618690490723, "logits/rejected": -2.8894717693328857, "logps/chosen": -30.660236358642578, "logps/rejected": -155.3065948486328, "loss": 0.2271, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.21662156283855438, "rewards/margins": 1.3709138631820679, "rewards/rejected": -1.154292345046997, "step": 3510 }, { "epoch": 0.04, "learning_rate": 2.106775197510175e-06, "logits/chosen": -3.040001392364502, "logits/rejected": -2.9355549812316895, "logps/chosen": -28.42267417907715, "logps/rejected": -164.1831512451172, "loss": 0.259, "rewards/accuracies": 1.0, "rewards/chosen": 0.26648837327957153, "rewards/margins": 1.496800422668457, "rewards/rejected": -1.2303121089935303, "step": 3520 }, { "epoch": 0.04, "learning_rate": 2.1127603543212837e-06, "logits/chosen": -3.031140089035034, "logits/rejected": -3.0002365112304688, "logps/chosen": -23.460153579711914, "logps/rejected": -168.14285278320312, "loss": 0.1879, "rewards/accuracies": 1.0, "rewards/chosen": 0.2562626302242279, "rewards/margins": 1.5488430261611938, "rewards/rejected": -1.2925803661346436, "step": 3530 }, { "epoch": 0.04, "learning_rate": 2.118745511132392e-06, "logits/chosen": -2.9970881938934326, "logits/rejected": -2.9576213359832764, "logps/chosen": -16.588134765625, "logps/rejected": -178.94277954101562, "loss": 0.2028, "rewards/accuracies": 1.0, "rewards/chosen": 0.2616289258003235, "rewards/margins": 1.6606266498565674, "rewards/rejected": -1.3989977836608887, "step": 3540 }, { "epoch": 0.04, "learning_rate": 2.1247306679435004e-06, "logits/chosen": -3.024984121322632, "logits/rejected": -2.978102207183838, "logps/chosen": -22.23536491394043, "logps/rejected": -172.95907592773438, "loss": 0.1972, "rewards/accuracies": 1.0, "rewards/chosen": 0.2575109302997589, "rewards/margins": 1.5978748798370361, "rewards/rejected": -1.34036386013031, "step": 3550 }, { "epoch": 0.04, "learning_rate": 2.1307158247546085e-06, "logits/chosen": -3.0141046047210693, "logits/rejected": -2.9800984859466553, "logps/chosen": -18.262741088867188, "logps/rejected": -174.53030395507812, "loss": 0.197, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.27159860730171204, "rewards/margins": 1.632464051246643, "rewards/rejected": -1.3608653545379639, "step": 3560 }, { "epoch": 0.04, "learning_rate": 2.136700981565717e-06, "logits/chosen": -3.018185615539551, "logits/rejected": -2.978658676147461, "logps/chosen": -22.561145782470703, "logps/rejected": -168.34799194335938, "loss": 0.2115, "rewards/accuracies": 1.0, "rewards/chosen": 0.24198894202709198, "rewards/margins": 1.538522720336914, "rewards/rejected": -1.2965338230133057, "step": 3570 }, { "epoch": 0.04, "learning_rate": 2.1426861383768256e-06, "logits/chosen": -3.0027143955230713, "logits/rejected": -2.9166641235351562, "logps/chosen": -23.4450626373291, "logps/rejected": -160.53529357910156, "loss": 0.2166, "rewards/accuracies": 1.0, "rewards/chosen": 0.24338987469673157, "rewards/margins": 1.4600751399993896, "rewards/rejected": -1.2166850566864014, "step": 3580 }, { "epoch": 0.04, "learning_rate": 2.148671295187934e-06, "logits/chosen": -3.0109405517578125, "logits/rejected": -2.9704995155334473, "logps/chosen": -30.363513946533203, "logps/rejected": -173.94873046875, "loss": 0.1933, "rewards/accuracies": 1.0, "rewards/chosen": 0.24023886024951935, "rewards/margins": 1.5750725269317627, "rewards/rejected": -1.3348337411880493, "step": 3590 }, { "epoch": 0.04, "learning_rate": 2.1546564519990427e-06, "logits/chosen": -2.9978106021881104, "logits/rejected": -2.9357523918151855, "logps/chosen": -18.98975944519043, "logps/rejected": -174.19834899902344, "loss": 0.1902, "rewards/accuracies": 1.0, "rewards/chosen": 0.25318285822868347, "rewards/margins": 1.6153976917266846, "rewards/rejected": -1.3622148036956787, "step": 3600 }, { "epoch": 0.04, "learning_rate": 2.160641608810151e-06, "logits/chosen": -3.0301241874694824, "logits/rejected": -3.0174367427825928, "logps/chosen": -28.887542724609375, "logps/rejected": -155.8094482421875, "loss": 0.2492, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.11318884789943695, "rewards/margins": 1.2915226221084595, "rewards/rejected": -1.1783336400985718, "step": 3610 }, { "epoch": 0.04, "learning_rate": 2.1666267656212594e-06, "logits/chosen": -3.0340161323547363, "logits/rejected": -2.97666335105896, "logps/chosen": -18.996780395507812, "logps/rejected": -175.41934204101562, "loss": 0.2106, "rewards/accuracies": 1.0, "rewards/chosen": 0.25835317373275757, "rewards/margins": 1.635952353477478, "rewards/rejected": -1.3775991201400757, "step": 3620 }, { "epoch": 0.04, "learning_rate": 2.172611922432368e-06, "logits/chosen": -3.039792537689209, "logits/rejected": -2.9951131343841553, "logps/chosen": -25.72396469116211, "logps/rejected": -177.6159210205078, "loss": 0.2286, "rewards/accuracies": 1.0, "rewards/chosen": 0.2356872260570526, "rewards/margins": 1.6101577281951904, "rewards/rejected": -1.3744707107543945, "step": 3630 }, { "epoch": 0.04, "learning_rate": 2.178597079243476e-06, "logits/chosen": -2.9734947681427, "logits/rejected": -2.938729763031006, "logps/chosen": -17.003353118896484, "logps/rejected": -181.80197143554688, "loss": 0.1891, "rewards/accuracies": 1.0, "rewards/chosen": 0.2756075859069824, "rewards/margins": 1.7028462886810303, "rewards/rejected": -1.4272388219833374, "step": 3640 }, { "epoch": 0.04, "learning_rate": 2.1845822360545847e-06, "logits/chosen": -3.0027554035186768, "logits/rejected": -2.9583654403686523, "logps/chosen": -24.7410831451416, "logps/rejected": -178.19882202148438, "loss": 0.2088, "rewards/accuracies": 1.0, "rewards/chosen": 0.22115226089954376, "rewards/margins": 1.5980578660964966, "rewards/rejected": -1.3769056797027588, "step": 3650 }, { "epoch": 0.04, "learning_rate": 2.1905673928656932e-06, "logits/chosen": -3.0462937355041504, "logits/rejected": -3.000284194946289, "logps/chosen": -27.694082260131836, "logps/rejected": -170.98361206054688, "loss": 0.2132, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20206239819526672, "rewards/margins": 1.5274598598480225, "rewards/rejected": -1.3253973722457886, "step": 3660 }, { "epoch": 0.04, "learning_rate": 2.1965525496768018e-06, "logits/chosen": -3.0614452362060547, "logits/rejected": -3.031022787094116, "logps/chosen": -23.17986488342285, "logps/rejected": -169.23565673828125, "loss": 0.2121, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20214739441871643, "rewards/margins": 1.5070797204971313, "rewards/rejected": -1.3049323558807373, "step": 3670 }, { "epoch": 0.04, "learning_rate": 2.2025377064879103e-06, "logits/chosen": -3.027392864227295, "logits/rejected": -3.003615140914917, "logps/chosen": -14.372332572937012, "logps/rejected": -183.09332275390625, "loss": 0.2395, "rewards/accuracies": 1.0, "rewards/chosen": 0.24860744178295135, "rewards/margins": 1.6955868005752563, "rewards/rejected": -1.446979284286499, "step": 3680 }, { "epoch": 0.04, "learning_rate": 2.2085228632990184e-06, "logits/chosen": -2.996262788772583, "logits/rejected": -2.9277377128601074, "logps/chosen": -25.08869171142578, "logps/rejected": -183.95664978027344, "loss": 0.1926, "rewards/accuracies": 1.0, "rewards/chosen": 0.2329518347978592, "rewards/margins": 1.6900138854980469, "rewards/rejected": -1.4570623636245728, "step": 3690 }, { "epoch": 0.04, "learning_rate": 2.214508020110127e-06, "logits/chosen": -3.0200729370117188, "logits/rejected": -2.9452321529388428, "logps/chosen": -25.757877349853516, "logps/rejected": -179.65484619140625, "loss": 0.2426, "rewards/accuracies": 1.0, "rewards/chosen": 0.25490468740463257, "rewards/margins": 1.6521589756011963, "rewards/rejected": -1.397254228591919, "step": 3700 }, { "epoch": 0.04, "learning_rate": 2.2204931769212356e-06, "logits/chosen": -2.964477300643921, "logits/rejected": -2.9018239974975586, "logps/chosen": -33.00424575805664, "logps/rejected": -182.91571044921875, "loss": 0.2354, "rewards/accuracies": 1.0, "rewards/chosen": 0.219386488199234, "rewards/margins": 1.6318881511688232, "rewards/rejected": -1.4125016927719116, "step": 3710 }, { "epoch": 0.04, "learning_rate": 2.226478333732344e-06, "logits/chosen": -3.004276752471924, "logits/rejected": -2.968533515930176, "logps/chosen": -21.244699478149414, "logps/rejected": -186.30856323242188, "loss": 0.2085, "rewards/accuracies": 1.0, "rewards/chosen": 0.24184031784534454, "rewards/margins": 1.7205009460449219, "rewards/rejected": -1.4786603450775146, "step": 3720 }, { "epoch": 0.04, "learning_rate": 2.2324634905434527e-06, "logits/chosen": -2.970311164855957, "logits/rejected": -2.902791976928711, "logps/chosen": -34.70060348510742, "logps/rejected": -177.0550537109375, "loss": 0.2136, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1614995300769806, "rewards/margins": 1.529625654220581, "rewards/rejected": -1.3681261539459229, "step": 3730 }, { "epoch": 0.04, "learning_rate": 2.238448647354561e-06, "logits/chosen": -3.0833349227905273, "logits/rejected": -3.031628131866455, "logps/chosen": -37.24443435668945, "logps/rejected": -169.77328491210938, "loss": 0.2576, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.15688081085681915, "rewards/margins": 1.4588451385498047, "rewards/rejected": -1.301964521408081, "step": 3740 }, { "epoch": 0.04, "learning_rate": 2.2444338041656693e-06, "logits/chosen": -3.0388777256011963, "logits/rejected": -2.9973158836364746, "logps/chosen": -32.29994201660156, "logps/rejected": -182.366455078125, "loss": 0.1987, "rewards/accuracies": 1.0, "rewards/chosen": 0.2543586194515228, "rewards/margins": 1.6664354801177979, "rewards/rejected": -1.412076711654663, "step": 3750 }, { "epoch": 0.05, "learning_rate": 2.2504189609767775e-06, "logits/chosen": -3.03171968460083, "logits/rejected": -3.018986225128174, "logps/chosen": -32.87628173828125, "logps/rejected": -170.8393096923828, "loss": 0.2189, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.16419292986392975, "rewards/margins": 1.490230917930603, "rewards/rejected": -1.326038122177124, "step": 3760 }, { "epoch": 0.05, "learning_rate": 2.256404117787886e-06, "logits/chosen": -3.0313048362731934, "logits/rejected": -2.9531688690185547, "logps/chosen": -27.86349105834961, "logps/rejected": -183.38668823242188, "loss": 0.1851, "rewards/accuracies": 1.0, "rewards/chosen": 0.1806994378566742, "rewards/margins": 1.5964241027832031, "rewards/rejected": -1.415724515914917, "step": 3770 }, { "epoch": 0.05, "learning_rate": 2.2623892745989946e-06, "logits/chosen": -3.0480120182037354, "logits/rejected": -3.0010106563568115, "logps/chosen": -26.520095825195312, "logps/rejected": -180.804443359375, "loss": 0.177, "rewards/accuracies": 1.0, "rewards/chosen": 0.2288721799850464, "rewards/margins": 1.6517871618270874, "rewards/rejected": -1.4229151010513306, "step": 3780 }, { "epoch": 0.05, "learning_rate": 2.268374431410103e-06, "logits/chosen": -3.0362484455108643, "logits/rejected": -2.996431827545166, "logps/chosen": -16.387229919433594, "logps/rejected": -182.18240356445312, "loss": 0.1829, "rewards/accuracies": 1.0, "rewards/chosen": 0.25131839513778687, "rewards/margins": 1.6998382806777954, "rewards/rejected": -1.4485198259353638, "step": 3790 }, { "epoch": 0.05, "learning_rate": 2.2743595882212117e-06, "logits/chosen": -3.025073528289795, "logits/rejected": -2.958331346511841, "logps/chosen": -31.39395523071289, "logps/rejected": -187.15924072265625, "loss": 0.1849, "rewards/accuracies": 1.0, "rewards/chosen": 0.2343076765537262, "rewards/margins": 1.6892579793930054, "rewards/rejected": -1.454950213432312, "step": 3800 }, { "epoch": 0.05, "learning_rate": 2.28034474503232e-06, "logits/chosen": -3.0371041297912598, "logits/rejected": -2.9931435585021973, "logps/chosen": -40.19232940673828, "logps/rejected": -172.177001953125, "loss": 0.3042, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.035463444888591766, "rewards/margins": 1.3655166625976562, "rewards/rejected": -1.3300530910491943, "step": 3810 }, { "epoch": 0.05, "learning_rate": 2.2863299018434284e-06, "logits/chosen": -3.0326454639434814, "logits/rejected": -3.014432430267334, "logps/chosen": -15.559301376342773, "logps/rejected": -176.56393432617188, "loss": 0.1929, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20809869468212128, "rewards/margins": 1.6154474020004272, "rewards/rejected": -1.4073487520217896, "step": 3820 }, { "epoch": 0.05, "learning_rate": 2.292315058654537e-06, "logits/chosen": -3.031700372695923, "logits/rejected": -3.0094103813171387, "logps/chosen": -21.905309677124023, "logps/rejected": -180.9680938720703, "loss": 0.2329, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20967236161231995, "rewards/margins": 1.645369291305542, "rewards/rejected": -1.4356971979141235, "step": 3830 }, { "epoch": 0.05, "learning_rate": 2.2983002154656455e-06, "logits/chosen": -3.0191500186920166, "logits/rejected": -2.961043357849121, "logps/chosen": -21.418033599853516, "logps/rejected": -186.59007263183594, "loss": 0.2106, "rewards/accuracies": 1.0, "rewards/chosen": 0.24487009644508362, "rewards/margins": 1.732086181640625, "rewards/rejected": -1.4872162342071533, "step": 3840 }, { "epoch": 0.05, "learning_rate": 2.304285372276754e-06, "logits/chosen": -3.0033013820648193, "logits/rejected": -2.969338893890381, "logps/chosen": -15.262086868286133, "logps/rejected": -178.21505737304688, "loss": 0.1991, "rewards/accuracies": 1.0, "rewards/chosen": 0.24833464622497559, "rewards/margins": 1.6404072046279907, "rewards/rejected": -1.3920725584030151, "step": 3850 }, { "epoch": 0.05, "learning_rate": 2.3102705290878626e-06, "logits/chosen": -3.016700267791748, "logits/rejected": -2.9669740200042725, "logps/chosen": -30.00479507446289, "logps/rejected": -185.43743896484375, "loss": 0.191, "rewards/accuracies": 1.0, "rewards/chosen": 0.2172819823026657, "rewards/margins": 1.6806118488311768, "rewards/rejected": -1.4633299112319946, "step": 3860 }, { "epoch": 0.05, "learning_rate": 2.3162556858989707e-06, "logits/chosen": -3.0235636234283447, "logits/rejected": -2.9732093811035156, "logps/chosen": -37.94096755981445, "logps/rejected": -190.5591278076172, "loss": 0.1673, "rewards/accuracies": 1.0, "rewards/chosen": 0.22312462329864502, "rewards/margins": 1.7061941623687744, "rewards/rejected": -1.4830694198608398, "step": 3870 }, { "epoch": 0.05, "learning_rate": 2.3222408427100793e-06, "logits/chosen": -3.0298819541931152, "logits/rejected": -2.9737322330474854, "logps/chosen": -15.29656982421875, "logps/rejected": -183.5049285888672, "loss": 0.1658, "rewards/accuracies": 1.0, "rewards/chosen": 0.2559012174606323, "rewards/margins": 1.699406385421753, "rewards/rejected": -1.443505048751831, "step": 3880 }, { "epoch": 0.05, "learning_rate": 2.3282259995211874e-06, "logits/chosen": -3.0029475688934326, "logits/rejected": -2.9317855834960938, "logps/chosen": -27.872146606445312, "logps/rejected": -185.20404052734375, "loss": 0.1751, "rewards/accuracies": 1.0, "rewards/chosen": 0.25671228766441345, "rewards/margins": 1.709874153137207, "rewards/rejected": -1.4531619548797607, "step": 3890 }, { "epoch": 0.05, "learning_rate": 2.334211156332296e-06, "logits/chosen": -2.9860777854919434, "logits/rejected": -2.9219727516174316, "logps/chosen": -19.751522064208984, "logps/rejected": -176.13197326660156, "loss": 0.1921, "rewards/accuracies": 1.0, "rewards/chosen": 0.2324783056974411, "rewards/margins": 1.6217149496078491, "rewards/rejected": -1.389236569404602, "step": 3900 }, { "epoch": 0.05, "learning_rate": 2.3401963131434045e-06, "logits/chosen": -3.067779541015625, "logits/rejected": -3.000497341156006, "logps/chosen": -36.36333084106445, "logps/rejected": -188.21392822265625, "loss": 0.1692, "rewards/accuracies": 1.0, "rewards/chosen": 0.19816352427005768, "rewards/margins": 1.6790615320205688, "rewards/rejected": -1.4808977842330933, "step": 3910 }, { "epoch": 0.05, "learning_rate": 2.346181469954513e-06, "logits/chosen": -3.048191785812378, "logits/rejected": -2.9849746227264404, "logps/chosen": -22.810361862182617, "logps/rejected": -174.45921325683594, "loss": 0.1994, "rewards/accuracies": 1.0, "rewards/chosen": 0.2285373955965042, "rewards/margins": 1.588073968887329, "rewards/rejected": -1.3595366477966309, "step": 3920 }, { "epoch": 0.05, "learning_rate": 2.3521666267656216e-06, "logits/chosen": -2.9986793994903564, "logits/rejected": -2.961942195892334, "logps/chosen": -27.662038803100586, "logps/rejected": -174.9197235107422, "loss": 0.1819, "rewards/accuracies": 1.0, "rewards/chosen": 0.2167518138885498, "rewards/margins": 1.5659816265106201, "rewards/rejected": -1.3492298126220703, "step": 3930 }, { "epoch": 0.05, "learning_rate": 2.3581517835767297e-06, "logits/chosen": -3.0045359134674072, "logits/rejected": -2.965949535369873, "logps/chosen": -19.152626037597656, "logps/rejected": -193.96630859375, "loss": 0.1877, "rewards/accuracies": 1.0, "rewards/chosen": 0.23993580043315887, "rewards/margins": 1.7944526672363281, "rewards/rejected": -1.5545170307159424, "step": 3940 }, { "epoch": 0.05, "learning_rate": 2.3641369403878383e-06, "logits/chosen": -3.043207883834839, "logits/rejected": -2.9763407707214355, "logps/chosen": -42.410560607910156, "logps/rejected": -177.73458862304688, "loss": 0.2096, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.118630550801754, "rewards/margins": 1.497527003288269, "rewards/rejected": -1.3788964748382568, "step": 3950 }, { "epoch": 0.05, "learning_rate": 2.370122097198947e-06, "logits/chosen": -2.9799935817718506, "logits/rejected": -2.9479668140411377, "logps/chosen": -23.38645362854004, "logps/rejected": -188.64537048339844, "loss": 0.2026, "rewards/accuracies": 1.0, "rewards/chosen": 0.22551587224006653, "rewards/margins": 1.7143476009368896, "rewards/rejected": -1.4888317584991455, "step": 3960 }, { "epoch": 0.05, "learning_rate": 2.3761072540100554e-06, "logits/chosen": -3.0269858837127686, "logits/rejected": -2.954918622970581, "logps/chosen": -29.545400619506836, "logps/rejected": -173.5918426513672, "loss": 0.1841, "rewards/accuracies": 1.0, "rewards/chosen": 0.21968121826648712, "rewards/margins": 1.5558184385299683, "rewards/rejected": -1.3361371755599976, "step": 3970 }, { "epoch": 0.05, "learning_rate": 2.382092410821164e-06, "logits/chosen": -2.9771664142608643, "logits/rejected": -2.9368205070495605, "logps/chosen": -25.234119415283203, "logps/rejected": -175.41725158691406, "loss": 0.2068, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.2039385288953781, "rewards/margins": 1.5812346935272217, "rewards/rejected": -1.3772960901260376, "step": 3980 }, { "epoch": 0.05, "learning_rate": 2.388077567632272e-06, "logits/chosen": -3.013758420944214, "logits/rejected": -2.9786770343780518, "logps/chosen": -27.002056121826172, "logps/rejected": -180.91879272460938, "loss": 0.2302, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2012016326189041, "rewards/margins": 1.630549669265747, "rewards/rejected": -1.4293477535247803, "step": 3990 }, { "epoch": 0.05, "learning_rate": 2.3940627244433806e-06, "logits/chosen": -2.996492862701416, "logits/rejected": -2.941807270050049, "logps/chosen": -23.780715942382812, "logps/rejected": -173.12933349609375, "loss": 0.1848, "rewards/accuracies": 1.0, "rewards/chosen": 0.2434101104736328, "rewards/margins": 1.5672833919525146, "rewards/rejected": -1.323873519897461, "step": 4000 }, { "epoch": 0.05, "learning_rate": 2.400047881254489e-06, "logits/chosen": -3.0485191345214844, "logits/rejected": -3.0169951915740967, "logps/chosen": -27.312280654907227, "logps/rejected": -178.7981719970703, "loss": 0.2152, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.16287139058113098, "rewards/margins": 1.5690795183181763, "rewards/rejected": -1.4062081575393677, "step": 4010 }, { "epoch": 0.05, "learning_rate": 2.4060330380655973e-06, "logits/chosen": -3.024501323699951, "logits/rejected": -2.9704525470733643, "logps/chosen": -17.354808807373047, "logps/rejected": -193.17202758789062, "loss": 0.2021, "rewards/accuracies": 1.0, "rewards/chosen": 0.23338481783866882, "rewards/margins": 1.7841707468032837, "rewards/rejected": -1.550785779953003, "step": 4020 }, { "epoch": 0.05, "learning_rate": 2.412018194876706e-06, "logits/chosen": -3.04306960105896, "logits/rejected": -2.9877665042877197, "logps/chosen": -24.063674926757812, "logps/rejected": -194.27415466308594, "loss": 0.1594, "rewards/accuracies": 1.0, "rewards/chosen": 0.22210931777954102, "rewards/margins": 1.792248010635376, "rewards/rejected": -1.5701385736465454, "step": 4030 }, { "epoch": 0.05, "learning_rate": 2.4180033516878144e-06, "logits/chosen": -3.0039379596710205, "logits/rejected": -2.968193292617798, "logps/chosen": -27.833927154541016, "logps/rejected": -189.20242309570312, "loss": 0.2065, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.16213935613632202, "rewards/margins": 1.674612283706665, "rewards/rejected": -1.5124731063842773, "step": 4040 }, { "epoch": 0.05, "learning_rate": 2.423988508498923e-06, "logits/chosen": -3.040409803390503, "logits/rejected": -2.985712766647339, "logps/chosen": -33.365928649902344, "logps/rejected": -187.65098571777344, "loss": 0.2071, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.16489750146865845, "rewards/margins": 1.645533800125122, "rewards/rejected": -1.4806363582611084, "step": 4050 }, { "epoch": 0.05, "learning_rate": 2.4299736653100315e-06, "logits/chosen": -2.988642454147339, "logits/rejected": -2.9567673206329346, "logps/chosen": -23.80105209350586, "logps/rejected": -193.24307250976562, "loss": 0.1653, "rewards/accuracies": 1.0, "rewards/chosen": 0.21959857642650604, "rewards/margins": 1.768031358718872, "rewards/rejected": -1.5484325885772705, "step": 4060 }, { "epoch": 0.05, "learning_rate": 2.4359588221211397e-06, "logits/chosen": -3.031162738800049, "logits/rejected": -3.0110249519348145, "logps/chosen": -17.276533126831055, "logps/rejected": -188.247802734375, "loss": 0.1873, "rewards/accuracies": 1.0, "rewards/chosen": 0.257924884557724, "rewards/margins": 1.7607948780059814, "rewards/rejected": -1.5028698444366455, "step": 4070 }, { "epoch": 0.05, "learning_rate": 2.4419439789322482e-06, "logits/chosen": -3.0070793628692627, "logits/rejected": -2.954540967941284, "logps/chosen": -27.393375396728516, "logps/rejected": -193.6285858154297, "loss": 0.1568, "rewards/accuracies": 1.0, "rewards/chosen": 0.20358674228191376, "rewards/margins": 1.7375099658966064, "rewards/rejected": -1.5339230298995972, "step": 4080 }, { "epoch": 0.05, "learning_rate": 2.4479291357433568e-06, "logits/chosen": -3.038128614425659, "logits/rejected": -2.9582622051239014, "logps/chosen": -23.504650115966797, "logps/rejected": -187.48159790039062, "loss": 0.1801, "rewards/accuracies": 1.0, "rewards/chosen": 0.1972036212682724, "rewards/margins": 1.6855065822601318, "rewards/rejected": -1.4883029460906982, "step": 4090 }, { "epoch": 0.05, "learning_rate": 2.4539142925544653e-06, "logits/chosen": -3.0096304416656494, "logits/rejected": -2.972506046295166, "logps/chosen": -30.96630859375, "logps/rejected": -183.35023498535156, "loss": 0.1791, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.15641745924949646, "rewards/margins": 1.6054861545562744, "rewards/rejected": -1.449068546295166, "step": 4100 }, { "epoch": 0.05, "learning_rate": 2.459899449365574e-06, "logits/chosen": -3.002368927001953, "logits/rejected": -2.9510464668273926, "logps/chosen": -22.80338478088379, "logps/rejected": -191.04808044433594, "loss": 0.2528, "rewards/accuracies": 1.0, "rewards/chosen": 0.22675231099128723, "rewards/margins": 1.7371180057525635, "rewards/rejected": -1.5103657245635986, "step": 4110 }, { "epoch": 0.05, "learning_rate": 2.465884606176682e-06, "logits/chosen": -3.0483222007751465, "logits/rejected": -2.997936487197876, "logps/chosen": -42.82896423339844, "logps/rejected": -174.08265686035156, "loss": 0.2762, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.07538391649723053, "rewards/margins": 1.4182995557785034, "rewards/rejected": -1.342915654182434, "step": 4120 }, { "epoch": 0.05, "learning_rate": 2.4718697629877906e-06, "logits/chosen": -3.018192768096924, "logits/rejected": -2.948568820953369, "logps/chosen": -31.202585220336914, "logps/rejected": -185.79074096679688, "loss": 0.1885, "rewards/accuracies": 1.0, "rewards/chosen": 0.1758287400007248, "rewards/margins": 1.6335407495498657, "rewards/rejected": -1.457711935043335, "step": 4130 }, { "epoch": 0.05, "learning_rate": 2.4778549197988987e-06, "logits/chosen": -3.0415616035461426, "logits/rejected": -2.9649243354797363, "logps/chosen": -30.43827247619629, "logps/rejected": -191.24020385742188, "loss": 0.1622, "rewards/accuracies": 1.0, "rewards/chosen": 0.20951130986213684, "rewards/margins": 1.7317005395889282, "rewards/rejected": -1.5221890211105347, "step": 4140 }, { "epoch": 0.05, "learning_rate": 2.4838400766100072e-06, "logits/chosen": -2.9892868995666504, "logits/rejected": -2.960529088973999, "logps/chosen": -14.706756591796875, "logps/rejected": -193.02928161621094, "loss": 0.2035, "rewards/accuracies": 1.0, "rewards/chosen": 0.25603383779525757, "rewards/margins": 1.7977157831192017, "rewards/rejected": -1.5416817665100098, "step": 4150 }, { "epoch": 0.05, "learning_rate": 2.489825233421116e-06, "logits/chosen": -3.0471906661987305, "logits/rejected": -3.002990245819092, "logps/chosen": -36.94972610473633, "logps/rejected": -186.06350708007812, "loss": 0.1968, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1628929078578949, "rewards/margins": 1.6203393936157227, "rewards/rejected": -1.4574464559555054, "step": 4160 }, { "epoch": 0.05, "learning_rate": 2.4958103902322243e-06, "logits/chosen": -2.9888482093811035, "logits/rejected": -2.9414820671081543, "logps/chosen": -23.892906188964844, "logps/rejected": -186.7390899658203, "loss": 0.1824, "rewards/accuracies": 1.0, "rewards/chosen": 0.2247171401977539, "rewards/margins": 1.6976890563964844, "rewards/rejected": -1.472971796989441, "step": 4170 }, { "epoch": 0.05, "learning_rate": 2.5017955470433325e-06, "logits/chosen": -3.0288729667663574, "logits/rejected": -2.927828073501587, "logps/chosen": -30.393539428710938, "logps/rejected": -193.70150756835938, "loss": 0.1668, "rewards/accuracies": 1.0, "rewards/chosen": 0.1916079819202423, "rewards/margins": 1.745266318321228, "rewards/rejected": -1.5536582469940186, "step": 4180 }, { "epoch": 0.05, "learning_rate": 2.507780703854441e-06, "logits/chosen": -2.9919698238372803, "logits/rejected": -2.9151759147644043, "logps/chosen": -38.001068115234375, "logps/rejected": -188.95657348632812, "loss": 0.2159, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.16816338896751404, "rewards/margins": 1.6673095226287842, "rewards/rejected": -1.4991463422775269, "step": 4190 }, { "epoch": 0.05, "learning_rate": 2.5137658606655496e-06, "logits/chosen": -3.034090995788574, "logits/rejected": -2.9605956077575684, "logps/chosen": -22.88013458251953, "logps/rejected": -199.9810028076172, "loss": 0.1767, "rewards/accuracies": 1.0, "rewards/chosen": 0.2511909306049347, "rewards/margins": 1.8593311309814453, "rewards/rejected": -1.608139991760254, "step": 4200 }, { "epoch": 0.05, "learning_rate": 2.519751017476658e-06, "logits/chosen": -3.0287506580352783, "logits/rejected": -2.9918220043182373, "logps/chosen": -33.8594970703125, "logps/rejected": -193.82986450195312, "loss": 0.2092, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.11186392605304718, "rewards/margins": 1.6556951999664307, "rewards/rejected": -1.5438312292099, "step": 4210 }, { "epoch": 0.05, "learning_rate": 2.5257361742877667e-06, "logits/chosen": -2.997973918914795, "logits/rejected": -2.9527182579040527, "logps/chosen": -40.4965934753418, "logps/rejected": -184.5731658935547, "loss": 0.238, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.06997165083885193, "rewards/margins": 1.515535593032837, "rewards/rejected": -1.4455640316009521, "step": 4220 }, { "epoch": 0.05, "learning_rate": 2.531721331098875e-06, "logits/chosen": -3.0340943336486816, "logits/rejected": -2.953610420227051, "logps/chosen": -41.50559616088867, "logps/rejected": -183.1685791015625, "loss": 0.2227, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.12986589968204498, "rewards/margins": 1.5503849983215332, "rewards/rejected": -1.420519232749939, "step": 4230 }, { "epoch": 0.05, "learning_rate": 2.5377064879099834e-06, "logits/chosen": -3.077301502227783, "logits/rejected": -3.057396173477173, "logps/chosen": -25.889291763305664, "logps/rejected": -184.1664276123047, "loss": 0.2144, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.17228463292121887, "rewards/margins": 1.6407897472381592, "rewards/rejected": -1.4685051441192627, "step": 4240 }, { "epoch": 0.05, "learning_rate": 2.543691644721092e-06, "logits/chosen": -2.9859251976013184, "logits/rejected": -2.8972866535186768, "logps/chosen": -33.6193733215332, "logps/rejected": -195.65501403808594, "loss": 0.1819, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.17974448204040527, "rewards/margins": 1.7386964559555054, "rewards/rejected": -1.5589520931243896, "step": 4250 }, { "epoch": 0.05, "learning_rate": 2.5496768015322005e-06, "logits/chosen": -3.0136332511901855, "logits/rejected": -2.927426338195801, "logps/chosen": -22.525623321533203, "logps/rejected": -180.76034545898438, "loss": 0.2131, "rewards/accuracies": 1.0, "rewards/chosen": 0.24218937754631042, "rewards/margins": 1.658588171005249, "rewards/rejected": -1.4163988828659058, "step": 4260 }, { "epoch": 0.05, "learning_rate": 2.555661958343309e-06, "logits/chosen": -3.0308938026428223, "logits/rejected": -2.989293336868286, "logps/chosen": -22.72418212890625, "logps/rejected": -196.11805725097656, "loss": 0.1693, "rewards/accuracies": 1.0, "rewards/chosen": 0.20103387534618378, "rewards/margins": 1.7839730978012085, "rewards/rejected": -1.5829391479492188, "step": 4270 }, { "epoch": 0.05, "learning_rate": 2.561647115154417e-06, "logits/chosen": -3.0348572731018066, "logits/rejected": -2.9805808067321777, "logps/chosen": -34.665428161621094, "logps/rejected": -197.5814208984375, "loss": 0.2054, "rewards/accuracies": 1.0, "rewards/chosen": 0.18547627329826355, "rewards/margins": 1.7636770009994507, "rewards/rejected": -1.5782009363174438, "step": 4280 }, { "epoch": 0.05, "learning_rate": 2.5676322719655257e-06, "logits/chosen": -3.0289978981018066, "logits/rejected": -2.9846315383911133, "logps/chosen": -28.482751846313477, "logps/rejected": -195.650146484375, "loss": 0.1903, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.18103131651878357, "rewards/margins": 1.7559255361557007, "rewards/rejected": -1.5748941898345947, "step": 4290 }, { "epoch": 0.05, "learning_rate": 2.5736174287766343e-06, "logits/chosen": -3.0088706016540527, "logits/rejected": -2.9560043811798096, "logps/chosen": -28.07610511779785, "logps/rejected": -184.60507202148438, "loss": 0.195, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.22178609669208527, "rewards/margins": 1.6766977310180664, "rewards/rejected": -1.4549118280410767, "step": 4300 }, { "epoch": 0.05, "learning_rate": 2.579602585587743e-06, "logits/chosen": -3.051687717437744, "logits/rejected": -2.994964122772217, "logps/chosen": -36.01586151123047, "logps/rejected": -186.71849060058594, "loss": 0.1861, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.15330877900123596, "rewards/margins": 1.6173655986785889, "rewards/rejected": -1.4640569686889648, "step": 4310 }, { "epoch": 0.05, "learning_rate": 2.5855877423988514e-06, "logits/chosen": -3.0081448554992676, "logits/rejected": -2.979332685470581, "logps/chosen": -11.456012725830078, "logps/rejected": -189.4493408203125, "loss": 0.1759, "rewards/accuracies": 1.0, "rewards/chosen": 0.26324206590652466, "rewards/margins": 1.791648507118225, "rewards/rejected": -1.5284065008163452, "step": 4320 }, { "epoch": 0.05, "learning_rate": 2.591572899209959e-06, "logits/chosen": -3.0260605812072754, "logits/rejected": -2.912606716156006, "logps/chosen": -49.65890884399414, "logps/rejected": -187.93637084960938, "loss": 0.2304, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.05580435320734978, "rewards/margins": 1.5333526134490967, "rewards/rejected": -1.477548360824585, "step": 4330 }, { "epoch": 0.05, "learning_rate": 2.5975580560210676e-06, "logits/chosen": -3.0186407566070557, "logits/rejected": -2.9656426906585693, "logps/chosen": -28.164409637451172, "logps/rejected": -183.10987854003906, "loss": 0.1882, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.17450903356075287, "rewards/margins": 1.6197131872177124, "rewards/rejected": -1.4452041387557983, "step": 4340 }, { "epoch": 0.05, "learning_rate": 2.603543212832176e-06, "logits/chosen": -3.02543568611145, "logits/rejected": -2.9806389808654785, "logps/chosen": -49.791786193847656, "logps/rejected": -177.69161987304688, "loss": 0.2416, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.015941385179758072, "rewards/margins": 1.4026000499725342, "rewards/rejected": -1.3866585493087769, "step": 4350 }, { "epoch": 0.05, "learning_rate": 2.6095283696432847e-06, "logits/chosen": -3.011646270751953, "logits/rejected": -2.951279401779175, "logps/chosen": -42.81531524658203, "logps/rejected": -188.31080627441406, "loss": 0.2105, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.0807570219039917, "rewards/margins": 1.563084363937378, "rewards/rejected": -1.4823273420333862, "step": 4360 }, { "epoch": 0.05, "learning_rate": 2.6155135264543933e-06, "logits/chosen": -3.057204008102417, "logits/rejected": -2.9876492023468018, "logps/chosen": -37.3825798034668, "logps/rejected": -188.6734619140625, "loss": 0.2397, "rewards/accuracies": 1.0, "rewards/chosen": 0.1254686415195465, "rewards/margins": 1.608920693397522, "rewards/rejected": -1.4834520816802979, "step": 4370 }, { "epoch": 0.05, "learning_rate": 2.6214986832655014e-06, "logits/chosen": -3.0204989910125732, "logits/rejected": -2.973677158355713, "logps/chosen": -19.860506057739258, "logps/rejected": -192.52442932128906, "loss": 0.1729, "rewards/accuracies": 1.0, "rewards/chosen": 0.20562787353992462, "rewards/margins": 1.7419906854629517, "rewards/rejected": -1.536363124847412, "step": 4380 }, { "epoch": 0.05, "learning_rate": 2.62748384007661e-06, "logits/chosen": -3.0020129680633545, "logits/rejected": -2.962010622024536, "logps/chosen": -24.001890182495117, "logps/rejected": -205.2342071533203, "loss": 0.1543, "rewards/accuracies": 1.0, "rewards/chosen": 0.21681423485279083, "rewards/margins": 1.8773164749145508, "rewards/rejected": -1.6605024337768555, "step": 4390 }, { "epoch": 0.05, "learning_rate": 2.6334689968877185e-06, "logits/chosen": -3.014444351196289, "logits/rejected": -2.98093318939209, "logps/chosen": -18.093963623046875, "logps/rejected": -208.14315795898438, "loss": 0.1449, "rewards/accuracies": 1.0, "rewards/chosen": 0.21709048748016357, "rewards/margins": 1.9191001653671265, "rewards/rejected": -1.7020095586776733, "step": 4400 }, { "epoch": 0.05, "learning_rate": 2.639454153698827e-06, "logits/chosen": -3.0077321529388428, "logits/rejected": -2.962146282196045, "logps/chosen": -22.113849639892578, "logps/rejected": -203.8893280029297, "loss": 0.1529, "rewards/accuracies": 1.0, "rewards/chosen": 0.19850210845470428, "rewards/margins": 1.8326425552368164, "rewards/rejected": -1.6341406106948853, "step": 4410 }, { "epoch": 0.05, "learning_rate": 2.6454393105099356e-06, "logits/chosen": -2.989830493927002, "logits/rejected": -2.9117560386657715, "logps/chosen": -41.30767059326172, "logps/rejected": -192.33277893066406, "loss": 0.223, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.09037379920482635, "rewards/margins": 1.6097047328948975, "rewards/rejected": -1.5193309783935547, "step": 4420 }, { "epoch": 0.05, "learning_rate": 2.651424467321044e-06, "logits/chosen": -2.994993209838867, "logits/rejected": -2.9570999145507812, "logps/chosen": -28.240047454833984, "logps/rejected": -191.0813446044922, "loss": 0.1748, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.16060778498649597, "rewards/margins": 1.6916357278823853, "rewards/rejected": -1.5310277938842773, "step": 4430 }, { "epoch": 0.05, "learning_rate": 2.6574096241321523e-06, "logits/chosen": -3.007476329803467, "logits/rejected": -2.980043888092041, "logps/chosen": -28.164464950561523, "logps/rejected": -194.7515411376953, "loss": 0.1681, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.16483300924301147, "rewards/margins": 1.7207120656967163, "rewards/rejected": -1.55587899684906, "step": 4440 }, { "epoch": 0.05, "learning_rate": 2.663394780943261e-06, "logits/chosen": -3.0703699588775635, "logits/rejected": -3.021470069885254, "logps/chosen": -37.37091827392578, "logps/rejected": -194.26939392089844, "loss": 0.2012, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.10920719802379608, "rewards/margins": 1.6642223596572876, "rewards/rejected": -1.555014967918396, "step": 4450 }, { "epoch": 0.05, "learning_rate": 2.6693799377543694e-06, "logits/chosen": -2.988708972930908, "logits/rejected": -2.905017375946045, "logps/chosen": -35.172401428222656, "logps/rejected": -185.365234375, "loss": 0.2476, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.13882842659950256, "rewards/margins": 1.6205705404281616, "rewards/rejected": -1.481742024421692, "step": 4460 }, { "epoch": 0.05, "learning_rate": 2.675365094565478e-06, "logits/chosen": -3.0349833965301514, "logits/rejected": -2.9542176723480225, "logps/chosen": -38.90381622314453, "logps/rejected": -191.05953979492188, "loss": 0.2112, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.08207444101572037, "rewards/margins": 1.5872575044631958, "rewards/rejected": -1.5051829814910889, "step": 4470 }, { "epoch": 0.05, "learning_rate": 2.6813502513765865e-06, "logits/chosen": -3.0349631309509277, "logits/rejected": -2.970707416534424, "logps/chosen": -44.828460693359375, "logps/rejected": -207.5638427734375, "loss": 0.1768, "rewards/accuracies": 1.0, "rewards/chosen": 0.08697378635406494, "rewards/margins": 1.7606427669525146, "rewards/rejected": -1.6736690998077393, "step": 4480 }, { "epoch": 0.05, "learning_rate": 2.6873354081876947e-06, "logits/chosen": -3.0025579929351807, "logits/rejected": -2.937838554382324, "logps/chosen": -44.26203918457031, "logps/rejected": -198.6277313232422, "loss": 0.2121, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.08953789621591568, "rewards/margins": 1.6477733850479126, "rewards/rejected": -1.5582354068756104, "step": 4490 }, { "epoch": 0.05, "learning_rate": 2.6933205649988032e-06, "logits/chosen": -3.0082263946533203, "logits/rejected": -2.932823657989502, "logps/chosen": -20.69256591796875, "logps/rejected": -202.92193603515625, "loss": 0.155, "rewards/accuracies": 1.0, "rewards/chosen": 0.22111830115318298, "rewards/margins": 1.8928539752960205, "rewards/rejected": -1.6717357635498047, "step": 4500 }, { "epoch": 0.05, "learning_rate": 2.6993057218099118e-06, "logits/chosen": -3.0171165466308594, "logits/rejected": -2.997894763946533, "logps/chosen": -23.36641502380371, "logps/rejected": -210.6042022705078, "loss": 0.1592, "rewards/accuracies": 1.0, "rewards/chosen": 0.20440182089805603, "rewards/margins": 1.9303375482559204, "rewards/rejected": -1.725935697555542, "step": 4510 }, { "epoch": 0.05, "learning_rate": 2.7052908786210203e-06, "logits/chosen": -2.995854616165161, "logits/rejected": -2.946929454803467, "logps/chosen": -26.62234115600586, "logps/rejected": -207.4730987548828, "loss": 0.1536, "rewards/accuracies": 1.0, "rewards/chosen": 0.19067224860191345, "rewards/margins": 1.876919150352478, "rewards/rejected": -1.6862468719482422, "step": 4520 }, { "epoch": 0.05, "learning_rate": 2.711276035432129e-06, "logits/chosen": -2.981712818145752, "logits/rejected": -2.9564080238342285, "logps/chosen": -29.018590927124023, "logps/rejected": -209.59054565429688, "loss": 0.1596, "rewards/accuracies": 1.0, "rewards/chosen": 0.13968491554260254, "rewards/margins": 1.8368593454360962, "rewards/rejected": -1.697174310684204, "step": 4530 }, { "epoch": 0.05, "learning_rate": 2.717261192243237e-06, "logits/chosen": -2.993239164352417, "logits/rejected": -2.9356634616851807, "logps/chosen": -23.878238677978516, "logps/rejected": -191.7854461669922, "loss": 0.1702, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.15334607660770416, "rewards/margins": 1.6899248361587524, "rewards/rejected": -1.5365787744522095, "step": 4540 }, { "epoch": 0.05, "learning_rate": 2.7232463490543456e-06, "logits/chosen": -2.9783074855804443, "logits/rejected": -2.880694627761841, "logps/chosen": -36.74439239501953, "logps/rejected": -223.46463012695312, "loss": 0.2138, "rewards/accuracies": 1.0, "rewards/chosen": 0.13364467024803162, "rewards/margins": 1.9689900875091553, "rewards/rejected": -1.8353456258773804, "step": 4550 }, { "epoch": 0.05, "learning_rate": 2.729231505865454e-06, "logits/chosen": -3.0265049934387207, "logits/rejected": -2.9488368034362793, "logps/chosen": -42.04923629760742, "logps/rejected": -223.34814453125, "loss": 0.1414, "rewards/accuracies": 1.0, "rewards/chosen": 0.14872315526008606, "rewards/margins": 1.9655177593231201, "rewards/rejected": -1.816794753074646, "step": 4560 }, { "epoch": 0.05, "learning_rate": 2.7352166626765627e-06, "logits/chosen": -3.034707546234131, "logits/rejected": -2.9479689598083496, "logps/chosen": -43.212955474853516, "logps/rejected": -221.3975830078125, "loss": 0.1315, "rewards/accuracies": 1.0, "rewards/chosen": 0.15374918282032013, "rewards/margins": 1.939647912979126, "rewards/rejected": -1.7858985662460327, "step": 4570 }, { "epoch": 0.05, "learning_rate": 2.7412018194876712e-06, "logits/chosen": -3.018388032913208, "logits/rejected": -2.9538588523864746, "logps/chosen": -29.09103012084961, "logps/rejected": -220.2655487060547, "loss": 0.1519, "rewards/accuracies": 1.0, "rewards/chosen": 0.18255439400672913, "rewards/margins": 1.9971851110458374, "rewards/rejected": -1.8146308660507202, "step": 4580 }, { "epoch": 0.05, "learning_rate": 2.747186976298779e-06, "logits/chosen": -3.0378289222717285, "logits/rejected": -2.939906358718872, "logps/chosen": -36.443931579589844, "logps/rejected": -217.79074096679688, "loss": 0.1485, "rewards/accuracies": 1.0, "rewards/chosen": 0.13495846092700958, "rewards/margins": 1.9128938913345337, "rewards/rejected": -1.7779353857040405, "step": 4590 }, { "epoch": 0.06, "learning_rate": 2.7531721331098875e-06, "logits/chosen": -2.9672229290008545, "logits/rejected": -2.93280029296875, "logps/chosen": -25.299577713012695, "logps/rejected": -206.919921875, "loss": 0.146, "rewards/accuracies": 1.0, "rewards/chosen": 0.19814716279506683, "rewards/margins": 1.8850338459014893, "rewards/rejected": -1.6868867874145508, "step": 4600 }, { "epoch": 0.06, "learning_rate": 2.759157289920996e-06, "logits/chosen": -3.0664162635803223, "logits/rejected": -2.9890027046203613, "logps/chosen": -37.42949676513672, "logps/rejected": -221.29562377929688, "loss": 0.1602, "rewards/accuracies": 1.0, "rewards/chosen": 0.10050330311059952, "rewards/margins": 1.9224389791488647, "rewards/rejected": -1.8219356536865234, "step": 4610 }, { "epoch": 0.06, "learning_rate": 2.7651424467321046e-06, "logits/chosen": -3.011594295501709, "logits/rejected": -2.982551097869873, "logps/chosen": -28.10199546813965, "logps/rejected": -191.7155303955078, "loss": 0.227, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1396065205335617, "rewards/margins": 1.6668682098388672, "rewards/rejected": -1.5272616147994995, "step": 4620 }, { "epoch": 0.06, "learning_rate": 2.771127603543213e-06, "logits/chosen": -2.9727017879486084, "logits/rejected": -2.8661201000213623, "logps/chosen": -40.36912155151367, "logps/rejected": -199.38534545898438, "loss": 0.1631, "rewards/accuracies": 1.0, "rewards/chosen": 0.16307225823402405, "rewards/margins": 1.7643293142318726, "rewards/rejected": -1.601257085800171, "step": 4630 }, { "epoch": 0.06, "learning_rate": 2.7771127603543213e-06, "logits/chosen": -2.9847893714904785, "logits/rejected": -2.9454283714294434, "logps/chosen": -24.146747589111328, "logps/rejected": -207.8458709716797, "loss": 0.1317, "rewards/accuracies": 1.0, "rewards/chosen": 0.20495185256004333, "rewards/margins": 1.9011541604995728, "rewards/rejected": -1.696202278137207, "step": 4640 }, { "epoch": 0.06, "learning_rate": 2.78309791716543e-06, "logits/chosen": -2.9778175354003906, "logits/rejected": -2.884800910949707, "logps/chosen": -34.72550964355469, "logps/rejected": -230.14834594726562, "loss": 0.1371, "rewards/accuracies": 1.0, "rewards/chosen": 0.1229509487748146, "rewards/margins": 2.033963918685913, "rewards/rejected": -1.911012887954712, "step": 4650 }, { "epoch": 0.06, "learning_rate": 2.7890830739765384e-06, "logits/chosen": -3.0127220153808594, "logits/rejected": -2.958540678024292, "logps/chosen": -31.46722984313965, "logps/rejected": -225.4025115966797, "loss": 0.1356, "rewards/accuracies": 1.0, "rewards/chosen": 0.16433529555797577, "rewards/margins": 2.024275064468384, "rewards/rejected": -1.8599398136138916, "step": 4660 }, { "epoch": 0.06, "learning_rate": 2.795068230787647e-06, "logits/chosen": -3.028719902038574, "logits/rejected": -2.993922710418701, "logps/chosen": -18.919387817382812, "logps/rejected": -208.01675415039062, "loss": 0.1683, "rewards/accuracies": 1.0, "rewards/chosen": 0.21591182053089142, "rewards/margins": 1.9256137609481812, "rewards/rejected": -1.7097017765045166, "step": 4670 }, { "epoch": 0.06, "learning_rate": 2.8010533875987555e-06, "logits/chosen": -2.9877171516418457, "logits/rejected": -2.918712615966797, "logps/chosen": -36.4761962890625, "logps/rejected": -219.3140106201172, "loss": 0.1352, "rewards/accuracies": 1.0, "rewards/chosen": 0.11982015520334244, "rewards/margins": 1.9221868515014648, "rewards/rejected": -1.802366852760315, "step": 4680 }, { "epoch": 0.06, "learning_rate": 2.8070385444098636e-06, "logits/chosen": -3.02978777885437, "logits/rejected": -2.971224308013916, "logps/chosen": -42.03395462036133, "logps/rejected": -223.00009155273438, "loss": 0.1362, "rewards/accuracies": 1.0, "rewards/chosen": 0.11932500451803207, "rewards/margins": 1.9510886669158936, "rewards/rejected": -1.8317636251449585, "step": 4690 }, { "epoch": 0.06, "learning_rate": 2.813023701220972e-06, "logits/chosen": -3.025728225708008, "logits/rejected": -2.9459433555603027, "logps/chosen": -36.31629180908203, "logps/rejected": -232.96304321289062, "loss": 0.1216, "rewards/accuracies": 1.0, "rewards/chosen": 0.13776563107967377, "rewards/margins": 2.0699920654296875, "rewards/rejected": -1.9322264194488525, "step": 4700 }, { "epoch": 0.06, "learning_rate": 2.8190088580320807e-06, "logits/chosen": -3.003939151763916, "logits/rejected": -2.9803543090820312, "logps/chosen": -24.889720916748047, "logps/rejected": -209.62057495117188, "loss": 0.1443, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.15680097043514252, "rewards/margins": 1.8762706518173218, "rewards/rejected": -1.7194693088531494, "step": 4710 }, { "epoch": 0.06, "learning_rate": 2.8249940148431893e-06, "logits/chosen": -3.0364739894866943, "logits/rejected": -3.0003087520599365, "logps/chosen": -24.597366333007812, "logps/rejected": -207.5682830810547, "loss": 0.2087, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.16745750606060028, "rewards/margins": 1.870873212814331, "rewards/rejected": -1.7034156322479248, "step": 4720 }, { "epoch": 0.06, "learning_rate": 2.830979171654298e-06, "logits/chosen": -2.9644949436187744, "logits/rejected": -2.9099481105804443, "logps/chosen": -27.6608829498291, "logps/rejected": -223.8698272705078, "loss": 0.1703, "rewards/accuracies": 1.0, "rewards/chosen": 0.14916852116584778, "rewards/margins": 2.0074849128723145, "rewards/rejected": -1.858316421508789, "step": 4730 }, { "epoch": 0.06, "learning_rate": 2.836964328465406e-06, "logits/chosen": -3.015444755554199, "logits/rejected": -2.979820728302002, "logps/chosen": -21.269439697265625, "logps/rejected": -210.5951690673828, "loss": 0.1851, "rewards/accuracies": 1.0, "rewards/chosen": 0.18390795588493347, "rewards/margins": 1.907480001449585, "rewards/rejected": -1.723572015762329, "step": 4740 }, { "epoch": 0.06, "learning_rate": 2.8429494852765145e-06, "logits/chosen": -3.0062828063964844, "logits/rejected": -2.9628829956054688, "logps/chosen": -26.739538192749023, "logps/rejected": -213.13540649414062, "loss": 0.1464, "rewards/accuracies": 1.0, "rewards/chosen": 0.1530245840549469, "rewards/margins": 1.9086637496948242, "rewards/rejected": -1.7556393146514893, "step": 4750 }, { "epoch": 0.06, "learning_rate": 2.848934642087623e-06, "logits/chosen": -3.032512664794922, "logits/rejected": -2.9986259937286377, "logps/chosen": -28.485004425048828, "logps/rejected": -214.9870147705078, "loss": 0.1393, "rewards/accuracies": 1.0, "rewards/chosen": 0.19280317425727844, "rewards/margins": 1.9490089416503906, "rewards/rejected": -1.7562053203582764, "step": 4760 }, { "epoch": 0.06, "learning_rate": 2.8549197988987316e-06, "logits/chosen": -3.0547356605529785, "logits/rejected": -2.9728589057922363, "logps/chosen": -32.546409606933594, "logps/rejected": -242.2402801513672, "loss": 0.2432, "rewards/accuracies": 1.0, "rewards/chosen": 0.156417116522789, "rewards/margins": 2.1774580478668213, "rewards/rejected": -2.0210413932800293, "step": 4770 }, { "epoch": 0.06, "learning_rate": 2.86090495570984e-06, "logits/chosen": -3.021535873413086, "logits/rejected": -2.948387622833252, "logps/chosen": -50.49555587768555, "logps/rejected": -250.8617401123047, "loss": 0.1193, "rewards/accuracies": 1.0, "rewards/chosen": 0.0029304935596883297, "rewards/margins": 2.10315203666687, "rewards/rejected": -2.100221633911133, "step": 4780 }, { "epoch": 0.06, "learning_rate": 2.866890112520948e-06, "logits/chosen": -2.9905266761779785, "logits/rejected": -2.927833080291748, "logps/chosen": -22.565841674804688, "logps/rejected": -221.244384765625, "loss": 0.2333, "rewards/accuracies": 1.0, "rewards/chosen": 0.20012331008911133, "rewards/margins": 2.0391464233398438, "rewards/rejected": -1.839023232460022, "step": 4790 }, { "epoch": 0.06, "learning_rate": 2.8728752693320564e-06, "logits/chosen": -2.9865612983703613, "logits/rejected": -2.886953592300415, "logps/chosen": -52.491432189941406, "logps/rejected": -218.4072723388672, "loss": 0.1836, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.028876323252916336, "rewards/margins": 1.7963440418243408, "rewards/rejected": -1.7674678564071655, "step": 4800 }, { "epoch": 0.06, "learning_rate": 2.8788604261431654e-06, "logits/chosen": -2.9985098838806152, "logits/rejected": -2.9655330181121826, "logps/chosen": -29.964975357055664, "logps/rejected": -222.7903289794922, "loss": 0.1678, "rewards/accuracies": 1.0, "rewards/chosen": 0.14685356616973877, "rewards/margins": 1.9801146984100342, "rewards/rejected": -1.833261251449585, "step": 4810 }, { "epoch": 0.06, "learning_rate": 2.884845582954274e-06, "logits/chosen": -3.029428005218506, "logits/rejected": -2.9338200092315674, "logps/chosen": -53.567115783691406, "logps/rejected": -231.73269653320312, "loss": 0.1396, "rewards/accuracies": 1.0, "rewards/chosen": 0.06552613526582718, "rewards/margins": 1.9792869091033936, "rewards/rejected": -1.913760781288147, "step": 4820 }, { "epoch": 0.06, "learning_rate": 2.8908307397653825e-06, "logits/chosen": -3.036029100418091, "logits/rejected": -2.923429012298584, "logps/chosen": -47.78996276855469, "logps/rejected": -230.068603515625, "loss": 0.1622, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.06465088576078415, "rewards/margins": 1.980494737625122, "rewards/rejected": -1.9158439636230469, "step": 4830 }, { "epoch": 0.06, "learning_rate": 2.89681589657649e-06, "logits/chosen": -2.9836223125457764, "logits/rejected": -2.934058666229248, "logps/chosen": -30.1444149017334, "logps/rejected": -248.07638549804688, "loss": 0.1095, "rewards/accuracies": 1.0, "rewards/chosen": 0.1642160415649414, "rewards/margins": 2.2587156295776367, "rewards/rejected": -2.0944998264312744, "step": 4840 }, { "epoch": 0.06, "learning_rate": 2.9028010533875988e-06, "logits/chosen": -3.020744800567627, "logits/rejected": -2.9744973182678223, "logps/chosen": -42.30205535888672, "logps/rejected": -228.8632354736328, "loss": 0.1946, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.04058048874139786, "rewards/margins": 1.9387664794921875, "rewards/rejected": -1.8981859683990479, "step": 4850 }, { "epoch": 0.06, "learning_rate": 2.9087862101987073e-06, "logits/chosen": -2.9895992279052734, "logits/rejected": -2.949167251586914, "logps/chosen": -36.33244705200195, "logps/rejected": -220.2164306640625, "loss": 0.1572, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.06962531805038452, "rewards/margins": 1.881376028060913, "rewards/rejected": -1.8117507696151733, "step": 4860 }, { "epoch": 0.06, "learning_rate": 2.914771367009816e-06, "logits/chosen": -2.9731252193450928, "logits/rejected": -2.9044342041015625, "logps/chosen": -46.945716857910156, "logps/rejected": -222.13943481445312, "loss": 0.1976, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.024702200666069984, "rewards/margins": 1.8011443614959717, "rewards/rejected": -1.8258469104766846, "step": 4870 }, { "epoch": 0.06, "learning_rate": 2.9207565238209244e-06, "logits/chosen": -3.0317983627319336, "logits/rejected": -2.9341769218444824, "logps/chosen": -37.29020690917969, "logps/rejected": -199.4810333251953, "loss": 0.149, "rewards/accuracies": 1.0, "rewards/chosen": 0.14097842574119568, "rewards/margins": 1.7458127737045288, "rewards/rejected": -1.6048343181610107, "step": 4880 }, { "epoch": 0.06, "learning_rate": 2.9267416806320326e-06, "logits/chosen": -3.028503894805908, "logits/rejected": -2.909170389175415, "logps/chosen": -57.6209602355957, "logps/rejected": -282.2735900878906, "loss": 0.0999, "rewards/accuracies": 1.0, "rewards/chosen": -0.03375552222132683, "rewards/margins": 2.384636402130127, "rewards/rejected": -2.4183919429779053, "step": 4890 }, { "epoch": 0.06, "learning_rate": 2.932726837443141e-06, "logits/chosen": -2.9904110431671143, "logits/rejected": -2.8609421253204346, "logps/chosen": -40.049468994140625, "logps/rejected": -268.72210693359375, "loss": 0.1694, "rewards/accuracies": 1.0, "rewards/chosen": 0.10032268613576889, "rewards/margins": 2.3828494548797607, "rewards/rejected": -2.282526731491089, "step": 4900 }, { "epoch": 0.06, "learning_rate": 2.9387119942542497e-06, "logits/chosen": -3.0220963954925537, "logits/rejected": -2.951918601989746, "logps/chosen": -46.229610443115234, "logps/rejected": -257.28729248046875, "loss": 0.1386, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.005295318551361561, "rewards/margins": 2.168762683868408, "rewards/rejected": -2.174057960510254, "step": 4910 }, { "epoch": 0.06, "learning_rate": 2.9446971510653582e-06, "logits/chosen": -3.0082950592041016, "logits/rejected": -2.962036609649658, "logps/chosen": -34.33168029785156, "logps/rejected": -253.7637939453125, "loss": 0.1279, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.06186440587043762, "rewards/margins": 2.208345890045166, "rewards/rejected": -2.146481513977051, "step": 4920 }, { "epoch": 0.06, "learning_rate": 2.9506823078764668e-06, "logits/chosen": -2.996215343475342, "logits/rejected": -2.9234302043914795, "logps/chosen": -44.443016052246094, "logps/rejected": -263.4480895996094, "loss": 0.1291, "rewards/accuracies": 1.0, "rewards/chosen": 0.04343847557902336, "rewards/margins": 2.2823891639709473, "rewards/rejected": -2.238950490951538, "step": 4930 }, { "epoch": 0.06, "learning_rate": 2.956667464687575e-06, "logits/chosen": -3.0039913654327393, "logits/rejected": -2.957040786743164, "logps/chosen": -24.00750160217285, "logps/rejected": -237.9171905517578, "loss": 0.1066, "rewards/accuracies": 1.0, "rewards/chosen": 0.17124830186367035, "rewards/margins": 2.1659111976623535, "rewards/rejected": -1.9946630001068115, "step": 4940 }, { "epoch": 0.06, "learning_rate": 2.9626526214986834e-06, "logits/chosen": -2.9897024631500244, "logits/rejected": -2.930706739425659, "logps/chosen": -40.12211990356445, "logps/rejected": -234.7733154296875, "loss": 0.1637, "rewards/accuracies": 1.0, "rewards/chosen": 0.0889894962310791, "rewards/margins": 2.0368716716766357, "rewards/rejected": -1.947882056236267, "step": 4950 }, { "epoch": 0.06, "learning_rate": 2.968637778309792e-06, "logits/chosen": -3.0075876712799072, "logits/rejected": -2.977064609527588, "logps/chosen": -19.46194076538086, "logps/rejected": -228.8000946044922, "loss": 0.1229, "rewards/accuracies": 1.0, "rewards/chosen": 0.19349664449691772, "rewards/margins": 2.1219325065612793, "rewards/rejected": -1.9284359216690063, "step": 4960 }, { "epoch": 0.06, "learning_rate": 2.9746229351209006e-06, "logits/chosen": -3.015232563018799, "logits/rejected": -2.875865936279297, "logps/chosen": -69.5210952758789, "logps/rejected": -311.8857421875, "loss": 0.1088, "rewards/accuracies": 1.0, "rewards/chosen": -0.15130403637886047, "rewards/margins": 2.5424771308898926, "rewards/rejected": -2.6937811374664307, "step": 4970 }, { "epoch": 0.06, "learning_rate": 2.980608091932009e-06, "logits/chosen": -2.993424415588379, "logits/rejected": -2.894280433654785, "logps/chosen": -57.89201736450195, "logps/rejected": -288.7882080078125, "loss": 0.1111, "rewards/accuracies": 1.0, "rewards/chosen": -0.06960022449493408, "rewards/margins": 2.4157214164733887, "rewards/rejected": -2.485321521759033, "step": 4980 }, { "epoch": 0.06, "learning_rate": 2.9865932487431172e-06, "logits/chosen": -3.0028152465820312, "logits/rejected": -2.913115978240967, "logps/chosen": -54.17997360229492, "logps/rejected": -279.1604309082031, "loss": 0.1151, "rewards/accuracies": 1.0, "rewards/chosen": -0.1006183847784996, "rewards/margins": 2.2933545112609863, "rewards/rejected": -2.393972635269165, "step": 4990 }, { "epoch": 0.06, "learning_rate": 2.992578405554226e-06, "logits/chosen": -3.0237009525299072, "logits/rejected": -2.9063665866851807, "logps/chosen": -55.77141571044922, "logps/rejected": -281.08837890625, "loss": 0.1635, "rewards/accuracies": 1.0, "rewards/chosen": -0.07814660668373108, "rewards/margins": 2.3335254192352295, "rewards/rejected": -2.4116721153259277, "step": 5000 }, { "epoch": 0.06, "learning_rate": 2.9985635623653343e-06, "logits/chosen": -2.9716219902038574, "logits/rejected": -2.915215253829956, "logps/chosen": -50.177024841308594, "logps/rejected": -240.7209930419922, "loss": 0.2049, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.055899716913700104, "rewards/margins": 1.9684463739395142, "rewards/rejected": -2.024346113204956, "step": 5010 }, { "epoch": 0.06, "learning_rate": 3.004548719176443e-06, "logits/chosen": -3.0184998512268066, "logits/rejected": -2.9024903774261475, "logps/chosen": -59.118408203125, "logps/rejected": -289.1257629394531, "loss": 0.1581, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.09193549305200577, "rewards/margins": 2.394331455230713, "rewards/rejected": -2.486266851425171, "step": 5020 }, { "epoch": 0.06, "learning_rate": 3.0105338759875515e-06, "logits/chosen": -2.991899251937866, "logits/rejected": -2.8736400604248047, "logps/chosen": -75.63072204589844, "logps/rejected": -310.03399658203125, "loss": 0.1271, "rewards/accuracies": 1.0, "rewards/chosen": -0.20561771094799042, "rewards/margins": 2.493241548538208, "rewards/rejected": -2.698859214782715, "step": 5030 }, { "epoch": 0.06, "learning_rate": 3.016519032798659e-06, "logits/chosen": -3.0233566761016846, "logits/rejected": -2.93076229095459, "logps/chosen": -59.02693557739258, "logps/rejected": -266.6100769042969, "loss": 0.1825, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06123960018157959, "rewards/margins": 2.178593397140503, "rewards/rejected": -2.239833116531372, "step": 5040 }, { "epoch": 0.06, "learning_rate": 3.0225041896097677e-06, "logits/chosen": -3.014186143875122, "logits/rejected": -2.9209842681884766, "logps/chosen": -48.80747985839844, "logps/rejected": -271.5855407714844, "loss": 0.1422, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.008378639817237854, "rewards/margins": 2.3393874168395996, "rewards/rejected": -2.3310086727142334, "step": 5050 }, { "epoch": 0.06, "learning_rate": 3.0284893464208763e-06, "logits/chosen": -3.012324810028076, "logits/rejected": -2.9854626655578613, "logps/chosen": -20.551151275634766, "logps/rejected": -238.5847930908203, "loss": 0.1317, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.15905466675758362, "rewards/margins": 2.1678626537323, "rewards/rejected": -2.008808135986328, "step": 5060 }, { "epoch": 0.06, "learning_rate": 3.034474503231985e-06, "logits/chosen": -2.9861159324645996, "logits/rejected": -2.91819167137146, "logps/chosen": -33.9564094543457, "logps/rejected": -283.2081298828125, "loss": 0.1278, "rewards/accuracies": 1.0, "rewards/chosen": 0.08283208310604095, "rewards/margins": 2.5379397869110107, "rewards/rejected": -2.455108165740967, "step": 5070 }, { "epoch": 0.06, "learning_rate": 3.0404596600430934e-06, "logits/chosen": -3.008671760559082, "logits/rejected": -2.9551596641540527, "logps/chosen": -35.66887283325195, "logps/rejected": -286.2888488769531, "loss": 0.1452, "rewards/accuracies": 1.0, "rewards/chosen": 0.060774438083171844, "rewards/margins": 2.5472025871276855, "rewards/rejected": -2.4864284992218018, "step": 5080 }, { "epoch": 0.06, "learning_rate": 3.0464448168542015e-06, "logits/chosen": -2.991081714630127, "logits/rejected": -2.9046857357025146, "logps/chosen": -60.73109817504883, "logps/rejected": -309.03265380859375, "loss": 0.1214, "rewards/accuracies": 1.0, "rewards/chosen": -0.11225497722625732, "rewards/margins": 2.577415943145752, "rewards/rejected": -2.689671039581299, "step": 5090 }, { "epoch": 0.06, "learning_rate": 3.05242997366531e-06, "logits/chosen": -3.0132460594177246, "logits/rejected": -2.8816628456115723, "logps/chosen": -47.51491165161133, "logps/rejected": -339.0894775390625, "loss": 0.0905, "rewards/accuracies": 1.0, "rewards/chosen": 0.021192923188209534, "rewards/margins": 3.0185577869415283, "rewards/rejected": -2.9973647594451904, "step": 5100 }, { "epoch": 0.06, "learning_rate": 3.0584151304764186e-06, "logits/chosen": -2.9853885173797607, "logits/rejected": -2.9045825004577637, "logps/chosen": -52.384178161621094, "logps/rejected": -285.93743896484375, "loss": 0.0934, "rewards/accuracies": 1.0, "rewards/chosen": -0.02400839328765869, "rewards/margins": 2.444410800933838, "rewards/rejected": -2.468419313430786, "step": 5110 }, { "epoch": 0.06, "learning_rate": 3.064400287287527e-06, "logits/chosen": -3.010995388031006, "logits/rejected": -2.9305882453918457, "logps/chosen": -41.562355041503906, "logps/rejected": -273.0790710449219, "loss": 0.1178, "rewards/accuracies": 1.0, "rewards/chosen": 0.012543099001049995, "rewards/margins": 2.355342388153076, "rewards/rejected": -2.342799186706543, "step": 5120 }, { "epoch": 0.06, "learning_rate": 3.0703854440986357e-06, "logits/chosen": -2.9769630432128906, "logits/rejected": -2.887880802154541, "logps/chosen": -61.100486755371094, "logps/rejected": -320.2652282714844, "loss": 0.1266, "rewards/accuracies": 1.0, "rewards/chosen": -0.06926075369119644, "rewards/margins": 2.740938901901245, "rewards/rejected": -2.810199737548828, "step": 5130 }, { "epoch": 0.06, "learning_rate": 3.076370600909744e-06, "logits/chosen": -3.010119915008545, "logits/rejected": -2.972925901412964, "logps/chosen": -41.402042388916016, "logps/rejected": -267.303466796875, "loss": 0.1469, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.00025380851002410054, "rewards/margins": 2.294079542160034, "rewards/rejected": -2.2943336963653564, "step": 5140 }, { "epoch": 0.06, "learning_rate": 3.0823557577208524e-06, "logits/chosen": -3.0187535285949707, "logits/rejected": -2.956918239593506, "logps/chosen": -48.60158920288086, "logps/rejected": -294.6176452636719, "loss": 0.1132, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.030945152044296265, "rewards/margins": 2.5297446250915527, "rewards/rejected": -2.5606894493103027, "step": 5150 }, { "epoch": 0.06, "learning_rate": 3.088340914531961e-06, "logits/chosen": -3.001335620880127, "logits/rejected": -2.9133238792419434, "logps/chosen": -41.05352020263672, "logps/rejected": -336.8919982910156, "loss": 0.1271, "rewards/accuracies": 1.0, "rewards/chosen": 0.07383303344249725, "rewards/margins": 3.0636496543884277, "rewards/rejected": -2.989816427230835, "step": 5160 }, { "epoch": 0.06, "learning_rate": 3.0943260713430695e-06, "logits/chosen": -2.962937355041504, "logits/rejected": -2.875899076461792, "logps/chosen": -59.091957092285156, "logps/rejected": -358.2029113769531, "loss": 0.0897, "rewards/accuracies": 1.0, "rewards/chosen": -0.024854006245732307, "rewards/margins": 3.1526331901550293, "rewards/rejected": -3.17748761177063, "step": 5170 }, { "epoch": 0.06, "learning_rate": 3.100311228154178e-06, "logits/chosen": -2.988442897796631, "logits/rejected": -2.942415714263916, "logps/chosen": -34.966575622558594, "logps/rejected": -234.84091186523438, "loss": 0.1396, "rewards/accuracies": 1.0, "rewards/chosen": 0.06402155756950378, "rewards/margins": 2.032688617706299, "rewards/rejected": -1.9686672687530518, "step": 5180 }, { "epoch": 0.06, "learning_rate": 3.106296384965286e-06, "logits/chosen": -2.982459783554077, "logits/rejected": -2.851632595062256, "logps/chosen": -72.27134704589844, "logps/rejected": -356.6342468261719, "loss": 0.0936, "rewards/accuracies": 1.0, "rewards/chosen": -0.1119346171617508, "rewards/margins": 3.0619559288024902, "rewards/rejected": -3.1738905906677246, "step": 5190 }, { "epoch": 0.06, "learning_rate": 3.1122815417763947e-06, "logits/chosen": -2.973456621170044, "logits/rejected": -2.924196243286133, "logps/chosen": -30.60361671447754, "logps/rejected": -309.7763977050781, "loss": 0.1121, "rewards/accuracies": 1.0, "rewards/chosen": 0.11815372854471207, "rewards/margins": 2.8317646980285645, "rewards/rejected": -2.713610887527466, "step": 5200 }, { "epoch": 0.06, "learning_rate": 3.1182666985875033e-06, "logits/chosen": -3.0051238536834717, "logits/rejected": -2.9638590812683105, "logps/chosen": -52.90217971801758, "logps/rejected": -264.62420654296875, "loss": 0.1205, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.04533940553665161, "rewards/margins": 2.2221295833587646, "rewards/rejected": -2.2674689292907715, "step": 5210 }, { "epoch": 0.06, "learning_rate": 3.124251855398612e-06, "logits/chosen": -2.976573944091797, "logits/rejected": -2.8738136291503906, "logps/chosen": -52.69377517700195, "logps/rejected": -380.1535949707031, "loss": 0.0928, "rewards/accuracies": 1.0, "rewards/chosen": -0.020917227491736412, "rewards/margins": 3.3653011322021484, "rewards/rejected": -3.386218547821045, "step": 5220 }, { "epoch": 0.06, "learning_rate": 3.1302370122097204e-06, "logits/chosen": -3.022925615310669, "logits/rejected": -2.9809417724609375, "logps/chosen": -49.229698181152344, "logps/rejected": -256.2526550292969, "loss": 0.2342, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.06261793524026871, "rewards/margins": 2.115427017211914, "rewards/rejected": -2.1780447959899902, "step": 5230 }, { "epoch": 0.06, "learning_rate": 3.136222169020829e-06, "logits/chosen": -2.9935860633850098, "logits/rejected": -2.9010980129241943, "logps/chosen": -52.871337890625, "logps/rejected": -311.3472595214844, "loss": 0.1403, "rewards/accuracies": 1.0, "rewards/chosen": -0.08912291377782822, "rewards/margins": 2.6283507347106934, "rewards/rejected": -2.7174735069274902, "step": 5240 }, { "epoch": 0.06, "learning_rate": 3.142207325831937e-06, "logits/chosen": -2.9919533729553223, "logits/rejected": -2.8506171703338623, "logps/chosen": -64.968017578125, "logps/rejected": -346.5986022949219, "loss": 0.0988, "rewards/accuracies": 1.0, "rewards/chosen": -0.1222541332244873, "rewards/margins": 2.9284756183624268, "rewards/rejected": -3.050729513168335, "step": 5250 }, { "epoch": 0.06, "learning_rate": 3.1481924826430456e-06, "logits/chosen": -3.018242359161377, "logits/rejected": -2.9241251945495605, "logps/chosen": -60.61185836791992, "logps/rejected": -345.8450622558594, "loss": 0.0937, "rewards/accuracies": 1.0, "rewards/chosen": -0.10355617851018906, "rewards/margins": 2.9619510173797607, "rewards/rejected": -3.065507173538208, "step": 5260 }, { "epoch": 0.06, "learning_rate": 3.154177639454154e-06, "logits/chosen": -2.96334171295166, "logits/rejected": -2.871721029281616, "logps/chosen": -50.141639709472656, "logps/rejected": -295.1228332519531, "loss": 0.12, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.043458957225084305, "rewards/margins": 2.5145742893218994, "rewards/rejected": -2.5580332279205322, "step": 5270 }, { "epoch": 0.06, "learning_rate": 3.1601627962652627e-06, "logits/chosen": -3.0092501640319824, "logits/rejected": -2.940103054046631, "logps/chosen": -60.175498962402344, "logps/rejected": -283.78485107421875, "loss": 0.153, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.17151577770709991, "rewards/margins": 2.285263776779175, "rewards/rejected": -2.4567792415618896, "step": 5280 }, { "epoch": 0.06, "learning_rate": 3.1661479530763713e-06, "logits/chosen": -2.9932351112365723, "logits/rejected": -2.909818649291992, "logps/chosen": -58.883995056152344, "logps/rejected": -313.9906311035156, "loss": 0.1485, "rewards/accuracies": 1.0, "rewards/chosen": -0.10285627841949463, "rewards/margins": 2.6489014625549316, "rewards/rejected": -2.751757860183716, "step": 5290 }, { "epoch": 0.06, "learning_rate": 3.172133109887479e-06, "logits/chosen": -2.989675998687744, "logits/rejected": -2.9271459579467773, "logps/chosen": -52.86347579956055, "logps/rejected": -323.13677978515625, "loss": 0.1324, "rewards/accuracies": 1.0, "rewards/chosen": -0.06436979025602341, "rewards/margins": 2.762627363204956, "rewards/rejected": -2.8269972801208496, "step": 5300 }, { "epoch": 0.06, "learning_rate": 3.1781182666985876e-06, "logits/chosen": -3.0218698978424072, "logits/rejected": -2.9024200439453125, "logps/chosen": -59.11468505859375, "logps/rejected": -343.896484375, "loss": 0.1082, "rewards/accuracies": 1.0, "rewards/chosen": -0.0909852385520935, "rewards/margins": 2.953657865524292, "rewards/rejected": -3.044642925262451, "step": 5310 }, { "epoch": 0.06, "learning_rate": 3.184103423509696e-06, "logits/chosen": -2.9861176013946533, "logits/rejected": -2.92033314704895, "logps/chosen": -49.045982360839844, "logps/rejected": -316.56390380859375, "loss": 0.1574, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.08641550689935684, "rewards/margins": 2.6988043785095215, "rewards/rejected": -2.78521990776062, "step": 5320 }, { "epoch": 0.06, "learning_rate": 3.1900885803208047e-06, "logits/chosen": -3.0224597454071045, "logits/rejected": -2.9552226066589355, "logps/chosen": -37.55684280395508, "logps/rejected": -271.4873352050781, "loss": 0.0987, "rewards/accuracies": 1.0, "rewards/chosen": 0.06301368772983551, "rewards/margins": 2.408531665802002, "rewards/rejected": -2.345518112182617, "step": 5330 }, { "epoch": 0.06, "learning_rate": 3.1960737371319132e-06, "logits/chosen": -2.9964489936828613, "logits/rejected": -2.9302520751953125, "logps/chosen": -46.56301498413086, "logps/rejected": -348.96820068359375, "loss": 0.1136, "rewards/accuracies": 1.0, "rewards/chosen": 0.018899181857705116, "rewards/margins": 3.109135866165161, "rewards/rejected": -3.090236186981201, "step": 5340 }, { "epoch": 0.06, "learning_rate": 3.2020588939430213e-06, "logits/chosen": -3.032050848007202, "logits/rejected": -2.9662680625915527, "logps/chosen": -46.983436584472656, "logps/rejected": -317.98712158203125, "loss": 0.1653, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.02297847904264927, "rewards/margins": 2.8162765502929688, "rewards/rejected": -2.7932980060577393, "step": 5350 }, { "epoch": 0.06, "learning_rate": 3.20804405075413e-06, "logits/chosen": -3.0221004486083984, "logits/rejected": -2.9365241527557373, "logps/chosen": -83.19551086425781, "logps/rejected": -428.12127685546875, "loss": 0.0902, "rewards/accuracies": 1.0, "rewards/chosen": -0.2933086156845093, "rewards/margins": 3.5877525806427, "rewards/rejected": -3.881061553955078, "step": 5360 }, { "epoch": 0.06, "learning_rate": 3.2140292075652384e-06, "logits/chosen": -2.978792190551758, "logits/rejected": -2.9110710620880127, "logps/chosen": -62.08478546142578, "logps/rejected": -304.4969177246094, "loss": 0.1621, "rewards/accuracies": 1.0, "rewards/chosen": -0.1749628633260727, "rewards/margins": 2.4804816246032715, "rewards/rejected": -2.655444622039795, "step": 5370 }, { "epoch": 0.06, "learning_rate": 3.220014364376347e-06, "logits/chosen": -3.0399415493011475, "logits/rejected": -2.9943947792053223, "logps/chosen": -45.67047119140625, "logps/rejected": -299.46868896484375, "loss": 0.1636, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.0065512643195688725, "rewards/margins": 2.6186909675598145, "rewards/rejected": -2.612139940261841, "step": 5380 }, { "epoch": 0.06, "learning_rate": 3.2259995211874556e-06, "logits/chosen": -2.9989399909973145, "logits/rejected": -2.984001636505127, "logps/chosen": -13.937461853027344, "logps/rejected": -244.49380493164062, "loss": 0.0984, "rewards/accuracies": 1.0, "rewards/chosen": 0.20175591111183167, "rewards/margins": 2.2768378257751465, "rewards/rejected": -2.0750818252563477, "step": 5390 }, { "epoch": 0.06, "learning_rate": 3.2319846779985637e-06, "logits/chosen": -3.0415892601013184, "logits/rejected": -2.951488494873047, "logps/chosen": -31.986858367919922, "logps/rejected": -228.442138671875, "loss": 0.1181, "rewards/accuracies": 1.0, "rewards/chosen": 0.09419538825750351, "rewards/margins": 2.0120849609375, "rewards/rejected": -1.9178898334503174, "step": 5400 }, { "epoch": 0.06, "learning_rate": 3.2379698348096722e-06, "logits/chosen": -3.0255813598632812, "logits/rejected": -2.956320285797119, "logps/chosen": -47.5153923034668, "logps/rejected": -342.7583923339844, "loss": 0.1289, "rewards/accuracies": 1.0, "rewards/chosen": 0.015972962602972984, "rewards/margins": 3.061044692993164, "rewards/rejected": -3.045071840286255, "step": 5410 }, { "epoch": 0.06, "learning_rate": 3.243954991620781e-06, "logits/chosen": -2.9740545749664307, "logits/rejected": -2.875389337539673, "logps/chosen": -59.4351921081543, "logps/rejected": -386.948486328125, "loss": 0.0815, "rewards/accuracies": 1.0, "rewards/chosen": -0.1169629842042923, "rewards/margins": 3.363375186920166, "rewards/rejected": -3.4803378582000732, "step": 5420 }, { "epoch": 0.06, "learning_rate": 3.2499401484318893e-06, "logits/chosen": -2.972933292388916, "logits/rejected": -2.9162986278533936, "logps/chosen": -42.57146453857422, "logps/rejected": -337.2945861816406, "loss": 0.1102, "rewards/accuracies": 1.0, "rewards/chosen": -0.015364435501396656, "rewards/margins": 2.972062349319458, "rewards/rejected": -2.987426519393921, "step": 5430 }, { "epoch": 0.07, "learning_rate": 3.255925305242998e-06, "logits/chosen": -3.0288872718811035, "logits/rejected": -2.9258692264556885, "logps/chosen": -55.207977294921875, "logps/rejected": -382.765625, "loss": 0.0766, "rewards/accuracies": 1.0, "rewards/chosen": -0.04259470850229263, "rewards/margins": 3.3897526264190674, "rewards/rejected": -3.432347059249878, "step": 5440 }, { "epoch": 0.07, "learning_rate": 3.261910462054106e-06, "logits/chosen": -2.9953484535217285, "logits/rejected": -2.9486608505249023, "logps/chosen": -47.830657958984375, "logps/rejected": -301.16229248046875, "loss": 0.1167, "rewards/accuracies": 1.0, "rewards/chosen": -0.07217635214328766, "rewards/margins": 2.556562662124634, "rewards/rejected": -2.62873911857605, "step": 5450 }, { "epoch": 0.07, "learning_rate": 3.2678956188652146e-06, "logits/chosen": -2.980771541595459, "logits/rejected": -2.8527681827545166, "logps/chosen": -71.85978698730469, "logps/rejected": -455.4142150878906, "loss": 0.0694, "rewards/accuracies": 1.0, "rewards/chosen": -0.11815030872821808, "rewards/margins": 4.022614479064941, "rewards/rejected": -4.140765190124512, "step": 5460 }, { "epoch": 0.07, "learning_rate": 3.273880775676323e-06, "logits/chosen": -2.9885849952697754, "logits/rejected": -2.858269214630127, "logps/chosen": -53.54035186767578, "logps/rejected": -332.7272644042969, "loss": 0.1049, "rewards/accuracies": 1.0, "rewards/chosen": -0.014492261223495007, "rewards/margins": 2.916456699371338, "rewards/rejected": -2.9309489727020264, "step": 5470 }, { "epoch": 0.07, "learning_rate": 3.2798659324874317e-06, "logits/chosen": -2.9564948081970215, "logits/rejected": -2.919356346130371, "logps/chosen": -20.705541610717773, "logps/rejected": -266.8041687011719, "loss": 0.1503, "rewards/accuracies": 1.0, "rewards/chosen": 0.2034260332584381, "rewards/margins": 2.5006728172302246, "rewards/rejected": -2.2972469329833984, "step": 5480 }, { "epoch": 0.07, "learning_rate": 3.2858510892985402e-06, "logits/chosen": -2.976449728012085, "logits/rejected": -2.914436101913452, "logps/chosen": -28.63602066040039, "logps/rejected": -266.32269287109375, "loss": 0.0964, "rewards/accuracies": 1.0, "rewards/chosen": 0.13295337557792664, "rewards/margins": 2.4132766723632812, "rewards/rejected": -2.2803235054016113, "step": 5490 }, { "epoch": 0.07, "learning_rate": 3.291836246109648e-06, "logits/chosen": -3.022071123123169, "logits/rejected": -2.990967035293579, "logps/chosen": -34.536033630371094, "logps/rejected": -314.45281982421875, "loss": 0.1009, "rewards/accuracies": 1.0, "rewards/chosen": 0.0401003435254097, "rewards/margins": 2.806320905685425, "rewards/rejected": -2.7662205696105957, "step": 5500 }, { "epoch": 0.07, "learning_rate": 3.2978214029207565e-06, "logits/chosen": -3.029271125793457, "logits/rejected": -2.9304516315460205, "logps/chosen": -54.425804138183594, "logps/rejected": -377.9002380371094, "loss": 0.0811, "rewards/accuracies": 1.0, "rewards/chosen": -0.05935411900281906, "rewards/margins": 3.3294003009796143, "rewards/rejected": -3.3887546062469482, "step": 5510 }, { "epoch": 0.07, "learning_rate": 3.303806559731865e-06, "logits/chosen": -3.0165724754333496, "logits/rejected": -2.9385018348693848, "logps/chosen": -66.43977355957031, "logps/rejected": -416.19268798828125, "loss": 0.1061, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.15998853743076324, "rewards/margins": 3.5982754230499268, "rewards/rejected": -3.7582638263702393, "step": 5520 }, { "epoch": 0.07, "learning_rate": 3.309791716542974e-06, "logits/chosen": -3.028733730316162, "logits/rejected": -2.994466543197632, "logps/chosen": -34.690345764160156, "logps/rejected": -285.3797607421875, "loss": 0.1353, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.03061036206781864, "rewards/margins": 2.5228333473205566, "rewards/rejected": -2.492222547531128, "step": 5530 }, { "epoch": 0.07, "learning_rate": 3.3157768733540826e-06, "logits/chosen": -2.983461856842041, "logits/rejected": -2.9392011165618896, "logps/chosen": -54.129661560058594, "logps/rejected": -317.9588317871094, "loss": 0.1694, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.11182298511266708, "rewards/margins": 2.681741237640381, "rewards/rejected": -2.7935643196105957, "step": 5540 }, { "epoch": 0.07, "learning_rate": 3.3217620301651903e-06, "logits/chosen": -3.0421478748321533, "logits/rejected": -2.9580016136169434, "logps/chosen": -59.69963455200195, "logps/rejected": -374.90185546875, "loss": 0.1502, "rewards/accuracies": 1.0, "rewards/chosen": -0.09257818013429642, "rewards/margins": 3.2802658081054688, "rewards/rejected": -3.3728435039520264, "step": 5550 }, { "epoch": 0.07, "learning_rate": 3.327747186976299e-06, "logits/chosen": -3.0098118782043457, "logits/rejected": -2.9164977073669434, "logps/chosen": -61.85163497924805, "logps/rejected": -358.11376953125, "loss": 0.1382, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.15396633744239807, "rewards/margins": 3.0380606651306152, "rewards/rejected": -3.1920273303985596, "step": 5560 }, { "epoch": 0.07, "learning_rate": 3.3337323437874074e-06, "logits/chosen": -3.0546088218688965, "logits/rejected": -2.9915270805358887, "logps/chosen": -54.200340270996094, "logps/rejected": -338.5923767089844, "loss": 0.091, "rewards/accuracies": 1.0, "rewards/chosen": -0.09530647099018097, "rewards/margins": 2.91878342628479, "rewards/rejected": -3.014090061187744, "step": 5570 }, { "epoch": 0.07, "learning_rate": 3.339717500598516e-06, "logits/chosen": -2.9963667392730713, "logits/rejected": -2.904160737991333, "logps/chosen": -67.5794448852539, "logps/rejected": -296.6826171875, "loss": 0.152, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.18628811836242676, "rewards/margins": 2.4045979976654053, "rewards/rejected": -2.590886354446411, "step": 5580 }, { "epoch": 0.07, "learning_rate": 3.3457026574096245e-06, "logits/chosen": -2.9902751445770264, "logits/rejected": -2.8454718589782715, "logps/chosen": -59.74225616455078, "logps/rejected": -357.2461853027344, "loss": 0.1138, "rewards/accuracies": 1.0, "rewards/chosen": -0.07834075391292572, "rewards/margins": 3.1029624938964844, "rewards/rejected": -3.181302785873413, "step": 5590 }, { "epoch": 0.07, "learning_rate": 3.3516878142207326e-06, "logits/chosen": -2.996811628341675, "logits/rejected": -2.9631400108337402, "logps/chosen": -39.03668975830078, "logps/rejected": -320.10546875, "loss": 0.1019, "rewards/accuracies": 1.0, "rewards/chosen": 0.04316994920372963, "rewards/margins": 2.859640598297119, "rewards/rejected": -2.8164706230163574, "step": 5600 }, { "epoch": 0.07, "learning_rate": 3.357672971031841e-06, "logits/chosen": -3.0067360401153564, "logits/rejected": -2.909317970275879, "logps/chosen": -62.83075714111328, "logps/rejected": -422.92022705078125, "loss": 0.0813, "rewards/accuracies": 1.0, "rewards/chosen": -0.10831241309642792, "rewards/margins": 3.7124886512756348, "rewards/rejected": -3.82080078125, "step": 5610 }, { "epoch": 0.07, "learning_rate": 3.3636581278429497e-06, "logits/chosen": -3.0425095558166504, "logits/rejected": -3.004837989807129, "logps/chosen": -50.02080154418945, "logps/rejected": -305.36724853515625, "loss": 0.1526, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.08507189154624939, "rewards/margins": 2.582723379135132, "rewards/rejected": -2.667794942855835, "step": 5620 }, { "epoch": 0.07, "learning_rate": 3.3696432846540583e-06, "logits/chosen": -2.991152048110962, "logits/rejected": -2.893699884414673, "logps/chosen": -64.03775024414062, "logps/rejected": -393.77398681640625, "loss": 0.0687, "rewards/accuracies": 1.0, "rewards/chosen": -0.1820569932460785, "rewards/margins": 3.358302354812622, "rewards/rejected": -3.540358781814575, "step": 5630 }, { "epoch": 0.07, "learning_rate": 3.375628441465167e-06, "logits/chosen": -3.010159730911255, "logits/rejected": -2.8886358737945557, "logps/chosen": -54.36859130859375, "logps/rejected": -346.5320129394531, "loss": 0.0971, "rewards/accuracies": 1.0, "rewards/chosen": -0.056063372641801834, "rewards/margins": 3.0171432495117188, "rewards/rejected": -3.0732064247131348, "step": 5640 }, { "epoch": 0.07, "learning_rate": 3.381613598276275e-06, "logits/chosen": -3.0394272804260254, "logits/rejected": -2.934858798980713, "logps/chosen": -77.0989990234375, "logps/rejected": -457.11865234375, "loss": 0.067, "rewards/accuracies": 1.0, "rewards/chosen": -0.2642577588558197, "rewards/margins": 3.8927161693573, "rewards/rejected": -4.156973838806152, "step": 5650 }, { "epoch": 0.07, "learning_rate": 3.3875987550873835e-06, "logits/chosen": -3.001133441925049, "logits/rejected": -2.9320693016052246, "logps/chosen": -36.749359130859375, "logps/rejected": -345.01995849609375, "loss": 0.0932, "rewards/accuracies": 1.0, "rewards/chosen": 0.06273941695690155, "rewards/margins": 3.125656843185425, "rewards/rejected": -3.0629172325134277, "step": 5660 }, { "epoch": 0.07, "learning_rate": 3.393583911898492e-06, "logits/chosen": -3.027940511703491, "logits/rejected": -2.964547634124756, "logps/chosen": -47.538734436035156, "logps/rejected": -291.7622985839844, "loss": 0.1441, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.01804226078093052, "rewards/margins": 2.5641424655914307, "rewards/rejected": -2.5460994243621826, "step": 5670 }, { "epoch": 0.07, "learning_rate": 3.3995690687096006e-06, "logits/chosen": -2.986124277114868, "logits/rejected": -2.872321367263794, "logps/chosen": -62.87787628173828, "logps/rejected": -394.8035583496094, "loss": 0.0803, "rewards/accuracies": 1.0, "rewards/chosen": -0.13507229089736938, "rewards/margins": 3.4019699096679688, "rewards/rejected": -3.5370421409606934, "step": 5680 }, { "epoch": 0.07, "learning_rate": 3.405554225520709e-06, "logits/chosen": -3.006647825241089, "logits/rejected": -2.926886796951294, "logps/chosen": -67.75942993164062, "logps/rejected": -360.290283203125, "loss": 0.1313, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.19887283444404602, "rewards/margins": 3.0212512016296387, "rewards/rejected": -3.2201240062713623, "step": 5690 }, { "epoch": 0.07, "learning_rate": 3.4115393823318173e-06, "logits/chosen": -3.016387939453125, "logits/rejected": -2.947366237640381, "logps/chosen": -69.4775161743164, "logps/rejected": -452.00238037109375, "loss": 0.0837, "rewards/accuracies": 1.0, "rewards/chosen": -0.187578946352005, "rewards/margins": 3.943483829498291, "rewards/rejected": -4.1310625076293945, "step": 5700 }, { "epoch": 0.07, "learning_rate": 3.417524539142926e-06, "logits/chosen": -3.0052523612976074, "logits/rejected": -2.9205801486968994, "logps/chosen": -78.20904541015625, "logps/rejected": -432.31964111328125, "loss": 0.118, "rewards/accuracies": 1.0, "rewards/chosen": -0.2576858401298523, "rewards/margins": 3.6507697105407715, "rewards/rejected": -3.9084556102752686, "step": 5710 }, { "epoch": 0.07, "learning_rate": 3.4235096959540344e-06, "logits/chosen": -3.0410521030426025, "logits/rejected": -2.953611373901367, "logps/chosen": -75.78434753417969, "logps/rejected": -405.88543701171875, "loss": 0.0935, "rewards/accuracies": 1.0, "rewards/chosen": -0.1930662989616394, "rewards/margins": 3.4571032524108887, "rewards/rejected": -3.650170087814331, "step": 5720 }, { "epoch": 0.07, "learning_rate": 3.429494852765143e-06, "logits/chosen": -3.0021395683288574, "logits/rejected": -2.907534122467041, "logps/chosen": -63.03047561645508, "logps/rejected": -386.627685546875, "loss": 0.1158, "rewards/accuracies": 1.0, "rewards/chosen": -0.14842049777507782, "rewards/margins": 3.313279628753662, "rewards/rejected": -3.4616997241973877, "step": 5730 }, { "epoch": 0.07, "learning_rate": 3.4354800095762515e-06, "logits/chosen": -2.988574981689453, "logits/rejected": -2.8995168209075928, "logps/chosen": -45.03822326660156, "logps/rejected": -373.40509033203125, "loss": 0.0871, "rewards/accuracies": 1.0, "rewards/chosen": 0.03483462333679199, "rewards/margins": 3.3766415119171143, "rewards/rejected": -3.3418071269989014, "step": 5740 }, { "epoch": 0.07, "learning_rate": 3.4414651663873592e-06, "logits/chosen": -3.0391685962677, "logits/rejected": -2.937356948852539, "logps/chosen": -59.21864700317383, "logps/rejected": -370.2032775878906, "loss": 0.1662, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.08895745873451233, "rewards/margins": 3.225741147994995, "rewards/rejected": -3.3146986961364746, "step": 5750 }, { "epoch": 0.07, "learning_rate": 3.447450323198468e-06, "logits/chosen": -2.968459367752075, "logits/rejected": -2.9323837757110596, "logps/chosen": -48.78335952758789, "logps/rejected": -337.1684265136719, "loss": 0.1179, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.06717019528150558, "rewards/margins": 2.915766954421997, "rewards/rejected": -2.9829370975494385, "step": 5760 }, { "epoch": 0.07, "learning_rate": 3.4534354800095763e-06, "logits/chosen": -3.0005297660827637, "logits/rejected": -2.90596866607666, "logps/chosen": -47.21421813964844, "logps/rejected": -316.8838806152344, "loss": 0.0805, "rewards/accuracies": 1.0, "rewards/chosen": 0.007748936302959919, "rewards/margins": 2.7957711219787598, "rewards/rejected": -2.78802227973938, "step": 5770 }, { "epoch": 0.07, "learning_rate": 3.459420636820685e-06, "logits/chosen": -2.9995150566101074, "logits/rejected": -2.9554688930511475, "logps/chosen": -55.45664596557617, "logps/rejected": -346.7427673339844, "loss": 0.1429, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.07158296555280685, "rewards/margins": 3.0098137855529785, "rewards/rejected": -3.0813968181610107, "step": 5780 }, { "epoch": 0.07, "learning_rate": 3.4654057936317935e-06, "logits/chosen": -3.0037131309509277, "logits/rejected": -2.9163060188293457, "logps/chosen": -57.97013473510742, "logps/rejected": -366.8642272949219, "loss": 0.0927, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.10205739736557007, "rewards/margins": 3.1629371643066406, "rewards/rejected": -3.2649941444396973, "step": 5790 }, { "epoch": 0.07, "learning_rate": 3.4713909504429016e-06, "logits/chosen": -2.9940426349639893, "logits/rejected": -2.950232744216919, "logps/chosen": -43.16997528076172, "logps/rejected": -311.62200927734375, "loss": 0.11, "rewards/accuracies": 1.0, "rewards/chosen": 0.06171916797757149, "rewards/margins": 2.812617540359497, "rewards/rejected": -2.7508978843688965, "step": 5800 }, { "epoch": 0.07, "learning_rate": 3.47737610725401e-06, "logits/chosen": -2.996640682220459, "logits/rejected": -2.9056243896484375, "logps/chosen": -51.543601989746094, "logps/rejected": -411.2784118652344, "loss": 0.1283, "rewards/accuracies": 1.0, "rewards/chosen": -0.024437610059976578, "rewards/margins": 3.7022719383239746, "rewards/rejected": -3.7267098426818848, "step": 5810 }, { "epoch": 0.07, "learning_rate": 3.4833612640651187e-06, "logits/chosen": -3.0203912258148193, "logits/rejected": -2.8966071605682373, "logps/chosen": -83.59789276123047, "logps/rejected": -376.8374328613281, "loss": 0.1149, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.24633030593395233, "rewards/margins": 3.1288533210754395, "rewards/rejected": -3.375183582305908, "step": 5820 }, { "epoch": 0.07, "learning_rate": 3.4893464208762272e-06, "logits/chosen": -3.0205509662628174, "logits/rejected": -2.900228261947632, "logps/chosen": -96.72198486328125, "logps/rejected": -432.00146484375, "loss": 0.1486, "rewards/accuracies": 1.0, "rewards/chosen": -0.40849733352661133, "rewards/margins": 3.486828327178955, "rewards/rejected": -3.895325183868408, "step": 5830 }, { "epoch": 0.07, "learning_rate": 3.495331577687336e-06, "logits/chosen": -3.006986141204834, "logits/rejected": -2.92063307762146, "logps/chosen": -67.28530883789062, "logps/rejected": -422.990478515625, "loss": 0.0929, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.13352081179618835, "rewards/margins": 3.6855366230010986, "rewards/rejected": -3.8190574645996094, "step": 5840 }, { "epoch": 0.07, "learning_rate": 3.501316734498444e-06, "logits/chosen": -3.0097436904907227, "logits/rejected": -2.9622912406921387, "logps/chosen": -31.630847930908203, "logps/rejected": -284.2970275878906, "loss": 0.1548, "rewards/accuracies": 1.0, "rewards/chosen": 0.07468168437480927, "rewards/margins": 2.5589747428894043, "rewards/rejected": -2.484292984008789, "step": 5850 }, { "epoch": 0.07, "learning_rate": 3.5073018913095525e-06, "logits/chosen": -2.999688148498535, "logits/rejected": -2.9034008979797363, "logps/chosen": -76.210693359375, "logps/rejected": -367.55841064453125, "loss": 0.228, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.27302736043930054, "rewards/margins": 3.010835647583008, "rewards/rejected": -3.283863067626953, "step": 5860 }, { "epoch": 0.07, "learning_rate": 3.513287048120661e-06, "logits/chosen": -2.9949800968170166, "logits/rejected": -2.898984432220459, "logps/chosen": -53.17476272583008, "logps/rejected": -407.0135803222656, "loss": 0.1271, "rewards/accuracies": 1.0, "rewards/chosen": -0.0521073043346405, "rewards/margins": 3.6196236610412598, "rewards/rejected": -3.6717312335968018, "step": 5870 }, { "epoch": 0.07, "learning_rate": 3.5192722049317696e-06, "logits/chosen": -3.0103352069854736, "logits/rejected": -2.937682628631592, "logps/chosen": -48.68604278564453, "logps/rejected": -332.9332580566406, "loss": 0.0983, "rewards/accuracies": 1.0, "rewards/chosen": -0.058987222611904144, "rewards/margins": 2.8867385387420654, "rewards/rejected": -2.945725679397583, "step": 5880 }, { "epoch": 0.07, "learning_rate": 3.525257361742878e-06, "logits/chosen": -3.0026798248291016, "logits/rejected": -2.9004721641540527, "logps/chosen": -60.020263671875, "logps/rejected": -338.61749267578125, "loss": 0.1375, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1429513394832611, "rewards/margins": 2.8692073822021484, "rewards/rejected": -3.0121588706970215, "step": 5890 }, { "epoch": 0.07, "learning_rate": 3.5312425185539863e-06, "logits/chosen": -3.0157485008239746, "logits/rejected": -2.971651077270508, "logps/chosen": -76.83137512207031, "logps/rejected": -410.5595703125, "loss": 0.1013, "rewards/accuracies": 1.0, "rewards/chosen": -0.3152068257331848, "rewards/margins": 3.4051010608673096, "rewards/rejected": -3.7203075885772705, "step": 5900 }, { "epoch": 0.07, "learning_rate": 3.537227675365095e-06, "logits/chosen": -3.000185489654541, "logits/rejected": -2.887005090713501, "logps/chosen": -79.29052734375, "logps/rejected": -388.279052734375, "loss": 0.1967, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2947708070278168, "rewards/margins": 3.1987106800079346, "rewards/rejected": -3.493481397628784, "step": 5910 }, { "epoch": 0.07, "learning_rate": 3.5432128321762034e-06, "logits/chosen": -3.010355234146118, "logits/rejected": -2.972822427749634, "logps/chosen": -50.72563171386719, "logps/rejected": -398.720703125, "loss": 0.0728, "rewards/accuracies": 1.0, "rewards/chosen": -0.007085546851158142, "rewards/margins": 3.5862040519714355, "rewards/rejected": -3.593289613723755, "step": 5920 }, { "epoch": 0.07, "learning_rate": 3.549197988987312e-06, "logits/chosen": -3.038412094116211, "logits/rejected": -2.9897751808166504, "logps/chosen": -56.96965789794922, "logps/rejected": -320.6725158691406, "loss": 0.1235, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.10090263187885284, "rewards/margins": 2.734947681427002, "rewards/rejected": -2.835850238800049, "step": 5930 }, { "epoch": 0.07, "learning_rate": 3.5551831457984205e-06, "logits/chosen": -2.9773590564727783, "logits/rejected": -2.8886494636535645, "logps/chosen": -68.94248962402344, "logps/rejected": -404.70867919921875, "loss": 0.1204, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.16606225073337555, "rewards/margins": 3.494340419769287, "rewards/rejected": -3.66040301322937, "step": 5940 }, { "epoch": 0.07, "learning_rate": 3.561168302609528e-06, "logits/chosen": -3.0269386768341064, "logits/rejected": -2.972496747970581, "logps/chosen": -46.574058532714844, "logps/rejected": -371.62762451171875, "loss": 0.0823, "rewards/accuracies": 1.0, "rewards/chosen": -0.05283573269844055, "rewards/margins": 3.2803051471710205, "rewards/rejected": -3.333141326904297, "step": 5950 }, { "epoch": 0.07, "learning_rate": 3.567153459420637e-06, "logits/chosen": -3.000826835632324, "logits/rejected": -2.935208559036255, "logps/chosen": -51.696998596191406, "logps/rejected": -423.0729064941406, "loss": 0.0842, "rewards/accuracies": 1.0, "rewards/chosen": -0.05392122268676758, "rewards/margins": 3.7762932777404785, "rewards/rejected": -3.830214262008667, "step": 5960 }, { "epoch": 0.07, "learning_rate": 3.5731386162317457e-06, "logits/chosen": -3.0016791820526123, "logits/rejected": -2.858668565750122, "logps/chosen": -105.96002197265625, "logps/rejected": -415.2490234375, "loss": 0.1024, "rewards/accuracies": 1.0, "rewards/chosen": -0.4852064549922943, "rewards/margins": 3.2565598487854004, "rewards/rejected": -3.7417659759521484, "step": 5970 }, { "epoch": 0.07, "learning_rate": 3.5791237730428543e-06, "logits/chosen": -2.9873387813568115, "logits/rejected": -2.9289755821228027, "logps/chosen": -60.5691032409668, "logps/rejected": -398.47564697265625, "loss": 0.1087, "rewards/accuracies": 1.0, "rewards/chosen": -0.1273776888847351, "rewards/margins": 3.4548583030700684, "rewards/rejected": -3.5822360515594482, "step": 5980 }, { "epoch": 0.07, "learning_rate": 3.585108929853963e-06, "logits/chosen": -2.9928183555603027, "logits/rejected": -2.9304890632629395, "logps/chosen": -52.27024459838867, "logps/rejected": -298.56573486328125, "loss": 0.0935, "rewards/accuracies": 1.0, "rewards/chosen": -0.013094549998641014, "rewards/margins": 2.599027156829834, "rewards/rejected": -2.612122058868408, "step": 5990 }, { "epoch": 0.07, "learning_rate": 3.5910940866650705e-06, "logits/chosen": -2.986082077026367, "logits/rejected": -2.926138401031494, "logps/chosen": -61.41353225708008, "logps/rejected": -388.50634765625, "loss": 0.1599, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.19875654578208923, "rewards/margins": 3.3148109912872314, "rewards/rejected": -3.5135669708251953, "step": 6000 }, { "epoch": 0.07, "eval_logits/chosen": -3.034557342529297, "eval_logits/rejected": -2.9054813385009766, "eval_logps/chosen": -120.0204086303711, "eval_logps/rejected": -413.4546203613281, "eval_loss": 0.07504179328680038, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.5884013175964355, "eval_rewards/margins": 3.0788910388946533, "eval_rewards/rejected": -3.667292594909668, "eval_runtime": 1.2139, "eval_samples_per_second": 4.119, "eval_steps_per_second": 2.471, "step": 6000 }, { "epoch": 0.07, "learning_rate": 3.597079243476179e-06, "logits/chosen": -3.0108466148376465, "logits/rejected": -2.9679667949676514, "logps/chosen": -33.96746063232422, "logps/rejected": -333.68939208984375, "loss": 0.0805, "rewards/accuracies": 1.0, "rewards/chosen": 0.07177619636058807, "rewards/margins": 3.03208589553833, "rewards/rejected": -2.9603095054626465, "step": 6010 }, { "epoch": 0.07, "learning_rate": 3.6030644002872876e-06, "logits/chosen": -2.9873154163360596, "logits/rejected": -2.909423589706421, "logps/chosen": -64.3587646484375, "logps/rejected": -449.9397888183594, "loss": 0.0681, "rewards/accuracies": 1.0, "rewards/chosen": -0.16124621033668518, "rewards/margins": 3.9477505683898926, "rewards/rejected": -4.108996391296387, "step": 6020 }, { "epoch": 0.07, "learning_rate": 3.609049557098396e-06, "logits/chosen": -3.0080182552337646, "logits/rejected": -2.9824905395507812, "logps/chosen": -42.33258819580078, "logps/rejected": -306.9604187011719, "loss": 0.1409, "rewards/accuracies": 1.0, "rewards/chosen": -0.04984932020306587, "rewards/margins": 2.660971164703369, "rewards/rejected": -2.710820436477661, "step": 6030 }, { "epoch": 0.07, "learning_rate": 3.6150347139095047e-06, "logits/chosen": -3.006392240524292, "logits/rejected": -2.956327199935913, "logps/chosen": -62.15553665161133, "logps/rejected": -447.0653381347656, "loss": 0.0828, "rewards/accuracies": 1.0, "rewards/chosen": -0.19633238017559052, "rewards/margins": 3.8787567615509033, "rewards/rejected": -4.075089454650879, "step": 6040 }, { "epoch": 0.07, "learning_rate": 3.6210198707206133e-06, "logits/chosen": -2.997006893157959, "logits/rejected": -2.948627471923828, "logps/chosen": -53.21699905395508, "logps/rejected": -333.62322998046875, "loss": 0.1758, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.11838479340076447, "rewards/margins": 2.8374390602111816, "rewards/rejected": -2.9558234214782715, "step": 6050 }, { "epoch": 0.07, "learning_rate": 3.6270050275317214e-06, "logits/chosen": -3.0212454795837402, "logits/rejected": -2.906794786453247, "logps/chosen": -118.2268295288086, "logps/rejected": -503.78094482421875, "loss": 0.1399, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6072114706039429, "rewards/margins": 4.017485618591309, "rewards/rejected": -4.624697208404541, "step": 6060 }, { "epoch": 0.07, "learning_rate": 3.63299018434283e-06, "logits/chosen": -2.960796594619751, "logits/rejected": -2.8840835094451904, "logps/chosen": -42.17526626586914, "logps/rejected": -336.60272216796875, "loss": 0.1477, "rewards/accuracies": 1.0, "rewards/chosen": 0.004016406834125519, "rewards/margins": 2.994333505630493, "rewards/rejected": -2.9903171062469482, "step": 6070 }, { "epoch": 0.07, "learning_rate": 3.6389753411539385e-06, "logits/chosen": -3.028041362762451, "logits/rejected": -2.963773250579834, "logps/chosen": -61.850502014160156, "logps/rejected": -371.0893249511719, "loss": 0.0939, "rewards/accuracies": 1.0, "rewards/chosen": -0.10746872425079346, "rewards/margins": 3.211235761642456, "rewards/rejected": -3.318704605102539, "step": 6080 }, { "epoch": 0.07, "learning_rate": 3.644960497965047e-06, "logits/chosen": -2.9938137531280518, "logits/rejected": -2.921516180038452, "logps/chosen": -76.90784454345703, "logps/rejected": -318.0732727050781, "loss": 0.1318, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.34624597430229187, "rewards/margins": 2.4559807777404785, "rewards/rejected": -2.8022265434265137, "step": 6090 }, { "epoch": 0.07, "learning_rate": 3.6509456547761556e-06, "logits/chosen": -3.026407241821289, "logits/rejected": -2.9823060035705566, "logps/chosen": -45.617210388183594, "logps/rejected": -299.9085693359375, "loss": 0.1033, "rewards/accuracies": 1.0, "rewards/chosen": -0.035929810255765915, "rewards/margins": 2.6019508838653564, "rewards/rejected": -2.637880802154541, "step": 6100 }, { "epoch": 0.07, "learning_rate": 3.6569308115872638e-06, "logits/chosen": -3.048152446746826, "logits/rejected": -3.021693706512451, "logps/chosen": -52.088836669921875, "logps/rejected": -310.657958984375, "loss": 0.1149, "rewards/accuracies": 1.0, "rewards/chosen": -0.08722952008247375, "rewards/margins": 2.642397403717041, "rewards/rejected": -2.7296266555786133, "step": 6110 }, { "epoch": 0.07, "learning_rate": 3.6629159683983723e-06, "logits/chosen": -3.0139057636260986, "logits/rejected": -2.9718732833862305, "logps/chosen": -41.25392150878906, "logps/rejected": -385.8874816894531, "loss": 0.0756, "rewards/accuracies": 1.0, "rewards/chosen": 0.03942788392305374, "rewards/margins": 3.518275737762451, "rewards/rejected": -3.4788482189178467, "step": 6120 }, { "epoch": 0.07, "learning_rate": 3.668901125209481e-06, "logits/chosen": -3.0085902214050293, "logits/rejected": -2.923344373703003, "logps/chosen": -63.29693603515625, "logps/rejected": -363.80230712890625, "loss": 0.0676, "rewards/accuracies": 1.0, "rewards/chosen": -0.15036353468894958, "rewards/margins": 3.0892434120178223, "rewards/rejected": -3.2396068572998047, "step": 6130 }, { "epoch": 0.07, "learning_rate": 3.6748862820205894e-06, "logits/chosen": -3.016732931137085, "logits/rejected": -2.9341866970062256, "logps/chosen": -82.8967514038086, "logps/rejected": -493.9683532714844, "loss": 0.0624, "rewards/accuracies": 1.0, "rewards/chosen": -0.23508679866790771, "rewards/margins": 4.303523063659668, "rewards/rejected": -4.538609981536865, "step": 6140 }, { "epoch": 0.07, "learning_rate": 3.680871438831698e-06, "logits/chosen": -3.006626605987549, "logits/rejected": -2.9849677085876465, "logps/chosen": -45.0453987121582, "logps/rejected": -346.26068115234375, "loss": 0.0898, "rewards/accuracies": 1.0, "rewards/chosen": 0.0068562268279492855, "rewards/margins": 3.0802810192108154, "rewards/rejected": -3.073424816131592, "step": 6150 }, { "epoch": 0.07, "learning_rate": 3.686856595642806e-06, "logits/chosen": -2.9986376762390137, "logits/rejected": -2.9391732215881348, "logps/chosen": -59.0346565246582, "logps/rejected": -391.43975830078125, "loss": 0.129, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.08651997148990631, "rewards/margins": 3.4418396949768066, "rewards/rejected": -3.528359889984131, "step": 6160 }, { "epoch": 0.07, "learning_rate": 3.6928417524539147e-06, "logits/chosen": -3.024336338043213, "logits/rejected": -2.9393012523651123, "logps/chosen": -45.34698486328125, "logps/rejected": -418.58001708984375, "loss": 0.0949, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.033923666924238205, "rewards/margins": 3.8268744945526123, "rewards/rejected": -3.7929508686065674, "step": 6170 }, { "epoch": 0.07, "learning_rate": 3.6988269092650232e-06, "logits/chosen": -2.990730047225952, "logits/rejected": -2.9192166328430176, "logps/chosen": -54.585472106933594, "logps/rejected": -478.55908203125, "loss": 0.0783, "rewards/accuracies": 1.0, "rewards/chosen": -0.09883668273687363, "rewards/margins": 4.285642147064209, "rewards/rejected": -4.384478569030762, "step": 6180 }, { "epoch": 0.07, "learning_rate": 3.7048120660761318e-06, "logits/chosen": -3.038299798965454, "logits/rejected": -2.9466185569763184, "logps/chosen": -59.970672607421875, "logps/rejected": -475.56201171875, "loss": 0.0814, "rewards/accuracies": 1.0, "rewards/chosen": -0.06099734827876091, "rewards/margins": 4.2857136726379395, "rewards/rejected": -4.346711158752441, "step": 6190 }, { "epoch": 0.07, "learning_rate": 3.7107972228872403e-06, "logits/chosen": -3.004671573638916, "logits/rejected": -2.8880982398986816, "logps/chosen": -50.39319610595703, "logps/rejected": -389.7159729003906, "loss": 0.074, "rewards/accuracies": 1.0, "rewards/chosen": 0.01742258109152317, "rewards/margins": 3.5258376598358154, "rewards/rejected": -3.5084152221679688, "step": 6200 }, { "epoch": 0.07, "learning_rate": 3.716782379698348e-06, "logits/chosen": -3.0447998046875, "logits/rejected": -2.979583740234375, "logps/chosen": -51.520538330078125, "logps/rejected": -430.5333557128906, "loss": 0.0724, "rewards/accuracies": 1.0, "rewards/chosen": -0.001172137213870883, "rewards/margins": 3.9111580848693848, "rewards/rejected": -3.912330150604248, "step": 6210 }, { "epoch": 0.07, "learning_rate": 3.7227675365094566e-06, "logits/chosen": -3.0040230751037598, "logits/rejected": -2.9684739112854004, "logps/chosen": -23.871532440185547, "logps/rejected": -332.4522399902344, "loss": 0.0719, "rewards/accuracies": 1.0, "rewards/chosen": 0.14044612646102905, "rewards/margins": 3.090817451477051, "rewards/rejected": -2.950371265411377, "step": 6220 }, { "epoch": 0.07, "learning_rate": 3.728752693320565e-06, "logits/chosen": -2.9930806159973145, "logits/rejected": -2.914943218231201, "logps/chosen": -44.479549407958984, "logps/rejected": -391.26751708984375, "loss": 0.0894, "rewards/accuracies": 1.0, "rewards/chosen": 0.0767221450805664, "rewards/margins": 3.6150031089782715, "rewards/rejected": -3.538280963897705, "step": 6230 }, { "epoch": 0.07, "learning_rate": 3.7347378501316737e-06, "logits/chosen": -3.0053586959838867, "logits/rejected": -2.928554058074951, "logps/chosen": -81.98564147949219, "logps/rejected": -416.5121154785156, "loss": 0.1541, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2718490958213806, "rewards/margins": 3.4841842651367188, "rewards/rejected": -3.7560336589813232, "step": 6240 }, { "epoch": 0.07, "learning_rate": 3.7407230069427827e-06, "logits/chosen": -3.026193141937256, "logits/rejected": -2.953011989593506, "logps/chosen": -39.714012145996094, "logps/rejected": -398.6741027832031, "loss": 0.0663, "rewards/accuracies": 1.0, "rewards/chosen": 0.061909131705760956, "rewards/margins": 3.6718649864196777, "rewards/rejected": -3.6099560260772705, "step": 6250 }, { "epoch": 0.07, "learning_rate": 3.7467081637538904e-06, "logits/chosen": -2.9994781017303467, "logits/rejected": -2.935194492340088, "logps/chosen": -52.28833770751953, "logps/rejected": -425.2735290527344, "loss": 0.0805, "rewards/accuracies": 1.0, "rewards/chosen": -0.028740327805280685, "rewards/margins": 3.8404479026794434, "rewards/rejected": -3.8691883087158203, "step": 6260 }, { "epoch": 0.08, "learning_rate": 3.752693320564999e-06, "logits/chosen": -3.010944366455078, "logits/rejected": -2.9859120845794678, "logps/chosen": -27.535043716430664, "logps/rejected": -355.2829284667969, "loss": 0.0821, "rewards/accuracies": 1.0, "rewards/chosen": 0.13788743317127228, "rewards/margins": 3.3339240550994873, "rewards/rejected": -3.1960370540618896, "step": 6270 }, { "epoch": 0.08, "learning_rate": 3.7586784773761075e-06, "logits/chosen": -3.0380005836486816, "logits/rejected": -2.9677581787109375, "logps/chosen": -43.37053298950195, "logps/rejected": -281.9716796875, "loss": 0.0927, "rewards/accuracies": 1.0, "rewards/chosen": 0.009769691154360771, "rewards/margins": 2.462942600250244, "rewards/rejected": -2.4531731605529785, "step": 6280 }, { "epoch": 0.08, "learning_rate": 3.764663634187216e-06, "logits/chosen": -3.0305380821228027, "logits/rejected": -2.9320037364959717, "logps/chosen": -50.40093231201172, "logps/rejected": -442.5018615722656, "loss": 0.185, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.05961514264345169, "rewards/margins": 3.9742431640625, "rewards/rejected": -4.033858299255371, "step": 6290 }, { "epoch": 0.08, "learning_rate": 3.7706487909983246e-06, "logits/chosen": -2.9996681213378906, "logits/rejected": -2.9268057346343994, "logps/chosen": -74.28829956054688, "logps/rejected": -469.40533447265625, "loss": 0.1245, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2926364541053772, "rewards/margins": 4.000265598297119, "rewards/rejected": -4.292901992797852, "step": 6300 }, { "epoch": 0.08, "learning_rate": 3.7766339478094327e-06, "logits/chosen": -3.0271456241607666, "logits/rejected": -2.9618897438049316, "logps/chosen": -34.00541687011719, "logps/rejected": -316.1839599609375, "loss": 0.087, "rewards/accuracies": 1.0, "rewards/chosen": 0.14258988201618195, "rewards/margins": 2.9336092472076416, "rewards/rejected": -2.7910194396972656, "step": 6310 }, { "epoch": 0.08, "learning_rate": 3.7826191046205413e-06, "logits/chosen": -3.0402603149414062, "logits/rejected": -2.9133565425872803, "logps/chosen": -60.6801872253418, "logps/rejected": -456.99365234375, "loss": 0.0681, "rewards/accuracies": 1.0, "rewards/chosen": -0.0708685889840126, "rewards/margins": 4.0974812507629395, "rewards/rejected": -4.1683502197265625, "step": 6320 }, { "epoch": 0.08, "learning_rate": 3.78860426143165e-06, "logits/chosen": -3.0392234325408936, "logits/rejected": -2.9959716796875, "logps/chosen": -43.6837043762207, "logps/rejected": -236.4279022216797, "loss": 0.2312, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.08760137856006622, "rewards/margins": 1.9235765933990479, "rewards/rejected": -2.0111777782440186, "step": 6330 }, { "epoch": 0.08, "learning_rate": 3.7945894182427584e-06, "logits/chosen": -3.030425548553467, "logits/rejected": -2.973921298980713, "logps/chosen": -61.902435302734375, "logps/rejected": -376.4432678222656, "loss": 0.0759, "rewards/accuracies": 1.0, "rewards/chosen": -0.08762016147375107, "rewards/margins": 3.2926878929138184, "rewards/rejected": -3.380308151245117, "step": 6340 }, { "epoch": 0.08, "learning_rate": 3.800574575053867e-06, "logits/chosen": -3.0508015155792236, "logits/rejected": -2.9357614517211914, "logps/chosen": -92.18966674804688, "logps/rejected": -525.9163208007812, "loss": 0.1285, "rewards/accuracies": 1.0, "rewards/chosen": -0.3303682208061218, "rewards/margins": 4.533050060272217, "rewards/rejected": -4.863418102264404, "step": 6350 }, { "epoch": 0.08, "learning_rate": 3.806559731864975e-06, "logits/chosen": -3.035796880722046, "logits/rejected": -2.9588160514831543, "logps/chosen": -72.28211975097656, "logps/rejected": -496.548095703125, "loss": 0.1108, "rewards/accuracies": 1.0, "rewards/chosen": -0.2211037576198578, "rewards/margins": 4.355208396911621, "rewards/rejected": -4.576312065124512, "step": 6360 }, { "epoch": 0.08, "learning_rate": 3.8125448886760836e-06, "logits/chosen": -3.0222060680389404, "logits/rejected": -2.922482967376709, "logps/chosen": -57.59352493286133, "logps/rejected": -519.9468994140625, "loss": 0.0936, "rewards/accuracies": 1.0, "rewards/chosen": -0.07523485273122787, "rewards/margins": 4.737759590148926, "rewards/rejected": -4.812994480133057, "step": 6370 }, { "epoch": 0.08, "learning_rate": 3.818530045487192e-06, "logits/chosen": -3.0341036319732666, "logits/rejected": -2.9554495811462402, "logps/chosen": -74.481201171875, "logps/rejected": -447.1815490722656, "loss": 0.0821, "rewards/accuracies": 1.0, "rewards/chosen": -0.19261260330677032, "rewards/margins": 3.8862922191619873, "rewards/rejected": -4.078904628753662, "step": 6380 }, { "epoch": 0.08, "learning_rate": 3.824515202298301e-06, "logits/chosen": -3.003981351852417, "logits/rejected": -2.9545860290527344, "logps/chosen": -68.7845687866211, "logps/rejected": -421.32269287109375, "loss": 0.0611, "rewards/accuracies": 1.0, "rewards/chosen": -0.21828456223011017, "rewards/margins": 3.601564884185791, "rewards/rejected": -3.8198490142822266, "step": 6390 }, { "epoch": 0.08, "learning_rate": 3.830500359109409e-06, "logits/chosen": -3.010801315307617, "logits/rejected": -2.975106716156006, "logps/chosen": -46.61457061767578, "logps/rejected": -396.2247314453125, "loss": 0.0677, "rewards/accuracies": 1.0, "rewards/chosen": -0.03685355931520462, "rewards/margins": 3.5371124744415283, "rewards/rejected": -3.5739662647247314, "step": 6400 }, { "epoch": 0.08, "learning_rate": 3.836485515920517e-06, "logits/chosen": -3.0254249572753906, "logits/rejected": -2.9472403526306152, "logps/chosen": -69.88585662841797, "logps/rejected": -460.70794677734375, "loss": 0.0838, "rewards/accuracies": 1.0, "rewards/chosen": -0.20731742680072784, "rewards/margins": 4.012806415557861, "rewards/rejected": -4.220124244689941, "step": 6410 }, { "epoch": 0.08, "learning_rate": 3.842470672731626e-06, "logits/chosen": -3.0310230255126953, "logits/rejected": -2.9280972480773926, "logps/chosen": -76.39366149902344, "logps/rejected": -544.2347412109375, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": -0.2146073877811432, "rewards/margins": 4.8306474685668945, "rewards/rejected": -5.045254230499268, "step": 6420 }, { "epoch": 0.08, "learning_rate": 3.848455829542734e-06, "logits/chosen": -3.026200532913208, "logits/rejected": -2.964167833328247, "logps/chosen": -36.695960998535156, "logps/rejected": -394.41351318359375, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": 0.08912280946969986, "rewards/margins": 3.654050827026367, "rewards/rejected": -3.564927577972412, "step": 6430 }, { "epoch": 0.08, "learning_rate": 3.854440986353843e-06, "logits/chosen": -3.0207865238189697, "logits/rejected": -2.9521191120147705, "logps/chosen": -54.52309036254883, "logps/rejected": -507.96002197265625, "loss": 0.0659, "rewards/accuracies": 1.0, "rewards/chosen": -0.10282786935567856, "rewards/margins": 4.583926200866699, "rewards/rejected": -4.6867547035217285, "step": 6440 }, { "epoch": 0.08, "learning_rate": 3.860426143164951e-06, "logits/chosen": -2.993556022644043, "logits/rejected": -2.9509148597717285, "logps/chosen": -33.93323516845703, "logps/rejected": -361.08697509765625, "loss": 0.0829, "rewards/accuracies": 1.0, "rewards/chosen": 0.09970208257436752, "rewards/margins": 3.3362221717834473, "rewards/rejected": -3.2365195751190186, "step": 6450 }, { "epoch": 0.08, "learning_rate": 3.866411299976059e-06, "logits/chosen": -3.004573106765747, "logits/rejected": -2.9212193489074707, "logps/chosen": -60.14354705810547, "logps/rejected": -497.43572998046875, "loss": 0.0788, "rewards/accuracies": 1.0, "rewards/chosen": -0.09715141355991364, "rewards/margins": 4.474579811096191, "rewards/rejected": -4.571732044219971, "step": 6460 }, { "epoch": 0.08, "learning_rate": 3.872396456787168e-06, "logits/chosen": -2.9939141273498535, "logits/rejected": -2.9329495429992676, "logps/chosen": -16.770580291748047, "logps/rejected": -343.87554931640625, "loss": 0.0836, "rewards/accuracies": 1.0, "rewards/chosen": 0.225328728556633, "rewards/margins": 3.291029453277588, "rewards/rejected": -3.0657010078430176, "step": 6470 }, { "epoch": 0.08, "learning_rate": 3.878381613598276e-06, "logits/chosen": -3.031116485595703, "logits/rejected": -2.9915308952331543, "logps/chosen": -30.668033599853516, "logps/rejected": -318.23992919921875, "loss": 0.0792, "rewards/accuracies": 1.0, "rewards/chosen": 0.10692118108272552, "rewards/margins": 2.9164535999298096, "rewards/rejected": -2.809532403945923, "step": 6480 }, { "epoch": 0.08, "learning_rate": 3.884366770409385e-06, "logits/chosen": -3.0132317543029785, "logits/rejected": -2.939354419708252, "logps/chosen": -52.9599609375, "logps/rejected": -477.7147521972656, "loss": 0.0712, "rewards/accuracies": 1.0, "rewards/chosen": -0.04496973380446434, "rewards/margins": 4.348343372344971, "rewards/rejected": -4.393313407897949, "step": 6490 }, { "epoch": 0.08, "learning_rate": 3.8903519272204935e-06, "logits/chosen": -3.030414581298828, "logits/rejected": -2.920846939086914, "logps/chosen": -62.65595245361328, "logps/rejected": -394.3646240234375, "loss": 0.1855, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.12962296605110168, "rewards/margins": 3.422743320465088, "rewards/rejected": -3.552365779876709, "step": 6500 }, { "epoch": 0.08, "learning_rate": 3.896337084031602e-06, "logits/chosen": -3.0427145957946777, "logits/rejected": -2.9895780086517334, "logps/chosen": -68.94248962402344, "logps/rejected": -457.906982421875, "loss": 0.0613, "rewards/accuracies": 1.0, "rewards/chosen": -0.24687330424785614, "rewards/margins": 3.9514663219451904, "rewards/rejected": -4.198339939117432, "step": 6510 }, { "epoch": 0.08, "learning_rate": 3.902322240842711e-06, "logits/chosen": -3.0147643089294434, "logits/rejected": -2.8963706493377686, "logps/chosen": -88.90303039550781, "logps/rejected": -539.098876953125, "loss": 0.1547, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3258044123649597, "rewards/margins": 4.656490325927734, "rewards/rejected": -4.98229455947876, "step": 6520 }, { "epoch": 0.08, "learning_rate": 3.908307397653819e-06, "logits/chosen": -3.0291476249694824, "logits/rejected": -2.8820602893829346, "logps/chosen": -91.5669937133789, "logps/rejected": -516.6956176757812, "loss": 0.0746, "rewards/accuracies": 1.0, "rewards/chosen": -0.3511958122253418, "rewards/margins": 4.402204513549805, "rewards/rejected": -4.753399848937988, "step": 6530 }, { "epoch": 0.08, "learning_rate": 3.914292554464928e-06, "logits/chosen": -2.975066900253296, "logits/rejected": -2.8932971954345703, "logps/chosen": -51.812217712402344, "logps/rejected": -447.46173095703125, "loss": 0.0636, "rewards/accuracies": 1.0, "rewards/chosen": 0.06484166532754898, "rewards/margins": 4.149186611175537, "rewards/rejected": -4.084344863891602, "step": 6540 }, { "epoch": 0.08, "learning_rate": 3.920277711276036e-06, "logits/chosen": -3.015467882156372, "logits/rejected": -2.936581611633301, "logps/chosen": -67.86312103271484, "logps/rejected": -440.3789978027344, "loss": 0.126, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1668013334274292, "rewards/margins": 3.853038787841797, "rewards/rejected": -4.019840240478516, "step": 6550 }, { "epoch": 0.08, "learning_rate": 3.926262868087144e-06, "logits/chosen": -3.0021493434906006, "logits/rejected": -2.92822265625, "logps/chosen": -38.858253479003906, "logps/rejected": -352.87640380859375, "loss": 0.0706, "rewards/accuracies": 1.0, "rewards/chosen": 0.001383939408697188, "rewards/margins": 3.158195972442627, "rewards/rejected": -3.1568121910095215, "step": 6560 }, { "epoch": 0.08, "learning_rate": 3.932248024898252e-06, "logits/chosen": -2.976418972015381, "logits/rejected": -2.8777213096618652, "logps/chosen": -79.75529479980469, "logps/rejected": -452.1162109375, "loss": 0.0672, "rewards/accuracies": 1.0, "rewards/chosen": -0.31548216938972473, "rewards/margins": 3.8117470741271973, "rewards/rejected": -4.127228736877441, "step": 6570 }, { "epoch": 0.08, "learning_rate": 3.938233181709361e-06, "logits/chosen": -2.9921300411224365, "logits/rejected": -2.8875412940979004, "logps/chosen": -65.02778625488281, "logps/rejected": -503.33404541015625, "loss": 0.0626, "rewards/accuracies": 1.0, "rewards/chosen": -0.09323596209287643, "rewards/margins": 4.5563530921936035, "rewards/rejected": -4.649589538574219, "step": 6580 }, { "epoch": 0.08, "learning_rate": 3.944218338520469e-06, "logits/chosen": -3.023123264312744, "logits/rejected": -2.955134630203247, "logps/chosen": -69.33625793457031, "logps/rejected": -561.0730590820312, "loss": 0.0623, "rewards/accuracies": 1.0, "rewards/chosen": -0.22638258337974548, "rewards/margins": 4.986306190490723, "rewards/rejected": -5.212689399719238, "step": 6590 }, { "epoch": 0.08, "learning_rate": 3.950203495331578e-06, "logits/chosen": -3.0143818855285645, "logits/rejected": -2.8952090740203857, "logps/chosen": -88.36500549316406, "logps/rejected": -595.8099365234375, "loss": 0.1103, "rewards/accuracies": 1.0, "rewards/chosen": -0.32797735929489136, "rewards/margins": 5.219908237457275, "rewards/rejected": -5.547885417938232, "step": 6600 }, { "epoch": 0.08, "learning_rate": 3.956188652142686e-06, "logits/chosen": -3.0613389015197754, "logits/rejected": -2.9715099334716797, "logps/chosen": -50.37497329711914, "logps/rejected": -419.483154296875, "loss": 0.1045, "rewards/accuracies": 1.0, "rewards/chosen": -0.025362659245729446, "rewards/margins": 3.7771897315979004, "rewards/rejected": -3.8025519847869873, "step": 6610 }, { "epoch": 0.08, "learning_rate": 3.9621738089537945e-06, "logits/chosen": -3.027296543121338, "logits/rejected": -2.958789348602295, "logps/chosen": -63.61895751953125, "logps/rejected": -506.14227294921875, "loss": 0.0456, "rewards/accuracies": 1.0, "rewards/chosen": -0.15877901017665863, "rewards/margins": 4.5078043937683105, "rewards/rejected": -4.666583061218262, "step": 6620 }, { "epoch": 0.08, "learning_rate": 3.9681589657649035e-06, "logits/chosen": -2.994948625564575, "logits/rejected": -2.9563021659851074, "logps/chosen": -63.727989196777344, "logps/rejected": -507.72393798828125, "loss": 0.1146, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.18792596459388733, "rewards/margins": 4.511446475982666, "rewards/rejected": -4.6993727684021, "step": 6630 }, { "epoch": 0.08, "learning_rate": 3.974144122576012e-06, "logits/chosen": -3.0120153427124023, "logits/rejected": -2.939147710800171, "logps/chosen": -65.12754821777344, "logps/rejected": -490.66851806640625, "loss": 0.1101, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.14563047885894775, "rewards/margins": 4.370835304260254, "rewards/rejected": -4.516465663909912, "step": 6640 }, { "epoch": 0.08, "learning_rate": 3.9801292793871206e-06, "logits/chosen": -3.010913133621216, "logits/rejected": -2.9564085006713867, "logps/chosen": -36.608482360839844, "logps/rejected": -489.92987060546875, "loss": 0.0518, "rewards/accuracies": 1.0, "rewards/chosen": 0.061518002301454544, "rewards/margins": 4.572101593017578, "rewards/rejected": -4.51058292388916, "step": 6650 }, { "epoch": 0.08, "learning_rate": 3.986114436198229e-06, "logits/chosen": -3.0043342113494873, "logits/rejected": -2.9760024547576904, "logps/chosen": -30.746994018554688, "logps/rejected": -330.23724365234375, "loss": 0.1476, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.06843115389347076, "rewards/margins": 2.990917921066284, "rewards/rejected": -2.9224867820739746, "step": 6660 }, { "epoch": 0.08, "learning_rate": 3.992099593009337e-06, "logits/chosen": -3.0029654502868652, "logits/rejected": -2.921220302581787, "logps/chosen": -79.13887023925781, "logps/rejected": -477.14794921875, "loss": 0.1473, "rewards/accuracies": 1.0, "rewards/chosen": -0.27599045634269714, "rewards/margins": 4.093825817108154, "rewards/rejected": -4.369816303253174, "step": 6670 }, { "epoch": 0.08, "learning_rate": 3.998084749820446e-06, "logits/chosen": -3.0151026248931885, "logits/rejected": -2.937915325164795, "logps/chosen": -53.12114334106445, "logps/rejected": -450.18878173828125, "loss": 0.077, "rewards/accuracies": 1.0, "rewards/chosen": -0.06466855108737946, "rewards/margins": 4.043179988861084, "rewards/rejected": -4.107848167419434, "step": 6680 }, { "epoch": 0.08, "learning_rate": 4.004069906631554e-06, "logits/chosen": -3.023047924041748, "logits/rejected": -2.96881365776062, "logps/chosen": -39.64735794067383, "logps/rejected": -411.7117614746094, "loss": 0.0646, "rewards/accuracies": 1.0, "rewards/chosen": 0.02618979848921299, "rewards/margins": 3.737879514694214, "rewards/rejected": -3.7116897106170654, "step": 6690 }, { "epoch": 0.08, "learning_rate": 4.010055063442663e-06, "logits/chosen": -2.9859704971313477, "logits/rejected": -2.9398040771484375, "logps/chosen": -37.47174072265625, "logps/rejected": -399.9027404785156, "loss": 0.0683, "rewards/accuracies": 1.0, "rewards/chosen": 0.07835632562637329, "rewards/margins": 3.694342851638794, "rewards/rejected": -3.6159863471984863, "step": 6700 }, { "epoch": 0.08, "learning_rate": 4.016040220253771e-06, "logits/chosen": -2.984196186065674, "logits/rejected": -2.9123966693878174, "logps/chosen": -82.01242065429688, "logps/rejected": -514.3792724609375, "loss": 0.0741, "rewards/accuracies": 1.0, "rewards/chosen": -0.3431532084941864, "rewards/margins": 4.408566474914551, "rewards/rejected": -4.7517194747924805, "step": 6710 }, { "epoch": 0.08, "learning_rate": 4.022025377064879e-06, "logits/chosen": -3.019634246826172, "logits/rejected": -2.9656124114990234, "logps/chosen": -42.977230072021484, "logps/rejected": -455.15899658203125, "loss": 0.0611, "rewards/accuracies": 1.0, "rewards/chosen": 0.0042897737585008144, "rewards/margins": 4.179634094238281, "rewards/rejected": -4.175344944000244, "step": 6720 }, { "epoch": 0.08, "learning_rate": 4.028010533875988e-06, "logits/chosen": -3.0029609203338623, "logits/rejected": -2.947719097137451, "logps/chosen": -43.23148727416992, "logps/rejected": -475.23370361328125, "loss": 0.0723, "rewards/accuracies": 1.0, "rewards/chosen": 0.026814620941877365, "rewards/margins": 4.393065452575684, "rewards/rejected": -4.366250991821289, "step": 6730 }, { "epoch": 0.08, "learning_rate": 4.033995690687096e-06, "logits/chosen": -3.0276694297790527, "logits/rejected": -2.974635601043701, "logps/chosen": -35.91432189941406, "logps/rejected": -419.4889221191406, "loss": 0.1239, "rewards/accuracies": 1.0, "rewards/chosen": 0.10003030300140381, "rewards/margins": 3.913003921508789, "rewards/rejected": -3.812973737716675, "step": 6740 }, { "epoch": 0.08, "learning_rate": 4.039980847498205e-06, "logits/chosen": -2.9962000846862793, "logits/rejected": -2.9571824073791504, "logps/chosen": -68.48130798339844, "logps/rejected": -280.6853942871094, "loss": 0.185, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.26737210154533386, "rewards/margins": 2.148108959197998, "rewards/rejected": -2.4154810905456543, "step": 6750 }, { "epoch": 0.08, "learning_rate": 4.045966004309313e-06, "logits/chosen": -3.028622627258301, "logits/rejected": -2.953312873840332, "logps/chosen": -33.37986373901367, "logps/rejected": -356.41375732421875, "loss": 0.085, "rewards/accuracies": 1.0, "rewards/chosen": 0.06578543782234192, "rewards/margins": 3.2468981742858887, "rewards/rejected": -3.181112766265869, "step": 6760 }, { "epoch": 0.08, "learning_rate": 4.0519511611204215e-06, "logits/chosen": -3.0225367546081543, "logits/rejected": -2.959728956222534, "logps/chosen": -51.362701416015625, "logps/rejected": -447.99359130859375, "loss": 0.0654, "rewards/accuracies": 1.0, "rewards/chosen": -0.0988745465874672, "rewards/margins": 3.988931179046631, "rewards/rejected": -4.08780574798584, "step": 6770 }, { "epoch": 0.08, "learning_rate": 4.0579363179315305e-06, "logits/chosen": -3.0188302993774414, "logits/rejected": -2.911613941192627, "logps/chosen": -60.15431594848633, "logps/rejected": -520.8186645507812, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": -0.07334982603788376, "rewards/margins": 4.72400426864624, "rewards/rejected": -4.797354221343994, "step": 6780 }, { "epoch": 0.08, "learning_rate": 4.063921474742639e-06, "logits/chosen": -2.9920902252197266, "logits/rejected": -2.9450459480285645, "logps/chosen": -62.336692810058594, "logps/rejected": -381.99591064453125, "loss": 0.0781, "rewards/accuracies": 1.0, "rewards/chosen": -0.20242328941822052, "rewards/margins": 3.219233989715576, "rewards/rejected": -3.421657085418701, "step": 6790 }, { "epoch": 0.08, "learning_rate": 4.069906631553748e-06, "logits/chosen": -3.022716999053955, "logits/rejected": -2.9076666831970215, "logps/chosen": -60.87662887573242, "logps/rejected": -434.8150329589844, "loss": 0.147, "rewards/accuracies": 1.0, "rewards/chosen": -0.05543569475412369, "rewards/margins": 3.8878674507141113, "rewards/rejected": -3.943303346633911, "step": 6800 }, { "epoch": 0.08, "learning_rate": 4.075891788364855e-06, "logits/chosen": -3.006166458129883, "logits/rejected": -2.9365386962890625, "logps/chosen": -70.2993392944336, "logps/rejected": -390.12164306640625, "loss": 0.0601, "rewards/accuracies": 1.0, "rewards/chosen": -0.25752145051956177, "rewards/margins": 3.2544949054718018, "rewards/rejected": -3.5120162963867188, "step": 6810 }, { "epoch": 0.08, "learning_rate": 4.081876945175964e-06, "logits/chosen": -3.009453773498535, "logits/rejected": -2.968289852142334, "logps/chosen": -68.20222473144531, "logps/rejected": -446.417724609375, "loss": 0.0656, "rewards/accuracies": 1.0, "rewards/chosen": -0.2770453989505768, "rewards/margins": 3.795804977416992, "rewards/rejected": -4.072850704193115, "step": 6820 }, { "epoch": 0.08, "learning_rate": 4.087862101987072e-06, "logits/chosen": -3.0454232692718506, "logits/rejected": -2.9793877601623535, "logps/chosen": -42.3050422668457, "logps/rejected": -440.5094299316406, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": 0.05949683114886284, "rewards/margins": 4.076510429382324, "rewards/rejected": -4.017014026641846, "step": 6830 }, { "epoch": 0.08, "learning_rate": 4.093847258798181e-06, "logits/chosen": -3.004528522491455, "logits/rejected": -2.923245906829834, "logps/chosen": -76.42011260986328, "logps/rejected": -470.5263671875, "loss": 0.0794, "rewards/accuracies": 1.0, "rewards/chosen": -0.3170923590660095, "rewards/margins": 3.9971816539764404, "rewards/rejected": -4.314273834228516, "step": 6840 }, { "epoch": 0.08, "learning_rate": 4.099832415609289e-06, "logits/chosen": -2.999588966369629, "logits/rejected": -2.856415271759033, "logps/chosen": -108.7303237915039, "logps/rejected": -570.3038940429688, "loss": 0.0643, "rewards/accuracies": 1.0, "rewards/chosen": -0.5054887533187866, "rewards/margins": 4.80574893951416, "rewards/rejected": -5.3112382888793945, "step": 6850 }, { "epoch": 0.08, "learning_rate": 4.105817572420398e-06, "logits/chosen": -3.0019538402557373, "logits/rejected": -2.9497809410095215, "logps/chosen": -41.6708869934082, "logps/rejected": -402.7589111328125, "loss": 0.0616, "rewards/accuracies": 1.0, "rewards/chosen": 0.0018336146604269743, "rewards/margins": 3.6461548805236816, "rewards/rejected": -3.6443207263946533, "step": 6860 }, { "epoch": 0.08, "learning_rate": 4.111802729231506e-06, "logits/chosen": -3.0498874187469482, "logits/rejected": -2.996678590774536, "logps/chosen": -59.622222900390625, "logps/rejected": -469.7632751464844, "loss": 0.0631, "rewards/accuracies": 1.0, "rewards/chosen": -0.12416082620620728, "rewards/margins": 4.181798458099365, "rewards/rejected": -4.3059587478637695, "step": 6870 }, { "epoch": 0.08, "learning_rate": 4.117787886042614e-06, "logits/chosen": -3.0428719520568848, "logits/rejected": -2.9515278339385986, "logps/chosen": -50.65986251831055, "logps/rejected": -538.6087036132812, "loss": 0.0727, "rewards/accuracies": 1.0, "rewards/chosen": -0.05121110752224922, "rewards/margins": 4.935863018035889, "rewards/rejected": -4.987074375152588, "step": 6880 }, { "epoch": 0.08, "learning_rate": 4.123773042853723e-06, "logits/chosen": -3.0059800148010254, "logits/rejected": -2.897822141647339, "logps/chosen": -79.31342315673828, "logps/rejected": -506.965576171875, "loss": 0.0613, "rewards/accuracies": 1.0, "rewards/chosen": -0.2650079131126404, "rewards/margins": 4.411576271057129, "rewards/rejected": -4.676583766937256, "step": 6890 }, { "epoch": 0.08, "learning_rate": 4.129758199664831e-06, "logits/chosen": -2.9971976280212402, "logits/rejected": -2.90089750289917, "logps/chosen": -91.85694122314453, "logps/rejected": -480.14447021484375, "loss": 0.1335, "rewards/accuracies": 1.0, "rewards/chosen": -0.3449804186820984, "rewards/margins": 4.045905113220215, "rewards/rejected": -4.390885353088379, "step": 6900 }, { "epoch": 0.08, "learning_rate": 4.13574335647594e-06, "logits/chosen": -2.9536099433898926, "logits/rejected": -2.903811454772949, "logps/chosen": -36.59892654418945, "logps/rejected": -347.8048095703125, "loss": 0.0702, "rewards/accuracies": 1.0, "rewards/chosen": 0.04018920287489891, "rewards/margins": 3.14159893989563, "rewards/rejected": -3.101409435272217, "step": 6910 }, { "epoch": 0.08, "learning_rate": 4.1417285132870485e-06, "logits/chosen": -3.0717403888702393, "logits/rejected": -3.0396151542663574, "logps/chosen": -60.84497833251953, "logps/rejected": -472.7386169433594, "loss": 0.0534, "rewards/accuracies": 1.0, "rewards/chosen": -0.15358702838420868, "rewards/margins": 4.160944938659668, "rewards/rejected": -4.3145318031311035, "step": 6920 }, { "epoch": 0.08, "learning_rate": 4.147713670098157e-06, "logits/chosen": -3.031075954437256, "logits/rejected": -2.9108548164367676, "logps/chosen": -96.18494415283203, "logps/rejected": -508.34210205078125, "loss": 0.0915, "rewards/accuracies": 1.0, "rewards/chosen": -0.3816235661506653, "rewards/margins": 4.313274383544922, "rewards/rejected": -4.69489860534668, "step": 6930 }, { "epoch": 0.08, "learning_rate": 4.153698826909266e-06, "logits/chosen": -2.9969611167907715, "logits/rejected": -2.964475154876709, "logps/chosen": -27.631732940673828, "logps/rejected": -419.33380126953125, "loss": 0.0744, "rewards/accuracies": 1.0, "rewards/chosen": 0.1323929727077484, "rewards/margins": 3.95127534866333, "rewards/rejected": -3.8188819885253906, "step": 6940 }, { "epoch": 0.08, "learning_rate": 4.159683983720374e-06, "logits/chosen": -3.062819004058838, "logits/rejected": -3.015266180038452, "logps/chosen": -30.778095245361328, "logps/rejected": -365.63458251953125, "loss": 0.1165, "rewards/accuracies": 1.0, "rewards/chosen": 0.13278429210186005, "rewards/margins": 3.4083733558654785, "rewards/rejected": -3.2755889892578125, "step": 6950 }, { "epoch": 0.08, "learning_rate": 4.165669140531483e-06, "logits/chosen": -3.015559434890747, "logits/rejected": -2.968254804611206, "logps/chosen": -39.270328521728516, "logps/rejected": -438.8287048339844, "loss": 0.1015, "rewards/accuracies": 1.0, "rewards/chosen": 0.008777287788689137, "rewards/margins": 4.021834850311279, "rewards/rejected": -4.013057708740234, "step": 6960 }, { "epoch": 0.08, "learning_rate": 4.171654297342591e-06, "logits/chosen": -3.048372983932495, "logits/rejected": -2.996040105819702, "logps/chosen": -31.818401336669922, "logps/rejected": -426.9656677246094, "loss": 0.0615, "rewards/accuracies": 1.0, "rewards/chosen": 0.1403563767671585, "rewards/margins": 4.027698516845703, "rewards/rejected": -3.8873417377471924, "step": 6970 }, { "epoch": 0.08, "learning_rate": 4.177639454153699e-06, "logits/chosen": -3.015254497528076, "logits/rejected": -2.946228504180908, "logps/chosen": -67.36498260498047, "logps/rejected": -372.18017578125, "loss": 0.0717, "rewards/accuracies": 1.0, "rewards/chosen": -0.19227851927280426, "rewards/margins": 3.1352906227111816, "rewards/rejected": -3.3275692462921143, "step": 6980 }, { "epoch": 0.08, "learning_rate": 4.183624610964808e-06, "logits/chosen": -3.017819404602051, "logits/rejected": -2.9826366901397705, "logps/chosen": -47.406837463378906, "logps/rejected": -434.58154296875, "loss": 0.0729, "rewards/accuracies": 1.0, "rewards/chosen": -0.08314650505781174, "rewards/margins": 3.8831958770751953, "rewards/rejected": -3.9663422107696533, "step": 6990 }, { "epoch": 0.08, "learning_rate": 4.189609767775916e-06, "logits/chosen": -3.006730556488037, "logits/rejected": -2.9249937534332275, "logps/chosen": -84.13489532470703, "logps/rejected": -544.0818481445312, "loss": 0.0622, "rewards/accuracies": 1.0, "rewards/chosen": -0.30642956495285034, "rewards/margins": 4.7442474365234375, "rewards/rejected": -5.050676345825195, "step": 7000 }, { "epoch": 0.08, "learning_rate": 4.195594924587025e-06, "logits/chosen": -2.990638017654419, "logits/rejected": -2.897714138031006, "logps/chosen": -35.798675537109375, "logps/rejected": -390.0846862792969, "loss": 0.0768, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.08169890940189362, "rewards/margins": 3.61444091796875, "rewards/rejected": -3.5327415466308594, "step": 7010 }, { "epoch": 0.08, "learning_rate": 4.201580081398132e-06, "logits/chosen": -3.0611395835876465, "logits/rejected": -2.9607036113739014, "logps/chosen": -35.817481994628906, "logps/rejected": -403.93084716796875, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.08972962945699692, "rewards/margins": 3.7463817596435547, "rewards/rejected": -3.6566524505615234, "step": 7020 }, { "epoch": 0.08, "learning_rate": 4.207565238209241e-06, "logits/chosen": -3.008657932281494, "logits/rejected": -2.9570536613464355, "logps/chosen": -57.21451950073242, "logps/rejected": -441.4606018066406, "loss": 0.0634, "rewards/accuracies": 1.0, "rewards/chosen": -0.06754031032323837, "rewards/margins": 3.9507415294647217, "rewards/rejected": -4.01828145980835, "step": 7030 }, { "epoch": 0.08, "learning_rate": 4.21355039502035e-06, "logits/chosen": -3.0075249671936035, "logits/rejected": -2.930650234222412, "logps/chosen": -57.89238739013672, "logps/rejected": -594.360595703125, "loss": 0.1627, "rewards/accuracies": 1.0, "rewards/chosen": -0.05363532155752182, "rewards/margins": 5.475187301635742, "rewards/rejected": -5.528822898864746, "step": 7040 }, { "epoch": 0.08, "learning_rate": 4.2195355518314585e-06, "logits/chosen": -3.0281834602355957, "logits/rejected": -2.954665184020996, "logps/chosen": -54.54472732543945, "logps/rejected": -509.58367919921875, "loss": 0.0623, "rewards/accuracies": 1.0, "rewards/chosen": -0.002515482949092984, "rewards/margins": 4.702988147735596, "rewards/rejected": -4.705502986907959, "step": 7050 }, { "epoch": 0.08, "learning_rate": 4.2255207086425674e-06, "logits/chosen": -3.0617127418518066, "logits/rejected": -3.015092611312866, "logps/chosen": -56.55760955810547, "logps/rejected": -463.55615234375, "loss": 0.0969, "rewards/accuracies": 1.0, "rewards/chosen": -0.16296908259391785, "rewards/margins": 4.0888166427612305, "rewards/rejected": -4.251785755157471, "step": 7060 }, { "epoch": 0.08, "learning_rate": 4.231505865453675e-06, "logits/chosen": -3.0324997901916504, "logits/rejected": -2.9570741653442383, "logps/chosen": -54.48686981201172, "logps/rejected": -516.5628051757812, "loss": 0.1334, "rewards/accuracies": 1.0, "rewards/chosen": -0.010785524733364582, "rewards/margins": 4.763302803039551, "rewards/rejected": -4.774088382720947, "step": 7070 }, { "epoch": 0.08, "learning_rate": 4.237491022264784e-06, "logits/chosen": -3.0459861755371094, "logits/rejected": -2.9833569526672363, "logps/chosen": -54.66810989379883, "logps/rejected": -440.46685791015625, "loss": 0.0767, "rewards/accuracies": 1.0, "rewards/chosen": -0.09980431944131851, "rewards/margins": 3.9128997325897217, "rewards/rejected": -4.012704372406006, "step": 7080 }, { "epoch": 0.08, "learning_rate": 4.243476179075892e-06, "logits/chosen": -3.0314624309539795, "logits/rejected": -2.961883544921875, "logps/chosen": -54.26225662231445, "logps/rejected": -479.40411376953125, "loss": 0.0573, "rewards/accuracies": 1.0, "rewards/chosen": -0.07100401818752289, "rewards/margins": 4.339993953704834, "rewards/rejected": -4.4109978675842285, "step": 7090 }, { "epoch": 0.08, "learning_rate": 4.249461335887001e-06, "logits/chosen": -3.0504813194274902, "logits/rejected": -2.9894251823425293, "logps/chosen": -54.0513916015625, "logps/rejected": -412.39129638671875, "loss": 0.1023, "rewards/accuracies": 1.0, "rewards/chosen": -0.07982796430587769, "rewards/margins": 3.656208038330078, "rewards/rejected": -3.7360363006591797, "step": 7100 }, { "epoch": 0.09, "learning_rate": 4.255446492698109e-06, "logits/chosen": -3.0180280208587646, "logits/rejected": -2.882258892059326, "logps/chosen": -103.9251480102539, "logps/rejected": -590.1407470703125, "loss": 0.2056, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.508780837059021, "rewards/margins": 5.006078243255615, "rewards/rejected": -5.514859199523926, "step": 7110 }, { "epoch": 0.09, "learning_rate": 4.261431649509217e-06, "logits/chosen": -3.044706344604492, "logits/rejected": -2.957620143890381, "logps/chosen": -84.30338287353516, "logps/rejected": -487.42578125, "loss": 0.1313, "rewards/accuracies": 1.0, "rewards/chosen": -0.32901886105537415, "rewards/margins": 4.159998893737793, "rewards/rejected": -4.489018440246582, "step": 7120 }, { "epoch": 0.09, "learning_rate": 4.267416806320326e-06, "logits/chosen": -3.06355619430542, "logits/rejected": -2.965010643005371, "logps/chosen": -84.64799499511719, "logps/rejected": -456.26629638671875, "loss": 0.0936, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3323113024234772, "rewards/margins": 3.8400509357452393, "rewards/rejected": -4.172362327575684, "step": 7130 }, { "epoch": 0.09, "learning_rate": 4.273401963131434e-06, "logits/chosen": -3.0273990631103516, "logits/rejected": -2.878995180130005, "logps/chosen": -47.298866271972656, "logps/rejected": -521.5784912109375, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.07644541561603546, "rewards/margins": 4.891513824462891, "rewards/rejected": -4.815068244934082, "step": 7140 }, { "epoch": 0.09, "learning_rate": 4.279387119942543e-06, "logits/chosen": -3.004664659500122, "logits/rejected": -2.9209275245666504, "logps/chosen": -47.09686279296875, "logps/rejected": -427.7789001464844, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": 0.033564675599336624, "rewards/margins": 3.9252161979675293, "rewards/rejected": -3.8916516304016113, "step": 7150 }, { "epoch": 0.09, "learning_rate": 4.285372276753651e-06, "logits/chosen": -3.0662455558776855, "logits/rejected": -3.0050160884857178, "logps/chosen": -39.663265228271484, "logps/rejected": -322.0996398925781, "loss": 0.0781, "rewards/accuracies": 1.0, "rewards/chosen": 0.034220464527606964, "rewards/margins": 2.8794116973876953, "rewards/rejected": -2.84519100189209, "step": 7160 }, { "epoch": 0.09, "learning_rate": 4.291357433564759e-06, "logits/chosen": -3.031222105026245, "logits/rejected": -2.9329397678375244, "logps/chosen": -65.55996704101562, "logps/rejected": -465.45452880859375, "loss": 0.0628, "rewards/accuracies": 1.0, "rewards/chosen": -0.2128148078918457, "rewards/margins": 4.047775745391846, "rewards/rejected": -4.260590553283691, "step": 7170 }, { "epoch": 0.09, "learning_rate": 4.297342590375868e-06, "logits/chosen": -3.0494487285614014, "logits/rejected": -2.9958176612854004, "logps/chosen": -36.00104904174805, "logps/rejected": -377.0081481933594, "loss": 0.0924, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.055163342505693436, "rewards/margins": 3.45802640914917, "rewards/rejected": -3.402862548828125, "step": 7180 }, { "epoch": 0.09, "learning_rate": 4.3033277471869765e-06, "logits/chosen": -3.0437779426574707, "logits/rejected": -2.9833223819732666, "logps/chosen": -54.67675018310547, "logps/rejected": -436.5203552246094, "loss": 0.1254, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.05194918438792229, "rewards/margins": 3.9280357360839844, "rewards/rejected": -3.979984760284424, "step": 7190 }, { "epoch": 0.09, "learning_rate": 4.3093129039980855e-06, "logits/chosen": -3.0375328063964844, "logits/rejected": -2.9476304054260254, "logps/chosen": -50.0878791809082, "logps/rejected": -574.3702392578125, "loss": 0.0475, "rewards/accuracies": 1.0, "rewards/chosen": -0.051481444388628006, "rewards/margins": 5.2914934158325195, "rewards/rejected": -5.3429741859436035, "step": 7200 }, { "epoch": 0.09, "learning_rate": 4.315298060809194e-06, "logits/chosen": -3.013395309448242, "logits/rejected": -2.971726655960083, "logps/chosen": -25.11598014831543, "logps/rejected": -370.11029052734375, "loss": 0.0726, "rewards/accuracies": 1.0, "rewards/chosen": 0.13261869549751282, "rewards/margins": 3.466592788696289, "rewards/rejected": -3.3339741230010986, "step": 7210 }, { "epoch": 0.09, "learning_rate": 4.321283217620302e-06, "logits/chosen": -3.0089685916900635, "logits/rejected": -2.9654436111450195, "logps/chosen": -59.77214431762695, "logps/rejected": -397.6619873046875, "loss": 0.1438, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1766057312488556, "rewards/margins": 3.413346767425537, "rewards/rejected": -3.5899524688720703, "step": 7220 }, { "epoch": 0.09, "learning_rate": 4.327268374431411e-06, "logits/chosen": -2.991286516189575, "logits/rejected": -2.8837196826934814, "logps/chosen": -49.70563888549805, "logps/rejected": -567.7117309570312, "loss": 0.0858, "rewards/accuracies": 1.0, "rewards/chosen": 0.0041486709378659725, "rewards/margins": 5.291574001312256, "rewards/rejected": -5.287425994873047, "step": 7230 }, { "epoch": 0.09, "learning_rate": 4.333253531242519e-06, "logits/chosen": -3.0232722759246826, "logits/rejected": -2.9120852947235107, "logps/chosen": -57.353904724121094, "logps/rejected": -576.2973022460938, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/chosen": -0.07463113963603973, "rewards/margins": 5.278428077697754, "rewards/rejected": -5.353060245513916, "step": 7240 }, { "epoch": 0.09, "learning_rate": 4.339238688053628e-06, "logits/chosen": -3.0393266677856445, "logits/rejected": -2.997589349746704, "logps/chosen": -69.9557113647461, "logps/rejected": -509.4759216308594, "loss": 0.0567, "rewards/accuracies": 1.0, "rewards/chosen": -0.20083045959472656, "rewards/margins": 4.513111114501953, "rewards/rejected": -4.7139410972595215, "step": 7250 }, { "epoch": 0.09, "learning_rate": 4.345223844864736e-06, "logits/chosen": -3.029257297515869, "logits/rejected": -2.9817147254943848, "logps/chosen": -41.11869430541992, "logps/rejected": -431.8846130371094, "loss": 0.1114, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.02229365147650242, "rewards/margins": 3.9602885246276855, "rewards/rejected": -3.937995195388794, "step": 7260 }, { "epoch": 0.09, "learning_rate": 4.351209001675844e-06, "logits/chosen": -3.0415167808532715, "logits/rejected": -2.9279091358184814, "logps/chosen": -39.78880310058594, "logps/rejected": -431.84661865234375, "loss": 0.0456, "rewards/accuracies": 1.0, "rewards/chosen": 0.08632255345582962, "rewards/margins": 3.992990016937256, "rewards/rejected": -3.906667709350586, "step": 7270 }, { "epoch": 0.09, "learning_rate": 4.357194158486952e-06, "logits/chosen": -3.016700267791748, "logits/rejected": -2.965954542160034, "logps/chosen": -40.51622009277344, "logps/rejected": -461.84503173828125, "loss": 0.1266, "rewards/accuracies": 1.0, "rewards/chosen": 0.0502467043697834, "rewards/margins": 4.281499862670898, "rewards/rejected": -4.231253147125244, "step": 7280 }, { "epoch": 0.09, "learning_rate": 4.363179315298061e-06, "logits/chosen": -3.0369648933410645, "logits/rejected": -2.9440338611602783, "logps/chosen": -51.88206100463867, "logps/rejected": -496.1744079589844, "loss": 0.0594, "rewards/accuracies": 1.0, "rewards/chosen": -0.032242316752672195, "rewards/margins": 4.5359063148498535, "rewards/rejected": -4.568148612976074, "step": 7290 }, { "epoch": 0.09, "learning_rate": 4.369164472109169e-06, "logits/chosen": -2.978781223297119, "logits/rejected": -2.9208133220672607, "logps/chosen": -42.417293548583984, "logps/rejected": -395.7196960449219, "loss": 0.1206, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.01705770567059517, "rewards/margins": 3.6019129753112793, "rewards/rejected": -3.584855318069458, "step": 7300 }, { "epoch": 0.09, "learning_rate": 4.375149628920278e-06, "logits/chosen": -3.0342984199523926, "logits/rejected": -2.958491325378418, "logps/chosen": -51.36497116088867, "logps/rejected": -441.630859375, "loss": 0.0595, "rewards/accuracies": 1.0, "rewards/chosen": -0.018499543890357018, "rewards/margins": 4.002322196960449, "rewards/rejected": -4.020822525024414, "step": 7310 }, { "epoch": 0.09, "learning_rate": 4.3811347857313864e-06, "logits/chosen": -3.014308214187622, "logits/rejected": -2.9682023525238037, "logps/chosen": -36.385841369628906, "logps/rejected": -477.6517028808594, "loss": 0.0776, "rewards/accuracies": 1.0, "rewards/chosen": 0.06221326068043709, "rewards/margins": 4.442241668701172, "rewards/rejected": -4.38002872467041, "step": 7320 }, { "epoch": 0.09, "learning_rate": 4.3871199425424946e-06, "logits/chosen": -3.0115692615509033, "logits/rejected": -2.9728493690490723, "logps/chosen": -29.54302978515625, "logps/rejected": -431.8504943847656, "loss": 0.0544, "rewards/accuracies": 1.0, "rewards/chosen": 0.11550790071487427, "rewards/margins": 4.057981967926025, "rewards/rejected": -3.942474365234375, "step": 7330 }, { "epoch": 0.09, "learning_rate": 4.3931050993536035e-06, "logits/chosen": -2.976604461669922, "logits/rejected": -2.8802196979522705, "logps/chosen": -49.588218688964844, "logps/rejected": -523.2085571289062, "loss": 0.1832, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.003534604562446475, "rewards/margins": 4.842253684997559, "rewards/rejected": -4.838719367980957, "step": 7340 }, { "epoch": 0.09, "learning_rate": 4.399090256164712e-06, "logits/chosen": -3.020167350769043, "logits/rejected": -2.9440951347351074, "logps/chosen": -62.632896423339844, "logps/rejected": -502.5218811035156, "loss": 0.0833, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.11665613949298859, "rewards/margins": 4.526193141937256, "rewards/rejected": -4.642849922180176, "step": 7350 }, { "epoch": 0.09, "learning_rate": 4.405075412975821e-06, "logits/chosen": -2.9833712577819824, "logits/rejected": -2.919940948486328, "logps/chosen": -36.15540313720703, "logps/rejected": -393.4139404296875, "loss": 0.0712, "rewards/accuracies": 1.0, "rewards/chosen": 0.06504439562559128, "rewards/margins": 3.6166019439697266, "rewards/rejected": -3.5515575408935547, "step": 7360 }, { "epoch": 0.09, "learning_rate": 4.411060569786929e-06, "logits/chosen": -3.0117580890655518, "logits/rejected": -2.9096760749816895, "logps/chosen": -54.520545959472656, "logps/rejected": -549.3722534179688, "loss": 0.0586, "rewards/accuracies": 1.0, "rewards/chosen": -0.002673125360161066, "rewards/margins": 5.099690914154053, "rewards/rejected": -5.102364540100098, "step": 7370 }, { "epoch": 0.09, "learning_rate": 4.417045726598037e-06, "logits/chosen": -3.0535178184509277, "logits/rejected": -2.955691337585449, "logps/chosen": -80.01673889160156, "logps/rejected": -558.0823974609375, "loss": 0.1119, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.22864635288715363, "rewards/margins": 4.956751823425293, "rewards/rejected": -5.185398578643799, "step": 7380 }, { "epoch": 0.09, "learning_rate": 4.423030883409146e-06, "logits/chosen": -3.0382745265960693, "logits/rejected": -2.9366226196289062, "logps/chosen": -43.46425247192383, "logps/rejected": -582.8187255859375, "loss": 0.0537, "rewards/accuracies": 1.0, "rewards/chosen": 0.025228405371308327, "rewards/margins": 5.461124897003174, "rewards/rejected": -5.4358954429626465, "step": 7390 }, { "epoch": 0.09, "learning_rate": 4.429016040220254e-06, "logits/chosen": -3.0261809825897217, "logits/rejected": -2.9721903800964355, "logps/chosen": -65.1257095336914, "logps/rejected": -486.0730895996094, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": -0.15025970339775085, "rewards/margins": 4.285866737365723, "rewards/rejected": -4.436125755310059, "step": 7400 }, { "epoch": 0.09, "learning_rate": 4.435001197031363e-06, "logits/chosen": -3.032820224761963, "logits/rejected": -2.9656033515930176, "logps/chosen": -64.93063354492188, "logps/rejected": -447.2674865722656, "loss": 0.1176, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.17119790613651276, "rewards/margins": 3.9244117736816406, "rewards/rejected": -4.095609664916992, "step": 7410 }, { "epoch": 0.09, "learning_rate": 4.440986353842471e-06, "logits/chosen": -3.011526346206665, "logits/rejected": -2.9271819591522217, "logps/chosen": -85.19865417480469, "logps/rejected": -623.4598999023438, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": -0.31380921602249146, "rewards/margins": 5.507225513458252, "rewards/rejected": -5.8210344314575195, "step": 7420 }, { "epoch": 0.09, "learning_rate": 4.446971510653579e-06, "logits/chosen": -3.0244953632354736, "logits/rejected": -2.9652531147003174, "logps/chosen": -59.733245849609375, "logps/rejected": -423.6366271972656, "loss": 0.2312, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.16453605890274048, "rewards/margins": 3.692197799682617, "rewards/rejected": -3.8567328453063965, "step": 7430 }, { "epoch": 0.09, "learning_rate": 4.452956667464688e-06, "logits/chosen": -3.021324872970581, "logits/rejected": -2.9369089603424072, "logps/chosen": -59.502113342285156, "logps/rejected": -459.466552734375, "loss": 0.0452, "rewards/accuracies": 1.0, "rewards/chosen": -0.0964406356215477, "rewards/margins": 4.091000080108643, "rewards/rejected": -4.187440872192383, "step": 7440 }, { "epoch": 0.09, "learning_rate": 4.458941824275796e-06, "logits/chosen": -3.024712085723877, "logits/rejected": -2.955906629562378, "logps/chosen": -31.975528717041016, "logps/rejected": -508.89007568359375, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 0.15016904473304749, "rewards/margins": 4.849688529968262, "rewards/rejected": -4.699519157409668, "step": 7450 }, { "epoch": 0.09, "learning_rate": 4.464926981086905e-06, "logits/chosen": -3.023043394088745, "logits/rejected": -2.9342799186706543, "logps/chosen": -68.91340637207031, "logps/rejected": -469.1015625, "loss": 0.1091, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.15879681706428528, "rewards/margins": 4.141490459442139, "rewards/rejected": -4.300286769866943, "step": 7460 }, { "epoch": 0.09, "learning_rate": 4.4709121378980135e-06, "logits/chosen": -3.077563762664795, "logits/rejected": -3.042754650115967, "logps/chosen": -42.38349151611328, "logps/rejected": -355.37152099609375, "loss": 0.0987, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.006674435921013355, "rewards/margins": 3.1756255626678467, "rewards/rejected": -3.182300090789795, "step": 7470 }, { "epoch": 0.09, "learning_rate": 4.476897294709122e-06, "logits/chosen": -3.074392557144165, "logits/rejected": -2.960520029067993, "logps/chosen": -64.53568267822266, "logps/rejected": -635.5744018554688, "loss": 0.0504, "rewards/accuracies": 1.0, "rewards/chosen": -0.12888798117637634, "rewards/margins": 5.811798572540283, "rewards/rejected": -5.940686225891113, "step": 7480 }, { "epoch": 0.09, "learning_rate": 4.4828824515202306e-06, "logits/chosen": -3.049534320831299, "logits/rejected": -2.9939417839050293, "logps/chosen": -65.44108581542969, "logps/rejected": -398.5125427246094, "loss": 0.0776, "rewards/accuracies": 1.0, "rewards/chosen": -0.19233369827270508, "rewards/margins": 3.3992507457733154, "rewards/rejected": -3.5915846824645996, "step": 7490 }, { "epoch": 0.09, "learning_rate": 4.488867608331339e-06, "logits/chosen": -3.0544538497924805, "logits/rejected": -3.0089142322540283, "logps/chosen": -38.41520690917969, "logps/rejected": -384.0479431152344, "loss": 0.126, "rewards/accuracies": 1.0, "rewards/chosen": 0.019903158769011497, "rewards/margins": 3.497540235519409, "rewards/rejected": -3.4776368141174316, "step": 7500 }, { "epoch": 0.09, "learning_rate": 4.494852765142448e-06, "logits/chosen": -2.9975478649139404, "logits/rejected": -2.919323682785034, "logps/chosen": -72.22798156738281, "logps/rejected": -395.43548583984375, "loss": 0.1396, "rewards/accuracies": 1.0, "rewards/chosen": -0.22967033088207245, "rewards/margins": 3.3295085430145264, "rewards/rejected": -3.5591793060302734, "step": 7510 }, { "epoch": 0.09, "learning_rate": 4.500837921953555e-06, "logits/chosen": -2.9975476264953613, "logits/rejected": -2.9399867057800293, "logps/chosen": -32.92772674560547, "logps/rejected": -425.3788146972656, "loss": 0.0742, "rewards/accuracies": 1.0, "rewards/chosen": 0.13226501643657684, "rewards/margins": 3.9913928508758545, "rewards/rejected": -3.8591275215148926, "step": 7520 }, { "epoch": 0.09, "learning_rate": 4.506823078764664e-06, "logits/chosen": -3.0264840126037598, "logits/rejected": -3.0037829875946045, "logps/chosen": -29.7465763092041, "logps/rejected": -465.64129638671875, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": 0.12158739566802979, "rewards/margins": 4.393446445465088, "rewards/rejected": -4.271859169006348, "step": 7530 }, { "epoch": 0.09, "learning_rate": 4.512808235575772e-06, "logits/chosen": -3.0100252628326416, "logits/rejected": -2.989206314086914, "logps/chosen": -15.950761795043945, "logps/rejected": -343.9507751464844, "loss": 0.071, "rewards/accuracies": 1.0, "rewards/chosen": 0.2047129124403, "rewards/margins": 3.2878925800323486, "rewards/rejected": -3.083179473876953, "step": 7540 }, { "epoch": 0.09, "learning_rate": 4.518793392386881e-06, "logits/chosen": -3.053323984146118, "logits/rejected": -2.9899351596832275, "logps/chosen": -38.79399871826172, "logps/rejected": -536.8599853515625, "loss": 0.1124, "rewards/accuracies": 1.0, "rewards/chosen": 0.08202316612005234, "rewards/margins": 5.062859535217285, "rewards/rejected": -4.980835914611816, "step": 7550 }, { "epoch": 0.09, "learning_rate": 4.524778549197989e-06, "logits/chosen": -3.0224716663360596, "logits/rejected": -2.9528276920318604, "logps/chosen": -73.71595764160156, "logps/rejected": -547.2513427734375, "loss": 0.0903, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2819986343383789, "rewards/margins": 4.797718048095703, "rewards/rejected": -5.079716682434082, "step": 7560 }, { "epoch": 0.09, "learning_rate": 4.530763706009097e-06, "logits/chosen": -2.996285915374756, "logits/rejected": -2.9304089546203613, "logps/chosen": -41.62346267700195, "logps/rejected": -385.22503662109375, "loss": 0.0818, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.01498134434223175, "rewards/margins": 3.4858360290527344, "rewards/rejected": -3.4708549976348877, "step": 7570 }, { "epoch": 0.09, "learning_rate": 4.536748862820206e-06, "logits/chosen": -3.0299816131591797, "logits/rejected": -2.9330434799194336, "logps/chosen": -105.69633483886719, "logps/rejected": -498.7704162597656, "loss": 0.1906, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.4953010678291321, "rewards/margins": 4.104175567626953, "rewards/rejected": -4.5994768142700195, "step": 7580 }, { "epoch": 0.09, "learning_rate": 4.542734019631314e-06, "logits/chosen": -3.0362308025360107, "logits/rejected": -2.9746861457824707, "logps/chosen": -51.2600212097168, "logps/rejected": -457.86846923828125, "loss": 0.1049, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.035329677164554596, "rewards/margins": 4.141625881195068, "rewards/rejected": -4.176955223083496, "step": 7590 }, { "epoch": 0.09, "learning_rate": 4.548719176442423e-06, "logits/chosen": -3.046665906906128, "logits/rejected": -2.9532008171081543, "logps/chosen": -60.017303466796875, "logps/rejected": -731.5635986328125, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": 0.006543600466102362, "rewards/margins": 6.899575710296631, "rewards/rejected": -6.893031120300293, "step": 7600 }, { "epoch": 0.09, "learning_rate": 4.5547043332535315e-06, "logits/chosen": -3.006007432937622, "logits/rejected": -2.9325642585754395, "logps/chosen": -38.50631332397461, "logps/rejected": -548.9232788085938, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": 0.11751262843608856, "rewards/margins": 5.205714225769043, "rewards/rejected": -5.088201522827148, "step": 7610 }, { "epoch": 0.09, "learning_rate": 4.56068949006464e-06, "logits/chosen": -3.059105157852173, "logits/rejected": -3.017700672149658, "logps/chosen": -44.608642578125, "logps/rejected": -462.44488525390625, "loss": 0.0622, "rewards/accuracies": 1.0, "rewards/chosen": 0.05542459338903427, "rewards/margins": 4.291495323181152, "rewards/rejected": -4.2360711097717285, "step": 7620 }, { "epoch": 0.09, "learning_rate": 4.566674646875749e-06, "logits/chosen": -2.985626459121704, "logits/rejected": -2.9363532066345215, "logps/chosen": -29.349889755249023, "logps/rejected": -425.5794982910156, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": 0.11697592586278915, "rewards/margins": 3.9824554920196533, "rewards/rejected": -3.8654799461364746, "step": 7630 }, { "epoch": 0.09, "learning_rate": 4.572659803686857e-06, "logits/chosen": -3.0119829177856445, "logits/rejected": -2.9049041271209717, "logps/chosen": -65.0676040649414, "logps/rejected": -709.0564575195312, "loss": 0.0807, "rewards/accuracies": 1.0, "rewards/chosen": -0.041461046785116196, "rewards/margins": 6.644538879394531, "rewards/rejected": -6.685999870300293, "step": 7640 }, { "epoch": 0.09, "learning_rate": 4.578644960497966e-06, "logits/chosen": -3.0225555896759033, "logits/rejected": -2.980443239212036, "logps/chosen": -53.24937057495117, "logps/rejected": -427.79547119140625, "loss": 0.1226, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1138344556093216, "rewards/margins": 3.7849388122558594, "rewards/rejected": -3.898773670196533, "step": 7650 }, { "epoch": 0.09, "learning_rate": 4.584630117309074e-06, "logits/chosen": -3.0476527214050293, "logits/rejected": -2.9666550159454346, "logps/chosen": -60.92060089111328, "logps/rejected": -633.6693115234375, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": -0.09282926470041275, "rewards/margins": 5.844266414642334, "rewards/rejected": -5.9370951652526855, "step": 7660 }, { "epoch": 0.09, "learning_rate": 4.590615274120183e-06, "logits/chosen": -3.0178816318511963, "logits/rejected": -2.8907299041748047, "logps/chosen": -104.1913833618164, "logps/rejected": -474.0940856933594, "loss": 0.0701, "rewards/accuracies": 1.0, "rewards/chosen": -0.5286089777946472, "rewards/margins": 3.8228485584259033, "rewards/rejected": -4.351457118988037, "step": 7670 }, { "epoch": 0.09, "learning_rate": 4.596600430931291e-06, "logits/chosen": -3.055224657058716, "logits/rejected": -2.9918034076690674, "logps/chosen": -64.6383056640625, "logps/rejected": -507.9896545410156, "loss": 0.0626, "rewards/accuracies": 1.0, "rewards/chosen": -0.16460204124450684, "rewards/margins": 4.519171714782715, "rewards/rejected": -4.683773994445801, "step": 7680 }, { "epoch": 0.09, "learning_rate": 4.602585587742399e-06, "logits/chosen": -3.0378477573394775, "logits/rejected": -2.9214844703674316, "logps/chosen": -85.986328125, "logps/rejected": -639.306396484375, "loss": 0.0615, "rewards/accuracies": 1.0, "rewards/chosen": -0.29980891942977905, "rewards/margins": 5.669129371643066, "rewards/rejected": -5.968937873840332, "step": 7690 }, { "epoch": 0.09, "learning_rate": 4.608570744553508e-06, "logits/chosen": -3.069139003753662, "logits/rejected": -3.0052056312561035, "logps/chosen": -50.689762115478516, "logps/rejected": -521.5679321289062, "loss": 0.0639, "rewards/accuracies": 1.0, "rewards/chosen": -0.05408851057291031, "rewards/margins": 4.78792667388916, "rewards/rejected": -4.842014312744141, "step": 7700 }, { "epoch": 0.09, "learning_rate": 4.614555901364616e-06, "logits/chosen": -3.0528838634490967, "logits/rejected": -3.0270466804504395, "logps/chosen": -71.6214370727539, "logps/rejected": -392.796142578125, "loss": 0.1614, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3224220871925354, "rewards/margins": 3.223062038421631, "rewards/rejected": -3.545485019683838, "step": 7710 }, { "epoch": 0.09, "learning_rate": 4.620541058175725e-06, "logits/chosen": -3.065441370010376, "logits/rejected": -3.008478879928589, "logps/chosen": -41.529823303222656, "logps/rejected": -427.9263610839844, "loss": 0.0519, "rewards/accuracies": 1.0, "rewards/chosen": 0.001297432230785489, "rewards/margins": 3.903064727783203, "rewards/rejected": -3.9017672538757324, "step": 7720 }, { "epoch": 0.09, "learning_rate": 4.6265262149868324e-06, "logits/chosen": -3.056774854660034, "logits/rejected": -2.9625072479248047, "logps/chosen": -99.47940826416016, "logps/rejected": -645.9232177734375, "loss": 0.1113, "rewards/accuracies": 1.0, "rewards/chosen": -0.48982080817222595, "rewards/margins": 5.562026023864746, "rewards/rejected": -6.051846504211426, "step": 7730 }, { "epoch": 0.09, "learning_rate": 4.6325113717979414e-06, "logits/chosen": -3.046177387237549, "logits/rejected": -3.0132431983947754, "logps/chosen": -91.00098419189453, "logps/rejected": -380.7486877441406, "loss": 0.0609, "rewards/accuracies": 1.0, "rewards/chosen": -0.45371299982070923, "rewards/margins": 2.9801833629608154, "rewards/rejected": -3.433896541595459, "step": 7740 }, { "epoch": 0.09, "learning_rate": 4.6384965286090496e-06, "logits/chosen": -3.0308384895324707, "logits/rejected": -2.9701809883117676, "logps/chosen": -34.57475662231445, "logps/rejected": -490.6730041503906, "loss": 0.0656, "rewards/accuracies": 1.0, "rewards/chosen": 0.1279294192790985, "rewards/margins": 4.6476149559021, "rewards/rejected": -4.519686222076416, "step": 7750 }, { "epoch": 0.09, "learning_rate": 4.6444816854201585e-06, "logits/chosen": -3.0450141429901123, "logits/rejected": -2.9249024391174316, "logps/chosen": -33.974578857421875, "logps/rejected": -505.3621520996094, "loss": 0.0517, "rewards/accuracies": 1.0, "rewards/chosen": 0.10480432212352753, "rewards/margins": 4.769728660583496, "rewards/rejected": -4.664923667907715, "step": 7760 }, { "epoch": 0.09, "learning_rate": 4.6504668422312675e-06, "logits/chosen": -3.0297861099243164, "logits/rejected": -2.988147497177124, "logps/chosen": -47.330379486083984, "logps/rejected": -453.55841064453125, "loss": 0.0531, "rewards/accuracies": 1.0, "rewards/chosen": -0.06729943305253983, "rewards/margins": 4.088961601257324, "rewards/rejected": -4.156261444091797, "step": 7770 }, { "epoch": 0.09, "learning_rate": 4.656451999042375e-06, "logits/chosen": -3.046853542327881, "logits/rejected": -2.9679298400878906, "logps/chosen": -105.31932067871094, "logps/rejected": -517.4320068359375, "loss": 0.1361, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6011191606521606, "rewards/margins": 4.19439697265625, "rewards/rejected": -4.795515537261963, "step": 7780 }, { "epoch": 0.09, "learning_rate": 4.662437155853484e-06, "logits/chosen": -2.988434314727783, "logits/rejected": -2.928332805633545, "logps/chosen": -62.756629943847656, "logps/rejected": -542.7354125976562, "loss": 0.0498, "rewards/accuracies": 1.0, "rewards/chosen": -0.14255425333976746, "rewards/margins": 4.895969390869141, "rewards/rejected": -5.038523197174072, "step": 7790 }, { "epoch": 0.09, "learning_rate": 4.668422312664592e-06, "logits/chosen": -3.0234456062316895, "logits/rejected": -2.9661262035369873, "logps/chosen": -45.52370071411133, "logps/rejected": -416.2353515625, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.046657782047986984, "rewards/margins": 3.8319802284240723, "rewards/rejected": -3.7853221893310547, "step": 7800 }, { "epoch": 0.09, "learning_rate": 4.674407469475701e-06, "logits/chosen": -3.068037271499634, "logits/rejected": -2.9986462593078613, "logps/chosen": -46.10982131958008, "logps/rejected": -374.8138122558594, "loss": 0.1109, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.0035567879676818848, "rewards/margins": 3.3833374977111816, "rewards/rejected": -3.3797805309295654, "step": 7810 }, { "epoch": 0.09, "learning_rate": 4.680392626286809e-06, "logits/chosen": -3.032927989959717, "logits/rejected": -2.954071283340454, "logps/chosen": -28.142345428466797, "logps/rejected": -456.3365173339844, "loss": 0.0533, "rewards/accuracies": 1.0, "rewards/chosen": 0.13796290755271912, "rewards/margins": 4.312276363372803, "rewards/rejected": -4.174313545227051, "step": 7820 }, { "epoch": 0.09, "learning_rate": 4.686377783097917e-06, "logits/chosen": -3.026019334793091, "logits/rejected": -3.0041427612304688, "logps/chosen": -19.118181228637695, "logps/rejected": -359.0189208984375, "loss": 0.0589, "rewards/accuracies": 1.0, "rewards/chosen": 0.16535012423992157, "rewards/margins": 3.3924412727355957, "rewards/rejected": -3.227090835571289, "step": 7830 }, { "epoch": 0.09, "learning_rate": 4.692362939909026e-06, "logits/chosen": -3.007567882537842, "logits/rejected": -2.9702155590057373, "logps/chosen": -42.03204345703125, "logps/rejected": -414.2215881347656, "loss": 0.0621, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012338102096691728, "rewards/margins": 3.758033037185669, "rewards/rejected": -3.7592663764953613, "step": 7840 }, { "epoch": 0.09, "learning_rate": 4.698348096720134e-06, "logits/chosen": -3.1056265830993652, "logits/rejected": -3.063337802886963, "logps/chosen": -74.52031707763672, "logps/rejected": -423.364501953125, "loss": 0.2306, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2995442748069763, "rewards/margins": 3.53344464302063, "rewards/rejected": -3.832988739013672, "step": 7850 }, { "epoch": 0.09, "learning_rate": 4.704333253531243e-06, "logits/chosen": -3.0485520362854004, "logits/rejected": -3.0026354789733887, "logps/chosen": -27.85158348083496, "logps/rejected": -468.75079345703125, "loss": 0.0529, "rewards/accuracies": 1.0, "rewards/chosen": 0.11739285290241241, "rewards/margins": 4.41854190826416, "rewards/rejected": -4.301149845123291, "step": 7860 }, { "epoch": 0.09, "learning_rate": 4.710318410342351e-06, "logits/chosen": -3.049447536468506, "logits/rejected": -2.9606878757476807, "logps/chosen": -35.720489501953125, "logps/rejected": -539.66552734375, "loss": 0.0489, "rewards/accuracies": 1.0, "rewards/chosen": 0.09216490387916565, "rewards/margins": 5.082617282867432, "rewards/rejected": -4.990452766418457, "step": 7870 }, { "epoch": 0.09, "learning_rate": 4.7163035671534595e-06, "logits/chosen": -3.031418800354004, "logits/rejected": -2.973444700241089, "logps/chosen": -62.32599639892578, "logps/rejected": -450.14227294921875, "loss": 0.1131, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.12531878054141998, "rewards/margins": 3.99263334274292, "rewards/rejected": -4.1179518699646, "step": 7880 }, { "epoch": 0.09, "learning_rate": 4.7222887239645685e-06, "logits/chosen": -3.054419994354248, "logits/rejected": -3.018359422683716, "logps/chosen": -68.22824096679688, "logps/rejected": -446.83184814453125, "loss": 0.1204, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.25213226675987244, "rewards/margins": 3.8429291248321533, "rewards/rejected": -4.095061779022217, "step": 7890 }, { "epoch": 0.09, "learning_rate": 4.728273880775677e-06, "logits/chosen": -3.0581774711608887, "logits/rejected": -3.023226737976074, "logps/chosen": -83.79425811767578, "logps/rejected": -495.6788024902344, "loss": 0.1066, "rewards/accuracies": 1.0, "rewards/chosen": -0.37297412753105164, "rewards/margins": 4.204458236694336, "rewards/rejected": -4.577432155609131, "step": 7900 }, { "epoch": 0.09, "learning_rate": 4.7342590375867856e-06, "logits/chosen": -3.0466806888580322, "logits/rejected": -2.9538369178771973, "logps/chosen": -50.36132049560547, "logps/rejected": -614.8416137695312, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": -0.005123595707118511, "rewards/margins": 5.751150608062744, "rewards/rejected": -5.756274223327637, "step": 7910 }, { "epoch": 0.09, "learning_rate": 4.740244194397894e-06, "logits/chosen": -3.0560107231140137, "logits/rejected": -2.9856629371643066, "logps/chosen": -45.88309097290039, "logps/rejected": -531.3729248046875, "loss": 0.0549, "rewards/accuracies": 1.0, "rewards/chosen": -0.03074752911925316, "rewards/margins": 4.872594356536865, "rewards/rejected": -4.903342247009277, "step": 7920 }, { "epoch": 0.09, "learning_rate": 4.746229351209002e-06, "logits/chosen": -3.049511432647705, "logits/rejected": -2.98856782913208, "logps/chosen": -77.62103271484375, "logps/rejected": -509.067138671875, "loss": 0.0594, "rewards/accuracies": 1.0, "rewards/chosen": -0.26281437277793884, "rewards/margins": 4.439327239990234, "rewards/rejected": -4.702141761779785, "step": 7930 }, { "epoch": 0.1, "learning_rate": 4.752214508020111e-06, "logits/chosen": -3.0551464557647705, "logits/rejected": -2.9839866161346436, "logps/chosen": -49.824806213378906, "logps/rejected": -570.4354858398438, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.014690211042761803, "rewards/margins": 5.310606956481934, "rewards/rejected": -5.295916557312012, "step": 7940 }, { "epoch": 0.1, "learning_rate": 4.758199664831219e-06, "logits/chosen": -3.020397424697876, "logits/rejected": -2.95465087890625, "logps/chosen": -67.21943664550781, "logps/rejected": -481.0382385253906, "loss": 0.1196, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.20849573612213135, "rewards/margins": 4.2272796630859375, "rewards/rejected": -4.435774803161621, "step": 7950 }, { "epoch": 0.1, "learning_rate": 4.764184821642328e-06, "logits/chosen": -3.0755562782287598, "logits/rejected": -3.038121461868286, "logps/chosen": -49.65763473510742, "logps/rejected": -386.4429626464844, "loss": 0.1054, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.10186336934566498, "rewards/margins": 3.385322093963623, "rewards/rejected": -3.4871857166290283, "step": 7960 }, { "epoch": 0.1, "learning_rate": 4.770169978453436e-06, "logits/chosen": -3.040396213531494, "logits/rejected": -3.0018320083618164, "logps/chosen": -38.124778747558594, "logps/rejected": -375.8680114746094, "loss": 0.0705, "rewards/accuracies": 1.0, "rewards/chosen": 0.05464700609445572, "rewards/margins": 3.42744779586792, "rewards/rejected": -3.372800827026367, "step": 7970 }, { "epoch": 0.1, "learning_rate": 4.776155135264544e-06, "logits/chosen": -3.0307886600494385, "logits/rejected": -2.938126802444458, "logps/chosen": -54.1931037902832, "logps/rejected": -611.287109375, "loss": 0.0517, "rewards/accuracies": 1.0, "rewards/chosen": 0.02004231885075569, "rewards/margins": 5.722960472106934, "rewards/rejected": -5.702918529510498, "step": 7980 }, { "epoch": 0.1, "learning_rate": 4.782140292075652e-06, "logits/chosen": -3.0610384941101074, "logits/rejected": -2.891146421432495, "logps/chosen": -67.68582916259766, "logps/rejected": -601.285400390625, "loss": 0.0493, "rewards/accuracies": 1.0, "rewards/chosen": -0.10628540813922882, "rewards/margins": 5.48830509185791, "rewards/rejected": -5.594590187072754, "step": 7990 }, { "epoch": 0.1, "learning_rate": 4.788125448886761e-06, "logits/chosen": -3.0657896995544434, "logits/rejected": -2.9808709621429443, "logps/chosen": -42.93663024902344, "logps/rejected": -513.1008911132812, "loss": 0.1159, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.00893249548971653, "rewards/margins": 4.743076324462891, "rewards/rejected": -4.75200891494751, "step": 8000 }, { "epoch": 0.1, "learning_rate": 4.794110605697869e-06, "logits/chosen": -3.0621466636657715, "logits/rejected": -2.9382824897766113, "logps/chosen": -86.44406127929688, "logps/rejected": -728.7704467773438, "loss": 0.1279, "rewards/accuracies": 1.0, "rewards/chosen": -0.2488025724887848, "rewards/margins": 6.618304252624512, "rewards/rejected": -6.867107391357422, "step": 8010 }, { "epoch": 0.1, "learning_rate": 4.800095762508978e-06, "logits/chosen": -3.042637348175049, "logits/rejected": -2.9835124015808105, "logps/chosen": -36.249427795410156, "logps/rejected": -606.2030639648438, "loss": 0.0586, "rewards/accuracies": 1.0, "rewards/chosen": 0.07650964707136154, "rewards/margins": 5.753020286560059, "rewards/rejected": -5.676510810852051, "step": 8020 }, { "epoch": 0.1, "learning_rate": 4.8060809193200865e-06, "logits/chosen": -3.0330231189727783, "logits/rejected": -2.9832260608673096, "logps/chosen": -34.2646369934082, "logps/rejected": -502.0621643066406, "loss": 0.0639, "rewards/accuracies": 1.0, "rewards/chosen": 0.05718403309583664, "rewards/margins": 4.695823669433594, "rewards/rejected": -4.638638973236084, "step": 8030 }, { "epoch": 0.1, "learning_rate": 4.812066076131195e-06, "logits/chosen": -3.0557491779327393, "logits/rejected": -2.997188091278076, "logps/chosen": -38.39020538330078, "logps/rejected": -458.43389892578125, "loss": 0.0538, "rewards/accuracies": 1.0, "rewards/chosen": 0.07274853438138962, "rewards/margins": 4.28013277053833, "rewards/rejected": -4.207383632659912, "step": 8040 }, { "epoch": 0.1, "learning_rate": 4.818051232942304e-06, "logits/chosen": -3.0043492317199707, "logits/rejected": -2.8673958778381348, "logps/chosen": -55.87309646606445, "logps/rejected": -522.5741577148438, "loss": 0.0446, "rewards/accuracies": 1.0, "rewards/chosen": -0.048225075006484985, "rewards/margins": 4.783317565917969, "rewards/rejected": -4.831542015075684, "step": 8050 }, { "epoch": 0.1, "learning_rate": 4.824036389753412e-06, "logits/chosen": -3.007847309112549, "logits/rejected": -2.962385654449463, "logps/chosen": -44.819862365722656, "logps/rejected": -455.83050537109375, "loss": 0.1358, "rewards/accuracies": 1.0, "rewards/chosen": -0.03569914028048515, "rewards/margins": 4.139822006225586, "rewards/rejected": -4.175521373748779, "step": 8060 }, { "epoch": 0.1, "learning_rate": 4.830021546564521e-06, "logits/chosen": -3.0278003215789795, "logits/rejected": -2.878171443939209, "logps/chosen": -65.00398254394531, "logps/rejected": -670.706787109375, "loss": 0.0931, "rewards/accuracies": 1.0, "rewards/chosen": -0.1492060422897339, "rewards/margins": 6.142232418060303, "rewards/rejected": -6.291439056396484, "step": 8070 }, { "epoch": 0.1, "learning_rate": 4.836006703375629e-06, "logits/chosen": -3.0364935398101807, "logits/rejected": -2.974205493927002, "logps/chosen": -32.869720458984375, "logps/rejected": -456.9125061035156, "loss": 0.0537, "rewards/accuracies": 1.0, "rewards/chosen": 0.140314981341362, "rewards/margins": 4.335558891296387, "rewards/rejected": -4.195243835449219, "step": 8080 }, { "epoch": 0.1, "learning_rate": 4.841991860186737e-06, "logits/chosen": -3.051805019378662, "logits/rejected": -2.943449020385742, "logps/chosen": -91.06651306152344, "logps/rejected": -615.658203125, "loss": 0.0904, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3916870653629303, "rewards/margins": 5.3527984619140625, "rewards/rejected": -5.744485855102539, "step": 8090 }, { "epoch": 0.1, "learning_rate": 4.847977016997846e-06, "logits/chosen": -3.024247407913208, "logits/rejected": -2.915907144546509, "logps/chosen": -94.44526672363281, "logps/rejected": -558.41357421875, "loss": 0.0501, "rewards/accuracies": 1.0, "rewards/chosen": -0.31228068470954895, "rewards/margins": 4.888253688812256, "rewards/rejected": -5.200533866882324, "step": 8100 }, { "epoch": 0.1, "learning_rate": 4.853962173808954e-06, "logits/chosen": -3.0435516834259033, "logits/rejected": -2.9924607276916504, "logps/chosen": -53.188499450683594, "logps/rejected": -503.09814453125, "loss": 0.0741, "rewards/accuracies": 1.0, "rewards/chosen": -0.025533944368362427, "rewards/margins": 4.623964786529541, "rewards/rejected": -4.649498462677002, "step": 8110 }, { "epoch": 0.1, "learning_rate": 4.859947330620063e-06, "logits/chosen": -3.052152395248413, "logits/rejected": -2.970337152481079, "logps/chosen": -44.79603958129883, "logps/rejected": -593.79150390625, "loss": 0.049, "rewards/accuracies": 1.0, "rewards/chosen": 0.04830511286854744, "rewards/margins": 5.598971366882324, "rewards/rejected": -5.550665855407715, "step": 8120 }, { "epoch": 0.1, "learning_rate": 4.865932487431171e-06, "logits/chosen": -3.061842441558838, "logits/rejected": -3.0177416801452637, "logps/chosen": -39.73158264160156, "logps/rejected": -440.38031005859375, "loss": 0.0595, "rewards/accuracies": 1.0, "rewards/chosen": 0.07411577552556992, "rewards/margins": 4.095958709716797, "rewards/rejected": -4.021842956542969, "step": 8130 }, { "epoch": 0.1, "learning_rate": 4.871917644242279e-06, "logits/chosen": -3.045684576034546, "logits/rejected": -2.9864726066589355, "logps/chosen": -50.74065017700195, "logps/rejected": -465.236572265625, "loss": 0.0541, "rewards/accuracies": 1.0, "rewards/chosen": -0.04519547149538994, "rewards/margins": 4.202866554260254, "rewards/rejected": -4.248062610626221, "step": 8140 }, { "epoch": 0.1, "learning_rate": 4.877902801053388e-06, "logits/chosen": -3.03660249710083, "logits/rejected": -2.9949402809143066, "logps/chosen": -51.515480041503906, "logps/rejected": -519.9011840820312, "loss": 0.1231, "rewards/accuracies": 1.0, "rewards/chosen": -0.07991038262844086, "rewards/margins": 4.744698524475098, "rewards/rejected": -4.82460880279541, "step": 8150 }, { "epoch": 0.1, "learning_rate": 4.8838879578644964e-06, "logits/chosen": -3.0677649974823, "logits/rejected": -3.049787998199463, "logps/chosen": -31.674524307250977, "logps/rejected": -376.7925720214844, "loss": 0.0597, "rewards/accuracies": 1.0, "rewards/chosen": 0.11025917530059814, "rewards/margins": 3.5081112384796143, "rewards/rejected": -3.3978514671325684, "step": 8160 }, { "epoch": 0.1, "learning_rate": 4.889873114675605e-06, "logits/chosen": -3.0401198863983154, "logits/rejected": -3.0090906620025635, "logps/chosen": -29.665752410888672, "logps/rejected": -353.8428649902344, "loss": 0.0527, "rewards/accuracies": 1.0, "rewards/chosen": 0.1464247852563858, "rewards/margins": 3.3140978813171387, "rewards/rejected": -3.167672872543335, "step": 8170 }, { "epoch": 0.1, "learning_rate": 4.8958582714867135e-06, "logits/chosen": -3.0189707279205322, "logits/rejected": -2.983468770980835, "logps/chosen": -43.48053741455078, "logps/rejected": -469.83660888671875, "loss": 0.0586, "rewards/accuracies": 1.0, "rewards/chosen": -0.036506593227386475, "rewards/margins": 4.272501468658447, "rewards/rejected": -4.3090081214904785, "step": 8180 }, { "epoch": 0.1, "learning_rate": 4.901843428297822e-06, "logits/chosen": -3.068213939666748, "logits/rejected": -3.0207290649414062, "logps/chosen": -53.46636199951172, "logps/rejected": -500.7232360839844, "loss": 0.0452, "rewards/accuracies": 1.0, "rewards/chosen": -0.08116736263036728, "rewards/margins": 4.532951831817627, "rewards/rejected": -4.614119529724121, "step": 8190 }, { "epoch": 0.1, "learning_rate": 4.907828585108931e-06, "logits/chosen": -3.0700953006744385, "logits/rejected": -3.0330519676208496, "logps/chosen": -41.777740478515625, "logps/rejected": -489.73565673828125, "loss": 0.0582, "rewards/accuracies": 1.0, "rewards/chosen": 0.03898899629712105, "rewards/margins": 4.551074504852295, "rewards/rejected": -4.512085437774658, "step": 8200 }, { "epoch": 0.1, "learning_rate": 4.913813741920039e-06, "logits/chosen": -3.035275459289551, "logits/rejected": -2.9916043281555176, "logps/chosen": -55.33720016479492, "logps/rejected": -546.7852172851562, "loss": 0.0518, "rewards/accuracies": 1.0, "rewards/chosen": -0.10434988886117935, "rewards/margins": 4.958540439605713, "rewards/rejected": -5.06289005279541, "step": 8210 }, { "epoch": 0.1, "learning_rate": 4.919798898731148e-06, "logits/chosen": -3.059591770172119, "logits/rejected": -2.9888405799865723, "logps/chosen": -94.4122314453125, "logps/rejected": -560.6328735351562, "loss": 0.0607, "rewards/accuracies": 1.0, "rewards/chosen": -0.3127836287021637, "rewards/margins": 4.8756632804870605, "rewards/rejected": -5.188446998596191, "step": 8220 }, { "epoch": 0.1, "learning_rate": 4.925784055542255e-06, "logits/chosen": -3.068955659866333, "logits/rejected": -2.980642557144165, "logps/chosen": -74.8913345336914, "logps/rejected": -548.9749145507812, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": -0.2406628578901291, "rewards/margins": 4.8449249267578125, "rewards/rejected": -5.085587978363037, "step": 8230 }, { "epoch": 0.1, "learning_rate": 4.931769212353364e-06, "logits/chosen": -3.051985025405884, "logits/rejected": -2.9973959922790527, "logps/chosen": -39.62030792236328, "logps/rejected": -525.5612182617188, "loss": 0.0931, "rewards/accuracies": 1.0, "rewards/chosen": 0.13093529641628265, "rewards/margins": 5.002021789550781, "rewards/rejected": -4.871086597442627, "step": 8240 }, { "epoch": 0.1, "learning_rate": 4.937754369164472e-06, "logits/chosen": -3.046487331390381, "logits/rejected": -2.967125177383423, "logps/chosen": -39.3007698059082, "logps/rejected": -542.1439819335938, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": 0.0747755914926529, "rewards/margins": 5.110599994659424, "rewards/rejected": -5.035824298858643, "step": 8250 }, { "epoch": 0.1, "learning_rate": 4.943739525975581e-06, "logits/chosen": -3.0721492767333984, "logits/rejected": -3.0369231700897217, "logps/chosen": -34.98900604248047, "logps/rejected": -492.232177734375, "loss": 0.0573, "rewards/accuracies": 1.0, "rewards/chosen": 0.12733231484889984, "rewards/margins": 4.673294544219971, "rewards/rejected": -4.545962333679199, "step": 8260 }, { "epoch": 0.1, "learning_rate": 4.949724682786689e-06, "logits/chosen": -3.0273239612579346, "logits/rejected": -2.9305360317230225, "logps/chosen": -49.20183181762695, "logps/rejected": -619.3009033203125, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": 0.030579078942537308, "rewards/margins": 5.823378562927246, "rewards/rejected": -5.792799472808838, "step": 8270 }, { "epoch": 0.1, "learning_rate": 4.955709839597797e-06, "logits/chosen": -3.026925563812256, "logits/rejected": -2.924166202545166, "logps/chosen": -62.60888671875, "logps/rejected": -610.9795532226562, "loss": 0.0539, "rewards/accuracies": 1.0, "rewards/chosen": -0.024160513654351234, "rewards/margins": 5.676016330718994, "rewards/rejected": -5.700177192687988, "step": 8280 }, { "epoch": 0.1, "learning_rate": 4.961694996408906e-06, "logits/chosen": -3.1113638877868652, "logits/rejected": -2.986180067062378, "logps/chosen": -54.94965362548828, "logps/rejected": -652.0568237304688, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/chosen": -0.026252442970871925, "rewards/margins": 6.08854341506958, "rewards/rejected": -6.114796161651611, "step": 8290 }, { "epoch": 0.1, "learning_rate": 4.9676801532200145e-06, "logits/chosen": -3.059831142425537, "logits/rejected": -2.9582934379577637, "logps/chosen": -60.060035705566406, "logps/rejected": -509.6424865722656, "loss": 0.077, "rewards/accuracies": 1.0, "rewards/chosen": -0.151692196726799, "rewards/margins": 4.555292129516602, "rewards/rejected": -4.706984519958496, "step": 8300 }, { "epoch": 0.1, "learning_rate": 4.9736653100311235e-06, "logits/chosen": -3.0029094219207764, "logits/rejected": -2.9525532722473145, "logps/chosen": -19.20625877380371, "logps/rejected": -405.71441650390625, "loss": 0.0696, "rewards/accuracies": 1.0, "rewards/chosen": 0.17510215938091278, "rewards/margins": 3.8646609783172607, "rewards/rejected": -3.6895594596862793, "step": 8310 }, { "epoch": 0.1, "learning_rate": 4.979650466842232e-06, "logits/chosen": -3.0468170642852783, "logits/rejected": -3.006052255630493, "logps/chosen": -30.729816436767578, "logps/rejected": -500.2235412597656, "loss": 0.0674, "rewards/accuracies": 1.0, "rewards/chosen": 0.11668264865875244, "rewards/margins": 4.743964195251465, "rewards/rejected": -4.627281665802002, "step": 8320 }, { "epoch": 0.1, "learning_rate": 4.98563562365334e-06, "logits/chosen": -3.0798771381378174, "logits/rejected": -3.027916431427002, "logps/chosen": -55.51057052612305, "logps/rejected": -485.456787109375, "loss": 0.1898, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.09097753465175629, "rewards/margins": 4.3773417472839355, "rewards/rejected": -4.468319416046143, "step": 8330 }, { "epoch": 0.1, "learning_rate": 4.991620780464449e-06, "logits/chosen": -3.034468173980713, "logits/rejected": -2.9506430625915527, "logps/chosen": -69.41506958007812, "logps/rejected": -613.26318359375, "loss": 0.0944, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.16786500811576843, "rewards/margins": 5.572614669799805, "rewards/rejected": -5.740479469299316, "step": 8340 }, { "epoch": 0.1, "learning_rate": 4.997605937275557e-06, "logits/chosen": -3.0715160369873047, "logits/rejected": -3.013174533843994, "logps/chosen": -39.96064376831055, "logps/rejected": -603.89111328125, "loss": 0.0723, "rewards/accuracies": 1.0, "rewards/chosen": 0.08253239840269089, "rewards/margins": 5.71658182144165, "rewards/rejected": -5.634048938751221, "step": 8350 }, { "epoch": 0.1, "learning_rate": 4.9999999214312495e-06, "logits/chosen": -3.081514358520508, "logits/rejected": -3.032658338546753, "logps/chosen": -49.9189453125, "logps/rejected": -497.5552673339844, "loss": 0.1051, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.03962335363030434, "rewards/margins": 4.546687126159668, "rewards/rejected": -4.586310386657715, "step": 8360 }, { "epoch": 0.1, "learning_rate": 4.9999994412889005e-06, "logits/chosen": -3.0538408756256104, "logits/rejected": -3.00046706199646, "logps/chosen": -52.0067138671875, "logps/rejected": -656.746337890625, "loss": 0.0485, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013549268478527665, "rewards/margins": 6.162026405334473, "rewards/rejected": -6.1633806228637695, "step": 8370 }, { "epoch": 0.1, "learning_rate": 4.999998524653593e-06, "logits/chosen": -3.028506278991699, "logits/rejected": -3.002793788909912, "logps/chosen": -31.817535400390625, "logps/rejected": -563.9564208984375, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": 0.15534693002700806, "rewards/margins": 5.400686264038086, "rewards/rejected": -5.245340347290039, "step": 8380 }, { "epoch": 0.1, "learning_rate": 4.999997171525486e-06, "logits/chosen": -3.037336826324463, "logits/rejected": -2.9645907878875732, "logps/chosen": -49.057273864746094, "logps/rejected": -509.63446044921875, "loss": 0.0659, "rewards/accuracies": 1.0, "rewards/chosen": -0.004124464932829142, "rewards/margins": 4.698664665222168, "rewards/rejected": -4.702789306640625, "step": 8390 }, { "epoch": 0.1, "learning_rate": 4.999995381904816e-06, "logits/chosen": -3.0549263954162598, "logits/rejected": -2.9389326572418213, "logps/chosen": -85.35543060302734, "logps/rejected": -679.1005859375, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": -0.2914902865886688, "rewards/margins": 6.087657451629639, "rewards/rejected": -6.379148483276367, "step": 8400 }, { "epoch": 0.1, "learning_rate": 4.9999931557918964e-06, "logits/chosen": -3.082522392272949, "logits/rejected": -3.011096477508545, "logps/chosen": -81.28787994384766, "logps/rejected": -500.6809997558594, "loss": 0.1105, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.29493770003318787, "rewards/margins": 4.317121505737305, "rewards/rejected": -4.612059593200684, "step": 8410 }, { "epoch": 0.1, "learning_rate": 4.999990493187116e-06, "logits/chosen": -3.0864977836608887, "logits/rejected": -2.9973652362823486, "logps/chosen": -44.472557067871094, "logps/rejected": -564.1029052734375, "loss": 0.0578, "rewards/accuracies": 1.0, "rewards/chosen": -0.012392446398735046, "rewards/margins": 5.233931541442871, "rewards/rejected": -5.246323585510254, "step": 8420 }, { "epoch": 0.1, "learning_rate": 4.9999873940909385e-06, "logits/chosen": -3.0580320358276367, "logits/rejected": -3.0293192863464355, "logps/chosen": -41.50188446044922, "logps/rejected": -410.986328125, "loss": 0.1084, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0034335076343268156, "rewards/margins": 3.7228951454162598, "rewards/rejected": -3.7263283729553223, "step": 8430 }, { "epoch": 0.1, "learning_rate": 4.999983858503904e-06, "logits/chosen": -3.071200370788574, "logits/rejected": -3.032886028289795, "logps/chosen": -42.76713180541992, "logps/rejected": -527.3829345703125, "loss": 0.0545, "rewards/accuracies": 1.0, "rewards/chosen": 0.0746004655957222, "rewards/margins": 4.958191871643066, "rewards/rejected": -4.883591651916504, "step": 8440 }, { "epoch": 0.1, "learning_rate": 4.999979886426632e-06, "logits/chosen": -3.044917106628418, "logits/rejected": -3.0053532123565674, "logps/chosen": -30.02960205078125, "logps/rejected": -533.3546142578125, "loss": 0.049, "rewards/accuracies": 1.0, "rewards/chosen": 0.17783156037330627, "rewards/margins": 5.136017799377441, "rewards/rejected": -4.958186149597168, "step": 8450 }, { "epoch": 0.1, "learning_rate": 4.999975477859816e-06, "logits/chosen": -3.0676281452178955, "logits/rejected": -2.9978151321411133, "logps/chosen": -41.65400695800781, "logps/rejected": -385.13616943359375, "loss": 0.0893, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.026450350880622864, "rewards/margins": 3.4799232482910156, "rewards/rejected": -3.453472852706909, "step": 8460 }, { "epoch": 0.1, "learning_rate": 4.9999706328042245e-06, "logits/chosen": -3.0545098781585693, "logits/rejected": -2.9484972953796387, "logps/chosen": -47.71952438354492, "logps/rejected": -503.6099548339844, "loss": 0.0544, "rewards/accuracies": 1.0, "rewards/chosen": 0.025260984897613525, "rewards/margins": 4.6623430252075195, "rewards/rejected": -4.637082576751709, "step": 8470 }, { "epoch": 0.1, "learning_rate": 4.999965351260704e-06, "logits/chosen": -3.067164897918701, "logits/rejected": -2.9402365684509277, "logps/chosen": -64.6422348022461, "logps/rejected": -730.3067626953125, "loss": 0.0379, "rewards/accuracies": 1.0, "rewards/chosen": -0.07994715869426727, "rewards/margins": 6.808529853820801, "rewards/rejected": -6.888476371765137, "step": 8480 }, { "epoch": 0.1, "learning_rate": 4.999959633230176e-06, "logits/chosen": -3.0754144191741943, "logits/rejected": -3.0188651084899902, "logps/chosen": -35.81456756591797, "logps/rejected": -533.3995361328125, "loss": 0.046, "rewards/accuracies": 1.0, "rewards/chosen": 0.0521719753742218, "rewards/margins": 5.007576942443848, "rewards/rejected": -4.955405235290527, "step": 8490 }, { "epoch": 0.1, "learning_rate": 4.999953478713639e-06, "logits/chosen": -3.0370821952819824, "logits/rejected": -2.970977306365967, "logps/chosen": -69.8065414428711, "logps/rejected": -587.7892456054688, "loss": 0.0481, "rewards/accuracies": 1.0, "rewards/chosen": -0.22268664836883545, "rewards/margins": 5.247125148773193, "rewards/rejected": -5.46981143951416, "step": 8500 }, { "epoch": 0.1, "learning_rate": 4.999946887712169e-06, "logits/chosen": -3.0673604011535645, "logits/rejected": -3.019029140472412, "logps/chosen": -23.42746925354004, "logps/rejected": -505.43695068359375, "loss": 0.0445, "rewards/accuracies": 1.0, "rewards/chosen": 0.17914226651191711, "rewards/margins": 4.866961479187012, "rewards/rejected": -4.687819480895996, "step": 8510 }, { "epoch": 0.1, "learning_rate": 4.999939860226915e-06, "logits/chosen": -3.075146436691284, "logits/rejected": -2.989077091217041, "logps/chosen": -32.19868469238281, "logps/rejected": -559.0421752929688, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": 0.13791362941265106, "rewards/margins": 5.342528343200684, "rewards/rejected": -5.204614162445068, "step": 8520 }, { "epoch": 0.1, "learning_rate": 4.999932396259105e-06, "logits/chosen": -3.063297748565674, "logits/rejected": -3.028243064880371, "logps/chosen": -48.552162170410156, "logps/rejected": -483.838134765625, "loss": 0.0498, "rewards/accuracies": 1.0, "rewards/chosen": -0.008743745274841785, "rewards/margins": 4.453482627868652, "rewards/rejected": -4.462226390838623, "step": 8530 }, { "epoch": 0.1, "learning_rate": 4.999924495810042e-06, "logits/chosen": -3.073972463607788, "logits/rejected": -3.053211212158203, "logps/chosen": -30.617694854736328, "logps/rejected": -410.72320556640625, "loss": 0.0678, "rewards/accuracies": 1.0, "rewards/chosen": 0.06522784382104874, "rewards/margins": 3.8075079917907715, "rewards/rejected": -3.7422802448272705, "step": 8540 }, { "epoch": 0.1, "learning_rate": 4.999916158881105e-06, "logits/chosen": -3.0496864318847656, "logits/rejected": -2.9848484992980957, "logps/chosen": -47.878684997558594, "logps/rejected": -517.9566650390625, "loss": 0.0599, "rewards/accuracies": 1.0, "rewards/chosen": 0.023552756756544113, "rewards/margins": 4.804348945617676, "rewards/rejected": -4.780797004699707, "step": 8550 }, { "epoch": 0.1, "learning_rate": 4.99990738547375e-06, "logits/chosen": -3.0674538612365723, "logits/rejected": -3.0192291736602783, "logps/chosen": -36.96794891357422, "logps/rejected": -542.6798095703125, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": 0.11458562314510345, "rewards/margins": 5.145281791687012, "rewards/rejected": -5.030696392059326, "step": 8560 }, { "epoch": 0.1, "learning_rate": 4.9998981755895095e-06, "logits/chosen": -3.029355049133301, "logits/rejected": -2.9396870136260986, "logps/chosen": -54.806007385253906, "logps/rejected": -474.653076171875, "loss": 0.1161, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.061411917209625244, "rewards/margins": 4.319262504577637, "rewards/rejected": -4.380674839019775, "step": 8570 }, { "epoch": 0.1, "learning_rate": 4.999888529229989e-06, "logits/chosen": -3.067626476287842, "logits/rejected": -2.9720711708068848, "logps/chosen": -40.41810607910156, "logps/rejected": -637.367919921875, "loss": 0.0465, "rewards/accuracies": 1.0, "rewards/chosen": 0.08051793277263641, "rewards/margins": 6.061421871185303, "rewards/rejected": -5.980903625488281, "step": 8580 }, { "epoch": 0.1, "learning_rate": 4.999878446396876e-06, "logits/chosen": -3.0678417682647705, "logits/rejected": -3.0411570072174072, "logps/chosen": -29.27535629272461, "logps/rejected": -392.0329284667969, "loss": 0.0683, "rewards/accuracies": 1.0, "rewards/chosen": 0.07689812034368515, "rewards/margins": 3.6196129322052, "rewards/rejected": -3.5427145957946777, "step": 8590 }, { "epoch": 0.1, "learning_rate": 4.999867927091929e-06, "logits/chosen": -3.0477137565612793, "logits/rejected": -2.9615187644958496, "logps/chosen": -56.18982696533203, "logps/rejected": -383.9884338378906, "loss": 0.1441, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.046530116349458694, "rewards/margins": 3.4078803062438965, "rewards/rejected": -3.4544105529785156, "step": 8600 }, { "epoch": 0.1, "learning_rate": 4.999856971316984e-06, "logits/chosen": -3.0728697776794434, "logits/rejected": -3.0037684440612793, "logps/chosen": -28.06793212890625, "logps/rejected": -553.7804565429688, "loss": 0.1116, "rewards/accuracies": 1.0, "rewards/chosen": 0.14906273782253265, "rewards/margins": 5.297689914703369, "rewards/rejected": -5.148627281188965, "step": 8610 }, { "epoch": 0.1, "learning_rate": 4.999845579073956e-06, "logits/chosen": -3.0646474361419678, "logits/rejected": -3.0206868648529053, "logps/chosen": -52.822715759277344, "logps/rejected": -474.04974365234375, "loss": 0.1197, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.07457628101110458, "rewards/margins": 4.286048889160156, "rewards/rejected": -4.36062479019165, "step": 8620 }, { "epoch": 0.1, "learning_rate": 4.999833750364832e-06, "logits/chosen": -3.0775136947631836, "logits/rejected": -3.0296738147735596, "logps/chosen": -37.43596267700195, "logps/rejected": -439.7591857910156, "loss": 0.0549, "rewards/accuracies": 1.0, "rewards/chosen": 0.0781383365392685, "rewards/margins": 4.100828647613525, "rewards/rejected": -4.022690296173096, "step": 8630 }, { "epoch": 0.1, "learning_rate": 4.999821485191679e-06, "logits/chosen": -3.0743000507354736, "logits/rejected": -2.9748528003692627, "logps/chosen": -87.37825775146484, "logps/rejected": -595.55322265625, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/chosen": -0.3450250029563904, "rewards/margins": 5.2096333503723145, "rewards/rejected": -5.554657936096191, "step": 8640 }, { "epoch": 0.1, "learning_rate": 4.999808783556638e-06, "logits/chosen": -3.0219063758850098, "logits/rejected": -2.936777114868164, "logps/chosen": -90.3478775024414, "logps/rejected": -687.3416748046875, "loss": 0.0467, "rewards/accuracies": 1.0, "rewards/chosen": -0.3196953237056732, "rewards/margins": 6.137146949768066, "rewards/rejected": -6.456841945648193, "step": 8650 }, { "epoch": 0.1, "learning_rate": 4.999795645461925e-06, "logits/chosen": -3.1032698154449463, "logits/rejected": -3.035698652267456, "logps/chosen": -114.2046890258789, "logps/rejected": -655.6224975585938, "loss": 0.044, "rewards/accuracies": 1.0, "rewards/chosen": -0.5884615182876587, "rewards/margins": 5.575291633605957, "rewards/rejected": -6.163752555847168, "step": 8660 }, { "epoch": 0.1, "learning_rate": 4.999782070909836e-06, "logits/chosen": -3.0746848583221436, "logits/rejected": -3.0445609092712402, "logps/chosen": -86.57347869873047, "logps/rejected": -452.88555908203125, "loss": 0.2049, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.37582582235336304, "rewards/margins": 3.7752082347869873, "rewards/rejected": -4.151034355163574, "step": 8670 }, { "epoch": 0.1, "learning_rate": 4.99976805990274e-06, "logits/chosen": -3.057809829711914, "logits/rejected": -2.9856677055358887, "logps/chosen": -86.54301452636719, "logps/rejected": -548.5682983398438, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": -0.40461841225624084, "rewards/margins": 4.674792766571045, "rewards/rejected": -5.079411506652832, "step": 8680 }, { "epoch": 0.1, "learning_rate": 4.999753612443084e-06, "logits/chosen": -3.110299587249756, "logits/rejected": -3.04107928276062, "logps/chosen": -71.31452941894531, "logps/rejected": -502.16119384765625, "loss": 0.1167, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2841648459434509, "rewards/margins": 4.338988304138184, "rewards/rejected": -4.623152732849121, "step": 8690 }, { "epoch": 0.1, "learning_rate": 4.99973872853339e-06, "logits/chosen": -3.053104877471924, "logits/rejected": -2.9701433181762695, "logps/chosen": -45.776641845703125, "logps/rejected": -553.7066040039062, "loss": 0.0656, "rewards/accuracies": 1.0, "rewards/chosen": 0.004411667585372925, "rewards/margins": 5.143113136291504, "rewards/rejected": -5.138701438903809, "step": 8700 }, { "epoch": 0.1, "learning_rate": 4.999723408176256e-06, "logits/chosen": -3.0735630989074707, "logits/rejected": -3.029387950897217, "logps/chosen": -39.40694808959961, "logps/rejected": -553.2994384765625, "loss": 0.0467, "rewards/accuracies": 1.0, "rewards/chosen": 0.07235029339790344, "rewards/margins": 5.219162464141846, "rewards/rejected": -5.146812438964844, "step": 8710 }, { "epoch": 0.1, "learning_rate": 4.999707651374358e-06, "logits/chosen": -3.063948154449463, "logits/rejected": -3.0220627784729004, "logps/chosen": -12.127506256103516, "logps/rejected": -369.5936584472656, "loss": 0.0528, "rewards/accuracies": 1.0, "rewards/chosen": 0.23714056611061096, "rewards/margins": 3.56524395942688, "rewards/rejected": -3.328103542327881, "step": 8720 }, { "epoch": 0.1, "learning_rate": 4.999691458130446e-06, "logits/chosen": -3.119020462036133, "logits/rejected": -3.0454840660095215, "logps/chosen": -51.29646682739258, "logps/rejected": -535.4961547851562, "loss": 0.1121, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.04224897548556328, "rewards/margins": 4.913947105407715, "rewards/rejected": -4.956196308135986, "step": 8730 }, { "epoch": 0.1, "learning_rate": 4.99967482844735e-06, "logits/chosen": -3.0554146766662598, "logits/rejected": -3.0029304027557373, "logps/chosen": -20.943084716796875, "logps/rejected": -460.30322265625, "loss": 0.0833, "rewards/accuracies": 1.0, "rewards/chosen": 0.20702281594276428, "rewards/margins": 4.438570022583008, "rewards/rejected": -4.2315473556518555, "step": 8740 }, { "epoch": 0.1, "learning_rate": 4.99965776232797e-06, "logits/chosen": -3.09016752243042, "logits/rejected": -3.013077735900879, "logps/chosen": -20.011220932006836, "logps/rejected": -445.14410400390625, "loss": 0.1216, "rewards/accuracies": 1.0, "rewards/chosen": 0.16059185564517975, "rewards/margins": 4.24760103225708, "rewards/rejected": -4.087008953094482, "step": 8750 }, { "epoch": 0.1, "learning_rate": 4.9996402597752875e-06, "logits/chosen": -3.0950191020965576, "logits/rejected": -3.0165257453918457, "logps/chosen": -39.043739318847656, "logps/rejected": -552.2089233398438, "loss": 0.1276, "rewards/accuracies": 1.0, "rewards/chosen": 0.11070360988378525, "rewards/margins": 5.2401933670043945, "rewards/rejected": -5.129489421844482, "step": 8760 }, { "epoch": 0.1, "learning_rate": 4.999622320792358e-06, "logits/chosen": -3.0891029834747314, "logits/rejected": -3.0468010902404785, "logps/chosen": -49.8117790222168, "logps/rejected": -564.6720581054688, "loss": 0.0501, "rewards/accuracies": 1.0, "rewards/chosen": -0.020416250452399254, "rewards/margins": 5.230719566345215, "rewards/rejected": -5.251136302947998, "step": 8770 }, { "epoch": 0.11, "learning_rate": 4.999603945382314e-06, "logits/chosen": -3.0829687118530273, "logits/rejected": -3.0251355171203613, "logps/chosen": -34.77599334716797, "logps/rejected": -518.374267578125, "loss": 0.0628, "rewards/accuracies": 1.0, "rewards/chosen": 0.15841439366340637, "rewards/margins": 4.957755088806152, "rewards/rejected": -4.79934024810791, "step": 8780 }, { "epoch": 0.11, "learning_rate": 4.999585133548363e-06, "logits/chosen": -3.1135144233703613, "logits/rejected": -3.0533084869384766, "logps/chosen": -36.61737060546875, "logps/rejected": -512.6041870117188, "loss": 0.0456, "rewards/accuracies": 1.0, "rewards/chosen": 0.11778160184621811, "rewards/margins": 4.849371910095215, "rewards/rejected": -4.731590747833252, "step": 8790 }, { "epoch": 0.11, "learning_rate": 4.99956588529379e-06, "logits/chosen": -3.075364589691162, "logits/rejected": -2.9606356620788574, "logps/chosen": -55.1121711730957, "logps/rejected": -628.3610229492188, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": 0.044828057289123535, "rewards/margins": 5.919948577880859, "rewards/rejected": -5.875120162963867, "step": 8800 }, { "epoch": 0.11, "learning_rate": 4.999546200621956e-06, "logits/chosen": -3.0947155952453613, "logits/rejected": -3.0321879386901855, "logps/chosen": -57.36060333251953, "logps/rejected": -524.0116577148438, "loss": 0.044, "rewards/accuracies": 1.0, "rewards/chosen": -0.13642747700214386, "rewards/margins": 4.705564975738525, "rewards/rejected": -4.841991424560547, "step": 8810 }, { "epoch": 0.11, "learning_rate": 4.999526079536298e-06, "logits/chosen": -3.086237668991089, "logits/rejected": -3.0487122535705566, "logps/chosen": -31.858211517333984, "logps/rejected": -449.2300720214844, "loss": 0.0493, "rewards/accuracies": 1.0, "rewards/chosen": 0.13549187779426575, "rewards/margins": 4.243161201477051, "rewards/rejected": -4.107668876647949, "step": 8820 }, { "epoch": 0.11, "learning_rate": 4.999505522040328e-06, "logits/chosen": -3.10456919670105, "logits/rejected": -3.0302186012268066, "logps/chosen": -93.8714828491211, "logps/rejected": -602.4816284179688, "loss": 0.1962, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.4279175400733948, "rewards/margins": 5.201457977294922, "rewards/rejected": -5.629376411437988, "step": 8830 }, { "epoch": 0.11, "learning_rate": 4.999484528137636e-06, "logits/chosen": -3.069533586502075, "logits/rejected": -3.017191171646118, "logps/chosen": -48.65385055541992, "logps/rejected": -633.2689819335938, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/chosen": 0.009728049859404564, "rewards/margins": 5.9307427406311035, "rewards/rejected": -5.921014308929443, "step": 8840 }, { "epoch": 0.11, "learning_rate": 4.999463097831887e-06, "logits/chosen": -3.073481798171997, "logits/rejected": -2.9934890270233154, "logps/chosen": -53.222434997558594, "logps/rejected": -498.67791748046875, "loss": 0.0646, "rewards/accuracies": 1.0, "rewards/chosen": -0.06785252690315247, "rewards/margins": 4.525514125823975, "rewards/rejected": -4.5933661460876465, "step": 8850 }, { "epoch": 0.11, "learning_rate": 4.999441231126824e-06, "logits/chosen": -3.0972819328308105, "logits/rejected": -3.051786184310913, "logps/chosen": -29.574962615966797, "logps/rejected": -437.48004150390625, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": 0.16632641851902008, "rewards/margins": 4.164427757263184, "rewards/rejected": -3.998100996017456, "step": 8860 }, { "epoch": 0.11, "learning_rate": 4.999418928026263e-06, "logits/chosen": -3.072299003601074, "logits/rejected": -3.029175281524658, "logps/chosen": -30.250720977783203, "logps/rejected": -502.69317626953125, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": 0.13579623401165009, "rewards/margins": 4.772972583770752, "rewards/rejected": -4.637176513671875, "step": 8870 }, { "epoch": 0.11, "learning_rate": 4.9993961885341e-06, "logits/chosen": -3.085737943649292, "logits/rejected": -3.009549140930176, "logps/chosen": -42.759742736816406, "logps/rejected": -487.8379821777344, "loss": 0.072, "rewards/accuracies": 1.0, "rewards/chosen": 0.08063627034425735, "rewards/margins": 4.568955421447754, "rewards/rejected": -4.488318920135498, "step": 8880 }, { "epoch": 0.11, "learning_rate": 4.999373012654304e-06, "logits/chosen": -3.077752113342285, "logits/rejected": -2.935224771499634, "logps/chosen": -46.194190979003906, "logps/rejected": -553.357666015625, "loss": 0.0826, "rewards/accuracies": 1.0, "rewards/chosen": 0.08354628086090088, "rewards/margins": 5.227623462677002, "rewards/rejected": -5.144077301025391, "step": 8890 }, { "epoch": 0.11, "learning_rate": 4.999349400390921e-06, "logits/chosen": -3.1452441215515137, "logits/rejected": -3.0629053115844727, "logps/chosen": -55.202659606933594, "logps/rejected": -535.712158203125, "loss": 0.1156, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.06107480451464653, "rewards/margins": 4.904529094696045, "rewards/rejected": -4.965603828430176, "step": 8900 }, { "epoch": 0.11, "learning_rate": 4.999325351748075e-06, "logits/chosen": -3.060347080230713, "logits/rejected": -3.0528388023376465, "logps/chosen": -13.101753234863281, "logps/rejected": -411.286865234375, "loss": 0.0733, "rewards/accuracies": 1.0, "rewards/chosen": 0.253030002117157, "rewards/margins": 3.992572069168091, "rewards/rejected": -3.739542007446289, "step": 8910 }, { "epoch": 0.11, "learning_rate": 4.999300866729964e-06, "logits/chosen": -3.091768980026245, "logits/rejected": -3.0272152423858643, "logps/chosen": -44.084800720214844, "logps/rejected": -576.209228515625, "loss": 0.1161, "rewards/accuracies": 1.0, "rewards/chosen": 0.07312421500682831, "rewards/margins": 5.4419684410095215, "rewards/rejected": -5.368844509124756, "step": 8920 }, { "epoch": 0.11, "learning_rate": 4.999275945340863e-06, "logits/chosen": -3.0506882667541504, "logits/rejected": -2.975102663040161, "logps/chosen": -41.3146858215332, "logps/rejected": -626.60400390625, "loss": 0.1059, "rewards/accuracies": 1.0, "rewards/chosen": 0.060177695006132126, "rewards/margins": 5.937053680419922, "rewards/rejected": -5.876875877380371, "step": 8930 }, { "epoch": 0.11, "learning_rate": 4.999250587585123e-06, "logits/chosen": -3.087790012359619, "logits/rejected": -3.048640251159668, "logps/chosen": -104.60697174072266, "logps/rejected": -492.1963806152344, "loss": 0.2102, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5547735095024109, "rewards/margins": 3.980320692062378, "rewards/rejected": -4.535094261169434, "step": 8940 }, { "epoch": 0.11, "learning_rate": 4.9992247934671725e-06, "logits/chosen": -3.112117290496826, "logits/rejected": -3.03859806060791, "logps/chosen": -118.4149169921875, "logps/rejected": -586.2343139648438, "loss": 0.0517, "rewards/accuracies": 1.0, "rewards/chosen": -0.6530068516731262, "rewards/margins": 4.800646781921387, "rewards/rejected": -5.453653812408447, "step": 8950 }, { "epoch": 0.11, "learning_rate": 4.999198562991514e-06, "logits/chosen": -3.0888261795043945, "logits/rejected": -3.0506515502929688, "logps/chosen": -62.49609375, "logps/rejected": -538.5535888671875, "loss": 0.1059, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.16284728050231934, "rewards/margins": 4.828690528869629, "rewards/rejected": -4.991538047790527, "step": 8960 }, { "epoch": 0.11, "learning_rate": 4.999171896162728e-06, "logits/chosen": -3.094055652618408, "logits/rejected": -3.0337436199188232, "logps/chosen": -43.99740219116211, "logps/rejected": -576.9277954101562, "loss": 0.0538, "rewards/accuracies": 1.0, "rewards/chosen": 0.10297620296478271, "rewards/margins": 5.478954315185547, "rewards/rejected": -5.375978469848633, "step": 8970 }, { "epoch": 0.11, "learning_rate": 4.9991447929854695e-06, "logits/chosen": -3.0448474884033203, "logits/rejected": -2.9909369945526123, "logps/chosen": -34.36140060424805, "logps/rejected": -541.7244262695312, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": 0.13119438290596008, "rewards/margins": 5.156888008117676, "rewards/rejected": -5.025693416595459, "step": 8980 }, { "epoch": 0.11, "learning_rate": 4.999117253464472e-06, "logits/chosen": -3.062572717666626, "logits/rejected": -3.0202736854553223, "logps/chosen": -25.718664169311523, "logps/rejected": -510.8045959472656, "loss": 0.0912, "rewards/accuracies": 1.0, "rewards/chosen": 0.1621357947587967, "rewards/margins": 4.879272937774658, "rewards/rejected": -4.717137336730957, "step": 8990 }, { "epoch": 0.11, "learning_rate": 4.999089277604543e-06, "logits/chosen": -3.0688841342926025, "logits/rejected": -3.04455828666687, "logps/chosen": -31.7766056060791, "logps/rejected": -502.3777770996094, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.1496807187795639, "rewards/margins": 4.7992353439331055, "rewards/rejected": -4.649554252624512, "step": 9000 }, { "epoch": 0.11, "eval_logits/chosen": -3.1136200428009033, "eval_logits/rejected": -3.026916742324829, "eval_logps/chosen": -123.78189849853516, "eval_logps/rejected": -613.8440551757812, "eval_loss": 0.02038406953215599, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.6260163187980652, "eval_rewards/margins": 5.045170307159424, "eval_rewards/rejected": -5.671186447143555, "eval_runtime": 1.2166, "eval_samples_per_second": 4.11, "eval_steps_per_second": 2.466, "step": 9000 }, { "epoch": 0.11, "learning_rate": 4.999060865410567e-06, "logits/chosen": -3.0672848224639893, "logits/rejected": -2.978073835372925, "logps/chosen": -93.24671936035156, "logps/rejected": -628.2916259765625, "loss": 0.0604, "rewards/accuracies": 1.0, "rewards/chosen": -0.3738320469856262, "rewards/margins": 5.5047221183776855, "rewards/rejected": -5.878553867340088, "step": 9010 }, { "epoch": 0.11, "learning_rate": 4.999032016887504e-06, "logits/chosen": -3.081122875213623, "logits/rejected": -3.0215768814086914, "logps/chosen": -52.61600875854492, "logps/rejected": -661.0646362304688, "loss": 0.1046, "rewards/accuracies": 1.0, "rewards/chosen": -0.015228187665343285, "rewards/margins": 6.196347236633301, "rewards/rejected": -6.211574554443359, "step": 9020 }, { "epoch": 0.11, "learning_rate": 4.9990027320403925e-06, "logits/chosen": -3.074507236480713, "logits/rejected": -2.9835422039031982, "logps/chosen": -78.97842407226562, "logps/rejected": -663.33447265625, "loss": 0.1112, "rewards/accuracies": 1.0, "rewards/chosen": -0.2798195481300354, "rewards/margins": 5.958216667175293, "rewards/rejected": -6.238036155700684, "step": 9030 }, { "epoch": 0.11, "learning_rate": 4.998973010874344e-06, "logits/chosen": -3.098271131515503, "logits/rejected": -3.0193355083465576, "logps/chosen": -37.92732238769531, "logps/rejected": -553.3681640625, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 0.06383734941482544, "rewards/margins": 5.212776184082031, "rewards/rejected": -5.1489386558532715, "step": 9040 }, { "epoch": 0.11, "learning_rate": 4.998942853394548e-06, "logits/chosen": -3.09257173538208, "logits/rejected": -3.005636215209961, "logps/chosen": -74.14598083496094, "logps/rejected": -723.9775390625, "loss": 0.0673, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.15510061383247375, "rewards/margins": 6.668810844421387, "rewards/rejected": -6.823910713195801, "step": 9050 }, { "epoch": 0.11, "learning_rate": 4.998912259606271e-06, "logits/chosen": -3.0824639797210693, "logits/rejected": -3.0325112342834473, "logps/chosen": -82.18896484375, "logps/rejected": -491.30377197265625, "loss": 0.1547, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3704712986946106, "rewards/margins": 4.158419609069824, "rewards/rejected": -4.528891086578369, "step": 9060 }, { "epoch": 0.11, "learning_rate": 4.998881229514854e-06, "logits/chosen": -3.0955567359924316, "logits/rejected": -3.0688159465789795, "logps/chosen": -79.82999420166016, "logps/rejected": -434.5846252441406, "loss": 0.0468, "rewards/accuracies": 1.0, "rewards/chosen": -0.3835376799106598, "rewards/margins": 3.5781455039978027, "rewards/rejected": -3.9616832733154297, "step": 9070 }, { "epoch": 0.11, "learning_rate": 4.998849763125715e-06, "logits/chosen": -3.0861639976501465, "logits/rejected": -3.0052928924560547, "logps/chosen": -64.25391387939453, "logps/rejected": -675.2799072265625, "loss": 0.1317, "rewards/accuracies": 1.0, "rewards/chosen": -0.15915709733963013, "rewards/margins": 6.202559471130371, "rewards/rejected": -6.361716270446777, "step": 9080 }, { "epoch": 0.11, "learning_rate": 4.998817860444346e-06, "logits/chosen": -3.0797340869903564, "logits/rejected": -2.9773812294006348, "logps/chosen": -108.60285949707031, "logps/rejected": -689.7935791015625, "loss": 0.0715, "rewards/accuracies": 1.0, "rewards/chosen": -0.6237901449203491, "rewards/margins": 5.866580009460449, "rewards/rejected": -6.490370273590088, "step": 9090 }, { "epoch": 0.11, "learning_rate": 4.99878552147632e-06, "logits/chosen": -3.063884735107422, "logits/rejected": -2.9856836795806885, "logps/chosen": -45.33899688720703, "logps/rejected": -515.7363891601562, "loss": 0.1288, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.01111592911183834, "rewards/margins": 4.780289649963379, "rewards/rejected": -4.769173622131348, "step": 9100 }, { "epoch": 0.11, "learning_rate": 4.9987527462272825e-06, "logits/chosen": -3.0308690071105957, "logits/rejected": -2.938260793685913, "logps/chosen": -60.55588912963867, "logps/rejected": -650.8956298828125, "loss": 0.1155, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.046709466725587845, "rewards/margins": 6.0590314865112305, "rewards/rejected": -6.105741500854492, "step": 9110 }, { "epoch": 0.11, "learning_rate": 4.998719534702955e-06, "logits/chosen": -3.1276462078094482, "logits/rejected": -3.0794475078582764, "logps/chosen": -54.19074630737305, "logps/rejected": -481.71722412109375, "loss": 0.109, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.10742554813623428, "rewards/margins": 4.315962314605713, "rewards/rejected": -4.423388481140137, "step": 9120 }, { "epoch": 0.11, "learning_rate": 4.9986858869091365e-06, "logits/chosen": -3.066253900527954, "logits/rejected": -3.000877857208252, "logps/chosen": -52.76551055908203, "logps/rejected": -621.7063598632812, "loss": 0.0519, "rewards/accuracies": 1.0, "rewards/chosen": 0.02950243279337883, "rewards/margins": 5.8438568115234375, "rewards/rejected": -5.814354419708252, "step": 9130 }, { "epoch": 0.11, "learning_rate": 4.998651802851701e-06, "logits/chosen": -3.089937686920166, "logits/rejected": -2.99131441116333, "logps/chosen": -56.71881103515625, "logps/rejected": -597.272216796875, "loss": 0.1417, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.043841343373060226, "rewards/margins": 5.530558109283447, "rewards/rejected": -5.574398994445801, "step": 9140 }, { "epoch": 0.11, "learning_rate": 4.998617282536602e-06, "logits/chosen": -3.0394418239593506, "logits/rejected": -2.9523377418518066, "logps/chosen": -39.133827209472656, "logps/rejected": -556.9993896484375, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.08447077125310898, "rewards/margins": 5.254370212554932, "rewards/rejected": -5.1698994636535645, "step": 9150 }, { "epoch": 0.11, "learning_rate": 4.998582325969865e-06, "logits/chosen": -3.1192545890808105, "logits/rejected": -3.0592169761657715, "logps/chosen": -34.30164337158203, "logps/rejected": -674.1307373046875, "loss": 0.0843, "rewards/accuracies": 1.0, "rewards/chosen": 0.13646623492240906, "rewards/margins": 6.479323387145996, "rewards/rejected": -6.342857360839844, "step": 9160 }, { "epoch": 0.11, "learning_rate": 4.9985469331575926e-06, "logits/chosen": -3.118891477584839, "logits/rejected": -3.0790836811065674, "logps/chosen": -21.216781616210938, "logps/rejected": -462.624267578125, "loss": 0.0496, "rewards/accuracies": 1.0, "rewards/chosen": 0.18567045032978058, "rewards/margins": 4.425685405731201, "rewards/rejected": -4.240015506744385, "step": 9170 }, { "epoch": 0.11, "learning_rate": 4.998511104105966e-06, "logits/chosen": -3.046947479248047, "logits/rejected": -2.984679937362671, "logps/chosen": -51.37111282348633, "logps/rejected": -779.2328491210938, "loss": 0.0372, "rewards/accuracies": 1.0, "rewards/chosen": 0.08139514923095703, "rewards/margins": 7.4623565673828125, "rewards/rejected": -7.3809614181518555, "step": 9180 }, { "epoch": 0.11, "learning_rate": 4.998474838821239e-06, "logits/chosen": -3.070298671722412, "logits/rejected": -2.999493360519409, "logps/chosen": -59.0340690612793, "logps/rejected": -738.0706176757812, "loss": 0.1233, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.08515767753124237, "rewards/margins": 6.889557838439941, "rewards/rejected": -6.974714756011963, "step": 9190 }, { "epoch": 0.11, "learning_rate": 4.998438137309745e-06, "logits/chosen": -3.079407215118408, "logits/rejected": -3.0323891639709473, "logps/chosen": -45.83501434326172, "logps/rejected": -427.46368408203125, "loss": 0.0603, "rewards/accuracies": 1.0, "rewards/chosen": -0.04555099457502365, "rewards/margins": 3.853468656539917, "rewards/rejected": -3.899019718170166, "step": 9200 }, { "epoch": 0.11, "learning_rate": 4.99840099957789e-06, "logits/chosen": -3.090974807739258, "logits/rejected": -3.010389804840088, "logps/chosen": -65.58302307128906, "logps/rejected": -571.2030029296875, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/chosen": -0.19938918948173523, "rewards/margins": 5.135775566101074, "rewards/rejected": -5.335165023803711, "step": 9210 }, { "epoch": 0.11, "learning_rate": 4.998363425632161e-06, "logits/chosen": -3.0825881958007812, "logits/rejected": -3.0088720321655273, "logps/chosen": -57.38078689575195, "logps/rejected": -628.5367431640625, "loss": 0.0517, "rewards/accuracies": 1.0, "rewards/chosen": -0.013734865002334118, "rewards/margins": 5.854405403137207, "rewards/rejected": -5.86814022064209, "step": 9220 }, { "epoch": 0.11, "learning_rate": 4.998325415479116e-06, "logits/chosen": -3.0619635581970215, "logits/rejected": -2.9992287158966064, "logps/chosen": -62.51201248168945, "logps/rejected": -727.7014770507812, "loss": 0.0903, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.12070751190185547, "rewards/margins": 6.753968238830566, "rewards/rejected": -6.8746747970581055, "step": 9230 }, { "epoch": 0.11, "learning_rate": 4.998286969125393e-06, "logits/chosen": -3.0444862842559814, "logits/rejected": -2.9270846843719482, "logps/chosen": -56.82952880859375, "logps/rejected": -609.0709228515625, "loss": 0.0435, "rewards/accuracies": 1.0, "rewards/chosen": -0.06476514041423798, "rewards/margins": 5.6234846115112305, "rewards/rejected": -5.688249111175537, "step": 9240 }, { "epoch": 0.11, "learning_rate": 4.998248086577704e-06, "logits/chosen": -3.106743335723877, "logits/rejected": -3.0497632026672363, "logps/chosen": -37.17952346801758, "logps/rejected": -535.7514038085938, "loss": 0.0427, "rewards/accuracies": 1.0, "rewards/chosen": 0.06004144996404648, "rewards/margins": 5.03103494644165, "rewards/rejected": -4.970993995666504, "step": 9250 }, { "epoch": 0.11, "learning_rate": 4.998208767842836e-06, "logits/chosen": -3.094853401184082, "logits/rejected": -2.9307360649108887, "logps/chosen": -121.4836196899414, "logps/rejected": -887.5480346679688, "loss": 0.0987, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6617704629898071, "rewards/margins": 7.78750467300415, "rewards/rejected": -8.449274063110352, "step": 9260 }, { "epoch": 0.11, "learning_rate": 4.9981690129276575e-06, "logits/chosen": -3.055040121078491, "logits/rejected": -3.01432466506958, "logps/chosen": -41.111507415771484, "logps/rejected": -452.89215087890625, "loss": 0.0537, "rewards/accuracies": 1.0, "rewards/chosen": 0.06564869731664658, "rewards/margins": 4.2107977867126465, "rewards/rejected": -4.145149230957031, "step": 9270 }, { "epoch": 0.11, "learning_rate": 4.998128821839106e-06, "logits/chosen": -3.073687791824341, "logits/rejected": -3.0484395027160645, "logps/chosen": -38.75830841064453, "logps/rejected": -414.8797302246094, "loss": 0.1248, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.0029683648608624935, "rewards/margins": 3.7818236351013184, "rewards/rejected": -3.778855800628662, "step": 9280 }, { "epoch": 0.11, "learning_rate": 4.998088194584202e-06, "logits/chosen": -3.039902687072754, "logits/rejected": -2.971874237060547, "logps/chosen": -43.866695404052734, "logps/rejected": -720.266357421875, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": -0.0052408454939723015, "rewards/margins": 6.807099342346191, "rewards/rejected": -6.812339782714844, "step": 9290 }, { "epoch": 0.11, "learning_rate": 4.998047131170036e-06, "logits/chosen": -3.072593927383423, "logits/rejected": -3.0474507808685303, "logps/chosen": -50.021522521972656, "logps/rejected": -450.9913635253906, "loss": 0.1223, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.09150471538305283, "rewards/margins": 4.030438423156738, "rewards/rejected": -4.12194299697876, "step": 9300 }, { "epoch": 0.11, "learning_rate": 4.99800563160378e-06, "logits/chosen": -3.075866222381592, "logits/rejected": -2.9684014320373535, "logps/chosen": -66.32466888427734, "logps/rejected": -654.8642578125, "loss": 0.1136, "rewards/accuracies": 1.0, "rewards/chosen": -0.15887750685214996, "rewards/margins": 5.9811906814575195, "rewards/rejected": -6.140068054199219, "step": 9310 }, { "epoch": 0.11, "learning_rate": 4.997963695892678e-06, "logits/chosen": -3.100893497467041, "logits/rejected": -3.047373056411743, "logps/chosen": -59.81605911254883, "logps/rejected": -473.5150451660156, "loss": 0.0528, "rewards/accuracies": 1.0, "rewards/chosen": -0.10095677524805069, "rewards/margins": 4.244928359985352, "rewards/rejected": -4.345885276794434, "step": 9320 }, { "epoch": 0.11, "learning_rate": 4.997921324044054e-06, "logits/chosen": -3.0539398193359375, "logits/rejected": -3.010143995285034, "logps/chosen": -49.75941467285156, "logps/rejected": -570.4165649414062, "loss": 0.0508, "rewards/accuracies": 1.0, "rewards/chosen": -0.006287410855293274, "rewards/margins": 5.30271053314209, "rewards/rejected": -5.3089985847473145, "step": 9330 }, { "epoch": 0.11, "learning_rate": 4.997878516065303e-06, "logits/chosen": -3.086778163909912, "logits/rejected": -3.0273871421813965, "logps/chosen": -33.39529037475586, "logps/rejected": -540.5811767578125, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.15826931595802307, "rewards/margins": 5.168120384216309, "rewards/rejected": -5.009850978851318, "step": 9340 }, { "epoch": 0.11, "learning_rate": 4.997835271963901e-06, "logits/chosen": -3.095398426055908, "logits/rejected": -3.0588459968566895, "logps/chosen": -42.807044982910156, "logps/rejected": -523.1246337890625, "loss": 0.1088, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.003004909958690405, "rewards/margins": 4.853929042816162, "rewards/rejected": -4.850924015045166, "step": 9350 }, { "epoch": 0.11, "learning_rate": 4.997791591747398e-06, "logits/chosen": -3.054957866668701, "logits/rejected": -2.9958596229553223, "logps/chosen": -80.61767578125, "logps/rejected": -620.0565185546875, "loss": 0.0958, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3781440854072571, "rewards/margins": 5.433331489562988, "rewards/rejected": -5.8114752769470215, "step": 9360 }, { "epoch": 0.11, "learning_rate": 4.99774747542342e-06, "logits/chosen": -3.063368320465088, "logits/rejected": -3.0213124752044678, "logps/chosen": -60.34442138671875, "logps/rejected": -546.6924438476562, "loss": 0.157, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.11688268184661865, "rewards/margins": 4.960160732269287, "rewards/rejected": -5.077043533325195, "step": 9370 }, { "epoch": 0.11, "learning_rate": 4.997702922999671e-06, "logits/chosen": -3.063589572906494, "logits/rejected": -3.0071771144866943, "logps/chosen": -61.45256423950195, "logps/rejected": -663.4051513671875, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": -0.03210620954632759, "rewards/margins": 6.196929931640625, "rewards/rejected": -6.229036331176758, "step": 9380 }, { "epoch": 0.11, "learning_rate": 4.997657934483927e-06, "logits/chosen": -3.039297103881836, "logits/rejected": -2.9591422080993652, "logps/chosen": -50.76759338378906, "logps/rejected": -736.59228515625, "loss": 0.0811, "rewards/accuracies": 1.0, "rewards/chosen": 0.05015711858868599, "rewards/margins": 6.987672328948975, "rewards/rejected": -6.937514305114746, "step": 9390 }, { "epoch": 0.11, "learning_rate": 4.997612509884046e-06, "logits/chosen": -3.0497360229492188, "logits/rejected": -2.955267906188965, "logps/chosen": -44.62461853027344, "logps/rejected": -684.7183837890625, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/chosen": 0.026498913764953613, "rewards/margins": 6.48452091217041, "rewards/rejected": -6.458021640777588, "step": 9400 }, { "epoch": 0.11, "learning_rate": 4.9975666492079574e-06, "logits/chosen": -3.1038928031921387, "logits/rejected": -3.0211994647979736, "logps/chosen": -50.19918441772461, "logps/rejected": -460.7176818847656, "loss": 0.0452, "rewards/accuracies": 1.0, "rewards/chosen": -0.044337786734104156, "rewards/margins": 4.189174175262451, "rewards/rejected": -4.233512878417969, "step": 9410 }, { "epoch": 0.11, "learning_rate": 4.9975203524636685e-06, "logits/chosen": -3.108811855316162, "logits/rejected": -3.0472779273986816, "logps/chosen": -34.557212829589844, "logps/rejected": -593.052978515625, "loss": 0.0529, "rewards/accuracies": 1.0, "rewards/chosen": 0.1444544643163681, "rewards/margins": 5.687053680419922, "rewards/rejected": -5.542599201202393, "step": 9420 }, { "epoch": 0.11, "learning_rate": 4.997473619659262e-06, "logits/chosen": -3.1154723167419434, "logits/rejected": -3.0583183765411377, "logps/chosen": -26.845046997070312, "logps/rejected": -505.2361755371094, "loss": 0.0452, "rewards/accuracies": 1.0, "rewards/chosen": 0.16987289488315582, "rewards/margins": 4.824835777282715, "rewards/rejected": -4.65496301651001, "step": 9430 }, { "epoch": 0.11, "learning_rate": 4.997426450802899e-06, "logits/chosen": -3.127467632293701, "logits/rejected": -3.0112521648406982, "logps/chosen": -43.43376541137695, "logps/rejected": -582.5481567382812, "loss": 0.0464, "rewards/accuracies": 1.0, "rewards/chosen": 0.1290743350982666, "rewards/margins": 5.5520124435424805, "rewards/rejected": -5.422938346862793, "step": 9440 }, { "epoch": 0.11, "learning_rate": 4.997378845902813e-06, "logits/chosen": -3.121182918548584, "logits/rejected": -3.077118396759033, "logps/chosen": -24.059795379638672, "logps/rejected": -356.54254150390625, "loss": 0.0519, "rewards/accuracies": 1.0, "rewards/chosen": 0.1472131311893463, "rewards/margins": 3.3541769981384277, "rewards/rejected": -3.2069640159606934, "step": 9450 }, { "epoch": 0.11, "learning_rate": 4.9973308049673175e-06, "logits/chosen": -3.102726459503174, "logits/rejected": -3.0858750343322754, "logps/chosen": -36.48549270629883, "logps/rejected": -419.7886657714844, "loss": 0.0428, "rewards/accuracies": 1.0, "rewards/chosen": 0.07764170318841934, "rewards/margins": 3.9015800952911377, "rewards/rejected": -3.8239388465881348, "step": 9460 }, { "epoch": 0.11, "learning_rate": 4.997282328004799e-06, "logits/chosen": -3.0820770263671875, "logits/rejected": -3.0320184230804443, "logps/chosen": -82.78069305419922, "logps/rejected": -616.9734497070312, "loss": 0.1185, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2841911017894745, "rewards/margins": 5.498443603515625, "rewards/rejected": -5.782634735107422, "step": 9470 }, { "epoch": 0.11, "learning_rate": 4.997233415023722e-06, "logits/chosen": -3.0866246223449707, "logits/rejected": -2.989799737930298, "logps/chosen": -34.35099411010742, "logps/rejected": -578.9456787109375, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": 0.12338165193796158, "rewards/margins": 5.515599250793457, "rewards/rejected": -5.392216682434082, "step": 9480 }, { "epoch": 0.11, "learning_rate": 4.997184066032627e-06, "logits/chosen": -3.068897247314453, "logits/rejected": -2.9827208518981934, "logps/chosen": -46.8503532409668, "logps/rejected": -526.923583984375, "loss": 0.1099, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.015874793753027916, "rewards/margins": 4.906674861907959, "rewards/rejected": -4.890799522399902, "step": 9490 }, { "epoch": 0.11, "learning_rate": 4.997134281040128e-06, "logits/chosen": -3.081599473953247, "logits/rejected": -3.0063154697418213, "logps/chosen": -51.89912033081055, "logps/rejected": -600.07275390625, "loss": 0.0438, "rewards/accuracies": 1.0, "rewards/chosen": 0.046885453164577484, "rewards/margins": 5.655783653259277, "rewards/rejected": -5.608898639678955, "step": 9500 }, { "epoch": 0.11, "learning_rate": 4.997084060054921e-06, "logits/chosen": -3.108860492706299, "logits/rejected": -3.070613145828247, "logps/chosen": -37.97473907470703, "logps/rejected": -502.4673767089844, "loss": 0.1223, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.01964651048183441, "rewards/margins": 4.664579391479492, "rewards/rejected": -4.644932746887207, "step": 9510 }, { "epoch": 0.11, "learning_rate": 4.997033403085771e-06, "logits/chosen": -3.0937399864196777, "logits/rejected": -3.066985607147217, "logps/chosen": -37.14159393310547, "logps/rejected": -470.419921875, "loss": 0.1824, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.05991704389452934, "rewards/margins": 4.391079425811768, "rewards/rejected": -4.331162452697754, "step": 9520 }, { "epoch": 0.11, "learning_rate": 4.996982310141526e-06, "logits/chosen": -3.1281590461730957, "logits/rejected": -3.080925703048706, "logps/chosen": -42.423667907714844, "logps/rejected": -353.93328857421875, "loss": 0.115, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.03645015507936478, "rewards/margins": 3.1290929317474365, "rewards/rejected": -3.1655433177948, "step": 9530 }, { "epoch": 0.11, "learning_rate": 4.996930781231103e-06, "logits/chosen": -3.0803890228271484, "logits/rejected": -3.0408458709716797, "logps/chosen": -54.47527313232422, "logps/rejected": -616.0869140625, "loss": 0.0335, "rewards/accuracies": 1.0, "rewards/chosen": 0.017871806398034096, "rewards/margins": 5.775922775268555, "rewards/rejected": -5.758050441741943, "step": 9540 }, { "epoch": 0.11, "learning_rate": 4.996878816363501e-06, "logits/chosen": -3.087773323059082, "logits/rejected": -3.027182102203369, "logps/chosen": -54.97522735595703, "logps/rejected": -455.973876953125, "loss": 0.0652, "rewards/accuracies": 1.0, "rewards/chosen": -0.07938803732395172, "rewards/margins": 4.0954718589782715, "rewards/rejected": -4.174860000610352, "step": 9550 }, { "epoch": 0.11, "learning_rate": 4.996826415547792e-06, "logits/chosen": -3.0740859508514404, "logits/rejected": -3.0312671661376953, "logps/chosen": -28.73981285095215, "logps/rejected": -472.10064697265625, "loss": 0.0882, "rewards/accuracies": 1.0, "rewards/chosen": 0.1806071549654007, "rewards/margins": 4.5228681564331055, "rewards/rejected": -4.34226131439209, "step": 9560 }, { "epoch": 0.11, "learning_rate": 4.996773578793126e-06, "logits/chosen": -3.097409725189209, "logits/rejected": -3.0510010719299316, "logps/chosen": -44.1057014465332, "logps/rejected": -526.7113037109375, "loss": 0.1097, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.017473934218287468, "rewards/margins": 4.894179344177246, "rewards/rejected": -4.876705169677734, "step": 9570 }, { "epoch": 0.11, "learning_rate": 4.996720306108727e-06, "logits/chosen": -3.0676627159118652, "logits/rejected": -3.0234131813049316, "logps/chosen": -26.372995376586914, "logps/rejected": -538.0377197265625, "loss": 0.0664, "rewards/accuracies": 1.0, "rewards/chosen": 0.15862712264060974, "rewards/margins": 5.149519920349121, "rewards/rejected": -4.990893363952637, "step": 9580 }, { "epoch": 0.11, "learning_rate": 4.996666597503899e-06, "logits/chosen": -3.088256359100342, "logits/rejected": -3.051664352416992, "logps/chosen": -55.92671585083008, "logps/rejected": -523.4490356445312, "loss": 0.1124, "rewards/accuracies": 1.0, "rewards/chosen": -0.05339865759015083, "rewards/margins": 4.798871994018555, "rewards/rejected": -4.85227108001709, "step": 9590 }, { "epoch": 0.11, "learning_rate": 4.996612452988015e-06, "logits/chosen": -3.0561845302581787, "logits/rejected": -2.983477830886841, "logps/chosen": -74.31575012207031, "logps/rejected": -436.13690185546875, "loss": 0.0772, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.30368703603744507, "rewards/margins": 3.6816704273223877, "rewards/rejected": -3.9853577613830566, "step": 9600 }, { "epoch": 0.12, "learning_rate": 4.996557872570532e-06, "logits/chosen": -3.0983128547668457, "logits/rejected": -3.0449652671813965, "logps/chosen": -40.30735397338867, "logps/rejected": -607.51708984375, "loss": 0.0458, "rewards/accuracies": 1.0, "rewards/chosen": 0.08777575194835663, "rewards/margins": 5.770040512084961, "rewards/rejected": -5.682264804840088, "step": 9610 }, { "epoch": 0.12, "learning_rate": 4.996502856260977e-06, "logits/chosen": -3.0981574058532715, "logits/rejected": -3.0902788639068604, "logps/chosen": -53.92631912231445, "logps/rejected": -442.3843688964844, "loss": 0.0474, "rewards/accuracies": 1.0, "rewards/chosen": -0.06870993226766586, "rewards/margins": 3.968400239944458, "rewards/rejected": -4.037110328674316, "step": 9620 }, { "epoch": 0.12, "learning_rate": 4.996447404068958e-06, "logits/chosen": -3.0737533569335938, "logits/rejected": -3.012042284011841, "logps/chosen": -45.40840148925781, "logps/rejected": -535.21240234375, "loss": 0.0448, "rewards/accuracies": 1.0, "rewards/chosen": 0.00278068776242435, "rewards/margins": 4.957952499389648, "rewards/rejected": -4.955172061920166, "step": 9630 }, { "epoch": 0.12, "learning_rate": 4.996391516004156e-06, "logits/chosen": -3.1388607025146484, "logits/rejected": -3.073657274246216, "logps/chosen": -57.30541229248047, "logps/rejected": -475.4881896972656, "loss": 0.053, "rewards/accuracies": 1.0, "rewards/chosen": -0.11996284872293472, "rewards/margins": 4.258890628814697, "rewards/rejected": -4.3788533210754395, "step": 9640 }, { "epoch": 0.12, "learning_rate": 4.996335192076328e-06, "logits/chosen": -3.100548267364502, "logits/rejected": -3.043271064758301, "logps/chosen": -62.177635192871094, "logps/rejected": -507.8081970214844, "loss": 0.1198, "rewards/accuracies": 1.0, "rewards/chosen": -0.17908969521522522, "rewards/margins": 4.51894998550415, "rewards/rejected": -4.6980390548706055, "step": 9650 }, { "epoch": 0.12, "learning_rate": 4.996278432295309e-06, "logits/chosen": -3.121060848236084, "logits/rejected": -3.0794105529785156, "logps/chosen": -47.4708137512207, "logps/rejected": -443.1006774902344, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": -0.02654564008116722, "rewards/margins": 4.031044006347656, "rewards/rejected": -4.057589530944824, "step": 9660 }, { "epoch": 0.12, "learning_rate": 4.996221236671009e-06, "logits/chosen": -3.1248562335968018, "logits/rejected": -3.0384764671325684, "logps/chosen": -63.725502014160156, "logps/rejected": -623.4607543945312, "loss": 0.103, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.11655964702367783, "rewards/margins": 5.728701591491699, "rewards/rejected": -5.845261573791504, "step": 9670 }, { "epoch": 0.12, "learning_rate": 4.996163605213415e-06, "logits/chosen": -3.0648417472839355, "logits/rejected": -3.0222043991088867, "logps/chosen": -38.248233795166016, "logps/rejected": -459.86346435546875, "loss": 0.1171, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.08202281594276428, "rewards/margins": 4.306046485900879, "rewards/rejected": -4.224023818969727, "step": 9680 }, { "epoch": 0.12, "learning_rate": 4.996105537932587e-06, "logits/chosen": -3.1143970489501953, "logits/rejected": -3.0464377403259277, "logps/chosen": -39.40289306640625, "logps/rejected": -543.1620483398438, "loss": 0.1257, "rewards/accuracies": 1.0, "rewards/chosen": 0.08867505192756653, "rewards/margins": 5.126323223114014, "rewards/rejected": -5.0376482009887695, "step": 9690 }, { "epoch": 0.12, "learning_rate": 4.996047034838665e-06, "logits/chosen": -3.118537187576294, "logits/rejected": -3.1031970977783203, "logps/chosen": -26.563739776611328, "logps/rejected": -505.6805725097656, "loss": 0.0444, "rewards/accuracies": 1.0, "rewards/chosen": 0.1804017275571823, "rewards/margins": 4.860157489776611, "rewards/rejected": -4.679755210876465, "step": 9700 }, { "epoch": 0.12, "learning_rate": 4.995988095941864e-06, "logits/chosen": -3.1104893684387207, "logits/rejected": -3.0539956092834473, "logps/chosen": -57.29343795776367, "logps/rejected": -562.37109375, "loss": 0.1167, "rewards/accuracies": 1.0, "rewards/chosen": -0.018617892637848854, "rewards/margins": 5.192198276519775, "rewards/rejected": -5.210815906524658, "step": 9710 }, { "epoch": 0.12, "learning_rate": 4.995928721252473e-06, "logits/chosen": -3.0907845497131348, "logits/rejected": -3.0041353702545166, "logps/chosen": -28.669103622436523, "logps/rejected": -472.220703125, "loss": 0.0544, "rewards/accuracies": 1.0, "rewards/chosen": 0.13220059871673584, "rewards/margins": 4.46567964553833, "rewards/rejected": -4.333478927612305, "step": 9720 }, { "epoch": 0.12, "learning_rate": 4.9958689107808585e-06, "logits/chosen": -3.107152223587036, "logits/rejected": -3.0611205101013184, "logps/chosen": -30.04239845275879, "logps/rejected": -437.7333068847656, "loss": 0.1215, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.05621261149644852, "rewards/margins": 4.0643415451049805, "rewards/rejected": -4.008129119873047, "step": 9730 }, { "epoch": 0.12, "learning_rate": 4.995808664537466e-06, "logits/chosen": -3.061871290206909, "logits/rejected": -3.0211336612701416, "logps/chosen": -43.817989349365234, "logps/rejected": -462.9325256347656, "loss": 0.1286, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.020222142338752747, "rewards/margins": 4.278084754943848, "rewards/rejected": -4.257862567901611, "step": 9740 }, { "epoch": 0.12, "learning_rate": 4.995747982532812e-06, "logits/chosen": -3.0900111198425293, "logits/rejected": -3.011345863342285, "logps/chosen": -181.1084442138672, "logps/rejected": -515.3560791015625, "loss": 0.1858, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.3782809972763062, "rewards/margins": 3.4017879962921143, "rewards/rejected": -4.780069351196289, "step": 9750 }, { "epoch": 0.12, "learning_rate": 4.995686864777491e-06, "logits/chosen": -3.1242547035217285, "logits/rejected": -3.0612988471984863, "logps/chosen": -88.85496520996094, "logps/rejected": -548.0294799804688, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/chosen": -0.45649680495262146, "rewards/margins": 4.636321067810059, "rewards/rejected": -5.092817783355713, "step": 9760 }, { "epoch": 0.12, "learning_rate": 4.995625311282175e-06, "logits/chosen": -3.12023663520813, "logits/rejected": -3.079202651977539, "logps/chosen": -25.213308334350586, "logps/rejected": -383.92987060546875, "loss": 0.0516, "rewards/accuracies": 1.0, "rewards/chosen": 0.1946069747209549, "rewards/margins": 3.6492671966552734, "rewards/rejected": -3.454660415649414, "step": 9770 }, { "epoch": 0.12, "learning_rate": 4.995563322057612e-06, "logits/chosen": -3.1041946411132812, "logits/rejected": -3.040889024734497, "logps/chosen": -48.02218246459961, "logps/rejected": -443.6295471191406, "loss": 0.0506, "rewards/accuracies": 1.0, "rewards/chosen": 0.03691168501973152, "rewards/margins": 4.08181095123291, "rewards/rejected": -4.0448994636535645, "step": 9780 }, { "epoch": 0.12, "learning_rate": 4.995500897114623e-06, "logits/chosen": -3.1110198497772217, "logits/rejected": -3.065321683883667, "logps/chosen": -47.56570816040039, "logps/rejected": -645.7523193359375, "loss": 0.0369, "rewards/accuracies": 1.0, "rewards/chosen": 0.08979083597660065, "rewards/margins": 6.126394748687744, "rewards/rejected": -6.036604404449463, "step": 9790 }, { "epoch": 0.12, "learning_rate": 4.995438036464109e-06, "logits/chosen": -3.096026659011841, "logits/rejected": -3.044074296951294, "logps/chosen": -54.19709014892578, "logps/rejected": -435.7903747558594, "loss": 0.1875, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.12252205610275269, "rewards/margins": 3.8640294075012207, "rewards/rejected": -3.986551284790039, "step": 9800 }, { "epoch": 0.12, "learning_rate": 4.995374740117044e-06, "logits/chosen": -3.068988561630249, "logits/rejected": -3.012259006500244, "logps/chosen": -40.68684387207031, "logps/rejected": -516.8187255859375, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": 0.09245269000530243, "rewards/margins": 4.8722639083862305, "rewards/rejected": -4.779810905456543, "step": 9810 }, { "epoch": 0.12, "learning_rate": 4.99531100808448e-06, "logits/chosen": -3.086392879486084, "logits/rejected": -3.044245481491089, "logps/chosen": -46.570030212402344, "logps/rejected": -638.1939697265625, "loss": 0.0413, "rewards/accuracies": 1.0, "rewards/chosen": 0.060548000037670135, "rewards/margins": 6.051846981048584, "rewards/rejected": -5.991298675537109, "step": 9820 }, { "epoch": 0.12, "learning_rate": 4.995246840377546e-06, "logits/chosen": -3.0838165283203125, "logits/rejected": -3.0229380130767822, "logps/chosen": -36.02112579345703, "logps/rejected": -583.2185668945312, "loss": 0.0418, "rewards/accuracies": 1.0, "rewards/chosen": 0.14099325239658356, "rewards/margins": 5.587338924407959, "rewards/rejected": -5.446345329284668, "step": 9830 }, { "epoch": 0.12, "learning_rate": 4.995182237007442e-06, "logits/chosen": -3.137479782104492, "logits/rejected": -3.0596442222595215, "logps/chosen": -56.3624153137207, "logps/rejected": -720.2185668945312, "loss": 0.0902, "rewards/accuracies": 1.0, "rewards/chosen": -0.03714904561638832, "rewards/margins": 6.746330261230469, "rewards/rejected": -6.783480167388916, "step": 9840 }, { "epoch": 0.12, "learning_rate": 4.99511719798545e-06, "logits/chosen": -3.0820820331573486, "logits/rejected": -3.0420308113098145, "logps/chosen": -41.928409576416016, "logps/rejected": -529.2315673828125, "loss": 0.1127, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.011098278686404228, "rewards/margins": 4.928018569946289, "rewards/rejected": -4.9169206619262695, "step": 9850 }, { "epoch": 0.12, "learning_rate": 4.9950517233229255e-06, "logits/chosen": -3.112757682800293, "logits/rejected": -3.0316967964172363, "logps/chosen": -65.57611846923828, "logps/rejected": -609.4805908203125, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/chosen": -0.09056820720434189, "rewards/margins": 5.619215488433838, "rewards/rejected": -5.709784507751465, "step": 9860 }, { "epoch": 0.12, "learning_rate": 4.994985813031299e-06, "logits/chosen": -3.1345882415771484, "logits/rejected": -3.080371141433716, "logps/chosen": -38.61627960205078, "logps/rejected": -547.191162109375, "loss": 0.0376, "rewards/accuracies": 1.0, "rewards/chosen": 0.0948125347495079, "rewards/margins": 5.182926177978516, "rewards/rejected": -5.088113784790039, "step": 9870 }, { "epoch": 0.12, "learning_rate": 4.99491946712208e-06, "logits/chosen": -3.114503860473633, "logits/rejected": -3.0841739177703857, "logps/chosen": -30.625646591186523, "logps/rejected": -516.3011474609375, "loss": 0.0428, "rewards/accuracies": 1.0, "rewards/chosen": 0.11237196624279022, "rewards/margins": 4.888161659240723, "rewards/rejected": -4.775789260864258, "step": 9880 }, { "epoch": 0.12, "learning_rate": 4.994852685606851e-06, "logits/chosen": -3.092012643814087, "logits/rejected": -3.000831127166748, "logps/chosen": -42.562313079833984, "logps/rejected": -657.3818359375, "loss": 0.0529, "rewards/accuracies": 1.0, "rewards/chosen": 0.10528498888015747, "rewards/margins": 6.286776065826416, "rewards/rejected": -6.181490898132324, "step": 9890 }, { "epoch": 0.12, "learning_rate": 4.994785468497273e-06, "logits/chosen": -3.134403705596924, "logits/rejected": -3.1018166542053223, "logps/chosen": -36.904510498046875, "logps/rejected": -395.11260986328125, "loss": 0.1538, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.05188601464033127, "rewards/margins": 3.631653308868408, "rewards/rejected": -3.5797667503356934, "step": 9900 }, { "epoch": 0.12, "learning_rate": 4.99471781580508e-06, "logits/chosen": -3.0727832317352295, "logits/rejected": -3.042612314224243, "logps/chosen": -27.206274032592773, "logps/rejected": -583.909912109375, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.18213313817977905, "rewards/margins": 5.633699893951416, "rewards/rejected": -5.451565742492676, "step": 9910 }, { "epoch": 0.12, "learning_rate": 4.994649727542086e-06, "logits/chosen": -3.0824756622314453, "logits/rejected": -3.0068912506103516, "logps/chosen": -47.51868438720703, "logps/rejected": -522.3851928710938, "loss": 0.0768, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.013863801956176758, "rewards/margins": 4.815337181091309, "rewards/rejected": -4.829200267791748, "step": 9920 }, { "epoch": 0.12, "learning_rate": 4.994581203720178e-06, "logits/chosen": -3.102545976638794, "logits/rejected": -3.038785457611084, "logps/chosen": -59.21602249145508, "logps/rejected": -592.2791748046875, "loss": 0.1286, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.07001825422048569, "rewards/margins": 5.456835746765137, "rewards/rejected": -5.526854038238525, "step": 9930 }, { "epoch": 0.12, "learning_rate": 4.99451224435132e-06, "logits/chosen": -3.1176509857177734, "logits/rejected": -3.0560479164123535, "logps/chosen": -83.25889587402344, "logps/rejected": -742.7366943359375, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": -0.29775285720825195, "rewards/margins": 6.709317207336426, "rewards/rejected": -7.0070695877075195, "step": 9940 }, { "epoch": 0.12, "learning_rate": 4.994442849447553e-06, "logits/chosen": -3.1035566329956055, "logits/rejected": -3.031395673751831, "logps/chosen": -46.54660415649414, "logps/rejected": -491.84918212890625, "loss": 0.1076, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.014241328462958336, "rewards/margins": 4.53000020980835, "rewards/rejected": -4.544240951538086, "step": 9950 }, { "epoch": 0.12, "learning_rate": 4.994373019020991e-06, "logits/chosen": -3.1495518684387207, "logits/rejected": -3.106752634048462, "logps/chosen": -46.318504333496094, "logps/rejected": -636.0123291015625, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -0.004385144915431738, "rewards/margins": 5.962582588195801, "rewards/rejected": -5.966967582702637, "step": 9960 }, { "epoch": 0.12, "learning_rate": 4.994302753083828e-06, "logits/chosen": -3.1131319999694824, "logits/rejected": -3.077141523361206, "logps/chosen": -72.18194580078125, "logps/rejected": -440.1165466308594, "loss": 0.1855, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.28130975365638733, "rewards/margins": 3.7157058715820312, "rewards/rejected": -3.9970154762268066, "step": 9970 }, { "epoch": 0.12, "learning_rate": 4.994232051648333e-06, "logits/chosen": -3.150320291519165, "logits/rejected": -3.1078174114227295, "logps/chosen": -94.88713836669922, "logps/rejected": -652.3267822265625, "loss": 0.1155, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.37710654735565186, "rewards/margins": 5.741892337799072, "rewards/rejected": -6.118999481201172, "step": 9980 }, { "epoch": 0.12, "learning_rate": 4.994160914726849e-06, "logits/chosen": -3.1189792156219482, "logits/rejected": -3.0598883628845215, "logps/chosen": -31.401714324951172, "logps/rejected": -524.9768676757812, "loss": 0.0423, "rewards/accuracies": 1.0, "rewards/chosen": 0.12779447436332703, "rewards/margins": 4.997411727905273, "rewards/rejected": -4.869617462158203, "step": 9990 }, { "epoch": 0.12, "learning_rate": 4.994089342331796e-06, "logits/chosen": -3.093461513519287, "logits/rejected": -3.036339282989502, "logps/chosen": -37.100830078125, "logps/rejected": -496.544189453125, "loss": 0.136, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.08991717547178268, "rewards/margins": 4.667092323303223, "rewards/rejected": -4.577175140380859, "step": 10000 }, { "epoch": 0.12, "learning_rate": 4.994017334475671e-06, "logits/chosen": -3.113694667816162, "logits/rejected": -3.0662193298339844, "logps/chosen": -53.00580978393555, "logps/rejected": -478.31976318359375, "loss": 0.0731, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0588190034031868, "rewards/margins": 4.337172508239746, "rewards/rejected": -4.39599084854126, "step": 10010 }, { "epoch": 0.12, "learning_rate": 4.9939448911710465e-06, "logits/chosen": -3.128255605697632, "logits/rejected": -3.063324451446533, "logps/chosen": -28.316898345947266, "logps/rejected": -480.79351806640625, "loss": 0.0482, "rewards/accuracies": 1.0, "rewards/chosen": 0.13202282786369324, "rewards/margins": 4.560511589050293, "rewards/rejected": -4.428488731384277, "step": 10020 }, { "epoch": 0.12, "learning_rate": 4.993872012430571e-06, "logits/chosen": -3.1458778381347656, "logits/rejected": -3.097365140914917, "logps/chosen": -43.079689025878906, "logps/rejected": -557.85205078125, "loss": 0.0467, "rewards/accuracies": 1.0, "rewards/chosen": 0.04513826593756676, "rewards/margins": 5.233662128448486, "rewards/rejected": -5.188523292541504, "step": 10030 }, { "epoch": 0.12, "learning_rate": 4.9937986982669675e-06, "logits/chosen": -3.0713837146759033, "logits/rejected": -3.0261051654815674, "logps/chosen": -37.673362731933594, "logps/rejected": -570.7185668945312, "loss": 0.118, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.05858408287167549, "rewards/margins": 5.380268573760986, "rewards/rejected": -5.321684837341309, "step": 10040 }, { "epoch": 0.12, "learning_rate": 4.9937249486930386e-06, "logits/chosen": -3.1169304847717285, "logits/rejected": -3.073143482208252, "logps/chosen": -22.785383224487305, "logps/rejected": -551.923828125, "loss": 0.0742, "rewards/accuracies": 1.0, "rewards/chosen": 0.18637219071388245, "rewards/margins": 5.319610595703125, "rewards/rejected": -5.133237838745117, "step": 10050 }, { "epoch": 0.12, "learning_rate": 4.993650763721659e-06, "logits/chosen": -3.1012725830078125, "logits/rejected": -2.9699621200561523, "logps/chosen": -43.2763557434082, "logps/rejected": -779.4473266601562, "loss": 0.1057, "rewards/accuracies": 1.0, "rewards/chosen": 0.11425008624792099, "rewards/margins": 7.501269340515137, "rewards/rejected": -7.387020111083984, "step": 10060 }, { "epoch": 0.12, "learning_rate": 4.993576143365782e-06, "logits/chosen": -3.1162452697753906, "logits/rejected": -3.0597102642059326, "logps/chosen": -85.83900451660156, "logps/rejected": -638.8579711914062, "loss": 0.1966, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2937815487384796, "rewards/margins": 5.6883673667907715, "rewards/rejected": -5.98214864730835, "step": 10070 }, { "epoch": 0.12, "learning_rate": 4.993501087638436e-06, "logits/chosen": -3.099315643310547, "logits/rejected": -3.0652685165405273, "logps/chosen": -38.702003479003906, "logps/rejected": -513.4869384765625, "loss": 0.1225, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.02461088076233864, "rewards/margins": 4.774284362792969, "rewards/rejected": -4.749673843383789, "step": 10080 }, { "epoch": 0.12, "learning_rate": 4.993425596552726e-06, "logits/chosen": -3.099202871322632, "logits/rejected": -3.000969409942627, "logps/chosen": -57.56114959716797, "logps/rejected": -670.95751953125, "loss": 0.0338, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001636207161936909, "rewards/margins": 6.304862022399902, "rewards/rejected": -6.305025577545166, "step": 10090 }, { "epoch": 0.12, "learning_rate": 4.9933496701218305e-06, "logits/chosen": -3.1179699897766113, "logits/rejected": -3.068160057067871, "logps/chosen": -50.11964416503906, "logps/rejected": -625.817138671875, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/chosen": 0.041657786816358566, "rewards/margins": 5.884416103363037, "rewards/rejected": -5.842759132385254, "step": 10100 }, { "epoch": 0.12, "learning_rate": 4.993273308359009e-06, "logits/chosen": -3.1100354194641113, "logits/rejected": -3.070234775543213, "logps/chosen": -26.839595794677734, "logps/rejected": -509.26995849609375, "loss": 0.0438, "rewards/accuracies": 1.0, "rewards/chosen": 0.1389160305261612, "rewards/margins": 4.8598313331604, "rewards/rejected": -4.7209153175354, "step": 10110 }, { "epoch": 0.12, "learning_rate": 4.993196511277593e-06, "logits/chosen": -3.1092495918273926, "logits/rejected": -3.0679216384887695, "logps/chosen": -27.383188247680664, "logps/rejected": -317.3639831542969, "loss": 0.1106, "rewards/accuracies": 1.0, "rewards/chosen": 0.1968248188495636, "rewards/margins": 3.0100226402282715, "rewards/rejected": -2.813197612762451, "step": 10120 }, { "epoch": 0.12, "learning_rate": 4.993119278890989e-06, "logits/chosen": -3.0766639709472656, "logits/rejected": -2.9962997436523438, "logps/chosen": -33.87058639526367, "logps/rejected": -513.3701171875, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": 0.16633550822734833, "rewards/margins": 4.914067268371582, "rewards/rejected": -4.747731685638428, "step": 10130 }, { "epoch": 0.12, "learning_rate": 4.993041611212685e-06, "logits/chosen": -3.0798211097717285, "logits/rejected": -3.013777256011963, "logps/chosen": -74.3489761352539, "logps/rejected": -436.87255859375, "loss": 0.0639, "rewards/accuracies": 1.0, "rewards/chosen": -0.27745527029037476, "rewards/margins": 3.693361759185791, "rewards/rejected": -3.9708168506622314, "step": 10140 }, { "epoch": 0.12, "learning_rate": 4.992963508256239e-06, "logits/chosen": -3.058866262435913, "logits/rejected": -3.0352468490600586, "logps/chosen": -46.849647521972656, "logps/rejected": -418.0255432128906, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": -0.03038623556494713, "rewards/margins": 3.770111560821533, "rewards/rejected": -3.8004977703094482, "step": 10150 }, { "epoch": 0.12, "learning_rate": 4.992884970035288e-06, "logits/chosen": -3.063007354736328, "logits/rejected": -3.0130977630615234, "logps/chosen": -74.05130767822266, "logps/rejected": -614.4411010742188, "loss": 0.1166, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2668324112892151, "rewards/margins": 5.469324111938477, "rewards/rejected": -5.736156940460205, "step": 10160 }, { "epoch": 0.12, "learning_rate": 4.9928059965635454e-06, "logits/chosen": -3.1245670318603516, "logits/rejected": -3.0423734188079834, "logps/chosen": -48.32783126831055, "logps/rejected": -552.3087768554688, "loss": 0.052, "rewards/accuracies": 1.0, "rewards/chosen": -0.028189634904265404, "rewards/margins": 5.111468315124512, "rewards/rejected": -5.1396589279174805, "step": 10170 }, { "epoch": 0.12, "learning_rate": 4.992726587854799e-06, "logits/chosen": -3.092886447906494, "logits/rejected": -3.026502847671509, "logps/chosen": -44.92804718017578, "logps/rejected": -415.2532653808594, "loss": 0.1201, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.055354051291942596, "rewards/margins": 3.728121280670166, "rewards/rejected": -3.783475160598755, "step": 10180 }, { "epoch": 0.12, "learning_rate": 4.9926467439229145e-06, "logits/chosen": -3.1254851818084717, "logits/rejected": -3.0598349571228027, "logps/chosen": -44.665950775146484, "logps/rejected": -493.512939453125, "loss": 0.0545, "rewards/accuracies": 1.0, "rewards/chosen": 0.026278335601091385, "rewards/margins": 4.57933235168457, "rewards/rejected": -4.553053855895996, "step": 10190 }, { "epoch": 0.12, "learning_rate": 4.992566464781831e-06, "logits/chosen": -3.104410171508789, "logits/rejected": -3.013306140899658, "logps/chosen": -65.92181396484375, "logps/rejected": -534.7981567382812, "loss": 0.0545, "rewards/accuracies": 1.0, "rewards/chosen": -0.18136736750602722, "rewards/margins": 4.760345458984375, "rewards/rejected": -4.941713809967041, "step": 10200 }, { "epoch": 0.12, "learning_rate": 4.992485750445565e-06, "logits/chosen": -3.0866217613220215, "logits/rejected": -3.053095579147339, "logps/chosen": -50.38914489746094, "logps/rejected": -603.2244873046875, "loss": 0.1048, "rewards/accuracies": 1.0, "rewards/chosen": -0.060181695967912674, "rewards/margins": 5.578530788421631, "rewards/rejected": -5.6387128829956055, "step": 10210 }, { "epoch": 0.12, "learning_rate": 4.99240460092821e-06, "logits/chosen": -3.0877833366394043, "logits/rejected": -3.0572445392608643, "logps/chosen": -35.07377243041992, "logps/rejected": -503.72039794921875, "loss": 0.0594, "rewards/accuracies": 1.0, "rewards/chosen": 0.07718031108379364, "rewards/margins": 4.733306884765625, "rewards/rejected": -4.656126499176025, "step": 10220 }, { "epoch": 0.12, "learning_rate": 4.992323016243934e-06, "logits/chosen": -3.103508472442627, "logits/rejected": -3.063075304031372, "logps/chosen": -49.46389389038086, "logps/rejected": -713.6676635742188, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012478232383728027, "rewards/margins": 6.727308750152588, "rewards/rejected": -6.7274346351623535, "step": 10230 }, { "epoch": 0.12, "learning_rate": 4.992240996406981e-06, "logits/chosen": -3.105363368988037, "logits/rejected": -3.0578417778015137, "logps/chosen": -47.640907287597656, "logps/rejected": -443.64520263671875, "loss": 0.0635, "rewards/accuracies": 1.0, "rewards/chosen": -0.06662555038928986, "rewards/margins": 3.9911117553710938, "rewards/rejected": -4.057737350463867, "step": 10240 }, { "epoch": 0.12, "learning_rate": 4.992158541431672e-06, "logits/chosen": -3.0812864303588867, "logits/rejected": -2.9962785243988037, "logps/chosen": -44.67341232299805, "logps/rejected": -547.2618408203125, "loss": 0.0464, "rewards/accuracies": 1.0, "rewards/chosen": 0.12007106840610504, "rewards/margins": 5.203692436218262, "rewards/rejected": -5.083621978759766, "step": 10250 }, { "epoch": 0.12, "learning_rate": 4.9920756513324044e-06, "logits/chosen": -3.1487317085266113, "logits/rejected": -3.0888493061065674, "logps/chosen": -34.41770935058594, "logps/rejected": -500.634033203125, "loss": 0.0646, "rewards/accuracies": 1.0, "rewards/chosen": 0.1119321957230568, "rewards/margins": 4.737724304199219, "rewards/rejected": -4.625792503356934, "step": 10260 }, { "epoch": 0.12, "learning_rate": 4.991992326123648e-06, "logits/chosen": -3.158583879470825, "logits/rejected": -3.1039576530456543, "logps/chosen": -77.82080841064453, "logps/rejected": -583.6700439453125, "loss": 0.0429, "rewards/accuracies": 1.0, "rewards/chosen": -0.30031758546829224, "rewards/margins": 5.138144493103027, "rewards/rejected": -5.438462257385254, "step": 10270 }, { "epoch": 0.12, "learning_rate": 4.991908565819954e-06, "logits/chosen": -3.1309056282043457, "logits/rejected": -3.064713478088379, "logps/chosen": -24.468467712402344, "logps/rejected": -510.3248596191406, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.15007884800434113, "rewards/margins": 4.87869930267334, "rewards/rejected": -4.7286200523376465, "step": 10280 }, { "epoch": 0.12, "learning_rate": 4.9918243704359445e-06, "logits/chosen": -3.105663776397705, "logits/rejected": -3.0322818756103516, "logps/chosen": -43.28764724731445, "logps/rejected": -654.1741943359375, "loss": 0.0432, "rewards/accuracies": 1.0, "rewards/chosen": 0.10268578678369522, "rewards/margins": 6.248574733734131, "rewards/rejected": -6.1458892822265625, "step": 10290 }, { "epoch": 0.12, "learning_rate": 4.991739739986322e-06, "logits/chosen": -3.0991616249084473, "logits/rejected": -3.060882091522217, "logps/chosen": -23.974706649780273, "logps/rejected": -470.2857971191406, "loss": 0.0427, "rewards/accuracies": 1.0, "rewards/chosen": 0.16606658697128296, "rewards/margins": 4.498885154724121, "rewards/rejected": -4.332818031311035, "step": 10300 }, { "epoch": 0.12, "learning_rate": 4.99165467448586e-06, "logits/chosen": -3.1024813652038574, "logits/rejected": -3.06400728225708, "logps/chosen": -34.112449645996094, "logps/rejected": -500.55987548828125, "loss": 0.0531, "rewards/accuracies": 1.0, "rewards/chosen": 0.09455601871013641, "rewards/margins": 4.718436241149902, "rewards/rejected": -4.623879432678223, "step": 10310 }, { "epoch": 0.12, "learning_rate": 4.9915691739494135e-06, "logits/chosen": -3.107886552810669, "logits/rejected": -3.078371286392212, "logps/chosen": -45.43115234375, "logps/rejected": -355.7124328613281, "loss": 0.2259, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.07053013145923615, "rewards/margins": 3.1185085773468018, "rewards/rejected": -3.1890387535095215, "step": 10320 }, { "epoch": 0.12, "learning_rate": 4.991483238391908e-06, "logits/chosen": -3.1140122413635254, "logits/rejected": -3.0834157466888428, "logps/chosen": -39.84809112548828, "logps/rejected": -426.66571044921875, "loss": 0.1204, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.026691162958741188, "rewards/margins": 3.8774890899658203, "rewards/rejected": -3.9041800498962402, "step": 10330 }, { "epoch": 0.12, "learning_rate": 4.991396867828349e-06, "logits/chosen": -3.088259696960449, "logits/rejected": -3.025696277618408, "logps/chosen": -45.522254943847656, "logps/rejected": -511.1194763183594, "loss": 0.0602, "rewards/accuracies": 1.0, "rewards/chosen": 0.013153878040611744, "rewards/margins": 4.750260353088379, "rewards/rejected": -4.737105369567871, "step": 10340 }, { "epoch": 0.12, "learning_rate": 4.991310062273816e-06, "logits/chosen": -3.1248528957366943, "logits/rejected": -3.115192413330078, "logps/chosen": -21.85077667236328, "logps/rejected": -485.730224609375, "loss": 0.1225, "rewards/accuracies": 1.0, "rewards/chosen": 0.1997031271457672, "rewards/margins": 4.686652183532715, "rewards/rejected": -4.4869489669799805, "step": 10350 }, { "epoch": 0.12, "learning_rate": 4.9912228217434665e-06, "logits/chosen": -3.083552122116089, "logits/rejected": -3.012363910675049, "logps/chosen": -35.527793884277344, "logps/rejected": -486.94287109375, "loss": 0.0473, "rewards/accuracies": 1.0, "rewards/chosen": 0.1074819415807724, "rewards/margins": 4.587344169616699, "rewards/rejected": -4.479861259460449, "step": 10360 }, { "epoch": 0.12, "learning_rate": 4.991135146252532e-06, "logits/chosen": -3.0537397861480713, "logits/rejected": -2.99229097366333, "logps/chosen": -44.33600997924805, "logps/rejected": -422.7401428222656, "loss": 0.0717, "rewards/accuracies": 1.0, "rewards/chosen": 0.018345486372709274, "rewards/margins": 3.875598192214966, "rewards/rejected": -3.857253313064575, "step": 10370 }, { "epoch": 0.12, "learning_rate": 4.9910470358163185e-06, "logits/chosen": -3.1227269172668457, "logits/rejected": -3.039424419403076, "logps/chosen": -33.50871658325195, "logps/rejected": -724.09814453125, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": 0.12513796985149384, "rewards/margins": 6.981745719909668, "rewards/rejected": -6.8566083908081055, "step": 10380 }, { "epoch": 0.12, "learning_rate": 4.990958490450211e-06, "logits/chosen": -3.119680404663086, "logits/rejected": -3.079010486602783, "logps/chosen": -36.343257904052734, "logps/rejected": -576.8023071289062, "loss": 0.1022, "rewards/accuracies": 1.0, "rewards/chosen": 0.12254683673381805, "rewards/margins": 5.511857986450195, "rewards/rejected": -5.389310836791992, "step": 10390 }, { "epoch": 0.12, "learning_rate": 4.9908695101696705e-06, "logits/chosen": -3.106738567352295, "logits/rejected": -3.0565686225891113, "logps/chosen": -34.890403747558594, "logps/rejected": -622.0672607421875, "loss": 0.0429, "rewards/accuracies": 1.0, "rewards/chosen": 0.14978353679180145, "rewards/margins": 5.987652778625488, "rewards/rejected": -5.837869644165039, "step": 10400 }, { "epoch": 0.12, "learning_rate": 4.99078009499023e-06, "logits/chosen": -3.104825258255005, "logits/rejected": -3.0320334434509277, "logps/chosen": -40.4833869934082, "logps/rejected": -651.89990234375, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": 0.11230071634054184, "rewards/margins": 6.231534957885742, "rewards/rejected": -6.11923360824585, "step": 10410 }, { "epoch": 0.12, "learning_rate": 4.990690244927504e-06, "logits/chosen": -3.1309361457824707, "logits/rejected": -3.088810443878174, "logps/chosen": -36.612735748291016, "logps/rejected": -574.0933837890625, "loss": 0.1176, "rewards/accuracies": 1.0, "rewards/chosen": 0.09774967283010483, "rewards/margins": 5.467617988586426, "rewards/rejected": -5.369868278503418, "step": 10420 }, { "epoch": 0.12, "learning_rate": 4.9905999599971785e-06, "logits/chosen": -3.055156946182251, "logits/rejected": -2.9663493633270264, "logps/chosen": -42.56254959106445, "logps/rejected": -734.9066772460938, "loss": 0.0617, "rewards/accuracies": 1.0, "rewards/chosen": 0.12463551759719849, "rewards/margins": 7.065482139587402, "rewards/rejected": -6.940846920013428, "step": 10430 }, { "epoch": 0.12, "learning_rate": 4.990509240215018e-06, "logits/chosen": -3.116446018218994, "logits/rejected": -3.0476956367492676, "logps/chosen": -35.03488540649414, "logps/rejected": -562.0808715820312, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": 0.13380563259124756, "rewards/margins": 5.368826866149902, "rewards/rejected": -5.235021114349365, "step": 10440 }, { "epoch": 0.13, "learning_rate": 4.99041808559686e-06, "logits/chosen": -3.139524221420288, "logits/rejected": -3.0805039405822754, "logps/chosen": -43.096099853515625, "logps/rejected": -612.4402465820312, "loss": 0.0479, "rewards/accuracies": 1.0, "rewards/chosen": 0.055222440510988235, "rewards/margins": 5.780508995056152, "rewards/rejected": -5.725286960601807, "step": 10450 }, { "epoch": 0.13, "learning_rate": 4.990326496158622e-06, "logits/chosen": -3.110816478729248, "logits/rejected": -3.058551073074341, "logps/chosen": -33.75818634033203, "logps/rejected": -588.5872192382812, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": 0.07224917411804199, "rewards/margins": 5.578825950622559, "rewards/rejected": -5.5065765380859375, "step": 10460 }, { "epoch": 0.13, "learning_rate": 4.990234471916294e-06, "logits/chosen": -3.121601104736328, "logits/rejected": -3.0765278339385986, "logps/chosen": -31.503936767578125, "logps/rejected": -639.1619873046875, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/chosen": 0.14361660182476044, "rewards/margins": 6.145869255065918, "rewards/rejected": -6.002253532409668, "step": 10470 }, { "epoch": 0.13, "learning_rate": 4.990142012885944e-06, "logits/chosen": -3.1193485260009766, "logits/rejected": -3.0477945804595947, "logps/chosen": -53.670013427734375, "logps/rejected": -554.0449829101562, "loss": 0.0367, "rewards/accuracies": 1.0, "rewards/chosen": -0.03221739083528519, "rewards/margins": 5.122265815734863, "rewards/rejected": -5.154482841491699, "step": 10480 }, { "epoch": 0.13, "learning_rate": 4.9900491190837144e-06, "logits/chosen": -3.101508378982544, "logits/rejected": -3.055380344390869, "logps/chosen": -41.232696533203125, "logps/rejected": -539.0328369140625, "loss": 0.0446, "rewards/accuracies": 1.0, "rewards/chosen": 0.07691161334514618, "rewards/margins": 5.077730655670166, "rewards/rejected": -5.000819206237793, "step": 10490 }, { "epoch": 0.13, "learning_rate": 4.989955790525824e-06, "logits/chosen": -3.1060292720794678, "logits/rejected": -3.0398736000061035, "logps/chosen": -66.17607116699219, "logps/rejected": -473.596435546875, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": -0.11010535061359406, "rewards/margins": 4.2318220138549805, "rewards/rejected": -4.341927528381348, "step": 10500 }, { "epoch": 0.13, "learning_rate": 4.989862027228568e-06, "logits/chosen": -3.129974603652954, "logits/rejected": -3.093356132507324, "logps/chosen": -41.436126708984375, "logps/rejected": -569.6881103515625, "loss": 0.0427, "rewards/accuracies": 1.0, "rewards/chosen": 0.020462552085518837, "rewards/margins": 5.322558403015137, "rewards/rejected": -5.302096366882324, "step": 10510 }, { "epoch": 0.13, "learning_rate": 4.989767829208319e-06, "logits/chosen": -3.094325304031372, "logits/rejected": -3.0342769622802734, "logps/chosen": -50.83086395263672, "logps/rejected": -640.2390747070312, "loss": 0.0902, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.006906820926815271, "rewards/margins": 5.982285499572754, "rewards/rejected": -5.989192008972168, "step": 10520 }, { "epoch": 0.13, "learning_rate": 4.98967319648152e-06, "logits/chosen": -3.084149122238159, "logits/rejected": -3.0100722312927246, "logps/chosen": -54.25873947143555, "logps/rejected": -702.1495971679688, "loss": 0.0376, "rewards/accuracies": 1.0, "rewards/chosen": -0.07906418293714523, "rewards/margins": 6.5513105392456055, "rewards/rejected": -6.630374908447266, "step": 10530 }, { "epoch": 0.13, "learning_rate": 4.989578129064697e-06, "logits/chosen": -3.0783257484436035, "logits/rejected": -2.950208902359009, "logps/chosen": -53.82727813720703, "logps/rejected": -745.6353759765625, "loss": 0.0376, "rewards/accuracies": 1.0, "rewards/chosen": 0.0354827418923378, "rewards/margins": 7.083052635192871, "rewards/rejected": -7.047570705413818, "step": 10540 }, { "epoch": 0.13, "learning_rate": 4.989482626974446e-06, "logits/chosen": -3.0959720611572266, "logits/rejected": -3.0695154666900635, "logps/chosen": -33.71475601196289, "logps/rejected": -504.8939514160156, "loss": 0.0466, "rewards/accuracies": 1.0, "rewards/chosen": 0.044828955084085464, "rewards/margins": 4.71488094329834, "rewards/rejected": -4.6700520515441895, "step": 10550 }, { "epoch": 0.13, "learning_rate": 4.989386690227443e-06, "logits/chosen": -3.14998722076416, "logits/rejected": -3.0447747707366943, "logps/chosen": -99.33876037597656, "logps/rejected": -615.6980590820312, "loss": 0.058, "rewards/accuracies": 1.0, "rewards/chosen": -0.38092947006225586, "rewards/margins": 5.389049530029297, "rewards/rejected": -5.769979476928711, "step": 10560 }, { "epoch": 0.13, "learning_rate": 4.9892903188404384e-06, "logits/chosen": -3.1218652725219727, "logits/rejected": -3.0462114810943604, "logps/chosen": -61.77067184448242, "logps/rejected": -666.9874267578125, "loss": 0.1073, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1443430632352829, "rewards/margins": 6.128988742828369, "rewards/rejected": -6.273331642150879, "step": 10570 }, { "epoch": 0.13, "learning_rate": 4.989193512830257e-06, "logits/chosen": -3.1238980293273926, "logits/rejected": -3.083395481109619, "logps/chosen": -56.0390739440918, "logps/rejected": -564.7879638671875, "loss": 0.0438, "rewards/accuracies": 1.0, "rewards/chosen": -0.11747226864099503, "rewards/margins": 5.137998580932617, "rewards/rejected": -5.255471229553223, "step": 10580 }, { "epoch": 0.13, "learning_rate": 4.989096272213802e-06, "logits/chosen": -3.0941526889801025, "logits/rejected": -3.0482490062713623, "logps/chosen": -35.675777435302734, "logps/rejected": -608.2310791015625, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": 0.08984716236591339, "rewards/margins": 5.782767295837402, "rewards/rejected": -5.692920207977295, "step": 10590 }, { "epoch": 0.13, "learning_rate": 4.988998597008053e-06, "logits/chosen": -3.084986448287964, "logits/rejected": -3.0335986614227295, "logps/chosen": -45.739864349365234, "logps/rejected": -673.0084838867188, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": 0.03503502160310745, "rewards/margins": 6.364222049713135, "rewards/rejected": -6.329186916351318, "step": 10600 }, { "epoch": 0.13, "learning_rate": 4.98890048723006e-06, "logits/chosen": -3.115243434906006, "logits/rejected": -3.069667339324951, "logps/chosen": -52.33440017700195, "logps/rejected": -501.7870178222656, "loss": 0.1879, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.017121003940701485, "rewards/margins": 4.64357852935791, "rewards/rejected": -4.626458168029785, "step": 10610 }, { "epoch": 0.13, "learning_rate": 4.988801942896954e-06, "logits/chosen": -3.1287944316864014, "logits/rejected": -3.073147773742676, "logps/chosen": -34.927345275878906, "logps/rejected": -612.7021484375, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": 0.0992933139204979, "rewards/margins": 5.817183017730713, "rewards/rejected": -5.717889308929443, "step": 10620 }, { "epoch": 0.13, "learning_rate": 4.9887029640259424e-06, "logits/chosen": -3.1248176097869873, "logits/rejected": -3.036694288253784, "logps/chosen": -40.669071197509766, "logps/rejected": -702.2918701171875, "loss": 0.109, "rewards/accuracies": 1.0, "rewards/chosen": 0.14772556722164154, "rewards/margins": 6.76843786239624, "rewards/rejected": -6.620713233947754, "step": 10630 }, { "epoch": 0.13, "learning_rate": 4.988603550634304e-06, "logits/chosen": -3.1421422958374023, "logits/rejected": -3.034985065460205, "logps/chosen": -60.0167350769043, "logps/rejected": -842.3880004882812, "loss": 0.0519, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.047603800892829895, "rewards/margins": 7.963967323303223, "rewards/rejected": -8.011570930480957, "step": 10640 }, { "epoch": 0.13, "learning_rate": 4.988503702739399e-06, "logits/chosen": -3.1008057594299316, "logits/rejected": -3.035677433013916, "logps/chosen": -26.650991439819336, "logps/rejected": -640.5137939453125, "loss": 0.0633, "rewards/accuracies": 1.0, "rewards/chosen": 0.17152856290340424, "rewards/margins": 6.190362930297852, "rewards/rejected": -6.018834114074707, "step": 10650 }, { "epoch": 0.13, "learning_rate": 4.988403420358657e-06, "logits/chosen": -3.1144278049468994, "logits/rejected": -3.0210280418395996, "logps/chosen": -44.675113677978516, "logps/rejected": -479.199462890625, "loss": 0.1374, "rewards/accuracies": 1.0, "rewards/chosen": 0.013655567541718483, "rewards/margins": 4.420536994934082, "rewards/rejected": -4.406881809234619, "step": 10660 }, { "epoch": 0.13, "learning_rate": 4.988302703509591e-06, "logits/chosen": -3.106635570526123, "logits/rejected": -3.024021625518799, "logps/chosen": -49.72163391113281, "logps/rejected": -691.4583740234375, "loss": 0.1093, "rewards/accuracies": 1.0, "rewards/chosen": -0.012156933546066284, "rewards/margins": 6.502338409423828, "rewards/rejected": -6.514494895935059, "step": 10670 }, { "epoch": 0.13, "learning_rate": 4.9882015522097834e-06, "logits/chosen": -3.109830379486084, "logits/rejected": -3.0458269119262695, "logps/chosen": -58.459075927734375, "logps/rejected": -713.4645385742188, "loss": 0.0484, "rewards/accuracies": 1.0, "rewards/chosen": -0.10050570964813232, "rewards/margins": 6.6367340087890625, "rewards/rejected": -6.737239837646484, "step": 10680 }, { "epoch": 0.13, "learning_rate": 4.988099966476895e-06, "logits/chosen": -3.0994975566864014, "logits/rejected": -3.0224766731262207, "logps/chosen": -65.55133056640625, "logps/rejected": -771.7138671875, "loss": 0.0291, "rewards/accuracies": 1.0, "rewards/chosen": -0.12282519042491913, "rewards/margins": 7.177890777587891, "rewards/rejected": -7.300715446472168, "step": 10690 }, { "epoch": 0.13, "learning_rate": 4.9879979463286635e-06, "logits/chosen": -3.0799994468688965, "logits/rejected": -3.0407607555389404, "logps/chosen": -61.7285270690918, "logps/rejected": -603.2969970703125, "loss": 0.0642, "rewards/accuracies": 1.0, "rewards/chosen": -0.12331455945968628, "rewards/margins": 5.522385597229004, "rewards/rejected": -5.645699977874756, "step": 10700 }, { "epoch": 0.13, "learning_rate": 4.987895491782901e-06, "logits/chosen": -3.120969772338867, "logits/rejected": -3.072361707687378, "logps/chosen": -52.6322135925293, "logps/rejected": -527.27490234375, "loss": 0.0594, "rewards/accuracies": 1.0, "rewards/chosen": 0.0360652320086956, "rewards/margins": 4.908031940460205, "rewards/rejected": -4.871966361999512, "step": 10710 }, { "epoch": 0.13, "learning_rate": 4.987792602857494e-06, "logits/chosen": -3.1230320930480957, "logits/rejected": -3.0833563804626465, "logps/chosen": -30.32535171508789, "logps/rejected": -503.41009521484375, "loss": 0.0429, "rewards/accuracies": 1.0, "rewards/chosen": 0.12846532464027405, "rewards/margins": 4.792965888977051, "rewards/rejected": -4.664500713348389, "step": 10720 }, { "epoch": 0.13, "learning_rate": 4.987689279570409e-06, "logits/chosen": -3.1326146125793457, "logits/rejected": -3.100908041000366, "logps/chosen": -30.514551162719727, "logps/rejected": -506.15948486328125, "loss": 0.0531, "rewards/accuracies": 1.0, "rewards/chosen": 0.0696224793791771, "rewards/margins": 4.763131141662598, "rewards/rejected": -4.693509101867676, "step": 10730 }, { "epoch": 0.13, "learning_rate": 4.987585521939685e-06, "logits/chosen": -3.123471260070801, "logits/rejected": -3.0799882411956787, "logps/chosen": -35.73051834106445, "logps/rejected": -501.70684814453125, "loss": 0.0516, "rewards/accuracies": 1.0, "rewards/chosen": 0.163339301943779, "rewards/margins": 4.796446323394775, "rewards/rejected": -4.6331071853637695, "step": 10740 }, { "epoch": 0.13, "learning_rate": 4.987481329983437e-06, "logits/chosen": -3.1143760681152344, "logits/rejected": -3.051264524459839, "logps/chosen": -59.71904754638672, "logps/rejected": -717.1132202148438, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": -0.08689513057470322, "rewards/margins": 6.672126770019531, "rewards/rejected": -6.759021759033203, "step": 10750 }, { "epoch": 0.13, "learning_rate": 4.987376703719858e-06, "logits/chosen": -3.114203929901123, "logits/rejected": -3.053985834121704, "logps/chosen": -38.87504959106445, "logps/rejected": -601.4718017578125, "loss": 0.0463, "rewards/accuracies": 1.0, "rewards/chosen": 0.1247311607003212, "rewards/margins": 5.75620698928833, "rewards/rejected": -5.631475925445557, "step": 10760 }, { "epoch": 0.13, "learning_rate": 4.987271643167216e-06, "logits/chosen": -3.0899832248687744, "logits/rejected": -3.0212152004241943, "logps/chosen": -41.7475700378418, "logps/rejected": -587.0457153320312, "loss": 0.0535, "rewards/accuracies": 1.0, "rewards/chosen": 0.05580659955739975, "rewards/margins": 5.538045883178711, "rewards/rejected": -5.482239246368408, "step": 10770 }, { "epoch": 0.13, "learning_rate": 4.987166148343853e-06, "logits/chosen": -3.109895944595337, "logits/rejected": -3.062371253967285, "logps/chosen": -46.026390075683594, "logps/rejected": -545.9715576171875, "loss": 0.0478, "rewards/accuracies": 1.0, "rewards/chosen": -0.041328370571136475, "rewards/margins": 5.0292768478393555, "rewards/rejected": -5.070605278015137, "step": 10780 }, { "epoch": 0.13, "learning_rate": 4.9870602192681874e-06, "logits/chosen": -3.1732177734375, "logits/rejected": -3.0706026554107666, "logps/chosen": -59.29674530029297, "logps/rejected": -924.8777465820312, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/chosen": -0.00839453935623169, "rewards/margins": 8.835100173950195, "rewards/rejected": -8.84349536895752, "step": 10790 }, { "epoch": 0.13, "learning_rate": 4.986953855958715e-06, "logits/chosen": -3.0754201412200928, "logits/rejected": -3.0198135375976562, "logps/chosen": -41.89788818359375, "logps/rejected": -672.5613403320312, "loss": 0.0375, "rewards/accuracies": 1.0, "rewards/chosen": 0.027756160125136375, "rewards/margins": 6.351806163787842, "rewards/rejected": -6.324049949645996, "step": 10800 }, { "epoch": 0.13, "learning_rate": 4.986847058434007e-06, "logits/chosen": -3.116793155670166, "logits/rejected": -3.050499200820923, "logps/chosen": -28.155715942382812, "logps/rejected": -554.7745971679688, "loss": 0.047, "rewards/accuracies": 1.0, "rewards/chosen": 0.17857253551483154, "rewards/margins": 5.350437164306641, "rewards/rejected": -5.1718645095825195, "step": 10810 }, { "epoch": 0.13, "learning_rate": 4.986739826712709e-06, "logits/chosen": -3.096069097518921, "logits/rejected": -3.0786213874816895, "logps/chosen": -71.69318389892578, "logps/rejected": -510.8380432128906, "loss": 0.1209, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.25322386622428894, "rewards/margins": 4.467995643615723, "rewards/rejected": -4.721220016479492, "step": 10820 }, { "epoch": 0.13, "learning_rate": 4.986632160813545e-06, "logits/chosen": -3.095895290374756, "logits/rejected": -3.060084581375122, "logps/chosen": -29.838542938232422, "logps/rejected": -443.96820068359375, "loss": 0.201, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.10188207775354385, "rewards/margins": 4.177187919616699, "rewards/rejected": -4.075305938720703, "step": 10830 }, { "epoch": 0.13, "learning_rate": 4.986524060755311e-06, "logits/chosen": -3.1238951683044434, "logits/rejected": -3.08497953414917, "logps/chosen": -37.822792053222656, "logps/rejected": -532.3305053710938, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": 0.09161935746669769, "rewards/margins": 5.031641960144043, "rewards/rejected": -4.940021991729736, "step": 10840 }, { "epoch": 0.13, "learning_rate": 4.986415526556883e-06, "logits/chosen": -3.133512020111084, "logits/rejected": -3.0815372467041016, "logps/chosen": -30.83675193786621, "logps/rejected": -502.71484375, "loss": 0.1164, "rewards/accuracies": 1.0, "rewards/chosen": 0.12261015176773071, "rewards/margins": 4.755334854125977, "rewards/rejected": -4.632724761962891, "step": 10850 }, { "epoch": 0.13, "learning_rate": 4.9863065582372085e-06, "logits/chosen": -3.1450953483581543, "logits/rejected": -3.080237865447998, "logps/chosen": -45.432228088378906, "logps/rejected": -534.2513427734375, "loss": 0.0833, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.023103129118680954, "rewards/margins": 4.957339286804199, "rewards/rejected": -4.9342360496521, "step": 10860 }, { "epoch": 0.13, "learning_rate": 4.986197155815315e-06, "logits/chosen": -3.121121883392334, "logits/rejected": -3.051875114440918, "logps/chosen": -41.01947021484375, "logps/rejected": -498.23577880859375, "loss": 0.0453, "rewards/accuracies": 1.0, "rewards/chosen": 0.11646588146686554, "rewards/margins": 4.711089134216309, "rewards/rejected": -4.594624042510986, "step": 10870 }, { "epoch": 0.13, "learning_rate": 4.986087319310304e-06, "logits/chosen": -3.1136207580566406, "logits/rejected": -3.0761446952819824, "logps/chosen": -34.263580322265625, "logps/rejected": -385.431640625, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": 0.04906047508120537, "rewards/margins": 3.528594970703125, "rewards/rejected": -3.479534864425659, "step": 10880 }, { "epoch": 0.13, "learning_rate": 4.98597704874135e-06, "logits/chosen": -3.094285726547241, "logits/rejected": -3.0737979412078857, "logps/chosen": -38.25449752807617, "logps/rejected": -446.8938903808594, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": 0.004521495196968317, "rewards/margins": 4.100203037261963, "rewards/rejected": -4.095681667327881, "step": 10890 }, { "epoch": 0.13, "learning_rate": 4.985866344127709e-06, "logits/chosen": -3.0938713550567627, "logits/rejected": -3.0155255794525146, "logps/chosen": -105.37247467041016, "logps/rejected": -619.1956787109375, "loss": 0.172, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.608418345451355, "rewards/margins": 5.188089847564697, "rewards/rejected": -5.796507835388184, "step": 10900 }, { "epoch": 0.13, "learning_rate": 4.985755205488708e-06, "logits/chosen": -3.0773701667785645, "logits/rejected": -3.020024061203003, "logps/chosen": -35.43396759033203, "logps/rejected": -530.9786987304688, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": 0.09560501575469971, "rewards/margins": 5.007555961608887, "rewards/rejected": -4.91195011138916, "step": 10910 }, { "epoch": 0.13, "learning_rate": 4.985643632843753e-06, "logits/chosen": -3.1323745250701904, "logits/rejected": -3.0500099658966064, "logps/chosen": -45.87617111206055, "logps/rejected": -509.22430419921875, "loss": 0.1136, "rewards/accuracies": 1.0, "rewards/chosen": 0.03903815895318985, "rewards/margins": 4.73333215713501, "rewards/rejected": -4.694293975830078, "step": 10920 }, { "epoch": 0.13, "learning_rate": 4.9855316262123225e-06, "logits/chosen": -3.092914581298828, "logits/rejected": -3.0203537940979004, "logps/chosen": -63.898582458496094, "logps/rejected": -494.7840270996094, "loss": 0.1148, "rewards/accuracies": 1.0, "rewards/chosen": -0.1709696352481842, "rewards/margins": 4.388367652893066, "rewards/rejected": -4.5593366622924805, "step": 10930 }, { "epoch": 0.13, "learning_rate": 4.985419185613974e-06, "logits/chosen": -3.122653007507324, "logits/rejected": -3.0796544551849365, "logps/chosen": -28.860729217529297, "logps/rejected": -467.9576110839844, "loss": 0.1036, "rewards/accuracies": 1.0, "rewards/chosen": 0.166174054145813, "rewards/margins": 4.477345943450928, "rewards/rejected": -4.311171531677246, "step": 10940 }, { "epoch": 0.13, "learning_rate": 4.985306311068338e-06, "logits/chosen": -3.138653516769409, "logits/rejected": -3.0559046268463135, "logps/chosen": -66.76249694824219, "logps/rejected": -678.40625, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": -0.03349442034959793, "rewards/margins": 6.347217082977295, "rewards/rejected": -6.380710601806641, "step": 10950 }, { "epoch": 0.13, "learning_rate": 4.985193002595122e-06, "logits/chosen": -3.1258645057678223, "logits/rejected": -3.096487522125244, "logps/chosen": -18.439105987548828, "logps/rejected": -404.2735290527344, "loss": 0.0506, "rewards/accuracies": 1.0, "rewards/chosen": 0.1956455111503601, "rewards/margins": 3.87349009513855, "rewards/rejected": -3.677845001220703, "step": 10960 }, { "epoch": 0.13, "learning_rate": 4.985079260214112e-06, "logits/chosen": -3.0814998149871826, "logits/rejected": -3.0485644340515137, "logps/chosen": -42.929405212402344, "logps/rejected": -622.8216552734375, "loss": 0.0387, "rewards/accuracies": 1.0, "rewards/chosen": 0.050631213933229446, "rewards/margins": 5.881433010101318, "rewards/rejected": -5.830801963806152, "step": 10970 }, { "epoch": 0.13, "learning_rate": 4.984965083945164e-06, "logits/chosen": -3.118481159210205, "logits/rejected": -3.0904955863952637, "logps/chosen": -39.025794982910156, "logps/rejected": -513.1896362304688, "loss": 0.1203, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0025350451469421387, "rewards/margins": 4.7547736167907715, "rewards/rejected": -4.757308006286621, "step": 10980 }, { "epoch": 0.13, "learning_rate": 4.984850473808214e-06, "logits/chosen": -3.1388449668884277, "logits/rejected": -3.0653672218322754, "logps/chosen": -96.95951080322266, "logps/rejected": -512.4745483398438, "loss": 0.2212, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.49327796697616577, "rewards/margins": 4.230251789093018, "rewards/rejected": -4.723529815673828, "step": 10990 }, { "epoch": 0.13, "learning_rate": 4.984735429823272e-06, "logits/chosen": -3.141329765319824, "logits/rejected": -3.0483524799346924, "logps/chosen": -69.67383575439453, "logps/rejected": -745.1460571289062, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -0.15712818503379822, "rewards/margins": 6.869257926940918, "rewards/rejected": -7.026386260986328, "step": 11000 }, { "epoch": 0.13, "learning_rate": 4.984619952010426e-06, "logits/chosen": -3.1354711055755615, "logits/rejected": -3.1091458797454834, "logps/chosen": -46.083641052246094, "logps/rejected": -488.38385009765625, "loss": 0.0495, "rewards/accuracies": 1.0, "rewards/chosen": 0.01743098348379135, "rewards/margins": 4.529683589935303, "rewards/rejected": -4.512252330780029, "step": 11010 }, { "epoch": 0.13, "learning_rate": 4.984504040389837e-06, "logits/chosen": -3.1373627185821533, "logits/rejected": -3.0700039863586426, "logps/chosen": -55.658836364746094, "logps/rejected": -629.015869140625, "loss": 0.0681, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.031086962670087814, "rewards/margins": 5.872490406036377, "rewards/rejected": -5.903576850891113, "step": 11020 }, { "epoch": 0.13, "learning_rate": 4.984387694981743e-06, "logits/chosen": -3.1468985080718994, "logits/rejected": -3.099276304244995, "logps/chosen": -78.30196380615234, "logps/rejected": -571.6697387695312, "loss": 0.0636, "rewards/accuracies": 1.0, "rewards/chosen": -0.3137621283531189, "rewards/margins": 5.013864040374756, "rewards/rejected": -5.3276262283325195, "step": 11030 }, { "epoch": 0.13, "learning_rate": 4.984270915806458e-06, "logits/chosen": -3.078270673751831, "logits/rejected": -2.9989469051361084, "logps/chosen": -53.880043029785156, "logps/rejected": -625.6227416992188, "loss": 0.1141, "rewards/accuracies": 1.0, "rewards/chosen": -0.06052782014012337, "rewards/margins": 5.791456699371338, "rewards/rejected": -5.851983547210693, "step": 11040 }, { "epoch": 0.13, "learning_rate": 4.98415370288437e-06, "logits/chosen": -3.1124958992004395, "logits/rejected": -3.06718373298645, "logps/chosen": -35.30287551879883, "logps/rejected": -454.7515563964844, "loss": 0.0646, "rewards/accuracies": 1.0, "rewards/chosen": 0.10570122301578522, "rewards/margins": 4.265681743621826, "rewards/rejected": -4.159981727600098, "step": 11050 }, { "epoch": 0.13, "learning_rate": 4.984036056235946e-06, "logits/chosen": -3.1326754093170166, "logits/rejected": -3.0857081413269043, "logps/chosen": -27.135974884033203, "logps/rejected": -540.814208984375, "loss": 0.0464, "rewards/accuracies": 1.0, "rewards/chosen": 0.16256992518901825, "rewards/margins": 5.191890716552734, "rewards/rejected": -5.02932071685791, "step": 11060 }, { "epoch": 0.13, "learning_rate": 4.983917975881724e-06, "logits/chosen": -3.0769011974334717, "logits/rejected": -3.0271427631378174, "logps/chosen": -46.91804122924805, "logps/rejected": -537.3005981445312, "loss": 0.0503, "rewards/accuracies": 1.0, "rewards/chosen": -0.014001891016960144, "rewards/margins": 4.97075891494751, "rewards/rejected": -4.9847612380981445, "step": 11070 }, { "epoch": 0.13, "learning_rate": 4.9837994618423235e-06, "logits/chosen": -3.083373546600342, "logits/rejected": -2.9918203353881836, "logps/chosen": -37.03863525390625, "logps/rejected": -568.75244140625, "loss": 0.0465, "rewards/accuracies": 1.0, "rewards/chosen": 0.06033667176961899, "rewards/margins": 5.365311145782471, "rewards/rejected": -5.30497407913208, "step": 11080 }, { "epoch": 0.13, "learning_rate": 4.9836805141384345e-06, "logits/chosen": -3.1140692234039307, "logits/rejected": -3.051419734954834, "logps/chosen": -44.440528869628906, "logps/rejected": -610.2250366210938, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/chosen": 0.05109548568725586, "rewards/margins": 5.761467933654785, "rewards/rejected": -5.710372447967529, "step": 11090 }, { "epoch": 0.13, "learning_rate": 4.983561132790827e-06, "logits/chosen": -3.092804193496704, "logits/rejected": -3.044400453567505, "logps/chosen": -70.34070587158203, "logps/rejected": -697.1937255859375, "loss": 0.1094, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.26008135080337524, "rewards/margins": 6.3152995109558105, "rewards/rejected": -6.575380802154541, "step": 11100 }, { "epoch": 0.13, "learning_rate": 4.983441317820342e-06, "logits/chosen": -3.103532314300537, "logits/rejected": -3.0831246376037598, "logps/chosen": -31.717575073242188, "logps/rejected": -370.9524841308594, "loss": 0.1011, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.07752587646245956, "rewards/margins": 3.4173922538757324, "rewards/rejected": -3.3398661613464355, "step": 11110 }, { "epoch": 0.13, "learning_rate": 4.983321069247902e-06, "logits/chosen": -3.1069588661193848, "logits/rejected": -3.0246195793151855, "logps/chosen": -52.40391159057617, "logps/rejected": -621.4522705078125, "loss": 0.0599, "rewards/accuracies": 1.0, "rewards/chosen": 0.005577713251113892, "rewards/margins": 5.824158668518066, "rewards/rejected": -5.818580627441406, "step": 11120 }, { "epoch": 0.13, "learning_rate": 4.9832003870945e-06, "logits/chosen": -3.094414472579956, "logits/rejected": -2.9943652153015137, "logps/chosen": -54.73113250732422, "logps/rejected": -639.8638305664062, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": -0.05013049393892288, "rewards/margins": 5.958499431610107, "rewards/rejected": -6.008630275726318, "step": 11130 }, { "epoch": 0.13, "learning_rate": 4.983079271381207e-06, "logits/chosen": -3.1226742267608643, "logits/rejected": -3.0801897048950195, "logps/chosen": -47.3804817199707, "logps/rejected": -523.3130493164062, "loss": 0.1194, "rewards/accuracies": 1.0, "rewards/chosen": -0.042339518666267395, "rewards/margins": 4.791443347930908, "rewards/rejected": -4.833783149719238, "step": 11140 }, { "epoch": 0.13, "learning_rate": 4.982957722129169e-06, "logits/chosen": -3.099031448364258, "logits/rejected": -3.0560154914855957, "logps/chosen": -24.821680068969727, "logps/rejected": -456.63006591796875, "loss": 0.0467, "rewards/accuracies": 1.0, "rewards/chosen": 0.18393956124782562, "rewards/margins": 4.359757900238037, "rewards/rejected": -4.175817966461182, "step": 11150 }, { "epoch": 0.13, "learning_rate": 4.98283573935961e-06, "logits/chosen": -3.116250991821289, "logits/rejected": -3.0364856719970703, "logps/chosen": -65.31416320800781, "logps/rejected": -606.1244506835938, "loss": 0.1066, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2080930471420288, "rewards/margins": 5.458554744720459, "rewards/rejected": -5.666647434234619, "step": 11160 }, { "epoch": 0.13, "learning_rate": 4.982713323093827e-06, "logits/chosen": -3.0867667198181152, "logits/rejected": -2.952177047729492, "logps/chosen": -68.8281478881836, "logps/rejected": -752.9945068359375, "loss": 0.0449, "rewards/accuracies": 1.0, "rewards/chosen": -0.09659077972173691, "rewards/margins": 7.021785736083984, "rewards/rejected": -7.1183762550354, "step": 11170 }, { "epoch": 0.13, "learning_rate": 4.9825904733531916e-06, "logits/chosen": -3.1079275608062744, "logits/rejected": -3.064068555831909, "logps/chosen": -63.53325271606445, "logps/rejected": -450.9444885253906, "loss": 0.1098, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.19285039603710175, "rewards/margins": 3.9050650596618652, "rewards/rejected": -4.097914695739746, "step": 11180 }, { "epoch": 0.13, "learning_rate": 4.982467190159156e-06, "logits/chosen": -3.150569200515747, "logits/rejected": -3.1198174953460693, "logps/chosen": -22.861251831054688, "logps/rejected": -449.45623779296875, "loss": 0.0522, "rewards/accuracies": 1.0, "rewards/chosen": 0.1572239100933075, "rewards/margins": 4.277518272399902, "rewards/rejected": -4.120294570922852, "step": 11190 }, { "epoch": 0.13, "learning_rate": 4.982343473533242e-06, "logits/chosen": -3.0856423377990723, "logits/rejected": -3.0223634243011475, "logps/chosen": -43.45486831665039, "logps/rejected": -578.059814453125, "loss": 0.0536, "rewards/accuracies": 1.0, "rewards/chosen": 0.06623435020446777, "rewards/margins": 5.466017246246338, "rewards/rejected": -5.399782657623291, "step": 11200 }, { "epoch": 0.13, "learning_rate": 4.9822193234970536e-06, "logits/chosen": -3.115044116973877, "logits/rejected": -3.057826042175293, "logps/chosen": -51.49851608276367, "logps/rejected": -550.3159790039062, "loss": 0.1261, "rewards/accuracies": 1.0, "rewards/chosen": -0.07239262014627457, "rewards/margins": 5.042213439941406, "rewards/rejected": -5.114605903625488, "step": 11210 }, { "epoch": 0.13, "learning_rate": 4.9820947400722656e-06, "logits/chosen": -3.1512763500213623, "logits/rejected": -3.0948596000671387, "logps/chosen": -33.48408889770508, "logps/rejected": -425.38629150390625, "loss": 0.0502, "rewards/accuracies": 1.0, "rewards/chosen": 0.08462327718734741, "rewards/margins": 3.96746826171875, "rewards/rejected": -3.8828444480895996, "step": 11220 }, { "epoch": 0.13, "learning_rate": 4.981969723280628e-06, "logits/chosen": -3.1097412109375, "logits/rejected": -3.014714241027832, "logps/chosen": -150.69415283203125, "logps/rejected": -756.2052001953125, "loss": 0.0864, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9783941507339478, "rewards/margins": 6.1937456130981445, "rewards/rejected": -7.172139644622803, "step": 11230 }, { "epoch": 0.13, "learning_rate": 4.981844273143971e-06, "logits/chosen": -3.13090181350708, "logits/rejected": -3.1096863746643066, "logps/chosen": -56.80261993408203, "logps/rejected": -437.5302734375, "loss": 0.1662, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1469646394252777, "rewards/margins": 3.855782985687256, "rewards/rejected": -4.002747535705566, "step": 11240 }, { "epoch": 0.13, "learning_rate": 4.981718389684197e-06, "logits/chosen": -3.1324832439422607, "logits/rejected": -3.0122740268707275, "logps/chosen": -98.12322998046875, "logps/rejected": -797.8138427734375, "loss": 0.0361, "rewards/accuracies": 1.0, "rewards/chosen": -0.3888486623764038, "rewards/margins": 7.172914028167725, "rewards/rejected": -7.561763763427734, "step": 11250 }, { "epoch": 0.13, "learning_rate": 4.981592072923285e-06, "logits/chosen": -3.1689040660858154, "logits/rejected": -3.0902316570281982, "logps/chosen": -28.655099868774414, "logps/rejected": -573.18603515625, "loss": 0.0416, "rewards/accuracies": 1.0, "rewards/chosen": 0.13337662816047668, "rewards/margins": 5.473182678222656, "rewards/rejected": -5.33980655670166, "step": 11260 }, { "epoch": 0.13, "learning_rate": 4.98146532288329e-06, "logits/chosen": -3.114811420440674, "logits/rejected": -3.0614371299743652, "logps/chosen": -40.37739181518555, "logps/rejected": -581.4114990234375, "loss": 0.0435, "rewards/accuracies": 1.0, "rewards/chosen": 0.12335608899593353, "rewards/margins": 5.552520275115967, "rewards/rejected": -5.429163932800293, "step": 11270 }, { "epoch": 0.14, "learning_rate": 4.98133813958634e-06, "logits/chosen": -3.1312408447265625, "logits/rejected": -3.057875394821167, "logps/chosen": -89.50236511230469, "logps/rejected": -807.552490234375, "loss": 0.0487, "rewards/accuracies": 1.0, "rewards/chosen": -0.3628005385398865, "rewards/margins": 7.311671257019043, "rewards/rejected": -7.674471855163574, "step": 11280 }, { "epoch": 0.14, "learning_rate": 4.981210523054644e-06, "logits/chosen": -3.1247718334198, "logits/rejected": -3.056657314300537, "logps/chosen": -110.26847839355469, "logps/rejected": -761.6038818359375, "loss": 0.114, "rewards/accuracies": 1.0, "rewards/chosen": -0.5734876394271851, "rewards/margins": 6.6338911056518555, "rewards/rejected": -7.207379341125488, "step": 11290 }, { "epoch": 0.14, "learning_rate": 4.9810824733104815e-06, "logits/chosen": -3.097822666168213, "logits/rejected": -2.9834580421447754, "logps/chosen": -47.340614318847656, "logps/rejected": -682.5821533203125, "loss": 0.048, "rewards/accuracies": 1.0, "rewards/chosen": 0.031325388699769974, "rewards/margins": 6.470628261566162, "rewards/rejected": -6.439302921295166, "step": 11300 }, { "epoch": 0.14, "learning_rate": 4.980953990376209e-06, "logits/chosen": -3.1401820182800293, "logits/rejected": -3.104778528213501, "logps/chosen": -28.271286010742188, "logps/rejected": -488.64776611328125, "loss": 0.0653, "rewards/accuracies": 1.0, "rewards/chosen": 0.14083559811115265, "rewards/margins": 4.648734092712402, "rewards/rejected": -4.507898807525635, "step": 11310 }, { "epoch": 0.14, "learning_rate": 4.9808250742742616e-06, "logits/chosen": -3.141599178314209, "logits/rejected": -3.043623208999634, "logps/chosen": -36.038761138916016, "logps/rejected": -620.5734252929688, "loss": 0.0465, "rewards/accuracies": 1.0, "rewards/chosen": 0.1183479055762291, "rewards/margins": 5.9317216873168945, "rewards/rejected": -5.813374042510986, "step": 11320 }, { "epoch": 0.14, "learning_rate": 4.980695725027146e-06, "logits/chosen": -3.111940383911133, "logits/rejected": -3.0517966747283936, "logps/chosen": -52.246498107910156, "logps/rejected": -775.6749877929688, "loss": 0.102, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.013579335995018482, "rewards/margins": 7.3280348777771, "rewards/rejected": -7.341614723205566, "step": 11330 }, { "epoch": 0.14, "learning_rate": 4.980565942657446e-06, "logits/chosen": -3.131714105606079, "logits/rejected": -3.015943765640259, "logps/chosen": -62.96488571166992, "logps/rejected": -525.7294921875, "loss": 0.1836, "rewards/accuracies": 1.0, "rewards/chosen": -0.055621158331632614, "rewards/margins": 4.803840637207031, "rewards/rejected": -4.859461784362793, "step": 11340 }, { "epoch": 0.14, "learning_rate": 4.980435727187823e-06, "logits/chosen": -3.1484501361846924, "logits/rejected": -3.0977182388305664, "logps/chosen": -88.3237533569336, "logps/rejected": -640.6373901367188, "loss": 0.0421, "rewards/accuracies": 1.0, "rewards/chosen": -0.3335133194923401, "rewards/margins": 5.665997505187988, "rewards/rejected": -5.999510288238525, "step": 11350 }, { "epoch": 0.14, "learning_rate": 4.980305078641011e-06, "logits/chosen": -3.1259605884552, "logits/rejected": -3.0751023292541504, "logps/chosen": -105.6871566772461, "logps/rejected": -614.1091918945312, "loss": 0.0907, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5325325727462769, "rewards/margins": 5.208441734313965, "rewards/rejected": -5.740973949432373, "step": 11360 }, { "epoch": 0.14, "learning_rate": 4.9801739970398205e-06, "logits/chosen": -3.1577188968658447, "logits/rejected": -3.106661796569824, "logps/chosen": -42.1165657043457, "logps/rejected": -488.75726318359375, "loss": 0.1641, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.012040753848850727, "rewards/margins": 4.498644828796387, "rewards/rejected": -4.510685920715332, "step": 11370 }, { "epoch": 0.14, "learning_rate": 4.980042482407139e-06, "logits/chosen": -3.13720703125, "logits/rejected": -3.0484061241149902, "logps/chosen": -75.89009094238281, "logps/rejected": -815.7130737304688, "loss": 0.1058, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.18498626351356506, "rewards/margins": 7.564248561859131, "rewards/rejected": -7.749234676361084, "step": 11380 }, { "epoch": 0.14, "learning_rate": 4.979910534765928e-06, "logits/chosen": -3.141253709793091, "logits/rejected": -3.0631699562072754, "logps/chosen": -56.59687423706055, "logps/rejected": -700.8775634765625, "loss": 0.0425, "rewards/accuracies": 1.0, "rewards/chosen": -0.027997568249702454, "rewards/margins": 6.586673736572266, "rewards/rejected": -6.614671230316162, "step": 11390 }, { "epoch": 0.14, "learning_rate": 4.979778154139225e-06, "logits/chosen": -3.124133825302124, "logits/rejected": -3.0622363090515137, "logps/chosen": -79.20333099365234, "logps/rejected": -571.22119140625, "loss": 0.1151, "rewards/accuracies": 1.0, "rewards/chosen": -0.2906619906425476, "rewards/margins": 5.004582405090332, "rewards/rejected": -5.295244216918945, "step": 11400 }, { "epoch": 0.14, "learning_rate": 4.979645340550143e-06, "logits/chosen": -3.1053690910339355, "logits/rejected": -3.015153169631958, "logps/chosen": -74.6761245727539, "logps/rejected": -734.6959228515625, "loss": 0.1119, "rewards/accuracies": 1.0, "rewards/chosen": -0.18816351890563965, "rewards/margins": 6.756476402282715, "rewards/rejected": -6.94464111328125, "step": 11410 }, { "epoch": 0.14, "learning_rate": 4.9795120940218725e-06, "logits/chosen": -3.126818895339966, "logits/rejected": -3.08575701713562, "logps/chosen": -40.177066802978516, "logps/rejected": -571.0109252929688, "loss": 0.0516, "rewards/accuracies": 1.0, "rewards/chosen": 0.08759159594774246, "rewards/margins": 5.4184889793396, "rewards/rejected": -5.330897808074951, "step": 11420 }, { "epoch": 0.14, "learning_rate": 4.979378414577677e-06, "logits/chosen": -3.1393675804138184, "logits/rejected": -3.066988706588745, "logps/chosen": -84.59564971923828, "logps/rejected": -640.9767456054688, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": -0.37817245721817017, "rewards/margins": 5.628551483154297, "rewards/rejected": -6.0067243576049805, "step": 11430 }, { "epoch": 0.14, "learning_rate": 4.979244302240895e-06, "logits/chosen": -3.099137544631958, "logits/rejected": -3.0370376110076904, "logps/chosen": -56.02538299560547, "logps/rejected": -689.6585693359375, "loss": 0.0541, "rewards/accuracies": 1.0, "rewards/chosen": -0.0890224352478981, "rewards/margins": 6.4037909507751465, "rewards/rejected": -6.4928131103515625, "step": 11440 }, { "epoch": 0.14, "learning_rate": 4.979109757034945e-06, "logits/chosen": -3.1242268085479736, "logits/rejected": -3.051222562789917, "logps/chosen": -76.29955291748047, "logps/rejected": -588.3392333984375, "loss": 0.1243, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.25493600964546204, "rewards/margins": 5.247382164001465, "rewards/rejected": -5.502317905426025, "step": 11450 }, { "epoch": 0.14, "learning_rate": 4.978974778983316e-06, "logits/chosen": -3.1384787559509277, "logits/rejected": -3.0746970176696777, "logps/chosen": -85.96754455566406, "logps/rejected": -736.6190185546875, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": -0.3413424789905548, "rewards/margins": 6.6214399337768555, "rewards/rejected": -6.9627838134765625, "step": 11460 }, { "epoch": 0.14, "learning_rate": 4.9788393681095774e-06, "logits/chosen": -3.141427993774414, "logits/rejected": -3.105396270751953, "logps/chosen": -47.116004943847656, "logps/rejected": -614.2943725585938, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/chosen": -0.042310480028390884, "rewards/margins": 5.710268497467041, "rewards/rejected": -5.7525787353515625, "step": 11470 }, { "epoch": 0.14, "learning_rate": 4.978703524437368e-06, "logits/chosen": -3.156447410583496, "logits/rejected": -3.1206254959106445, "logps/chosen": -29.068706512451172, "logps/rejected": -562.202392578125, "loss": 0.0468, "rewards/accuracies": 1.0, "rewards/chosen": 0.14447906613349915, "rewards/margins": 5.385921478271484, "rewards/rejected": -5.2414422035217285, "step": 11480 }, { "epoch": 0.14, "learning_rate": 4.978567247990408e-06, "logits/chosen": -3.1367599964141846, "logits/rejected": -3.1364874839782715, "logps/chosen": -23.1626033782959, "logps/rejected": -413.7874450683594, "loss": 0.0513, "rewards/accuracies": 1.0, "rewards/chosen": 0.1676935851573944, "rewards/margins": 3.935053586959839, "rewards/rejected": -3.767359972000122, "step": 11490 }, { "epoch": 0.14, "learning_rate": 4.978430538792491e-06, "logits/chosen": -3.143130302429199, "logits/rejected": -3.0702061653137207, "logps/chosen": -47.20264434814453, "logps/rejected": -645.6603393554688, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": 0.02022288367152214, "rewards/margins": 6.066488265991211, "rewards/rejected": -6.046265602111816, "step": 11500 }, { "epoch": 0.14, "learning_rate": 4.978293396867485e-06, "logits/chosen": -3.1087944507598877, "logits/rejected": -2.997565746307373, "logps/chosen": -67.26729583740234, "logps/rejected": -660.7174072265625, "loss": 0.1247, "rewards/accuracies": 1.0, "rewards/chosen": -0.09176081418991089, "rewards/margins": 6.108555793762207, "rewards/rejected": -6.200316429138184, "step": 11510 }, { "epoch": 0.14, "learning_rate": 4.978155822239335e-06, "logits/chosen": -3.110346555709839, "logits/rejected": -3.0600287914276123, "logps/chosen": -160.7854766845703, "logps/rejected": -574.9549560546875, "loss": 0.051, "rewards/accuracies": 1.0, "rewards/chosen": -1.1244930028915405, "rewards/margins": 4.239261150360107, "rewards/rejected": -5.3637542724609375, "step": 11520 }, { "epoch": 0.14, "learning_rate": 4.9780178149320616e-06, "logits/chosen": -3.1093974113464355, "logits/rejected": -3.0578720569610596, "logps/chosen": -67.8206558227539, "logps/rejected": -673.1240844726562, "loss": 0.1063, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2171907126903534, "rewards/margins": 6.131374835968018, "rewards/rejected": -6.348565578460693, "step": 11530 }, { "epoch": 0.14, "learning_rate": 4.97787937496976e-06, "logits/chosen": -3.107591152191162, "logits/rejected": -3.058385133743286, "logps/chosen": -57.087318420410156, "logps/rejected": -549.9573974609375, "loss": 0.0432, "rewards/accuracies": 1.0, "rewards/chosen": -0.05441342666745186, "rewards/margins": 5.053942680358887, "rewards/rejected": -5.108355522155762, "step": 11540 }, { "epoch": 0.14, "learning_rate": 4.977740502376601e-06, "logits/chosen": -3.1497559547424316, "logits/rejected": -3.059670925140381, "logps/chosen": -57.65155029296875, "logps/rejected": -674.0105590820312, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/chosen": -0.05443757027387619, "rewards/margins": 6.293389797210693, "rewards/rejected": -6.347827911376953, "step": 11550 }, { "epoch": 0.14, "learning_rate": 4.977601197176832e-06, "logits/chosen": -3.109778881072998, "logits/rejected": -3.0748939514160156, "logps/chosen": -54.23789596557617, "logps/rejected": -775.0029296875, "loss": 0.0369, "rewards/accuracies": 1.0, "rewards/chosen": -0.04287820681929588, "rewards/margins": 7.305832862854004, "rewards/rejected": -7.348710536956787, "step": 11560 }, { "epoch": 0.14, "learning_rate": 4.977461459394776e-06, "logits/chosen": -3.128042459487915, "logits/rejected": -3.0753250122070312, "logps/chosen": -42.009666442871094, "logps/rejected": -524.334228515625, "loss": 0.0741, "rewards/accuracies": 1.0, "rewards/chosen": 0.053657568991184235, "rewards/margins": 4.911094665527344, "rewards/rejected": -4.857437610626221, "step": 11570 }, { "epoch": 0.14, "learning_rate": 4.9773212890548295e-06, "logits/chosen": -3.116114377975464, "logits/rejected": -3.0769503116607666, "logps/chosen": -35.72043228149414, "logps/rejected": -545.6143798828125, "loss": 0.0506, "rewards/accuracies": 1.0, "rewards/chosen": 0.07494955509901047, "rewards/margins": 5.157727241516113, "rewards/rejected": -5.082777976989746, "step": 11580 }, { "epoch": 0.14, "learning_rate": 4.977180686181467e-06, "logits/chosen": -3.143627882003784, "logits/rejected": -3.0226173400878906, "logps/chosen": -54.66080856323242, "logps/rejected": -604.383544921875, "loss": 0.0482, "rewards/accuracies": 1.0, "rewards/chosen": -0.04584948718547821, "rewards/margins": 5.6075849533081055, "rewards/rejected": -5.653433799743652, "step": 11590 }, { "epoch": 0.14, "learning_rate": 4.977039650799236e-06, "logits/chosen": -3.1444685459136963, "logits/rejected": -3.0850131511688232, "logps/chosen": -62.25843048095703, "logps/rejected": -549.1724853515625, "loss": 0.1761, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.20971646904945374, "rewards/margins": 4.902127742767334, "rewards/rejected": -5.111844062805176, "step": 11600 }, { "epoch": 0.14, "learning_rate": 4.976898182932761e-06, "logits/chosen": -3.1265604496002197, "logits/rejected": -3.0677101612091064, "logps/chosen": -48.94452667236328, "logps/rejected": -616.1166381835938, "loss": 0.0438, "rewards/accuracies": 1.0, "rewards/chosen": -0.039173539727926254, "rewards/margins": 5.732674598693848, "rewards/rejected": -5.771848201751709, "step": 11610 }, { "epoch": 0.14, "learning_rate": 4.976756282606744e-06, "logits/chosen": -3.149904251098633, "logits/rejected": -3.122826099395752, "logps/chosen": -64.30268096923828, "logps/rejected": -664.3590087890625, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": -0.13772240281105042, "rewards/margins": 6.097314834594727, "rewards/rejected": -6.235037326812744, "step": 11620 }, { "epoch": 0.14, "learning_rate": 4.9766139498459585e-06, "logits/chosen": -3.1458427906036377, "logits/rejected": -3.1213512420654297, "logps/chosen": -54.216583251953125, "logps/rejected": -470.0528259277344, "loss": 0.12, "rewards/accuracies": 1.0, "rewards/chosen": -0.15164019167423248, "rewards/margins": 4.167325019836426, "rewards/rejected": -4.318964958190918, "step": 11630 }, { "epoch": 0.14, "learning_rate": 4.976471184675256e-06, "logits/chosen": -3.1418542861938477, "logits/rejected": -3.089250087738037, "logps/chosen": -67.84873962402344, "logps/rejected": -525.5088500976562, "loss": 0.118, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2288222759962082, "rewards/margins": 4.641224384307861, "rewards/rejected": -4.870046138763428, "step": 11640 }, { "epoch": 0.14, "learning_rate": 4.976327987119562e-06, "logits/chosen": -3.1379499435424805, "logits/rejected": -3.068779230117798, "logps/chosen": -90.36975860595703, "logps/rejected": -851.8946533203125, "loss": 0.0426, "rewards/accuracies": 1.0, "rewards/chosen": -0.3711617887020111, "rewards/margins": 7.724249362945557, "rewards/rejected": -8.09541130065918, "step": 11650 }, { "epoch": 0.14, "learning_rate": 4.97618435720388e-06, "logits/chosen": -3.120229959487915, "logits/rejected": -3.1009275913238525, "logps/chosen": -43.86228561401367, "logps/rejected": -550.0541381835938, "loss": 0.0619, "rewards/accuracies": 1.0, "rewards/chosen": -0.012180613353848457, "rewards/margins": 5.110342979431152, "rewards/rejected": -5.122523307800293, "step": 11660 }, { "epoch": 0.14, "learning_rate": 4.976040294953286e-06, "logits/chosen": -3.146348237991333, "logits/rejected": -3.1121437549591064, "logps/chosen": -35.4135856628418, "logps/rejected": -653.3058471679688, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": 0.0573221817612648, "rewards/margins": 6.202315330505371, "rewards/rejected": -6.144993782043457, "step": 11670 }, { "epoch": 0.14, "learning_rate": 4.975895800392934e-06, "logits/chosen": -3.1440484523773193, "logits/rejected": -3.100247859954834, "logps/chosen": -40.84427261352539, "logps/rejected": -646.6834106445312, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": 0.06709345430135727, "rewards/margins": 6.14053201675415, "rewards/rejected": -6.073437690734863, "step": 11680 }, { "epoch": 0.14, "learning_rate": 4.9757508735480505e-06, "logits/chosen": -3.123340606689453, "logits/rejected": -3.0716559886932373, "logps/chosen": -48.74787902832031, "logps/rejected": -562.6395263671875, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/chosen": -0.038322534412145615, "rewards/margins": 5.201150417327881, "rewards/rejected": -5.23947286605835, "step": 11690 }, { "epoch": 0.14, "learning_rate": 4.9756055144439416e-06, "logits/chosen": -3.150372266769409, "logits/rejected": -3.11075496673584, "logps/chosen": -45.80690383911133, "logps/rejected": -569.1470336914062, "loss": 0.0367, "rewards/accuracies": 1.0, "rewards/chosen": 0.028793295845389366, "rewards/margins": 5.321707248687744, "rewards/rejected": -5.292914390563965, "step": 11700 }, { "epoch": 0.14, "learning_rate": 4.975459723105986e-06, "logits/chosen": -3.1190121173858643, "logits/rejected": -3.0821216106414795, "logps/chosen": -39.19682312011719, "logps/rejected": -473.19781494140625, "loss": 0.0424, "rewards/accuracies": 1.0, "rewards/chosen": 0.03000454045832157, "rewards/margins": 4.3716721534729, "rewards/rejected": -4.341667175292969, "step": 11710 }, { "epoch": 0.14, "learning_rate": 4.975313499559638e-06, "logits/chosen": -3.094196319580078, "logits/rejected": -3.006847381591797, "logps/chosen": -59.43349075317383, "logps/rejected": -618.7071533203125, "loss": 0.0488, "rewards/accuracies": 1.0, "rewards/chosen": -0.09941422194242477, "rewards/margins": 5.695810794830322, "rewards/rejected": -5.795224666595459, "step": 11720 }, { "epoch": 0.14, "learning_rate": 4.975166843830427e-06, "logits/chosen": -3.112974166870117, "logits/rejected": -3.0643115043640137, "logps/chosen": -44.86341857910156, "logps/rejected": -661.8466186523438, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/chosen": -0.01882011629641056, "rewards/margins": 6.211477756500244, "rewards/rejected": -6.230298042297363, "step": 11730 }, { "epoch": 0.14, "learning_rate": 4.975019755943959e-06, "logits/chosen": -3.187326192855835, "logits/rejected": -3.1032488346099854, "logps/chosen": -48.38612365722656, "logps/rejected": -604.1303100585938, "loss": 0.1133, "rewards/accuracies": 1.0, "rewards/chosen": 0.056691430509090424, "rewards/margins": 5.711338520050049, "rewards/rejected": -5.654646873474121, "step": 11740 }, { "epoch": 0.14, "learning_rate": 4.974872235925917e-06, "logits/chosen": -3.125551700592041, "logits/rejected": -3.0769689083099365, "logps/chosen": -37.868465423583984, "logps/rejected": -469.70025634765625, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": 0.04197027161717415, "rewards/margins": 4.361945629119873, "rewards/rejected": -4.31997537612915, "step": 11750 }, { "epoch": 0.14, "learning_rate": 4.974724283802056e-06, "logits/chosen": -3.0985615253448486, "logits/rejected": -3.017730474472046, "logps/chosen": -44.029396057128906, "logps/rejected": -579.493896484375, "loss": 0.0434, "rewards/accuracies": 1.0, "rewards/chosen": 0.05544215440750122, "rewards/margins": 5.454549789428711, "rewards/rejected": -5.399108409881592, "step": 11760 }, { "epoch": 0.14, "learning_rate": 4.974575899598208e-06, "logits/chosen": -3.131955623626709, "logits/rejected": -3.0636582374572754, "logps/chosen": -58.47283935546875, "logps/rejected": -678.73388671875, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": -0.0657147616147995, "rewards/margins": 6.319733142852783, "rewards/rejected": -6.3854475021362305, "step": 11770 }, { "epoch": 0.14, "learning_rate": 4.9744270833402805e-06, "logits/chosen": -3.1231658458709717, "logits/rejected": -3.0828475952148438, "logps/chosen": -26.169275283813477, "logps/rejected": -544.018310546875, "loss": 0.1036, "rewards/accuracies": 1.0, "rewards/chosen": 0.1497609168291092, "rewards/margins": 5.1986565589904785, "rewards/rejected": -5.048895359039307, "step": 11780 }, { "epoch": 0.14, "learning_rate": 4.974277835054258e-06, "logits/chosen": -3.129150152206421, "logits/rejected": -3.0861663818359375, "logps/chosen": -42.99947738647461, "logps/rejected": -636.8818359375, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": 0.04178536310791969, "rewards/margins": 6.011785984039307, "rewards/rejected": -5.970000267028809, "step": 11790 }, { "epoch": 0.14, "learning_rate": 4.974128154766195e-06, "logits/chosen": -3.151293992996216, "logits/rejected": -3.109569549560547, "logps/chosen": -31.766155242919922, "logps/rejected": -537.2572021484375, "loss": 0.0358, "rewards/accuracies": 1.0, "rewards/chosen": 0.11451027542352676, "rewards/margins": 5.096773147583008, "rewards/rejected": -4.982263088226318, "step": 11800 }, { "epoch": 0.14, "learning_rate": 4.97397804250223e-06, "logits/chosen": -3.134582042694092, "logits/rejected": -3.085601806640625, "logps/chosen": -41.565223693847656, "logps/rejected": -627.5188598632812, "loss": 0.1099, "rewards/accuracies": 1.0, "rewards/chosen": 0.07866836339235306, "rewards/margins": 5.9526047706604, "rewards/rejected": -5.873936653137207, "step": 11810 }, { "epoch": 0.14, "learning_rate": 4.973827498288569e-06, "logits/chosen": -3.1225125789642334, "logits/rejected": -3.049312114715576, "logps/chosen": -44.759666442871094, "logps/rejected": -721.6165161132812, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/chosen": 0.05805007368326187, "rewards/margins": 6.8788628578186035, "rewards/rejected": -6.820812225341797, "step": 11820 }, { "epoch": 0.14, "learning_rate": 4.973676522151498e-06, "logits/chosen": -3.151085138320923, "logits/rejected": -3.0929970741271973, "logps/chosen": -72.1854476928711, "logps/rejected": -568.029541015625, "loss": 0.0936, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.21859517693519592, "rewards/margins": 5.0772576332092285, "rewards/rejected": -5.2958526611328125, "step": 11830 }, { "epoch": 0.14, "learning_rate": 4.973525114117377e-06, "logits/chosen": -3.1023736000061035, "logits/rejected": -3.059882640838623, "logps/chosen": -31.211843490600586, "logps/rejected": -626.6146240234375, "loss": 0.0427, "rewards/accuracies": 1.0, "rewards/chosen": 0.1522432416677475, "rewards/margins": 6.010124206542969, "rewards/rejected": -5.857880592346191, "step": 11840 }, { "epoch": 0.14, "learning_rate": 4.973373274212641e-06, "logits/chosen": -3.1156606674194336, "logits/rejected": -3.0731029510498047, "logps/chosen": -45.95381546020508, "logps/rejected": -565.7694091796875, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003736734506674111, "rewards/margins": 5.2749857902526855, "rewards/rejected": -5.275359630584717, "step": 11850 }, { "epoch": 0.14, "learning_rate": 4.9732210024637995e-06, "logits/chosen": -3.1363325119018555, "logits/rejected": -3.0756335258483887, "logps/chosen": -63.16912841796875, "logps/rejected": -717.4779052734375, "loss": 0.0391, "rewards/accuracies": 1.0, "rewards/chosen": -0.12897376716136932, "rewards/margins": 6.653199672698975, "rewards/rejected": -6.782174110412598, "step": 11860 }, { "epoch": 0.14, "learning_rate": 4.9730682988974414e-06, "logits/chosen": -3.0773608684539795, "logits/rejected": -3.016181230545044, "logps/chosen": -65.79634094238281, "logps/rejected": -574.1458129882812, "loss": 0.0436, "rewards/accuracies": 1.0, "rewards/chosen": -0.21012206375598907, "rewards/margins": 5.150589466094971, "rewards/rejected": -5.360711097717285, "step": 11870 }, { "epoch": 0.14, "learning_rate": 4.972915163540226e-06, "logits/chosen": -3.1162428855895996, "logits/rejected": -3.0787105560302734, "logps/chosen": -30.17508888244629, "logps/rejected": -402.71319580078125, "loss": 0.0519, "rewards/accuracies": 1.0, "rewards/chosen": 0.06923940032720566, "rewards/margins": 3.7255988121032715, "rewards/rejected": -3.656359910964966, "step": 11880 }, { "epoch": 0.14, "learning_rate": 4.972761596418891e-06, "logits/chosen": -3.1145756244659424, "logits/rejected": -3.0793023109436035, "logps/chosen": -26.193988800048828, "logps/rejected": -459.00579833984375, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": 0.1218150407075882, "rewards/margins": 4.333815097808838, "rewards/rejected": -4.212000370025635, "step": 11890 }, { "epoch": 0.14, "learning_rate": 4.9726075975602504e-06, "logits/chosen": -3.114220142364502, "logits/rejected": -3.033604145050049, "logps/chosen": -51.41857147216797, "logps/rejected": -571.6851806640625, "loss": 0.0495, "rewards/accuracies": 1.0, "rewards/chosen": -0.06299678981304169, "rewards/margins": 5.259636878967285, "rewards/rejected": -5.322633266448975, "step": 11900 }, { "epoch": 0.14, "learning_rate": 4.97245316699119e-06, "logits/chosen": -3.079197406768799, "logits/rejected": -2.9886984825134277, "logps/chosen": -57.60822296142578, "logps/rejected": -705.3116455078125, "loss": 0.0367, "rewards/accuracies": 1.0, "rewards/chosen": -0.061686109751462936, "rewards/margins": 6.599614143371582, "rewards/rejected": -6.6613006591796875, "step": 11910 }, { "epoch": 0.14, "learning_rate": 4.972298304738673e-06, "logits/chosen": -3.140580177307129, "logits/rejected": -3.0734267234802246, "logps/chosen": -27.8206844329834, "logps/rejected": -600.9803466796875, "loss": 0.0518, "rewards/accuracies": 1.0, "rewards/chosen": 0.1557856798171997, "rewards/margins": 5.783272743225098, "rewards/rejected": -5.627487659454346, "step": 11920 }, { "epoch": 0.14, "learning_rate": 4.97214301082974e-06, "logits/chosen": -3.1346256732940674, "logits/rejected": -3.0582430362701416, "logps/chosen": -58.219886779785156, "logps/rejected": -587.2483520507812, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": -0.08806530386209488, "rewards/margins": 5.395395755767822, "rewards/rejected": -5.483460426330566, "step": 11930 }, { "epoch": 0.14, "learning_rate": 4.971987285291502e-06, "logits/chosen": -3.121811628341675, "logits/rejected": -3.067126512527466, "logps/chosen": -50.103233337402344, "logps/rejected": -662.4632568359375, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": -0.04489763826131821, "rewards/margins": 6.192817687988281, "rewards/rejected": -6.237715721130371, "step": 11940 }, { "epoch": 0.14, "learning_rate": 4.97183112815115e-06, "logits/chosen": -3.116328001022339, "logits/rejected": -3.030038356781006, "logps/chosen": -52.69978713989258, "logps/rejected": -702.2621459960938, "loss": 0.0447, "rewards/accuracies": 1.0, "rewards/chosen": -0.015399140305817127, "rewards/margins": 6.593118190765381, "rewards/rejected": -6.608516693115234, "step": 11950 }, { "epoch": 0.14, "learning_rate": 4.971674539435949e-06, "logits/chosen": -3.0954604148864746, "logits/rejected": -3.0734496116638184, "logps/chosen": -20.158538818359375, "logps/rejected": -454.2843322753906, "loss": 0.0361, "rewards/accuracies": 1.0, "rewards/chosen": 0.16163036227226257, "rewards/margins": 4.329110145568848, "rewards/rejected": -4.16748046875, "step": 11960 }, { "epoch": 0.14, "learning_rate": 4.971517519173237e-06, "logits/chosen": -3.1435112953186035, "logits/rejected": -3.062650680541992, "logps/chosen": -43.474613189697266, "logps/rejected": -640.09326171875, "loss": 0.0448, "rewards/accuracies": 1.0, "rewards/chosen": 0.08354160934686661, "rewards/margins": 6.0853962898254395, "rewards/rejected": -6.00185489654541, "step": 11970 }, { "epoch": 0.14, "learning_rate": 4.971360067390431e-06, "logits/chosen": -3.1044232845306396, "logits/rejected": -3.075239896774292, "logps/chosen": -40.562835693359375, "logps/rejected": -556.4769897460938, "loss": 0.1104, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.007614131085574627, "rewards/margins": 5.185427188873291, "rewards/rejected": -5.1778130531311035, "step": 11980 }, { "epoch": 0.14, "learning_rate": 4.971202184115021e-06, "logits/chosen": -3.1202640533447266, "logits/rejected": -3.088698387145996, "logps/chosen": -32.713008880615234, "logps/rejected": -502.0357971191406, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": 0.1096600890159607, "rewards/margins": 4.752352714538574, "rewards/rejected": -4.642693042755127, "step": 11990 }, { "epoch": 0.14, "learning_rate": 4.971043869374573e-06, "logits/chosen": -3.1059627532958984, "logits/rejected": -3.0494911670684814, "logps/chosen": -41.26619338989258, "logps/rejected": -646.2188720703125, "loss": 0.0463, "rewards/accuracies": 1.0, "rewards/chosen": 0.11422520875930786, "rewards/margins": 6.176490306854248, "rewards/rejected": -6.062265396118164, "step": 12000 }, { "epoch": 0.14, "eval_logits/chosen": -3.162815809249878, "eval_logits/rejected": -3.0641918182373047, "eval_logps/chosen": -133.27401733398438, "eval_logps/rejected": -838.9609375, "eval_loss": 0.02869274839758873, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.7209374308586121, "eval_rewards/margins": 7.201417446136475, "eval_rewards/rejected": -7.922354221343994, "eval_runtime": 1.2167, "eval_samples_per_second": 4.109, "eval_steps_per_second": 2.466, "step": 12000 }, { "epoch": 0.14, "learning_rate": 4.970885123196729e-06, "logits/chosen": -3.142738103866577, "logits/rejected": -3.1017332077026367, "logps/chosen": -39.535491943359375, "logps/rejected": -529.3750610351562, "loss": 0.0491, "rewards/accuracies": 1.0, "rewards/chosen": 0.07075054198503494, "rewards/margins": 4.966372013092041, "rewards/rejected": -4.895621299743652, "step": 12010 }, { "epoch": 0.14, "learning_rate": 4.970725945609205e-06, "logits/chosen": -3.1600143909454346, "logits/rejected": -3.0817999839782715, "logps/chosen": -54.565673828125, "logps/rejected": -623.6724243164062, "loss": 0.1307, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.0026172995567321777, "rewards/margins": 5.861249923706055, "rewards/rejected": -5.858632564544678, "step": 12020 }, { "epoch": 0.14, "learning_rate": 4.970566336639793e-06, "logits/chosen": -3.119288444519043, "logits/rejected": -3.0571184158325195, "logps/chosen": -32.82534408569336, "logps/rejected": -549.2300415039062, "loss": 0.0572, "rewards/accuracies": 1.0, "rewards/chosen": 0.13711123168468475, "rewards/margins": 5.256673812866211, "rewards/rejected": -5.119563102722168, "step": 12030 }, { "epoch": 0.14, "learning_rate": 4.97040629631636e-06, "logits/chosen": -3.136772871017456, "logits/rejected": -3.09141206741333, "logps/chosen": -39.99408721923828, "logps/rejected": -711.2931518554688, "loss": 0.0391, "rewards/accuracies": 1.0, "rewards/chosen": 0.10916869342327118, "rewards/margins": 6.815470218658447, "rewards/rejected": -6.706301689147949, "step": 12040 }, { "epoch": 0.14, "learning_rate": 4.970245824666849e-06, "logits/chosen": -3.148648500442505, "logits/rejected": -3.088489532470703, "logps/chosen": -44.49467468261719, "logps/rejected": -677.203369140625, "loss": 0.1109, "rewards/accuracies": 1.0, "rewards/chosen": 0.045282162725925446, "rewards/margins": 6.412101745605469, "rewards/rejected": -6.366818428039551, "step": 12050 }, { "epoch": 0.14, "learning_rate": 4.970084921719278e-06, "logits/chosen": -3.1236255168914795, "logits/rejected": -3.03898024559021, "logps/chosen": -27.717798233032227, "logps/rejected": -503.68963623046875, "loss": 0.0473, "rewards/accuracies": 1.0, "rewards/chosen": 0.18566159904003143, "rewards/margins": 4.852652072906494, "rewards/rejected": -4.666990756988525, "step": 12060 }, { "epoch": 0.14, "learning_rate": 4.9699235875017406e-06, "logits/chosen": -3.1615090370178223, "logits/rejected": -3.0795676708221436, "logps/chosen": -37.19284439086914, "logps/rejected": -721.1597900390625, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/chosen": 0.1125013679265976, "rewards/margins": 6.933306694030762, "rewards/rejected": -6.820805549621582, "step": 12070 }, { "epoch": 0.14, "learning_rate": 4.969761822042403e-06, "logits/chosen": -3.1436612606048584, "logits/rejected": -3.113734483718872, "logps/chosen": -87.70648193359375, "logps/rejected": -553.7545776367188, "loss": 0.0739, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.33984100818634033, "rewards/margins": 4.798377990722656, "rewards/rejected": -5.138218879699707, "step": 12080 }, { "epoch": 0.14, "learning_rate": 4.969599625369513e-06, "logits/chosen": -3.1055448055267334, "logits/rejected": -3.023742198944092, "logps/chosen": -55.774986267089844, "logps/rejected": -696.702880859375, "loss": 0.0376, "rewards/accuracies": 1.0, "rewards/chosen": -0.06020437926054001, "rewards/margins": 6.49672794342041, "rewards/rejected": -6.556931972503662, "step": 12090 }, { "epoch": 0.14, "learning_rate": 4.969436997511386e-06, "logits/chosen": -3.0976176261901855, "logits/rejected": -3.0441603660583496, "logps/chosen": -44.15275955200195, "logps/rejected": -638.6578369140625, "loss": 0.1189, "rewards/accuracies": 1.0, "rewards/chosen": 0.01968943513929844, "rewards/margins": 6.016343116760254, "rewards/rejected": -5.996653079986572, "step": 12100 }, { "epoch": 0.14, "learning_rate": 4.969273938496418e-06, "logits/chosen": -3.1372663974761963, "logits/rejected": -3.0597996711730957, "logps/chosen": -47.35752487182617, "logps/rejected": -637.2639770507812, "loss": 0.2384, "rewards/accuracies": 1.0, "rewards/chosen": 0.06298314034938812, "rewards/margins": 6.032280445098877, "rewards/rejected": -5.969297409057617, "step": 12110 }, { "epoch": 0.15, "learning_rate": 4.969110448353078e-06, "logits/chosen": -3.1470205783843994, "logits/rejected": -3.097398519515991, "logps/chosen": -35.67119216918945, "logps/rejected": -699.5070190429688, "loss": 0.0424, "rewards/accuracies": 1.0, "rewards/chosen": 0.11632444709539413, "rewards/margins": 6.721761226654053, "rewards/rejected": -6.6054368019104, "step": 12120 }, { "epoch": 0.15, "learning_rate": 4.9689465271099126e-06, "logits/chosen": -3.1095376014709473, "logits/rejected": -3.0833888053894043, "logps/chosen": -37.30706024169922, "logps/rejected": -636.571044921875, "loss": 0.1826, "rewards/accuracies": 1.0, "rewards/chosen": 0.130912646651268, "rewards/margins": 6.102841377258301, "rewards/rejected": -5.971928596496582, "step": 12130 }, { "epoch": 0.15, "learning_rate": 4.96878217479554e-06, "logits/chosen": -3.1634414196014404, "logits/rejected": -3.127896785736084, "logps/chosen": -44.951377868652344, "logps/rejected": -587.143798828125, "loss": 0.0424, "rewards/accuracies": 1.0, "rewards/chosen": 0.0528937391936779, "rewards/margins": 5.513850688934326, "rewards/rejected": -5.460956573486328, "step": 12140 }, { "epoch": 0.15, "learning_rate": 4.968617391438657e-06, "logits/chosen": -3.1566367149353027, "logits/rejected": -3.125042676925659, "logps/chosen": -48.294368743896484, "logps/rejected": -618.2907104492188, "loss": 0.0993, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.03211923688650131, "rewards/margins": 5.824815273284912, "rewards/rejected": -5.79269552230835, "step": 12150 }, { "epoch": 0.15, "learning_rate": 4.968452177068032e-06, "logits/chosen": -3.1469717025756836, "logits/rejected": -3.0957841873168945, "logps/chosen": -40.30765151977539, "logps/rejected": -527.260498046875, "loss": 0.0415, "rewards/accuracies": 1.0, "rewards/chosen": 0.12698692083358765, "rewards/margins": 5.003279685974121, "rewards/rejected": -4.8762922286987305, "step": 12160 }, { "epoch": 0.15, "learning_rate": 4.968286531712515e-06, "logits/chosen": -3.160646438598633, "logits/rejected": -3.1318373680114746, "logps/chosen": -34.71467971801758, "logps/rejected": -627.7454833984375, "loss": 0.0932, "rewards/accuracies": 1.0, "rewards/chosen": 0.14859908819198608, "rewards/margins": 6.035083293914795, "rewards/rejected": -5.886483669281006, "step": 12170 }, { "epoch": 0.15, "learning_rate": 4.968120455401024e-06, "logits/chosen": -3.1344456672668457, "logits/rejected": -3.0445568561553955, "logps/chosen": -50.35930633544922, "logps/rejected": -637.3134765625, "loss": 0.1859, "rewards/accuracies": 1.0, "rewards/chosen": 0.052767314016819, "rewards/margins": 6.026761054992676, "rewards/rejected": -5.973994255065918, "step": 12180 }, { "epoch": 0.15, "learning_rate": 4.967953948162556e-06, "logits/chosen": -3.1217446327209473, "logits/rejected": -3.0818371772766113, "logps/chosen": -41.32204055786133, "logps/rejected": -670.0868530273438, "loss": 0.0427, "rewards/accuracies": 1.0, "rewards/chosen": 0.09519940614700317, "rewards/margins": 6.408022880554199, "rewards/rejected": -6.312823295593262, "step": 12190 }, { "epoch": 0.15, "learning_rate": 4.967787010026184e-06, "logits/chosen": -3.1194560527801514, "logits/rejected": -3.0593390464782715, "logps/chosen": -46.649539947509766, "logps/rejected": -690.55126953125, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/chosen": -0.020586103200912476, "rewards/margins": 6.483198642730713, "rewards/rejected": -6.503785133361816, "step": 12200 }, { "epoch": 0.15, "learning_rate": 4.967619641021053e-06, "logits/chosen": -3.1186935901641846, "logits/rejected": -3.0731139183044434, "logps/chosen": -77.70616149902344, "logps/rejected": -656.063232421875, "loss": 0.1106, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2835825979709625, "rewards/margins": 5.878883361816406, "rewards/rejected": -6.162466049194336, "step": 12210 }, { "epoch": 0.15, "learning_rate": 4.9674518411763865e-06, "logits/chosen": -3.166180372238159, "logits/rejected": -3.112623691558838, "logps/chosen": -57.9205207824707, "logps/rejected": -470.0125427246094, "loss": 0.0529, "rewards/accuracies": 1.0, "rewards/chosen": -0.09713288396596909, "rewards/margins": 4.217229843139648, "rewards/rejected": -4.3143630027771, "step": 12220 }, { "epoch": 0.15, "learning_rate": 4.967283610521481e-06, "logits/chosen": -3.133284330368042, "logits/rejected": -3.0160410404205322, "logps/chosen": -54.0164794921875, "logps/rejected": -769.4969482421875, "loss": 0.0369, "rewards/accuracies": 1.0, "rewards/chosen": -0.04509865492582321, "rewards/margins": 7.230265140533447, "rewards/rejected": -7.275362968444824, "step": 12230 }, { "epoch": 0.15, "learning_rate": 4.9671149490857114e-06, "logits/chosen": -3.141573905944824, "logits/rejected": -3.0908570289611816, "logps/chosen": -46.853973388671875, "logps/rejected": -588.368896484375, "loss": 0.0462, "rewards/accuracies": 1.0, "rewards/chosen": 0.07924838364124298, "rewards/margins": 5.5687360763549805, "rewards/rejected": -5.489487648010254, "step": 12240 }, { "epoch": 0.15, "learning_rate": 4.9669458568985225e-06, "logits/chosen": -3.1694388389587402, "logits/rejected": -3.099137306213379, "logps/chosen": -55.12522506713867, "logps/rejected": -594.1434936523438, "loss": 0.1983, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.06675964593887329, "rewards/margins": 5.47686243057251, "rewards/rejected": -5.543622016906738, "step": 12250 }, { "epoch": 0.15, "learning_rate": 4.9667763339894396e-06, "logits/chosen": -3.1722464561462402, "logits/rejected": -3.1470634937286377, "logps/chosen": -28.186237335205078, "logps/rejected": -483.14935302734375, "loss": 0.0432, "rewards/accuracies": 1.0, "rewards/chosen": 0.12981754541397095, "rewards/margins": 4.593740940093994, "rewards/rejected": -4.463923454284668, "step": 12260 }, { "epoch": 0.15, "learning_rate": 4.9666063803880595e-06, "logits/chosen": -3.1449484825134277, "logits/rejected": -3.1001129150390625, "logps/chosen": -47.528560638427734, "logps/rejected": -483.2362365722656, "loss": 0.1255, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.06674519181251526, "rewards/margins": 4.389518737792969, "rewards/rejected": -4.456264019012451, "step": 12270 }, { "epoch": 0.15, "learning_rate": 4.966435996124056e-06, "logits/chosen": -3.153449535369873, "logits/rejected": -3.072571039199829, "logps/chosen": -38.908424377441406, "logps/rejected": -574.5233154296875, "loss": 0.0425, "rewards/accuracies": 1.0, "rewards/chosen": 0.055947862565517426, "rewards/margins": 5.418635368347168, "rewards/rejected": -5.362687110900879, "step": 12280 }, { "epoch": 0.15, "learning_rate": 4.966265181227178e-06, "logits/chosen": -3.1479296684265137, "logits/rejected": -3.062638521194458, "logps/chosen": -30.656818389892578, "logps/rejected": -618.723388671875, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": 0.1699107438325882, "rewards/margins": 5.967578887939453, "rewards/rejected": -5.797667503356934, "step": 12290 }, { "epoch": 0.15, "learning_rate": 4.966093935727248e-06, "logits/chosen": -3.1341347694396973, "logits/rejected": -3.1108362674713135, "logps/chosen": -70.4669418334961, "logps/rejected": -558.5016479492188, "loss": 0.0691, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.290723979473114, "rewards/margins": 4.913177013397217, "rewards/rejected": -5.2039008140563965, "step": 12300 }, { "epoch": 0.15, "learning_rate": 4.965922259654167e-06, "logits/chosen": -3.1306395530700684, "logits/rejected": -3.066171169281006, "logps/chosen": -80.23573303222656, "logps/rejected": -685.3251953125, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": -0.2304270714521408, "rewards/margins": 6.233921527862549, "rewards/rejected": -6.464348793029785, "step": 12310 }, { "epoch": 0.15, "learning_rate": 4.965750153037908e-06, "logits/chosen": -3.1158969402313232, "logits/rejected": -3.0614266395568848, "logps/chosen": -45.20975112915039, "logps/rejected": -441.6761169433594, "loss": 0.1138, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.06511615961790085, "rewards/margins": 3.9711215496063232, "rewards/rejected": -4.0362372398376465, "step": 12320 }, { "epoch": 0.15, "learning_rate": 4.965577615908521e-06, "logits/chosen": -3.170719623565674, "logits/rejected": -3.0696122646331787, "logps/chosen": -66.21038818359375, "logps/rejected": -667.9879150390625, "loss": 0.0309, "rewards/accuracies": 1.0, "rewards/chosen": -0.14294631779193878, "rewards/margins": 6.13845157623291, "rewards/rejected": -6.281397819519043, "step": 12330 }, { "epoch": 0.15, "learning_rate": 4.96540464829613e-06, "logits/chosen": -3.1563050746917725, "logits/rejected": -3.126689910888672, "logps/chosen": -26.498266220092773, "logps/rejected": -441.026123046875, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/chosen": 0.12789225578308105, "rewards/margins": 4.172471046447754, "rewards/rejected": -4.044578552246094, "step": 12340 }, { "epoch": 0.15, "learning_rate": 4.965231250230935e-06, "logits/chosen": -3.119847536087036, "logits/rejected": -3.054429769515991, "logps/chosen": -50.01197052001953, "logps/rejected": -613.6364135742188, "loss": 0.0408, "rewards/accuracies": 1.0, "rewards/chosen": -0.011632168665528297, "rewards/margins": 5.741535663604736, "rewards/rejected": -5.753167152404785, "step": 12350 }, { "epoch": 0.15, "learning_rate": 4.96505742174321e-06, "logits/chosen": -3.1363961696624756, "logits/rejected": -3.0730032920837402, "logps/chosen": -44.76493835449219, "logps/rejected": -409.1731872558594, "loss": 0.1121, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.030925026163458824, "rewards/margins": 3.733879804611206, "rewards/rejected": -3.7029552459716797, "step": 12360 }, { "epoch": 0.15, "learning_rate": 4.964883162863306e-06, "logits/chosen": -3.1140151023864746, "logits/rejected": -3.070131540298462, "logps/chosen": -23.697540283203125, "logps/rejected": -536.0709228515625, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/chosen": 0.16429118812084198, "rewards/margins": 5.130609512329102, "rewards/rejected": -4.966318130493164, "step": 12370 }, { "epoch": 0.15, "learning_rate": 4.9647084736216474e-06, "logits/chosen": -3.1785264015197754, "logits/rejected": -3.1503043174743652, "logps/chosen": -48.83889389038086, "logps/rejected": -462.7676696777344, "loss": 0.1114, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.012831087224185467, "rewards/margins": 4.223700523376465, "rewards/rejected": -4.2365312576293945, "step": 12380 }, { "epoch": 0.15, "learning_rate": 4.964533354048735e-06, "logits/chosen": -3.109248161315918, "logits/rejected": -3.080601692199707, "logps/chosen": -34.87248611450195, "logps/rejected": -487.5240783691406, "loss": 0.1155, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.07440797984600067, "rewards/margins": 4.5630645751953125, "rewards/rejected": -4.488656520843506, "step": 12390 }, { "epoch": 0.15, "learning_rate": 4.964357804175144e-06, "logits/chosen": -3.1854076385498047, "logits/rejected": -3.139209508895874, "logps/chosen": -43.36716842651367, "logps/rejected": -802.8575439453125, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": 0.1022224873304367, "rewards/margins": 7.724253177642822, "rewards/rejected": -7.622030735015869, "step": 12400 }, { "epoch": 0.15, "learning_rate": 4.964181824031524e-06, "logits/chosen": -3.14559268951416, "logits/rejected": -3.128973960876465, "logps/chosen": -50.99367904663086, "logps/rejected": -633.9429321289062, "loss": 0.1138, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.005415740422904491, "rewards/margins": 5.940032482147217, "rewards/rejected": -5.945448875427246, "step": 12410 }, { "epoch": 0.15, "learning_rate": 4.964005413648603e-06, "logits/chosen": -3.117835521697998, "logits/rejected": -3.0540175437927246, "logps/chosen": -60.32391357421875, "logps/rejected": -505.24090576171875, "loss": 0.0454, "rewards/accuracies": 1.0, "rewards/chosen": -0.14519503712654114, "rewards/margins": 4.5327324867248535, "rewards/rejected": -4.6779279708862305, "step": 12420 }, { "epoch": 0.15, "learning_rate": 4.963828573057179e-06, "logits/chosen": -3.1489057540893555, "logits/rejected": -3.125441074371338, "logps/chosen": -40.96483612060547, "logps/rejected": -412.53076171875, "loss": 0.1149, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0008672185358591378, "rewards/margins": 3.7585949897766113, "rewards/rejected": -3.759462356567383, "step": 12430 }, { "epoch": 0.15, "learning_rate": 4.9636513022881294e-06, "logits/chosen": -3.129661798477173, "logits/rejected": -3.1158225536346436, "logps/chosen": -33.064979553222656, "logps/rejected": -588.3223266601562, "loss": 0.0414, "rewards/accuracies": 1.0, "rewards/chosen": 0.17997080087661743, "rewards/margins": 5.682878017425537, "rewards/rejected": -5.502907752990723, "step": 12440 }, { "epoch": 0.15, "learning_rate": 4.963473601372406e-06, "logits/chosen": -3.1352760791778564, "logits/rejected": -3.079345226287842, "logps/chosen": -29.216516494750977, "logps/rejected": -507.49798583984375, "loss": 0.0498, "rewards/accuracies": 1.0, "rewards/chosen": 0.11864311993122101, "rewards/margins": 4.818293571472168, "rewards/rejected": -4.69965124130249, "step": 12450 }, { "epoch": 0.15, "learning_rate": 4.963295470341033e-06, "logits/chosen": -3.1323635578155518, "logits/rejected": -3.086905002593994, "logps/chosen": -30.034011840820312, "logps/rejected": -456.18096923828125, "loss": 0.1205, "rewards/accuracies": 1.0, "rewards/chosen": 0.11588189750909805, "rewards/margins": 4.300073623657227, "rewards/rejected": -4.184191703796387, "step": 12460 }, { "epoch": 0.15, "learning_rate": 4.963116909225112e-06, "logits/chosen": -3.155989170074463, "logits/rejected": -3.1053576469421387, "logps/chosen": -34.683101654052734, "logps/rejected": -577.7526245117188, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": 0.078243687748909, "rewards/margins": 5.468806743621826, "rewards/rejected": -5.390563011169434, "step": 12470 }, { "epoch": 0.15, "learning_rate": 4.9629379180558195e-06, "logits/chosen": -3.1455187797546387, "logits/rejected": -3.0992817878723145, "logps/chosen": -36.36969757080078, "logps/rejected": -670.6649169921875, "loss": 0.0496, "rewards/accuracies": 1.0, "rewards/chosen": 0.13079878687858582, "rewards/margins": 6.444519996643066, "rewards/rejected": -6.313720703125, "step": 12480 }, { "epoch": 0.15, "learning_rate": 4.962758496864408e-06, "logits/chosen": -3.151340961456299, "logits/rejected": -3.0762460231781006, "logps/chosen": -43.9825553894043, "logps/rejected": -837.49658203125, "loss": 0.0415, "rewards/accuracies": 1.0, "rewards/chosen": 0.07001130282878876, "rewards/margins": 8.028802871704102, "rewards/rejected": -7.9587907791137695, "step": 12490 }, { "epoch": 0.15, "learning_rate": 4.962578645682202e-06, "logits/chosen": -3.1058058738708496, "logits/rejected": -3.074944257736206, "logps/chosen": -28.472558975219727, "logps/rejected": -501.20489501953125, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/chosen": 0.16377627849578857, "rewards/margins": 4.792850494384766, "rewards/rejected": -4.629075050354004, "step": 12500 }, { "epoch": 0.15, "learning_rate": 4.962398364540605e-06, "logits/chosen": -3.1579020023345947, "logits/rejected": -3.122798442840576, "logps/chosen": -41.1778564453125, "logps/rejected": -542.9105834960938, "loss": 0.0698, "rewards/accuracies": 1.0, "rewards/chosen": -0.03610052913427353, "rewards/margins": 5.026088237762451, "rewards/rejected": -5.062188625335693, "step": 12510 }, { "epoch": 0.15, "learning_rate": 4.962217653471091e-06, "logits/chosen": -3.156043767929077, "logits/rejected": -3.1280746459960938, "logps/chosen": -31.73252296447754, "logps/rejected": -567.5017700195312, "loss": 0.0512, "rewards/accuracies": 1.0, "rewards/chosen": 0.0739310160279274, "rewards/margins": 5.370965957641602, "rewards/rejected": -5.29703426361084, "step": 12520 }, { "epoch": 0.15, "learning_rate": 4.962036512505215e-06, "logits/chosen": -3.1610114574432373, "logits/rejected": -3.1103529930114746, "logps/chosen": -32.577144622802734, "logps/rejected": -428.3785095214844, "loss": 0.1171, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.06887348741292953, "rewards/margins": 3.9852943420410156, "rewards/rejected": -3.9164211750030518, "step": 12530 }, { "epoch": 0.15, "learning_rate": 4.9618549416746006e-06, "logits/chosen": -3.1430656909942627, "logits/rejected": -3.064972400665283, "logps/chosen": -41.941429138183594, "logps/rejected": -691.2503051757812, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/chosen": 0.058103859424591064, "rewards/margins": 6.582344055175781, "rewards/rejected": -6.524240970611572, "step": 12540 }, { "epoch": 0.15, "learning_rate": 4.961672941010952e-06, "logits/chosen": -3.1515138149261475, "logits/rejected": -3.076002597808838, "logps/chosen": -53.098670959472656, "logps/rejected": -741.3165283203125, "loss": 0.0525, "rewards/accuracies": 1.0, "rewards/chosen": 0.009957845322787762, "rewards/margins": 7.026663303375244, "rewards/rejected": -7.016704559326172, "step": 12550 }, { "epoch": 0.15, "learning_rate": 4.961490510546044e-06, "logits/chosen": -3.1133084297180176, "logits/rejected": -3.042008876800537, "logps/chosen": -84.82585144042969, "logps/rejected": -582.225341796875, "loss": 0.0483, "rewards/accuracies": 1.0, "rewards/chosen": -0.37728622555732727, "rewards/margins": 5.05859375, "rewards/rejected": -5.435880184173584, "step": 12560 }, { "epoch": 0.15, "learning_rate": 4.96130765031173e-06, "logits/chosen": -3.1093127727508545, "logits/rejected": -3.0997214317321777, "logps/chosen": -57.2518310546875, "logps/rejected": -495.34967041015625, "loss": 0.1488, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1273459643125534, "rewards/margins": 4.4427571296691895, "rewards/rejected": -4.570103168487549, "step": 12570 }, { "epoch": 0.15, "learning_rate": 4.961124360339936e-06, "logits/chosen": -3.160501003265381, "logits/rejected": -3.0898478031158447, "logps/chosen": -39.60321044921875, "logps/rejected": -606.6182250976562, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": 0.06248842552304268, "rewards/margins": 5.736334800720215, "rewards/rejected": -5.673846244812012, "step": 12580 }, { "epoch": 0.15, "learning_rate": 4.960940640662664e-06, "logits/chosen": -3.115692138671875, "logits/rejected": -3.075451374053955, "logps/chosen": -31.34951400756836, "logps/rejected": -435.45745849609375, "loss": 0.0632, "rewards/accuracies": 1.0, "rewards/chosen": 0.12000717967748642, "rewards/margins": 4.109007835388184, "rewards/rejected": -3.9890010356903076, "step": 12590 }, { "epoch": 0.15, "learning_rate": 4.960756491311991e-06, "logits/chosen": -3.1721179485321045, "logits/rejected": -3.1130900382995605, "logps/chosen": -39.857276916503906, "logps/rejected": -528.3851318359375, "loss": 0.0509, "rewards/accuracies": 1.0, "rewards/chosen": 0.05558352544903755, "rewards/margins": 4.962067604064941, "rewards/rejected": -4.9064836502075195, "step": 12600 }, { "epoch": 0.15, "learning_rate": 4.960571912320069e-06, "logits/chosen": -3.19205379486084, "logits/rejected": -3.1528282165527344, "logps/chosen": -29.365161895751953, "logps/rejected": -512.6197509765625, "loss": 0.0512, "rewards/accuracies": 1.0, "rewards/chosen": 0.14871899783611298, "rewards/margins": 4.904505729675293, "rewards/rejected": -4.755786418914795, "step": 12610 }, { "epoch": 0.15, "learning_rate": 4.9603869037191255e-06, "logits/chosen": -3.13785982131958, "logits/rejected": -3.082637310028076, "logps/chosen": -36.400943756103516, "logps/rejected": -575.7028198242188, "loss": 0.0877, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.049361031502485275, "rewards/margins": 5.426609992980957, "rewards/rejected": -5.377248764038086, "step": 12620 }, { "epoch": 0.15, "learning_rate": 4.9602014655414625e-06, "logits/chosen": -3.107776165008545, "logits/rejected": -3.1057064533233643, "logps/chosen": -25.814483642578125, "logps/rejected": -465.66876220703125, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": 0.16327518224716187, "rewards/margins": 4.43168830871582, "rewards/rejected": -4.268413066864014, "step": 12630 }, { "epoch": 0.15, "learning_rate": 4.960015597819455e-06, "logits/chosen": -3.1595826148986816, "logits/rejected": -3.11747670173645, "logps/chosen": -67.53147888183594, "logps/rejected": -547.171875, "loss": 0.1117, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.22837504744529724, "rewards/margins": 4.858519077301025, "rewards/rejected": -5.086893558502197, "step": 12640 }, { "epoch": 0.15, "learning_rate": 4.959829300585558e-06, "logits/chosen": -3.11796236038208, "logits/rejected": -3.0061581134796143, "logps/chosen": -67.36604309082031, "logps/rejected": -703.158203125, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/chosen": -0.1578672230243683, "rewards/margins": 6.467984199523926, "rewards/rejected": -6.625851631164551, "step": 12650 }, { "epoch": 0.15, "learning_rate": 4.959642573872297e-06, "logits/chosen": -3.1325831413269043, "logits/rejected": -3.051877975463867, "logps/chosen": -32.74433898925781, "logps/rejected": -580.6539916992188, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": 0.15313754975795746, "rewards/margins": 5.578329563140869, "rewards/rejected": -5.425192356109619, "step": 12660 }, { "epoch": 0.15, "learning_rate": 4.959455417712273e-06, "logits/chosen": -3.1268179416656494, "logits/rejected": -3.090056896209717, "logps/chosen": -38.237449645996094, "logps/rejected": -389.106201171875, "loss": 0.1029, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.058285266160964966, "rewards/margins": 3.57068133354187, "rewards/rejected": -3.5123963356018066, "step": 12670 }, { "epoch": 0.15, "learning_rate": 4.959267832138164e-06, "logits/chosen": -3.1715500354766846, "logits/rejected": -3.1223068237304688, "logps/chosen": -77.229248046875, "logps/rejected": -537.0064697265625, "loss": 0.0731, "rewards/accuracies": 1.0, "rewards/chosen": -0.25800976157188416, "rewards/margins": 4.734551906585693, "rewards/rejected": -4.992562294006348, "step": 12680 }, { "epoch": 0.15, "learning_rate": 4.959079817182722e-06, "logits/chosen": -3.139252185821533, "logits/rejected": -3.1007862091064453, "logps/chosen": -28.044902801513672, "logps/rejected": -565.47802734375, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": 0.18351306021213531, "rewards/margins": 5.467398643493652, "rewards/rejected": -5.2838850021362305, "step": 12690 }, { "epoch": 0.15, "learning_rate": 4.958891372878774e-06, "logits/chosen": -3.099372148513794, "logits/rejected": -3.0847625732421875, "logps/chosen": -58.17803192138672, "logps/rejected": -467.05389404296875, "loss": 0.0702, "rewards/accuracies": 1.0, "rewards/chosen": -0.12221705913543701, "rewards/margins": 4.166354656219482, "rewards/rejected": -4.288571357727051, "step": 12700 }, { "epoch": 0.15, "learning_rate": 4.958702499259221e-06, "logits/chosen": -3.1113035678863525, "logits/rejected": -2.9606616497039795, "logps/chosen": -311.120361328125, "logps/rejected": -855.8946533203125, "loss": 0.0375, "rewards/accuracies": 1.0, "rewards/chosen": -2.5524563789367676, "rewards/margins": 5.608778953552246, "rewards/rejected": -8.161233901977539, "step": 12710 }, { "epoch": 0.15, "learning_rate": 4.958513196357041e-06, "logits/chosen": -3.1135175228118896, "logits/rejected": -3.0608155727386475, "logps/chosen": -81.77759552001953, "logps/rejected": -757.9568481445312, "loss": 0.0529, "rewards/accuracies": 1.0, "rewards/chosen": -0.31525903940200806, "rewards/margins": 6.878390312194824, "rewards/rejected": -7.193650245666504, "step": 12720 }, { "epoch": 0.15, "learning_rate": 4.958323464205286e-06, "logits/chosen": -3.1597988605499268, "logits/rejected": -3.1032931804656982, "logps/chosen": -50.45685577392578, "logps/rejected": -568.5234375, "loss": 0.1095, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.04857726767659187, "rewards/margins": 5.242012977600098, "rewards/rejected": -5.290590286254883, "step": 12730 }, { "epoch": 0.15, "learning_rate": 4.95813330283708e-06, "logits/chosen": -3.1381983757019043, "logits/rejected": -3.0983901023864746, "logps/chosen": -28.27280044555664, "logps/rejected": -560.9652099609375, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": 0.13250839710235596, "rewards/margins": 5.367785453796387, "rewards/rejected": -5.235276222229004, "step": 12740 }, { "epoch": 0.15, "learning_rate": 4.9579427122856275e-06, "logits/chosen": -3.1132164001464844, "logits/rejected": -3.092383623123169, "logps/chosen": -59.235984802246094, "logps/rejected": -578.2444458007812, "loss": 0.0841, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.048342883586883545, "rewards/margins": 5.347046852111816, "rewards/rejected": -5.395390033721924, "step": 12750 }, { "epoch": 0.15, "learning_rate": 4.957751692584204e-06, "logits/chosen": -3.160024404525757, "logits/rejected": -3.0619912147521973, "logps/chosen": -78.87876892089844, "logps/rejected": -793.8259887695312, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": -0.2584065794944763, "rewards/margins": 7.2534589767456055, "rewards/rejected": -7.511865139007568, "step": 12760 }, { "epoch": 0.15, "learning_rate": 4.957560243766161e-06, "logits/chosen": -3.16982102394104, "logits/rejected": -3.1089348793029785, "logps/chosen": -29.963083267211914, "logps/rejected": -611.4991455078125, "loss": 0.0416, "rewards/accuracies": 1.0, "rewards/chosen": 0.1434023678302765, "rewards/margins": 5.88559627532959, "rewards/rejected": -5.742194175720215, "step": 12770 }, { "epoch": 0.15, "learning_rate": 4.957368365864925e-06, "logits/chosen": -3.1266069412231445, "logits/rejected": -3.0490784645080566, "logps/chosen": -25.551204681396484, "logps/rejected": -637.613525390625, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": 0.17917434871196747, "rewards/margins": 6.161268711090088, "rewards/rejected": -5.982093811035156, "step": 12780 }, { "epoch": 0.15, "learning_rate": 4.957176058913998e-06, "logits/chosen": -3.161684036254883, "logits/rejected": -3.1338400840759277, "logps/chosen": -47.701194763183594, "logps/rejected": -786.8278198242188, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/chosen": 0.022255271673202515, "rewards/margins": 7.493287563323975, "rewards/rejected": -7.471032619476318, "step": 12790 }, { "epoch": 0.15, "learning_rate": 4.9569833229469545e-06, "logits/chosen": -3.1284220218658447, "logits/rejected": -3.0654330253601074, "logps/chosen": -76.94759368896484, "logps/rejected": -497.07183837890625, "loss": 0.078, "rewards/accuracies": 1.0, "rewards/chosen": -0.31963446736335754, "rewards/margins": 4.279196739196777, "rewards/rejected": -4.5988311767578125, "step": 12800 }, { "epoch": 0.15, "learning_rate": 4.956790157997448e-06, "logits/chosen": -3.101982831954956, "logits/rejected": -3.029773473739624, "logps/chosen": -104.5625, "logps/rejected": -690.5541381835938, "loss": 0.1303, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5282677412033081, "rewards/margins": 5.980792045593262, "rewards/rejected": -6.509059906005859, "step": 12810 }, { "epoch": 0.15, "learning_rate": 4.956596564099202e-06, "logits/chosen": -3.185300827026367, "logits/rejected": -3.1229052543640137, "logps/chosen": -102.68974304199219, "logps/rejected": -822.7200317382812, "loss": 0.0361, "rewards/accuracies": 1.0, "rewards/chosen": -0.4360162615776062, "rewards/margins": 7.388800144195557, "rewards/rejected": -7.8248162269592285, "step": 12820 }, { "epoch": 0.15, "learning_rate": 4.95640254128602e-06, "logits/chosen": -3.1025288105010986, "logits/rejected": -3.076080322265625, "logps/chosen": -60.8707275390625, "logps/rejected": -478.16766357421875, "loss": 0.1014, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.20696954429149628, "rewards/margins": 4.192906379699707, "rewards/rejected": -4.399876117706299, "step": 12830 }, { "epoch": 0.15, "learning_rate": 4.9562080895917754e-06, "logits/chosen": -3.1819891929626465, "logits/rejected": -3.146721363067627, "logps/chosen": -28.13802146911621, "logps/rejected": -530.9379272460938, "loss": 0.0445, "rewards/accuracies": 1.0, "rewards/chosen": 0.1619902402162552, "rewards/margins": 5.07966423034668, "rewards/rejected": -4.9176740646362305, "step": 12840 }, { "epoch": 0.15, "learning_rate": 4.956013209050421e-06, "logits/chosen": -3.145062208175659, "logits/rejected": -3.0918636322021484, "logps/chosen": -46.769710540771484, "logps/rejected": -592.8206787109375, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": 0.08383367210626602, "rewards/margins": 5.618825912475586, "rewards/rejected": -5.534992218017578, "step": 12850 }, { "epoch": 0.15, "learning_rate": 4.9558178996959814e-06, "logits/chosen": -3.1541454792022705, "logits/rejected": -3.08872652053833, "logps/chosen": -54.413002014160156, "logps/rejected": -568.9256591796875, "loss": 0.0484, "rewards/accuracies": 1.0, "rewards/chosen": 0.006466861814260483, "rewards/margins": 5.298872947692871, "rewards/rejected": -5.29240608215332, "step": 12860 }, { "epoch": 0.15, "learning_rate": 4.955622161562558e-06, "logits/chosen": -3.171522617340088, "logits/rejected": -3.087938070297241, "logps/chosen": -49.87978744506836, "logps/rejected": -760.2347412109375, "loss": 0.0421, "rewards/accuracies": 1.0, "rewards/chosen": 0.09390401840209961, "rewards/margins": 7.286381721496582, "rewards/rejected": -7.192477226257324, "step": 12870 }, { "epoch": 0.15, "learning_rate": 4.955425994684324e-06, "logits/chosen": -3.1398558616638184, "logits/rejected": -3.050441265106201, "logps/chosen": -57.211524963378906, "logps/rejected": -600.1904296875, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/chosen": -0.007505419664084911, "rewards/margins": 5.585291862487793, "rewards/rejected": -5.59279727935791, "step": 12880 }, { "epoch": 0.15, "learning_rate": 4.955229399095533e-06, "logits/chosen": -3.143287181854248, "logits/rejected": -3.0742862224578857, "logps/chosen": -53.11021041870117, "logps/rejected": -571.7316284179688, "loss": 0.0665, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.014279806986451149, "rewards/margins": 5.341884136199951, "rewards/rejected": -5.327604293823242, "step": 12890 }, { "epoch": 0.15, "learning_rate": 4.955032374830506e-06, "logits/chosen": -3.152707576751709, "logits/rejected": -3.1173408031463623, "logps/chosen": -30.743148803710938, "logps/rejected": -499.83514404296875, "loss": 0.0423, "rewards/accuracies": 1.0, "rewards/chosen": 0.1194121241569519, "rewards/margins": 4.7463836669921875, "rewards/rejected": -4.626972198486328, "step": 12900 }, { "epoch": 0.15, "learning_rate": 4.954834921923646e-06, "logits/chosen": -3.143756866455078, "logits/rejected": -3.098694324493408, "logps/chosen": -71.61248779296875, "logps/rejected": -621.973388671875, "loss": 0.1179, "rewards/accuracies": 1.0, "rewards/chosen": -0.2096339762210846, "rewards/margins": 5.618856430053711, "rewards/rejected": -5.828490257263184, "step": 12910 }, { "epoch": 0.15, "learning_rate": 4.954637040409426e-06, "logits/chosen": -3.1514534950256348, "logits/rejected": -3.1171672344207764, "logps/chosen": -27.2060604095459, "logps/rejected": -484.0804748535156, "loss": 0.0536, "rewards/accuracies": 1.0, "rewards/chosen": 0.1738298088312149, "rewards/margins": 4.639342784881592, "rewards/rejected": -4.465513706207275, "step": 12920 }, { "epoch": 0.15, "learning_rate": 4.954438730322396e-06, "logits/chosen": -3.133965015411377, "logits/rejected": -3.0840208530426025, "logps/chosen": -42.25037384033203, "logps/rejected": -606.6619262695312, "loss": 0.1197, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.008510427549481392, "rewards/margins": 5.684329032897949, "rewards/rejected": -5.67581844329834, "step": 12930 }, { "epoch": 0.15, "learning_rate": 4.954239991697182e-06, "logits/chosen": -3.181244373321533, "logits/rejected": -3.139976739883423, "logps/chosen": -59.276161193847656, "logps/rejected": -679.3186645507812, "loss": 0.0426, "rewards/accuracies": 1.0, "rewards/chosen": -0.08279737085103989, "rewards/margins": 6.31151819229126, "rewards/rejected": -6.394315719604492, "step": 12940 }, { "epoch": 0.16, "learning_rate": 4.9540408245684804e-06, "logits/chosen": -3.1681270599365234, "logits/rejected": -3.094017744064331, "logps/chosen": -47.773380279541016, "logps/rejected": -610.5696411132812, "loss": 0.0471, "rewards/accuracies": 1.0, "rewards/chosen": 0.013280299492180347, "rewards/margins": 5.727867126464844, "rewards/rejected": -5.714587211608887, "step": 12950 }, { "epoch": 0.16, "learning_rate": 4.953841228971067e-06, "logits/chosen": -3.1177330017089844, "logits/rejected": -3.0807292461395264, "logps/chosen": -33.162540435791016, "logps/rejected": -482.0133361816406, "loss": 0.0417, "rewards/accuracies": 1.0, "rewards/chosen": 0.07584750652313232, "rewards/margins": 4.513009071350098, "rewards/rejected": -4.437160491943359, "step": 12960 }, { "epoch": 0.16, "learning_rate": 4.953641204939791e-06, "logits/chosen": -3.184835910797119, "logits/rejected": -3.136683225631714, "logps/chosen": -39.29829788208008, "logps/rejected": -568.0670166015625, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/chosen": 0.046979229897260666, "rewards/margins": 5.341452598571777, "rewards/rejected": -5.294474124908447, "step": 12970 }, { "epoch": 0.16, "learning_rate": 4.9534407525095745e-06, "logits/chosen": -3.1684815883636475, "logits/rejected": -3.074848175048828, "logps/chosen": -33.52009963989258, "logps/rejected": -612.1494140625, "loss": 0.0337, "rewards/accuracies": 1.0, "rewards/chosen": 0.1734098196029663, "rewards/margins": 5.916502475738525, "rewards/rejected": -5.7430925369262695, "step": 12980 }, { "epoch": 0.16, "learning_rate": 4.953239871715417e-06, "logits/chosen": -3.150344133377075, "logits/rejected": -3.1276838779449463, "logps/chosen": -28.651723861694336, "logps/rejected": -520.7945556640625, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/chosen": 0.1508113443851471, "rewards/margins": 4.966858863830566, "rewards/rejected": -4.816047191619873, "step": 12990 }, { "epoch": 0.16, "learning_rate": 4.953038562592392e-06, "logits/chosen": -3.1786818504333496, "logits/rejected": -3.1717724800109863, "logps/chosen": -38.26432800292969, "logps/rejected": -500.22674560546875, "loss": 0.047, "rewards/accuracies": 1.0, "rewards/chosen": 0.010494356043636799, "rewards/margins": 4.647899150848389, "rewards/rejected": -4.637404441833496, "step": 13000 }, { "epoch": 0.16, "learning_rate": 4.952836825175646e-06, "logits/chosen": -3.1405491828918457, "logits/rejected": -3.0633111000061035, "logps/chosen": -54.90546417236328, "logps/rejected": -670.9608154296875, "loss": 0.1115, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.008854130282998085, "rewards/margins": 6.309774875640869, "rewards/rejected": -6.318628787994385, "step": 13010 }, { "epoch": 0.16, "learning_rate": 4.952634659500404e-06, "logits/chosen": -3.1182894706726074, "logits/rejected": -3.0534799098968506, "logps/chosen": -55.69465255737305, "logps/rejected": -669.1932373046875, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/chosen": -0.06144900247454643, "rewards/margins": 6.2302656173706055, "rewards/rejected": -6.291715145111084, "step": 13020 }, { "epoch": 0.16, "learning_rate": 4.952432065601961e-06, "logits/chosen": -3.144803285598755, "logits/rejected": -3.0549213886260986, "logps/chosen": -53.74242401123047, "logps/rejected": -774.3143310546875, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": 0.016421759501099586, "rewards/margins": 7.347731590270996, "rewards/rejected": -7.331310272216797, "step": 13030 }, { "epoch": 0.16, "learning_rate": 4.952229043515692e-06, "logits/chosen": -3.1647849082946777, "logits/rejected": -3.0880274772644043, "logps/chosen": -56.05511474609375, "logps/rejected": -760.3549194335938, "loss": 0.0358, "rewards/accuracies": 1.0, "rewards/chosen": -0.0915893167257309, "rewards/margins": 7.115941047668457, "rewards/rejected": -7.2075300216674805, "step": 13040 }, { "epoch": 0.16, "learning_rate": 4.952025593277041e-06, "logits/chosen": -3.1459994316101074, "logits/rejected": -3.0656611919403076, "logps/chosen": -54.24030685424805, "logps/rejected": -625.2960205078125, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": -0.0342969112098217, "rewards/margins": 5.8297224044799805, "rewards/rejected": -5.864019393920898, "step": 13050 }, { "epoch": 0.16, "learning_rate": 4.951821714921533e-06, "logits/chosen": -3.1157195568084717, "logits/rejected": -3.0537285804748535, "logps/chosen": -64.60762023925781, "logps/rejected": -649.5599365234375, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": -0.13917198777198792, "rewards/margins": 5.959578037261963, "rewards/rejected": -6.09874963760376, "step": 13060 }, { "epoch": 0.16, "learning_rate": 4.9516174084847635e-06, "logits/chosen": -3.139944314956665, "logits/rejected": -3.082486867904663, "logps/chosen": -53.4090690612793, "logps/rejected": -672.6212158203125, "loss": 0.058, "rewards/accuracies": 1.0, "rewards/chosen": -0.03623037785291672, "rewards/margins": 6.284905910491943, "rewards/rejected": -6.321136474609375, "step": 13070 }, { "epoch": 0.16, "learning_rate": 4.951412674002403e-06, "logits/chosen": -3.1620233058929443, "logits/rejected": -3.0777649879455566, "logps/chosen": -69.99723815917969, "logps/rejected": -749.0342407226562, "loss": 0.0394, "rewards/accuracies": 1.0, "rewards/chosen": -0.1438809335231781, "rewards/margins": 6.957015037536621, "rewards/rejected": -7.100895881652832, "step": 13080 }, { "epoch": 0.16, "learning_rate": 4.9512075115101975e-06, "logits/chosen": -3.1545028686523438, "logits/rejected": -3.0957443714141846, "logps/chosen": -85.2114486694336, "logps/rejected": -642.0030517578125, "loss": 0.1633, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.32049229741096497, "rewards/margins": 5.712988376617432, "rewards/rejected": -6.033480644226074, "step": 13090 }, { "epoch": 0.16, "learning_rate": 4.951001921043969e-06, "logits/chosen": -3.2021842002868652, "logits/rejected": -3.1186723709106445, "logps/chosen": -61.976470947265625, "logps/rejected": -698.9781494140625, "loss": 0.0456, "rewards/accuracies": 1.0, "rewards/chosen": -0.12220729887485504, "rewards/margins": 6.46508264541626, "rewards/rejected": -6.587289333343506, "step": 13100 }, { "epoch": 0.16, "learning_rate": 4.950795902639612e-06, "logits/chosen": -3.135113000869751, "logits/rejected": -3.0746569633483887, "logps/chosen": -106.64280700683594, "logps/rejected": -605.608642578125, "loss": 0.1099, "rewards/accuracies": 1.0, "rewards/chosen": -0.6151520013809204, "rewards/margins": 5.051883220672607, "rewards/rejected": -5.667035102844238, "step": 13110 }, { "epoch": 0.16, "learning_rate": 4.950589456333098e-06, "logits/chosen": -3.131425380706787, "logits/rejected": -3.0397486686706543, "logps/chosen": -190.80909729003906, "logps/rejected": -769.4141845703125, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/chosen": -1.330309510231018, "rewards/margins": 5.930628299713135, "rewards/rejected": -7.260937690734863, "step": 13120 }, { "epoch": 0.16, "learning_rate": 4.950382582160471e-06, "logits/chosen": -3.136082172393799, "logits/rejected": -3.096883773803711, "logps/chosen": -75.3218765258789, "logps/rejected": -644.3926391601562, "loss": 0.0889, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.23632776737213135, "rewards/margins": 5.80210542678833, "rewards/rejected": -6.038433074951172, "step": 13130 }, { "epoch": 0.16, "learning_rate": 4.95017528015785e-06, "logits/chosen": -3.146507978439331, "logits/rejected": -3.0855069160461426, "logps/chosen": -53.94525909423828, "logps/rejected": -921.0753784179688, "loss": 0.0302, "rewards/accuracies": 1.0, "rewards/chosen": 0.07484327256679535, "rewards/margins": 8.879480361938477, "rewards/rejected": -8.80463695526123, "step": 13140 }, { "epoch": 0.16, "learning_rate": 4.949967550361432e-06, "logits/chosen": -3.156097412109375, "logits/rejected": -3.0546562671661377, "logps/chosen": -65.00261688232422, "logps/rejected": -740.1981201171875, "loss": 0.1379, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0945371463894844, "rewards/margins": 6.903675079345703, "rewards/rejected": -6.998211860656738, "step": 13150 }, { "epoch": 0.16, "learning_rate": 4.949759392807483e-06, "logits/chosen": -3.183945417404175, "logits/rejected": -3.117068290710449, "logps/chosen": -50.271568298339844, "logps/rejected": -739.9190673828125, "loss": 0.0387, "rewards/accuracies": 1.0, "rewards/chosen": 0.007896100170910358, "rewards/margins": 7.019390106201172, "rewards/rejected": -7.0114946365356445, "step": 13160 }, { "epoch": 0.16, "learning_rate": 4.9495508075323475e-06, "logits/chosen": -3.139599561691284, "logits/rejected": -3.098848819732666, "logps/chosen": -51.6004753112793, "logps/rejected": -666.0773315429688, "loss": 0.0408, "rewards/accuracies": 1.0, "rewards/chosen": -0.04768911004066467, "rewards/margins": 6.223202705383301, "rewards/rejected": -6.270892143249512, "step": 13170 }, { "epoch": 0.16, "learning_rate": 4.949341794572445e-06, "logits/chosen": -3.1703763008117676, "logits/rejected": -3.1047275066375732, "logps/chosen": -58.18428421020508, "logps/rejected": -625.9415283203125, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/chosen": 0.001356391585431993, "rewards/margins": 5.860371112823486, "rewards/rejected": -5.85901403427124, "step": 13180 }, { "epoch": 0.16, "learning_rate": 4.949132353964268e-06, "logits/chosen": -3.1184372901916504, "logits/rejected": -3.077298402786255, "logps/chosen": -45.15890884399414, "logps/rejected": -647.206787109375, "loss": 0.077, "rewards/accuracies": 1.0, "rewards/chosen": 0.020091038197278976, "rewards/margins": 6.115046501159668, "rewards/rejected": -6.094955921173096, "step": 13190 }, { "epoch": 0.16, "learning_rate": 4.948922485744383e-06, "logits/chosen": -3.1666817665100098, "logits/rejected": -3.149373769760132, "logps/chosen": -22.363569259643555, "logps/rejected": -370.3056640625, "loss": 0.0499, "rewards/accuracies": 1.0, "rewards/chosen": 0.1363331377506256, "rewards/margins": 3.483041286468506, "rewards/rejected": -3.346708297729492, "step": 13200 }, { "epoch": 0.16, "learning_rate": 4.948712189949435e-06, "logits/chosen": -3.1328155994415283, "logits/rejected": -3.1021058559417725, "logps/chosen": -56.341270446777344, "logps/rejected": -608.0196533203125, "loss": 0.0417, "rewards/accuracies": 1.0, "rewards/chosen": -0.09887824207544327, "rewards/margins": 5.5841064453125, "rewards/rejected": -5.682984828948975, "step": 13210 }, { "epoch": 0.16, "learning_rate": 4.948501466616139e-06, "logits/chosen": -3.1441636085510254, "logits/rejected": -3.0667824745178223, "logps/chosen": -60.855743408203125, "logps/rejected": -673.0387573242188, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": -0.10479070991277695, "rewards/margins": 6.228314399719238, "rewards/rejected": -6.333104610443115, "step": 13220 }, { "epoch": 0.16, "learning_rate": 4.948290315781287e-06, "logits/chosen": -3.150865077972412, "logits/rejected": -3.091404676437378, "logps/chosen": -29.006683349609375, "logps/rejected": -458.39324951171875, "loss": 0.0525, "rewards/accuracies": 1.0, "rewards/chosen": 0.1546032428741455, "rewards/margins": 4.356755256652832, "rewards/rejected": -4.202152252197266, "step": 13230 }, { "epoch": 0.16, "learning_rate": 4.948078737481746e-06, "logits/chosen": -3.1642467975616455, "logits/rejected": -3.1412734985351562, "logps/chosen": -69.9259262084961, "logps/rejected": -606.2530517578125, "loss": 0.0363, "rewards/accuracies": 1.0, "rewards/chosen": -0.18647687137126923, "rewards/margins": 5.464674472808838, "rewards/rejected": -5.651151657104492, "step": 13240 }, { "epoch": 0.16, "learning_rate": 4.947866731754457e-06, "logits/chosen": -3.172698497772217, "logits/rejected": -3.1068646907806396, "logps/chosen": -35.722652435302734, "logps/rejected": -638.8976440429688, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": 0.10291621834039688, "rewards/margins": 6.1070027351379395, "rewards/rejected": -6.004086494445801, "step": 13250 }, { "epoch": 0.16, "learning_rate": 4.947654298636435e-06, "logits/chosen": -3.146512508392334, "logits/rejected": -3.095325231552124, "logps/chosen": -45.52921676635742, "logps/rejected": -585.198974609375, "loss": 0.0414, "rewards/accuracies": 1.0, "rewards/chosen": -0.028183147311210632, "rewards/margins": 5.422400951385498, "rewards/rejected": -5.4505839347839355, "step": 13260 }, { "epoch": 0.16, "learning_rate": 4.947441438164771e-06, "logits/chosen": -3.1414108276367188, "logits/rejected": -3.0810070037841797, "logps/chosen": -65.65589904785156, "logps/rejected": -698.0142822265625, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": -0.13598227500915527, "rewards/margins": 6.446101188659668, "rewards/rejected": -6.582084655761719, "step": 13270 }, { "epoch": 0.16, "learning_rate": 4.947228150376629e-06, "logits/chosen": -3.1240789890289307, "logits/rejected": -3.0742743015289307, "logps/chosen": -66.33541870117188, "logps/rejected": -679.5531616210938, "loss": 0.0425, "rewards/accuracies": 1.0, "rewards/chosen": -0.14797069132328033, "rewards/margins": 6.242895126342773, "rewards/rejected": -6.390865802764893, "step": 13280 }, { "epoch": 0.16, "learning_rate": 4.9470144353092485e-06, "logits/chosen": -3.1440367698669434, "logits/rejected": -3.0505428314208984, "logps/chosen": -72.637939453125, "logps/rejected": -730.6746826171875, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/chosen": -0.16554321348667145, "rewards/margins": 6.719298362731934, "rewards/rejected": -6.8848419189453125, "step": 13290 }, { "epoch": 0.16, "learning_rate": 4.946800292999945e-06, "logits/chosen": -3.120244026184082, "logits/rejected": -3.0444235801696777, "logps/chosen": -49.3956298828125, "logps/rejected": -664.4742431640625, "loss": 0.1132, "rewards/accuracies": 1.0, "rewards/chosen": 0.048575520515441895, "rewards/margins": 6.289854049682617, "rewards/rejected": -6.241278648376465, "step": 13300 }, { "epoch": 0.16, "learning_rate": 4.946585723486105e-06, "logits/chosen": -3.178093671798706, "logits/rejected": -3.1222987174987793, "logps/chosen": -26.42782974243164, "logps/rejected": -653.6253662109375, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": 0.1684895008802414, "rewards/margins": 6.320120811462402, "rewards/rejected": -6.1516313552856445, "step": 13310 }, { "epoch": 0.16, "learning_rate": 4.9463707268051934e-06, "logits/chosen": -3.1187405586242676, "logits/rejected": -3.0461394786834717, "logps/chosen": -68.59745788574219, "logps/rejected": -616.1363525390625, "loss": 0.1238, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.20746025443077087, "rewards/margins": 5.561392784118652, "rewards/rejected": -5.768853664398193, "step": 13320 }, { "epoch": 0.16, "learning_rate": 4.946155302994746e-06, "logits/chosen": -3.1461234092712402, "logits/rejected": -3.123342514038086, "logps/chosen": -48.21498489379883, "logps/rejected": -471.4034729003906, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/chosen": -0.07470650970935822, "rewards/margins": 4.262216091156006, "rewards/rejected": -4.336922645568848, "step": 13330 }, { "epoch": 0.16, "learning_rate": 4.945939452092378e-06, "logits/chosen": -3.167149782180786, "logits/rejected": -3.1267497539520264, "logps/chosen": -42.51911926269531, "logps/rejected": -561.9488525390625, "loss": 0.0753, "rewards/accuracies": 1.0, "rewards/chosen": 0.0655142217874527, "rewards/margins": 5.292803764343262, "rewards/rejected": -5.227290153503418, "step": 13340 }, { "epoch": 0.16, "learning_rate": 4.945723174135775e-06, "logits/chosen": -3.128356456756592, "logits/rejected": -3.0752112865448, "logps/chosen": -45.009925842285156, "logps/rejected": -728.8273315429688, "loss": 0.0392, "rewards/accuracies": 1.0, "rewards/chosen": 0.08586324006319046, "rewards/margins": 6.97119140625, "rewards/rejected": -6.8853278160095215, "step": 13350 }, { "epoch": 0.16, "learning_rate": 4.945506469162697e-06, "logits/chosen": -3.1288022994995117, "logits/rejected": -3.059809923171997, "logps/chosen": -81.54733276367188, "logps/rejected": -551.26953125, "loss": 0.1972, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3119969964027405, "rewards/margins": 4.814784526824951, "rewards/rejected": -5.126781940460205, "step": 13360 }, { "epoch": 0.16, "learning_rate": 4.945289337210984e-06, "logits/chosen": -3.114778757095337, "logits/rejected": -3.064706802368164, "logps/chosen": -44.988929748535156, "logps/rejected": -571.789306640625, "loss": 0.1112, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.010511411353945732, "rewards/margins": 5.342288017272949, "rewards/rejected": -5.331777095794678, "step": 13370 }, { "epoch": 0.16, "learning_rate": 4.945071778318542e-06, "logits/chosen": -3.171323776245117, "logits/rejected": -3.1052358150482178, "logps/chosen": -31.078561782836914, "logps/rejected": -494.5462951660156, "loss": 0.0763, "rewards/accuracies": 1.0, "rewards/chosen": 0.10920727252960205, "rewards/margins": 4.671467304229736, "rewards/rejected": -4.562259197235107, "step": 13380 }, { "epoch": 0.16, "learning_rate": 4.94485379252336e-06, "logits/chosen": -3.1570842266082764, "logits/rejected": -3.1167891025543213, "logps/chosen": -24.147247314453125, "logps/rejected": -526.7890625, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/chosen": 0.18338695168495178, "rewards/margins": 5.060407638549805, "rewards/rejected": -4.877020835876465, "step": 13390 }, { "epoch": 0.16, "learning_rate": 4.944635379863495e-06, "logits/chosen": -3.1391806602478027, "logits/rejected": -3.087554931640625, "logps/chosen": -40.28547286987305, "logps/rejected": -436.595458984375, "loss": 0.202, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.06084712594747543, "rewards/margins": 4.040613174438477, "rewards/rejected": -3.9797661304473877, "step": 13400 }, { "epoch": 0.16, "learning_rate": 4.944416540377083e-06, "logits/chosen": -3.1786246299743652, "logits/rejected": -3.122471570968628, "logps/chosen": -30.34938621520996, "logps/rejected": -477.515380859375, "loss": 0.1169, "rewards/accuracies": 1.0, "rewards/chosen": 0.13178035616874695, "rewards/margins": 4.531317710876465, "rewards/rejected": -4.399538040161133, "step": 13410 }, { "epoch": 0.16, "learning_rate": 4.944197274102332e-06, "logits/chosen": -3.1769185066223145, "logits/rejected": -3.141559362411499, "logps/chosen": -41.85614013671875, "logps/rejected": -513.0516357421875, "loss": 0.0845, "rewards/accuracies": 1.0, "rewards/chosen": 0.025110507383942604, "rewards/margins": 4.759054660797119, "rewards/rejected": -4.733944892883301, "step": 13420 }, { "epoch": 0.16, "learning_rate": 4.943977581077525e-06, "logits/chosen": -3.152320623397827, "logits/rejected": -3.0901217460632324, "logps/chosen": -50.2003173828125, "logps/rejected": -576.7752685546875, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": -0.016817713156342506, "rewards/margins": 5.348154544830322, "rewards/rejected": -5.364972114562988, "step": 13430 }, { "epoch": 0.16, "learning_rate": 4.943757461341021e-06, "logits/chosen": -3.1897287368774414, "logits/rejected": -3.151669979095459, "logps/chosen": -24.84561538696289, "logps/rejected": -342.2099304199219, "loss": 0.0526, "rewards/accuracies": 1.0, "rewards/chosen": 0.13482236862182617, "rewards/margins": 3.2011806964874268, "rewards/rejected": -3.0663580894470215, "step": 13440 }, { "epoch": 0.16, "learning_rate": 4.943536914931251e-06, "logits/chosen": -3.1144561767578125, "logits/rejected": -3.049736738204956, "logps/chosen": -49.564273834228516, "logps/rejected": -618.1798706054688, "loss": 0.0984, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.013197995722293854, "rewards/margins": 5.761162757873535, "rewards/rejected": -5.774360656738281, "step": 13450 }, { "epoch": 0.16, "learning_rate": 4.943315941886722e-06, "logits/chosen": -3.1852450370788574, "logits/rejected": -3.118661880493164, "logps/chosen": -52.295616149902344, "logps/rejected": -653.2116088867188, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": -0.054124753922224045, "rewards/margins": 6.093517303466797, "rewards/rejected": -6.147641658782959, "step": 13460 }, { "epoch": 0.16, "learning_rate": 4.9430945422460165e-06, "logits/chosen": -3.173532009124756, "logits/rejected": -3.107412815093994, "logps/chosen": -56.5013313293457, "logps/rejected": -601.1725463867188, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": -0.044882632791996, "rewards/margins": 5.555563926696777, "rewards/rejected": -5.600447654724121, "step": 13470 }, { "epoch": 0.16, "learning_rate": 4.942872716047788e-06, "logits/chosen": -3.201611280441284, "logits/rejected": -3.1253581047058105, "logps/chosen": -49.273983001708984, "logps/rejected": -680.5853881835938, "loss": 0.0536, "rewards/accuracies": 1.0, "rewards/chosen": -0.009171197190880775, "rewards/margins": 6.400247097015381, "rewards/rejected": -6.40941858291626, "step": 13480 }, { "epoch": 0.16, "learning_rate": 4.9426504633307694e-06, "logits/chosen": -3.147062063217163, "logits/rejected": -3.084846019744873, "logps/chosen": -95.58154296875, "logps/rejected": -611.6604614257812, "loss": 0.0438, "rewards/accuracies": 1.0, "rewards/chosen": -0.4825390875339508, "rewards/margins": 5.240290641784668, "rewards/rejected": -5.722830295562744, "step": 13490 }, { "epoch": 0.16, "learning_rate": 4.942427784133763e-06, "logits/chosen": -3.149050235748291, "logits/rejected": -3.073132276535034, "logps/chosen": -80.91426849365234, "logps/rejected": -495.86639404296875, "loss": 0.0727, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.39237797260284424, "rewards/margins": 4.187745571136475, "rewards/rejected": -4.580122947692871, "step": 13500 }, { "epoch": 0.16, "learning_rate": 4.94220467849565e-06, "logits/chosen": -3.1415278911590576, "logits/rejected": -3.0926194190979004, "logps/chosen": -57.540367126464844, "logps/rejected": -531.8163452148438, "loss": 0.0519, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1411331444978714, "rewards/margins": 4.781424045562744, "rewards/rejected": -4.9225568771362305, "step": 13510 }, { "epoch": 0.16, "learning_rate": 4.941981146455382e-06, "logits/chosen": -3.149460554122925, "logits/rejected": -3.0810093879699707, "logps/chosen": -42.40929412841797, "logps/rejected": -724.3038940429688, "loss": 0.0508, "rewards/accuracies": 1.0, "rewards/chosen": 0.06405146420001984, "rewards/margins": 6.8916916847229, "rewards/rejected": -6.827639579772949, "step": 13520 }, { "epoch": 0.16, "learning_rate": 4.941757188051989e-06, "logits/chosen": -3.144915819168091, "logits/rejected": -3.080596685409546, "logps/chosen": -56.661659240722656, "logps/rejected": -504.6513671875, "loss": 0.1843, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.10785780102014542, "rewards/margins": 4.546370029449463, "rewards/rejected": -4.654227256774902, "step": 13530 }, { "epoch": 0.16, "learning_rate": 4.9415328033245726e-06, "logits/chosen": -3.1270787715911865, "logits/rejected": -3.055521011352539, "logps/chosen": -41.24449920654297, "logps/rejected": -524.2276611328125, "loss": 0.095, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.05110473185777664, "rewards/margins": 4.905728340148926, "rewards/rejected": -4.854623794555664, "step": 13540 }, { "epoch": 0.16, "learning_rate": 4.941307992312309e-06, "logits/chosen": -3.112806558609009, "logits/rejected": -3.088576316833496, "logps/chosen": -21.214630126953125, "logps/rejected": -410.92144775390625, "loss": 0.126, "rewards/accuracies": 1.0, "rewards/chosen": 0.1792634129524231, "rewards/margins": 3.9176888465881348, "rewards/rejected": -3.7384257316589355, "step": 13550 }, { "epoch": 0.16, "learning_rate": 4.941082755054452e-06, "logits/chosen": -3.1350460052490234, "logits/rejected": -3.1201961040496826, "logps/chosen": -20.43405532836914, "logps/rejected": -469.89276123046875, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": 0.20252183079719543, "rewards/margins": 4.537515163421631, "rewards/rejected": -4.334994316101074, "step": 13560 }, { "epoch": 0.16, "learning_rate": 4.940857091590324e-06, "logits/chosen": -3.16023588180542, "logits/rejected": -3.100710391998291, "logps/chosen": -45.371971130371094, "logps/rejected": -771.03857421875, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.11217758804559708, "rewards/margins": 7.409432411193848, "rewards/rejected": -7.297255039215088, "step": 13570 }, { "epoch": 0.16, "learning_rate": 4.940631001959328e-06, "logits/chosen": -3.142038583755493, "logits/rejected": -3.1004459857940674, "logps/chosen": -35.5573616027832, "logps/rejected": -500.8890686035156, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": 0.14184977114200592, "rewards/margins": 4.764453411102295, "rewards/rejected": -4.6226043701171875, "step": 13580 }, { "epoch": 0.16, "learning_rate": 4.940404486200937e-06, "logits/chosen": -3.141413450241089, "logits/rejected": -3.1202340126037598, "logps/chosen": -28.898441314697266, "logps/rejected": -437.6780700683594, "loss": 0.0507, "rewards/accuracies": 1.0, "rewards/chosen": 0.14336004853248596, "rewards/margins": 4.140738010406494, "rewards/rejected": -3.997377872467041, "step": 13590 }, { "epoch": 0.16, "learning_rate": 4.940177544354701e-06, "logits/chosen": -3.143113136291504, "logits/rejected": -3.106914520263672, "logps/chosen": -31.229244232177734, "logps/rejected": -513.5989990234375, "loss": 0.126, "rewards/accuracies": 1.0, "rewards/chosen": 0.15232878923416138, "rewards/margins": 4.890472412109375, "rewards/rejected": -4.7381439208984375, "step": 13600 }, { "epoch": 0.16, "learning_rate": 4.939950176460243e-06, "logits/chosen": -3.1613831520080566, "logits/rejected": -3.1248996257781982, "logps/chosen": -30.239566802978516, "logps/rejected": -504.13970947265625, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/chosen": 0.14579495787620544, "rewards/margins": 4.812438011169434, "rewards/rejected": -4.6666436195373535, "step": 13610 }, { "epoch": 0.16, "learning_rate": 4.939722382557262e-06, "logits/chosen": -3.143618106842041, "logits/rejected": -3.1007988452911377, "logps/chosen": -40.17237091064453, "logps/rejected": -428.2689514160156, "loss": 0.1147, "rewards/accuracies": 1.0, "rewards/chosen": 0.1772070974111557, "rewards/margins": 4.070599555969238, "rewards/rejected": -3.893393039703369, "step": 13620 }, { "epoch": 0.16, "learning_rate": 4.939494162685527e-06, "logits/chosen": -3.180795192718506, "logits/rejected": -3.144331455230713, "logps/chosen": -41.06291580200195, "logps/rejected": -588.2193603515625, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/chosen": 0.09461015462875366, "rewards/margins": 5.572603702545166, "rewards/rejected": -5.477993488311768, "step": 13630 }, { "epoch": 0.16, "learning_rate": 4.939265516884888e-06, "logits/chosen": -3.136955738067627, "logits/rejected": -3.085785150527954, "logps/chosen": -25.983922958374023, "logps/rejected": -523.1392211914062, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/chosen": 0.1989726424217224, "rewards/margins": 5.028193950653076, "rewards/rejected": -4.829220771789551, "step": 13640 }, { "epoch": 0.16, "learning_rate": 4.9390364451952636e-06, "logits/chosen": -3.1280717849731445, "logits/rejected": -3.0352749824523926, "logps/chosen": -62.29230880737305, "logps/rejected": -712.5966796875, "loss": 0.0369, "rewards/accuracies": 1.0, "rewards/chosen": 0.04785952717065811, "rewards/margins": 6.75011682510376, "rewards/rejected": -6.7022576332092285, "step": 13650 }, { "epoch": 0.16, "learning_rate": 4.93880694765665e-06, "logits/chosen": -3.1388192176818848, "logits/rejected": -3.0413527488708496, "logps/chosen": -38.9981575012207, "logps/rejected": -816.2364501953125, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/chosen": 0.14146383106708527, "rewards/margins": 7.892320156097412, "rewards/rejected": -7.750856876373291, "step": 13660 }, { "epoch": 0.16, "learning_rate": 4.938577024309118e-06, "logits/chosen": -3.164644718170166, "logits/rejected": -3.0991694927215576, "logps/chosen": -37.43510055541992, "logps/rejected": -624.8187866210938, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": 0.18804824352264404, "rewards/margins": 6.026909828186035, "rewards/rejected": -5.838861465454102, "step": 13670 }, { "epoch": 0.16, "learning_rate": 4.93834667519281e-06, "logits/chosen": -3.145268201828003, "logits/rejected": -3.085923671722412, "logps/chosen": -32.228492736816406, "logps/rejected": -622.9417114257812, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": 0.183110773563385, "rewards/margins": 6.028853893280029, "rewards/rejected": -5.845743179321289, "step": 13680 }, { "epoch": 0.16, "learning_rate": 4.938115900347944e-06, "logits/chosen": -3.1570987701416016, "logits/rejected": -3.102377414703369, "logps/chosen": -30.1242618560791, "logps/rejected": -584.1685791015625, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/chosen": 0.19182242453098297, "rewards/margins": 5.637156963348389, "rewards/rejected": -5.445334434509277, "step": 13690 }, { "epoch": 0.16, "learning_rate": 4.937884699814815e-06, "logits/chosen": -3.173386812210083, "logits/rejected": -3.0983164310455322, "logps/chosen": -30.68805503845215, "logps/rejected": -501.416748046875, "loss": 0.0427, "rewards/accuracies": 1.0, "rewards/chosen": 0.16356724500656128, "rewards/margins": 4.785765647888184, "rewards/rejected": -4.62219762802124, "step": 13700 }, { "epoch": 0.16, "learning_rate": 4.937653073633786e-06, "logits/chosen": -3.1558496952056885, "logits/rejected": -3.1392712593078613, "logps/chosen": -33.71427536010742, "logps/rejected": -736.6044311523438, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": 0.12950439751148224, "rewards/margins": 7.0995378494262695, "rewards/rejected": -6.970034122467041, "step": 13710 }, { "epoch": 0.16, "learning_rate": 4.937421021845302e-06, "logits/chosen": -3.1834957599639893, "logits/rejected": -3.133899211883545, "logps/chosen": -45.02086639404297, "logps/rejected": -631.6712646484375, "loss": 0.1634, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.06308282911777496, "rewards/margins": 5.982916831970215, "rewards/rejected": -5.919834136962891, "step": 13720 }, { "epoch": 0.16, "learning_rate": 4.937188544489877e-06, "logits/chosen": -3.1681385040283203, "logits/rejected": -3.0755457878112793, "logps/chosen": -37.175804138183594, "logps/rejected": -656.0748291015625, "loss": 0.0387, "rewards/accuracies": 1.0, "rewards/chosen": 0.13627734780311584, "rewards/margins": 6.297625541687012, "rewards/rejected": -6.161348342895508, "step": 13730 }, { "epoch": 0.16, "learning_rate": 4.936955641608101e-06, "logits/chosen": -3.1893324851989746, "logits/rejected": -3.139326810836792, "logps/chosen": -51.238670349121094, "logps/rejected": -634.5999755859375, "loss": 0.1234, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.012924917042255402, "rewards/margins": 5.944572448730469, "rewards/rejected": -5.957497596740723, "step": 13740 }, { "epoch": 0.16, "learning_rate": 4.936722313240639e-06, "logits/chosen": -3.1367480754852295, "logits/rejected": -3.0956058502197266, "logps/chosen": -52.91986083984375, "logps/rejected": -538.4111938476562, "loss": 0.0534, "rewards/accuracies": 1.0, "rewards/chosen": -0.03902077302336693, "rewards/margins": 4.965217590332031, "rewards/rejected": -5.004238128662109, "step": 13750 }, { "epoch": 0.16, "learning_rate": 4.936488559428228e-06, "logits/chosen": -3.180067539215088, "logits/rejected": -3.14196515083313, "logps/chosen": -19.863862991333008, "logps/rejected": -527.3875732421875, "loss": 0.0423, "rewards/accuracies": 1.0, "rewards/chosen": 0.1751127541065216, "rewards/margins": 5.072988510131836, "rewards/rejected": -4.8978753089904785, "step": 13760 }, { "epoch": 0.16, "learning_rate": 4.936254380211683e-06, "logits/chosen": -3.2030322551727295, "logits/rejected": -3.112421989440918, "logps/chosen": -38.582008361816406, "logps/rejected": -720.1553344726562, "loss": 0.0476, "rewards/accuracies": 1.0, "rewards/chosen": 0.12944626808166504, "rewards/margins": 6.945558071136475, "rewards/rejected": -6.8161115646362305, "step": 13770 }, { "epoch": 0.16, "learning_rate": 4.9360197756318875e-06, "logits/chosen": -3.1832528114318848, "logits/rejected": -3.083357572555542, "logps/chosen": -38.47062301635742, "logps/rejected": -644.3741455078125, "loss": 0.0413, "rewards/accuracies": 1.0, "rewards/chosen": 0.08161963522434235, "rewards/margins": 6.120258808135986, "rewards/rejected": -6.038639068603516, "step": 13780 }, { "epoch": 0.17, "learning_rate": 4.9357847457298056e-06, "logits/chosen": -3.1318750381469727, "logits/rejected": -3.034672260284424, "logps/chosen": -47.472801208496094, "logps/rejected": -719.7825927734375, "loss": 0.043, "rewards/accuracies": 1.0, "rewards/chosen": 0.00674863439053297, "rewards/margins": 6.807156562805176, "rewards/rejected": -6.800407409667969, "step": 13790 }, { "epoch": 0.17, "learning_rate": 4.935549290546473e-06, "logits/chosen": -3.141331434249878, "logits/rejected": -3.078794002532959, "logps/chosen": -44.3135986328125, "logps/rejected": -592.1637573242188, "loss": 0.0426, "rewards/accuracies": 1.0, "rewards/chosen": 0.03515029698610306, "rewards/margins": 5.567455768585205, "rewards/rejected": -5.532305717468262, "step": 13800 }, { "epoch": 0.17, "learning_rate": 4.935313410122999e-06, "logits/chosen": -3.161437511444092, "logits/rejected": -3.098384380340576, "logps/chosen": -50.057334899902344, "logps/rejected": -602.4278564453125, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.038901012390851974, "rewards/margins": 5.673551559448242, "rewards/rejected": -5.634650230407715, "step": 13810 }, { "epoch": 0.17, "learning_rate": 4.935077104500566e-06, "logits/chosen": -3.2073616981506348, "logits/rejected": -3.172088623046875, "logps/chosen": -55.36757278442383, "logps/rejected": -667.9420776367188, "loss": 0.0334, "rewards/accuracies": 1.0, "rewards/chosen": -0.015629559755325317, "rewards/margins": 6.272040367126465, "rewards/rejected": -6.287670135498047, "step": 13820 }, { "epoch": 0.17, "learning_rate": 4.934840373720434e-06, "logits/chosen": -3.180222988128662, "logits/rejected": -3.1208503246307373, "logps/chosen": -39.521419525146484, "logps/rejected": -443.11700439453125, "loss": 0.0982, "rewards/accuracies": 1.0, "rewards/chosen": 0.05704673007130623, "rewards/margins": 4.077856063842773, "rewards/rejected": -4.020809173583984, "step": 13830 }, { "epoch": 0.17, "learning_rate": 4.934603217823936e-06, "logits/chosen": -3.1385936737060547, "logits/rejected": -3.079338312149048, "logps/chosen": -44.10083770751953, "logps/rejected": -460.0283203125, "loss": 0.1018, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.04173237830400467, "rewards/margins": 4.235164642333984, "rewards/rejected": -4.193432331085205, "step": 13840 }, { "epoch": 0.17, "learning_rate": 4.934365636852478e-06, "logits/chosen": -3.1552767753601074, "logits/rejected": -3.092942476272583, "logps/chosen": -39.45242691040039, "logps/rejected": -610.3616333007812, "loss": 0.1281, "rewards/accuracies": 1.0, "rewards/chosen": 0.12140699476003647, "rewards/margins": 5.839205741882324, "rewards/rejected": -5.717798709869385, "step": 13850 }, { "epoch": 0.17, "learning_rate": 4.934127630847541e-06, "logits/chosen": -3.174893379211426, "logits/rejected": -3.1597073078155518, "logps/chosen": -53.44919967651367, "logps/rejected": -620.9901123046875, "loss": 0.1072, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.019027799367904663, "rewards/margins": 5.798735618591309, "rewards/rejected": -5.817763805389404, "step": 13860 }, { "epoch": 0.17, "learning_rate": 4.933889199850679e-06, "logits/chosen": -3.151639223098755, "logits/rejected": -3.060159206390381, "logps/chosen": -60.8812255859375, "logps/rejected": -852.9786987304688, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": -0.06465853005647659, "rewards/margins": 8.05102252960205, "rewards/rejected": -8.115681648254395, "step": 13870 }, { "epoch": 0.17, "learning_rate": 4.933650343903523e-06, "logits/chosen": -3.1349687576293945, "logits/rejected": -3.061093807220459, "logps/chosen": -44.04215621948242, "logps/rejected": -419.86785888671875, "loss": 0.0943, "rewards/accuracies": 1.0, "rewards/chosen": 0.04558473825454712, "rewards/margins": 3.8696064949035645, "rewards/rejected": -3.824021816253662, "step": 13880 }, { "epoch": 0.17, "learning_rate": 4.933411063047777e-06, "logits/chosen": -3.140265464782715, "logits/rejected": -3.1132311820983887, "logps/chosen": -28.367305755615234, "logps/rejected": -400.9180603027344, "loss": 0.0508, "rewards/accuracies": 1.0, "rewards/chosen": 0.18021753430366516, "rewards/margins": 3.8174843788146973, "rewards/rejected": -3.6372666358947754, "step": 13890 }, { "epoch": 0.17, "learning_rate": 4.933171357325217e-06, "logits/chosen": -3.142426013946533, "logits/rejected": -3.1248908042907715, "logps/chosen": -36.20148468017578, "logps/rejected": -530.8145751953125, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/chosen": 0.06817231327295303, "rewards/margins": 4.986785411834717, "rewards/rejected": -4.918613433837891, "step": 13900 }, { "epoch": 0.17, "learning_rate": 4.932931226777696e-06, "logits/chosen": -3.145984172821045, "logits/rejected": -3.09122633934021, "logps/chosen": -35.86854553222656, "logps/rejected": -570.7467041015625, "loss": 0.0471, "rewards/accuracies": 1.0, "rewards/chosen": 0.09707184135913849, "rewards/margins": 5.407347202301025, "rewards/rejected": -5.310275077819824, "step": 13910 }, { "epoch": 0.17, "learning_rate": 4.93269067144714e-06, "logits/chosen": -3.1323227882385254, "logits/rejected": -3.117838144302368, "logps/chosen": -20.179811477661133, "logps/rejected": -469.6578674316406, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": 0.17745204269886017, "rewards/margins": 4.513843536376953, "rewards/rejected": -4.336391448974609, "step": 13920 }, { "epoch": 0.17, "learning_rate": 4.932449691375549e-06, "logits/chosen": -3.1680331230163574, "logits/rejected": -3.106276035308838, "logps/chosen": -39.14980697631836, "logps/rejected": -560.7262573242188, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": 0.07684250175952911, "rewards/margins": 5.31076717376709, "rewards/rejected": -5.233924865722656, "step": 13930 }, { "epoch": 0.17, "learning_rate": 4.932208286604999e-06, "logits/chosen": -3.1584975719451904, "logits/rejected": -3.1126558780670166, "logps/chosen": -66.75768280029297, "logps/rejected": -590.0797729492188, "loss": 0.1104, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.15078015625476837, "rewards/margins": 5.361985683441162, "rewards/rejected": -5.512766361236572, "step": 13940 }, { "epoch": 0.17, "learning_rate": 4.931966457177636e-06, "logits/chosen": -3.147369623184204, "logits/rejected": -3.0948963165283203, "logps/chosen": -46.223175048828125, "logps/rejected": -725.0157470703125, "loss": 0.1041, "rewards/accuracies": 1.0, "rewards/chosen": -0.01711173728108406, "rewards/margins": 6.825968265533447, "rewards/rejected": -6.843080997467041, "step": 13950 }, { "epoch": 0.17, "learning_rate": 4.931724203135685e-06, "logits/chosen": -3.132248878479004, "logits/rejected": -3.1032347679138184, "logps/chosen": -55.164398193359375, "logps/rejected": -670.9078369140625, "loss": 0.0999, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.06659569591283798, "rewards/margins": 6.240729331970215, "rewards/rejected": -6.307324409484863, "step": 13960 }, { "epoch": 0.17, "learning_rate": 4.931481524521441e-06, "logits/chosen": -3.1272547245025635, "logits/rejected": -3.087663412094116, "logps/chosen": -35.96698760986328, "logps/rejected": -573.1497802734375, "loss": 0.0462, "rewards/accuracies": 1.0, "rewards/chosen": 0.0438566729426384, "rewards/margins": 5.396088600158691, "rewards/rejected": -5.352231979370117, "step": 13970 }, { "epoch": 0.17, "learning_rate": 4.931238421377277e-06, "logits/chosen": -3.168837070465088, "logits/rejected": -3.141883611679077, "logps/chosen": -50.04160690307617, "logps/rejected": -391.14593505859375, "loss": 0.1201, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.053208570927381516, "rewards/margins": 3.4933578968048096, "rewards/rejected": -3.5465667247772217, "step": 13980 }, { "epoch": 0.17, "learning_rate": 4.930994893745636e-06, "logits/chosen": -3.1503264904022217, "logits/rejected": -3.060896635055542, "logps/chosen": -71.5203628540039, "logps/rejected": -715.1751708984375, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": -0.1174449473619461, "rewards/margins": 6.633604526519775, "rewards/rejected": -6.751049995422363, "step": 13990 }, { "epoch": 0.17, "learning_rate": 4.930750941669039e-06, "logits/chosen": -3.1693432331085205, "logits/rejected": -3.1314241886138916, "logps/chosen": -52.756072998046875, "logps/rejected": -537.1959838867188, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/chosen": -0.10571090877056122, "rewards/margins": 4.881314754486084, "rewards/rejected": -4.987025260925293, "step": 14000 }, { "epoch": 0.17, "learning_rate": 4.930506565190079e-06, "logits/chosen": -3.1555492877960205, "logits/rejected": -3.0950779914855957, "logps/chosen": -34.59805679321289, "logps/rejected": -518.3795166015625, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": 0.06400806456804276, "rewards/margins": 4.863302707672119, "rewards/rejected": -4.799294471740723, "step": 14010 }, { "epoch": 0.17, "learning_rate": 4.930261764351423e-06, "logits/chosen": -3.181777238845825, "logits/rejected": -3.1075634956359863, "logps/chosen": -60.92394256591797, "logps/rejected": -802.7457885742188, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -0.08266860246658325, "rewards/margins": 7.540063381195068, "rewards/rejected": -7.622733116149902, "step": 14020 }, { "epoch": 0.17, "learning_rate": 4.930016539195812e-06, "logits/chosen": -3.2036843299865723, "logits/rejected": -3.1457414627075195, "logps/chosen": -71.76695251464844, "logps/rejected": -809.0352783203125, "loss": 0.1112, "rewards/accuracies": 1.0, "rewards/chosen": -0.27362728118896484, "rewards/margins": 7.421095371246338, "rewards/rejected": -7.694722652435303, "step": 14030 }, { "epoch": 0.17, "learning_rate": 4.929770889766062e-06, "logits/chosen": -3.148216724395752, "logits/rejected": -3.0789647102355957, "logps/chosen": -60.22166061401367, "logps/rejected": -689.9714965820312, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": -0.1012803316116333, "rewards/margins": 6.400847434997559, "rewards/rejected": -6.502127647399902, "step": 14040 }, { "epoch": 0.17, "learning_rate": 4.9295248161050645e-06, "logits/chosen": -3.1368751525878906, "logits/rejected": -3.0337729454040527, "logps/chosen": -34.6024055480957, "logps/rejected": -526.2987670898438, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": 0.08186628669500351, "rewards/margins": 4.969902992248535, "rewards/rejected": -4.888037204742432, "step": 14050 }, { "epoch": 0.17, "learning_rate": 4.929278318255781e-06, "logits/chosen": -3.183966636657715, "logits/rejected": -3.1060500144958496, "logps/chosen": -47.149497985839844, "logps/rejected": -563.5589599609375, "loss": 0.103, "rewards/accuracies": 1.0, "rewards/chosen": 0.05018164962530136, "rewards/margins": 5.30512809753418, "rewards/rejected": -5.254946231842041, "step": 14060 }, { "epoch": 0.17, "learning_rate": 4.929031396261251e-06, "logits/chosen": -3.15094256401062, "logits/rejected": -3.1186842918395996, "logps/chosen": -28.273006439208984, "logps/rejected": -598.9224853515625, "loss": 0.1141, "rewards/accuracies": 1.0, "rewards/chosen": 0.14391738176345825, "rewards/margins": 5.748244285583496, "rewards/rejected": -5.6043267250061035, "step": 14070 }, { "epoch": 0.17, "learning_rate": 4.928784050164584e-06, "logits/chosen": -3.194577932357788, "logits/rejected": -3.1192846298217773, "logps/chosen": -43.186920166015625, "logps/rejected": -722.3375854492188, "loss": 0.0414, "rewards/accuracies": 1.0, "rewards/chosen": 0.06943444907665253, "rewards/margins": 6.9036712646484375, "rewards/rejected": -6.834237098693848, "step": 14080 }, { "epoch": 0.17, "learning_rate": 4.928536280008969e-06, "logits/chosen": -3.178802967071533, "logits/rejected": -3.1226513385772705, "logps/chosen": -42.919578552246094, "logps/rejected": -629.17041015625, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": 0.1038336530327797, "rewards/margins": 5.995518207550049, "rewards/rejected": -5.891684532165527, "step": 14090 }, { "epoch": 0.17, "learning_rate": 4.928288085837664e-06, "logits/chosen": -3.1780529022216797, "logits/rejected": -3.1544620990753174, "logps/chosen": -31.113988876342773, "logps/rejected": -604.0977783203125, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": 0.05261405184864998, "rewards/margins": 5.723003387451172, "rewards/rejected": -5.670389652252197, "step": 14100 }, { "epoch": 0.17, "learning_rate": 4.928039467694004e-06, "logits/chosen": -3.209320068359375, "logits/rejected": -3.168245315551758, "logps/chosen": -45.95613479614258, "logps/rejected": -487.0713806152344, "loss": 0.0737, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.027903422713279724, "rewards/margins": 4.463078022003174, "rewards/rejected": -4.490981578826904, "step": 14110 }, { "epoch": 0.17, "learning_rate": 4.927790425621395e-06, "logits/chosen": -3.149390697479248, "logits/rejected": -3.1198391914367676, "logps/chosen": -44.43912887573242, "logps/rejected": -630.7325439453125, "loss": 0.0415, "rewards/accuracies": 1.0, "rewards/chosen": -0.02632587030529976, "rewards/margins": 5.905582427978516, "rewards/rejected": -5.931907653808594, "step": 14120 }, { "epoch": 0.17, "learning_rate": 4.9275409596633215e-06, "logits/chosen": -3.1629624366760254, "logits/rejected": -3.1016879081726074, "logps/chosen": -65.02253723144531, "logps/rejected": -629.7183837890625, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": -0.20206180214881897, "rewards/margins": 5.695517539978027, "rewards/rejected": -5.897579193115234, "step": 14130 }, { "epoch": 0.17, "learning_rate": 4.927291069863339e-06, "logits/chosen": -3.1970832347869873, "logits/rejected": -3.1654491424560547, "logps/chosen": -70.98230743408203, "logps/rejected": -481.2503967285156, "loss": 0.2149, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2888891100883484, "rewards/margins": 4.164122581481934, "rewards/rejected": -4.453011512756348, "step": 14140 }, { "epoch": 0.17, "learning_rate": 4.927040756265077e-06, "logits/chosen": -3.171417236328125, "logits/rejected": -3.15116024017334, "logps/chosen": -50.0844841003418, "logps/rejected": -562.7613525390625, "loss": 0.1151, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.02374059520661831, "rewards/margins": 5.22358512878418, "rewards/rejected": -5.2473249435424805, "step": 14150 }, { "epoch": 0.17, "learning_rate": 4.926790018912239e-06, "logits/chosen": -3.1507010459899902, "logits/rejected": -3.1201369762420654, "logps/chosen": -33.62574005126953, "logps/rejected": -511.91796875, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": 0.08183794468641281, "rewards/margins": 4.827857971191406, "rewards/rejected": -4.746020317077637, "step": 14160 }, { "epoch": 0.17, "learning_rate": 4.926538857848605e-06, "logits/chosen": -3.102006435394287, "logits/rejected": -3.029215097427368, "logps/chosen": -49.31629180908203, "logps/rejected": -758.6336669921875, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": -0.015782367438077927, "rewards/margins": 7.168917655944824, "rewards/rejected": -7.184700012207031, "step": 14170 }, { "epoch": 0.17, "learning_rate": 4.926287273118025e-06, "logits/chosen": -3.1540184020996094, "logits/rejected": -3.0956292152404785, "logps/chosen": -69.14754486083984, "logps/rejected": -737.5670166015625, "loss": 0.0756, "rewards/accuracies": 1.0, "rewards/chosen": -0.1826169788837433, "rewards/margins": 6.782235145568848, "rewards/rejected": -6.964851379394531, "step": 14180 }, { "epoch": 0.17, "learning_rate": 4.926035264764426e-06, "logits/chosen": -3.1631383895874023, "logits/rejected": -3.1326751708984375, "logps/chosen": -26.361719131469727, "logps/rejected": -479.61993408203125, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": 0.16316267848014832, "rewards/margins": 4.589371681213379, "rewards/rejected": -4.42620849609375, "step": 14190 }, { "epoch": 0.17, "learning_rate": 4.925782832831808e-06, "logits/chosen": -3.1730151176452637, "logits/rejected": -3.1363418102264404, "logps/chosen": -46.888614654541016, "logps/rejected": -598.4635620117188, "loss": 0.0911, "rewards/accuracies": 1.0, "rewards/chosen": -0.05328383296728134, "rewards/margins": 5.5408477783203125, "rewards/rejected": -5.594132423400879, "step": 14200 }, { "epoch": 0.17, "learning_rate": 4.925529977364245e-06, "logits/chosen": -3.1480202674865723, "logits/rejected": -3.14220929145813, "logps/chosen": -54.852943420410156, "logps/rejected": -417.28619384765625, "loss": 0.1066, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.11647695302963257, "rewards/margins": 3.674259662628174, "rewards/rejected": -3.790736675262451, "step": 14210 }, { "epoch": 0.17, "learning_rate": 4.925276698405884e-06, "logits/chosen": -3.195841073989868, "logits/rejected": -3.1341240406036377, "logps/chosen": -36.56647491455078, "logps/rejected": -619.5894775390625, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": 0.08009164035320282, "rewards/margins": 5.885817050933838, "rewards/rejected": -5.80572509765625, "step": 14220 }, { "epoch": 0.17, "learning_rate": 4.925022996000948e-06, "logits/chosen": -3.146512508392334, "logits/rejected": -3.122288942337036, "logps/chosen": -32.46585464477539, "logps/rejected": -547.4097900390625, "loss": 0.1078, "rewards/accuracies": 1.0, "rewards/chosen": 0.10300276428461075, "rewards/margins": 5.193001747131348, "rewards/rejected": -5.089999198913574, "step": 14230 }, { "epoch": 0.17, "learning_rate": 4.9247688701937315e-06, "logits/chosen": -3.171043634414673, "logits/rejected": -3.1297106742858887, "logps/chosen": -43.37635040283203, "logps/rejected": -670.3721313476562, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": -0.006031823344528675, "rewards/margins": 6.294877529144287, "rewards/rejected": -6.300909519195557, "step": 14240 }, { "epoch": 0.17, "learning_rate": 4.924514321028605e-06, "logits/chosen": -3.1688387393951416, "logits/rejected": -3.1009953022003174, "logps/chosen": -53.6750602722168, "logps/rejected": -670.4879760742188, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": -0.027074813842773438, "rewards/margins": 6.25496768951416, "rewards/rejected": -6.282042503356934, "step": 14250 }, { "epoch": 0.17, "learning_rate": 4.9242593485500115e-06, "logits/chosen": -3.178760051727295, "logits/rejected": -3.1202120780944824, "logps/chosen": -42.260040283203125, "logps/rejected": -647.569580078125, "loss": 0.0458, "rewards/accuracies": 1.0, "rewards/chosen": 0.06230132654309273, "rewards/margins": 6.142647743225098, "rewards/rejected": -6.080346584320068, "step": 14260 }, { "epoch": 0.17, "learning_rate": 4.924003952802469e-06, "logits/chosen": -3.196441411972046, "logits/rejected": -3.1464669704437256, "logps/chosen": -32.43235778808594, "logps/rejected": -591.1517944335938, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": 0.14863839745521545, "rewards/margins": 5.67508602142334, "rewards/rejected": -5.526447772979736, "step": 14270 }, { "epoch": 0.17, "learning_rate": 4.923748133830569e-06, "logits/chosen": -3.1506834030151367, "logits/rejected": -3.1161386966705322, "logps/chosen": -36.96425247192383, "logps/rejected": -439.42852783203125, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": 0.11647462844848633, "rewards/margins": 4.137101173400879, "rewards/rejected": -4.020626544952393, "step": 14280 }, { "epoch": 0.17, "learning_rate": 4.923491891678976e-06, "logits/chosen": -3.1866981983184814, "logits/rejected": -3.137766122817993, "logps/chosen": -30.137821197509766, "logps/rejected": -659.1605224609375, "loss": 0.0647, "rewards/accuracies": 1.0, "rewards/chosen": 0.1386864334344864, "rewards/margins": 6.345513820648193, "rewards/rejected": -6.2068281173706055, "step": 14290 }, { "epoch": 0.17, "learning_rate": 4.923235226392431e-06, "logits/chosen": -3.159902334213257, "logits/rejected": -3.0834410190582275, "logps/chosen": -28.73647689819336, "logps/rejected": -591.6033325195312, "loss": 0.0394, "rewards/accuracies": 1.0, "rewards/chosen": 0.16132691502571106, "rewards/margins": 5.679320335388184, "rewards/rejected": -5.517993927001953, "step": 14300 }, { "epoch": 0.17, "learning_rate": 4.922978138015744e-06, "logits/chosen": -3.1465861797332764, "logits/rejected": -3.0921871662139893, "logps/chosen": -52.12541961669922, "logps/rejected": -707.2283935546875, "loss": 0.0903, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.006999397184699774, "rewards/margins": 6.6750383377075195, "rewards/rejected": -6.682037353515625, "step": 14310 }, { "epoch": 0.17, "learning_rate": 4.922720626593805e-06, "logits/chosen": -3.1752450466156006, "logits/rejected": -3.146899938583374, "logps/chosen": -31.877309799194336, "logps/rejected": -595.9527587890625, "loss": 0.1072, "rewards/accuracies": 1.0, "rewards/chosen": 0.14459364116191864, "rewards/margins": 5.715510845184326, "rewards/rejected": -5.570916652679443, "step": 14320 }, { "epoch": 0.17, "learning_rate": 4.922462692171572e-06, "logits/chosen": -3.168900489807129, "logits/rejected": -3.0956943035125732, "logps/chosen": -38.0590705871582, "logps/rejected": -524.6567993164062, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/chosen": 0.05674268677830696, "rewards/margins": 4.917331695556641, "rewards/rejected": -4.860589027404785, "step": 14330 }, { "epoch": 0.17, "learning_rate": 4.922204334794083e-06, "logits/chosen": -3.171718120574951, "logits/rejected": -3.107480525970459, "logps/chosen": -35.87409591674805, "logps/rejected": -511.4478454589844, "loss": 0.0593, "rewards/accuracies": 1.0, "rewards/chosen": 0.129564106464386, "rewards/margins": 4.8626933097839355, "rewards/rejected": -4.73313045501709, "step": 14340 }, { "epoch": 0.17, "learning_rate": 4.921945554506443e-06, "logits/chosen": -3.171581506729126, "logits/rejected": -3.1027190685272217, "logps/chosen": -43.159812927246094, "logps/rejected": -548.2054443359375, "loss": 0.0446, "rewards/accuracies": 1.0, "rewards/chosen": 0.12992092967033386, "rewards/margins": 5.223067283630371, "rewards/rejected": -5.093146800994873, "step": 14350 }, { "epoch": 0.17, "learning_rate": 4.921686351353836e-06, "logits/chosen": -3.203439235687256, "logits/rejected": -3.1069514751434326, "logps/chosen": -47.17833709716797, "logps/rejected": -820.3699340820312, "loss": 0.0379, "rewards/accuracies": 1.0, "rewards/chosen": 0.10618986934423447, "rewards/margins": 7.897686004638672, "rewards/rejected": -7.791495323181152, "step": 14360 }, { "epoch": 0.17, "learning_rate": 4.921426725381518e-06, "logits/chosen": -3.1611733436584473, "logits/rejected": -3.128204107284546, "logps/chosen": -28.58383560180664, "logps/rejected": -543.5706787109375, "loss": 0.111, "rewards/accuracies": 1.0, "rewards/chosen": 0.15467287600040436, "rewards/margins": 5.207942962646484, "rewards/rejected": -5.053269863128662, "step": 14370 }, { "epoch": 0.17, "learning_rate": 4.92116667663482e-06, "logits/chosen": -3.1416666507720947, "logits/rejected": -3.116878032684326, "logps/chosen": -18.5484619140625, "logps/rejected": -484.26104736328125, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": 0.1825370341539383, "rewards/margins": 4.6569085121154785, "rewards/rejected": -4.474370956420898, "step": 14380 }, { "epoch": 0.17, "learning_rate": 4.920906205159144e-06, "logits/chosen": -3.161940336227417, "logits/rejected": -3.0906717777252197, "logps/chosen": -49.585487365722656, "logps/rejected": -662.853759765625, "loss": 0.1004, "rewards/accuracies": 1.0, "rewards/chosen": 0.051678914576768875, "rewards/margins": 6.270909786224365, "rewards/rejected": -6.219231605529785, "step": 14390 }, { "epoch": 0.17, "learning_rate": 4.920645310999968e-06, "logits/chosen": -3.1948819160461426, "logits/rejected": -3.107774019241333, "logps/chosen": -40.66168975830078, "logps/rejected": -707.6748657226562, "loss": 0.0363, "rewards/accuracies": 1.0, "rewards/chosen": 0.10896766185760498, "rewards/margins": 6.791510105133057, "rewards/rejected": -6.682541847229004, "step": 14400 }, { "epoch": 0.17, "learning_rate": 4.920383994202845e-06, "logits/chosen": -3.189239263534546, "logits/rejected": -3.1531920433044434, "logps/chosen": -35.26319122314453, "logps/rejected": -449.75177001953125, "loss": 0.1361, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.04022375866770744, "rewards/margins": 4.157414436340332, "rewards/rejected": -4.117190837860107, "step": 14410 }, { "epoch": 0.17, "learning_rate": 4.9201222548133974e-06, "logits/chosen": -3.1578259468078613, "logits/rejected": -3.122429132461548, "logps/chosen": -69.68577575683594, "logps/rejected": -625.7257690429688, "loss": 0.1056, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.23598606884479523, "rewards/margins": 5.634339809417725, "rewards/rejected": -5.870326042175293, "step": 14420 }, { "epoch": 0.17, "learning_rate": 4.919860092877327e-06, "logits/chosen": -3.147552967071533, "logits/rejected": -3.0811104774475098, "logps/chosen": -50.491722106933594, "logps/rejected": -559.2677001953125, "loss": 0.097, "rewards/accuracies": 1.0, "rewards/chosen": -0.02183621935546398, "rewards/margins": 5.191379547119141, "rewards/rejected": -5.2132158279418945, "step": 14430 }, { "epoch": 0.17, "learning_rate": 4.919597508440404e-06, "logits/chosen": -3.1748030185699463, "logits/rejected": -3.1389708518981934, "logps/chosen": -28.591699600219727, "logps/rejected": -562.27880859375, "loss": 0.0465, "rewards/accuracies": 1.0, "rewards/chosen": 0.12611058354377747, "rewards/margins": 5.373482704162598, "rewards/rejected": -5.247370719909668, "step": 14440 }, { "epoch": 0.17, "learning_rate": 4.919334501548477e-06, "logits/chosen": -3.167412281036377, "logits/rejected": -3.0833823680877686, "logps/chosen": -50.56570816040039, "logps/rejected": -508.0267639160156, "loss": 0.1277, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.011378651484847069, "rewards/margins": 4.681733131408691, "rewards/rejected": -4.693112373352051, "step": 14450 }, { "epoch": 0.17, "learning_rate": 4.919071072247465e-06, "logits/chosen": -3.1698756217956543, "logits/rejected": -3.119664430618286, "logps/chosen": -28.259063720703125, "logps/rejected": -587.3001098632812, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/chosen": 0.16492655873298645, "rewards/margins": 5.6540117263793945, "rewards/rejected": -5.4890851974487305, "step": 14460 }, { "epoch": 0.17, "learning_rate": 4.918807220583362e-06, "logits/chosen": -3.1301181316375732, "logits/rejected": -3.1276681423187256, "logps/chosen": -44.189266204833984, "logps/rejected": -389.64398193359375, "loss": 0.1101, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.03824678808450699, "rewards/margins": 3.4919979572296143, "rewards/rejected": -3.530244827270508, "step": 14470 }, { "epoch": 0.17, "learning_rate": 4.918542946602237e-06, "logits/chosen": -3.119384527206421, "logits/rejected": -3.057596206665039, "logps/chosen": -84.75960540771484, "logps/rejected": -884.5174560546875, "loss": 0.0308, "rewards/accuracies": 1.0, "rewards/chosen": -0.25883275270462036, "rewards/margins": 8.147706031799316, "rewards/rejected": -8.406538009643555, "step": 14480 }, { "epoch": 0.17, "learning_rate": 4.918278250350232e-06, "logits/chosen": -3.14668607711792, "logits/rejected": -3.081705331802368, "logps/chosen": -41.02906799316406, "logps/rejected": -705.4519653320312, "loss": 0.0438, "rewards/accuracies": 1.0, "rewards/chosen": 0.11323167383670807, "rewards/margins": 6.763374328613281, "rewards/rejected": -6.650142669677734, "step": 14490 }, { "epoch": 0.17, "learning_rate": 4.918013131873559e-06, "logits/chosen": -3.147071123123169, "logits/rejected": -3.092329502105713, "logps/chosen": -29.864206314086914, "logps/rejected": -659.3204345703125, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": 0.15632599592208862, "rewards/margins": 6.357617378234863, "rewards/rejected": -6.201291084289551, "step": 14500 }, { "epoch": 0.17, "learning_rate": 4.917747591218509e-06, "logits/chosen": -3.1728336811065674, "logits/rejected": -3.1172492504119873, "logps/chosen": -33.0706901550293, "logps/rejected": -605.5607299804688, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/chosen": 0.1495358645915985, "rewards/margins": 5.824159622192383, "rewards/rejected": -5.674623489379883, "step": 14510 }, { "epoch": 0.17, "learning_rate": 4.917481628431445e-06, "logits/chosen": -3.135164499282837, "logits/rejected": -3.0335612297058105, "logps/chosen": -39.50450897216797, "logps/rejected": -609.6271362304688, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": 0.10225869715213776, "rewards/margins": 5.796441078186035, "rewards/rejected": -5.694182395935059, "step": 14520 }, { "epoch": 0.17, "learning_rate": 4.917215243558804e-06, "logits/chosen": -3.16471004486084, "logits/rejected": -3.0923051834106445, "logps/chosen": -61.16179275512695, "logps/rejected": -654.6366577148438, "loss": 0.0681, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.09848548471927643, "rewards/margins": 6.05037784576416, "rewards/rejected": -6.148863792419434, "step": 14530 }, { "epoch": 0.17, "learning_rate": 4.916948436647094e-06, "logits/chosen": -3.1429924964904785, "logits/rejected": -3.115574359893799, "logps/chosen": -51.060699462890625, "logps/rejected": -331.1549377441406, "loss": 0.1843, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1320277452468872, "rewards/margins": 2.804924964904785, "rewards/rejected": -2.936952590942383, "step": 14540 }, { "epoch": 0.17, "learning_rate": 4.9166812077428996e-06, "logits/chosen": -3.1841092109680176, "logits/rejected": -3.157684087753296, "logps/chosen": -43.078426361083984, "logps/rejected": -516.7182006835938, "loss": 0.1366, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.03540315106511116, "rewards/margins": 4.763210296630859, "rewards/rejected": -4.798613548278809, "step": 14550 }, { "epoch": 0.17, "learning_rate": 4.916413556892878e-06, "logits/chosen": -3.159045696258545, "logits/rejected": -3.1137211322784424, "logps/chosen": -36.651668548583984, "logps/rejected": -553.9561767578125, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/chosen": 0.1012897714972496, "rewards/margins": 5.247736930847168, "rewards/rejected": -5.146448135375977, "step": 14560 }, { "epoch": 0.17, "learning_rate": 4.9161454841437615e-06, "logits/chosen": -3.161299705505371, "logits/rejected": -3.1139700412750244, "logps/chosen": -36.56755828857422, "logps/rejected": -591.8712158203125, "loss": 0.1014, "rewards/accuracies": 1.0, "rewards/chosen": 0.12455949932336807, "rewards/margins": 5.661162376403809, "rewards/rejected": -5.536602973937988, "step": 14570 }, { "epoch": 0.17, "learning_rate": 4.9158769895423544e-06, "logits/chosen": -3.1772141456604004, "logits/rejected": -3.1459972858428955, "logps/chosen": -75.15302276611328, "logps/rejected": -539.5202026367188, "loss": 0.0521, "rewards/accuracies": 1.0, "rewards/chosen": -0.3041275143623352, "rewards/margins": 4.69123649597168, "rewards/rejected": -4.995364189147949, "step": 14580 }, { "epoch": 0.17, "learning_rate": 4.915608073135534e-06, "logits/chosen": -3.1620447635650635, "logits/rejected": -3.0645923614501953, "logps/chosen": -56.58850860595703, "logps/rejected": -778.7830200195312, "loss": 0.1835, "rewards/accuracies": 1.0, "rewards/chosen": -0.04902054741978645, "rewards/margins": 7.349863529205322, "rewards/rejected": -7.398882865905762, "step": 14590 }, { "epoch": 0.17, "learning_rate": 4.915338734970253e-06, "logits/chosen": -3.1238746643066406, "logits/rejected": -3.076448440551758, "logps/chosen": -40.94762420654297, "logps/rejected": -585.3919067382812, "loss": 0.1213, "rewards/accuracies": 1.0, "rewards/chosen": -0.009292256087064743, "rewards/margins": 5.459877967834473, "rewards/rejected": -5.469170093536377, "step": 14600 }, { "epoch": 0.17, "learning_rate": 4.915068975093537e-06, "logits/chosen": -3.141490936279297, "logits/rejected": -3.102107048034668, "logps/chosen": -137.35179138183594, "logps/rejected": -644.2761840820312, "loss": 0.1637, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9123002886772156, "rewards/margins": 5.138925075531006, "rewards/rejected": -6.051225662231445, "step": 14610 }, { "epoch": 0.18, "learning_rate": 4.914798793552487e-06, "logits/chosen": -3.161463499069214, "logits/rejected": -3.085761547088623, "logps/chosen": -129.50167846679688, "logps/rejected": -623.9315185546875, "loss": 0.1018, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8028029203414917, "rewards/margins": 5.040809631347656, "rewards/rejected": -5.843613147735596, "step": 14620 }, { "epoch": 0.18, "learning_rate": 4.914528190394274e-06, "logits/chosen": -3.1616623401641846, "logits/rejected": -3.101456880569458, "logps/chosen": -89.12628173828125, "logps/rejected": -598.6466064453125, "loss": 0.0413, "rewards/accuracies": 1.0, "rewards/chosen": -0.45225945115089417, "rewards/margins": 5.145230293273926, "rewards/rejected": -5.597489833831787, "step": 14630 }, { "epoch": 0.18, "learning_rate": 4.914257165666143e-06, "logits/chosen": -3.154310703277588, "logits/rejected": -3.091848850250244, "logps/chosen": -43.42493438720703, "logps/rejected": -529.3931884765625, "loss": 0.0478, "rewards/accuracies": 1.0, "rewards/chosen": 0.027075190097093582, "rewards/margins": 4.938172817230225, "rewards/rejected": -4.911098003387451, "step": 14640 }, { "epoch": 0.18, "learning_rate": 4.913985719415418e-06, "logits/chosen": -3.1851115226745605, "logits/rejected": -3.0923266410827637, "logps/chosen": -55.40129470825195, "logps/rejected": -618.0278930664062, "loss": 0.1094, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1001548171043396, "rewards/margins": 5.691956996917725, "rewards/rejected": -5.792111873626709, "step": 14650 }, { "epoch": 0.18, "learning_rate": 4.91371385168949e-06, "logits/chosen": -3.189840793609619, "logits/rejected": -3.1554653644561768, "logps/chosen": -41.83476257324219, "logps/rejected": -517.5072021484375, "loss": 0.1163, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.03971210494637489, "rewards/margins": 4.7670769691467285, "rewards/rejected": -4.806788444519043, "step": 14660 }, { "epoch": 0.18, "learning_rate": 4.913441562535828e-06, "logits/chosen": -3.119405508041382, "logits/rejected": -3.081697940826416, "logps/chosen": -34.51887130737305, "logps/rejected": -563.3347778320312, "loss": 0.044, "rewards/accuracies": 1.0, "rewards/chosen": 0.1074480190873146, "rewards/margins": 5.356621742248535, "rewards/rejected": -5.249173164367676, "step": 14670 }, { "epoch": 0.18, "learning_rate": 4.913168852001973e-06, "logits/chosen": -3.150486469268799, "logits/rejected": -3.1147713661193848, "logps/chosen": -40.58406066894531, "logps/rejected": -656.2095336914062, "loss": 0.0422, "rewards/accuracies": 1.0, "rewards/chosen": 0.08305664360523224, "rewards/margins": 6.239696025848389, "rewards/rejected": -6.15664005279541, "step": 14680 }, { "epoch": 0.18, "learning_rate": 4.912895720135538e-06, "logits/chosen": -3.1679539680480957, "logits/rejected": -3.08933424949646, "logps/chosen": -43.80580520629883, "logps/rejected": -541.2579345703125, "loss": 0.1212, "rewards/accuracies": 1.0, "rewards/chosen": 0.04218965768814087, "rewards/margins": 5.055184841156006, "rewards/rejected": -5.012995719909668, "step": 14690 }, { "epoch": 0.18, "learning_rate": 4.912622166984211e-06, "logits/chosen": -3.146742582321167, "logits/rejected": -3.118443489074707, "logps/chosen": -34.699729919433594, "logps/rejected": -523.924072265625, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": 0.07916953414678574, "rewards/margins": 4.937603950500488, "rewards/rejected": -4.858433723449707, "step": 14700 }, { "epoch": 0.18, "learning_rate": 4.912348192595755e-06, "logits/chosen": -3.1786746978759766, "logits/rejected": -3.09600567817688, "logps/chosen": -117.76473236083984, "logps/rejected": -783.20263671875, "loss": 0.0483, "rewards/accuracies": 1.0, "rewards/chosen": -0.6308873891830444, "rewards/margins": 6.803765296936035, "rewards/rejected": -7.434651851654053, "step": 14710 }, { "epoch": 0.18, "learning_rate": 4.912073797018004e-06, "logits/chosen": -3.1428158283233643, "logits/rejected": -3.043429136276245, "logps/chosen": -128.05361938476562, "logps/rejected": -756.5829467773438, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": -0.7766329050064087, "rewards/margins": 6.3852972984313965, "rewards/rejected": -7.161930084228516, "step": 14720 }, { "epoch": 0.18, "learning_rate": 4.911798980298868e-06, "logits/chosen": -3.1596896648406982, "logits/rejected": -3.0563645362854004, "logps/chosen": -115.1688003540039, "logps/rejected": -610.3869018554688, "loss": 0.0391, "rewards/accuracies": 1.0, "rewards/chosen": -0.5906474590301514, "rewards/margins": 5.1265549659729, "rewards/rejected": -5.717202186584473, "step": 14730 }, { "epoch": 0.18, "learning_rate": 4.911523742486329e-06, "logits/chosen": -3.1698575019836426, "logits/rejected": -3.083491325378418, "logps/chosen": -58.188575744628906, "logps/rejected": -645.287841796875, "loss": 0.105, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.05714669078588486, "rewards/margins": 6.007320404052734, "rewards/rejected": -6.064467430114746, "step": 14740 }, { "epoch": 0.18, "learning_rate": 4.911248083628441e-06, "logits/chosen": -3.1741459369659424, "logits/rejected": -3.119123935699463, "logps/chosen": -59.531837463378906, "logps/rejected": -792.6278076171875, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": -0.03009256161749363, "rewards/margins": 7.494200229644775, "rewards/rejected": -7.524293422698975, "step": 14750 }, { "epoch": 0.18, "learning_rate": 4.910972003773336e-06, "logits/chosen": -3.151012897491455, "logits/rejected": -3.0765557289123535, "logps/chosen": -60.94230270385742, "logps/rejected": -650.0736083984375, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": -0.11878243833780289, "rewards/margins": 5.983905792236328, "rewards/rejected": -6.102687835693359, "step": 14760 }, { "epoch": 0.18, "learning_rate": 4.910695502969215e-06, "logits/chosen": -3.1675302982330322, "logits/rejected": -3.1054580211639404, "logps/chosen": -68.56558227539062, "logps/rejected": -673.953125, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": -0.23613771796226501, "rewards/margins": 6.09614372253418, "rewards/rejected": -6.332281112670898, "step": 14770 }, { "epoch": 0.18, "learning_rate": 4.910418581264353e-06, "logits/chosen": -3.1223835945129395, "logits/rejected": -3.0398030281066895, "logps/chosen": -54.05394744873047, "logps/rejected": -686.3969116210938, "loss": 0.0332, "rewards/accuracies": 1.0, "rewards/chosen": -0.059531278908252716, "rewards/margins": 6.399545669555664, "rewards/rejected": -6.459076881408691, "step": 14780 }, { "epoch": 0.18, "learning_rate": 4.910141238707104e-06, "logits/chosen": -3.1681370735168457, "logits/rejected": -3.1096363067626953, "logps/chosen": -37.89513397216797, "logps/rejected": -564.3531494140625, "loss": 0.0471, "rewards/accuracies": 1.0, "rewards/chosen": 0.11952431499958038, "rewards/margins": 5.383787631988525, "rewards/rejected": -5.264263153076172, "step": 14790 }, { "epoch": 0.18, "learning_rate": 4.909863475345886e-06, "logits/chosen": -3.1135382652282715, "logits/rejected": -3.050572395324707, "logps/chosen": -74.39756774902344, "logps/rejected": -538.3228759765625, "loss": 0.1753, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21716241538524628, "rewards/margins": 4.787868022918701, "rewards/rejected": -5.005029678344727, "step": 14800 }, { "epoch": 0.18, "learning_rate": 4.9095852912292e-06, "logits/chosen": -3.1495773792266846, "logits/rejected": -3.1047425270080566, "logps/chosen": -61.520301818847656, "logps/rejected": -575.6317138671875, "loss": 0.1086, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.18154527246952057, "rewards/margins": 5.192004203796387, "rewards/rejected": -5.373549461364746, "step": 14810 }, { "epoch": 0.18, "learning_rate": 4.909306686405614e-06, "logits/chosen": -3.1595935821533203, "logits/rejected": -3.124020576477051, "logps/chosen": -56.6912727355957, "logps/rejected": -576.4613647460938, "loss": 0.1108, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.13984599709510803, "rewards/margins": 5.233975410461426, "rewards/rejected": -5.373820781707764, "step": 14820 }, { "epoch": 0.18, "learning_rate": 4.909027660923772e-06, "logits/chosen": -3.1425042152404785, "logits/rejected": -3.0783047676086426, "logps/chosen": -79.12733459472656, "logps/rejected": -757.7935791015625, "loss": 0.1086, "rewards/accuracies": 1.0, "rewards/chosen": -0.2466195821762085, "rewards/margins": 6.930994510650635, "rewards/rejected": -7.177613258361816, "step": 14830 }, { "epoch": 0.18, "learning_rate": 4.908748214832391e-06, "logits/chosen": -3.1724162101745605, "logits/rejected": -3.1217103004455566, "logps/chosen": -51.38048553466797, "logps/rejected": -595.13037109375, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/chosen": -0.0508095845580101, "rewards/margins": 5.49892520904541, "rewards/rejected": -5.549735069274902, "step": 14840 }, { "epoch": 0.18, "learning_rate": 4.908468348180262e-06, "logits/chosen": -3.153008222579956, "logits/rejected": -3.083536148071289, "logps/chosen": -73.09598541259766, "logps/rejected": -921.4358520507812, "loss": 0.0834, "rewards/accuracies": 1.0, "rewards/chosen": -0.23884522914886475, "rewards/margins": 8.569042205810547, "rewards/rejected": -8.807887077331543, "step": 14850 }, { "epoch": 0.18, "learning_rate": 4.908188061016249e-06, "logits/chosen": -3.142482280731201, "logits/rejected": -3.1047592163085938, "logps/chosen": -36.6960563659668, "logps/rejected": -648.9052734375, "loss": 0.0424, "rewards/accuracies": 1.0, "rewards/chosen": 0.10464143753051758, "rewards/margins": 6.203155517578125, "rewards/rejected": -6.098513603210449, "step": 14860 }, { "epoch": 0.18, "learning_rate": 4.907907353389289e-06, "logits/chosen": -3.1798033714294434, "logits/rejected": -3.119655132293701, "logps/chosen": -49.602596282958984, "logps/rejected": -615.7669677734375, "loss": 0.1115, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.009365895763039589, "rewards/margins": 5.769257545471191, "rewards/rejected": -5.759891510009766, "step": 14870 }, { "epoch": 0.18, "learning_rate": 4.907626225348392e-06, "logits/chosen": -3.159541368484497, "logits/rejected": -3.114898204803467, "logps/chosen": -28.64434814453125, "logps/rejected": -506.76617431640625, "loss": 0.1994, "rewards/accuracies": 1.0, "rewards/chosen": 0.11026464402675629, "rewards/margins": 4.7936320304870605, "rewards/rejected": -4.683367729187012, "step": 14880 }, { "epoch": 0.18, "learning_rate": 4.907344676942643e-06, "logits/chosen": -3.1764254570007324, "logits/rejected": -3.1266837120056152, "logps/chosen": -31.514766693115234, "logps/rejected": -682.14404296875, "loss": 0.0361, "rewards/accuracies": 1.0, "rewards/chosen": 0.1762995421886444, "rewards/margins": 6.606133460998535, "rewards/rejected": -6.42983341217041, "step": 14890 }, { "epoch": 0.18, "learning_rate": 4.9070627082212e-06, "logits/chosen": -3.1566953659057617, "logits/rejected": -3.093592643737793, "logps/chosen": -40.04852294921875, "logps/rejected": -543.4224243164062, "loss": 0.0835, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.03560640662908554, "rewards/margins": 5.088615417480469, "rewards/rejected": -5.053009510040283, "step": 14900 }, { "epoch": 0.18, "learning_rate": 4.906780319233293e-06, "logits/chosen": -3.142735242843628, "logits/rejected": -3.095334529876709, "logps/chosen": -48.384254455566406, "logps/rejected": -495.193603515625, "loss": 0.0701, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.05339600518345833, "rewards/margins": 4.514452934265137, "rewards/rejected": -4.567849159240723, "step": 14910 }, { "epoch": 0.18, "learning_rate": 4.906497510028227e-06, "logits/chosen": -3.172273635864258, "logits/rejected": -3.108397960662842, "logps/chosen": -32.515525817871094, "logps/rejected": -597.1349487304688, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": 0.12811891734600067, "rewards/margins": 5.70166540145874, "rewards/rejected": -5.573546409606934, "step": 14920 }, { "epoch": 0.18, "learning_rate": 4.906214280655381e-06, "logits/chosen": -3.150240421295166, "logits/rejected": -3.1512436866760254, "logps/chosen": -19.727779388427734, "logps/rejected": -345.1706237792969, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.1466280221939087, "rewards/margins": 3.237107515335083, "rewards/rejected": -3.0904793739318848, "step": 14930 }, { "epoch": 0.18, "learning_rate": 4.905930631164202e-06, "logits/chosen": -3.146979570388794, "logits/rejected": -3.045912981033325, "logps/chosen": -71.2306137084961, "logps/rejected": -820.7092895507812, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": -0.13079674541950226, "rewards/margins": 7.65737771987915, "rewards/rejected": -7.788174629211426, "step": 14940 }, { "epoch": 0.18, "learning_rate": 4.905646561604219e-06, "logits/chosen": -3.1577858924865723, "logits/rejected": -3.0305423736572266, "logps/chosen": -42.214622497558594, "logps/rejected": -723.5906982421875, "loss": 0.0535, "rewards/accuracies": 1.0, "rewards/chosen": 0.09147638082504272, "rewards/margins": 6.922987937927246, "rewards/rejected": -6.831511497497559, "step": 14950 }, { "epoch": 0.18, "learning_rate": 4.9053620720250266e-06, "logits/chosen": -3.1459107398986816, "logits/rejected": -3.112750291824341, "logps/chosen": -69.04917907714844, "logps/rejected": -546.0399780273438, "loss": 0.1832, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2147875726222992, "rewards/margins": 4.865909576416016, "rewards/rejected": -5.080697059631348, "step": 14960 }, { "epoch": 0.18, "learning_rate": 4.9050771624762974e-06, "logits/chosen": -3.111204147338867, "logits/rejected": -3.0415000915527344, "logps/chosen": -41.91514205932617, "logps/rejected": -582.6924438476562, "loss": 0.0358, "rewards/accuracies": 1.0, "rewards/chosen": 0.11854080855846405, "rewards/margins": 5.54892110824585, "rewards/rejected": -5.430380344390869, "step": 14970 }, { "epoch": 0.18, "learning_rate": 4.904791833007775e-06, "logits/chosen": -3.1438727378845215, "logits/rejected": -3.0983524322509766, "logps/chosen": -43.09523010253906, "logps/rejected": -565.5391845703125, "loss": 0.0488, "rewards/accuracies": 1.0, "rewards/chosen": 0.07270724326372147, "rewards/margins": 5.326157569885254, "rewards/rejected": -5.253450393676758, "step": 14980 }, { "epoch": 0.18, "learning_rate": 4.904506083669278e-06, "logits/chosen": -3.147977590560913, "logits/rejected": -3.0813074111938477, "logps/chosen": -61.131927490234375, "logps/rejected": -523.8931884765625, "loss": 0.099, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1318889856338501, "rewards/margins": 4.716760635375977, "rewards/rejected": -4.848649024963379, "step": 14990 }, { "epoch": 0.18, "learning_rate": 4.904219914510697e-06, "logits/chosen": -3.134021282196045, "logits/rejected": -3.103984832763672, "logps/chosen": -30.80672836303711, "logps/rejected": -443.4254455566406, "loss": 0.1206, "rewards/accuracies": 1.0, "rewards/chosen": 0.11002789437770844, "rewards/margins": 4.167117118835449, "rewards/rejected": -4.057089328765869, "step": 15000 }, { "epoch": 0.18, "eval_logits/chosen": -3.1765849590301514, "eval_logits/rejected": -3.0801970958709717, "eval_logps/chosen": -153.2669677734375, "eval_logps/rejected": -927.61181640625, "eval_loss": 0.003026393475010991, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.9208669662475586, "eval_rewards/margins": 7.887996673583984, "eval_rewards/rejected": -8.808863639831543, "eval_runtime": 1.2163, "eval_samples_per_second": 4.111, "eval_steps_per_second": 2.466, "step": 15000 }, { "epoch": 0.18, "learning_rate": 4.903933325581997e-06, "logits/chosen": -3.1533150672912598, "logits/rejected": -3.110010862350464, "logps/chosen": -42.755775451660156, "logps/rejected": -693.0447998046875, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": 0.0592985637485981, "rewards/margins": 6.599720001220703, "rewards/rejected": -6.5404205322265625, "step": 15010 }, { "epoch": 0.18, "learning_rate": 4.903646316933214e-06, "logits/chosen": -3.1632659435272217, "logits/rejected": -3.110487461090088, "logps/chosen": -28.444299697875977, "logps/rejected": -505.8743591308594, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": 0.12212026119232178, "rewards/margins": 4.822232246398926, "rewards/rejected": -4.700112819671631, "step": 15020 }, { "epoch": 0.18, "learning_rate": 4.90335888861446e-06, "logits/chosen": -3.1667652130126953, "logits/rejected": -3.104628086090088, "logps/chosen": -33.395755767822266, "logps/rejected": -626.1278686523438, "loss": 0.0423, "rewards/accuracies": 1.0, "rewards/chosen": 0.11464317888021469, "rewards/margins": 5.997931957244873, "rewards/rejected": -5.883288860321045, "step": 15030 }, { "epoch": 0.18, "learning_rate": 4.903071040675919e-06, "logits/chosen": -3.1492459774017334, "logits/rejected": -3.0931084156036377, "logps/chosen": -39.300148010253906, "logps/rejected": -658.5166625976562, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": 0.03464770317077637, "rewards/margins": 6.226067543029785, "rewards/rejected": -6.191420078277588, "step": 15040 }, { "epoch": 0.18, "learning_rate": 4.90278277316785e-06, "logits/chosen": -3.159151554107666, "logits/rejected": -3.0608766078948975, "logps/chosen": -52.2041015625, "logps/rejected": -734.1940307617188, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": -0.028837617486715317, "rewards/margins": 6.909107208251953, "rewards/rejected": -6.9379448890686035, "step": 15050 }, { "epoch": 0.18, "learning_rate": 4.902494086140581e-06, "logits/chosen": -3.1667819023132324, "logits/rejected": -3.1191458702087402, "logps/chosen": -46.840171813964844, "logps/rejected": -790.0462646484375, "loss": 0.1341, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.008355086669325829, "rewards/margins": 7.503514289855957, "rewards/rejected": -7.511868953704834, "step": 15060 }, { "epoch": 0.18, "learning_rate": 4.902204979644517e-06, "logits/chosen": -3.1160378456115723, "logits/rejected": -3.090754747390747, "logps/chosen": -25.898366928100586, "logps/rejected": -596.4559326171875, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/chosen": 0.1778106838464737, "rewards/margins": 5.770631790161133, "rewards/rejected": -5.59282112121582, "step": 15070 }, { "epoch": 0.18, "learning_rate": 4.901915453730136e-06, "logits/chosen": -3.1815707683563232, "logits/rejected": -3.0989818572998047, "logps/chosen": -74.65040588378906, "logps/rejected": -682.1405029296875, "loss": 0.1618, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.25894299149513245, "rewards/margins": 6.173645973205566, "rewards/rejected": -6.43258810043335, "step": 15080 }, { "epoch": 0.18, "learning_rate": 4.901625508447988e-06, "logits/chosen": -3.14874529838562, "logits/rejected": -3.062688112258911, "logps/chosen": -63.90918731689453, "logps/rejected": -961.8997192382812, "loss": 0.0344, "rewards/accuracies": 1.0, "rewards/chosen": -0.03787462040781975, "rewards/margins": 9.155659675598145, "rewards/rejected": -9.193532943725586, "step": 15090 }, { "epoch": 0.18, "learning_rate": 4.901335143848696e-06, "logits/chosen": -3.117079734802246, "logits/rejected": -3.0785727500915527, "logps/chosen": -43.349632263183594, "logps/rejected": -520.298828125, "loss": 0.0652, "rewards/accuracies": 1.0, "rewards/chosen": 0.10214344412088394, "rewards/margins": 4.905397415161133, "rewards/rejected": -4.803253650665283, "step": 15100 }, { "epoch": 0.18, "learning_rate": 4.901044359982957e-06, "logits/chosen": -3.123401165008545, "logits/rejected": -3.116396427154541, "logps/chosen": -39.82402801513672, "logps/rejected": -670.9645385742188, "loss": 0.0369, "rewards/accuracies": 1.0, "rewards/chosen": 0.06653617322444916, "rewards/margins": 6.384037494659424, "rewards/rejected": -6.317500591278076, "step": 15110 }, { "epoch": 0.18, "learning_rate": 4.900753156901542e-06, "logits/chosen": -3.1559243202209473, "logits/rejected": -3.105116367340088, "logps/chosen": -31.61195945739746, "logps/rejected": -647.86962890625, "loss": 0.1127, "rewards/accuracies": 1.0, "rewards/chosen": 0.13882626593112946, "rewards/margins": 6.2422099113464355, "rewards/rejected": -6.103384017944336, "step": 15120 }, { "epoch": 0.18, "learning_rate": 4.900461534655293e-06, "logits/chosen": -3.1560797691345215, "logits/rejected": -3.112797737121582, "logps/chosen": -47.08884048461914, "logps/rejected": -603.1235961914062, "loss": 0.0575, "rewards/accuracies": 1.0, "rewards/chosen": -0.04588315635919571, "rewards/margins": 5.6001691818237305, "rewards/rejected": -5.64605188369751, "step": 15130 }, { "epoch": 0.18, "learning_rate": 4.900169493295127e-06, "logits/chosen": -3.1197619438171387, "logits/rejected": -3.0503361225128174, "logps/chosen": -59.13898468017578, "logps/rejected": -722.3500366210938, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": -0.12072189152240753, "rewards/margins": 6.706154823303223, "rewards/rejected": -6.826876163482666, "step": 15140 }, { "epoch": 0.18, "learning_rate": 4.899877032872035e-06, "logits/chosen": -3.1682586669921875, "logits/rejected": -3.120013475418091, "logps/chosen": -83.21649169921875, "logps/rejected": -569.6531982421875, "loss": 0.0446, "rewards/accuracies": 1.0, "rewards/chosen": -0.4178538918495178, "rewards/margins": 4.893689155578613, "rewards/rejected": -5.311542510986328, "step": 15150 }, { "epoch": 0.18, "learning_rate": 4.899584153437077e-06, "logits/chosen": -3.1504156589508057, "logits/rejected": -3.0817675590515137, "logps/chosen": -83.58470916748047, "logps/rejected": -679.8099975585938, "loss": 0.0982, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3389491140842438, "rewards/margins": 6.058157920837402, "rewards/rejected": -6.397107124328613, "step": 15160 }, { "epoch": 0.18, "learning_rate": 4.8992908550413905e-06, "logits/chosen": -3.123365640640259, "logits/rejected": -3.0504047870635986, "logps/chosen": -44.50920867919922, "logps/rejected": -722.140625, "loss": 0.103, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.03358561545610428, "rewards/margins": 6.871729850769043, "rewards/rejected": -6.838143825531006, "step": 15170 }, { "epoch": 0.18, "learning_rate": 4.898997137736185e-06, "logits/chosen": -3.130737066268921, "logits/rejected": -3.0851454734802246, "logps/chosen": -38.41469192504883, "logps/rejected": -657.29345703125, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/chosen": 0.09532518684864044, "rewards/margins": 6.280001640319824, "rewards/rejected": -6.184676170349121, "step": 15180 }, { "epoch": 0.18, "learning_rate": 4.898703001572742e-06, "logits/chosen": -3.1690711975097656, "logits/rejected": -3.096403121948242, "logps/chosen": -30.972881317138672, "logps/rejected": -571.245361328125, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": 0.11797109991312027, "rewards/margins": 5.443126201629639, "rewards/rejected": -5.325154781341553, "step": 15190 }, { "epoch": 0.18, "learning_rate": 4.898408446602417e-06, "logits/chosen": -3.1468939781188965, "logits/rejected": -3.0609774589538574, "logps/chosen": -77.79597473144531, "logps/rejected": -691.0799560546875, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": -0.30905982851982117, "rewards/margins": 6.209054946899414, "rewards/rejected": -6.518115043640137, "step": 15200 }, { "epoch": 0.18, "learning_rate": 4.898113472876639e-06, "logits/chosen": -3.1221718788146973, "logits/rejected": -3.079296112060547, "logps/chosen": -42.885780334472656, "logps/rejected": -512.9865112304688, "loss": 0.1013, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.005604092963039875, "rewards/margins": 4.75321102142334, "rewards/rejected": -4.758814811706543, "step": 15210 }, { "epoch": 0.18, "learning_rate": 4.897818080446908e-06, "logits/chosen": -3.1537487506866455, "logits/rejected": -3.1357784271240234, "logps/chosen": -37.92072296142578, "logps/rejected": -530.5318603515625, "loss": 0.0482, "rewards/accuracies": 1.0, "rewards/chosen": 0.05075167864561081, "rewards/margins": 4.9764323234558105, "rewards/rejected": -4.925680637359619, "step": 15220 }, { "epoch": 0.18, "learning_rate": 4.8975222693648005e-06, "logits/chosen": -3.184831142425537, "logits/rejected": -3.1339852809906006, "logps/chosen": -44.986576080322266, "logps/rejected": -578.8250732421875, "loss": 0.0435, "rewards/accuracies": 1.0, "rewards/chosen": 0.028898421674966812, "rewards/margins": 5.4366044998168945, "rewards/rejected": -5.407706260681152, "step": 15230 }, { "epoch": 0.18, "learning_rate": 4.897226039681964e-06, "logits/chosen": -3.137350559234619, "logits/rejected": -3.049861192703247, "logps/chosen": -39.34386444091797, "logps/rejected": -696.7230224609375, "loss": 0.0397, "rewards/accuracies": 1.0, "rewards/chosen": 0.10107157379388809, "rewards/margins": 6.675038814544678, "rewards/rejected": -6.573966979980469, "step": 15240 }, { "epoch": 0.18, "learning_rate": 4.8969293914501185e-06, "logits/chosen": -3.1555397510528564, "logits/rejected": -3.0858585834503174, "logps/chosen": -47.06906509399414, "logps/rejected": -796.1209716796875, "loss": 0.0471, "rewards/accuracies": 1.0, "rewards/chosen": 0.0731208324432373, "rewards/margins": 7.6423211097717285, "rewards/rejected": -7.569200038909912, "step": 15250 }, { "epoch": 0.18, "learning_rate": 4.896632324721058e-06, "logits/chosen": -3.1871769428253174, "logits/rejected": -3.10526704788208, "logps/chosen": -33.178016662597656, "logps/rejected": -596.1642456054688, "loss": 0.0463, "rewards/accuracies": 1.0, "rewards/chosen": 0.11756940931081772, "rewards/margins": 5.708528995513916, "rewards/rejected": -5.590959548950195, "step": 15260 }, { "epoch": 0.18, "learning_rate": 4.89633483954665e-06, "logits/chosen": -3.127462387084961, "logits/rejected": -3.0494396686553955, "logps/chosen": -59.04790115356445, "logps/rejected": -804.2071533203125, "loss": 0.0732, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.03667232394218445, "rewards/margins": 7.59851598739624, "rewards/rejected": -7.635189056396484, "step": 15270 }, { "epoch": 0.18, "learning_rate": 4.896036935978835e-06, "logits/chosen": -3.171056032180786, "logits/rejected": -3.1063523292541504, "logps/chosen": -58.47124099731445, "logps/rejected": -751.1170654296875, "loss": 0.0611, "rewards/accuracies": 1.0, "rewards/chosen": -0.11382336914539337, "rewards/margins": 6.989326477050781, "rewards/rejected": -7.103148460388184, "step": 15280 }, { "epoch": 0.18, "learning_rate": 4.895738614069626e-06, "logits/chosen": -3.1470227241516113, "logits/rejected": -3.0843112468719482, "logps/chosen": -38.17351531982422, "logps/rejected": -852.86279296875, "loss": 0.0467, "rewards/accuracies": 1.0, "rewards/chosen": 0.16809795796871185, "rewards/margins": 8.307161331176758, "rewards/rejected": -8.139063835144043, "step": 15290 }, { "epoch": 0.18, "learning_rate": 4.895439873871108e-06, "logits/chosen": -3.1605024337768555, "logits/rejected": -3.0728371143341064, "logps/chosen": -52.56926345825195, "logps/rejected": -557.8384399414062, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": 0.12825021147727966, "rewards/margins": 5.306988716125488, "rewards/rejected": -5.178738117218018, "step": 15300 }, { "epoch": 0.18, "learning_rate": 4.895140715435442e-06, "logits/chosen": -3.173778533935547, "logits/rejected": -3.1367366313934326, "logps/chosen": -26.2192440032959, "logps/rejected": -567.250732421875, "loss": 0.0391, "rewards/accuracies": 1.0, "rewards/chosen": 0.16278842091560364, "rewards/margins": 5.444039344787598, "rewards/rejected": -5.281250953674316, "step": 15310 }, { "epoch": 0.18, "learning_rate": 4.8948411388148585e-06, "logits/chosen": -3.1409544944763184, "logits/rejected": -3.101893186569214, "logps/chosen": -42.62284469604492, "logps/rejected": -780.9970092773438, "loss": 0.039, "rewards/accuracies": 1.0, "rewards/chosen": 0.13027267158031464, "rewards/margins": 7.538506507873535, "rewards/rejected": -7.408233642578125, "step": 15320 }, { "epoch": 0.18, "learning_rate": 4.8945411440616645e-06, "logits/chosen": -3.1883630752563477, "logits/rejected": -3.148550271987915, "logps/chosen": -31.359508514404297, "logps/rejected": -570.81201171875, "loss": 0.0334, "rewards/accuracies": 1.0, "rewards/chosen": 0.08264069259166718, "rewards/margins": 5.410569667816162, "rewards/rejected": -5.327929496765137, "step": 15330 }, { "epoch": 0.18, "learning_rate": 4.8942407312282365e-06, "logits/chosen": -3.1691763401031494, "logits/rejected": -3.127876043319702, "logps/chosen": -55.88579559326172, "logps/rejected": -596.8201904296875, "loss": 0.1042, "rewards/accuracies": 1.0, "rewards/chosen": -0.049890533089637756, "rewards/margins": 5.5253825187683105, "rewards/rejected": -5.575272560119629, "step": 15340 }, { "epoch": 0.18, "learning_rate": 4.893939900367027e-06, "logits/chosen": -3.185025691986084, "logits/rejected": -3.142540454864502, "logps/chosen": -40.978179931640625, "logps/rejected": -555.8955078125, "loss": 0.0428, "rewards/accuracies": 1.0, "rewards/chosen": 0.08185496926307678, "rewards/margins": 5.249696731567383, "rewards/rejected": -5.167841911315918, "step": 15350 }, { "epoch": 0.18, "learning_rate": 4.893638651530559e-06, "logits/chosen": -3.1670572757720947, "logits/rejected": -3.1143975257873535, "logps/chosen": -21.325376510620117, "logps/rejected": -494.12567138671875, "loss": 0.0519, "rewards/accuracies": 1.0, "rewards/chosen": 0.1618923395872116, "rewards/margins": 4.735358238220215, "rewards/rejected": -4.5734663009643555, "step": 15360 }, { "epoch": 0.18, "learning_rate": 4.893336984771432e-06, "logits/chosen": -3.1282267570495605, "logits/rejected": -3.072648525238037, "logps/chosen": -45.240840911865234, "logps/rejected": -905.4305419921875, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": 0.11423607170581818, "rewards/margins": 8.767005920410156, "rewards/rejected": -8.652769088745117, "step": 15370 }, { "epoch": 0.18, "learning_rate": 4.893034900142313e-06, "logits/chosen": -3.1808276176452637, "logits/rejected": -3.073216676712036, "logps/chosen": -51.3402214050293, "logps/rejected": -681.4022216796875, "loss": 0.0887, "rewards/accuracies": 1.0, "rewards/chosen": 0.026625698432326317, "rewards/margins": 6.438396453857422, "rewards/rejected": -6.411770820617676, "step": 15380 }, { "epoch": 0.18, "learning_rate": 4.892732397695948e-06, "logits/chosen": -3.1572976112365723, "logits/rejected": -3.148041248321533, "logps/chosen": -23.532188415527344, "logps/rejected": -564.0489501953125, "loss": 0.0444, "rewards/accuracies": 1.0, "rewards/chosen": 0.19867651164531708, "rewards/margins": 5.465985298156738, "rewards/rejected": -5.267308235168457, "step": 15390 }, { "epoch": 0.18, "learning_rate": 4.892429477485151e-06, "logits/chosen": -3.150987148284912, "logits/rejected": -3.1103427410125732, "logps/chosen": -25.481826782226562, "logps/rejected": -624.1829833984375, "loss": 0.0418, "rewards/accuracies": 1.0, "rewards/chosen": 0.19498547911643982, "rewards/margins": 6.062540531158447, "rewards/rejected": -5.867554664611816, "step": 15400 }, { "epoch": 0.18, "learning_rate": 4.892126139562813e-06, "logits/chosen": -3.1484341621398926, "logits/rejected": -3.064572334289551, "logps/chosen": -41.02313232421875, "logps/rejected": -787.1710205078125, "loss": 0.0344, "rewards/accuracies": 1.0, "rewards/chosen": 0.08049391210079193, "rewards/margins": 7.556789398193359, "rewards/rejected": -7.476296424865723, "step": 15410 }, { "epoch": 0.18, "learning_rate": 4.891822383981894e-06, "logits/chosen": -3.1496076583862305, "logits/rejected": -3.074410915374756, "logps/chosen": -80.40386199951172, "logps/rejected": -744.273193359375, "loss": 0.0414, "rewards/accuracies": 1.0, "rewards/chosen": -0.31177279353141785, "rewards/margins": 6.736390590667725, "rewards/rejected": -7.048163414001465, "step": 15420 }, { "epoch": 0.18, "learning_rate": 4.8915182107954295e-06, "logits/chosen": -3.194631338119507, "logits/rejected": -3.1669509410858154, "logps/chosen": -32.473182678222656, "logps/rejected": -582.6435546875, "loss": 0.0507, "rewards/accuracies": 1.0, "rewards/chosen": 0.1243402361869812, "rewards/margins": 5.569275856018066, "rewards/rejected": -5.4449357986450195, "step": 15430 }, { "epoch": 0.18, "learning_rate": 4.891213620056528e-06, "logits/chosen": -3.152470350265503, "logits/rejected": -3.096992015838623, "logps/chosen": -35.80450439453125, "logps/rejected": -694.472900390625, "loss": 0.0329, "rewards/accuracies": 1.0, "rewards/chosen": 0.10317827761173248, "rewards/margins": 6.662447929382324, "rewards/rejected": -6.559269905090332, "step": 15440 }, { "epoch": 0.18, "learning_rate": 4.89090861181837e-06, "logits/chosen": -3.173755645751953, "logits/rejected": -3.105454206466675, "logps/chosen": -48.002323150634766, "logps/rejected": -626.3610229492188, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": 0.019051432609558105, "rewards/margins": 5.890015125274658, "rewards/rejected": -5.8709635734558105, "step": 15450 }, { "epoch": 0.19, "learning_rate": 4.890603186134209e-06, "logits/chosen": -3.15588641166687, "logits/rejected": -3.0998356342315674, "logps/chosen": -34.96489715576172, "logps/rejected": -514.7250366210938, "loss": 0.0514, "rewards/accuracies": 1.0, "rewards/chosen": 0.1041632741689682, "rewards/margins": 4.87233829498291, "rewards/rejected": -4.7681756019592285, "step": 15460 }, { "epoch": 0.19, "learning_rate": 4.890297343057371e-06, "logits/chosen": -3.1769657135009766, "logits/rejected": -3.1221208572387695, "logps/chosen": -36.660057067871094, "logps/rejected": -731.6959228515625, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": 0.10482928901910782, "rewards/margins": 7.027749538421631, "rewards/rejected": -6.922920227050781, "step": 15470 }, { "epoch": 0.19, "learning_rate": 4.889991082641256e-06, "logits/chosen": -3.178886890411377, "logits/rejected": -3.1381022930145264, "logps/chosen": -47.0615348815918, "logps/rejected": -481.37164306640625, "loss": 0.1214, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.04095853120088577, "rewards/margins": 4.394172668457031, "rewards/rejected": -4.435130596160889, "step": 15480 }, { "epoch": 0.19, "learning_rate": 4.889684404939335e-06, "logits/chosen": -3.1420388221740723, "logits/rejected": -3.0403785705566406, "logps/chosen": -163.12762451171875, "logps/rejected": -798.1761474609375, "loss": 0.1118, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9882842898368835, "rewards/margins": 6.591742515563965, "rewards/rejected": -7.5800275802612305, "step": 15490 }, { "epoch": 0.19, "learning_rate": 4.889377310005154e-06, "logits/chosen": -3.120640277862549, "logits/rejected": -3.0850605964660645, "logps/chosen": -43.36335372924805, "logps/rejected": -766.97802734375, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": 0.04962228983640671, "rewards/margins": 7.326128959655762, "rewards/rejected": -7.2765069007873535, "step": 15500 }, { "epoch": 0.19, "learning_rate": 4.889069797892332e-06, "logits/chosen": -3.158094882965088, "logits/rejected": -3.0766940116882324, "logps/chosen": -40.21588897705078, "logps/rejected": -688.0167236328125, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/chosen": 0.12750259041786194, "rewards/margins": 6.622446537017822, "rewards/rejected": -6.494944095611572, "step": 15510 }, { "epoch": 0.19, "learning_rate": 4.888761868654558e-06, "logits/chosen": -3.1642324924468994, "logits/rejected": -3.1345858573913574, "logps/chosen": -32.84929275512695, "logps/rejected": -459.4874572753906, "loss": 0.0481, "rewards/accuracies": 1.0, "rewards/chosen": 0.10972459614276886, "rewards/margins": 4.332505702972412, "rewards/rejected": -4.222781181335449, "step": 15520 }, { "epoch": 0.19, "learning_rate": 4.888453522345596e-06, "logits/chosen": -3.140349864959717, "logits/rejected": -3.0625643730163574, "logps/chosen": -37.40100860595703, "logps/rejected": -547.69140625, "loss": 0.0413, "rewards/accuracies": 1.0, "rewards/chosen": 0.08773161470890045, "rewards/margins": 5.1782402992248535, "rewards/rejected": -5.090508460998535, "step": 15530 }, { "epoch": 0.19, "learning_rate": 4.888144759019283e-06, "logits/chosen": -3.1669082641601562, "logits/rejected": -3.089496612548828, "logps/chosen": -31.530712127685547, "logps/rejected": -668.7940673828125, "loss": 0.0437, "rewards/accuracies": 1.0, "rewards/chosen": 0.13899517059326172, "rewards/margins": 6.43356466293335, "rewards/rejected": -6.294569969177246, "step": 15540 }, { "epoch": 0.19, "learning_rate": 4.887835578729528e-06, "logits/chosen": -3.185171604156494, "logits/rejected": -3.104766845703125, "logps/chosen": -51.4660530090332, "logps/rejected": -748.7342529296875, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": -0.05901684612035751, "rewards/margins": 7.035630226135254, "rewards/rejected": -7.094647407531738, "step": 15550 }, { "epoch": 0.19, "learning_rate": 4.887525981530312e-06, "logits/chosen": -3.147001028060913, "logits/rejected": -3.115159511566162, "logps/chosen": -26.79632568359375, "logps/rejected": -616.4183349609375, "loss": 0.1031, "rewards/accuracies": 1.0, "rewards/chosen": 0.18770304322242737, "rewards/margins": 5.967029571533203, "rewards/rejected": -5.77932596206665, "step": 15560 }, { "epoch": 0.19, "learning_rate": 4.8872159674756916e-06, "logits/chosen": -3.155823230743408, "logits/rejected": -3.093107223510742, "logps/chosen": -42.71467208862305, "logps/rejected": -687.1590576171875, "loss": 0.1116, "rewards/accuracies": 1.0, "rewards/chosen": 0.040696609765291214, "rewards/margins": 6.515360355377197, "rewards/rejected": -6.474664211273193, "step": 15570 }, { "epoch": 0.19, "learning_rate": 4.886905536619793e-06, "logits/chosen": -3.1770358085632324, "logits/rejected": -3.158890962600708, "logps/chosen": -41.84922409057617, "logps/rejected": -518.439697265625, "loss": 0.0462, "rewards/accuracies": 1.0, "rewards/chosen": 0.013006201013922691, "rewards/margins": 4.814307689666748, "rewards/rejected": -4.801301956176758, "step": 15580 }, { "epoch": 0.19, "learning_rate": 4.886594689016817e-06, "logits/chosen": -3.1335954666137695, "logits/rejected": -3.0762550830841064, "logps/chosen": -47.57613754272461, "logps/rejected": -668.373046875, "loss": 0.0425, "rewards/accuracies": 1.0, "rewards/chosen": -0.027349408715963364, "rewards/margins": 6.266204833984375, "rewards/rejected": -6.293554306030273, "step": 15590 }, { "epoch": 0.19, "learning_rate": 4.886283424721037e-06, "logits/chosen": -3.1401970386505127, "logits/rejected": -3.1077237129211426, "logps/chosen": -20.735761642456055, "logps/rejected": -488.98101806640625, "loss": 0.1032, "rewards/accuracies": 1.0, "rewards/chosen": 0.1775725930929184, "rewards/margins": 4.697035312652588, "rewards/rejected": -4.519463062286377, "step": 15600 }, { "epoch": 0.19, "learning_rate": 4.8859717437867996e-06, "logits/chosen": -3.1321969032287598, "logits/rejected": -3.051785945892334, "logps/chosen": -53.42213821411133, "logps/rejected": -601.2047119140625, "loss": 0.0418, "rewards/accuracies": 1.0, "rewards/chosen": -0.08403442800045013, "rewards/margins": 5.54284143447876, "rewards/rejected": -5.626875400543213, "step": 15610 }, { "epoch": 0.19, "learning_rate": 4.8856596462685205e-06, "logits/chosen": -3.1518783569335938, "logits/rejected": -3.0764167308807373, "logps/chosen": -111.47891998291016, "logps/rejected": -589.715087890625, "loss": 0.0468, "rewards/accuracies": 1.0, "rewards/chosen": -0.6405360102653503, "rewards/margins": 4.8519816398620605, "rewards/rejected": -5.492517948150635, "step": 15620 }, { "epoch": 0.19, "learning_rate": 4.885347132220694e-06, "logits/chosen": -3.156978130340576, "logits/rejected": -3.131150484085083, "logps/chosen": -83.84461975097656, "logps/rejected": -666.131591796875, "loss": 0.0815, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.40203362703323364, "rewards/margins": 5.861329078674316, "rewards/rejected": -6.263362407684326, "step": 15630 }, { "epoch": 0.19, "learning_rate": 4.885034201697884e-06, "logits/chosen": -3.191600799560547, "logits/rejected": -3.150380849838257, "logps/chosen": -59.60455322265625, "logps/rejected": -471.4588317871094, "loss": 0.1118, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.10497970879077911, "rewards/margins": 4.223999977111816, "rewards/rejected": -4.328979015350342, "step": 15640 }, { "epoch": 0.19, "learning_rate": 4.884720854754726e-06, "logits/chosen": -3.1902356147766113, "logits/rejected": -3.1534061431884766, "logps/chosen": -31.507488250732422, "logps/rejected": -399.076416015625, "loss": 0.0462, "rewards/accuracies": 1.0, "rewards/chosen": 0.08152109384536743, "rewards/margins": 3.690936326980591, "rewards/rejected": -3.609415054321289, "step": 15650 }, { "epoch": 0.19, "learning_rate": 4.88440709144593e-06, "logits/chosen": -3.1423487663269043, "logits/rejected": -3.0306828022003174, "logps/chosen": -50.08372497558594, "logps/rejected": -618.2691040039062, "loss": 0.102, "rewards/accuracies": 1.0, "rewards/chosen": -0.022510254755616188, "rewards/margins": 5.768208026885986, "rewards/rejected": -5.7907185554504395, "step": 15660 }, { "epoch": 0.19, "learning_rate": 4.884092911826278e-06, "logits/chosen": -3.148435115814209, "logits/rejected": -3.057133436203003, "logps/chosen": -121.74253845214844, "logps/rejected": -846.9871215820312, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": -0.6318223476409912, "rewards/margins": 7.430360317230225, "rewards/rejected": -8.062183380126953, "step": 15670 }, { "epoch": 0.19, "learning_rate": 4.883778315950625e-06, "logits/chosen": -3.17358136177063, "logits/rejected": -3.063924789428711, "logps/chosen": -88.3887710571289, "logps/rejected": -800.2996826171875, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": -0.3737362027168274, "rewards/margins": 7.232382774353027, "rewards/rejected": -7.606118679046631, "step": 15680 }, { "epoch": 0.19, "learning_rate": 4.883463303873899e-06, "logits/chosen": -3.1356823444366455, "logits/rejected": -3.094754457473755, "logps/chosen": -63.1111946105957, "logps/rejected": -553.7711791992188, "loss": 0.1154, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1947030872106552, "rewards/margins": 4.946249961853027, "rewards/rejected": -5.140953540802002, "step": 15690 }, { "epoch": 0.19, "learning_rate": 4.8831478756511e-06, "logits/chosen": -3.137869358062744, "logits/rejected": -3.0356698036193848, "logps/chosen": -48.74258804321289, "logps/rejected": -522.7994384765625, "loss": 0.0464, "rewards/accuracies": 1.0, "rewards/chosen": -0.015355115756392479, "rewards/margins": 4.815393447875977, "rewards/rejected": -4.830748558044434, "step": 15700 }, { "epoch": 0.19, "learning_rate": 4.882832031337301e-06, "logits/chosen": -3.162867784500122, "logits/rejected": -3.080712080001831, "logps/chosen": -59.329200744628906, "logps/rejected": -510.6568298339844, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": -0.15505637228488922, "rewards/margins": 4.567564010620117, "rewards/rejected": -4.722620964050293, "step": 15710 }, { "epoch": 0.19, "learning_rate": 4.882515770987647e-06, "logits/chosen": -3.170619487762451, "logits/rejected": -3.1141304969787598, "logps/chosen": -40.16536331176758, "logps/rejected": -591.7473754882812, "loss": 0.0757, "rewards/accuracies": 1.0, "rewards/chosen": 0.05340675264596939, "rewards/margins": 5.591139793395996, "rewards/rejected": -5.537733554840088, "step": 15720 }, { "epoch": 0.19, "learning_rate": 4.882199094657357e-06, "logits/chosen": -3.1595115661621094, "logits/rejected": -3.1053309440612793, "logps/chosen": -36.30593490600586, "logps/rejected": -485.37957763671875, "loss": 0.044, "rewards/accuracies": 1.0, "rewards/chosen": 0.10186117887496948, "rewards/margins": 4.567033767700195, "rewards/rejected": -4.46517276763916, "step": 15730 }, { "epoch": 0.19, "learning_rate": 4.88188200240172e-06, "logits/chosen": -3.184736728668213, "logits/rejected": -3.0955193042755127, "logps/chosen": -83.2617416381836, "logps/rejected": -874.3800659179688, "loss": 0.1787, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.22198286652565002, "rewards/margins": 8.103306770324707, "rewards/rejected": -8.325288772583008, "step": 15740 }, { "epoch": 0.19, "learning_rate": 4.881564494276102e-06, "logits/chosen": -3.2003014087677, "logits/rejected": -3.12792706489563, "logps/chosen": -28.29319190979004, "logps/rejected": -492.44305419921875, "loss": 0.0514, "rewards/accuracies": 1.0, "rewards/chosen": 0.1775808036327362, "rewards/margins": 4.735182762145996, "rewards/rejected": -4.5576019287109375, "step": 15750 }, { "epoch": 0.19, "learning_rate": 4.881246570335938e-06, "logits/chosen": -3.1534078121185303, "logits/rejected": -3.0148446559906006, "logps/chosen": -70.94640350341797, "logps/rejected": -551.1796875, "loss": 0.0485, "rewards/accuracies": 1.0, "rewards/chosen": -0.14870354533195496, "rewards/margins": 4.966867923736572, "rewards/rejected": -5.11557149887085, "step": 15760 }, { "epoch": 0.19, "learning_rate": 4.880928230636735e-06, "logits/chosen": -3.1344172954559326, "logits/rejected": -3.033677577972412, "logps/chosen": -71.23292541503906, "logps/rejected": -710.0458984375, "loss": 0.1831, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.25572505593299866, "rewards/margins": 6.4530463218688965, "rewards/rejected": -6.708771705627441, "step": 15770 }, { "epoch": 0.19, "learning_rate": 4.880609475234078e-06, "logits/chosen": -3.154463768005371, "logits/rejected": -3.1126580238342285, "logps/chosen": -57.364410400390625, "logps/rejected": -610.0470581054688, "loss": 0.0537, "rewards/accuracies": 1.0, "rewards/chosen": -0.09167047590017319, "rewards/margins": 5.625302314758301, "rewards/rejected": -5.716973304748535, "step": 15780 }, { "epoch": 0.19, "learning_rate": 4.880290304183617e-06, "logits/chosen": -3.1388323307037354, "logits/rejected": -3.0837197303771973, "logps/chosen": -64.24287414550781, "logps/rejected": -724.7796630859375, "loss": 0.0531, "rewards/accuracies": 1.0, "rewards/chosen": -0.1198551282286644, "rewards/margins": 6.710700988769531, "rewards/rejected": -6.8305559158325195, "step": 15790 }, { "epoch": 0.19, "learning_rate": 4.87997071754108e-06, "logits/chosen": -3.1475701332092285, "logits/rejected": -3.085294246673584, "logps/chosen": -42.42572784423828, "logps/rejected": -728.4552612304688, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": 0.04486975818872452, "rewards/margins": 6.9304327964782715, "rewards/rejected": -6.885562896728516, "step": 15800 }, { "epoch": 0.19, "learning_rate": 4.879650715362266e-06, "logits/chosen": -3.1679346561431885, "logits/rejected": -3.0766282081604004, "logps/chosen": -57.19700241088867, "logps/rejected": -863.1217041015625, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -0.02107379026710987, "rewards/margins": 8.20044231414795, "rewards/rejected": -8.221515655517578, "step": 15810 }, { "epoch": 0.19, "learning_rate": 4.879330297703046e-06, "logits/chosen": -3.1377882957458496, "logits/rejected": -3.0896801948547363, "logps/chosen": -65.99337005615234, "logps/rejected": -428.72515869140625, "loss": 0.1512, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.25293052196502686, "rewards/margins": 3.6556010246276855, "rewards/rejected": -3.908531904220581, "step": 15820 }, { "epoch": 0.19, "learning_rate": 4.879009464619364e-06, "logits/chosen": -3.175264835357666, "logits/rejected": -3.1224629878997803, "logps/chosen": -53.95049285888672, "logps/rejected": -705.8134155273438, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": -0.06845740973949432, "rewards/margins": 6.608053684234619, "rewards/rejected": -6.676510810852051, "step": 15830 }, { "epoch": 0.19, "learning_rate": 4.878688216167237e-06, "logits/chosen": -3.1570277214050293, "logits/rejected": -3.070587635040283, "logps/chosen": -34.59328079223633, "logps/rejected": -753.09716796875, "loss": 0.0349, "rewards/accuracies": 1.0, "rewards/chosen": 0.11452578008174896, "rewards/margins": 7.257192134857178, "rewards/rejected": -7.142667293548584, "step": 15840 }, { "epoch": 0.19, "learning_rate": 4.878366552402755e-06, "logits/chosen": -3.160583972930908, "logits/rejected": -3.105670213699341, "logps/chosen": -55.37437057495117, "logps/rejected": -680.58056640625, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": -0.051817797124385834, "rewards/margins": 6.369781970977783, "rewards/rejected": -6.421599388122559, "step": 15850 }, { "epoch": 0.19, "learning_rate": 4.878044473382078e-06, "logits/chosen": -3.177833080291748, "logits/rejected": -3.1475412845611572, "logps/chosen": -22.393091201782227, "logps/rejected": -482.3992614746094, "loss": 0.0487, "rewards/accuracies": 1.0, "rewards/chosen": 0.1601211428642273, "rewards/margins": 4.610510349273682, "rewards/rejected": -4.450389862060547, "step": 15860 }, { "epoch": 0.19, "learning_rate": 4.87772197916144e-06, "logits/chosen": -3.1471009254455566, "logits/rejected": -3.1121644973754883, "logps/chosen": -45.96525955200195, "logps/rejected": -448.20654296875, "loss": 0.0592, "rewards/accuracies": 1.0, "rewards/chosen": -0.08922681957483292, "rewards/margins": 4.032618045806885, "rewards/rejected": -4.121844291687012, "step": 15870 }, { "epoch": 0.19, "learning_rate": 4.8773990697971495e-06, "logits/chosen": -3.20019268989563, "logits/rejected": -3.1382012367248535, "logps/chosen": -83.70686340332031, "logps/rejected": -670.5665283203125, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": -0.3564659357070923, "rewards/margins": 5.940618991851807, "rewards/rejected": -6.297084808349609, "step": 15880 }, { "epoch": 0.19, "learning_rate": 4.877075745345584e-06, "logits/chosen": -3.166916608810425, "logits/rejected": -3.075885057449341, "logps/chosen": -101.5950927734375, "logps/rejected": -822.9488525390625, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -0.46703559160232544, "rewards/margins": 7.363729000091553, "rewards/rejected": -7.830763816833496, "step": 15890 }, { "epoch": 0.19, "learning_rate": 4.876752005863195e-06, "logits/chosen": -3.1883387565612793, "logits/rejected": -3.1440064907073975, "logps/chosen": -40.205101013183594, "logps/rejected": -523.593505859375, "loss": 0.0582, "rewards/accuracies": 1.0, "rewards/chosen": -0.012948369607329369, "rewards/margins": 4.847807884216309, "rewards/rejected": -4.860755920410156, "step": 15900 }, { "epoch": 0.19, "learning_rate": 4.876427851406506e-06, "logits/chosen": -3.1945672035217285, "logits/rejected": -3.1540026664733887, "logps/chosen": -29.34486961364746, "logps/rejected": -547.8211669921875, "loss": 0.0445, "rewards/accuracies": 1.0, "rewards/chosen": 0.13066107034683228, "rewards/margins": 5.23344612121582, "rewards/rejected": -5.102785587310791, "step": 15910 }, { "epoch": 0.19, "learning_rate": 4.8761032820321166e-06, "logits/chosen": -3.1506779193878174, "logits/rejected": -3.090395212173462, "logps/chosen": -40.06767272949219, "logps/rejected": -531.2303466796875, "loss": 0.0487, "rewards/accuracies": 1.0, "rewards/chosen": 0.06334346532821655, "rewards/margins": 4.994755268096924, "rewards/rejected": -4.9314117431640625, "step": 15920 }, { "epoch": 0.19, "learning_rate": 4.875778297796692e-06, "logits/chosen": -3.1883983612060547, "logits/rejected": -3.1550660133361816, "logps/chosen": -50.98564147949219, "logps/rejected": -635.0465087890625, "loss": 0.0979, "rewards/accuracies": 1.0, "rewards/chosen": -0.03994157165288925, "rewards/margins": 5.918920993804932, "rewards/rejected": -5.958863258361816, "step": 15930 }, { "epoch": 0.19, "learning_rate": 4.875452898756976e-06, "logits/chosen": -3.132256031036377, "logits/rejected": -3.1000680923461914, "logps/chosen": -33.63882064819336, "logps/rejected": -568.22412109375, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/chosen": 0.07486025989055634, "rewards/margins": 5.370248317718506, "rewards/rejected": -5.295388221740723, "step": 15940 }, { "epoch": 0.19, "learning_rate": 4.8751270849697804e-06, "logits/chosen": -3.1693084239959717, "logits/rejected": -3.1142983436584473, "logps/chosen": -43.0334587097168, "logps/rejected": -760.40625, "loss": 0.0376, "rewards/accuracies": 1.0, "rewards/chosen": 0.061563193798065186, "rewards/margins": 7.27042293548584, "rewards/rejected": -7.208859920501709, "step": 15950 }, { "epoch": 0.19, "learning_rate": 4.874800856491993e-06, "logits/chosen": -3.1568422317504883, "logits/rejected": -3.076056718826294, "logps/chosen": -49.75410079956055, "logps/rejected": -657.4065551757812, "loss": 0.0632, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.039235107600688934, "rewards/margins": 6.2313737869262695, "rewards/rejected": -6.192138671875, "step": 15960 }, { "epoch": 0.19, "learning_rate": 4.874474213380572e-06, "logits/chosen": -3.1629436016082764, "logits/rejected": -3.083625078201294, "logps/chosen": -39.85662078857422, "logps/rejected": -720.9432373046875, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": 0.11673638969659805, "rewards/margins": 6.931546688079834, "rewards/rejected": -6.814810276031494, "step": 15970 }, { "epoch": 0.19, "learning_rate": 4.874147155692547e-06, "logits/chosen": -3.1843783855438232, "logits/rejected": -3.1135401725769043, "logps/chosen": -45.49728775024414, "logps/rejected": -880.1268310546875, "loss": 0.0725, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.07703528553247452, "rewards/margins": 8.470199584960938, "rewards/rejected": -8.39316463470459, "step": 15980 }, { "epoch": 0.19, "learning_rate": 4.873819683485024e-06, "logits/chosen": -3.1587626934051514, "logits/rejected": -3.0749526023864746, "logps/chosen": -41.590641021728516, "logps/rejected": -645.7640380859375, "loss": 0.0437, "rewards/accuracies": 1.0, "rewards/chosen": 0.13670434057712555, "rewards/margins": 6.210751533508301, "rewards/rejected": -6.074046611785889, "step": 15990 }, { "epoch": 0.19, "learning_rate": 4.873491796815176e-06, "logits/chosen": -3.1581478118896484, "logits/rejected": -3.046923875808716, "logps/chosen": -38.04184341430664, "logps/rejected": -646.0827026367188, "loss": 0.0423, "rewards/accuracies": 1.0, "rewards/chosen": 0.12261392921209335, "rewards/margins": 6.187953948974609, "rewards/rejected": -6.065340042114258, "step": 16000 }, { "epoch": 0.19, "learning_rate": 4.873163495740253e-06, "logits/chosen": -3.177628993988037, "logits/rejected": -3.157021999359131, "logps/chosen": -31.561864852905273, "logps/rejected": -410.44964599609375, "loss": 0.1174, "rewards/accuracies": 1.0, "rewards/chosen": 0.08452124148607254, "rewards/margins": 3.804720640182495, "rewards/rejected": -3.7201991081237793, "step": 16010 }, { "epoch": 0.19, "learning_rate": 4.872834780317574e-06, "logits/chosen": -3.1978933811187744, "logits/rejected": -3.1425092220306396, "logps/chosen": -29.612186431884766, "logps/rejected": -493.75390625, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/chosen": 0.12201255559921265, "rewards/margins": 4.685150146484375, "rewards/rejected": -4.563138008117676, "step": 16020 }, { "epoch": 0.19, "learning_rate": 4.8725056506045345e-06, "logits/chosen": -3.1446540355682373, "logits/rejected": -3.097308397293091, "logps/chosen": -47.817481994628906, "logps/rejected": -566.6533203125, "loss": 0.187, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.04740596562623978, "rewards/margins": 5.2204389572143555, "rewards/rejected": -5.267844200134277, "step": 16030 }, { "epoch": 0.19, "learning_rate": 4.872176106658596e-06, "logits/chosen": -3.1580421924591064, "logits/rejected": -3.114100217819214, "logps/chosen": -29.592044830322266, "logps/rejected": -618.3750610351562, "loss": 0.0408, "rewards/accuracies": 1.0, "rewards/chosen": 0.1461019366979599, "rewards/margins": 5.945827007293701, "rewards/rejected": -5.79972505569458, "step": 16040 }, { "epoch": 0.19, "learning_rate": 4.871846148537299e-06, "logits/chosen": -3.1539502143859863, "logits/rejected": -3.119788646697998, "logps/chosen": -29.494403839111328, "logps/rejected": -524.5513916015625, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": 0.18017375469207764, "rewards/margins": 5.053027629852295, "rewards/rejected": -4.8728532791137695, "step": 16050 }, { "epoch": 0.19, "learning_rate": 4.871515776298252e-06, "logits/chosen": -3.1640374660491943, "logits/rejected": -3.1252241134643555, "logps/chosen": -46.833431243896484, "logps/rejected": -613.2413330078125, "loss": 0.1259, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.0009116530418395996, "rewards/margins": 5.740176677703857, "rewards/rejected": -5.739264488220215, "step": 16060 }, { "epoch": 0.19, "learning_rate": 4.871184989999136e-06, "logits/chosen": -3.124878406524658, "logits/rejected": -3.059934139251709, "logps/chosen": -41.859596252441406, "logps/rejected": -805.6376342773438, "loss": 0.0487, "rewards/accuracies": 1.0, "rewards/chosen": 0.1379290521144867, "rewards/margins": 7.780691623687744, "rewards/rejected": -7.642762660980225, "step": 16070 }, { "epoch": 0.19, "learning_rate": 4.870853789697706e-06, "logits/chosen": -3.1823785305023193, "logits/rejected": -3.087794542312622, "logps/chosen": -49.64655685424805, "logps/rejected": -805.9818725585938, "loss": 0.092, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.027745747938752174, "rewards/margins": 7.694012641906738, "rewards/rejected": -7.666266441345215, "step": 16080 }, { "epoch": 0.19, "learning_rate": 4.87052217545179e-06, "logits/chosen": -3.2005202770233154, "logits/rejected": -3.0857577323913574, "logps/chosen": -32.35274124145508, "logps/rejected": -698.21240234375, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": 0.1431305855512619, "rewards/margins": 6.724565029144287, "rewards/rejected": -6.581434726715088, "step": 16090 }, { "epoch": 0.19, "learning_rate": 4.870190147319286e-06, "logits/chosen": -3.1254334449768066, "logits/rejected": -3.0526139736175537, "logps/chosen": -35.54132843017578, "logps/rejected": -644.6602783203125, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/chosen": 0.18100954592227936, "rewards/margins": 6.236814022064209, "rewards/rejected": -6.0558037757873535, "step": 16100 }, { "epoch": 0.19, "learning_rate": 4.869857705358165e-06, "logits/chosen": -3.183394432067871, "logits/rejected": -3.1396801471710205, "logps/chosen": -35.76264953613281, "logps/rejected": -546.3657836914062, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": 0.15851907432079315, "rewards/margins": 5.24419641494751, "rewards/rejected": -5.085677146911621, "step": 16110 }, { "epoch": 0.19, "learning_rate": 4.869524849626471e-06, "logits/chosen": -3.1682991981506348, "logits/rejected": -3.077632188796997, "logps/chosen": -47.10828399658203, "logps/rejected": -613.5629272460938, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/chosen": -0.005281650926917791, "rewards/margins": 5.718369960784912, "rewards/rejected": -5.72365140914917, "step": 16120 }, { "epoch": 0.19, "learning_rate": 4.869191580182319e-06, "logits/chosen": -3.168457508087158, "logits/rejected": -3.1253981590270996, "logps/chosen": -37.75395202636719, "logps/rejected": -667.1129150390625, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": 0.08927857875823975, "rewards/margins": 6.376067638397217, "rewards/rejected": -6.2867889404296875, "step": 16130 }, { "epoch": 0.19, "learning_rate": 4.868857897083897e-06, "logits/chosen": -3.1689062118530273, "logits/rejected": -3.1621718406677246, "logps/chosen": -14.650777816772461, "logps/rejected": -421.20574951171875, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.208038330078125, "rewards/margins": 4.050954818725586, "rewards/rejected": -3.842916488647461, "step": 16140 }, { "epoch": 0.19, "learning_rate": 4.868523800389467e-06, "logits/chosen": -3.173754930496216, "logits/rejected": -3.1038661003112793, "logps/chosen": -49.51701354980469, "logps/rejected": -738.91796875, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": 0.07002189010381699, "rewards/margins": 7.050570487976074, "rewards/rejected": -6.980548858642578, "step": 16150 }, { "epoch": 0.19, "learning_rate": 4.868189290157358e-06, "logits/chosen": -3.136909246444702, "logits/rejected": -2.9946227073669434, "logps/chosen": -148.1281280517578, "logps/rejected": -911.9675903320312, "loss": 0.2134, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8893072009086609, "rewards/margins": 7.8075151443481445, "rewards/rejected": -8.696822166442871, "step": 16160 }, { "epoch": 0.19, "learning_rate": 4.867854366445977e-06, "logits/chosen": -3.1854865550994873, "logits/rejected": -3.1459994316101074, "logps/chosen": -32.479408264160156, "logps/rejected": -488.2454528808594, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": 0.08681520819664001, "rewards/margins": 4.579987525939941, "rewards/rejected": -4.493173122406006, "step": 16170 }, { "epoch": 0.19, "learning_rate": 4.867519029313799e-06, "logits/chosen": -3.1516828536987305, "logits/rejected": -3.0984418392181396, "logps/chosen": -29.0570068359375, "logps/rejected": -512.7659912109375, "loss": 0.078, "rewards/accuracies": 1.0, "rewards/chosen": 0.13302326202392578, "rewards/margins": 4.876846790313721, "rewards/rejected": -4.743824481964111, "step": 16180 }, { "epoch": 0.19, "learning_rate": 4.867183278819375e-06, "logits/chosen": -3.200011730194092, "logits/rejected": -3.1646664142608643, "logps/chosen": -50.927581787109375, "logps/rejected": -372.3172912597656, "loss": 0.1123, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.12059338390827179, "rewards/margins": 3.2405829429626465, "rewards/rejected": -3.3611762523651123, "step": 16190 }, { "epoch": 0.19, "learning_rate": 4.8668471150213245e-06, "logits/chosen": -3.1478214263916016, "logits/rejected": -3.070122241973877, "logps/chosen": -37.71741485595703, "logps/rejected": -672.2525634765625, "loss": 0.0358, "rewards/accuracies": 1.0, "rewards/chosen": 0.11108577251434326, "rewards/margins": 6.442309379577637, "rewards/rejected": -6.331223487854004, "step": 16200 }, { "epoch": 0.19, "learning_rate": 4.866510537978342e-06, "logits/chosen": -3.1776981353759766, "logits/rejected": -3.1073615550994873, "logps/chosen": -43.83876419067383, "logps/rejected": -719.9510498046875, "loss": 0.0449, "rewards/accuracies": 1.0, "rewards/chosen": 0.022102704271674156, "rewards/margins": 6.830796718597412, "rewards/rejected": -6.808694362640381, "step": 16210 }, { "epoch": 0.19, "learning_rate": 4.866173547749192e-06, "logits/chosen": -3.141201972961426, "logits/rejected": -3.086796283721924, "logps/chosen": -34.53471755981445, "logps/rejected": -536.3738403320312, "loss": 0.0798, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.10328735411167145, "rewards/margins": 5.085288047790527, "rewards/rejected": -4.982000350952148, "step": 16220 }, { "epoch": 0.19, "learning_rate": 4.865836144392712e-06, "logits/chosen": -3.1901886463165283, "logits/rejected": -3.162087917327881, "logps/chosen": -99.03997802734375, "logps/rejected": -479.8636169433594, "loss": 0.1538, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5377418398857117, "rewards/margins": 3.8865866661071777, "rewards/rejected": -4.424328804016113, "step": 16230 }, { "epoch": 0.19, "learning_rate": 4.865498327967813e-06, "logits/chosen": -3.1547045707702637, "logits/rejected": -3.093447685241699, "logps/chosen": -25.083253860473633, "logps/rejected": -368.37237548828125, "loss": 0.0572, "rewards/accuracies": 1.0, "rewards/chosen": 0.1457168161869049, "rewards/margins": 3.4589431285858154, "rewards/rejected": -3.3132262229919434, "step": 16240 }, { "epoch": 0.19, "learning_rate": 4.865160098533476e-06, "logits/chosen": -3.1520485877990723, "logits/rejected": -3.0651791095733643, "logps/chosen": -35.42942810058594, "logps/rejected": -713.1740112304688, "loss": 0.0729, "rewards/accuracies": 1.0, "rewards/chosen": 0.17328467965126038, "rewards/margins": 6.915072441101074, "rewards/rejected": -6.741787910461426, "step": 16250 }, { "epoch": 0.19, "learning_rate": 4.864821456148754e-06, "logits/chosen": -3.1577093601226807, "logits/rejected": -3.1066997051239014, "logps/chosen": -54.49275588989258, "logps/rejected": -425.6748046875, "loss": 0.1148, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1169547587633133, "rewards/margins": 3.759545087814331, "rewards/rejected": -3.876499891281128, "step": 16260 }, { "epoch": 0.19, "learning_rate": 4.864482400872773e-06, "logits/chosen": -3.1282668113708496, "logits/rejected": -3.0644707679748535, "logps/chosen": -59.990516662597656, "logps/rejected": -741.5960693359375, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": -0.12751798331737518, "rewards/margins": 6.898342132568359, "rewards/rejected": -7.025859832763672, "step": 16270 }, { "epoch": 0.19, "learning_rate": 4.864142932764733e-06, "logits/chosen": -3.14457631111145, "logits/rejected": -3.1278014183044434, "logps/chosen": -29.991830825805664, "logps/rejected": -421.6188049316406, "loss": 0.047, "rewards/accuracies": 1.0, "rewards/chosen": 0.1331424117088318, "rewards/margins": 3.9815871715545654, "rewards/rejected": -3.848445177078247, "step": 16280 }, { "epoch": 0.19, "learning_rate": 4.863803051883903e-06, "logits/chosen": -3.1811811923980713, "logits/rejected": -3.0813465118408203, "logps/chosen": -36.08076858520508, "logps/rejected": -732.2425537109375, "loss": 0.0376, "rewards/accuracies": 1.0, "rewards/chosen": 0.13947871327400208, "rewards/margins": 7.064180850982666, "rewards/rejected": -6.924701690673828, "step": 16290 }, { "epoch": 0.2, "learning_rate": 4.863462758289624e-06, "logits/chosen": -3.175755262374878, "logits/rejected": -3.0830516815185547, "logps/chosen": -39.78515625, "logps/rejected": -715.6889038085938, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": 0.09292496740818024, "rewards/margins": 6.852581024169922, "rewards/rejected": -6.759655952453613, "step": 16300 }, { "epoch": 0.2, "learning_rate": 4.863122052041313e-06, "logits/chosen": -3.1761441230773926, "logits/rejected": -3.165060520172119, "logps/chosen": -42.48955535888672, "logps/rejected": -502.02020263671875, "loss": 0.1137, "rewards/accuracies": 1.0, "rewards/chosen": -0.06374015659093857, "rewards/margins": 4.575809001922607, "rewards/rejected": -4.639549255371094, "step": 16310 }, { "epoch": 0.2, "learning_rate": 4.862780933198454e-06, "logits/chosen": -3.179185390472412, "logits/rejected": -3.1140856742858887, "logps/chosen": -37.48882293701172, "logps/rejected": -584.5671997070312, "loss": 0.0462, "rewards/accuracies": 1.0, "rewards/chosen": 0.15116535127162933, "rewards/margins": 5.609963417053223, "rewards/rejected": -5.458798408508301, "step": 16320 }, { "epoch": 0.2, "learning_rate": 4.8624394018206075e-06, "logits/chosen": -3.172873020172119, "logits/rejected": -3.0874924659729004, "logps/chosen": -63.162208557128906, "logps/rejected": -945.75537109375, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -0.055053602904081345, "rewards/margins": 8.982431411743164, "rewards/rejected": -9.037485122680664, "step": 16330 }, { "epoch": 0.2, "learning_rate": 4.862097457967402e-06, "logits/chosen": -3.1488735675811768, "logits/rejected": -3.1204161643981934, "logps/chosen": -24.65471839904785, "logps/rejected": -562.5435791015625, "loss": 0.0472, "rewards/accuracies": 1.0, "rewards/chosen": 0.16176669299602509, "rewards/margins": 5.416108131408691, "rewards/rejected": -5.254342079162598, "step": 16340 }, { "epoch": 0.2, "learning_rate": 4.861755101698542e-06, "logits/chosen": -3.14989972114563, "logits/rejected": -3.1041293144226074, "logps/chosen": -49.95877456665039, "logps/rejected": -605.9273681640625, "loss": 0.0868, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.05137905478477478, "rewards/margins": 5.6331377029418945, "rewards/rejected": -5.684516429901123, "step": 16350 }, { "epoch": 0.2, "learning_rate": 4.8614123330738e-06, "logits/chosen": -3.155470371246338, "logits/rejected": -3.103842258453369, "logps/chosen": -36.36178970336914, "logps/rejected": -851.328125, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": 0.12508977949619293, "rewards/margins": 8.244341850280762, "rewards/rejected": -8.11925220489502, "step": 16360 }, { "epoch": 0.2, "learning_rate": 4.861069152153023e-06, "logits/chosen": -3.1477153301239014, "logits/rejected": -3.1039321422576904, "logps/chosen": -71.29557037353516, "logps/rejected": -630.1414184570312, "loss": 0.0522, "rewards/accuracies": 1.0, "rewards/chosen": -0.30902156233787537, "rewards/margins": 5.612302303314209, "rewards/rejected": -5.921324253082275, "step": 16370 }, { "epoch": 0.2, "learning_rate": 4.860725558996129e-06, "logits/chosen": -3.185511827468872, "logits/rejected": -3.0876269340515137, "logps/chosen": -50.31862258911133, "logps/rejected": -741.2645263671875, "loss": 0.1683, "rewards/accuracies": 1.0, "rewards/chosen": 0.013769591227173805, "rewards/margins": 7.019471645355225, "rewards/rejected": -7.005702018737793, "step": 16380 }, { "epoch": 0.2, "learning_rate": 4.86038155366311e-06, "logits/chosen": -3.161191463470459, "logits/rejected": -3.1175684928894043, "logps/chosen": -41.26097869873047, "logps/rejected": -479.8804626464844, "loss": 0.0495, "rewards/accuracies": 1.0, "rewards/chosen": 0.040885914117097855, "rewards/margins": 4.4691362380981445, "rewards/rejected": -4.428250312805176, "step": 16390 }, { "epoch": 0.2, "learning_rate": 4.8600371362140275e-06, "logits/chosen": -3.1472880840301514, "logits/rejected": -3.0839433670043945, "logps/chosen": -60.894508361816406, "logps/rejected": -622.8076782226562, "loss": 0.097, "rewards/accuracies": 1.0, "rewards/chosen": -0.038877155631780624, "rewards/margins": 5.802502632141113, "rewards/rejected": -5.841379642486572, "step": 16400 }, { "epoch": 0.2, "learning_rate": 4.859692306709015e-06, "logits/chosen": -3.1530604362487793, "logits/rejected": -3.078930616378784, "logps/chosen": -48.69226837158203, "logps/rejected": -756.3209228515625, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": 0.02171155996620655, "rewards/margins": 7.185088157653809, "rewards/rejected": -7.163377285003662, "step": 16410 }, { "epoch": 0.2, "learning_rate": 4.85934706520828e-06, "logits/chosen": -3.2003579139709473, "logits/rejected": -3.1689696311950684, "logps/chosen": -55.32683181762695, "logps/rejected": -594.9843139648438, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": -0.12973332405090332, "rewards/margins": 5.421778678894043, "rewards/rejected": -5.551511764526367, "step": 16420 }, { "epoch": 0.2, "learning_rate": 4.8590014117721e-06, "logits/chosen": -3.196610927581787, "logits/rejected": -3.0976831912994385, "logps/chosen": -37.8350944519043, "logps/rejected": -534.7763061523438, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/chosen": 0.07559745758771896, "rewards/margins": 5.04836368560791, "rewards/rejected": -4.9727654457092285, "step": 16430 }, { "epoch": 0.2, "learning_rate": 4.858655346460825e-06, "logits/chosen": -3.1341254711151123, "logits/rejected": -3.0750222206115723, "logps/chosen": -54.97343826293945, "logps/rejected": -611.7008666992188, "loss": 0.1371, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.07306559383869171, "rewards/margins": 5.639357566833496, "rewards/rejected": -5.712424278259277, "step": 16440 }, { "epoch": 0.2, "learning_rate": 4.858308869334878e-06, "logits/chosen": -3.1510071754455566, "logits/rejected": -3.105971574783325, "logps/chosen": -55.92180252075195, "logps/rejected": -699.4464721679688, "loss": 0.1357, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.06006425619125366, "rewards/margins": 6.525815486907959, "rewards/rejected": -6.585879325866699, "step": 16450 }, { "epoch": 0.2, "learning_rate": 4.857961980454753e-06, "logits/chosen": -3.116666078567505, "logits/rejected": -3.0683555603027344, "logps/chosen": -31.311519622802734, "logps/rejected": -449.1358337402344, "loss": 0.1128, "rewards/accuracies": 1.0, "rewards/chosen": 0.11731056123971939, "rewards/margins": 4.235006809234619, "rewards/rejected": -4.117696285247803, "step": 16460 }, { "epoch": 0.2, "learning_rate": 4.8576146798810145e-06, "logits/chosen": -3.15972900390625, "logits/rejected": -3.101811170578003, "logps/chosen": -34.423858642578125, "logps/rejected": -602.1026000976562, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": 0.06596706807613373, "rewards/margins": 5.698686599731445, "rewards/rejected": -5.63271951675415, "step": 16470 }, { "epoch": 0.2, "learning_rate": 4.8572669676743e-06, "logits/chosen": -3.1613829135894775, "logits/rejected": -3.085120916366577, "logps/chosen": -42.7996711730957, "logps/rejected": -612.5595703125, "loss": 0.0613, "rewards/accuracies": 1.0, "rewards/chosen": 0.06420252472162247, "rewards/margins": 5.797759532928467, "rewards/rejected": -5.733556270599365, "step": 16480 }, { "epoch": 0.2, "learning_rate": 4.8569188438953205e-06, "logits/chosen": -3.170348882675171, "logits/rejected": -3.089789867401123, "logps/chosen": -38.96098709106445, "logps/rejected": -649.5771484375, "loss": 0.0588, "rewards/accuracies": 1.0, "rewards/chosen": 0.04607591778039932, "rewards/margins": 6.156124114990234, "rewards/rejected": -6.110048770904541, "step": 16490 }, { "epoch": 0.2, "learning_rate": 4.856570308604857e-06, "logits/chosen": -3.1541895866394043, "logits/rejected": -3.074906587600708, "logps/chosen": -48.00603485107422, "logps/rejected": -796.4617309570312, "loss": 0.0465, "rewards/accuracies": 1.0, "rewards/chosen": 0.03125423938035965, "rewards/margins": 7.588682651519775, "rewards/rejected": -7.55742883682251, "step": 16500 }, { "epoch": 0.2, "learning_rate": 4.856221361863764e-06, "logits/chosen": -3.124847412109375, "logits/rejected": -3.062359094619751, "logps/chosen": -31.245092391967773, "logps/rejected": -547.884033203125, "loss": 0.0448, "rewards/accuracies": 1.0, "rewards/chosen": 0.1440613865852356, "rewards/margins": 5.243834495544434, "rewards/rejected": -5.09977388381958, "step": 16510 }, { "epoch": 0.2, "learning_rate": 4.855872003732964e-06, "logits/chosen": -3.1659276485443115, "logits/rejected": -3.131749391555786, "logps/chosen": -37.50769805908203, "logps/rejected": -653.8209838867188, "loss": 0.0375, "rewards/accuracies": 1.0, "rewards/chosen": 0.10523833334445953, "rewards/margins": 6.246894359588623, "rewards/rejected": -6.141655921936035, "step": 16520 }, { "epoch": 0.2, "learning_rate": 4.855522234273456e-06, "logits/chosen": -3.1594338417053223, "logits/rejected": -3.136497974395752, "logps/chosen": -25.434261322021484, "logps/rejected": -504.9434509277344, "loss": 0.1044, "rewards/accuracies": 1.0, "rewards/chosen": 0.1433113068342209, "rewards/margins": 4.822508811950684, "rewards/rejected": -4.679197311401367, "step": 16530 }, { "epoch": 0.2, "learning_rate": 4.8551720535463065e-06, "logits/chosen": -3.1915783882141113, "logits/rejected": -3.1432604789733887, "logps/chosen": -43.61222457885742, "logps/rejected": -686.3488159179688, "loss": 0.1, "rewards/accuracies": 1.0, "rewards/chosen": 0.06292538344860077, "rewards/margins": 6.512739658355713, "rewards/rejected": -6.4498138427734375, "step": 16540 }, { "epoch": 0.2, "learning_rate": 4.854821461612658e-06, "logits/chosen": -3.1649844646453857, "logits/rejected": -3.093905448913574, "logps/chosen": -39.83488464355469, "logps/rejected": -663.4382934570312, "loss": 0.1048, "rewards/accuracies": 1.0, "rewards/chosen": 0.1054743155837059, "rewards/margins": 6.351802825927734, "rewards/rejected": -6.246328353881836, "step": 16550 }, { "epoch": 0.2, "learning_rate": 4.854470458533724e-06, "logits/chosen": -3.1700057983398438, "logits/rejected": -3.1248512268066406, "logps/chosen": -54.76639938354492, "logps/rejected": -615.80859375, "loss": 0.196, "rewards/accuracies": 1.0, "rewards/chosen": -0.0966644436120987, "rewards/margins": 5.670640468597412, "rewards/rejected": -5.76730489730835, "step": 16560 }, { "epoch": 0.2, "learning_rate": 4.854119044370787e-06, "logits/chosen": -3.201906204223633, "logits/rejected": -3.140009880065918, "logps/chosen": -40.6658821105957, "logps/rejected": -591.8394775390625, "loss": 0.1259, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0008446440333500504, "rewards/margins": 5.537274360656738, "rewards/rejected": -5.538119792938232, "step": 16570 }, { "epoch": 0.2, "learning_rate": 4.853767219185203e-06, "logits/chosen": -3.1540820598602295, "logits/rejected": -3.097573757171631, "logps/chosen": -39.887943267822266, "logps/rejected": -658.6297607421875, "loss": 0.0413, "rewards/accuracies": 1.0, "rewards/chosen": 0.07068192958831787, "rewards/margins": 6.257744789123535, "rewards/rejected": -6.187062740325928, "step": 16580 }, { "epoch": 0.2, "learning_rate": 4.853414983038399e-06, "logits/chosen": -3.182107925415039, "logits/rejected": -3.117422342300415, "logps/chosen": -27.747577667236328, "logps/rejected": -639.6386108398438, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/chosen": 0.13078299164772034, "rewards/margins": 6.143805503845215, "rewards/rejected": -6.013022422790527, "step": 16590 }, { "epoch": 0.2, "learning_rate": 4.853062335991876e-06, "logits/chosen": -3.176833391189575, "logits/rejected": -3.1036736965179443, "logps/chosen": -56.02842330932617, "logps/rejected": -546.1217041015625, "loss": 0.1192, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1550605148077011, "rewards/margins": 4.931069850921631, "rewards/rejected": -5.086130142211914, "step": 16600 }, { "epoch": 0.2, "learning_rate": 4.852709278107204e-06, "logits/chosen": -3.1608173847198486, "logits/rejected": -3.08717679977417, "logps/chosen": -44.950401306152344, "logps/rejected": -745.6011962890625, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": 0.02166737988591194, "rewards/margins": 7.080528259277344, "rewards/rejected": -7.05886173248291, "step": 16610 }, { "epoch": 0.2, "learning_rate": 4.852355809446027e-06, "logits/chosen": -3.183265209197998, "logits/rejected": -3.115041732788086, "logps/chosen": -50.915977478027344, "logps/rejected": -621.462890625, "loss": 0.0671, "rewards/accuracies": 1.0, "rewards/chosen": -0.047570522874593735, "rewards/margins": 5.763768672943115, "rewards/rejected": -5.811339378356934, "step": 16620 }, { "epoch": 0.2, "learning_rate": 4.852001930070058e-06, "logits/chosen": -3.1921133995056152, "logits/rejected": -3.0752055644989014, "logps/chosen": -101.02598571777344, "logps/rejected": -699.1370239257812, "loss": 0.095, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5048521757125854, "rewards/margins": 6.073426723480225, "rewards/rejected": -6.5782790184021, "step": 16630 }, { "epoch": 0.2, "learning_rate": 4.851647640041086e-06, "logits/chosen": -3.146556854248047, "logits/rejected": -3.0772266387939453, "logps/chosen": -43.11128234863281, "logps/rejected": -657.4725341796875, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/chosen": 0.09935685992240906, "rewards/margins": 6.271378040313721, "rewards/rejected": -6.17202091217041, "step": 16640 }, { "epoch": 0.2, "learning_rate": 4.851292939420966e-06, "logits/chosen": -3.1771888732910156, "logits/rejected": -3.0949673652648926, "logps/chosen": -65.86910247802734, "logps/rejected": -745.9882202148438, "loss": 0.1663, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1326991468667984, "rewards/margins": 6.919949531555176, "rewards/rejected": -7.052649021148682, "step": 16650 }, { "epoch": 0.2, "learning_rate": 4.85093782827163e-06, "logits/chosen": -3.1600890159606934, "logits/rejected": -3.072841167449951, "logps/chosen": -42.460609436035156, "logps/rejected": -624.2659912109375, "loss": 0.0417, "rewards/accuracies": 1.0, "rewards/chosen": 0.09549294412136078, "rewards/margins": 5.934191703796387, "rewards/rejected": -5.838698387145996, "step": 16660 }, { "epoch": 0.2, "learning_rate": 4.850582306655078e-06, "logits/chosen": -3.126709461212158, "logits/rejected": -3.073646068572998, "logps/chosen": -31.228235244750977, "logps/rejected": -520.7506103515625, "loss": 0.1259, "rewards/accuracies": 1.0, "rewards/chosen": 0.10548625886440277, "rewards/margins": 4.927950859069824, "rewards/rejected": -4.822464942932129, "step": 16670 }, { "epoch": 0.2, "learning_rate": 4.850226374633384e-06, "logits/chosen": -3.168768882751465, "logits/rejected": -3.1475796699523926, "logps/chosen": -27.24432373046875, "logps/rejected": -434.52850341796875, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": 0.1113658994436264, "rewards/margins": 4.088577747344971, "rewards/rejected": -3.9772117137908936, "step": 16680 }, { "epoch": 0.2, "learning_rate": 4.849870032268693e-06, "logits/chosen": -3.146253824234009, "logits/rejected": -3.0452582836151123, "logps/chosen": -40.5661506652832, "logps/rejected": -681.38916015625, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": 0.11635343730449677, "rewards/margins": 6.524412631988525, "rewards/rejected": -6.408059597015381, "step": 16690 }, { "epoch": 0.2, "learning_rate": 4.84951327962322e-06, "logits/chosen": -3.1933951377868652, "logits/rejected": -3.108760356903076, "logps/chosen": -54.48948287963867, "logps/rejected": -556.223388671875, "loss": 0.1188, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.03838653117418289, "rewards/margins": 5.156881809234619, "rewards/rejected": -5.1952691078186035, "step": 16700 }, { "epoch": 0.2, "learning_rate": 4.849156116759255e-06, "logits/chosen": -3.205836772918701, "logits/rejected": -3.1477532386779785, "logps/chosen": -39.0611457824707, "logps/rejected": -620.6361083984375, "loss": 0.0334, "rewards/accuracies": 1.0, "rewards/chosen": 0.09326710551977158, "rewards/margins": 5.920607566833496, "rewards/rejected": -5.827340602874756, "step": 16710 }, { "epoch": 0.2, "learning_rate": 4.848798543739156e-06, "logits/chosen": -3.2031784057617188, "logits/rejected": -3.121840715408325, "logps/chosen": -55.21317672729492, "logps/rejected": -792.0382690429688, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": 0.04266177490353584, "rewards/margins": 7.549344539642334, "rewards/rejected": -7.506682395935059, "step": 16720 }, { "epoch": 0.2, "learning_rate": 4.848440560625355e-06, "logits/chosen": -3.1767737865448, "logits/rejected": -3.1488497257232666, "logps/chosen": -47.2021484375, "logps/rejected": -663.3807373046875, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -0.009168470278382301, "rewards/margins": 6.214563369750977, "rewards/rejected": -6.22373104095459, "step": 16730 }, { "epoch": 0.2, "learning_rate": 4.848082167480354e-06, "logits/chosen": -3.146554470062256, "logits/rejected": -3.07779598236084, "logps/chosen": -40.378353118896484, "logps/rejected": -610.806640625, "loss": 0.1106, "rewards/accuracies": 1.0, "rewards/chosen": 0.06563086062669754, "rewards/margins": 5.791508674621582, "rewards/rejected": -5.725876808166504, "step": 16740 }, { "epoch": 0.2, "learning_rate": 4.847723364366728e-06, "logits/chosen": -3.1606953144073486, "logits/rejected": -3.093657970428467, "logps/chosen": -33.48169708251953, "logps/rejected": -623.0479736328125, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": 0.08800773322582245, "rewards/margins": 5.931626319885254, "rewards/rejected": -5.843618869781494, "step": 16750 }, { "epoch": 0.2, "learning_rate": 4.847364151347124e-06, "logits/chosen": -3.183302402496338, "logits/rejected": -3.0869879722595215, "logps/chosen": -124.18473052978516, "logps/rejected": -830.1309814453125, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/chosen": -0.5803592801094055, "rewards/margins": 7.303903102874756, "rewards/rejected": -7.8842620849609375, "step": 16760 }, { "epoch": 0.2, "learning_rate": 4.847004528484258e-06, "logits/chosen": -3.171337604522705, "logits/rejected": -3.122729539871216, "logps/chosen": -62.61492919921875, "logps/rejected": -627.3801879882812, "loss": 0.0414, "rewards/accuracies": 1.0, "rewards/chosen": -0.1866552084684372, "rewards/margins": 5.689207077026367, "rewards/rejected": -5.875862121582031, "step": 16770 }, { "epoch": 0.2, "learning_rate": 4.8466444958409195e-06, "logits/chosen": -3.1744775772094727, "logits/rejected": -3.0456247329711914, "logps/chosen": -75.48477172851562, "logps/rejected": -685.0758056640625, "loss": 0.3023, "rewards/accuracies": 1.0, "rewards/chosen": -0.21635285019874573, "rewards/margins": 6.237942695617676, "rewards/rejected": -6.454296112060547, "step": 16780 }, { "epoch": 0.2, "learning_rate": 4.8462840534799705e-06, "logits/chosen": -3.1934974193573, "logits/rejected": -3.1328372955322266, "logps/chosen": -76.98076629638672, "logps/rejected": -562.2095947265625, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": -0.29491910338401794, "rewards/margins": 4.94106388092041, "rewards/rejected": -5.235983371734619, "step": 16790 }, { "epoch": 0.2, "learning_rate": 4.845923201464342e-06, "logits/chosen": -3.134770393371582, "logits/rejected": -3.063253402709961, "logps/chosen": -67.6143798828125, "logps/rejected": -795.0877075195312, "loss": 0.0904, "rewards/accuracies": 1.0, "rewards/chosen": -0.22525015473365784, "rewards/margins": 7.327306270599365, "rewards/rejected": -7.55255651473999, "step": 16800 }, { "epoch": 0.2, "learning_rate": 4.845561939857037e-06, "logits/chosen": -3.1460280418395996, "logits/rejected": -3.0988218784332275, "logps/chosen": -61.71087646484375, "logps/rejected": -650.7293090820312, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": -0.14154548943042755, "rewards/margins": 5.9856462478637695, "rewards/rejected": -6.127191543579102, "step": 16810 }, { "epoch": 0.2, "learning_rate": 4.845200268721133e-06, "logits/chosen": -3.1640853881835938, "logits/rejected": -3.0940146446228027, "logps/chosen": -62.74131393432617, "logps/rejected": -872.7682495117188, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/chosen": -0.1245579868555069, "rewards/margins": 8.201257705688477, "rewards/rejected": -8.325815200805664, "step": 16820 }, { "epoch": 0.2, "learning_rate": 4.844838188119776e-06, "logits/chosen": -3.152036666870117, "logits/rejected": -3.1174089908599854, "logps/chosen": -36.533470153808594, "logps/rejected": -591.6721801757812, "loss": 0.1154, "rewards/accuracies": 1.0, "rewards/chosen": 0.03785043582320213, "rewards/margins": 5.568147659301758, "rewards/rejected": -5.530297756195068, "step": 16830 }, { "epoch": 0.2, "learning_rate": 4.844475698116183e-06, "logits/chosen": -3.1727702617645264, "logits/rejected": -3.1059529781341553, "logps/chosen": -59.39141082763672, "logps/rejected": -707.0677490234375, "loss": 0.0426, "rewards/accuracies": 1.0, "rewards/chosen": -0.10510706901550293, "rewards/margins": 6.570815086364746, "rewards/rejected": -6.675922393798828, "step": 16840 }, { "epoch": 0.2, "learning_rate": 4.844112798773644e-06, "logits/chosen": -3.170642375946045, "logits/rejected": -3.0907325744628906, "logps/chosen": -122.52424621582031, "logps/rejected": -818.0686645507812, "loss": 0.0444, "rewards/accuracies": 1.0, "rewards/chosen": -0.7647258639335632, "rewards/margins": 7.021692752838135, "rewards/rejected": -7.786418914794922, "step": 16850 }, { "epoch": 0.2, "learning_rate": 4.8437494901555225e-06, "logits/chosen": -3.172257661819458, "logits/rejected": -3.127652883529663, "logps/chosen": -63.334815979003906, "logps/rejected": -659.3649291992188, "loss": 0.0508, "rewards/accuracies": 1.0, "rewards/chosen": -0.15576283633708954, "rewards/margins": 6.042241096496582, "rewards/rejected": -6.198003768920898, "step": 16860 }, { "epoch": 0.2, "learning_rate": 4.8433857723252485e-06, "logits/chosen": -3.1673641204833984, "logits/rejected": -3.1518115997314453, "logps/chosen": -35.12667465209961, "logps/rejected": -480.83270263671875, "loss": 0.0504, "rewards/accuracies": 1.0, "rewards/chosen": 0.044126223772764206, "rewards/margins": 4.472229957580566, "rewards/rejected": -4.428103923797607, "step": 16870 }, { "epoch": 0.2, "learning_rate": 4.843021645346327e-06, "logits/chosen": -3.171081066131592, "logits/rejected": -3.0656681060791016, "logps/chosen": -72.301025390625, "logps/rejected": -754.4385986328125, "loss": 0.085, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1895979344844818, "rewards/margins": 6.95552921295166, "rewards/rejected": -7.145127296447754, "step": 16880 }, { "epoch": 0.2, "learning_rate": 4.842657109282333e-06, "logits/chosen": -3.172194719314575, "logits/rejected": -3.137174129486084, "logps/chosen": -45.31820297241211, "logps/rejected": -480.29864501953125, "loss": 0.0506, "rewards/accuracies": 1.0, "rewards/chosen": 0.013692572712898254, "rewards/margins": 4.439364433288574, "rewards/rejected": -4.4256720542907715, "step": 16890 }, { "epoch": 0.2, "learning_rate": 4.842292164196916e-06, "logits/chosen": -3.2068564891815186, "logits/rejected": -3.0818257331848145, "logps/chosen": -67.79010009765625, "logps/rejected": -680.5748901367188, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": -0.10839954763650894, "rewards/margins": 6.28903341293335, "rewards/rejected": -6.397433280944824, "step": 16900 }, { "epoch": 0.2, "learning_rate": 4.841926810153792e-06, "logits/chosen": -3.164365530014038, "logits/rejected": -3.0938191413879395, "logps/chosen": -56.76728439331055, "logps/rejected": -658.2724609375, "loss": 0.1104, "rewards/accuracies": 1.0, "rewards/chosen": -0.0282062329351902, "rewards/margins": 6.164447784423828, "rewards/rejected": -6.192653656005859, "step": 16910 }, { "epoch": 0.2, "learning_rate": 4.841561047216751e-06, "logits/chosen": -3.167724132537842, "logits/rejected": -3.1387245655059814, "logps/chosen": -53.09228515625, "logps/rejected": -591.4912719726562, "loss": 0.0504, "rewards/accuracies": 1.0, "rewards/chosen": -0.03872352093458176, "rewards/margins": 5.491223335266113, "rewards/rejected": -5.529946804046631, "step": 16920 }, { "epoch": 0.2, "learning_rate": 4.841194875449654e-06, "logits/chosen": -3.1314053535461426, "logits/rejected": -3.0698368549346924, "logps/chosen": -32.974281311035156, "logps/rejected": -559.2354736328125, "loss": 0.0415, "rewards/accuracies": 1.0, "rewards/chosen": 0.09423323720693588, "rewards/margins": 5.311756134033203, "rewards/rejected": -5.217522621154785, "step": 16930 }, { "epoch": 0.2, "learning_rate": 4.840828294916435e-06, "logits/chosen": -3.1674137115478516, "logits/rejected": -3.0747883319854736, "logps/chosen": -57.4921875, "logps/rejected": -654.364013671875, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/chosen": -0.04633135348558426, "rewards/margins": 6.105217933654785, "rewards/rejected": -6.151549339294434, "step": 16940 }, { "epoch": 0.2, "learning_rate": 4.840461305681097e-06, "logits/chosen": -3.218388080596924, "logits/rejected": -3.168489933013916, "logps/chosen": -42.50517272949219, "logps/rejected": -566.4824829101562, "loss": 0.1038, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.023979011923074722, "rewards/margins": 5.261706352233887, "rewards/rejected": -5.2856855392456055, "step": 16950 }, { "epoch": 0.2, "learning_rate": 4.840093907807715e-06, "logits/chosen": -3.1862854957580566, "logits/rejected": -3.1596038341522217, "logps/chosen": -31.689319610595703, "logps/rejected": -590.5787353515625, "loss": 0.0435, "rewards/accuracies": 1.0, "rewards/chosen": 0.08386365324258804, "rewards/margins": 5.598289489746094, "rewards/rejected": -5.514425754547119, "step": 16960 }, { "epoch": 0.2, "learning_rate": 4.839726101360436e-06, "logits/chosen": -3.127837657928467, "logits/rejected": -3.07438588142395, "logps/chosen": -73.73373413085938, "logps/rejected": -755.5953369140625, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -0.043319035321474075, "rewards/margins": 7.08408260345459, "rewards/rejected": -7.127402305603027, "step": 16970 }, { "epoch": 0.2, "learning_rate": 4.839357886403479e-06, "logits/chosen": -3.1816859245300293, "logits/rejected": -3.1293017864227295, "logps/chosen": -37.42818832397461, "logps/rejected": -667.3318481445312, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": 0.11152195930480957, "rewards/margins": 6.397072792053223, "rewards/rejected": -6.285550594329834, "step": 16980 }, { "epoch": 0.2, "learning_rate": 4.838989263001131e-06, "logits/chosen": -3.1658706665039062, "logits/rejected": -3.1045315265655518, "logps/chosen": -34.26361846923828, "logps/rejected": -677.114013671875, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/chosen": 0.13746799528598785, "rewards/margins": 6.533776760101318, "rewards/rejected": -6.396308422088623, "step": 16990 }, { "epoch": 0.2, "learning_rate": 4.838620231217754e-06, "logits/chosen": -3.1789793968200684, "logits/rejected": -3.1345438957214355, "logps/chosen": -65.7638931274414, "logps/rejected": -790.3654174804688, "loss": 0.0423, "rewards/accuracies": 1.0, "rewards/chosen": -0.11472678184509277, "rewards/margins": 7.3997297286987305, "rewards/rejected": -7.514456748962402, "step": 17000 }, { "epoch": 0.2, "learning_rate": 4.8382507911177814e-06, "logits/chosen": -3.1919713020324707, "logits/rejected": -3.142916202545166, "logps/chosen": -34.511138916015625, "logps/rejected": -658.0015869140625, "loss": 0.0478, "rewards/accuracies": 1.0, "rewards/chosen": 0.11429872363805771, "rewards/margins": 6.317963600158691, "rewards/rejected": -6.203665733337402, "step": 17010 }, { "epoch": 0.2, "learning_rate": 4.837880942765714e-06, "logits/chosen": -3.125645637512207, "logits/rejected": -3.0426058769226074, "logps/chosen": -43.0038948059082, "logps/rejected": -581.763916015625, "loss": 0.1098, "rewards/accuracies": 1.0, "rewards/chosen": 0.06388778984546661, "rewards/margins": 5.484613418579102, "rewards/rejected": -5.4207258224487305, "step": 17020 }, { "epoch": 0.2, "learning_rate": 4.837510686226127e-06, "logits/chosen": -3.153226137161255, "logits/rejected": -3.049136161804199, "logps/chosen": -32.114540100097656, "logps/rejected": -591.3912963867188, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": 0.13142165541648865, "rewards/margins": 5.668872356414795, "rewards/rejected": -5.537451267242432, "step": 17030 }, { "epoch": 0.2, "learning_rate": 4.837140021563666e-06, "logits/chosen": -3.1694447994232178, "logits/rejected": -3.1043307781219482, "logps/chosen": -30.17365074157715, "logps/rejected": -536.2039184570312, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": 0.1363528072834015, "rewards/margins": 5.109414577484131, "rewards/rejected": -4.973061561584473, "step": 17040 }, { "epoch": 0.2, "learning_rate": 4.836768948843049e-06, "logits/chosen": -3.160126209259033, "logits/rejected": -3.1063807010650635, "logps/chosen": -17.434024810791016, "logps/rejected": -449.1080627441406, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/chosen": 0.19864213466644287, "rewards/margins": 4.310214042663574, "rewards/rejected": -4.111571788787842, "step": 17050 }, { "epoch": 0.2, "learning_rate": 4.836397468129063e-06, "logits/chosen": -3.155794143676758, "logits/rejected": -3.105990409851074, "logps/chosen": -34.66498947143555, "logps/rejected": -655.919677734375, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": 0.12051942199468613, "rewards/margins": 6.287911415100098, "rewards/rejected": -6.167391777038574, "step": 17060 }, { "epoch": 0.2, "learning_rate": 4.8360255794865684e-06, "logits/chosen": -3.1861958503723145, "logits/rejected": -3.1624035835266113, "logps/chosen": -26.496013641357422, "logps/rejected": -487.5625, "loss": 0.0363, "rewards/accuracies": 1.0, "rewards/chosen": 0.15321537852287292, "rewards/margins": 4.656038761138916, "rewards/rejected": -4.5028228759765625, "step": 17070 }, { "epoch": 0.2, "learning_rate": 4.8356532829804956e-06, "logits/chosen": -3.168606996536255, "logits/rejected": -3.072399616241455, "logps/chosen": -47.905372619628906, "logps/rejected": -916.4295043945312, "loss": 0.1002, "rewards/accuracies": 1.0, "rewards/chosen": 0.06494663655757904, "rewards/margins": 8.819329261779785, "rewards/rejected": -8.754383087158203, "step": 17080 }, { "epoch": 0.2, "learning_rate": 4.835280578675847e-06, "logits/chosen": -3.1608028411865234, "logits/rejected": -3.102722644805908, "logps/chosen": -34.84790802001953, "logps/rejected": -620.5089721679688, "loss": 0.0504, "rewards/accuracies": 1.0, "rewards/chosen": 0.10316692292690277, "rewards/margins": 5.921229362487793, "rewards/rejected": -5.818062782287598, "step": 17090 }, { "epoch": 0.2, "learning_rate": 4.8349074666376945e-06, "logits/chosen": -3.1777093410491943, "logits/rejected": -3.1310229301452637, "logps/chosen": -35.1877555847168, "logps/rejected": -634.5372314453125, "loss": 0.0468, "rewards/accuracies": 1.0, "rewards/chosen": 0.09394451975822449, "rewards/margins": 6.054823875427246, "rewards/rejected": -5.960879325866699, "step": 17100 }, { "epoch": 0.2, "learning_rate": 4.834533946931185e-06, "logits/chosen": -3.151076078414917, "logits/rejected": -3.0788025856018066, "logps/chosen": -28.579513549804688, "logps/rejected": -616.8017578125, "loss": 0.042, "rewards/accuracies": 1.0, "rewards/chosen": 0.16524609923362732, "rewards/margins": 5.949464797973633, "rewards/rejected": -5.784218788146973, "step": 17110 }, { "epoch": 0.2, "learning_rate": 4.834160019621531e-06, "logits/chosen": -3.1727938652038574, "logits/rejected": -3.095231533050537, "logps/chosen": -41.483665466308594, "logps/rejected": -802.9028930664062, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": 0.12525713443756104, "rewards/margins": 7.759149074554443, "rewards/rejected": -7.633892059326172, "step": 17120 }, { "epoch": 0.21, "learning_rate": 4.833785684774021e-06, "logits/chosen": -3.1718387603759766, "logits/rejected": -3.085749387741089, "logps/chosen": -49.355812072753906, "logps/rejected": -749.643310546875, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": 0.09295114129781723, "rewards/margins": 7.187366485595703, "rewards/rejected": -7.094414710998535, "step": 17130 }, { "epoch": 0.21, "learning_rate": 4.833410942454011e-06, "logits/chosen": -3.1954362392425537, "logits/rejected": -3.1278634071350098, "logps/chosen": -50.01800537109375, "logps/rejected": -552.9859619140625, "loss": 0.1268, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.06333205848932266, "rewards/margins": 5.082494735717773, "rewards/rejected": -5.1458258628845215, "step": 17140 }, { "epoch": 0.21, "learning_rate": 4.8330357927269325e-06, "logits/chosen": -3.188840389251709, "logits/rejected": -3.1213390827178955, "logps/chosen": -29.68828773498535, "logps/rejected": -526.9913330078125, "loss": 0.1097, "rewards/accuracies": 1.0, "rewards/chosen": 0.15768875181674957, "rewards/margins": 5.049822807312012, "rewards/rejected": -4.8921332359313965, "step": 17150 }, { "epoch": 0.21, "learning_rate": 4.832660235658283e-06, "logits/chosen": -3.180380344390869, "logits/rejected": -3.0638985633850098, "logps/chosen": -38.71208572387695, "logps/rejected": -589.8507080078125, "loss": 0.0538, "rewards/accuracies": 1.0, "rewards/chosen": 0.10614198446273804, "rewards/margins": 5.6207380294799805, "rewards/rejected": -5.514595985412598, "step": 17160 }, { "epoch": 0.21, "learning_rate": 4.8322842713136365e-06, "logits/chosen": -3.153099536895752, "logits/rejected": -3.0710487365722656, "logps/chosen": -40.009525299072266, "logps/rejected": -731.0535888671875, "loss": 0.0474, "rewards/accuracies": 1.0, "rewards/chosen": 0.1010548323392868, "rewards/margins": 7.030080318450928, "rewards/rejected": -6.929025173187256, "step": 17170 }, { "epoch": 0.21, "learning_rate": 4.8319078997586334e-06, "logits/chosen": -3.212327241897583, "logits/rejected": -3.127708673477173, "logps/chosen": -39.90346908569336, "logps/rejected": -756.3616943359375, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": 0.0955500453710556, "rewards/margins": 7.25354528427124, "rewards/rejected": -7.157995700836182, "step": 17180 }, { "epoch": 0.21, "learning_rate": 4.8315311210589865e-06, "logits/chosen": -3.2163643836975098, "logits/rejected": -3.172175168991089, "logps/chosen": -24.929977416992188, "logps/rejected": -567.9264526367188, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": 0.19493068754673004, "rewards/margins": 5.490783214569092, "rewards/rejected": -5.295853137969971, "step": 17190 }, { "epoch": 0.21, "learning_rate": 4.831153935280483e-06, "logits/chosen": -3.1615374088287354, "logits/rejected": -3.082960605621338, "logps/chosen": -26.785140991210938, "logps/rejected": -517.8507080078125, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.14024227857589722, "rewards/margins": 4.934698581695557, "rewards/rejected": -4.7944560050964355, "step": 17200 }, { "epoch": 0.21, "learning_rate": 4.830776342488976e-06, "logits/chosen": -3.165079355239868, "logits/rejected": -3.127537488937378, "logps/chosen": -39.27711486816406, "logps/rejected": -613.239501953125, "loss": 0.0934, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.04082821309566498, "rewards/margins": 5.7832512855529785, "rewards/rejected": -5.742422580718994, "step": 17210 }, { "epoch": 0.21, "learning_rate": 4.830398342750393e-06, "logits/chosen": -3.2000091075897217, "logits/rejected": -3.1257801055908203, "logps/chosen": -34.44319152832031, "logps/rejected": -696.3051147460938, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": 0.13562574982643127, "rewards/margins": 6.703841209411621, "rewards/rejected": -6.568215847015381, "step": 17220 }, { "epoch": 0.21, "learning_rate": 4.830019936130732e-06, "logits/chosen": -3.147916793823242, "logits/rejected": -3.0672764778137207, "logps/chosen": -36.24736785888672, "logps/rejected": -700.6043701171875, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": 0.16923613846302032, "rewards/margins": 6.779216766357422, "rewards/rejected": -6.60998010635376, "step": 17230 }, { "epoch": 0.21, "learning_rate": 4.829641122696061e-06, "logits/chosen": -3.199143409729004, "logits/rejected": -3.1068663597106934, "logps/chosen": -41.089271545410156, "logps/rejected": -672.2801513671875, "loss": 0.0772, "rewards/accuracies": 1.0, "rewards/chosen": 0.13589957356452942, "rewards/margins": 6.462594509124756, "rewards/rejected": -6.326693534851074, "step": 17240 }, { "epoch": 0.21, "learning_rate": 4.8292619025125224e-06, "logits/chosen": -3.163907527923584, "logits/rejected": -3.1109485626220703, "logps/chosen": -39.73796844482422, "logps/rejected": -512.3187866210938, "loss": 0.1326, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.02148653194308281, "rewards/margins": 4.765145301818848, "rewards/rejected": -4.743659019470215, "step": 17250 }, { "epoch": 0.21, "learning_rate": 4.828882275646322e-06, "logits/chosen": -3.1584560871124268, "logits/rejected": -3.0870015621185303, "logps/chosen": -36.53729248046875, "logps/rejected": -536.4078369140625, "loss": 0.0452, "rewards/accuracies": 1.0, "rewards/chosen": 0.13456562161445618, "rewards/margins": 5.095249652862549, "rewards/rejected": -4.960683345794678, "step": 17260 }, { "epoch": 0.21, "learning_rate": 4.828502242163747e-06, "logits/chosen": -3.172060012817383, "logits/rejected": -3.097383737564087, "logps/chosen": -54.048606872558594, "logps/rejected": -616.3911743164062, "loss": 0.0413, "rewards/accuracies": 1.0, "rewards/chosen": -0.0644255131483078, "rewards/margins": 5.710334777832031, "rewards/rejected": -5.7747602462768555, "step": 17270 }, { "epoch": 0.21, "learning_rate": 4.828121802131146e-06, "logits/chosen": -3.153869152069092, "logits/rejected": -3.09159255027771, "logps/chosen": -29.643634796142578, "logps/rejected": -485.603759765625, "loss": 0.0775, "rewards/accuracies": 1.0, "rewards/chosen": 0.14240151643753052, "rewards/margins": 4.6159868240356445, "rewards/rejected": -4.473585605621338, "step": 17280 }, { "epoch": 0.21, "learning_rate": 4.827740955614944e-06, "logits/chosen": -3.1663315296173096, "logits/rejected": -3.1015307903289795, "logps/chosen": -36.87042999267578, "logps/rejected": -696.2725830078125, "loss": 0.046, "rewards/accuracies": 1.0, "rewards/chosen": 0.09798703342676163, "rewards/margins": 6.675281524658203, "rewards/rejected": -6.577294826507568, "step": 17290 }, { "epoch": 0.21, "learning_rate": 4.827359702681637e-06, "logits/chosen": -3.1578164100646973, "logits/rejected": -3.06885027885437, "logps/chosen": -47.421714782714844, "logps/rejected": -763.7100830078125, "loss": 0.0379, "rewards/accuracies": 1.0, "rewards/chosen": 0.09272267669439316, "rewards/margins": 7.324893951416016, "rewards/rejected": -7.232171058654785, "step": 17300 }, { "epoch": 0.21, "learning_rate": 4.826978043397791e-06, "logits/chosen": -3.1625123023986816, "logits/rejected": -3.1177895069122314, "logps/chosen": -42.25054931640625, "logps/rejected": -697.41943359375, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": 0.09072025120258331, "rewards/margins": 6.674920558929443, "rewards/rejected": -6.584200859069824, "step": 17310 }, { "epoch": 0.21, "learning_rate": 4.8265959778300396e-06, "logits/chosen": -3.1432321071624756, "logits/rejected": -3.076308012008667, "logps/chosen": -34.73093795776367, "logps/rejected": -747.8826293945312, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": 0.13869479298591614, "rewards/margins": 7.2297468185424805, "rewards/rejected": -7.091052055358887, "step": 17320 }, { "epoch": 0.21, "learning_rate": 4.826213506045094e-06, "logits/chosen": -3.1946234703063965, "logits/rejected": -3.1277434825897217, "logps/chosen": -54.9622802734375, "logps/rejected": -619.1636962890625, "loss": 0.1161, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.07741229981184006, "rewards/margins": 5.727742671966553, "rewards/rejected": -5.805155277252197, "step": 17330 }, { "epoch": 0.21, "learning_rate": 4.82583062810973e-06, "logits/chosen": -3.1863701343536377, "logits/rejected": -3.056030750274658, "logps/chosen": -49.53594207763672, "logps/rejected": -680.4720458984375, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": 0.049927279353141785, "rewards/margins": 6.456031799316406, "rewards/rejected": -6.40610408782959, "step": 17340 }, { "epoch": 0.21, "learning_rate": 4.825447344090798e-06, "logits/chosen": -3.1644234657287598, "logits/rejected": -3.0844366550445557, "logps/chosen": -118.8488998413086, "logps/rejected": -690.8604736328125, "loss": 0.1334, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6906370520591736, "rewards/margins": 5.823337554931641, "rewards/rejected": -6.513975620269775, "step": 17350 }, { "epoch": 0.21, "learning_rate": 4.825063654055218e-06, "logits/chosen": -3.172710418701172, "logits/rejected": -3.1259031295776367, "logps/chosen": -32.11761474609375, "logps/rejected": -670.7691650390625, "loss": 0.111, "rewards/accuracies": 1.0, "rewards/chosen": 0.12160869687795639, "rewards/margins": 6.432298183441162, "rewards/rejected": -6.3106889724731445, "step": 17360 }, { "epoch": 0.21, "learning_rate": 4.824679558069983e-06, "logits/chosen": -3.139554500579834, "logits/rejected": -3.0240426063537598, "logps/chosen": -44.37873077392578, "logps/rejected": -763.3937377929688, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": 0.08512042462825775, "rewards/margins": 7.313153266906738, "rewards/rejected": -7.22803258895874, "step": 17370 }, { "epoch": 0.21, "learning_rate": 4.824295056202153e-06, "logits/chosen": -3.155149459838867, "logits/rejected": -3.0963218212127686, "logps/chosen": -80.48614501953125, "logps/rejected": -760.1864624023438, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": -0.2830604612827301, "rewards/margins": 6.911125183105469, "rewards/rejected": -7.194186210632324, "step": 17380 }, { "epoch": 0.21, "learning_rate": 4.823910148518861e-06, "logits/chosen": -3.1996257305145264, "logits/rejected": -3.1445043087005615, "logps/chosen": -46.95568084716797, "logps/rejected": -599.1929931640625, "loss": 0.0426, "rewards/accuracies": 1.0, "rewards/chosen": -0.015703249722719193, "rewards/margins": 5.596991539001465, "rewards/rejected": -5.61269474029541, "step": 17390 }, { "epoch": 0.21, "learning_rate": 4.823524835087312e-06, "logits/chosen": -3.200845241546631, "logits/rejected": -3.1613223552703857, "logps/chosen": -41.51105499267578, "logps/rejected": -513.5722045898438, "loss": 0.0452, "rewards/accuracies": 1.0, "rewards/chosen": 0.010014170780777931, "rewards/margins": 4.766888618469238, "rewards/rejected": -4.7568745613098145, "step": 17400 }, { "epoch": 0.21, "learning_rate": 4.82313911597478e-06, "logits/chosen": -3.1673197746276855, "logits/rejected": -3.100641965866089, "logps/chosen": -43.15608215332031, "logps/rejected": -593.7578125, "loss": 0.111, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.014236378483474255, "rewards/margins": 5.539432525634766, "rewards/rejected": -5.553668975830078, "step": 17410 }, { "epoch": 0.21, "learning_rate": 4.822752991248611e-06, "logits/chosen": -3.1533424854278564, "logits/rejected": -3.1156036853790283, "logps/chosen": -53.988990783691406, "logps/rejected": -672.5851440429688, "loss": 0.0818, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.02902778424322605, "rewards/margins": 6.306706428527832, "rewards/rejected": -6.3357343673706055, "step": 17420 }, { "epoch": 0.21, "learning_rate": 4.82236646097622e-06, "logits/chosen": -3.16389799118042, "logits/rejected": -3.0659756660461426, "logps/chosen": -35.557613372802734, "logps/rejected": -510.218505859375, "loss": 0.05, "rewards/accuracies": 1.0, "rewards/chosen": 0.11169328540563583, "rewards/margins": 4.829502582550049, "rewards/rejected": -4.717808723449707, "step": 17430 }, { "epoch": 0.21, "learning_rate": 4.821979525225096e-06, "logits/chosen": -3.1932759284973145, "logits/rejected": -3.1409361362457275, "logps/chosen": -30.27408790588379, "logps/rejected": -572.2181396484375, "loss": 0.0468, "rewards/accuracies": 1.0, "rewards/chosen": 0.1364559680223465, "rewards/margins": 5.468310356140137, "rewards/rejected": -5.331854820251465, "step": 17440 }, { "epoch": 0.21, "learning_rate": 4.821592184062796e-06, "logits/chosen": -3.150007724761963, "logits/rejected": -3.116215229034424, "logps/chosen": -28.485721588134766, "logps/rejected": -539.8438720703125, "loss": 0.0375, "rewards/accuracies": 1.0, "rewards/chosen": 0.13471364974975586, "rewards/margins": 5.149422645568848, "rewards/rejected": -5.014708518981934, "step": 17450 }, { "epoch": 0.21, "learning_rate": 4.8212044375569475e-06, "logits/chosen": -3.1945481300354004, "logits/rejected": -3.1336445808410645, "logps/chosen": -35.05354309082031, "logps/rejected": -647.2141723632812, "loss": 0.0358, "rewards/accuracies": 1.0, "rewards/chosen": 0.10014010965824127, "rewards/margins": 6.1834235191345215, "rewards/rejected": -6.083283424377441, "step": 17460 }, { "epoch": 0.21, "learning_rate": 4.820816285775251e-06, "logits/chosen": -3.182713270187378, "logits/rejected": -3.1089320182800293, "logps/chosen": -42.087955474853516, "logps/rejected": -622.1853637695312, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": 0.054009854793548584, "rewards/margins": 5.858098030090332, "rewards/rejected": -5.8040876388549805, "step": 17470 }, { "epoch": 0.21, "learning_rate": 4.8204277287854785e-06, "logits/chosen": -3.1806375980377197, "logits/rejected": -3.1609301567077637, "logps/chosen": -61.87163162231445, "logps/rejected": -556.556396484375, "loss": 0.103, "rewards/accuracies": 1.0, "rewards/chosen": -0.1870611310005188, "rewards/margins": 4.987677574157715, "rewards/rejected": -5.174738883972168, "step": 17480 }, { "epoch": 0.21, "learning_rate": 4.820038766655468e-06, "logits/chosen": -3.15868878364563, "logits/rejected": -3.099125385284424, "logps/chosen": -35.53925323486328, "logps/rejected": -698.1181030273438, "loss": 0.0379, "rewards/accuracies": 1.0, "rewards/chosen": 0.11407464742660522, "rewards/margins": 6.687320709228516, "rewards/rejected": -6.573246002197266, "step": 17490 }, { "epoch": 0.21, "learning_rate": 4.819649399453133e-06, "logits/chosen": -3.140777587890625, "logits/rejected": -3.0264313220977783, "logps/chosen": -54.59954071044922, "logps/rejected": -718.8690185546875, "loss": 0.0437, "rewards/accuracies": 1.0, "rewards/chosen": 0.054029643535614014, "rewards/margins": 6.823925018310547, "rewards/rejected": -6.769895076751709, "step": 17500 }, { "epoch": 0.21, "learning_rate": 4.819259627246455e-06, "logits/chosen": -3.1894490718841553, "logits/rejected": -3.080626964569092, "logps/chosen": -50.0887336730957, "logps/rejected": -661.3485107421875, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/chosen": 0.025525569915771484, "rewards/margins": 6.250154495239258, "rewards/rejected": -6.224628925323486, "step": 17510 }, { "epoch": 0.21, "learning_rate": 4.818869450103488e-06, "logits/chosen": -3.1363205909729004, "logits/rejected": -3.0472888946533203, "logps/chosen": -63.51987838745117, "logps/rejected": -866.12939453125, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": -0.06382330507040024, "rewards/margins": 8.173598289489746, "rewards/rejected": -8.237421989440918, "step": 17520 }, { "epoch": 0.21, "learning_rate": 4.818478868092354e-06, "logits/chosen": -3.1839137077331543, "logits/rejected": -3.117748737335205, "logps/chosen": -52.50307083129883, "logps/rejected": -769.04833984375, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": 0.002399646444246173, "rewards/margins": 7.295687198638916, "rewards/rejected": -7.293287754058838, "step": 17530 }, { "epoch": 0.21, "learning_rate": 4.8180878812812506e-06, "logits/chosen": -3.1767303943634033, "logits/rejected": -3.127136468887329, "logps/chosen": -52.35060501098633, "logps/rejected": -595.4345703125, "loss": 0.1778, "rewards/accuracies": 1.0, "rewards/chosen": -0.02829635702073574, "rewards/margins": 5.522373676300049, "rewards/rejected": -5.5506696701049805, "step": 17540 }, { "epoch": 0.21, "learning_rate": 4.817696489738441e-06, "logits/chosen": -3.184204578399658, "logits/rejected": -3.127357244491577, "logps/chosen": -30.62786865234375, "logps/rejected": -604.556884765625, "loss": 0.0422, "rewards/accuracies": 1.0, "rewards/chosen": 0.15054365992546082, "rewards/margins": 5.810416221618652, "rewards/rejected": -5.659872055053711, "step": 17550 }, { "epoch": 0.21, "learning_rate": 4.817304693532261e-06, "logits/chosen": -3.167954921722412, "logits/rejected": -3.1090049743652344, "logps/chosen": -34.96138000488281, "logps/rejected": -634.3348388671875, "loss": 0.05, "rewards/accuracies": 1.0, "rewards/chosen": 0.11639032512903214, "rewards/margins": 6.07926082611084, "rewards/rejected": -5.9628705978393555, "step": 17560 }, { "epoch": 0.21, "learning_rate": 4.816912492731117e-06, "logits/chosen": -3.130803346633911, "logits/rejected": -3.080502510070801, "logps/chosen": -29.09132957458496, "logps/rejected": -480.6962890625, "loss": 0.0543, "rewards/accuracies": 1.0, "rewards/chosen": 0.1448196917772293, "rewards/margins": 4.564518451690674, "rewards/rejected": -4.419699192047119, "step": 17570 }, { "epoch": 0.21, "learning_rate": 4.816519887403488e-06, "logits/chosen": -3.175936460494995, "logits/rejected": -3.110374927520752, "logps/chosen": -50.02449417114258, "logps/rejected": -532.4319458007812, "loss": 0.1165, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.010914541780948639, "rewards/margins": 4.92143440246582, "rewards/rejected": -4.93234920501709, "step": 17580 }, { "epoch": 0.21, "learning_rate": 4.81612687761792e-06, "logits/chosen": -3.2065303325653076, "logits/rejected": -3.0967307090759277, "logps/chosen": -50.07355499267578, "logps/rejected": -614.1001586914062, "loss": 0.1709, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.021357599645853043, "rewards/margins": 5.740013122558594, "rewards/rejected": -5.761370658874512, "step": 17590 }, { "epoch": 0.21, "learning_rate": 4.8157334634430315e-06, "logits/chosen": -3.1851625442504883, "logits/rejected": -3.1505167484283447, "logps/chosen": -38.047203063964844, "logps/rejected": -555.9652099609375, "loss": 0.0653, "rewards/accuracies": 1.0, "rewards/chosen": 0.11590759456157684, "rewards/margins": 5.285164833068848, "rewards/rejected": -5.169257164001465, "step": 17600 }, { "epoch": 0.21, "learning_rate": 4.815339644947513e-06, "logits/chosen": -3.1545662879943848, "logits/rejected": -3.1007332801818848, "logps/chosen": -46.250465393066406, "logps/rejected": -753.754150390625, "loss": 0.0332, "rewards/accuracies": 1.0, "rewards/chosen": 0.032041050493717194, "rewards/margins": 7.158542633056641, "rewards/rejected": -7.126501560211182, "step": 17610 }, { "epoch": 0.21, "learning_rate": 4.814945422200123e-06, "logits/chosen": -3.1806187629699707, "logits/rejected": -3.135883092880249, "logps/chosen": -56.29512405395508, "logps/rejected": -567.1718139648438, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/chosen": -0.03927825763821602, "rewards/margins": 5.256839275360107, "rewards/rejected": -5.296117305755615, "step": 17620 }, { "epoch": 0.21, "learning_rate": 4.81455079526969e-06, "logits/chosen": -3.154240369796753, "logits/rejected": -3.0888521671295166, "logps/chosen": -32.872825622558594, "logps/rejected": -626.388427734375, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/chosen": 0.1208885908126831, "rewards/margins": 6.006565093994141, "rewards/rejected": -5.885676383972168, "step": 17630 }, { "epoch": 0.21, "learning_rate": 4.814155764225118e-06, "logits/chosen": -3.149338483810425, "logits/rejected": -3.109997272491455, "logps/chosen": -34.8269157409668, "logps/rejected": -630.8533325195312, "loss": 0.0359, "rewards/accuracies": 1.0, "rewards/chosen": 0.13932476937770844, "rewards/margins": 6.052849769592285, "rewards/rejected": -5.913525104522705, "step": 17640 }, { "epoch": 0.21, "learning_rate": 4.813760329135376e-06, "logits/chosen": -3.1597397327423096, "logits/rejected": -3.1169955730438232, "logps/chosen": -74.31640625, "logps/rejected": -499.73004150390625, "loss": 0.1151, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.23984667658805847, "rewards/margins": 4.356999397277832, "rewards/rejected": -4.596846580505371, "step": 17650 }, { "epoch": 0.21, "learning_rate": 4.813364490069508e-06, "logits/chosen": -3.159409999847412, "logits/rejected": -3.115265369415283, "logps/chosen": -52.203826904296875, "logps/rejected": -747.7542724609375, "loss": 0.0423, "rewards/accuracies": 1.0, "rewards/chosen": -0.054939381778240204, "rewards/margins": 7.027192115783691, "rewards/rejected": -7.082131385803223, "step": 17660 }, { "epoch": 0.21, "learning_rate": 4.8129682470966245e-06, "logits/chosen": -3.1579787731170654, "logits/rejected": -3.0859994888305664, "logps/chosen": -54.30120086669922, "logps/rejected": -658.9616088867188, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": -0.10070182383060455, "rewards/margins": 6.103592872619629, "rewards/rejected": -6.204294681549072, "step": 17670 }, { "epoch": 0.21, "learning_rate": 4.812571600285909e-06, "logits/chosen": -3.1689975261688232, "logits/rejected": -3.141669750213623, "logps/chosen": -56.872398376464844, "logps/rejected": -586.3925170898438, "loss": 0.0763, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.14898309111595154, "rewards/margins": 5.3475422859191895, "rewards/rejected": -5.496525287628174, "step": 17680 }, { "epoch": 0.21, "learning_rate": 4.8121745497066145e-06, "logits/chosen": -3.1577553749084473, "logits/rejected": -3.120884418487549, "logps/chosen": -64.00022888183594, "logps/rejected": -751.1950073242188, "loss": 0.0599, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.13308104872703552, "rewards/margins": 6.986899375915527, "rewards/rejected": -7.1199798583984375, "step": 17690 }, { "epoch": 0.21, "learning_rate": 4.811777095428067e-06, "logits/chosen": -3.1927742958068848, "logits/rejected": -3.0996646881103516, "logps/chosen": -38.58606719970703, "logps/rejected": -696.8951416015625, "loss": 0.0326, "rewards/accuracies": 1.0, "rewards/chosen": 0.0654199942946434, "rewards/margins": 6.644622802734375, "rewards/rejected": -6.5792036056518555, "step": 17700 }, { "epoch": 0.21, "learning_rate": 4.8113792375196574e-06, "logits/chosen": -3.1980223655700684, "logits/rejected": -3.1039953231811523, "logps/chosen": -109.840087890625, "logps/rejected": -763.5943603515625, "loss": 0.1074, "rewards/accuracies": 1.0, "rewards/chosen": -0.6214901804924011, "rewards/margins": 6.618878364562988, "rewards/rejected": -7.240367889404297, "step": 17710 }, { "epoch": 0.21, "learning_rate": 4.810980976050853e-06, "logits/chosen": -3.1580052375793457, "logits/rejected": -3.126523494720459, "logps/chosen": -41.66132354736328, "logps/rejected": -642.8306884765625, "loss": 0.0379, "rewards/accuracies": 1.0, "rewards/chosen": -0.01209114957600832, "rewards/margins": 6.03860330581665, "rewards/rejected": -6.050694942474365, "step": 17720 }, { "epoch": 0.21, "learning_rate": 4.810582311091189e-06, "logits/chosen": -3.1505541801452637, "logits/rejected": -3.115931510925293, "logps/chosen": -54.56047439575195, "logps/rejected": -402.48577880859375, "loss": 0.1737, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.11460530757904053, "rewards/margins": 3.5386314392089844, "rewards/rejected": -3.6532363891601562, "step": 17730 }, { "epoch": 0.21, "learning_rate": 4.810183242710271e-06, "logits/chosen": -3.1590466499328613, "logits/rejected": -3.0832245349884033, "logps/chosen": -74.48303985595703, "logps/rejected": -613.7059936523438, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": -0.27602091431617737, "rewards/margins": 5.467386245727539, "rewards/rejected": -5.743407249450684, "step": 17740 }, { "epoch": 0.21, "learning_rate": 4.809783770977775e-06, "logits/chosen": -3.188113212585449, "logits/rejected": -3.0824196338653564, "logps/chosen": -50.98122024536133, "logps/rejected": -770.7344970703125, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": -0.004404750652611256, "rewards/margins": 7.299717903137207, "rewards/rejected": -7.304121971130371, "step": 17750 }, { "epoch": 0.21, "learning_rate": 4.809383895963447e-06, "logits/chosen": -3.2078680992126465, "logits/rejected": -3.1542086601257324, "logps/chosen": -48.85110855102539, "logps/rejected": -717.985107421875, "loss": 0.0589, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.04471302404999733, "rewards/margins": 6.748272895812988, "rewards/rejected": -6.7929863929748535, "step": 17760 }, { "epoch": 0.21, "learning_rate": 4.808983617737106e-06, "logits/chosen": -3.162534236907959, "logits/rejected": -3.10632061958313, "logps/chosen": -36.31945037841797, "logps/rejected": -648.4426879882812, "loss": 0.0524, "rewards/accuracies": 1.0, "rewards/chosen": 0.1044306606054306, "rewards/margins": 6.195005893707275, "rewards/rejected": -6.090575218200684, "step": 17770 }, { "epoch": 0.21, "learning_rate": 4.808582936368638e-06, "logits/chosen": -3.175264835357666, "logits/rejected": -3.1123061180114746, "logps/chosen": -26.715463638305664, "logps/rejected": -545.8004760742188, "loss": 0.114, "rewards/accuracies": 1.0, "rewards/chosen": 0.1230401024222374, "rewards/margins": 5.19272518157959, "rewards/rejected": -5.0696845054626465, "step": 17780 }, { "epoch": 0.21, "learning_rate": 4.808181851928e-06, "logits/chosen": -3.207484006881714, "logits/rejected": -3.125835418701172, "logps/chosen": -35.66222381591797, "logps/rejected": -754.0924072265625, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": 0.06535281985998154, "rewards/margins": 7.215084075927734, "rewards/rejected": -7.149730682373047, "step": 17790 }, { "epoch": 0.21, "learning_rate": 4.807780364485223e-06, "logits/chosen": -3.1849477291107178, "logits/rejected": -3.1251380443573, "logps/chosen": -75.84930419921875, "logps/rejected": -516.7012939453125, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": -0.31478363275527954, "rewards/margins": 4.484816551208496, "rewards/rejected": -4.799600124359131, "step": 17800 }, { "epoch": 0.21, "learning_rate": 4.807378474110403e-06, "logits/chosen": -3.1896307468414307, "logits/rejected": -3.1226813793182373, "logps/chosen": -99.34807586669922, "logps/rejected": -709.5632934570312, "loss": 0.1117, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.49756956100463867, "rewards/margins": 6.204056739807129, "rewards/rejected": -6.701626777648926, "step": 17810 }, { "epoch": 0.21, "learning_rate": 4.80697618087371e-06, "logits/chosen": -3.2264580726623535, "logits/rejected": -3.1548233032226562, "logps/chosen": -52.277374267578125, "logps/rejected": -614.7021484375, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": 0.03471808135509491, "rewards/margins": 5.785876274108887, "rewards/rejected": -5.751158714294434, "step": 17820 }, { "epoch": 0.21, "learning_rate": 4.806573484845383e-06, "logits/chosen": -3.1572976112365723, "logits/rejected": -3.119899272918701, "logps/chosen": -33.202659606933594, "logps/rejected": -637.4959106445312, "loss": 0.0425, "rewards/accuracies": 1.0, "rewards/chosen": 0.1323474645614624, "rewards/margins": 6.112172603607178, "rewards/rejected": -5.979825019836426, "step": 17830 }, { "epoch": 0.21, "learning_rate": 4.806170386095732e-06, "logits/chosen": -3.159639835357666, "logits/rejected": -3.0989766120910645, "logps/chosen": -65.09038543701172, "logps/rejected": -754.416015625, "loss": 0.1135, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.12771642208099365, "rewards/margins": 7.013326168060303, "rewards/rejected": -7.141042232513428, "step": 17840 }, { "epoch": 0.21, "learning_rate": 4.805766884695137e-06, "logits/chosen": -3.192399263381958, "logits/rejected": -3.126286029815674, "logps/chosen": -39.26047134399414, "logps/rejected": -644.7303466796875, "loss": 0.0844, "rewards/accuracies": 1.0, "rewards/chosen": 0.06858782470226288, "rewards/margins": 6.125114440917969, "rewards/rejected": -6.0565266609191895, "step": 17850 }, { "epoch": 0.21, "learning_rate": 4.805362980714048e-06, "logits/chosen": -3.202320098876953, "logits/rejected": -3.174640655517578, "logps/chosen": -64.96644592285156, "logps/rejected": -626.6491088867188, "loss": 0.1033, "rewards/accuracies": 1.0, "rewards/chosen": -0.11772345006465912, "rewards/margins": 5.763326168060303, "rewards/rejected": -5.881050109863281, "step": 17860 }, { "epoch": 0.21, "learning_rate": 4.804958674222984e-06, "logits/chosen": -3.1843135356903076, "logits/rejected": -3.109267473220825, "logps/chosen": -116.11299896240234, "logps/rejected": -694.8906860351562, "loss": 0.0485, "rewards/accuracies": 1.0, "rewards/chosen": -0.6269485354423523, "rewards/margins": 5.9236273765563965, "rewards/rejected": -6.550576210021973, "step": 17870 }, { "epoch": 0.21, "learning_rate": 4.804553965292538e-06, "logits/chosen": -3.2076077461242676, "logits/rejected": -3.136321544647217, "logps/chosen": -124.43497467041016, "logps/rejected": -802.1302490234375, "loss": 0.0443, "rewards/accuracies": 1.0, "rewards/chosen": -0.7077934145927429, "rewards/margins": 6.883352756500244, "rewards/rejected": -7.591145992279053, "step": 17880 }, { "epoch": 0.21, "learning_rate": 4.804148853993371e-06, "logits/chosen": -3.1920905113220215, "logits/rejected": -3.1551766395568848, "logps/chosen": -57.00855255126953, "logps/rejected": -525.1209716796875, "loss": 0.0529, "rewards/accuracies": 1.0, "rewards/chosen": -0.14121565222740173, "rewards/margins": 4.731264591217041, "rewards/rejected": -4.872480392456055, "step": 17890 }, { "epoch": 0.21, "learning_rate": 4.803743340396213e-06, "logits/chosen": -3.1702184677124023, "logits/rejected": -3.11179256439209, "logps/chosen": -33.695068359375, "logps/rejected": -581.2276000976562, "loss": 0.0446, "rewards/accuracies": 1.0, "rewards/chosen": 0.11901628971099854, "rewards/margins": 5.556384086608887, "rewards/rejected": -5.4373674392700195, "step": 17900 }, { "epoch": 0.21, "learning_rate": 4.803337424571866e-06, "logits/chosen": -3.175938844680786, "logits/rejected": -3.097325325012207, "logps/chosen": -37.57833480834961, "logps/rejected": -787.5216064453125, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": 0.17720858752727509, "rewards/margins": 7.662317752838135, "rewards/rejected": -7.485108852386475, "step": 17910 }, { "epoch": 0.21, "learning_rate": 4.802931106591203e-06, "logits/chosen": -3.1859018802642822, "logits/rejected": -3.1232805252075195, "logps/chosen": -73.42926025390625, "logps/rejected": -641.9202270507812, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/chosen": -0.2386934757232666, "rewards/margins": 5.781720161437988, "rewards/rejected": -6.020413398742676, "step": 17920 }, { "epoch": 0.21, "learning_rate": 4.802524386525163e-06, "logits/chosen": -3.2045791149139404, "logits/rejected": -3.1693825721740723, "logps/chosen": -22.54744529724121, "logps/rejected": -582.5364990234375, "loss": 0.0369, "rewards/accuracies": 1.0, "rewards/chosen": 0.15328019857406616, "rewards/margins": 5.597068786621094, "rewards/rejected": -5.443789005279541, "step": 17930 }, { "epoch": 0.21, "learning_rate": 4.802117264444762e-06, "logits/chosen": -3.1657614707946777, "logits/rejected": -3.1271235942840576, "logps/chosen": -46.091827392578125, "logps/rejected": -726.5818481445312, "loss": 0.0408, "rewards/accuracies": 1.0, "rewards/chosen": 0.06800167262554169, "rewards/margins": 6.938043117523193, "rewards/rejected": -6.870041847229004, "step": 17940 }, { "epoch": 0.21, "learning_rate": 4.80170974042108e-06, "logits/chosen": -3.1750473976135254, "logits/rejected": -3.148270845413208, "logps/chosen": -70.42708587646484, "logps/rejected": -581.1893920898438, "loss": 0.087, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2764207720756531, "rewards/margins": 5.13725471496582, "rewards/rejected": -5.4136762619018555, "step": 17950 }, { "epoch": 0.21, "learning_rate": 4.801301814525269e-06, "logits/chosen": -3.1717216968536377, "logits/rejected": -3.1008708477020264, "logps/chosen": -48.07933807373047, "logps/rejected": -504.8419494628906, "loss": 0.1131, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.030340367928147316, "rewards/margins": 4.629817485809326, "rewards/rejected": -4.660157680511475, "step": 17960 }, { "epoch": 0.22, "learning_rate": 4.800893486828554e-06, "logits/chosen": -3.2025020122528076, "logits/rejected": -3.1638917922973633, "logps/chosen": -36.18095779418945, "logps/rejected": -667.044677734375, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": 0.10035663843154907, "rewards/margins": 6.374530792236328, "rewards/rejected": -6.274174690246582, "step": 17970 }, { "epoch": 0.22, "learning_rate": 4.800484757402226e-06, "logits/chosen": -3.1863417625427246, "logits/rejected": -3.143549680709839, "logps/chosen": -75.99365234375, "logps/rejected": -570.8824462890625, "loss": 0.1383, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.19254474341869354, "rewards/margins": 5.1401262283325195, "rewards/rejected": -5.332670211791992, "step": 17980 }, { "epoch": 0.22, "learning_rate": 4.800075626317649e-06, "logits/chosen": -3.1675162315368652, "logits/rejected": -3.1273996829986572, "logps/chosen": -29.19466209411621, "logps/rejected": -586.7293701171875, "loss": 0.0422, "rewards/accuracies": 1.0, "rewards/chosen": 0.16242286562919617, "rewards/margins": 5.643396854400635, "rewards/rejected": -5.480974197387695, "step": 17990 }, { "epoch": 0.22, "learning_rate": 4.799666093646256e-06, "logits/chosen": -3.1971356868743896, "logits/rejected": -3.1333909034729004, "logps/chosen": -34.33320617675781, "logps/rejected": -565.0445556640625, "loss": 0.0508, "rewards/accuracies": 1.0, "rewards/chosen": 0.17117717862129211, "rewards/margins": 5.431267738342285, "rewards/rejected": -5.260090351104736, "step": 18000 }, { "epoch": 0.22, "eval_logits/chosen": -3.207535743713379, "eval_logits/rejected": -3.131427764892578, "eval_logps/chosen": -101.4397201538086, "eval_logps/rejected": -850.0244750976562, "eval_loss": 0.49637484550476074, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.4025944769382477, "eval_rewards/margins": 7.630396366119385, "eval_rewards/rejected": -8.032990455627441, "eval_runtime": 1.2158, "eval_samples_per_second": 4.112, "eval_steps_per_second": 2.467, "step": 18000 }, { "epoch": 0.22, "learning_rate": 4.799256159459549e-06, "logits/chosen": -3.212761402130127, "logits/rejected": -3.1772098541259766, "logps/chosen": -52.35419845581055, "logps/rejected": -568.8406982421875, "loss": 0.0413, "rewards/accuracies": 1.0, "rewards/chosen": -0.06305593997240067, "rewards/margins": 5.240860939025879, "rewards/rejected": -5.303916931152344, "step": 18010 }, { "epoch": 0.22, "learning_rate": 4.798845823829103e-06, "logits/chosen": -3.159514904022217, "logits/rejected": -3.0756165981292725, "logps/chosen": -31.405832290649414, "logps/rejected": -651.0117797851562, "loss": 0.0387, "rewards/accuracies": 1.0, "rewards/chosen": 0.10262376070022583, "rewards/margins": 6.23214864730835, "rewards/rejected": -6.129525661468506, "step": 18020 }, { "epoch": 0.22, "learning_rate": 4.798435086826562e-06, "logits/chosen": -3.1631150245666504, "logits/rejected": -3.079031229019165, "logps/chosen": -40.2860107421875, "logps/rejected": -606.79638671875, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/chosen": 0.10983355343341827, "rewards/margins": 5.790705680847168, "rewards/rejected": -5.680871963500977, "step": 18030 }, { "epoch": 0.22, "learning_rate": 4.798023948523637e-06, "logits/chosen": -3.1881730556488037, "logits/rejected": -3.134492874145508, "logps/chosen": -48.9234733581543, "logps/rejected": -770.322998046875, "loss": 0.0466, "rewards/accuracies": 1.0, "rewards/chosen": 0.02711609937250614, "rewards/margins": 7.327735900878906, "rewards/rejected": -7.300620079040527, "step": 18040 }, { "epoch": 0.22, "learning_rate": 4.797612408992114e-06, "logits/chosen": -3.21460223197937, "logits/rejected": -3.184582233428955, "logps/chosen": -28.143108367919922, "logps/rejected": -493.9117126464844, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": 0.15839329361915588, "rewards/margins": 4.708230018615723, "rewards/rejected": -4.549837112426758, "step": 18050 }, { "epoch": 0.22, "learning_rate": 4.797200468303845e-06, "logits/chosen": -3.1998143196105957, "logits/rejected": -3.1262710094451904, "logps/chosen": -41.989036560058594, "logps/rejected": -433.35089111328125, "loss": 0.1308, "rewards/accuracies": 1.0, "rewards/chosen": 0.07518722862005234, "rewards/margins": 4.023754596710205, "rewards/rejected": -3.9485676288604736, "step": 18060 }, { "epoch": 0.22, "learning_rate": 4.796788126530756e-06, "logits/chosen": -3.1758759021759033, "logits/rejected": -3.134380340576172, "logps/chosen": -58.36696243286133, "logps/rejected": -699.2689819335938, "loss": 0.1312, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.13061198592185974, "rewards/margins": 6.476489067077637, "rewards/rejected": -6.6071014404296875, "step": 18070 }, { "epoch": 0.22, "learning_rate": 4.796375383744838e-06, "logits/chosen": -3.173157215118408, "logits/rejected": -3.137803792953491, "logps/chosen": -49.873878479003906, "logps/rejected": -566.5003662109375, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": -0.037307873368263245, "rewards/margins": 5.237374782562256, "rewards/rejected": -5.274682998657227, "step": 18080 }, { "epoch": 0.22, "learning_rate": 4.795962240018156e-06, "logits/chosen": -3.214965343475342, "logits/rejected": -3.174691677093506, "logps/chosen": -39.524600982666016, "logps/rejected": -697.96533203125, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": 0.06063282489776611, "rewards/margins": 6.646792411804199, "rewards/rejected": -6.586159706115723, "step": 18090 }, { "epoch": 0.22, "learning_rate": 4.795548695422843e-06, "logits/chosen": -3.197629451751709, "logits/rejected": -3.1059162616729736, "logps/chosen": -39.14727020263672, "logps/rejected": -792.6793212890625, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": 0.13823214173316956, "rewards/margins": 7.6542649269104, "rewards/rejected": -7.5160322189331055, "step": 18100 }, { "epoch": 0.22, "learning_rate": 4.795134750031104e-06, "logits/chosen": -3.1696553230285645, "logits/rejected": -3.1071319580078125, "logps/chosen": -26.9601993560791, "logps/rejected": -556.6591796875, "loss": 0.0506, "rewards/accuracies": 1.0, "rewards/chosen": 0.19057145714759827, "rewards/margins": 5.375043869018555, "rewards/rejected": -5.18447208404541, "step": 18110 }, { "epoch": 0.22, "learning_rate": 4.794720403915212e-06, "logits/chosen": -3.1912763118743896, "logits/rejected": -3.123769760131836, "logps/chosen": -39.68696212768555, "logps/rejected": -778.06396484375, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": 0.08644334226846695, "rewards/margins": 7.473822593688965, "rewards/rejected": -7.3873796463012695, "step": 18120 }, { "epoch": 0.22, "learning_rate": 4.794305657147511e-06, "logits/chosen": -3.177917003631592, "logits/rejected": -3.0974364280700684, "logps/chosen": -40.20317840576172, "logps/rejected": -611.6114501953125, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.15936391055583954, "rewards/margins": 5.872964859008789, "rewards/rejected": -5.713601112365723, "step": 18130 }, { "epoch": 0.22, "learning_rate": 4.793890509800415e-06, "logits/chosen": -3.1965904235839844, "logits/rejected": -3.1684272289276123, "logps/chosen": -23.108779907226562, "logps/rejected": -534.912109375, "loss": 0.0394, "rewards/accuracies": 1.0, "rewards/chosen": 0.2136550396680832, "rewards/margins": 5.178016662597656, "rewards/rejected": -4.964361667633057, "step": 18140 }, { "epoch": 0.22, "learning_rate": 4.793474961946406e-06, "logits/chosen": -3.1728739738464355, "logits/rejected": -3.1070544719696045, "logps/chosen": -31.561450958251953, "logps/rejected": -601.6348876953125, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": 0.1628631055355072, "rewards/margins": 5.784971237182617, "rewards/rejected": -5.622107982635498, "step": 18150 }, { "epoch": 0.22, "learning_rate": 4.793059013658039e-06, "logits/chosen": -3.179612636566162, "logits/rejected": -3.0823159217834473, "logps/chosen": -32.80236053466797, "logps/rejected": -791.71142578125, "loss": 0.0414, "rewards/accuracies": 1.0, "rewards/chosen": 0.14408765733242035, "rewards/margins": 7.659456729888916, "rewards/rejected": -7.515369415283203, "step": 18160 }, { "epoch": 0.22, "learning_rate": 4.7926426650079364e-06, "logits/chosen": -3.183622360229492, "logits/rejected": -3.0765271186828613, "logps/chosen": -39.139076232910156, "logps/rejected": -548.9674682617188, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": 0.15048350393772125, "rewards/margins": 5.245111465454102, "rewards/rejected": -5.094627857208252, "step": 18170 }, { "epoch": 0.22, "learning_rate": 4.792225916068793e-06, "logits/chosen": -3.208576202392578, "logits/rejected": -3.1506714820861816, "logps/chosen": -27.312707901000977, "logps/rejected": -493.366455078125, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.13105063140392303, "rewards/margins": 4.693572998046875, "rewards/rejected": -4.5625224113464355, "step": 18180 }, { "epoch": 0.22, "learning_rate": 4.79180876691337e-06, "logits/chosen": -3.1787025928497314, "logits/rejected": -3.1283211708068848, "logps/chosen": -29.230438232421875, "logps/rejected": -650.6400146484375, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": 0.14368630945682526, "rewards/margins": 6.274386405944824, "rewards/rejected": -6.130700588226318, "step": 18190 }, { "epoch": 0.22, "learning_rate": 4.7913912176145014e-06, "logits/chosen": -3.1751253604888916, "logits/rejected": -3.1136093139648438, "logps/chosen": -28.87114906311035, "logps/rejected": -666.7399291992188, "loss": 0.0422, "rewards/accuracies": 1.0, "rewards/chosen": 0.18731459975242615, "rewards/margins": 6.4594526290893555, "rewards/rejected": -6.272137641906738, "step": 18200 }, { "epoch": 0.22, "learning_rate": 4.790973268245092e-06, "logits/chosen": -3.230997085571289, "logits/rejected": -3.14585280418396, "logps/chosen": -39.83819580078125, "logps/rejected": -656.4375, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/chosen": 0.13037995994091034, "rewards/margins": 6.30684757232666, "rewards/rejected": -6.176468372344971, "step": 18210 }, { "epoch": 0.22, "learning_rate": 4.790554918878111e-06, "logits/chosen": -3.202261447906494, "logits/rejected": -3.1960220336914062, "logps/chosen": -28.84292221069336, "logps/rejected": -371.3907165527344, "loss": 0.1112, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.04863981902599335, "rewards/margins": 3.400582790374756, "rewards/rejected": -3.351942777633667, "step": 18220 }, { "epoch": 0.22, "learning_rate": 4.790136169586604e-06, "logits/chosen": -3.1562905311584473, "logits/rejected": -3.0643115043640137, "logps/chosen": -27.230615615844727, "logps/rejected": -578.2605590820312, "loss": 0.0445, "rewards/accuracies": 1.0, "rewards/chosen": 0.17669053375720978, "rewards/margins": 5.5873847007751465, "rewards/rejected": -5.410694122314453, "step": 18230 }, { "epoch": 0.22, "learning_rate": 4.789717020443681e-06, "logits/chosen": -3.16294527053833, "logits/rejected": -3.0999820232391357, "logps/chosen": -31.9240665435791, "logps/rejected": -527.1412353515625, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": 0.14435002207756042, "rewards/margins": 5.0385966300964355, "rewards/rejected": -4.894246578216553, "step": 18240 }, { "epoch": 0.22, "learning_rate": 4.789297471522527e-06, "logits/chosen": -3.180896282196045, "logits/rejected": -3.1138296127319336, "logps/chosen": -29.821874618530273, "logps/rejected": -599.732666015625, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": 0.1320124864578247, "rewards/margins": 5.739439010620117, "rewards/rejected": -5.607426643371582, "step": 18250 }, { "epoch": 0.22, "learning_rate": 4.7888775228963925e-06, "logits/chosen": -3.181170701980591, "logits/rejected": -3.0688538551330566, "logps/chosen": -54.14055252075195, "logps/rejected": -777.413330078125, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": 0.06218031048774719, "rewards/margins": 7.416665077209473, "rewards/rejected": -7.354484558105469, "step": 18260 }, { "epoch": 0.22, "learning_rate": 4.788457174638599e-06, "logits/chosen": -3.215388059616089, "logits/rejected": -3.163517475128174, "logps/chosen": -37.01213073730469, "logps/rejected": -485.4422302246094, "loss": 0.0473, "rewards/accuracies": 1.0, "rewards/chosen": 0.04026117920875549, "rewards/margins": 4.527467250823975, "rewards/rejected": -4.48720645904541, "step": 18270 }, { "epoch": 0.22, "learning_rate": 4.78803642682254e-06, "logits/chosen": -3.1652603149414062, "logits/rejected": -3.1104588508605957, "logps/chosen": -36.689125061035156, "logps/rejected": -400.0694274902344, "loss": 0.1238, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.09892378747463226, "rewards/margins": 3.7223472595214844, "rewards/rejected": -3.6234230995178223, "step": 18280 }, { "epoch": 0.22, "learning_rate": 4.787615279521675e-06, "logits/chosen": -3.18367338180542, "logits/rejected": -3.146284818649292, "logps/chosen": -26.2878475189209, "logps/rejected": -662.7222290039062, "loss": 0.0678, "rewards/accuracies": 1.0, "rewards/chosen": 0.12124156951904297, "rewards/margins": 6.373045921325684, "rewards/rejected": -6.251803874969482, "step": 18290 }, { "epoch": 0.22, "learning_rate": 4.787193732809535e-06, "logits/chosen": -3.175574779510498, "logits/rejected": -3.130404233932495, "logps/chosen": -45.11110305786133, "logps/rejected": -567.5589599609375, "loss": 0.1089, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.004199604503810406, "rewards/margins": 5.291866779327393, "rewards/rejected": -5.296065807342529, "step": 18300 }, { "epoch": 0.22, "learning_rate": 4.786771786759722e-06, "logits/chosen": -3.1773629188537598, "logits/rejected": -3.128284454345703, "logps/chosen": -27.95792007446289, "logps/rejected": -491.6102600097656, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": 0.15901662409305573, "rewards/margins": 4.704617977142334, "rewards/rejected": -4.545601844787598, "step": 18310 }, { "epoch": 0.22, "learning_rate": 4.7863494414459064e-06, "logits/chosen": -3.151427984237671, "logits/rejected": -3.093869686126709, "logps/chosen": -27.9951114654541, "logps/rejected": -473.4736328125, "loss": 0.1639, "rewards/accuracies": 1.0, "rewards/chosen": 0.14139868319034576, "rewards/margins": 4.490683555603027, "rewards/rejected": -4.349285125732422, "step": 18320 }, { "epoch": 0.22, "learning_rate": 4.785926696941828e-06, "logits/chosen": -3.130499839782715, "logits/rejected": -3.1013782024383545, "logps/chosen": -78.63509368896484, "logps/rejected": -489.8977966308594, "loss": 0.0584, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3459841012954712, "rewards/margins": 4.180445194244385, "rewards/rejected": -4.526429653167725, "step": 18330 }, { "epoch": 0.22, "learning_rate": 4.785503553321298e-06, "logits/chosen": -3.185429096221924, "logits/rejected": -3.108726978302002, "logps/chosen": -41.55928039550781, "logps/rejected": -717.8382568359375, "loss": 0.0367, "rewards/accuracies": 1.0, "rewards/chosen": 0.07636447995901108, "rewards/margins": 6.855342864990234, "rewards/rejected": -6.778977870941162, "step": 18340 }, { "epoch": 0.22, "learning_rate": 4.785080010658195e-06, "logits/chosen": -3.181415557861328, "logits/rejected": -3.155701160430908, "logps/chosen": -43.91828536987305, "logps/rejected": -607.3281860351562, "loss": 0.0443, "rewards/accuracies": 1.0, "rewards/chosen": 0.025328021496534348, "rewards/margins": 5.708200931549072, "rewards/rejected": -5.682873725891113, "step": 18350 }, { "epoch": 0.22, "learning_rate": 4.784656069026469e-06, "logits/chosen": -3.1519973278045654, "logits/rejected": -3.0834898948669434, "logps/chosen": -57.800811767578125, "logps/rejected": -645.223876953125, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": -0.10087855160236359, "rewards/margins": 5.957427024841309, "rewards/rejected": -6.058305740356445, "step": 18360 }, { "epoch": 0.22, "learning_rate": 4.784231728500138e-06, "logits/chosen": -3.1984517574310303, "logits/rejected": -3.165820598602295, "logps/chosen": -41.53333282470703, "logps/rejected": -543.6080932617188, "loss": 0.0358, "rewards/accuracies": 1.0, "rewards/chosen": 0.07769148051738739, "rewards/margins": 5.142368793487549, "rewards/rejected": -5.064676761627197, "step": 18370 }, { "epoch": 0.22, "learning_rate": 4.783806989153292e-06, "logits/chosen": -3.2005984783172607, "logits/rejected": -3.180593967437744, "logps/chosen": -33.17485809326172, "logps/rejected": -513.560302734375, "loss": 0.053, "rewards/accuracies": 1.0, "rewards/chosen": 0.13888844847679138, "rewards/margins": 4.8985490798950195, "rewards/rejected": -4.759660720825195, "step": 18380 }, { "epoch": 0.22, "learning_rate": 4.783381851060088e-06, "logits/chosen": -3.1776630878448486, "logits/rejected": -3.1174700260162354, "logps/chosen": -41.80768966674805, "logps/rejected": -719.4383544921875, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": 0.06724199652671814, "rewards/margins": 6.86382532119751, "rewards/rejected": -6.7965826988220215, "step": 18390 }, { "epoch": 0.22, "learning_rate": 4.782956314294755e-06, "logits/chosen": -3.212069272994995, "logits/rejected": -3.178858995437622, "logps/chosen": -28.402191162109375, "logps/rejected": -554.5611572265625, "loss": 0.0425, "rewards/accuracies": 1.0, "rewards/chosen": 0.14449159801006317, "rewards/margins": 5.321532249450684, "rewards/rejected": -5.1770405769348145, "step": 18400 }, { "epoch": 0.22, "learning_rate": 4.782530378931591e-06, "logits/chosen": -3.1826064586639404, "logits/rejected": -3.121717929840088, "logps/chosen": -54.4451904296875, "logps/rejected": -693.6646118164062, "loss": 0.0394, "rewards/accuracies": 1.0, "rewards/chosen": -0.06473906338214874, "rewards/margins": 6.475717067718506, "rewards/rejected": -6.540456295013428, "step": 18410 }, { "epoch": 0.22, "learning_rate": 4.782104045044961e-06, "logits/chosen": -3.179638385772705, "logits/rejected": -3.1419005393981934, "logps/chosen": -66.6184310913086, "logps/rejected": -472.5523376464844, "loss": 0.0986, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.24975422024726868, "rewards/margins": 4.101741790771484, "rewards/rejected": -4.351495265960693, "step": 18420 }, { "epoch": 0.22, "learning_rate": 4.781677312709304e-06, "logits/chosen": -3.2083568572998047, "logits/rejected": -3.094937562942505, "logps/chosen": -44.48143768310547, "logps/rejected": -670.1378784179688, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": 0.041509490460157394, "rewards/margins": 6.356449127197266, "rewards/rejected": -6.314939498901367, "step": 18430 }, { "epoch": 0.22, "learning_rate": 4.781250181999124e-06, "logits/chosen": -3.1725924015045166, "logits/rejected": -3.105118989944458, "logps/chosen": -28.242733001708984, "logps/rejected": -524.9562377929688, "loss": 0.0523, "rewards/accuracies": 1.0, "rewards/chosen": 0.1418960988521576, "rewards/margins": 5.013023376464844, "rewards/rejected": -4.871127128601074, "step": 18440 }, { "epoch": 0.22, "learning_rate": 4.7808226529889995e-06, "logits/chosen": -3.1807656288146973, "logits/rejected": -3.058096408843994, "logps/chosen": -74.27516174316406, "logps/rejected": -812.1507568359375, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": -0.12353017181158066, "rewards/margins": 7.583510398864746, "rewards/rejected": -7.707040309906006, "step": 18450 }, { "epoch": 0.22, "learning_rate": 4.780394725753574e-06, "logits/chosen": -3.1445701122283936, "logits/rejected": -3.082144021987915, "logps/chosen": -84.76410675048828, "logps/rejected": -632.2686767578125, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": -0.40019360184669495, "rewards/margins": 5.526142597198486, "rewards/rejected": -5.92633581161499, "step": 18460 }, { "epoch": 0.22, "learning_rate": 4.779966400367562e-06, "logits/chosen": -3.1627445220947266, "logits/rejected": -3.1351828575134277, "logps/chosen": -50.22356414794922, "logps/rejected": -499.00445556640625, "loss": 0.114, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.08908401429653168, "rewards/margins": 4.524748802185059, "rewards/rejected": -4.613833427429199, "step": 18470 }, { "epoch": 0.22, "learning_rate": 4.7795376769057506e-06, "logits/chosen": -3.137233257293701, "logits/rejected": -3.0620598793029785, "logps/chosen": -70.27349853515625, "logps/rejected": -570.669921875, "loss": 0.0938, "rewards/accuracies": 1.0, "rewards/chosen": -0.2845275402069092, "rewards/margins": 5.017086982727051, "rewards/rejected": -5.301615238189697, "step": 18480 }, { "epoch": 0.22, "learning_rate": 4.779108555442991e-06, "logits/chosen": -3.229330539703369, "logits/rejected": -3.1655688285827637, "logps/chosen": -28.168682098388672, "logps/rejected": -598.261474609375, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": 0.1132231131196022, "rewards/margins": 5.725486755371094, "rewards/rejected": -5.612263202667236, "step": 18490 }, { "epoch": 0.22, "learning_rate": 4.778679036054208e-06, "logits/chosen": -3.1953742504119873, "logits/rejected": -3.1303582191467285, "logps/chosen": -119.82691955566406, "logps/rejected": -636.4979248046875, "loss": 0.0779, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6842309236526489, "rewards/margins": 5.2853851318359375, "rewards/rejected": -5.969616413116455, "step": 18500 }, { "epoch": 0.22, "learning_rate": 4.778249118814393e-06, "logits/chosen": -3.1695594787597656, "logits/rejected": -3.1219170093536377, "logps/chosen": -34.073883056640625, "logps/rejected": -677.9248046875, "loss": 0.0985, "rewards/accuracies": 1.0, "rewards/chosen": 0.09959997981786728, "rewards/margins": 6.480832099914551, "rewards/rejected": -6.381231307983398, "step": 18510 }, { "epoch": 0.22, "learning_rate": 4.7778188037986106e-06, "logits/chosen": -3.2308554649353027, "logits/rejected": -3.1987500190734863, "logps/chosen": -34.443931579589844, "logps/rejected": -500.9752502441406, "loss": 0.1472, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.09858331829309464, "rewards/margins": 4.741473197937012, "rewards/rejected": -4.642889976501465, "step": 18520 }, { "epoch": 0.22, "learning_rate": 4.7773880910819906e-06, "logits/chosen": -3.2020630836486816, "logits/rejected": -3.13232684135437, "logps/chosen": -37.92041015625, "logps/rejected": -787.5517578125, "loss": 0.0467, "rewards/accuracies": 1.0, "rewards/chosen": 0.10087764263153076, "rewards/margins": 7.5773186683654785, "rewards/rejected": -7.4764404296875, "step": 18530 }, { "epoch": 0.22, "learning_rate": 4.7769569807397354e-06, "logits/chosen": -3.1629703044891357, "logits/rejected": -3.09090518951416, "logps/chosen": -56.01306915283203, "logps/rejected": -736.4251098632812, "loss": 0.0436, "rewards/accuracies": 1.0, "rewards/chosen": -0.04120514541864395, "rewards/margins": 6.9230523109436035, "rewards/rejected": -6.964257717132568, "step": 18540 }, { "epoch": 0.22, "learning_rate": 4.776525472847116e-06, "logits/chosen": -3.1508822441101074, "logits/rejected": -3.118875503540039, "logps/chosen": -39.93342971801758, "logps/rejected": -629.2947998046875, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": 0.021132057532668114, "rewards/margins": 5.927068710327148, "rewards/rejected": -5.905936241149902, "step": 18550 }, { "epoch": 0.22, "learning_rate": 4.776093567479471e-06, "logits/chosen": -3.1557559967041016, "logits/rejected": -3.1037662029266357, "logps/chosen": -41.62799835205078, "logps/rejected": -577.5799560546875, "loss": 0.0509, "rewards/accuracies": 1.0, "rewards/chosen": 0.031193453818559647, "rewards/margins": 5.430711269378662, "rewards/rejected": -5.399518013000488, "step": 18560 }, { "epoch": 0.22, "learning_rate": 4.775661264712211e-06, "logits/chosen": -3.1608588695526123, "logits/rejected": -3.129122257232666, "logps/chosen": -39.166709899902344, "logps/rejected": -644.9545288085938, "loss": 0.1097, "rewards/accuracies": 1.0, "rewards/chosen": 0.09482765197753906, "rewards/margins": 6.154202938079834, "rewards/rejected": -6.059375762939453, "step": 18570 }, { "epoch": 0.22, "learning_rate": 4.7752285646208145e-06, "logits/chosen": -3.1753487586975098, "logits/rejected": -3.0879013538360596, "logps/chosen": -47.643619537353516, "logps/rejected": -681.9122924804688, "loss": 0.0418, "rewards/accuracies": 1.0, "rewards/chosen": 0.04968566820025444, "rewards/margins": 6.4722580909729, "rewards/rejected": -6.422572135925293, "step": 18580 }, { "epoch": 0.22, "learning_rate": 4.77479546728083e-06, "logits/chosen": -3.1722981929779053, "logits/rejected": -3.1123321056365967, "logps/chosen": -61.65944290161133, "logps/rejected": -807.2498168945312, "loss": 0.0982, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.11035485565662384, "rewards/margins": 7.5717315673828125, "rewards/rejected": -7.682085990905762, "step": 18590 }, { "epoch": 0.22, "learning_rate": 4.774361972767874e-06, "logits/chosen": -3.1856324672698975, "logits/rejected": -3.148763418197632, "logps/chosen": -37.72772979736328, "logps/rejected": -504.27294921875, "loss": 0.1093, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.014080586843192577, "rewards/margins": 4.667580604553223, "rewards/rejected": -4.681662082672119, "step": 18600 }, { "epoch": 0.22, "learning_rate": 4.773928081157635e-06, "logits/chosen": -3.173680305480957, "logits/rejected": -3.142148017883301, "logps/chosen": -60.23252487182617, "logps/rejected": -484.7025451660156, "loss": 0.051, "rewards/accuracies": 1.0, "rewards/chosen": -0.16085784137248993, "rewards/margins": 4.310990810394287, "rewards/rejected": -4.471848011016846, "step": 18610 }, { "epoch": 0.22, "learning_rate": 4.773493792525868e-06, "logits/chosen": -3.2236180305480957, "logits/rejected": -3.146239757537842, "logps/chosen": -33.582176208496094, "logps/rejected": -568.7937622070312, "loss": 0.0432, "rewards/accuracies": 1.0, "rewards/chosen": 0.13902027904987335, "rewards/margins": 5.441267490386963, "rewards/rejected": -5.302247047424316, "step": 18620 }, { "epoch": 0.22, "learning_rate": 4.7730591069484e-06, "logits/chosen": -3.1693177223205566, "logits/rejected": -3.053844690322876, "logps/chosen": -36.26626968383789, "logps/rejected": -872.2716064453125, "loss": 0.1065, "rewards/accuracies": 1.0, "rewards/chosen": 0.12022681534290314, "rewards/margins": 8.446942329406738, "rewards/rejected": -8.326715469360352, "step": 18630 }, { "epoch": 0.22, "learning_rate": 4.772624024501125e-06, "logits/chosen": -3.158674478530884, "logits/rejected": -3.1085078716278076, "logps/chosen": -21.31422233581543, "logps/rejected": -353.1741943359375, "loss": 0.0539, "rewards/accuracies": 1.0, "rewards/chosen": 0.1855442374944687, "rewards/margins": 3.3481528759002686, "rewards/rejected": -3.1626083850860596, "step": 18640 }, { "epoch": 0.22, "learning_rate": 4.772188545260007e-06, "logits/chosen": -3.174928903579712, "logits/rejected": -3.099351406097412, "logps/chosen": -54.089569091796875, "logps/rejected": -745.4151611328125, "loss": 0.0426, "rewards/accuracies": 1.0, "rewards/chosen": 0.004038224928081036, "rewards/margins": 7.045679569244385, "rewards/rejected": -7.0416412353515625, "step": 18650 }, { "epoch": 0.22, "learning_rate": 4.7717526693010795e-06, "logits/chosen": -3.173463821411133, "logits/rejected": -3.0921003818511963, "logps/chosen": -52.329315185546875, "logps/rejected": -664.3671264648438, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": 0.029322605580091476, "rewards/margins": 6.2412214279174805, "rewards/rejected": -6.211899757385254, "step": 18660 }, { "epoch": 0.22, "learning_rate": 4.771316396700446e-06, "logits/chosen": -3.1319804191589355, "logits/rejected": -3.0592200756073, "logps/chosen": -31.016876220703125, "logps/rejected": -657.915283203125, "loss": 0.0919, "rewards/accuracies": 1.0, "rewards/chosen": 0.11570306122303009, "rewards/margins": 6.305529594421387, "rewards/rejected": -6.189826965332031, "step": 18670 }, { "epoch": 0.22, "learning_rate": 4.770879727534278e-06, "logits/chosen": -3.1847758293151855, "logits/rejected": -3.102553367614746, "logps/chosen": -59.165550231933594, "logps/rejected": -796.1209106445312, "loss": 0.1475, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0880947932600975, "rewards/margins": 7.480433464050293, "rewards/rejected": -7.568528652191162, "step": 18680 }, { "epoch": 0.22, "learning_rate": 4.770442661878816e-06, "logits/chosen": -3.183737277984619, "logits/rejected": -3.180345058441162, "logps/chosen": -23.316442489624023, "logps/rejected": -407.8121643066406, "loss": 0.0502, "rewards/accuracies": 1.0, "rewards/chosen": 0.1694563329219818, "rewards/margins": 3.8796439170837402, "rewards/rejected": -3.7101879119873047, "step": 18690 }, { "epoch": 0.22, "learning_rate": 4.770005199810372e-06, "logits/chosen": -3.167412757873535, "logits/rejected": -3.097306251525879, "logps/chosen": -77.01338958740234, "logps/rejected": -666.7518310546875, "loss": 0.061, "rewards/accuracies": 1.0, "rewards/chosen": -0.30019238591194153, "rewards/margins": 5.969631195068359, "rewards/rejected": -6.269824504852295, "step": 18700 }, { "epoch": 0.22, "learning_rate": 4.769567341405323e-06, "logits/chosen": -3.165241241455078, "logits/rejected": -3.0707383155822754, "logps/chosen": -40.70953369140625, "logps/rejected": -900.9002075195312, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": 0.039708781987428665, "rewards/margins": 8.654905319213867, "rewards/rejected": -8.61519718170166, "step": 18710 }, { "epoch": 0.22, "learning_rate": 4.769129086740121e-06, "logits/chosen": -3.2043678760528564, "logits/rejected": -3.0864856243133545, "logps/chosen": -75.31343078613281, "logps/rejected": -694.990966796875, "loss": 0.0379, "rewards/accuracies": 1.0, "rewards/chosen": -0.2567828595638275, "rewards/margins": 6.302128314971924, "rewards/rejected": -6.558911323547363, "step": 18720 }, { "epoch": 0.22, "learning_rate": 4.7686904358912835e-06, "logits/chosen": -3.1902108192443848, "logits/rejected": -3.1051175594329834, "logps/chosen": -60.77299118041992, "logps/rejected": -619.5053100585938, "loss": 0.1111, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.13005715608596802, "rewards/margins": 5.660633087158203, "rewards/rejected": -5.790689468383789, "step": 18730 }, { "epoch": 0.22, "learning_rate": 4.768251388935395e-06, "logits/chosen": -3.2032108306884766, "logits/rejected": -3.1472365856170654, "logps/chosen": -42.79222869873047, "logps/rejected": -762.3805541992188, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": 0.020988086238503456, "rewards/margins": 7.252587795257568, "rewards/rejected": -7.2315993309021, "step": 18740 }, { "epoch": 0.22, "learning_rate": 4.767811945949116e-06, "logits/chosen": -3.2135348320007324, "logits/rejected": -3.1618175506591797, "logps/chosen": -39.87192153930664, "logps/rejected": -606.5657348632812, "loss": 0.0954, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.002075445605441928, "rewards/margins": 5.685366630554199, "rewards/rejected": -5.687441825866699, "step": 18750 }, { "epoch": 0.22, "learning_rate": 4.767372107009168e-06, "logits/chosen": -3.226512908935547, "logits/rejected": -3.1481733322143555, "logps/chosen": -34.25383758544922, "logps/rejected": -587.3018798828125, "loss": 0.0443, "rewards/accuracies": 1.0, "rewards/chosen": 0.1395721733570099, "rewards/margins": 5.630918979644775, "rewards/rejected": -5.491347312927246, "step": 18760 }, { "epoch": 0.22, "learning_rate": 4.766931872192348e-06, "logits/chosen": -3.1991207599639893, "logits/rejected": -3.138512134552002, "logps/chosen": -40.911529541015625, "logps/rejected": -608.0410766601562, "loss": 0.0513, "rewards/accuracies": 1.0, "rewards/chosen": 0.021774737164378166, "rewards/margins": 5.7193779945373535, "rewards/rejected": -5.697603225708008, "step": 18770 }, { "epoch": 0.22, "learning_rate": 4.766491241575519e-06, "logits/chosen": -3.1951842308044434, "logits/rejected": -3.149385452270508, "logps/chosen": -33.74518966674805, "logps/rejected": -587.5349731445312, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": 0.12414728105068207, "rewards/margins": 5.610431671142578, "rewards/rejected": -5.4862847328186035, "step": 18780 }, { "epoch": 0.22, "learning_rate": 4.766050215235614e-06, "logits/chosen": -3.16587233543396, "logits/rejected": -3.107762098312378, "logps/chosen": -69.23873138427734, "logps/rejected": -684.5079345703125, "loss": 0.0529, "rewards/accuracies": 1.0, "rewards/chosen": -0.17321787774562836, "rewards/margins": 6.295710563659668, "rewards/rejected": -6.468928337097168, "step": 18790 }, { "epoch": 0.23, "learning_rate": 4.7656087932496365e-06, "logits/chosen": -3.2106196880340576, "logits/rejected": -3.1591458320617676, "logps/chosen": -62.400245666503906, "logps/rejected": -434.24908447265625, "loss": 0.0674, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.21227765083312988, "rewards/margins": 3.759064197540283, "rewards/rejected": -3.971341371536255, "step": 18800 }, { "epoch": 0.23, "learning_rate": 4.765166975694655e-06, "logits/chosen": -3.1666207313537598, "logits/rejected": -3.094203233718872, "logps/chosen": -76.30030059814453, "logps/rejected": -578.1657104492188, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": -0.25167933106422424, "rewards/margins": 5.144152641296387, "rewards/rejected": -5.395832061767578, "step": 18810 }, { "epoch": 0.23, "learning_rate": 4.764724762647811e-06, "logits/chosen": -3.1666946411132812, "logits/rejected": -3.1217198371887207, "logps/chosen": -34.43369674682617, "logps/rejected": -568.001220703125, "loss": 0.0379, "rewards/accuracies": 1.0, "rewards/chosen": 0.07177414000034332, "rewards/margins": 5.368123531341553, "rewards/rejected": -5.29634952545166, "step": 18820 }, { "epoch": 0.23, "learning_rate": 4.764282154186314e-06, "logits/chosen": -3.17620587348938, "logits/rejected": -3.088658094406128, "logps/chosen": -44.34565353393555, "logps/rejected": -580.907958984375, "loss": 0.119, "rewards/accuracies": 1.0, "rewards/chosen": -0.006545289419591427, "rewards/margins": 5.432892322540283, "rewards/rejected": -5.4394378662109375, "step": 18830 }, { "epoch": 0.23, "learning_rate": 4.7638391503874404e-06, "logits/chosen": -3.2080368995666504, "logits/rejected": -3.1394734382629395, "logps/chosen": -128.44537353515625, "logps/rejected": -609.63037109375, "loss": 0.0734, "rewards/accuracies": 1.0, "rewards/chosen": -0.7317646145820618, "rewards/margins": 4.984428405761719, "rewards/rejected": -5.716193199157715, "step": 18840 }, { "epoch": 0.23, "learning_rate": 4.76339575132854e-06, "logits/chosen": -3.226287841796875, "logits/rejected": -3.1761016845703125, "logps/chosen": -50.79193878173828, "logps/rejected": -639.5128173828125, "loss": 0.0342, "rewards/accuracies": 1.0, "rewards/chosen": -0.06304476410150528, "rewards/margins": 5.933650016784668, "rewards/rejected": -5.996695041656494, "step": 18850 }, { "epoch": 0.23, "learning_rate": 4.762951957087027e-06, "logits/chosen": -3.2132956981658936, "logits/rejected": -3.1346075534820557, "logps/chosen": -46.425228118896484, "logps/rejected": -775.6447143554688, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": 0.03058497980237007, "rewards/margins": 7.4010329246521, "rewards/rejected": -7.370448112487793, "step": 18860 }, { "epoch": 0.23, "learning_rate": 4.762507767740388e-06, "logits/chosen": -3.1937546730041504, "logits/rejected": -3.1370270252227783, "logps/chosen": -42.94987106323242, "logps/rejected": -649.8162841796875, "loss": 0.1107, "rewards/accuracies": 1.0, "rewards/chosen": 0.0589224211871624, "rewards/margins": 6.170631408691406, "rewards/rejected": -6.111708641052246, "step": 18870 }, { "epoch": 0.23, "learning_rate": 4.762063183366175e-06, "logits/chosen": -3.1470541954040527, "logits/rejected": -3.1075398921966553, "logps/chosen": -65.13069152832031, "logps/rejected": -529.5792236328125, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": -0.17755261063575745, "rewards/margins": 4.734549522399902, "rewards/rejected": -4.912102222442627, "step": 18880 }, { "epoch": 0.23, "learning_rate": 4.761618204042014e-06, "logits/chosen": -3.1785025596618652, "logits/rejected": -3.112217426300049, "logps/chosen": -68.67874145507812, "logps/rejected": -738.8405151367188, "loss": 0.039, "rewards/accuracies": 1.0, "rewards/chosen": -0.1590670645236969, "rewards/margins": 6.819794654846191, "rewards/rejected": -6.9788618087768555, "step": 18890 }, { "epoch": 0.23, "learning_rate": 4.761172829845596e-06, "logits/chosen": -3.1919875144958496, "logits/rejected": -3.124347686767578, "logps/chosen": -61.61396026611328, "logps/rejected": -683.4617919921875, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": -0.07712770998477936, "rewards/margins": 6.354655742645264, "rewards/rejected": -6.431783199310303, "step": 18900 }, { "epoch": 0.23, "learning_rate": 4.760727060854682e-06, "logits/chosen": -3.1911873817443848, "logits/rejected": -3.103797197341919, "logps/chosen": -43.6294059753418, "logps/rejected": -612.4151611328125, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": 0.022849708795547485, "rewards/margins": 5.766373634338379, "rewards/rejected": -5.743523120880127, "step": 18910 }, { "epoch": 0.23, "learning_rate": 4.760280897147102e-06, "logits/chosen": -3.1853206157684326, "logits/rejected": -3.1192221641540527, "logps/chosen": -41.13057327270508, "logps/rejected": -601.3734130859375, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/chosen": 0.019941722974181175, "rewards/margins": 5.65157413482666, "rewards/rejected": -5.6316328048706055, "step": 18920 }, { "epoch": 0.23, "learning_rate": 4.759834338800755e-06, "logits/chosen": -3.2214152812957764, "logits/rejected": -3.129504680633545, "logps/chosen": -56.970054626464844, "logps/rejected": -735.7125854492188, "loss": 0.1094, "rewards/accuracies": 1.0, "rewards/chosen": -0.04441134259104729, "rewards/margins": 6.916957855224609, "rewards/rejected": -6.961369514465332, "step": 18930 }, { "epoch": 0.23, "learning_rate": 4.759387385893609e-06, "logits/chosen": -3.1634018421173096, "logits/rejected": -3.080382823944092, "logps/chosen": -36.37002182006836, "logps/rejected": -670.9927978515625, "loss": 0.1056, "rewards/accuracies": 1.0, "rewards/chosen": 0.14676310122013092, "rewards/margins": 6.464931488037109, "rewards/rejected": -6.318168640136719, "step": 18940 }, { "epoch": 0.23, "learning_rate": 4.7589400385037e-06, "logits/chosen": -3.1589760780334473, "logits/rejected": -3.0780599117279053, "logps/chosen": -56.206886291503906, "logps/rejected": -689.5535888671875, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": -0.08844275772571564, "rewards/margins": 6.418791770935059, "rewards/rejected": -6.507235050201416, "step": 18950 }, { "epoch": 0.23, "learning_rate": 4.758492296709135e-06, "logits/chosen": -3.198422431945801, "logits/rejected": -3.1627097129821777, "logps/chosen": -53.76323318481445, "logps/rejected": -556.10595703125, "loss": 0.1115, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.11624890565872192, "rewards/margins": 5.066561222076416, "rewards/rejected": -5.182809829711914, "step": 18960 }, { "epoch": 0.23, "learning_rate": 4.7580441605880875e-06, "logits/chosen": -3.192091464996338, "logits/rejected": -3.166792392730713, "logps/chosen": -42.48954391479492, "logps/rejected": -454.454833984375, "loss": 0.044, "rewards/accuracies": 1.0, "rewards/chosen": 0.00716495793312788, "rewards/margins": 4.15996789932251, "rewards/rejected": -4.152802467346191, "step": 18970 }, { "epoch": 0.23, "learning_rate": 4.757595630218801e-06, "logits/chosen": -3.2066988945007324, "logits/rejected": -3.168947458267212, "logps/chosen": -25.31780433654785, "logps/rejected": -437.22589111328125, "loss": 0.0528, "rewards/accuracies": 1.0, "rewards/chosen": 0.12426960468292236, "rewards/margins": 4.127516269683838, "rewards/rejected": -4.003246784210205, "step": 18980 }, { "epoch": 0.23, "learning_rate": 4.757146705679587e-06, "logits/chosen": -3.163663625717163, "logits/rejected": -3.0899791717529297, "logps/chosen": -50.71601104736328, "logps/rejected": -723.317138671875, "loss": 0.0432, "rewards/accuracies": 1.0, "rewards/chosen": 0.0027485177852213383, "rewards/margins": 6.8177947998046875, "rewards/rejected": -6.8150458335876465, "step": 18990 }, { "epoch": 0.23, "learning_rate": 4.756697387048828e-06, "logits/chosen": -3.193221092224121, "logits/rejected": -3.0930562019348145, "logps/chosen": -53.7901725769043, "logps/rejected": -822.1138916015625, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": 0.058882057666778564, "rewards/margins": 7.879426002502441, "rewards/rejected": -7.820544242858887, "step": 19000 }, { "epoch": 0.23, "learning_rate": 4.756247674404973e-06, "logits/chosen": -3.2117412090301514, "logits/rejected": -3.144150972366333, "logps/chosen": -68.65492248535156, "logps/rejected": -607.6776123046875, "loss": 0.1096, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2640931010246277, "rewards/margins": 5.425037860870361, "rewards/rejected": -5.689131259918213, "step": 19010 }, { "epoch": 0.23, "learning_rate": 4.75579756782654e-06, "logits/chosen": -3.198054552078247, "logits/rejected": -3.1450107097625732, "logps/chosen": -44.31441116333008, "logps/rejected": -521.4053344726562, "loss": 0.0498, "rewards/accuracies": 1.0, "rewards/chosen": -0.03480986878275871, "rewards/margins": 4.795285224914551, "rewards/rejected": -4.830094814300537, "step": 19020 }, { "epoch": 0.23, "learning_rate": 4.755347067392117e-06, "logits/chosen": -3.1650261878967285, "logits/rejected": -3.062143087387085, "logps/chosen": -46.1224365234375, "logps/rejected": -605.0489501953125, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": 0.006231170147657394, "rewards/margins": 5.690443992614746, "rewards/rejected": -5.684212684631348, "step": 19030 }, { "epoch": 0.23, "learning_rate": 4.754896173180359e-06, "logits/chosen": -3.1830153465270996, "logits/rejected": -3.1343162059783936, "logps/chosen": -51.04941940307617, "logps/rejected": -643.9364013671875, "loss": 0.0456, "rewards/accuracies": 1.0, "rewards/chosen": -0.05023075267672539, "rewards/margins": 5.987842559814453, "rewards/rejected": -6.038073539733887, "step": 19040 }, { "epoch": 0.23, "learning_rate": 4.754444885269993e-06, "logits/chosen": -3.2012112140655518, "logits/rejected": -3.1521341800689697, "logps/chosen": -51.30161666870117, "logps/rejected": -655.3615112304688, "loss": 0.0445, "rewards/accuracies": 1.0, "rewards/chosen": -0.07119874656200409, "rewards/margins": 6.089676856994629, "rewards/rejected": -6.1608757972717285, "step": 19050 }, { "epoch": 0.23, "learning_rate": 4.753993203739812e-06, "logits/chosen": -3.158050060272217, "logits/rejected": -3.113839626312256, "logps/chosen": -109.52641296386719, "logps/rejected": -656.0316162109375, "loss": 0.1701, "rewards/accuracies": 1.0, "rewards/chosen": -0.6126673817634583, "rewards/margins": 5.541351795196533, "rewards/rejected": -6.154018878936768, "step": 19060 }, { "epoch": 0.23, "learning_rate": 4.753541128668677e-06, "logits/chosen": -3.1817383766174316, "logits/rejected": -3.154547929763794, "logps/chosen": -20.046693801879883, "logps/rejected": -458.732177734375, "loss": 0.0469, "rewards/accuracies": 1.0, "rewards/chosen": 0.15912748873233795, "rewards/margins": 4.376415729522705, "rewards/rejected": -4.217288494110107, "step": 19070 }, { "epoch": 0.23, "learning_rate": 4.7530886601355196e-06, "logits/chosen": -3.1946325302124023, "logits/rejected": -3.1071255207061768, "logps/chosen": -51.48053741455078, "logps/rejected": -647.0028076171875, "loss": 0.106, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.03071008250117302, "rewards/margins": 6.037764549255371, "rewards/rejected": -6.068474769592285, "step": 19080 }, { "epoch": 0.23, "learning_rate": 4.7526357982193405e-06, "logits/chosen": -3.220348834991455, "logits/rejected": -3.1821725368499756, "logps/chosen": -23.344633102416992, "logps/rejected": -432.28509521484375, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": 0.1710328757762909, "rewards/margins": 4.128380298614502, "rewards/rejected": -3.9573466777801514, "step": 19090 }, { "epoch": 0.23, "learning_rate": 4.752182542999207e-06, "logits/chosen": -3.1969141960144043, "logits/rejected": -3.173351764678955, "logps/chosen": -24.231470108032227, "logps/rejected": -566.9240112304688, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/chosen": 0.1421269327402115, "rewards/margins": 5.433375835418701, "rewards/rejected": -5.2912492752075195, "step": 19100 }, { "epoch": 0.23, "learning_rate": 4.7517288945542574e-06, "logits/chosen": -3.189518690109253, "logits/rejected": -3.089942455291748, "logps/chosen": -50.47676467895508, "logps/rejected": -533.2532958984375, "loss": 0.1817, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0002114582748617977, "rewards/margins": 4.942914009094238, "rewards/rejected": -4.9431257247924805, "step": 19110 }, { "epoch": 0.23, "learning_rate": 4.751274852963696e-06, "logits/chosen": -3.1901988983154297, "logits/rejected": -3.1190402507781982, "logps/chosen": -35.9279899597168, "logps/rejected": -649.6083374023438, "loss": 0.0361, "rewards/accuracies": 1.0, "rewards/chosen": 0.10197141021490097, "rewards/margins": 6.202987194061279, "rewards/rejected": -6.101015567779541, "step": 19120 }, { "epoch": 0.23, "learning_rate": 4.750820418306798e-06, "logits/chosen": -3.190505266189575, "logits/rejected": -3.1210737228393555, "logps/chosen": -53.227142333984375, "logps/rejected": -577.955322265625, "loss": 0.0497, "rewards/accuracies": 1.0, "rewards/chosen": -0.05175275728106499, "rewards/margins": 5.341315746307373, "rewards/rejected": -5.393069267272949, "step": 19130 }, { "epoch": 0.23, "learning_rate": 4.750365590662907e-06, "logits/chosen": -3.231381893157959, "logits/rejected": -3.159618854522705, "logps/chosen": -41.819034576416016, "logps/rejected": -858.0111083984375, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": 0.06847372651100159, "rewards/margins": 8.228124618530273, "rewards/rejected": -8.159650802612305, "step": 19140 }, { "epoch": 0.23, "learning_rate": 4.749910370111433e-06, "logits/chosen": -3.1560299396514893, "logits/rejected": -3.0872514247894287, "logps/chosen": -50.109886169433594, "logps/rejected": -574.94384765625, "loss": 0.261, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.07227069139480591, "rewards/margins": 5.289480209350586, "rewards/rejected": -5.361751556396484, "step": 19150 }, { "epoch": 0.23, "learning_rate": 4.7494547567318574e-06, "logits/chosen": -3.2004966735839844, "logits/rejected": -3.1469321250915527, "logps/chosen": -50.47189712524414, "logps/rejected": -853.3243408203125, "loss": 0.034, "rewards/accuracies": 1.0, "rewards/chosen": 0.000927089131437242, "rewards/margins": 8.123144149780273, "rewards/rejected": -8.122217178344727, "step": 19160 }, { "epoch": 0.23, "learning_rate": 4.748998750603729e-06, "logits/chosen": -3.1839680671691895, "logits/rejected": -3.137878894805908, "logps/chosen": -59.805084228515625, "logps/rejected": -570.8855590820312, "loss": 0.05, "rewards/accuracies": 1.0, "rewards/chosen": -0.17108982801437378, "rewards/margins": 5.158266067504883, "rewards/rejected": -5.329355716705322, "step": 19170 }, { "epoch": 0.23, "learning_rate": 4.748542351806665e-06, "logits/chosen": -3.1870133876800537, "logits/rejected": -3.161290407180786, "logps/chosen": -31.47613525390625, "logps/rejected": -471.31988525390625, "loss": 0.0427, "rewards/accuracies": 1.0, "rewards/chosen": 0.09084434062242508, "rewards/margins": 4.441760540008545, "rewards/rejected": -4.350916385650635, "step": 19180 }, { "epoch": 0.23, "learning_rate": 4.7480855604203524e-06, "logits/chosen": -3.162994861602783, "logits/rejected": -3.102482318878174, "logps/chosen": -64.82637786865234, "logps/rejected": -544.726318359375, "loss": 0.0466, "rewards/accuracies": 1.0, "rewards/chosen": -0.1773996204137802, "rewards/margins": 4.8857316970825195, "rewards/rejected": -5.063130855560303, "step": 19190 }, { "epoch": 0.23, "learning_rate": 4.747628376524544e-06, "logits/chosen": -3.1959543228149414, "logits/rejected": -3.1275277137756348, "logps/chosen": -64.7469253540039, "logps/rejected": -697.0191040039062, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -0.17700648307800293, "rewards/margins": 6.40105676651001, "rewards/rejected": -6.578062534332275, "step": 19200 }, { "epoch": 0.23, "learning_rate": 4.747170800199064e-06, "logits/chosen": -3.178062677383423, "logits/rejected": -3.1021955013275146, "logps/chosen": -57.4404296875, "logps/rejected": -826.1179809570312, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": -0.05902675911784172, "rewards/margins": 7.809250831604004, "rewards/rejected": -7.868277072906494, "step": 19210 }, { "epoch": 0.23, "learning_rate": 4.746712831523803e-06, "logits/chosen": -3.175532341003418, "logits/rejected": -3.128357410430908, "logps/chosen": -47.22803497314453, "logps/rejected": -607.2208251953125, "loss": 0.0436, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006438225391320884, "rewards/margins": 5.684189319610596, "rewards/rejected": -5.684833526611328, "step": 19220 }, { "epoch": 0.23, "learning_rate": 4.746254470578722e-06, "logits/chosen": -3.17688250541687, "logits/rejected": -3.1300148963928223, "logps/chosen": -43.134071350097656, "logps/rejected": -680.8585815429688, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": 0.018919307738542557, "rewards/margins": 6.4367570877075195, "rewards/rejected": -6.417837619781494, "step": 19230 }, { "epoch": 0.23, "learning_rate": 4.745795717443849e-06, "logits/chosen": -3.2213046550750732, "logits/rejected": -3.1485931873321533, "logps/chosen": -40.96693801879883, "logps/rejected": -592.8841552734375, "loss": 0.0487, "rewards/accuracies": 1.0, "rewards/chosen": 0.09102506935596466, "rewards/margins": 5.62180233001709, "rewards/rejected": -5.530777454376221, "step": 19240 }, { "epoch": 0.23, "learning_rate": 4.745336572199281e-06, "logits/chosen": -3.1516926288604736, "logits/rejected": -3.072706699371338, "logps/chosen": -47.559261322021484, "logps/rejected": -814.6055908203125, "loss": 0.0933, "rewards/accuracies": 1.0, "rewards/chosen": 0.09580062329769135, "rewards/margins": 7.836581230163574, "rewards/rejected": -7.740780830383301, "step": 19250 }, { "epoch": 0.23, "learning_rate": 4.744877034925182e-06, "logits/chosen": -3.179459571838379, "logits/rejected": -3.100520610809326, "logps/chosen": -51.93970489501953, "logps/rejected": -795.6669921875, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -0.051095325499773026, "rewards/margins": 7.4999895095825195, "rewards/rejected": -7.551085472106934, "step": 19260 }, { "epoch": 0.23, "learning_rate": 4.744417105701791e-06, "logits/chosen": -3.210247039794922, "logits/rejected": -3.1387414932250977, "logps/chosen": -66.75786590576172, "logps/rejected": -691.9186401367188, "loss": 0.1024, "rewards/accuracies": 1.0, "rewards/chosen": -0.14430226385593414, "rewards/margins": 6.379677772521973, "rewards/rejected": -6.523980617523193, "step": 19270 }, { "epoch": 0.23, "learning_rate": 4.743956784609404e-06, "logits/chosen": -3.1977860927581787, "logits/rejected": -3.1456146240234375, "logps/chosen": -53.79827117919922, "logps/rejected": -672.3934936523438, "loss": 0.1196, "rewards/accuracies": 1.0, "rewards/chosen": -0.03883930668234825, "rewards/margins": 6.292685508728027, "rewards/rejected": -6.3315253257751465, "step": 19280 }, { "epoch": 0.23, "learning_rate": 4.743496071728396e-06, "logits/chosen": -3.211041212081909, "logits/rejected": -3.1222217082977295, "logps/chosen": -50.23115158081055, "logps/rejected": -706.7102661132812, "loss": 0.1129, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.060293614864349365, "rewards/margins": 6.63318395614624, "rewards/rejected": -6.693478584289551, "step": 19290 }, { "epoch": 0.23, "learning_rate": 4.743034967139205e-06, "logits/chosen": -3.205930709838867, "logits/rejected": -3.1265900135040283, "logps/chosen": -44.906375885009766, "logps/rejected": -544.837890625, "loss": 0.0984, "rewards/accuracies": 1.0, "rewards/chosen": 0.0767582505941391, "rewards/margins": 5.136972904205322, "rewards/rejected": -5.060214996337891, "step": 19300 }, { "epoch": 0.23, "learning_rate": 4.742573470922339e-06, "logits/chosen": -3.222567319869995, "logits/rejected": -3.1094918251037598, "logps/chosen": -49.66557312011719, "logps/rejected": -715.437744140625, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": 0.024418670684099197, "rewards/margins": 6.776159763336182, "rewards/rejected": -6.751741886138916, "step": 19310 }, { "epoch": 0.23, "learning_rate": 4.742111583158372e-06, "logits/chosen": -3.220236301422119, "logits/rejected": -3.1627981662750244, "logps/chosen": -56.82664108276367, "logps/rejected": -750.8187255859375, "loss": 0.2499, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.059982895851135254, "rewards/margins": 7.055211544036865, "rewards/rejected": -7.115194797515869, "step": 19320 }, { "epoch": 0.23, "learning_rate": 4.741649303927951e-06, "logits/chosen": -3.1706032752990723, "logits/rejected": -3.126744270324707, "logps/chosen": -37.65292739868164, "logps/rejected": -804.361572265625, "loss": 0.0346, "rewards/accuracies": 1.0, "rewards/chosen": 0.09006941318511963, "rewards/margins": 7.736634731292725, "rewards/rejected": -7.646565914154053, "step": 19330 }, { "epoch": 0.23, "learning_rate": 4.7411866333117875e-06, "logits/chosen": -3.1957879066467285, "logits/rejected": -3.1715173721313477, "logps/chosen": -41.38291931152344, "logps/rejected": -680.5317993164062, "loss": 0.0334, "rewards/accuracies": 1.0, "rewards/chosen": 0.048995669931173325, "rewards/margins": 6.464560508728027, "rewards/rejected": -6.415565490722656, "step": 19340 }, { "epoch": 0.23, "learning_rate": 4.740723571390662e-06, "logits/chosen": -3.1840615272521973, "logits/rejected": -3.164158344268799, "logps/chosen": -35.25068283081055, "logps/rejected": -511.015625, "loss": 0.0505, "rewards/accuracies": 1.0, "rewards/chosen": 0.02793414518237114, "rewards/margins": 4.774456977844238, "rewards/rejected": -4.746522426605225, "step": 19350 }, { "epoch": 0.23, "learning_rate": 4.740260118245424e-06, "logits/chosen": -3.1737663745880127, "logits/rejected": -3.148989200592041, "logps/chosen": -34.373504638671875, "logps/rejected": -498.96893310546875, "loss": 0.0448, "rewards/accuracies": 1.0, "rewards/chosen": 0.055811263620853424, "rewards/margins": 4.66103458404541, "rewards/rejected": -4.605223655700684, "step": 19360 }, { "epoch": 0.23, "learning_rate": 4.739796273956992e-06, "logits/chosen": -3.223098039627075, "logits/rejected": -3.1365573406219482, "logps/chosen": -56.3575439453125, "logps/rejected": -691.16357421875, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": -0.07247883081436157, "rewards/margins": 6.449893951416016, "rewards/rejected": -6.522372245788574, "step": 19370 }, { "epoch": 0.23, "learning_rate": 4.73933203860635e-06, "logits/chosen": -3.198345899581909, "logits/rejected": -3.1703853607177734, "logps/chosen": -48.594970703125, "logps/rejected": -513.0013427734375, "loss": 0.0435, "rewards/accuracies": 1.0, "rewards/chosen": -0.03323885053396225, "rewards/margins": 4.711740493774414, "rewards/rejected": -4.7449798583984375, "step": 19380 }, { "epoch": 0.23, "learning_rate": 4.738867412274555e-06, "logits/chosen": -3.2015938758850098, "logits/rejected": -3.153325319290161, "logps/chosen": -65.10297393798828, "logps/rejected": -611.1207885742188, "loss": 0.1113, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.15728244185447693, "rewards/margins": 5.561652660369873, "rewards/rejected": -5.718935012817383, "step": 19390 }, { "epoch": 0.23, "learning_rate": 4.7384023950427274e-06, "logits/chosen": -3.200150728225708, "logits/rejected": -3.131096839904785, "logps/chosen": -33.874366760253906, "logps/rejected": -471.19207763671875, "loss": 0.0476, "rewards/accuracies": 1.0, "rewards/chosen": 0.09175103902816772, "rewards/margins": 4.422037601470947, "rewards/rejected": -4.330286502838135, "step": 19400 }, { "epoch": 0.23, "learning_rate": 4.737936986992059e-06, "logits/chosen": -3.2262656688690186, "logits/rejected": -3.151916980743408, "logps/chosen": -41.60637664794922, "logps/rejected": -713.3470458984375, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": 0.02226370945572853, "rewards/margins": 6.770020961761475, "rewards/rejected": -6.7477569580078125, "step": 19410 }, { "epoch": 0.23, "learning_rate": 4.737471188203808e-06, "logits/chosen": -3.1769940853118896, "logits/rejected": -3.1243348121643066, "logps/chosen": -54.379310607910156, "logps/rejected": -554.2506713867188, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": -0.03646441176533699, "rewards/margins": 5.102385997772217, "rewards/rejected": -5.138850688934326, "step": 19420 }, { "epoch": 0.23, "learning_rate": 4.737004998759302e-06, "logits/chosen": -3.206235408782959, "logits/rejected": -3.1545112133026123, "logps/chosen": -45.7871208190918, "logps/rejected": -570.7176513671875, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": 0.015716630965471268, "rewards/margins": 5.32207727432251, "rewards/rejected": -5.306360721588135, "step": 19430 }, { "epoch": 0.23, "learning_rate": 4.7365384187399355e-06, "logits/chosen": -3.2081305980682373, "logits/rejected": -3.1643872261047363, "logps/chosen": -30.65460205078125, "logps/rejected": -579.3358154296875, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/chosen": 0.11669975519180298, "rewards/margins": 5.537053108215332, "rewards/rejected": -5.420352935791016, "step": 19440 }, { "epoch": 0.23, "learning_rate": 4.7360714482271734e-06, "logits/chosen": -3.1645596027374268, "logits/rejected": -3.104678153991699, "logps/chosen": -45.1609001159668, "logps/rejected": -522.1812133789062, "loss": 0.1243, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.004311546683311462, "rewards/margins": 4.847990989685059, "rewards/rejected": -4.852302551269531, "step": 19450 }, { "epoch": 0.23, "learning_rate": 4.735604087302547e-06, "logits/chosen": -3.2258141040802, "logits/rejected": -3.18278169631958, "logps/chosen": -36.24006271362305, "logps/rejected": -601.1617431640625, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": 0.09517668187618256, "rewards/margins": 5.718456268310547, "rewards/rejected": -5.623279571533203, "step": 19460 }, { "epoch": 0.23, "learning_rate": 4.735136336047655e-06, "logits/chosen": -3.1913037300109863, "logits/rejected": -3.1597187519073486, "logps/chosen": -42.01895523071289, "logps/rejected": -545.7110595703125, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": 0.04462580755352974, "rewards/margins": 5.1144700050354, "rewards/rejected": -5.0698442459106445, "step": 19470 }, { "epoch": 0.23, "learning_rate": 4.734668194544169e-06, "logits/chosen": -3.154934883117676, "logits/rejected": -3.0815377235412598, "logps/chosen": -52.05797576904297, "logps/rejected": -605.6077270507812, "loss": 0.1155, "rewards/accuracies": 1.0, "rewards/chosen": -0.05268890783190727, "rewards/margins": 5.598119258880615, "rewards/rejected": -5.650808811187744, "step": 19480 }, { "epoch": 0.23, "learning_rate": 4.7341996628738205e-06, "logits/chosen": -3.2058510780334473, "logits/rejected": -3.099216938018799, "logps/chosen": -58.66100311279297, "logps/rejected": -726.1329345703125, "loss": 0.1041, "rewards/accuracies": 1.0, "rewards/chosen": -0.06998400390148163, "rewards/margins": 6.795578956604004, "rewards/rejected": -6.865562438964844, "step": 19490 }, { "epoch": 0.23, "learning_rate": 4.733730741118417e-06, "logits/chosen": -3.189154863357544, "logits/rejected": -3.129445791244507, "logps/chosen": -64.27872467041016, "logps/rejected": -502.7435607910156, "loss": 0.1148, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.248376727104187, "rewards/margins": 4.405210971832275, "rewards/rejected": -4.65358829498291, "step": 19500 }, { "epoch": 0.23, "learning_rate": 4.73326142935983e-06, "logits/chosen": -3.1745691299438477, "logits/rejected": -3.105395793914795, "logps/chosen": -65.65892028808594, "logps/rejected": -527.1421508789062, "loss": 0.046, "rewards/accuracies": 1.0, "rewards/chosen": -0.14823567867279053, "rewards/margins": 4.727409362792969, "rewards/rejected": -4.875645637512207, "step": 19510 }, { "epoch": 0.23, "learning_rate": 4.73279172768e-06, "logits/chosen": -3.1942360401153564, "logits/rejected": -3.183044910430908, "logps/chosen": -20.879030227661133, "logps/rejected": -369.25714111328125, "loss": 0.049, "rewards/accuracies": 1.0, "rewards/chosen": 0.16194245219230652, "rewards/margins": 3.4851222038269043, "rewards/rejected": -3.3231799602508545, "step": 19520 }, { "epoch": 0.23, "learning_rate": 4.732321636160935e-06, "logits/chosen": -3.173194169998169, "logits/rejected": -3.1061038970947266, "logps/chosen": -53.432273864746094, "logps/rejected": -885.91748046875, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": -0.04560966044664383, "rewards/margins": 8.388930320739746, "rewards/rejected": -8.434539794921875, "step": 19530 }, { "epoch": 0.23, "learning_rate": 4.731851154884712e-06, "logits/chosen": -3.1954123973846436, "logits/rejected": -3.1846680641174316, "logps/chosen": -22.661632537841797, "logps/rejected": -449.8501892089844, "loss": 0.0891, "rewards/accuracies": 1.0, "rewards/chosen": 0.11449158191680908, "rewards/margins": 4.241816520690918, "rewards/rejected": -4.127325534820557, "step": 19540 }, { "epoch": 0.23, "learning_rate": 4.731380283933478e-06, "logits/chosen": -3.230342388153076, "logits/rejected": -3.1712839603424072, "logps/chosen": -48.15081024169922, "logps/rejected": -692.224853515625, "loss": 0.0349, "rewards/accuracies": 1.0, "rewards/chosen": -0.033815883100032806, "rewards/margins": 6.49734354019165, "rewards/rejected": -6.531159400939941, "step": 19550 }, { "epoch": 0.23, "learning_rate": 4.730909023389443e-06, "logits/chosen": -3.2190499305725098, "logits/rejected": -3.1714181900024414, "logps/chosen": -32.4251594543457, "logps/rejected": -506.7953186035156, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": 0.07520528137683868, "rewards/margins": 4.764410495758057, "rewards/rejected": -4.689205169677734, "step": 19560 }, { "epoch": 0.23, "learning_rate": 4.730437373334888e-06, "logits/chosen": -3.1760101318359375, "logits/rejected": -3.0666885375976562, "logps/chosen": -71.64270782470703, "logps/rejected": -802.1845092773438, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": -0.13208439946174622, "rewards/margins": 7.463809013366699, "rewards/rejected": -7.595892906188965, "step": 19570 }, { "epoch": 0.23, "learning_rate": 4.7299653338521625e-06, "logits/chosen": -3.2105319499969482, "logits/rejected": -3.123382091522217, "logps/chosen": -61.79212188720703, "logps/rejected": -623.1685180664062, "loss": 0.1033, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.10261185467243195, "rewards/margins": 5.7394490242004395, "rewards/rejected": -5.8420610427856445, "step": 19580 }, { "epoch": 0.23, "learning_rate": 4.729492905023684e-06, "logits/chosen": -3.142268657684326, "logits/rejected": -3.0711774826049805, "logps/chosen": -54.03844451904297, "logps/rejected": -774.9782104492188, "loss": 0.0958, "rewards/accuracies": 1.0, "rewards/chosen": -0.05526227504014969, "rewards/margins": 7.295538425445557, "rewards/rejected": -7.350800514221191, "step": 19590 }, { "epoch": 0.23, "learning_rate": 4.7290200869319355e-06, "logits/chosen": -3.1672654151916504, "logits/rejected": -3.109649181365967, "logps/chosen": -49.39373779296875, "logps/rejected": -600.9444580078125, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/chosen": -0.053268540650606155, "rewards/margins": 5.573232650756836, "rewards/rejected": -5.626501560211182, "step": 19600 }, { "epoch": 0.23, "learning_rate": 4.728546879659471e-06, "logits/chosen": -3.193509340286255, "logits/rejected": -3.1373934745788574, "logps/chosen": -153.53799438476562, "logps/rejected": -669.9635620117188, "loss": 0.0927, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1281911134719849, "rewards/margins": 5.184145927429199, "rewards/rejected": -6.312336444854736, "step": 19610 }, { "epoch": 0.23, "learning_rate": 4.7280732832889105e-06, "logits/chosen": -3.169745922088623, "logits/rejected": -3.009012222290039, "logps/chosen": -235.78842163085938, "logps/rejected": -947.40234375, "loss": 0.1696, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.801092505455017, "rewards/margins": 7.254950046539307, "rewards/rejected": -9.056042671203613, "step": 19620 }, { "epoch": 0.23, "learning_rate": 4.727599297902944e-06, "logits/chosen": -3.1497676372528076, "logits/rejected": -3.0909242630004883, "logps/chosen": -97.40988159179688, "logps/rejected": -593.2593994140625, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": -0.5322471857070923, "rewards/margins": 5.029706954956055, "rewards/rejected": -5.561953544616699, "step": 19630 }, { "epoch": 0.24, "learning_rate": 4.727124923584326e-06, "logits/chosen": -3.157548666000366, "logits/rejected": -3.0419726371765137, "logps/chosen": -178.815185546875, "logps/rejected": -839.7906494140625, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/chosen": -1.2711036205291748, "rewards/margins": 6.718925476074219, "rewards/rejected": -7.990029335021973, "step": 19640 }, { "epoch": 0.24, "learning_rate": 4.726650160415882e-06, "logits/chosen": -3.241743803024292, "logits/rejected": -3.206272840499878, "logps/chosen": -41.2509765625, "logps/rejected": -469.71728515625, "loss": 0.0965, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.02843674086034298, "rewards/margins": 4.295817852020264, "rewards/rejected": -4.324254989624023, "step": 19650 }, { "epoch": 0.24, "learning_rate": 4.7261750084805046e-06, "logits/chosen": -3.1923575401306152, "logits/rejected": -3.0971744060516357, "logps/chosen": -39.580894470214844, "logps/rejected": -463.5498046875, "loss": 0.0957, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0024875805247575045, "rewards/margins": 4.270249366760254, "rewards/rejected": -4.272736549377441, "step": 19660 }, { "epoch": 0.24, "learning_rate": 4.725699467861153e-06, "logits/chosen": -3.1935484409332275, "logits/rejected": -3.0891520977020264, "logps/chosen": -42.8394889831543, "logps/rejected": -661.5177001953125, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": 0.004599171690642834, "rewards/margins": 6.236361503601074, "rewards/rejected": -6.231762409210205, "step": 19670 }, { "epoch": 0.24, "learning_rate": 4.725223538640856e-06, "logits/chosen": -3.1620535850524902, "logits/rejected": -3.0441348552703857, "logps/chosen": -93.8152847290039, "logps/rejected": -762.5660400390625, "loss": 0.0597, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.44169479608535767, "rewards/margins": 6.779538631439209, "rewards/rejected": -7.221234321594238, "step": 19680 }, { "epoch": 0.24, "learning_rate": 4.72474722090271e-06, "logits/chosen": -3.209357500076294, "logits/rejected": -3.1712005138397217, "logps/chosen": -45.92464065551758, "logps/rejected": -566.6655883789062, "loss": 0.1696, "rewards/accuracies": 1.0, "rewards/chosen": -0.06464909017086029, "rewards/margins": 5.220685958862305, "rewards/rejected": -5.285334587097168, "step": 19690 }, { "epoch": 0.24, "learning_rate": 4.724270514729878e-06, "logits/chosen": -3.161822557449341, "logits/rejected": -3.072327136993408, "logps/chosen": -81.21479034423828, "logps/rejected": -683.0819702148438, "loss": 0.1487, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.39448508620262146, "rewards/margins": 6.050205230712891, "rewards/rejected": -6.444690704345703, "step": 19700 }, { "epoch": 0.24, "learning_rate": 4.7237934202055925e-06, "logits/chosen": -3.223288059234619, "logits/rejected": -3.1472816467285156, "logps/chosen": -76.78056335449219, "logps/rejected": -698.3958129882812, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": -0.3042750358581543, "rewards/margins": 6.295979976654053, "rewards/rejected": -6.600255012512207, "step": 19710 }, { "epoch": 0.24, "learning_rate": 4.723315937413151e-06, "logits/chosen": -3.13619327545166, "logits/rejected": -3.087233066558838, "logps/chosen": -77.39060974121094, "logps/rejected": -497.3236389160156, "loss": 0.047, "rewards/accuracies": 1.0, "rewards/chosen": -0.36127254366874695, "rewards/margins": 4.240415096282959, "rewards/rejected": -4.601687908172607, "step": 19720 }, { "epoch": 0.24, "learning_rate": 4.722838066435923e-06, "logits/chosen": -3.194831132888794, "logits/rejected": -3.109478712081909, "logps/chosen": -96.8798599243164, "logps/rejected": -624.7394409179688, "loss": 0.1022, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5014018416404724, "rewards/margins": 5.363720417022705, "rewards/rejected": -5.865121364593506, "step": 19730 }, { "epoch": 0.24, "learning_rate": 4.722359807357341e-06, "logits/chosen": -3.196993827819824, "logits/rejected": -3.1347854137420654, "logps/chosen": -81.49554443359375, "logps/rejected": -492.3572692871094, "loss": 0.113, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.4084928035736084, "rewards/margins": 4.149393081665039, "rewards/rejected": -4.557885646820068, "step": 19740 }, { "epoch": 0.24, "learning_rate": 4.72188116026091e-06, "logits/chosen": -3.198105812072754, "logits/rejected": -3.0778005123138428, "logps/chosen": -77.09542083740234, "logps/rejected": -805.4046020507812, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": -0.232573464512825, "rewards/margins": 7.426599025726318, "rewards/rejected": -7.659172058105469, "step": 19750 }, { "epoch": 0.24, "learning_rate": 4.721402125230198e-06, "logits/chosen": -3.1515305042266846, "logits/rejected": -3.024178981781006, "logps/chosen": -60.11536407470703, "logps/rejected": -800.0787353515625, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -0.07852979004383087, "rewards/margins": 7.520298957824707, "rewards/rejected": -7.598828315734863, "step": 19760 }, { "epoch": 0.24, "learning_rate": 4.720922702348846e-06, "logits/chosen": -3.1615054607391357, "logits/rejected": -3.0867934226989746, "logps/chosen": -53.248558044433594, "logps/rejected": -735.2176513671875, "loss": 0.0598, "rewards/accuracies": 1.0, "rewards/chosen": -0.06523284316062927, "rewards/margins": 6.886417388916016, "rewards/rejected": -6.951651096343994, "step": 19770 }, { "epoch": 0.24, "learning_rate": 4.7204428917005585e-06, "logits/chosen": -3.1726250648498535, "logits/rejected": -3.114030599594116, "logps/chosen": -39.43345642089844, "logps/rejected": -659.4195556640625, "loss": 0.0691, "rewards/accuracies": 1.0, "rewards/chosen": 0.09524645656347275, "rewards/margins": 6.3110551834106445, "rewards/rejected": -6.215807914733887, "step": 19780 }, { "epoch": 0.24, "learning_rate": 4.719962693369109e-06, "logits/chosen": -3.1852922439575195, "logits/rejected": -3.0441744327545166, "logps/chosen": -54.10725784301758, "logps/rejected": -827.2550048828125, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -0.00388391618616879, "rewards/margins": 7.874658107757568, "rewards/rejected": -7.878541469573975, "step": 19790 }, { "epoch": 0.24, "learning_rate": 4.719482107438338e-06, "logits/chosen": -3.1414947509765625, "logits/rejected": -3.0514914989471436, "logps/chosen": -74.0536117553711, "logps/rejected": -518.438232421875, "loss": 0.0653, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.28376996517181396, "rewards/margins": 4.512251853942871, "rewards/rejected": -4.796021938323975, "step": 19800 }, { "epoch": 0.24, "learning_rate": 4.719001133992156e-06, "logits/chosen": -3.184627056121826, "logits/rejected": -3.1060543060302734, "logps/chosen": -52.07415008544922, "logps/rejected": -666.4270629882812, "loss": 0.0963, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.08821825683116913, "rewards/margins": 6.195150375366211, "rewards/rejected": -6.283369064331055, "step": 19810 }, { "epoch": 0.24, "learning_rate": 4.7185197731145395e-06, "logits/chosen": -3.1529581546783447, "logits/rejected": -2.9687788486480713, "logps/chosen": -52.5753173828125, "logps/rejected": -674.2916259765625, "loss": 0.1122, "rewards/accuracies": 1.0, "rewards/chosen": -0.018391240388154984, "rewards/margins": 6.326376914978027, "rewards/rejected": -6.344768047332764, "step": 19820 }, { "epoch": 0.24, "learning_rate": 4.718038024889532e-06, "logits/chosen": -3.1825075149536133, "logits/rejected": -3.0987210273742676, "logps/chosen": -53.6689567565918, "logps/rejected": -613.9381103515625, "loss": 0.0944, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.10991813987493515, "rewards/margins": 5.650496006011963, "rewards/rejected": -5.760414123535156, "step": 19830 }, { "epoch": 0.24, "learning_rate": 4.717555889401245e-06, "logits/chosen": -3.148794412612915, "logits/rejected": -2.9797005653381348, "logps/chosen": -51.937530517578125, "logps/rejected": -644.9166870117188, "loss": 0.1117, "rewards/accuracies": 1.0, "rewards/chosen": -0.06728188693523407, "rewards/margins": 5.999236106872559, "rewards/rejected": -6.0665178298950195, "step": 19840 }, { "epoch": 0.24, "learning_rate": 4.7170733667338595e-06, "logits/chosen": -3.150000810623169, "logits/rejected": -3.0720739364624023, "logps/chosen": -72.9292984008789, "logps/rejected": -560.187744140625, "loss": 0.1492, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.249217227101326, "rewards/margins": 4.957609176635742, "rewards/rejected": -5.206826210021973, "step": 19850 }, { "epoch": 0.24, "learning_rate": 4.716590456971621e-06, "logits/chosen": -3.176048755645752, "logits/rejected": -3.0622477531433105, "logps/chosen": -57.8907356262207, "logps/rejected": -552.0279541015625, "loss": 0.0636, "rewards/accuracies": 1.0, "rewards/chosen": -0.09910012036561966, "rewards/margins": 5.034402370452881, "rewards/rejected": -5.133502006530762, "step": 19860 }, { "epoch": 0.24, "learning_rate": 4.716107160198846e-06, "logits/chosen": -3.17503023147583, "logits/rejected": -3.003751039505005, "logps/chosen": -107.63398742675781, "logps/rejected": -825.5427856445312, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/chosen": -0.4810718595981598, "rewards/margins": 7.368189334869385, "rewards/rejected": -7.849259853363037, "step": 19870 }, { "epoch": 0.24, "learning_rate": 4.715623476499916e-06, "logits/chosen": -3.15659761428833, "logits/rejected": -3.136373281478882, "logps/chosen": -20.906551361083984, "logps/rejected": -373.2077941894531, "loss": 0.0467, "rewards/accuracies": 1.0, "rewards/chosen": 0.12183792889118195, "rewards/margins": 3.4962074756622314, "rewards/rejected": -3.3743691444396973, "step": 19880 }, { "epoch": 0.24, "learning_rate": 4.715139405959279e-06, "logits/chosen": -3.171638250350952, "logits/rejected": -3.019883632659912, "logps/chosen": -103.76396179199219, "logps/rejected": -641.1349487304688, "loss": 0.1387, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6069990396499634, "rewards/margins": 5.402310371398926, "rewards/rejected": -6.009310245513916, "step": 19890 }, { "epoch": 0.24, "learning_rate": 4.714654948661456e-06, "logits/chosen": -3.1737799644470215, "logits/rejected": -3.0842013359069824, "logps/chosen": -45.23431396484375, "logps/rejected": -559.2247314453125, "loss": 0.0647, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.009973317384719849, "rewards/margins": 5.203608512878418, "rewards/rejected": -5.2135820388793945, "step": 19900 }, { "epoch": 0.24, "learning_rate": 4.714170104691029e-06, "logits/chosen": -3.1516668796539307, "logits/rejected": -3.0005908012390137, "logps/chosen": -61.537925720214844, "logps/rejected": -661.1654052734375, "loss": 0.1146, "rewards/accuracies": 1.0, "rewards/chosen": -0.07553975284099579, "rewards/margins": 6.132957935333252, "rewards/rejected": -6.208497524261475, "step": 19910 }, { "epoch": 0.24, "learning_rate": 4.713684874132651e-06, "logits/chosen": -3.1740102767944336, "logits/rejected": -3.1045913696289062, "logps/chosen": -51.38042449951172, "logps/rejected": -473.21710205078125, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": -0.056232161819934845, "rewards/margins": 4.289068698883057, "rewards/rejected": -4.345301151275635, "step": 19920 }, { "epoch": 0.24, "learning_rate": 4.7131992570710425e-06, "logits/chosen": -3.157562255859375, "logits/rejected": -3.062044858932495, "logps/chosen": -43.013282775878906, "logps/rejected": -679.5758056640625, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": 0.001780612044967711, "rewards/margins": 6.39981746673584, "rewards/rejected": -6.398037433624268, "step": 19930 }, { "epoch": 0.24, "learning_rate": 4.7127132535909905e-06, "logits/chosen": -3.1495718955993652, "logits/rejected": -3.0474987030029297, "logps/chosen": -30.326919555664062, "logps/rejected": -577.7496337890625, "loss": 0.0415, "rewards/accuracies": 1.0, "rewards/chosen": 0.10574767738580704, "rewards/margins": 5.4978179931640625, "rewards/rejected": -5.3920698165893555, "step": 19940 }, { "epoch": 0.24, "learning_rate": 4.712226863777349e-06, "logits/chosen": -3.1667463779449463, "logits/rejected": -3.0521204471588135, "logps/chosen": -61.617462158203125, "logps/rejected": -496.92138671875, "loss": 0.1134, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.17207928001880646, "rewards/margins": 4.406649589538574, "rewards/rejected": -4.578729152679443, "step": 19950 }, { "epoch": 0.24, "learning_rate": 4.711740087715044e-06, "logits/chosen": -3.1361374855041504, "logits/rejected": -2.984769582748413, "logps/chosen": -72.5524673461914, "logps/rejected": -612.5140380859375, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -0.23911809921264648, "rewards/margins": 5.489699840545654, "rewards/rejected": -5.728817939758301, "step": 19960 }, { "epoch": 0.24, "learning_rate": 4.71125292548906e-06, "logits/chosen": -3.1770591735839844, "logits/rejected": -3.0743725299835205, "logps/chosen": -49.21699142456055, "logps/rejected": -645.9114990234375, "loss": 0.0358, "rewards/accuracies": 1.0, "rewards/chosen": -0.009811187162995338, "rewards/margins": 6.063089370727539, "rewards/rejected": -6.07289981842041, "step": 19970 }, { "epoch": 0.24, "learning_rate": 4.7107653771844575e-06, "logits/chosen": -3.139172315597534, "logits/rejected": -3.0156803131103516, "logps/chosen": -58.08411407470703, "logps/rejected": -783.5328369140625, "loss": 0.0922, "rewards/accuracies": 1.0, "rewards/chosen": -0.021331721916794777, "rewards/margins": 7.411404609680176, "rewards/rejected": -7.432736396789551, "step": 19980 }, { "epoch": 0.24, "learning_rate": 4.71027744288636e-06, "logits/chosen": -3.1439590454101562, "logits/rejected": -3.038463830947876, "logps/chosen": -34.655487060546875, "logps/rejected": -641.872314453125, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/chosen": 0.11518865823745728, "rewards/margins": 6.151527404785156, "rewards/rejected": -6.036339282989502, "step": 19990 }, { "epoch": 0.24, "learning_rate": 4.709789122679961e-06, "logits/chosen": -3.169236660003662, "logits/rejected": -3.066070079803467, "logps/chosen": -102.4820327758789, "logps/rejected": -716.1491088867188, "loss": 0.0781, "rewards/accuracies": 1.0, "rewards/chosen": -0.5223112106323242, "rewards/margins": 6.259675025939941, "rewards/rejected": -6.781986236572266, "step": 20000 }, { "epoch": 0.24, "learning_rate": 4.709300416650518e-06, "logits/chosen": -3.1493375301361084, "logits/rejected": -3.019157648086548, "logps/chosen": -44.535308837890625, "logps/rejected": -589.9754638671875, "loss": 0.0434, "rewards/accuracies": 1.0, "rewards/chosen": 0.021174360066652298, "rewards/margins": 5.541727066040039, "rewards/rejected": -5.520552635192871, "step": 20010 }, { "epoch": 0.24, "learning_rate": 4.708811324883358e-06, "logits/chosen": -3.1879043579101562, "logits/rejected": -2.9554903507232666, "logps/chosen": -70.71446990966797, "logps/rejected": -717.6888427734375, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": -0.12492283433675766, "rewards/margins": 6.657863616943359, "rewards/rejected": -6.782787322998047, "step": 20020 }, { "epoch": 0.24, "learning_rate": 4.708321847463875e-06, "logits/chosen": -3.141209125518799, "logits/rejected": -3.0550456047058105, "logps/chosen": -69.70362091064453, "logps/rejected": -626.9083862304688, "loss": 0.0387, "rewards/accuracies": 1.0, "rewards/chosen": -0.23314404487609863, "rewards/margins": 5.64668607711792, "rewards/rejected": -5.879830360412598, "step": 20030 }, { "epoch": 0.24, "learning_rate": 4.707831984477531e-06, "logits/chosen": -3.1639113426208496, "logits/rejected": -3.0549800395965576, "logps/chosen": -55.02996826171875, "logps/rejected": -843.0281372070312, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": -0.07667993009090424, "rewards/margins": 7.967388153076172, "rewards/rejected": -8.044068336486816, "step": 20040 }, { "epoch": 0.24, "learning_rate": 4.707341736009855e-06, "logits/chosen": -3.1375324726104736, "logits/rejected": -2.938581943511963, "logps/chosen": -60.802467346191406, "logps/rejected": -787.9757080078125, "loss": 0.0351, "rewards/accuracies": 1.0, "rewards/chosen": -0.1043776124715805, "rewards/margins": 7.380688667297363, "rewards/rejected": -7.485066890716553, "step": 20050 }, { "epoch": 0.24, "learning_rate": 4.706851102146442e-06, "logits/chosen": -3.128709316253662, "logits/rejected": -2.9957780838012695, "logps/chosen": -67.85346984863281, "logps/rejected": -522.99658203125, "loss": 0.0497, "rewards/accuracies": 1.0, "rewards/chosen": -0.18626026809215546, "rewards/margins": 4.661241054534912, "rewards/rejected": -4.847501754760742, "step": 20060 }, { "epoch": 0.24, "learning_rate": 4.7063600829729565e-06, "logits/chosen": -3.134381055831909, "logits/rejected": -2.966542959213257, "logps/chosen": -86.07923889160156, "logps/rejected": -866.77685546875, "loss": 0.0859, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3350643515586853, "rewards/margins": 7.927332401275635, "rewards/rejected": -8.262395858764648, "step": 20070 }, { "epoch": 0.24, "learning_rate": 4.705868678575127e-06, "logits/chosen": -3.173354387283325, "logits/rejected": -3.0450260639190674, "logps/chosen": -50.32456970214844, "logps/rejected": -645.8995971679688, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": 0.01528339833021164, "rewards/margins": 6.094570159912109, "rewards/rejected": -6.079287052154541, "step": 20080 }, { "epoch": 0.24, "learning_rate": 4.7053768890387545e-06, "logits/chosen": -3.1474053859710693, "logits/rejected": -2.9818737506866455, "logps/chosen": -52.507110595703125, "logps/rejected": -529.5065307617188, "loss": 0.06, "rewards/accuracies": 1.0, "rewards/chosen": -0.03997647389769554, "rewards/margins": 4.871623992919922, "rewards/rejected": -4.9116010665893555, "step": 20090 }, { "epoch": 0.24, "learning_rate": 4.7048847144497015e-06, "logits/chosen": -3.1293702125549316, "logits/rejected": -2.981022834777832, "logps/chosen": -71.36283874511719, "logps/rejected": -729.2899169921875, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": -0.17721354961395264, "rewards/margins": 6.717005252838135, "rewards/rejected": -6.894218444824219, "step": 20100 }, { "epoch": 0.24, "learning_rate": 4.7043921548939005e-06, "logits/chosen": -3.162585496902466, "logits/rejected": -3.0301456451416016, "logps/chosen": -47.45592498779297, "logps/rejected": -676.4208984375, "loss": 0.0358, "rewards/accuracies": 1.0, "rewards/chosen": -0.0069612422958016396, "rewards/margins": 6.3564252853393555, "rewards/rejected": -6.363387107849121, "step": 20110 }, { "epoch": 0.24, "learning_rate": 4.703899210457353e-06, "logits/chosen": -3.1749322414398193, "logits/rejected": -3.087602138519287, "logps/chosen": -36.201812744140625, "logps/rejected": -512.4872436523438, "loss": 0.0453, "rewards/accuracies": 1.0, "rewards/chosen": 0.07899650186300278, "rewards/margins": 4.8339385986328125, "rewards/rejected": -4.754942417144775, "step": 20120 }, { "epoch": 0.24, "learning_rate": 4.703405881226124e-06, "logits/chosen": -3.1640830039978027, "logits/rejected": -3.0874180793762207, "logps/chosen": -45.67780303955078, "logps/rejected": -532.306640625, "loss": 0.0933, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.07836271077394485, "rewards/margins": 4.8736701011657715, "rewards/rejected": -4.952033042907715, "step": 20130 }, { "epoch": 0.24, "learning_rate": 4.702912167286347e-06, "logits/chosen": -3.1334428787231445, "logits/rejected": -2.8968212604522705, "logps/chosen": -63.76759719848633, "logps/rejected": -763.5961303710938, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": -0.10268809646368027, "rewards/margins": 7.148561954498291, "rewards/rejected": -7.25124979019165, "step": 20140 }, { "epoch": 0.24, "learning_rate": 4.702418068724225e-06, "logits/chosen": -3.1385416984558105, "logits/rejected": -2.960845947265625, "logps/chosen": -74.0042953491211, "logps/rejected": -623.6334228515625, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": -0.25804904103279114, "rewards/margins": 5.58199405670166, "rewards/rejected": -5.840043067932129, "step": 20150 }, { "epoch": 0.24, "learning_rate": 4.701923585626024e-06, "logits/chosen": -3.1578643321990967, "logits/rejected": -3.062859296798706, "logps/chosen": -71.25971221923828, "logps/rejected": -645.904541015625, "loss": 0.1697, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.24704833328723907, "rewards/margins": 5.830065727233887, "rewards/rejected": -6.077113628387451, "step": 20160 }, { "epoch": 0.24, "learning_rate": 4.701428718078083e-06, "logits/chosen": -3.119060754776001, "logits/rejected": -2.960437536239624, "logps/chosen": -47.494571685791016, "logps/rejected": -621.1476440429688, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": 0.0018646385287865996, "rewards/margins": 5.836030006408691, "rewards/rejected": -5.834165573120117, "step": 20170 }, { "epoch": 0.24, "learning_rate": 4.7009334661668e-06, "logits/chosen": -3.134615421295166, "logits/rejected": -3.0275826454162598, "logps/chosen": -43.959938049316406, "logps/rejected": -599.4586181640625, "loss": 0.0402, "rewards/accuracies": 1.0, "rewards/chosen": -0.05544445663690567, "rewards/margins": 5.5653181076049805, "rewards/rejected": -5.620762825012207, "step": 20180 }, { "epoch": 0.24, "learning_rate": 4.7004378299786464e-06, "logits/chosen": -3.1095242500305176, "logits/rejected": -2.86944580078125, "logps/chosen": -134.8632354736328, "logps/rejected": -972.4261474609375, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -0.7600712776184082, "rewards/margins": 8.5512056350708, "rewards/rejected": -9.31127643585205, "step": 20190 }, { "epoch": 0.24, "learning_rate": 4.69994180960016e-06, "logits/chosen": -3.1302428245544434, "logits/rejected": -3.1000924110412598, "logps/chosen": -38.96685028076172, "logps/rejected": -389.12738037109375, "loss": 0.0535, "rewards/accuracies": 1.0, "rewards/chosen": -0.026421736925840378, "rewards/margins": 3.486807346343994, "rewards/rejected": -3.51322865486145, "step": 20200 }, { "epoch": 0.24, "learning_rate": 4.699445405117943e-06, "logits/chosen": -3.1400551795959473, "logits/rejected": -2.9059128761291504, "logps/chosen": -81.82563018798828, "logps/rejected": -757.6126098632812, "loss": 0.039, "rewards/accuracies": 1.0, "rewards/chosen": -0.3309085965156555, "rewards/margins": 6.8568010330200195, "rewards/rejected": -7.187709808349609, "step": 20210 }, { "epoch": 0.24, "learning_rate": 4.698948616618668e-06, "logits/chosen": -3.1641955375671387, "logits/rejected": -3.020167827606201, "logps/chosen": -59.68217849731445, "logps/rejected": -552.7607421875, "loss": 0.0505, "rewards/accuracies": 1.0, "rewards/chosen": -0.12080130726099014, "rewards/margins": 5.006698131561279, "rewards/rejected": -5.127498626708984, "step": 20220 }, { "epoch": 0.24, "learning_rate": 4.698451444189071e-06, "logits/chosen": -3.1357784271240234, "logits/rejected": -2.9458765983581543, "logps/chosen": -84.72378540039062, "logps/rejected": -758.7101440429688, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": -0.32742840051651, "rewards/margins": 6.861944675445557, "rewards/rejected": -7.189373016357422, "step": 20230 }, { "epoch": 0.24, "learning_rate": 4.697953887915958e-06, "logits/chosen": -3.1667747497558594, "logits/rejected": -3.057786703109741, "logps/chosen": -74.21944427490234, "logps/rejected": -685.6986694335938, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": -0.252868115901947, "rewards/margins": 6.2155232429504395, "rewards/rejected": -6.468392372131348, "step": 20240 }, { "epoch": 0.24, "learning_rate": 4.6974559478862005e-06, "logits/chosen": -3.1885159015655518, "logits/rejected": -3.137704849243164, "logps/chosen": -54.678977966308594, "logps/rejected": -450.0673828125, "loss": 0.0429, "rewards/accuracies": 1.0, "rewards/chosen": -0.15994931757450104, "rewards/margins": 3.946498394012451, "rewards/rejected": -4.106447696685791, "step": 20250 }, { "epoch": 0.24, "learning_rate": 4.696957624186739e-06, "logits/chosen": -3.1163220405578613, "logits/rejected": -2.8930039405822754, "logps/chosen": -99.41813659667969, "logps/rejected": -867.5758666992188, "loss": 0.0342, "rewards/accuracies": 1.0, "rewards/chosen": -0.42899447679519653, "rewards/margins": 7.840083122253418, "rewards/rejected": -8.269078254699707, "step": 20260 }, { "epoch": 0.24, "learning_rate": 4.696458916904576e-06, "logits/chosen": -3.13743257522583, "logits/rejected": -2.9210686683654785, "logps/chosen": -71.90742492675781, "logps/rejected": -729.1864013671875, "loss": 0.1083, "rewards/accuracies": 1.0, "rewards/chosen": -0.19861415028572083, "rewards/margins": 6.683860778808594, "rewards/rejected": -6.88247537612915, "step": 20270 }, { "epoch": 0.24, "learning_rate": 4.695959826126788e-06, "logits/chosen": -3.15057110786438, "logits/rejected": -3.0508012771606445, "logps/chosen": -83.33682250976562, "logps/rejected": -594.3919067382812, "loss": 0.046, "rewards/accuracies": 1.0, "rewards/chosen": -0.34827950596809387, "rewards/margins": 5.198178291320801, "rewards/rejected": -5.546457767486572, "step": 20280 }, { "epoch": 0.24, "learning_rate": 4.695460351940513e-06, "logits/chosen": -3.1324524879455566, "logits/rejected": -2.9509923458099365, "logps/chosen": -86.6979751586914, "logps/rejected": -630.0343017578125, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": -0.2994166612625122, "rewards/margins": 5.606266498565674, "rewards/rejected": -5.9056830406188965, "step": 20290 }, { "epoch": 0.24, "learning_rate": 4.694960494432958e-06, "logits/chosen": -3.1159188747406006, "logits/rejected": -2.9509692192077637, "logps/chosen": -160.28042602539062, "logps/rejected": -873.3341674804688, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": -1.092195749282837, "rewards/margins": 7.247998237609863, "rewards/rejected": -8.340194702148438, "step": 20300 }, { "epoch": 0.24, "learning_rate": 4.694460253691397e-06, "logits/chosen": -3.1164612770080566, "logits/rejected": -2.966426134109497, "logps/chosen": -59.71015548706055, "logps/rejected": -741.0135498046875, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": -0.0767715722322464, "rewards/margins": 6.945603847503662, "rewards/rejected": -7.022375583648682, "step": 20310 }, { "epoch": 0.24, "learning_rate": 4.6939596298031705e-06, "logits/chosen": -3.129970073699951, "logits/rejected": -2.9195408821105957, "logps/chosen": -69.35813903808594, "logps/rejected": -710.5640869140625, "loss": 0.0402, "rewards/accuracies": 1.0, "rewards/chosen": -0.11536991596221924, "rewards/margins": 6.595855712890625, "rewards/rejected": -6.7112250328063965, "step": 20320 }, { "epoch": 0.24, "learning_rate": 4.693458622855686e-06, "logits/chosen": -3.157928705215454, "logits/rejected": -3.0496299266815186, "logps/chosen": -54.603538513183594, "logps/rejected": -689.9767456054688, "loss": 0.0446, "rewards/accuracies": 1.0, "rewards/chosen": -0.055979467928409576, "rewards/margins": 6.457318305969238, "rewards/rejected": -6.513297080993652, "step": 20330 }, { "epoch": 0.24, "learning_rate": 4.692957232936418e-06, "logits/chosen": -3.142315149307251, "logits/rejected": -3.032663345336914, "logps/chosen": -60.49406814575195, "logps/rejected": -573.3385620117188, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": -0.18892750144004822, "rewards/margins": 5.166041851043701, "rewards/rejected": -5.354969501495361, "step": 20340 }, { "epoch": 0.24, "learning_rate": 4.692455460132909e-06, "logits/chosen": -3.118495464324951, "logits/rejected": -2.909209728240967, "logps/chosen": -105.315185546875, "logps/rejected": -889.5794677734375, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -0.40889972448349, "rewards/margins": 8.050930976867676, "rewards/rejected": -8.459829330444336, "step": 20350 }, { "epoch": 0.24, "learning_rate": 4.691953304532764e-06, "logits/chosen": -3.136284112930298, "logits/rejected": -2.874121904373169, "logps/chosen": -68.32415008544922, "logps/rejected": -943.6630859375, "loss": 0.0329, "rewards/accuracies": 1.0, "rewards/chosen": -0.14429369568824768, "rewards/margins": 8.8972806930542, "rewards/rejected": -9.041574478149414, "step": 20360 }, { "epoch": 0.24, "learning_rate": 4.691450766223661e-06, "logits/chosen": -3.1420845985412598, "logits/rejected": -2.9515717029571533, "logps/chosen": -76.90913391113281, "logps/rejected": -871.0056762695312, "loss": 0.1025, "rewards/accuracies": 1.0, "rewards/chosen": -0.25288155674934387, "rewards/margins": 8.042828559875488, "rewards/rejected": -8.295709609985352, "step": 20370 }, { "epoch": 0.24, "learning_rate": 4.690947845293341e-06, "logits/chosen": -3.1402904987335205, "logits/rejected": -3.034738063812256, "logps/chosen": -74.99407958984375, "logps/rejected": -685.5790405273438, "loss": 0.0397, "rewards/accuracies": 1.0, "rewards/chosen": -0.37246501445770264, "rewards/margins": 6.10612154006958, "rewards/rejected": -6.478585720062256, "step": 20380 }, { "epoch": 0.24, "learning_rate": 4.690444541829612e-06, "logits/chosen": -3.1330864429473877, "logits/rejected": -2.9704153537750244, "logps/chosen": -63.85859298706055, "logps/rejected": -605.1448974609375, "loss": 0.0501, "rewards/accuracies": 1.0, "rewards/chosen": -0.11324409395456314, "rewards/margins": 5.548619747161865, "rewards/rejected": -5.661863803863525, "step": 20390 }, { "epoch": 0.24, "learning_rate": 4.68994085592035e-06, "logits/chosen": -3.114631175994873, "logits/rejected": -3.024564743041992, "logps/chosen": -44.401119232177734, "logps/rejected": -608.926513671875, "loss": 0.1575, "rewards/accuracies": 1.0, "rewards/chosen": -0.05944652110338211, "rewards/margins": 5.638063907623291, "rewards/rejected": -5.697510242462158, "step": 20400 }, { "epoch": 0.24, "learning_rate": 4.689436787653496e-06, "logits/chosen": -3.1724092960357666, "logits/rejected": -2.9832301139831543, "logps/chosen": -92.95437622070312, "logps/rejected": -548.2037353515625, "loss": 0.0474, "rewards/accuracies": 1.0, "rewards/chosen": -0.49143391847610474, "rewards/margins": 4.597516059875488, "rewards/rejected": -5.088950157165527, "step": 20410 }, { "epoch": 0.24, "learning_rate": 4.68893233711706e-06, "logits/chosen": -3.1324057579040527, "logits/rejected": -2.9559240341186523, "logps/chosen": -90.69054412841797, "logps/rejected": -893.2301025390625, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": -0.3931328356266022, "rewards/margins": 8.139505386352539, "rewards/rejected": -8.532639503479004, "step": 20420 }, { "epoch": 0.24, "learning_rate": 4.688427504399118e-06, "logits/chosen": -3.15311598777771, "logits/rejected": -2.9710023403167725, "logps/chosen": -127.8583984375, "logps/rejected": -806.1115112304688, "loss": 0.1179, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7531962990760803, "rewards/margins": 6.9044976234436035, "rewards/rejected": -7.657693386077881, "step": 20430 }, { "epoch": 0.24, "learning_rate": 4.6879222895878115e-06, "logits/chosen": -3.1603755950927734, "logits/rejected": -3.0433576107025146, "logps/chosen": -72.45915222167969, "logps/rejected": -656.7926025390625, "loss": 0.0402, "rewards/accuracies": 1.0, "rewards/chosen": -0.27014172077178955, "rewards/margins": 5.9041242599487305, "rewards/rejected": -6.1742658615112305, "step": 20440 }, { "epoch": 0.24, "learning_rate": 4.687416692771349e-06, "logits/chosen": -3.184669017791748, "logits/rejected": -3.073643207550049, "logps/chosen": -46.58293914794922, "logps/rejected": -535.968994140625, "loss": 0.0641, "rewards/accuracies": 1.0, "rewards/chosen": -0.04627007246017456, "rewards/margins": 4.940855503082275, "rewards/rejected": -4.987125396728516, "step": 20450 }, { "epoch": 0.24, "learning_rate": 4.6869107140380086e-06, "logits/chosen": -3.1719017028808594, "logits/rejected": -3.0186123847961426, "logps/chosen": -49.21614456176758, "logps/rejected": -624.9736328125, "loss": 0.0422, "rewards/accuracies": 1.0, "rewards/chosen": 0.0006412506336346269, "rewards/margins": 5.8558149337768555, "rewards/rejected": -5.8551740646362305, "step": 20460 }, { "epoch": 0.25, "learning_rate": 4.686404353476131e-06, "logits/chosen": -3.1253502368927, "logits/rejected": -2.959019422531128, "logps/chosen": -58.1236686706543, "logps/rejected": -717.4810791015625, "loss": 0.0421, "rewards/accuracies": 1.0, "rewards/chosen": -0.09820567816495895, "rewards/margins": 6.685973167419434, "rewards/rejected": -6.784178733825684, "step": 20470 }, { "epoch": 0.25, "learning_rate": 4.6858976111741265e-06, "logits/chosen": -3.110858678817749, "logits/rejected": -2.9544858932495117, "logps/chosen": -62.87971115112305, "logps/rejected": -780.816650390625, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": -0.12750931084156036, "rewards/margins": 7.277918338775635, "rewards/rejected": -7.4054274559021, "step": 20480 }, { "epoch": 0.25, "learning_rate": 4.685390487220469e-06, "logits/chosen": -3.1357617378234863, "logits/rejected": -2.872051477432251, "logps/chosen": -89.32067108154297, "logps/rejected": -930.09619140625, "loss": 0.1053, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3615974485874176, "rewards/margins": 8.541072845458984, "rewards/rejected": -8.902669906616211, "step": 20490 }, { "epoch": 0.25, "learning_rate": 4.684882981703702e-06, "logits/chosen": -3.1241769790649414, "logits/rejected": -2.963117837905884, "logps/chosen": -60.4195556640625, "logps/rejected": -652.3956298828125, "loss": 0.0342, "rewards/accuracies": 1.0, "rewards/chosen": -0.06114839389920235, "rewards/margins": 6.06362247467041, "rewards/rejected": -6.124770164489746, "step": 20500 }, { "epoch": 0.25, "learning_rate": 4.684375094712435e-06, "logits/chosen": -3.181990146636963, "logits/rejected": -3.016054630279541, "logps/chosen": -65.03981018066406, "logps/rejected": -776.82568359375, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/chosen": -0.09976494312286377, "rewards/margins": 7.2618560791015625, "rewards/rejected": -7.361620903015137, "step": 20510 }, { "epoch": 0.25, "learning_rate": 4.683866826335344e-06, "logits/chosen": -3.120903253555298, "logits/rejected": -3.0080292224884033, "logps/chosen": -80.40410614013672, "logps/rejected": -636.3934936523438, "loss": 0.0921, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.339280366897583, "rewards/margins": 5.635315895080566, "rewards/rejected": -5.97459602355957, "step": 20520 }, { "epoch": 0.25, "learning_rate": 4.68335817666117e-06, "logits/chosen": -3.1630778312683105, "logits/rejected": -3.047360897064209, "logps/chosen": -51.405372619628906, "logps/rejected": -595.7935791015625, "loss": 0.0334, "rewards/accuracies": 1.0, "rewards/chosen": -0.03078829124569893, "rewards/margins": 5.53576135635376, "rewards/rejected": -5.566548824310303, "step": 20530 }, { "epoch": 0.25, "learning_rate": 4.6828491457787224e-06, "logits/chosen": -3.162090539932251, "logits/rejected": -3.022359848022461, "logps/chosen": -35.128753662109375, "logps/rejected": -486.9461975097656, "loss": 0.1173, "rewards/accuracies": 1.0, "rewards/chosen": 0.10092952102422714, "rewards/margins": 4.602248191833496, "rewards/rejected": -4.50131893157959, "step": 20540 }, { "epoch": 0.25, "learning_rate": 4.682339733776876e-06, "logits/chosen": -3.1517302989959717, "logits/rejected": -3.1167330741882324, "logps/chosen": -18.01365089416504, "logps/rejected": -348.2733459472656, "loss": 0.0493, "rewards/accuracies": 1.0, "rewards/chosen": 0.14866399765014648, "rewards/margins": 3.2652618885040283, "rewards/rejected": -3.1165976524353027, "step": 20550 }, { "epoch": 0.25, "learning_rate": 4.6818299407445746e-06, "logits/chosen": -3.1080925464630127, "logits/rejected": -2.943044424057007, "logps/chosen": -51.00920486450195, "logps/rejected": -703.6500854492188, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/chosen": -0.029863521456718445, "rewards/margins": 6.6094841957092285, "rewards/rejected": -6.639348030090332, "step": 20560 }, { "epoch": 0.25, "learning_rate": 4.681319766770824e-06, "logits/chosen": -3.1665682792663574, "logits/rejected": -3.0183968544006348, "logps/chosen": -58.803611755371094, "logps/rejected": -659.9539184570312, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": -0.10241016000509262, "rewards/margins": 6.084175109863281, "rewards/rejected": -6.186584949493408, "step": 20570 }, { "epoch": 0.25, "learning_rate": 4.6808092119447016e-06, "logits/chosen": -3.137197494506836, "logits/rejected": -2.9882471561431885, "logps/chosen": -52.37938690185547, "logps/rejected": -613.3984375, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -0.05456286668777466, "rewards/margins": 5.691629409790039, "rewards/rejected": -5.746191501617432, "step": 20580 }, { "epoch": 0.25, "learning_rate": 4.680298276355346e-06, "logits/chosen": -3.162292242050171, "logits/rejected": -2.9451708793640137, "logps/chosen": -74.85265350341797, "logps/rejected": -965.5430908203125, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": -0.2118763029575348, "rewards/margins": 9.035435676574707, "rewards/rejected": -9.24731159210205, "step": 20590 }, { "epoch": 0.25, "learning_rate": 4.679786960091969e-06, "logits/chosen": -3.1477460861206055, "logits/rejected": -3.004594087600708, "logps/chosen": -41.95930480957031, "logps/rejected": -679.6458740234375, "loss": 0.0418, "rewards/accuracies": 1.0, "rewards/chosen": 0.03914860263466835, "rewards/margins": 6.446630954742432, "rewards/rejected": -6.407483100891113, "step": 20600 }, { "epoch": 0.25, "learning_rate": 4.679275263243841e-06, "logits/chosen": -3.1184279918670654, "logits/rejected": -2.883352756500244, "logps/chosen": -57.93937301635742, "logps/rejected": -747.4427490234375, "loss": 0.0587, "rewards/accuracies": 1.0, "rewards/chosen": -0.08895145356655121, "rewards/margins": 6.995521545410156, "rewards/rejected": -7.084474086761475, "step": 20610 }, { "epoch": 0.25, "learning_rate": 4.678763185900306e-06, "logits/chosen": -3.149320602416992, "logits/rejected": -3.032116651535034, "logps/chosen": -37.849891662597656, "logps/rejected": -574.5338745117188, "loss": 0.0956, "rewards/accuracies": 1.0, "rewards/chosen": 0.05389178544282913, "rewards/margins": 5.416475296020508, "rewards/rejected": -5.362583637237549, "step": 20620 }, { "epoch": 0.25, "learning_rate": 4.678250728150769e-06, "logits/chosen": -3.1209733486175537, "logits/rejected": -2.8858089447021484, "logps/chosen": -74.28134155273438, "logps/rejected": -892.3082275390625, "loss": 0.0999, "rewards/accuracies": 1.0, "rewards/chosen": -0.20202799141407013, "rewards/margins": 8.293027877807617, "rewards/rejected": -8.49505615234375, "step": 20630 }, { "epoch": 0.25, "learning_rate": 4.677737890084706e-06, "logits/chosen": -3.1321773529052734, "logits/rejected": -2.932528018951416, "logps/chosen": -77.1668930053711, "logps/rejected": -782.1102294921875, "loss": 0.0425, "rewards/accuracies": 1.0, "rewards/chosen": -0.2094285786151886, "rewards/margins": 7.216040134429932, "rewards/rejected": -7.425469875335693, "step": 20640 }, { "epoch": 0.25, "learning_rate": 4.677224671791655e-06, "logits/chosen": -3.157806873321533, "logits/rejected": -2.911989450454712, "logps/chosen": -68.434814453125, "logps/rejected": -869.0289306640625, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": -0.17333097755908966, "rewards/margins": 8.108062744140625, "rewards/rejected": -8.281394004821777, "step": 20650 }, { "epoch": 0.25, "learning_rate": 4.6767110733612234e-06, "logits/chosen": -3.1394214630126953, "logits/rejected": -2.833317995071411, "logps/chosen": -64.85669708251953, "logps/rejected": -721.4877319335938, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/chosen": -0.09727857261896133, "rewards/margins": 6.7200822830200195, "rewards/rejected": -6.817360877990723, "step": 20660 }, { "epoch": 0.25, "learning_rate": 4.6761970948830836e-06, "logits/chosen": -3.1538405418395996, "logits/rejected": -3.0266520977020264, "logps/chosen": -70.60830688476562, "logps/rejected": -596.3043212890625, "loss": 0.0535, "rewards/accuracies": 1.0, "rewards/chosen": -0.2931067645549774, "rewards/margins": 5.283350944519043, "rewards/rejected": -5.576458930969238, "step": 20670 }, { "epoch": 0.25, "learning_rate": 4.675682736446976e-06, "logits/chosen": -3.1367321014404297, "logits/rejected": -2.949990749359131, "logps/chosen": -61.41228103637695, "logps/rejected": -749.6744384765625, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": -0.14121007919311523, "rewards/margins": 6.953547477722168, "rewards/rejected": -7.0947585105896, "step": 20680 }, { "epoch": 0.25, "learning_rate": 4.675167998142705e-06, "logits/chosen": -3.1388378143310547, "logits/rejected": -2.8499763011932373, "logps/chosen": -53.82537078857422, "logps/rejected": -684.8147583007812, "loss": 0.0335, "rewards/accuracies": 1.0, "rewards/chosen": -0.06622101366519928, "rewards/margins": 6.376835346221924, "rewards/rejected": -6.443056583404541, "step": 20690 }, { "epoch": 0.25, "learning_rate": 4.674652880060142e-06, "logits/chosen": -3.1361188888549805, "logits/rejected": -2.869009494781494, "logps/chosen": -59.14558792114258, "logps/rejected": -801.9059448242188, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": -0.01592758484184742, "rewards/margins": 7.612158298492432, "rewards/rejected": -7.628086090087891, "step": 20700 }, { "epoch": 0.25, "learning_rate": 4.674137382289226e-06, "logits/chosen": -3.138827085494995, "logits/rejected": -2.9521961212158203, "logps/chosen": -51.79551315307617, "logps/rejected": -618.5023193359375, "loss": 0.0417, "rewards/accuracies": 1.0, "rewards/chosen": -0.0044609480537474155, "rewards/margins": 5.800566673278809, "rewards/rejected": -5.805027484893799, "step": 20710 }, { "epoch": 0.25, "learning_rate": 4.673621504919962e-06, "logits/chosen": -3.138218402862549, "logits/rejected": -3.0753722190856934, "logps/chosen": -27.806665420532227, "logps/rejected": -556.0427856445312, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": 0.1264820694923401, "rewards/margins": 5.319797992706299, "rewards/rejected": -5.1933159828186035, "step": 20720 }, { "epoch": 0.25, "learning_rate": 4.673105248042421e-06, "logits/chosen": -3.126716136932373, "logits/rejected": -2.8586719036102295, "logps/chosen": -94.84051513671875, "logps/rejected": -777.5572509765625, "loss": 0.1132, "rewards/accuracies": 1.0, "rewards/chosen": -0.3413955867290497, "rewards/margins": 7.027585029602051, "rewards/rejected": -7.368980407714844, "step": 20730 }, { "epoch": 0.25, "learning_rate": 4.672588611746738e-06, "logits/chosen": -3.089542865753174, "logits/rejected": -2.8407044410705566, "logps/chosen": -87.11442565917969, "logps/rejected": -771.3818969726562, "loss": 0.2183, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3102293610572815, "rewards/margins": 7.012915134429932, "rewards/rejected": -7.323144435882568, "step": 20740 }, { "epoch": 0.25, "learning_rate": 4.6720715961231175e-06, "logits/chosen": -3.1679368019104004, "logits/rejected": -2.963162899017334, "logps/chosen": -51.34537887573242, "logps/rejected": -557.9730224609375, "loss": 0.1819, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.09423637390136719, "rewards/margins": 5.0994672775268555, "rewards/rejected": -5.193703651428223, "step": 20750 }, { "epoch": 0.25, "learning_rate": 4.671554201261829e-06, "logits/chosen": -3.130373477935791, "logits/rejected": -2.974940538406372, "logps/chosen": -49.90995788574219, "logps/rejected": -746.1407470703125, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": -0.014276531524956226, "rewards/margins": 7.036177158355713, "rewards/rejected": -7.050453186035156, "step": 20760 }, { "epoch": 0.25, "learning_rate": 4.6710364272532094e-06, "logits/chosen": -3.144125461578369, "logits/rejected": -2.942232608795166, "logps/chosen": -63.908607482910156, "logps/rejected": -723.552978515625, "loss": 0.1016, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1702747344970703, "rewards/margins": 6.681761741638184, "rewards/rejected": -6.852036476135254, "step": 20770 }, { "epoch": 0.25, "learning_rate": 4.670518274187659e-06, "logits/chosen": -3.128535747528076, "logits/rejected": -2.997805118560791, "logps/chosen": -44.709529876708984, "logps/rejected": -532.8194580078125, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": -0.015352508053183556, "rewards/margins": 4.912554740905762, "rewards/rejected": -4.927908420562744, "step": 20780 }, { "epoch": 0.25, "learning_rate": 4.669999742155646e-06, "logits/chosen": -3.1372454166412354, "logits/rejected": -2.980907440185547, "logps/chosen": -38.80430603027344, "logps/rejected": -514.8583984375, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/chosen": 0.05151427909731865, "rewards/margins": 4.812926292419434, "rewards/rejected": -4.761412620544434, "step": 20790 }, { "epoch": 0.25, "learning_rate": 4.669480831247705e-06, "logits/chosen": -3.134749174118042, "logits/rejected": -2.9320712089538574, "logps/chosen": -52.50358963012695, "logps/rejected": -627.3798217773438, "loss": 0.09, "rewards/accuracies": 1.0, "rewards/chosen": -0.07314633578062057, "rewards/margins": 5.819455146789551, "rewards/rejected": -5.892600059509277, "step": 20800 }, { "epoch": 0.25, "learning_rate": 4.668961541554436e-06, "logits/chosen": -3.106598377227783, "logits/rejected": -2.8757472038269043, "logps/chosen": -56.685752868652344, "logps/rejected": -798.4443359375, "loss": 0.1855, "rewards/accuracies": 1.0, "rewards/chosen": -0.044587116688489914, "rewards/margins": 7.533974647521973, "rewards/rejected": -7.578561305999756, "step": 20810 }, { "epoch": 0.25, "learning_rate": 4.668441873166506e-06, "logits/chosen": -3.1135780811309814, "logits/rejected": -2.846151351928711, "logps/chosen": -63.840293884277344, "logps/rejected": -763.5201416015625, "loss": 0.0951, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.09980061650276184, "rewards/margins": 7.153582572937012, "rewards/rejected": -7.253383636474609, "step": 20820 }, { "epoch": 0.25, "learning_rate": 4.667921826174647e-06, "logits/chosen": -3.1146740913391113, "logits/rejected": -2.9843363761901855, "logps/chosen": -42.93743896484375, "logps/rejected": -604.5864868164062, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/chosen": 0.01278614066541195, "rewards/margins": 5.6728315353393555, "rewards/rejected": -5.6600446701049805, "step": 20830 }, { "epoch": 0.25, "learning_rate": 4.6674014006696596e-06, "logits/chosen": -3.152825117111206, "logits/rejected": -2.9675068855285645, "logps/chosen": -42.70073318481445, "logps/rejected": -616.7213745117188, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": 0.04304545000195503, "rewards/margins": 5.8228960037231445, "rewards/rejected": -5.779850959777832, "step": 20840 }, { "epoch": 0.25, "learning_rate": 4.666880596742406e-06, "logits/chosen": -3.132223606109619, "logits/rejected": -3.0185487270355225, "logps/chosen": -62.3441276550293, "logps/rejected": -443.80340576171875, "loss": 0.0992, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.18126563727855682, "rewards/margins": 3.888756513595581, "rewards/rejected": -4.070022106170654, "step": 20850 }, { "epoch": 0.25, "learning_rate": 4.6663594144838196e-06, "logits/chosen": -3.1149275302886963, "logits/rejected": -2.8842315673828125, "logps/chosen": -40.31236267089844, "logps/rejected": -572.925537109375, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": 0.08289666473865509, "rewards/margins": 5.418941497802734, "rewards/rejected": -5.336044788360596, "step": 20860 }, { "epoch": 0.25, "learning_rate": 4.6658378539848955e-06, "logits/chosen": -3.135625123977661, "logits/rejected": -2.935842990875244, "logps/chosen": -51.62400436401367, "logps/rejected": -650.78466796875, "loss": 0.056, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.0012954160338267684, "rewards/margins": 6.117443561553955, "rewards/rejected": -6.116147994995117, "step": 20870 }, { "epoch": 0.25, "learning_rate": 4.6653159153366976e-06, "logits/chosen": -3.135258913040161, "logits/rejected": -3.0141379833221436, "logps/chosen": -35.267303466796875, "logps/rejected": -565.3812255859375, "loss": 0.0408, "rewards/accuracies": 1.0, "rewards/chosen": 0.06486515700817108, "rewards/margins": 5.343172550201416, "rewards/rejected": -5.2783074378967285, "step": 20880 }, { "epoch": 0.25, "learning_rate": 4.664793598630355e-06, "logits/chosen": -3.1107096672058105, "logits/rejected": -2.763176441192627, "logps/chosen": -86.97471618652344, "logps/rejected": -790.5009765625, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": -0.30219346284866333, "rewards/margins": 7.196286201477051, "rewards/rejected": -7.49847936630249, "step": 20890 }, { "epoch": 0.25, "learning_rate": 4.664270903957062e-06, "logits/chosen": -3.0844836235046387, "logits/rejected": -2.7624144554138184, "logps/chosen": -72.51537322998047, "logps/rejected": -764.3514404296875, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/chosen": -0.1971697360277176, "rewards/margins": 7.042685031890869, "rewards/rejected": -7.239854335784912, "step": 20900 }, { "epoch": 0.25, "learning_rate": 4.6637478314080805e-06, "logits/chosen": -3.121886730194092, "logits/rejected": -2.9622740745544434, "logps/chosen": -56.24821853637695, "logps/rejected": -503.62139892578125, "loss": 0.163, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.11187410354614258, "rewards/margins": 4.547061443328857, "rewards/rejected": -4.658936023712158, "step": 20910 }, { "epoch": 0.25, "learning_rate": 4.663224381074738e-06, "logits/chosen": -3.150838851928711, "logits/rejected": -3.023629665374756, "logps/chosen": -34.84291076660156, "logps/rejected": -504.13720703125, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/chosen": 0.0944751724600792, "rewards/margins": 4.752280235290527, "rewards/rejected": -4.6578049659729, "step": 20920 }, { "epoch": 0.25, "learning_rate": 4.662700553048426e-06, "logits/chosen": -3.1272642612457275, "logits/rejected": -2.986649513244629, "logps/chosen": -38.54327392578125, "logps/rejected": -499.59197998046875, "loss": 0.0505, "rewards/accuracies": 1.0, "rewards/chosen": 0.02942492440342903, "rewards/margins": 4.641976833343506, "rewards/rejected": -4.612552165985107, "step": 20930 }, { "epoch": 0.25, "learning_rate": 4.662176347420604e-06, "logits/chosen": -3.106313943862915, "logits/rejected": -2.8869287967681885, "logps/chosen": -48.212425231933594, "logps/rejected": -574.4208984375, "loss": 0.0392, "rewards/accuracies": 1.0, "rewards/chosen": -0.007340550422668457, "rewards/margins": 5.35974645614624, "rewards/rejected": -5.367087364196777, "step": 20940 }, { "epoch": 0.25, "learning_rate": 4.661651764282797e-06, "logits/chosen": -3.149761438369751, "logits/rejected": -2.947329044342041, "logps/chosen": -64.19799041748047, "logps/rejected": -599.0150146484375, "loss": 0.1759, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.16089850664138794, "rewards/margins": 5.440340995788574, "rewards/rejected": -5.6012396812438965, "step": 20950 }, { "epoch": 0.25, "learning_rate": 4.661126803726596e-06, "logits/chosen": -3.190603733062744, "logits/rejected": -3.0062766075134277, "logps/chosen": -51.37822341918945, "logps/rejected": -575.9661865234375, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -0.09038980305194855, "rewards/margins": 5.264505386352539, "rewards/rejected": -5.354895114898682, "step": 20960 }, { "epoch": 0.25, "learning_rate": 4.660601465843656e-06, "logits/chosen": -3.111814022064209, "logits/rejected": -2.912104845046997, "logps/chosen": -76.479248046875, "logps/rejected": -594.8368530273438, "loss": 0.1353, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.28395146131515503, "rewards/margins": 5.273838043212891, "rewards/rejected": -5.5577898025512695, "step": 20970 }, { "epoch": 0.25, "learning_rate": 4.660075750725702e-06, "logits/chosen": -3.073850631713867, "logits/rejected": -2.9346604347229004, "logps/chosen": -46.21318817138672, "logps/rejected": -470.58154296875, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": -0.02358124405145645, "rewards/margins": 4.306628227233887, "rewards/rejected": -4.330209255218506, "step": 20980 }, { "epoch": 0.25, "learning_rate": 4.659549658464522e-06, "logits/chosen": -3.1278786659240723, "logits/rejected": -2.9022510051727295, "logps/chosen": -70.83270263671875, "logps/rejected": -640.8370361328125, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/chosen": -0.24103088676929474, "rewards/margins": 5.784502983093262, "rewards/rejected": -6.025534152984619, "step": 20990 }, { "epoch": 0.25, "learning_rate": 4.659023189151968e-06, "logits/chosen": -3.1292247772216797, "logits/rejected": -2.8416459560394287, "logps/chosen": -71.40373229980469, "logps/rejected": -767.5633544921875, "loss": 0.0323, "rewards/accuracies": 1.0, "rewards/chosen": -0.23604121804237366, "rewards/margins": 7.055106163024902, "rewards/rejected": -7.291146755218506, "step": 21000 }, { "epoch": 0.25, "eval_logits/chosen": -3.1189377307891846, "eval_logits/rejected": -2.6496493816375732, "eval_logps/chosen": -208.3129425048828, "eval_logps/rejected": -1081.09130859375, "eval_loss": 0.08719372749328613, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -1.4713268280029297, "eval_rewards/margins": 8.872331619262695, "eval_rewards/rejected": -10.343658447265625, "eval_runtime": 1.2153, "eval_samples_per_second": 4.114, "eval_steps_per_second": 2.468, "step": 21000 }, { "epoch": 0.25, "learning_rate": 4.658496342879962e-06, "logits/chosen": -3.1088082790374756, "logits/rejected": -2.9407787322998047, "logps/chosen": -48.6391716003418, "logps/rejected": -609.6959228515625, "loss": 0.0372, "rewards/accuracies": 1.0, "rewards/chosen": -0.0956990122795105, "rewards/margins": 5.6244635581970215, "rewards/rejected": -5.720162868499756, "step": 21010 }, { "epoch": 0.25, "learning_rate": 4.65796911974049e-06, "logits/chosen": -3.148682117462158, "logits/rejected": -2.884298801422119, "logps/chosen": -102.31629180908203, "logps/rejected": -623.441162109375, "loss": 0.0729, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.48806196451187134, "rewards/margins": 5.356266975402832, "rewards/rejected": -5.8443284034729, "step": 21020 }, { "epoch": 0.25, "learning_rate": 4.657441519825603e-06, "logits/chosen": -3.1576485633850098, "logits/rejected": -3.0705606937408447, "logps/chosen": -46.69727325439453, "logps/rejected": -546.8690185546875, "loss": 0.0627, "rewards/accuracies": 1.0, "rewards/chosen": -0.0454779751598835, "rewards/margins": 5.048475742340088, "rewards/rejected": -5.093954086303711, "step": 21030 }, { "epoch": 0.25, "learning_rate": 4.656913543227419e-06, "logits/chosen": -3.118344306945801, "logits/rejected": -2.8539204597473145, "logps/chosen": -49.55942916870117, "logps/rejected": -635.0240478515625, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/chosen": -0.003191572381183505, "rewards/margins": 5.968372344970703, "rewards/rejected": -5.971564292907715, "step": 21040 }, { "epoch": 0.25, "learning_rate": 4.6563851900381206e-06, "logits/chosen": -3.1653974056243896, "logits/rejected": -2.9094436168670654, "logps/chosen": -67.09235382080078, "logps/rejected": -666.9501342773438, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -0.19272087514400482, "rewards/margins": 6.075210094451904, "rewards/rejected": -6.2679314613342285, "step": 21050 }, { "epoch": 0.25, "learning_rate": 4.6558564603499565e-06, "logits/chosen": -3.103311061859131, "logits/rejected": -2.8609962463378906, "logps/chosen": -121.49961853027344, "logps/rejected": -704.4161376953125, "loss": 0.1922, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7656408548355103, "rewards/margins": 5.88161563873291, "rewards/rejected": -6.647256374359131, "step": 21060 }, { "epoch": 0.25, "learning_rate": 4.655327354255243e-06, "logits/chosen": -3.1146390438079834, "logits/rejected": -2.876457691192627, "logps/chosen": -48.75177764892578, "logps/rejected": -567.7652587890625, "loss": 0.0358, "rewards/accuracies": 1.0, "rewards/chosen": -0.028667354956269264, "rewards/margins": 5.266606330871582, "rewards/rejected": -5.295273303985596, "step": 21070 }, { "epoch": 0.25, "learning_rate": 4.654797871846359e-06, "logits/chosen": -3.1266939640045166, "logits/rejected": -2.9436421394348145, "logps/chosen": -65.63349914550781, "logps/rejected": -562.6676635742188, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": -0.22930879890918732, "rewards/margins": 5.008925914764404, "rewards/rejected": -5.2382354736328125, "step": 21080 }, { "epoch": 0.25, "learning_rate": 4.6542680132157515e-06, "logits/chosen": -3.1557445526123047, "logits/rejected": -3.0025267601013184, "logps/chosen": -57.71477127075195, "logps/rejected": -601.0220947265625, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": -0.1648263782262802, "rewards/margins": 5.465872287750244, "rewards/rejected": -5.630698204040527, "step": 21090 }, { "epoch": 0.25, "learning_rate": 4.653737778455932e-06, "logits/chosen": -3.119523525238037, "logits/rejected": -2.914829730987549, "logps/chosen": -58.6599235534668, "logps/rejected": -714.6707153320312, "loss": 0.1779, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1110520139336586, "rewards/margins": 6.65784215927124, "rewards/rejected": -6.768894195556641, "step": 21100 }, { "epoch": 0.25, "learning_rate": 4.653207167659478e-06, "logits/chosen": -3.083911657333374, "logits/rejected": -2.782923936843872, "logps/chosen": -73.6757583618164, "logps/rejected": -756.3480224609375, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/chosen": -0.19468307495117188, "rewards/margins": 6.947275638580322, "rewards/rejected": -7.141958713531494, "step": 21110 }, { "epoch": 0.25, "learning_rate": 4.652676180919033e-06, "logits/chosen": -3.1456925868988037, "logits/rejected": -2.971651077270508, "logps/chosen": -67.68075561523438, "logps/rejected": -408.4542541503906, "loss": 0.0956, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.23088541626930237, "rewards/margins": 3.465819835662842, "rewards/rejected": -3.6967053413391113, "step": 21120 }, { "epoch": 0.25, "learning_rate": 4.652144818327307e-06, "logits/chosen": -3.1221566200256348, "logits/rejected": -2.844532012939453, "logps/chosen": -68.57088470458984, "logps/rejected": -543.2449340820312, "loss": 0.052, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.15833710134029388, "rewards/margins": 4.898585319519043, "rewards/rejected": -5.056921482086182, "step": 21130 }, { "epoch": 0.25, "learning_rate": 4.6516130799770705e-06, "logits/chosen": -3.0898308753967285, "logits/rejected": -2.7941904067993164, "logps/chosen": -61.097618103027344, "logps/rejected": -772.5125732421875, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": -0.07062459737062454, "rewards/margins": 7.260115146636963, "rewards/rejected": -7.330739498138428, "step": 21140 }, { "epoch": 0.25, "learning_rate": 4.651080965961168e-06, "logits/chosen": -3.1290383338928223, "logits/rejected": -2.781822919845581, "logps/chosen": -89.240478515625, "logps/rejected": -853.001953125, "loss": 0.1046, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2866305708885193, "rewards/margins": 7.823558807373047, "rewards/rejected": -8.110189437866211, "step": 21150 }, { "epoch": 0.25, "learning_rate": 4.650548476372502e-06, "logits/chosen": -3.142556667327881, "logits/rejected": -2.9347572326660156, "logps/chosen": -39.65182876586914, "logps/rejected": -442.45391845703125, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/chosen": 0.021939029917120934, "rewards/margins": 4.057656764984131, "rewards/rejected": -4.035717964172363, "step": 21160 }, { "epoch": 0.25, "learning_rate": 4.650015611304046e-06, "logits/chosen": -3.1417300701141357, "logits/rejected": -2.9486136436462402, "logps/chosen": -52.2304801940918, "logps/rejected": -596.7706298828125, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/chosen": -0.08000197261571884, "rewards/margins": 5.492275238037109, "rewards/rejected": -5.572276592254639, "step": 21170 }, { "epoch": 0.25, "learning_rate": 4.649482370848835e-06, "logits/chosen": -3.1404495239257812, "logits/rejected": -2.747659683227539, "logps/chosen": -79.92478942871094, "logps/rejected": -857.4435424804688, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -0.28048020601272583, "rewards/margins": 7.885764122009277, "rewards/rejected": -8.16624641418457, "step": 21180 }, { "epoch": 0.25, "learning_rate": 4.648948755099972e-06, "logits/chosen": -3.113009214401245, "logits/rejected": -2.993257999420166, "logps/chosen": -28.34450912475586, "logps/rejected": -518.3154296875, "loss": 0.0927, "rewards/accuracies": 1.0, "rewards/chosen": 0.07597939670085907, "rewards/margins": 4.890580654144287, "rewards/rejected": -4.814601421356201, "step": 21190 }, { "epoch": 0.25, "learning_rate": 4.648414764150625e-06, "logits/chosen": -3.1314501762390137, "logits/rejected": -2.7880218029022217, "logps/chosen": -80.02281951904297, "logps/rejected": -643.6806030273438, "loss": 0.1127, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2871237099170685, "rewards/margins": 5.755596160888672, "rewards/rejected": -6.042719841003418, "step": 21200 }, { "epoch": 0.25, "learning_rate": 4.647880398094027e-06, "logits/chosen": -3.133239269256592, "logits/rejected": -2.9197921752929688, "logps/chosen": -52.107666015625, "logps/rejected": -653.6644287109375, "loss": 0.057, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.06112097576260567, "rewards/margins": 6.090977668762207, "rewards/rejected": -6.152098655700684, "step": 21210 }, { "epoch": 0.25, "learning_rate": 4.647345657023477e-06, "logits/chosen": -3.1174848079681396, "logits/rejected": -2.890465021133423, "logps/chosen": -57.850555419921875, "logps/rejected": -611.6325073242188, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -0.11459821462631226, "rewards/margins": 5.622052192687988, "rewards/rejected": -5.7366509437561035, "step": 21220 }, { "epoch": 0.25, "learning_rate": 4.646810541032339e-06, "logits/chosen": -3.0903000831604004, "logits/rejected": -2.8142573833465576, "logps/chosen": -99.52664184570312, "logps/rejected": -639.3023681640625, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -0.4952065050601959, "rewards/margins": 5.50566291809082, "rewards/rejected": -6.000869274139404, "step": 21230 }, { "epoch": 0.25, "learning_rate": 4.646275050214044e-06, "logits/chosen": -3.0950136184692383, "logits/rejected": -2.9204416275024414, "logps/chosen": -40.44902801513672, "logps/rejected": -563.7486572265625, "loss": 0.0491, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006893366808071733, "rewards/margins": 5.262001991271973, "rewards/rejected": -5.262690544128418, "step": 21240 }, { "epoch": 0.25, "learning_rate": 4.645739184662086e-06, "logits/chosen": -3.0959579944610596, "logits/rejected": -2.965752124786377, "logps/chosen": -46.69676208496094, "logps/rejected": -595.8588256835938, "loss": 0.1158, "rewards/accuracies": 1.0, "rewards/chosen": -0.06164707615971565, "rewards/margins": 5.502528190612793, "rewards/rejected": -5.564175605773926, "step": 21250 }, { "epoch": 0.25, "learning_rate": 4.645202944470025e-06, "logits/chosen": -3.1246209144592285, "logits/rejected": -2.9635415077209473, "logps/chosen": -51.319175720214844, "logps/rejected": -635.8771362304688, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": -0.056840430945158005, "rewards/margins": 5.918438911437988, "rewards/rejected": -5.975279331207275, "step": 21260 }, { "epoch": 0.25, "learning_rate": 4.644666329731489e-06, "logits/chosen": -3.10896635055542, "logits/rejected": -2.9484310150146484, "logps/chosen": -39.637794494628906, "logps/rejected": -634.7146606445312, "loss": 0.0515, "rewards/accuracies": 1.0, "rewards/chosen": 0.04479794204235077, "rewards/margins": 6.019249439239502, "rewards/rejected": -5.974451065063477, "step": 21270 }, { "epoch": 0.25, "learning_rate": 4.644129340540168e-06, "logits/chosen": -3.1088614463806152, "logits/rejected": -2.9885706901550293, "logps/chosen": -43.856101989746094, "logps/rejected": -460.18902587890625, "loss": 0.1064, "rewards/accuracies": 1.0, "rewards/chosen": -0.016004860401153564, "rewards/margins": 4.214510917663574, "rewards/rejected": -4.230515956878662, "step": 21280 }, { "epoch": 0.25, "learning_rate": 4.6435919769898186e-06, "logits/chosen": -3.1141042709350586, "logits/rejected": -2.930093288421631, "logps/chosen": -65.54849243164062, "logps/rejected": -676.5613403320312, "loss": 0.0922, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2394494265317917, "rewards/margins": 6.155970096588135, "rewards/rejected": -6.395419120788574, "step": 21290 }, { "epoch": 0.25, "learning_rate": 4.643054239174264e-06, "logits/chosen": -3.1086113452911377, "logits/rejected": -2.893556833267212, "logps/chosen": -60.03102493286133, "logps/rejected": -647.1134033203125, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": -0.09075647592544556, "rewards/margins": 5.982331275939941, "rewards/rejected": -6.0730881690979, "step": 21300 }, { "epoch": 0.26, "learning_rate": 4.642516127187391e-06, "logits/chosen": -3.118882894515991, "logits/rejected": -3.070383310317993, "logps/chosen": -28.280384063720703, "logps/rejected": -506.29766845703125, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": 0.06315429508686066, "rewards/margins": 4.749765396118164, "rewards/rejected": -4.686610221862793, "step": 21310 }, { "epoch": 0.26, "learning_rate": 4.641977641123153e-06, "logits/chosen": -3.084855556488037, "logits/rejected": -2.8068060874938965, "logps/chosen": -63.0046501159668, "logps/rejected": -617.1644287109375, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": -0.15548929572105408, "rewards/margins": 5.628456115722656, "rewards/rejected": -5.783945560455322, "step": 21320 }, { "epoch": 0.26, "learning_rate": 4.641438781075568e-06, "logits/chosen": -3.176027536392212, "logits/rejected": -2.987643241882324, "logps/chosen": -66.73341369628906, "logps/rejected": -661.2540283203125, "loss": 0.0966, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1899033486843109, "rewards/margins": 6.016347408294678, "rewards/rejected": -6.2062506675720215, "step": 21330 }, { "epoch": 0.26, "learning_rate": 4.6408995471387195e-06, "logits/chosen": -3.135262966156006, "logits/rejected": -2.8834493160247803, "logps/chosen": -63.5705680847168, "logps/rejected": -753.5807495117188, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/chosen": -0.15039286017417908, "rewards/margins": 6.991552829742432, "rewards/rejected": -7.141944885253906, "step": 21340 }, { "epoch": 0.26, "learning_rate": 4.640359939406756e-06, "logits/chosen": -3.08672833442688, "logits/rejected": -2.8182806968688965, "logps/chosen": -62.21854782104492, "logps/rejected": -887.0632934570312, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -0.1386267989873886, "rewards/margins": 8.33357048034668, "rewards/rejected": -8.472196578979492, "step": 21350 }, { "epoch": 0.26, "learning_rate": 4.6398199579738925e-06, "logits/chosen": -3.093959093093872, "logits/rejected": -2.7168021202087402, "logps/chosen": -106.6027603149414, "logps/rejected": -773.9488525390625, "loss": 0.0886, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.4658409655094147, "rewards/margins": 6.874690055847168, "rewards/rejected": -7.340531349182129, "step": 21360 }, { "epoch": 0.26, "learning_rate": 4.639279602934407e-06, "logits/chosen": -3.138981342315674, "logits/rejected": -2.797701120376587, "logps/chosen": -69.13346862792969, "logps/rejected": -759.9215698242188, "loss": 0.061, "rewards/accuracies": 1.0, "rewards/chosen": -0.19968734681606293, "rewards/margins": 7.012319087982178, "rewards/rejected": -7.212006568908691, "step": 21370 }, { "epoch": 0.26, "learning_rate": 4.638738874382644e-06, "logits/chosen": -3.1176648139953613, "logits/rejected": -2.9147701263427734, "logps/chosen": -59.24535369873047, "logps/rejected": -659.1978759765625, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/chosen": -0.10709498077630997, "rewards/margins": 6.0814924240112305, "rewards/rejected": -6.188587665557861, "step": 21380 }, { "epoch": 0.26, "learning_rate": 4.638197772413013e-06, "logits/chosen": -3.0977025032043457, "logits/rejected": -3.0408027172088623, "logps/chosen": -29.779338836669922, "logps/rejected": -471.2515563964844, "loss": 0.05, "rewards/accuracies": 1.0, "rewards/chosen": 0.07009489089250565, "rewards/margins": 4.413729667663574, "rewards/rejected": -4.343635559082031, "step": 21390 }, { "epoch": 0.26, "learning_rate": 4.637656297119991e-06, "logits/chosen": -3.11338210105896, "logits/rejected": -2.945996046066284, "logps/chosen": -57.9028205871582, "logps/rejected": -468.06085205078125, "loss": 0.0372, "rewards/accuracies": 1.0, "rewards/chosen": -0.16365739703178406, "rewards/margins": 4.144266605377197, "rewards/rejected": -4.307923316955566, "step": 21400 }, { "epoch": 0.26, "learning_rate": 4.637114448598116e-06, "logits/chosen": -3.12194561958313, "logits/rejected": -2.901731252670288, "logps/chosen": -59.12493896484375, "logps/rejected": -664.4744873046875, "loss": 0.1046, "rewards/accuracies": 1.0, "rewards/chosen": -0.1508481502532959, "rewards/margins": 6.097506999969482, "rewards/rejected": -6.248354911804199, "step": 21410 }, { "epoch": 0.26, "learning_rate": 4.6365722269419934e-06, "logits/chosen": -3.060572624206543, "logits/rejected": -2.7360150814056396, "logps/chosen": -91.2039794921875, "logps/rejected": -790.9251098632812, "loss": 0.0317, "rewards/accuracies": 1.0, "rewards/chosen": -0.3700120747089386, "rewards/margins": 7.125182151794434, "rewards/rejected": -7.4951934814453125, "step": 21420 }, { "epoch": 0.26, "learning_rate": 4.636029632246295e-06, "logits/chosen": -3.087390899658203, "logits/rejected": -2.92354154586792, "logps/chosen": -42.498748779296875, "logps/rejected": -595.4437255859375, "loss": 0.0445, "rewards/accuracies": 1.0, "rewards/chosen": 0.0022575347684323788, "rewards/margins": 5.583422660827637, "rewards/rejected": -5.581164360046387, "step": 21430 }, { "epoch": 0.26, "learning_rate": 4.6354866646057554e-06, "logits/chosen": -3.092801332473755, "logits/rejected": -2.7058792114257812, "logps/chosen": -69.15699768066406, "logps/rejected": -806.5897216796875, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -0.1322021186351776, "rewards/margins": 7.5467705726623535, "rewards/rejected": -7.6789727210998535, "step": 21440 }, { "epoch": 0.26, "learning_rate": 4.634943324115174e-06, "logits/chosen": -3.0933029651641846, "logits/rejected": -2.804150104522705, "logps/chosen": -89.77125549316406, "logps/rejected": -675.4847412109375, "loss": 0.174, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.4188772737979889, "rewards/margins": 5.944670677185059, "rewards/rejected": -6.363548278808594, "step": 21450 }, { "epoch": 0.26, "learning_rate": 4.6343996108694184e-06, "logits/chosen": -3.1141486167907715, "logits/rejected": -2.9988462924957275, "logps/chosen": -37.546669006347656, "logps/rejected": -537.1214599609375, "loss": 0.0458, "rewards/accuracies": 1.0, "rewards/chosen": 0.01733170822262764, "rewards/margins": 5.009137153625488, "rewards/rejected": -4.991805076599121, "step": 21460 }, { "epoch": 0.26, "learning_rate": 4.6338555249634185e-06, "logits/chosen": -3.092869997024536, "logits/rejected": -2.8502161502838135, "logps/chosen": -54.133056640625, "logps/rejected": -630.3511962890625, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": -0.07958410680294037, "rewards/margins": 5.836966514587402, "rewards/rejected": -5.916550636291504, "step": 21470 }, { "epoch": 0.26, "learning_rate": 4.633311066492171e-06, "logits/chosen": -3.102015256881714, "logits/rejected": -2.813741445541382, "logps/chosen": -62.695281982421875, "logps/rejected": -722.9661865234375, "loss": 0.0317, "rewards/accuracies": 1.0, "rewards/chosen": -0.15486010909080505, "rewards/margins": 6.680131435394287, "rewards/rejected": -6.834990501403809, "step": 21480 }, { "epoch": 0.26, "learning_rate": 4.632766235550736e-06, "logits/chosen": -3.1282334327697754, "logits/rejected": -2.9031379222869873, "logps/chosen": -52.519874572753906, "logps/rejected": -585.4118041992188, "loss": 0.1031, "rewards/accuracies": 1.0, "rewards/chosen": -0.1053682416677475, "rewards/margins": 5.358893394470215, "rewards/rejected": -5.464261531829834, "step": 21490 }, { "epoch": 0.26, "learning_rate": 4.63222103223424e-06, "logits/chosen": -3.124220371246338, "logits/rejected": -2.87018084526062, "logps/chosen": -62.1193733215332, "logps/rejected": -676.5057373046875, "loss": 0.0334, "rewards/accuracies": 1.0, "rewards/chosen": -0.13224896788597107, "rewards/margins": 6.244350910186768, "rewards/rejected": -6.376599311828613, "step": 21500 }, { "epoch": 0.26, "learning_rate": 4.631675456637874e-06, "logits/chosen": -3.140153169631958, "logits/rejected": -3.0148673057556152, "logps/chosen": -36.66939163208008, "logps/rejected": -475.4405822753906, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": 0.002975229872390628, "rewards/margins": 4.405434608459473, "rewards/rejected": -4.402459144592285, "step": 21510 }, { "epoch": 0.26, "learning_rate": 4.631129508856893e-06, "logits/chosen": -3.1105778217315674, "logits/rejected": -2.947582483291626, "logps/chosen": -57.761566162109375, "logps/rejected": -434.314208984375, "loss": 0.2037, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14874674379825592, "rewards/margins": 3.814082384109497, "rewards/rejected": -3.96282958984375, "step": 21520 }, { "epoch": 0.26, "learning_rate": 4.6305831889866195e-06, "logits/chosen": -3.0969700813293457, "logits/rejected": -2.724327802658081, "logps/chosen": -72.63294982910156, "logps/rejected": -813.1580200195312, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -0.19538316130638123, "rewards/margins": 7.525572776794434, "rewards/rejected": -7.720956325531006, "step": 21530 }, { "epoch": 0.26, "learning_rate": 4.630036497122438e-06, "logits/chosen": -3.1031174659729004, "logits/rejected": -2.7541074752807617, "logps/chosen": -77.3917007446289, "logps/rejected": -791.8005981445312, "loss": 0.0308, "rewards/accuracies": 1.0, "rewards/chosen": -0.21682226657867432, "rewards/margins": 7.311563968658447, "rewards/rejected": -7.528386116027832, "step": 21540 }, { "epoch": 0.26, "learning_rate": 4.6294894333598e-06, "logits/chosen": -3.128859758377075, "logits/rejected": -2.769075870513916, "logps/chosen": -71.62309265136719, "logps/rejected": -843.7412109375, "loss": 0.1071, "rewards/accuracies": 1.0, "rewards/chosen": -0.21079769730567932, "rewards/margins": 7.8213605880737305, "rewards/rejected": -8.032158851623535, "step": 21550 }, { "epoch": 0.26, "learning_rate": 4.628941997794222e-06, "logits/chosen": -3.1517162322998047, "logits/rejected": -2.9836621284484863, "logps/chosen": -68.04467010498047, "logps/rejected": -518.0068969726562, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": -0.2801102101802826, "rewards/margins": 4.531972885131836, "rewards/rejected": -4.812082767486572, "step": 21560 }, { "epoch": 0.26, "learning_rate": 4.6283941905212835e-06, "logits/chosen": -3.0872879028320312, "logits/rejected": -2.8746650218963623, "logps/chosen": -46.473812103271484, "logps/rejected": -605.1080322265625, "loss": 0.0418, "rewards/accuracies": 1.0, "rewards/chosen": -0.04371105879545212, "rewards/margins": 5.621489524841309, "rewards/rejected": -5.665201187133789, "step": 21570 }, { "epoch": 0.26, "learning_rate": 4.627846011636631e-06, "logits/chosen": -3.143312931060791, "logits/rejected": -2.9678702354431152, "logps/chosen": -65.98531341552734, "logps/rejected": -518.3528442382812, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/chosen": -0.23140642046928406, "rewards/margins": 4.553973197937012, "rewards/rejected": -4.785379886627197, "step": 21580 }, { "epoch": 0.26, "learning_rate": 4.627297461235975e-06, "logits/chosen": -3.128143787384033, "logits/rejected": -2.8761940002441406, "logps/chosen": -69.71527099609375, "logps/rejected": -758.5103149414062, "loss": 0.1359, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.20083370804786682, "rewards/margins": 6.997138977050781, "rewards/rejected": -7.197972297668457, "step": 21590 }, { "epoch": 0.26, "learning_rate": 4.6267485394150894e-06, "logits/chosen": -3.124363422393799, "logits/rejected": -2.95914626121521, "logps/chosen": -43.76774978637695, "logps/rejected": -503.22607421875, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": -0.03806976601481438, "rewards/margins": 4.614416599273682, "rewards/rejected": -4.6524858474731445, "step": 21600 }, { "epoch": 0.26, "learning_rate": 4.626199246269817e-06, "logits/chosen": -3.0686333179473877, "logits/rejected": -2.783172130584717, "logps/chosen": -80.57388305664062, "logps/rejected": -724.1376342773438, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -0.2622762620449066, "rewards/margins": 6.585212707519531, "rewards/rejected": -6.847489833831787, "step": 21610 }, { "epoch": 0.26, "learning_rate": 4.6256495818960605e-06, "logits/chosen": -3.0980873107910156, "logits/rejected": -2.807953357696533, "logps/chosen": -86.75646209716797, "logps/rejected": -849.5108642578125, "loss": 0.0899, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.36540499329566956, "rewards/margins": 7.719666481018066, "rewards/rejected": -8.085070610046387, "step": 21620 }, { "epoch": 0.26, "learning_rate": 4.625099546389791e-06, "logits/chosen": -3.1159160137176514, "logits/rejected": -2.8881802558898926, "logps/chosen": -61.1143913269043, "logps/rejected": -755.9893188476562, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": -0.12749400734901428, "rewards/margins": 7.044671535491943, "rewards/rejected": -7.1721649169921875, "step": 21630 }, { "epoch": 0.26, "learning_rate": 4.624549139847042e-06, "logits/chosen": -3.113168239593506, "logits/rejected": -2.84789776802063, "logps/chosen": -50.76645278930664, "logps/rejected": -706.2063598632812, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -0.054881203919649124, "rewards/margins": 6.618531703948975, "rewards/rejected": -6.673412322998047, "step": 21640 }, { "epoch": 0.26, "learning_rate": 4.623998362363916e-06, "logits/chosen": -3.0893237590789795, "logits/rejected": -2.847822666168213, "logps/chosen": -61.9769172668457, "logps/rejected": -676.2229614257812, "loss": 0.0375, "rewards/accuracies": 1.0, "rewards/chosen": -0.11330138146877289, "rewards/margins": 6.245621204376221, "rewards/rejected": -6.358922481536865, "step": 21650 }, { "epoch": 0.26, "learning_rate": 4.623447214036574e-06, "logits/chosen": -3.093780040740967, "logits/rejected": -2.937288284301758, "logps/chosen": -48.273902893066406, "logps/rejected": -700.4605102539062, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": -0.0368785485625267, "rewards/margins": 6.576569557189941, "rewards/rejected": -6.613447666168213, "step": 21660 }, { "epoch": 0.26, "learning_rate": 4.622895694961246e-06, "logits/chosen": -3.099147081375122, "logits/rejected": -2.974142551422119, "logps/chosen": -52.12642288208008, "logps/rejected": -572.3953857421875, "loss": 0.1107, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.11015448719263077, "rewards/margins": 5.225838661193848, "rewards/rejected": -5.335992813110352, "step": 21670 }, { "epoch": 0.26, "learning_rate": 4.622343805234225e-06, "logits/chosen": -3.1132545471191406, "logits/rejected": -2.8958537578582764, "logps/chosen": -58.567665100097656, "logps/rejected": -631.3878784179688, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": -0.08534447848796844, "rewards/margins": 5.831911563873291, "rewards/rejected": -5.9172563552856445, "step": 21680 }, { "epoch": 0.26, "learning_rate": 4.621791544951871e-06, "logits/chosen": -3.088904857635498, "logits/rejected": -2.7795541286468506, "logps/chosen": -60.50974655151367, "logps/rejected": -587.4144287109375, "loss": 0.0392, "rewards/accuracies": 1.0, "rewards/chosen": -0.08430176228284836, "rewards/margins": 5.392380714416504, "rewards/rejected": -5.476681709289551, "step": 21690 }, { "epoch": 0.26, "learning_rate": 4.6212389142106065e-06, "logits/chosen": -3.1156253814697266, "logits/rejected": -2.8529469966888428, "logps/chosen": -57.47088623046875, "logps/rejected": -633.7588500976562, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/chosen": -0.09875887632369995, "rewards/margins": 5.85443115234375, "rewards/rejected": -5.953189849853516, "step": 21700 }, { "epoch": 0.26, "learning_rate": 4.620685913106919e-06, "logits/chosen": -3.162290334701538, "logits/rejected": -2.941775321960449, "logps/chosen": -47.173919677734375, "logps/rejected": -624.6160888671875, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -0.020350947976112366, "rewards/margins": 5.83905553817749, "rewards/rejected": -5.859406471252441, "step": 21710 }, { "epoch": 0.26, "learning_rate": 4.62013254173736e-06, "logits/chosen": -3.0603537559509277, "logits/rejected": -2.919954538345337, "logps/chosen": -45.59920883178711, "logps/rejected": -643.2972412109375, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": -0.05893097445368767, "rewards/margins": 5.971962928771973, "rewards/rejected": -6.030893802642822, "step": 21720 }, { "epoch": 0.26, "learning_rate": 4.619578800198549e-06, "logits/chosen": -3.093264579772949, "logits/rejected": -2.7952144145965576, "logps/chosen": -68.46669006347656, "logps/rejected": -669.7880859375, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": -0.0879017561674118, "rewards/margins": 6.2125349044799805, "rewards/rejected": -6.3004374504089355, "step": 21730 }, { "epoch": 0.26, "learning_rate": 4.619024688587166e-06, "logits/chosen": -3.096623659133911, "logits/rejected": -2.7886416912078857, "logps/chosen": -55.42578887939453, "logps/rejected": -708.6956787109375, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": -0.06268550455570221, "rewards/margins": 6.634152889251709, "rewards/rejected": -6.69683837890625, "step": 21740 }, { "epoch": 0.26, "learning_rate": 4.618470206999957e-06, "logits/chosen": -3.156271457672119, "logits/rejected": -3.0537960529327393, "logps/chosen": -34.33808135986328, "logps/rejected": -557.224853515625, "loss": 0.0436, "rewards/accuracies": 1.0, "rewards/chosen": 0.03983963280916214, "rewards/margins": 5.220468997955322, "rewards/rejected": -5.180628776550293, "step": 21750 }, { "epoch": 0.26, "learning_rate": 4.617915355533734e-06, "logits/chosen": -3.1250369548797607, "logits/rejected": -2.732130765914917, "logps/chosen": -58.05432891845703, "logps/rejected": -721.8853149414062, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": -0.03464460372924805, "rewards/margins": 6.7901763916015625, "rewards/rejected": -6.824821472167969, "step": 21760 }, { "epoch": 0.26, "learning_rate": 4.617360134285373e-06, "logits/chosen": -3.117593765258789, "logits/rejected": -2.7900009155273438, "logps/chosen": -61.07890701293945, "logps/rejected": -721.9984130859375, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -0.028153831139206886, "rewards/margins": 6.795071601867676, "rewards/rejected": -6.823225975036621, "step": 21770 }, { "epoch": 0.26, "learning_rate": 4.616804543351811e-06, "logits/chosen": -3.147059440612793, "logits/rejected": -2.9509387016296387, "logps/chosen": -83.87166595458984, "logps/rejected": -620.7733154296875, "loss": 0.1834, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.39928138256073, "rewards/margins": 5.426806926727295, "rewards/rejected": -5.8260884284973145, "step": 21780 }, { "epoch": 0.26, "learning_rate": 4.616248582830058e-06, "logits/chosen": -3.101336717605591, "logits/rejected": -2.9545583724975586, "logps/chosen": -47.75090408325195, "logps/rejected": -539.19970703125, "loss": 0.0413, "rewards/accuracies": 1.0, "rewards/chosen": -0.06089178845286369, "rewards/margins": 4.955605983734131, "rewards/rejected": -5.016497611999512, "step": 21790 }, { "epoch": 0.26, "learning_rate": 4.6156922528171775e-06, "logits/chosen": -3.1323935985565186, "logits/rejected": -2.9042675495147705, "logps/chosen": -41.97615432739258, "logps/rejected": -562.9465942382812, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": 0.020053168758749962, "rewards/margins": 5.25866174697876, "rewards/rejected": -5.238608360290527, "step": 21800 }, { "epoch": 0.26, "learning_rate": 4.615135553410307e-06, "logits/chosen": -3.067786455154419, "logits/rejected": -2.915066957473755, "logps/chosen": -36.48316192626953, "logps/rejected": -467.07684326171875, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/chosen": 0.030110513791441917, "rewards/margins": 4.338517665863037, "rewards/rejected": -4.308406829833984, "step": 21810 }, { "epoch": 0.26, "learning_rate": 4.614578484706643e-06, "logits/chosen": -3.080021381378174, "logits/rejected": -2.826787233352661, "logps/chosen": -46.61508560180664, "logps/rejected": -659.769775390625, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -0.030871037393808365, "rewards/margins": 6.167201995849609, "rewards/rejected": -6.198072910308838, "step": 21820 }, { "epoch": 0.26, "learning_rate": 4.614021046803449e-06, "logits/chosen": -3.072070360183716, "logits/rejected": -2.8426990509033203, "logps/chosen": -63.52638626098633, "logps/rejected": -659.6534423828125, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": -0.132460355758667, "rewards/margins": 6.089032173156738, "rewards/rejected": -6.221491813659668, "step": 21830 }, { "epoch": 0.26, "learning_rate": 4.613463239798052e-06, "logits/chosen": -3.148507595062256, "logits/rejected": -3.018770694732666, "logps/chosen": -48.47768783569336, "logps/rejected": -593.8410034179688, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": -0.048516277223825455, "rewards/margins": 5.500765800476074, "rewards/rejected": -5.549282073974609, "step": 21840 }, { "epoch": 0.26, "learning_rate": 4.612905063787843e-06, "logits/chosen": -3.134467124938965, "logits/rejected": -2.9449713230133057, "logps/chosen": -51.141212463378906, "logps/rejected": -578.6129150390625, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": -0.048405908048152924, "rewards/margins": 5.357636451721191, "rewards/rejected": -5.406042575836182, "step": 21850 }, { "epoch": 0.26, "learning_rate": 4.6123465188702775e-06, "logits/chosen": -3.1556389331817627, "logits/rejected": -2.9913852214813232, "logps/chosen": -53.42987823486328, "logps/rejected": -557.5942993164062, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": -0.0881529301404953, "rewards/margins": 5.117852210998535, "rewards/rejected": -5.206005573272705, "step": 21860 }, { "epoch": 0.26, "learning_rate": 4.611787605142877e-06, "logits/chosen": -3.1183829307556152, "logits/rejected": -2.7984228134155273, "logps/chosen": -55.50640106201172, "logps/rejected": -728.08251953125, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": -0.07261080294847488, "rewards/margins": 6.829747676849365, "rewards/rejected": -6.902358055114746, "step": 21870 }, { "epoch": 0.26, "learning_rate": 4.611228322703225e-06, "logits/chosen": -3.075497627258301, "logits/rejected": -2.856069803237915, "logps/chosen": -64.63838195800781, "logps/rejected": -689.4146728515625, "loss": 0.1031, "rewards/accuracies": 1.0, "rewards/chosen": -0.16538557410240173, "rewards/margins": 6.325342178344727, "rewards/rejected": -6.490727424621582, "step": 21880 }, { "epoch": 0.26, "learning_rate": 4.610668671648973e-06, "logits/chosen": -3.1258676052093506, "logits/rejected": -2.8439836502075195, "logps/chosen": -65.10958862304688, "logps/rejected": -741.3134155273438, "loss": 0.1061, "rewards/accuracies": 1.0, "rewards/chosen": -0.12099556624889374, "rewards/margins": 6.904644012451172, "rewards/rejected": -7.025639533996582, "step": 21890 }, { "epoch": 0.26, "learning_rate": 4.610108652077833e-06, "logits/chosen": -3.075352430343628, "logits/rejected": -2.9570717811584473, "logps/chosen": -66.34516906738281, "logps/rejected": -604.6192626953125, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": -0.2542700171470642, "rewards/margins": 5.40455961227417, "rewards/rejected": -5.658829689025879, "step": 21900 }, { "epoch": 0.26, "learning_rate": 4.609548264087582e-06, "logits/chosen": -3.0894534587860107, "logits/rejected": -2.6803696155548096, "logps/chosen": -85.55030059814453, "logps/rejected": -862.6126708984375, "loss": 0.0309, "rewards/accuracies": 1.0, "rewards/chosen": -0.2920714020729065, "rewards/margins": 7.91655969619751, "rewards/rejected": -8.20863151550293, "step": 21910 }, { "epoch": 0.26, "learning_rate": 4.608987507776063e-06, "logits/chosen": -3.155576229095459, "logits/rejected": -3.009697198867798, "logps/chosen": -40.908538818359375, "logps/rejected": -491.92437744140625, "loss": 0.0819, "rewards/accuracies": 1.0, "rewards/chosen": 0.03092380240559578, "rewards/margins": 4.5677289962768555, "rewards/rejected": -4.536805152893066, "step": 21920 }, { "epoch": 0.26, "learning_rate": 4.608426383241184e-06, "logits/chosen": -3.1478869915008545, "logits/rejected": -2.913083553314209, "logps/chosen": -70.20276641845703, "logps/rejected": -685.904541015625, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": -0.25134387612342834, "rewards/margins": 6.231573581695557, "rewards/rejected": -6.482916831970215, "step": 21930 }, { "epoch": 0.26, "learning_rate": 4.607864890580913e-06, "logits/chosen": -3.0578055381774902, "logits/rejected": -2.7060699462890625, "logps/chosen": -106.1322250366211, "logps/rejected": -949.0149536132812, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": -0.48875322937965393, "rewards/margins": 8.589823722839355, "rewards/rejected": -9.078577041625977, "step": 21940 }, { "epoch": 0.26, "learning_rate": 4.607303029893287e-06, "logits/chosen": -3.1484391689300537, "logits/rejected": -3.022922992706299, "logps/chosen": -41.77128219604492, "logps/rejected": -569.1937255859375, "loss": 0.0363, "rewards/accuracies": 1.0, "rewards/chosen": -0.008922788314521313, "rewards/margins": 5.301614761352539, "rewards/rejected": -5.310537815093994, "step": 21950 }, { "epoch": 0.26, "learning_rate": 4.606740801276404e-06, "logits/chosen": -3.1170449256896973, "logits/rejected": -2.8565683364868164, "logps/chosen": -68.39595794677734, "logps/rejected": -585.4613647460938, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": -0.21699348092079163, "rewards/margins": 5.238879203796387, "rewards/rejected": -5.455873012542725, "step": 21960 }, { "epoch": 0.26, "learning_rate": 4.606178204828429e-06, "logits/chosen": -3.118394374847412, "logits/rejected": -2.8659398555755615, "logps/chosen": -62.748313903808594, "logps/rejected": -668.99267578125, "loss": 0.0397, "rewards/accuracies": 1.0, "rewards/chosen": -0.17018380761146545, "rewards/margins": 6.132913112640381, "rewards/rejected": -6.303097248077393, "step": 21970 }, { "epoch": 0.26, "learning_rate": 4.6056152406475895e-06, "logits/chosen": -3.1399242877960205, "logits/rejected": -2.939962387084961, "logps/chosen": -72.05432891845703, "logps/rejected": -501.93994140625, "loss": 0.1074, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.26017728447914124, "rewards/margins": 4.377857208251953, "rewards/rejected": -4.638034343719482, "step": 21980 }, { "epoch": 0.26, "learning_rate": 4.605051908832177e-06, "logits/chosen": -3.1013360023498535, "logits/rejected": -3.0353195667266846, "logps/chosen": -24.200511932373047, "logps/rejected": -397.27423095703125, "loss": 0.061, "rewards/accuracies": 1.0, "rewards/chosen": 0.1139548271894455, "rewards/margins": 3.718924045562744, "rewards/rejected": -3.604969024658203, "step": 21990 }, { "epoch": 0.26, "learning_rate": 4.604488209480547e-06, "logits/chosen": -3.154310941696167, "logits/rejected": -3.0188114643096924, "logps/chosen": -50.711753845214844, "logps/rejected": -568.7420043945312, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/chosen": -0.09930745512247086, "rewards/margins": 5.209636211395264, "rewards/rejected": -5.308944225311279, "step": 22000 }, { "epoch": 0.26, "learning_rate": 4.603924142691121e-06, "logits/chosen": -3.111082077026367, "logits/rejected": -2.9007320404052734, "logps/chosen": -84.9130630493164, "logps/rejected": -592.7562255859375, "loss": 0.0656, "rewards/accuracies": 1.0, "rewards/chosen": -0.4222756326198578, "rewards/margins": 5.1240763664245605, "rewards/rejected": -5.546352386474609, "step": 22010 }, { "epoch": 0.26, "learning_rate": 4.603359708562383e-06, "logits/chosen": -3.0971531867980957, "logits/rejected": -2.934065341949463, "logps/chosen": -83.66316223144531, "logps/rejected": -605.8751220703125, "loss": 0.0617, "rewards/accuracies": 1.0, "rewards/chosen": -0.453618586063385, "rewards/margins": 5.214969158172607, "rewards/rejected": -5.668587684631348, "step": 22020 }, { "epoch": 0.26, "learning_rate": 4.602794907192882e-06, "logits/chosen": -3.1221861839294434, "logits/rejected": -2.9184837341308594, "logps/chosen": -85.61122131347656, "logps/rejected": -614.8472900390625, "loss": 0.0372, "rewards/accuracies": 1.0, "rewards/chosen": -0.418525755405426, "rewards/margins": 5.349049091339111, "rewards/rejected": -5.767575263977051, "step": 22030 }, { "epoch": 0.26, "learning_rate": 4.60222973868123e-06, "logits/chosen": -3.1453561782836914, "logits/rejected": -2.9490628242492676, "logps/chosen": -57.397193908691406, "logps/rejected": -669.4313354492188, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": -0.11914539337158203, "rewards/margins": 6.184620380401611, "rewards/rejected": -6.303765296936035, "step": 22040 }, { "epoch": 0.26, "learning_rate": 4.601664203126105e-06, "logits/chosen": -3.1649973392486572, "logits/rejected": -2.8413186073303223, "logps/chosen": -81.78499603271484, "logps/rejected": -706.6976928710938, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -0.2745198607444763, "rewards/margins": 6.372366905212402, "rewards/rejected": -6.646886348724365, "step": 22050 }, { "epoch": 0.26, "learning_rate": 4.601098300626246e-06, "logits/chosen": -3.1234524250030518, "logits/rejected": -2.7875022888183594, "logps/chosen": -85.64444732666016, "logps/rejected": -749.0570068359375, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -0.2826905846595764, "rewards/margins": 6.80846643447876, "rewards/rejected": -7.091156959533691, "step": 22060 }, { "epoch": 0.26, "learning_rate": 4.600532031280461e-06, "logits/chosen": -3.0996527671813965, "logits/rejected": -2.884120464324951, "logps/chosen": -89.69102478027344, "logps/rejected": -606.4888916015625, "loss": 0.0979, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.41156792640686035, "rewards/margins": 5.260969161987305, "rewards/rejected": -5.672537326812744, "step": 22070 }, { "epoch": 0.26, "learning_rate": 4.599965395187616e-06, "logits/chosen": -3.1240782737731934, "logits/rejected": -2.847841262817383, "logps/chosen": -59.297607421875, "logps/rejected": -659.0006103515625, "loss": 0.0375, "rewards/accuracies": 1.0, "rewards/chosen": -0.07123802602291107, "rewards/margins": 6.1312737464904785, "rewards/rejected": -6.202512264251709, "step": 22080 }, { "epoch": 0.26, "learning_rate": 4.599398392446646e-06, "logits/chosen": -3.0932281017303467, "logits/rejected": -2.764770984649658, "logps/chosen": -61.91822052001953, "logps/rejected": -593.0811157226562, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": -0.10951298475265503, "rewards/margins": 5.431424140930176, "rewards/rejected": -5.540937423706055, "step": 22090 }, { "epoch": 0.26, "learning_rate": 4.598831023156547e-06, "logits/chosen": -3.1258044242858887, "logits/rejected": -2.8514833450317383, "logps/chosen": -56.04320526123047, "logps/rejected": -794.0297241210938, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -0.10942345857620239, "rewards/margins": 7.441355228424072, "rewards/rejected": -7.550778865814209, "step": 22100 }, { "epoch": 0.26, "learning_rate": 4.598263287416381e-06, "logits/chosen": -3.1187002658843994, "logits/rejected": -2.9021337032318115, "logps/chosen": -62.47227096557617, "logps/rejected": -548.6940307617188, "loss": 0.1109, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.16985230147838593, "rewards/margins": 4.9306464195251465, "rewards/rejected": -5.100498676300049, "step": 22110 }, { "epoch": 0.26, "learning_rate": 4.597695185325272e-06, "logits/chosen": -3.1118013858795166, "logits/rejected": -2.9312663078308105, "logps/chosen": -47.01138687133789, "logps/rejected": -530.6470947265625, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": -0.05319855362176895, "rewards/margins": 4.890280723571777, "rewards/rejected": -4.943479061126709, "step": 22120 }, { "epoch": 0.26, "learning_rate": 4.597126716982411e-06, "logits/chosen": -3.082740545272827, "logits/rejected": -2.7357449531555176, "logps/chosen": -63.7047119140625, "logps/rejected": -771.8421020507812, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": -0.11574794352054596, "rewards/margins": 7.198882102966309, "rewards/rejected": -7.314630031585693, "step": 22130 }, { "epoch": 0.27, "learning_rate": 4.5965578824870495e-06, "logits/chosen": -3.104123592376709, "logits/rejected": -2.8482017517089844, "logps/chosen": -52.169219970703125, "logps/rejected": -683.872802734375, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -0.03779030591249466, "rewards/margins": 6.410418510437012, "rewards/rejected": -6.448208808898926, "step": 22140 }, { "epoch": 0.27, "learning_rate": 4.5959886819385045e-06, "logits/chosen": -3.082658052444458, "logits/rejected": -2.784625291824341, "logps/chosen": -84.69111633300781, "logps/rejected": -669.6370239257812, "loss": 0.1044, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3776622712612152, "rewards/margins": 5.921093940734863, "rewards/rejected": -6.2987565994262695, "step": 22150 }, { "epoch": 0.27, "learning_rate": 4.595419115436157e-06, "logits/chosen": -3.0948970317840576, "logits/rejected": -2.8885905742645264, "logps/chosen": -50.26654815673828, "logps/rejected": -598.4276733398438, "loss": 0.1116, "rewards/accuracies": 1.0, "rewards/chosen": -0.03533736988902092, "rewards/margins": 5.556103229522705, "rewards/rejected": -5.591440677642822, "step": 22160 }, { "epoch": 0.27, "learning_rate": 4.5948491830794515e-06, "logits/chosen": -3.0998287200927734, "logits/rejected": -2.7735228538513184, "logps/chosen": -52.141990661621094, "logps/rejected": -691.2442016601562, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": 0.0001790106325643137, "rewards/margins": 6.525210380554199, "rewards/rejected": -6.525032043457031, "step": 22170 }, { "epoch": 0.27, "learning_rate": 4.594278884967896e-06, "logits/chosen": -3.115859031677246, "logits/rejected": -2.9928812980651855, "logps/chosen": -34.60225296020508, "logps/rejected": -529.7445678710938, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": 0.028098825365304947, "rewards/margins": 4.9398884773254395, "rewards/rejected": -4.911789417266846, "step": 22180 }, { "epoch": 0.27, "learning_rate": 4.593708221201065e-06, "logits/chosen": -3.188685178756714, "logits/rejected": -2.930332660675049, "logps/chosen": -44.55906295776367, "logps/rejected": -656.2522583007812, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/chosen": -0.016950637102127075, "rewards/margins": 6.161761283874512, "rewards/rejected": -6.178711414337158, "step": 22190 }, { "epoch": 0.27, "learning_rate": 4.5931371918785934e-06, "logits/chosen": -3.0741019248962402, "logits/rejected": -2.8349428176879883, "logps/chosen": -49.63063430786133, "logps/rejected": -689.1280517578125, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": -0.055577218532562256, "rewards/margins": 6.439640045166016, "rewards/rejected": -6.495217800140381, "step": 22200 }, { "epoch": 0.27, "learning_rate": 4.592565797100181e-06, "logits/chosen": -3.1038310527801514, "logits/rejected": -2.923771858215332, "logps/chosen": -32.27180480957031, "logps/rejected": -514.5037231445312, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/chosen": 0.08470857888460159, "rewards/margins": 4.852705001831055, "rewards/rejected": -4.767996788024902, "step": 22210 }, { "epoch": 0.27, "learning_rate": 4.591994036965593e-06, "logits/chosen": -3.143857955932617, "logits/rejected": -2.892087936401367, "logps/chosen": -38.86914825439453, "logps/rejected": -577.4126586914062, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": 0.03450890630483627, "rewards/margins": 5.422724723815918, "rewards/rejected": -5.388216018676758, "step": 22220 }, { "epoch": 0.27, "learning_rate": 4.591421911574656e-06, "logits/chosen": -3.109311580657959, "logits/rejected": -2.9399161338806152, "logps/chosen": -27.885921478271484, "logps/rejected": -500.4217834472656, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": 0.09502804279327393, "rewards/margins": 4.735795974731445, "rewards/rejected": -4.640767574310303, "step": 22230 }, { "epoch": 0.27, "learning_rate": 4.590849421027262e-06, "logits/chosen": -3.1162376403808594, "logits/rejected": -2.9365949630737305, "logps/chosen": -48.702247619628906, "logps/rejected": -551.3642578125, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/chosen": -0.06366720050573349, "rewards/margins": 5.080742835998535, "rewards/rejected": -5.144410133361816, "step": 22240 }, { "epoch": 0.27, "learning_rate": 4.590276565423366e-06, "logits/chosen": -3.0664591789245605, "logits/rejected": -2.7107856273651123, "logps/chosen": -83.916748046875, "logps/rejected": -663.260009765625, "loss": 0.0345, "rewards/accuracies": 1.0, "rewards/chosen": -0.292702853679657, "rewards/margins": 5.931966781616211, "rewards/rejected": -6.224669933319092, "step": 22250 }, { "epoch": 0.27, "learning_rate": 4.589703344862987e-06, "logits/chosen": -3.0937235355377197, "logits/rejected": -2.7673351764678955, "logps/chosen": -72.30692291259766, "logps/rejected": -685.6932373046875, "loss": 0.1009, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2528281509876251, "rewards/margins": 6.217257499694824, "rewards/rejected": -6.470086097717285, "step": 22260 }, { "epoch": 0.27, "learning_rate": 4.589129759446208e-06, "logits/chosen": -3.1137454509735107, "logits/rejected": -2.9060463905334473, "logps/chosen": -39.684513092041016, "logps/rejected": -575.931396484375, "loss": 0.043, "rewards/accuracies": 1.0, "rewards/chosen": 0.05504896491765976, "rewards/margins": 5.4366230964660645, "rewards/rejected": -5.381573677062988, "step": 22270 }, { "epoch": 0.27, "learning_rate": 4.588555809273176e-06, "logits/chosen": -3.1186575889587402, "logits/rejected": -2.8349833488464355, "logps/chosen": -58.464622497558594, "logps/rejected": -633.9965209960938, "loss": 0.0349, "rewards/accuracies": 1.0, "rewards/chosen": -0.09112541377544403, "rewards/margins": 5.852601051330566, "rewards/rejected": -5.943726062774658, "step": 22280 }, { "epoch": 0.27, "learning_rate": 4.5879814944441e-06, "logits/chosen": -3.085360050201416, "logits/rejected": -2.733297109603882, "logps/chosen": -89.39470672607422, "logps/rejected": -815.5902099609375, "loss": 0.1517, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.37957924604415894, "rewards/margins": 7.364804744720459, "rewards/rejected": -7.744383335113525, "step": 22290 }, { "epoch": 0.27, "learning_rate": 4.587406815059254e-06, "logits/chosen": -3.1501901149749756, "logits/rejected": -2.8080873489379883, "logps/chosen": -63.610572814941406, "logps/rejected": -621.809326171875, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -0.15604518353939056, "rewards/margins": 5.661632061004639, "rewards/rejected": -5.817676544189453, "step": 22300 }, { "epoch": 0.27, "learning_rate": 4.586831771218976e-06, "logits/chosen": -3.133049488067627, "logits/rejected": -2.8646469116210938, "logps/chosen": -57.02942657470703, "logps/rejected": -789.3519287109375, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": -0.07438927888870239, "rewards/margins": 7.421817779541016, "rewards/rejected": -7.496207237243652, "step": 22310 }, { "epoch": 0.27, "learning_rate": 4.586256363023667e-06, "logits/chosen": -3.078402042388916, "logits/rejected": -2.8789944648742676, "logps/chosen": -57.80956268310547, "logps/rejected": -570.1654052734375, "loss": 0.1216, "rewards/accuracies": 1.0, "rewards/chosen": -0.14137709140777588, "rewards/margins": 5.1831440925598145, "rewards/rejected": -5.324520111083984, "step": 22320 }, { "epoch": 0.27, "learning_rate": 4.58568059057379e-06, "logits/chosen": -3.147923231124878, "logits/rejected": -2.7437703609466553, "logps/chosen": -54.194908142089844, "logps/rejected": -725.329345703125, "loss": 0.1003, "rewards/accuracies": 1.0, "rewards/chosen": -0.04751657694578171, "rewards/margins": 6.809932708740234, "rewards/rejected": -6.857450008392334, "step": 22330 }, { "epoch": 0.27, "learning_rate": 4.585104453969875e-06, "logits/chosen": -3.131521701812744, "logits/rejected": -3.011514186859131, "logps/chosen": -29.91530990600586, "logps/rejected": -408.580810546875, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": 0.09090471267700195, "rewards/margins": 3.7939517498016357, "rewards/rejected": -3.703047275543213, "step": 22340 }, { "epoch": 0.27, "learning_rate": 4.5845279533125145e-06, "logits/chosen": -3.1199958324432373, "logits/rejected": -2.773087978363037, "logps/chosen": -59.35894012451172, "logps/rejected": -620.61474609375, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": -0.09563721716403961, "rewards/margins": 5.718904972076416, "rewards/rejected": -5.8145432472229, "step": 22350 }, { "epoch": 0.27, "learning_rate": 4.583951088702362e-06, "logits/chosen": -3.1242291927337646, "logits/rejected": -2.7433619499206543, "logps/chosen": -61.718910217285156, "logps/rejected": -620.7474365234375, "loss": 0.0326, "rewards/accuracies": 1.0, "rewards/chosen": -0.060401905328035355, "rewards/margins": 5.726819038391113, "rewards/rejected": -5.787221431732178, "step": 22360 }, { "epoch": 0.27, "learning_rate": 4.583373860240138e-06, "logits/chosen": -3.091418981552124, "logits/rejected": -2.7178256511688232, "logps/chosen": -48.85749435424805, "logps/rejected": -664.168701171875, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": 0.029735010117292404, "rewards/margins": 6.278361797332764, "rewards/rejected": -6.248626708984375, "step": 22370 }, { "epoch": 0.27, "learning_rate": 4.582796268026624e-06, "logits/chosen": -3.1134347915649414, "logits/rejected": -2.8635528087615967, "logps/chosen": -39.65397262573242, "logps/rejected": -588.1400146484375, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": 0.05070480704307556, "rewards/margins": 5.54740047454834, "rewards/rejected": -5.4966959953308105, "step": 22380 }, { "epoch": 0.27, "learning_rate": 4.582218312162665e-06, "logits/chosen": -3.0657174587249756, "logits/rejected": -2.7301788330078125, "logps/chosen": -63.55730056762695, "logps/rejected": -878.4390869140625, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -0.1102268248796463, "rewards/margins": 8.264287948608398, "rewards/rejected": -8.374513626098633, "step": 22390 }, { "epoch": 0.27, "learning_rate": 4.581639992749174e-06, "logits/chosen": -3.067302703857422, "logits/rejected": -2.555332660675049, "logps/chosen": -88.87394714355469, "logps/rejected": -829.7978515625, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -0.23897810280323029, "rewards/margins": 7.624864101409912, "rewards/rejected": -7.863842010498047, "step": 22400 }, { "epoch": 0.27, "learning_rate": 4.581061309887121e-06, "logits/chosen": -3.1128456592559814, "logits/rejected": -2.8953983783721924, "logps/chosen": -42.509498596191406, "logps/rejected": -593.7626953125, "loss": 0.0359, "rewards/accuracies": 1.0, "rewards/chosen": 0.049496717751026154, "rewards/margins": 5.5996809005737305, "rewards/rejected": -5.55018424987793, "step": 22410 }, { "epoch": 0.27, "learning_rate": 4.580482263677543e-06, "logits/chosen": -3.09382963180542, "logits/rejected": -2.857395887374878, "logps/chosen": -40.63311767578125, "logps/rejected": -627.4033203125, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": 0.011837150901556015, "rewards/margins": 5.8843994140625, "rewards/rejected": -5.872562408447266, "step": 22420 }, { "epoch": 0.27, "learning_rate": 4.579902854221539e-06, "logits/chosen": -3.090061902999878, "logits/rejected": -2.714595317840576, "logps/chosen": -75.25702667236328, "logps/rejected": -658.33203125, "loss": 0.1046, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.23162564635276794, "rewards/margins": 5.954443454742432, "rewards/rejected": -6.186069011688232, "step": 22430 }, { "epoch": 0.27, "learning_rate": 4.579323081620275e-06, "logits/chosen": -3.1236767768859863, "logits/rejected": -2.8885397911071777, "logps/chosen": -40.87235641479492, "logps/rejected": -697.9632568359375, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": 0.014521384611725807, "rewards/margins": 6.593523979187012, "rewards/rejected": -6.57900333404541, "step": 22440 }, { "epoch": 0.27, "learning_rate": 4.578742945974976e-06, "logits/chosen": -3.1055996417999268, "logits/rejected": -2.9134345054626465, "logps/chosen": -47.63532257080078, "logps/rejected": -569.1014404296875, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": -0.044188544154167175, "rewards/margins": 5.2727556228637695, "rewards/rejected": -5.3169450759887695, "step": 22450 }, { "epoch": 0.27, "learning_rate": 4.578162447386931e-06, "logits/chosen": -3.079932689666748, "logits/rejected": -2.665874481201172, "logps/chosen": -69.03897857666016, "logps/rejected": -802.8099365234375, "loss": 0.0359, "rewards/accuracies": 1.0, "rewards/chosen": -0.08695268630981445, "rewards/margins": 7.528134346008301, "rewards/rejected": -7.615086555480957, "step": 22460 }, { "epoch": 0.27, "learning_rate": 4.577581585957495e-06, "logits/chosen": -3.1368203163146973, "logits/rejected": -2.942950487136841, "logps/chosen": -37.17471694946289, "logps/rejected": -614.2335815429688, "loss": 0.0524, "rewards/accuracies": 1.0, "rewards/chosen": 0.02806856855750084, "rewards/margins": 5.795950889587402, "rewards/rejected": -5.767882347106934, "step": 22470 }, { "epoch": 0.27, "learning_rate": 4.577000361788084e-06, "logits/chosen": -3.1049628257751465, "logits/rejected": -2.894577980041504, "logps/chosen": -63.655860900878906, "logps/rejected": -566.0516967773438, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": -0.2143150120973587, "rewards/margins": 5.05056095123291, "rewards/rejected": -5.264875888824463, "step": 22480 }, { "epoch": 0.27, "learning_rate": 4.576418774980179e-06, "logits/chosen": -3.1057050228118896, "logits/rejected": -2.7926852703094482, "logps/chosen": -62.651512145996094, "logps/rejected": -572.9207763671875, "loss": 0.1807, "rewards/accuracies": 1.0, "rewards/chosen": -0.0821443572640419, "rewards/margins": 5.231635093688965, "rewards/rejected": -5.313779354095459, "step": 22490 }, { "epoch": 0.27, "learning_rate": 4.5758368256353225e-06, "logits/chosen": -3.118151903152466, "logits/rejected": -2.7846293449401855, "logps/chosen": -69.04756164550781, "logps/rejected": -689.3880004882812, "loss": 0.0743, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.19698241353034973, "rewards/margins": 6.300002098083496, "rewards/rejected": -6.496983528137207, "step": 22500 }, { "epoch": 0.27, "learning_rate": 4.575254513855122e-06, "logits/chosen": -3.0846524238586426, "logits/rejected": -2.919038772583008, "logps/chosen": -46.440582275390625, "logps/rejected": -673.3758544921875, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": -0.03876958787441254, "rewards/margins": 6.309329986572266, "rewards/rejected": -6.348099708557129, "step": 22510 }, { "epoch": 0.27, "learning_rate": 4.5746718397412475e-06, "logits/chosen": -3.104337215423584, "logits/rejected": -2.9607865810394287, "logps/chosen": -37.8288459777832, "logps/rejected": -490.359619140625, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": 0.016413476318120956, "rewards/margins": 4.533480167388916, "rewards/rejected": -4.51706600189209, "step": 22520 }, { "epoch": 0.27, "learning_rate": 4.5740888033954315e-06, "logits/chosen": -3.0877716541290283, "logits/rejected": -2.7888712882995605, "logps/chosen": -66.42626953125, "logps/rejected": -703.9131469726562, "loss": 0.0329, "rewards/accuracies": 1.0, "rewards/chosen": -0.1380697786808014, "rewards/margins": 6.49618673324585, "rewards/rejected": -6.634256839752197, "step": 22530 }, { "epoch": 0.27, "learning_rate": 4.57350540491947e-06, "logits/chosen": -3.100773334503174, "logits/rejected": -2.7685530185699463, "logps/chosen": -58.287330627441406, "logps/rejected": -803.3146362304688, "loss": 0.0346, "rewards/accuracies": 1.0, "rewards/chosen": -0.096389040350914, "rewards/margins": 7.546352386474609, "rewards/rejected": -7.6427412033081055, "step": 22540 }, { "epoch": 0.27, "learning_rate": 4.572921644415225e-06, "logits/chosen": -3.094921350479126, "logits/rejected": -2.7686705589294434, "logps/chosen": -68.5328140258789, "logps/rejected": -561.7407836914062, "loss": 0.1032, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.19152812659740448, "rewards/margins": 5.03639554977417, "rewards/rejected": -5.227924346923828, "step": 22550 }, { "epoch": 0.27, "learning_rate": 4.572337521984618e-06, "logits/chosen": -3.0558953285217285, "logits/rejected": -2.763564348220825, "logps/chosen": -58.566566467285156, "logps/rejected": -690.737548828125, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": -0.11186204105615616, "rewards/margins": 6.399884223937988, "rewards/rejected": -6.511746406555176, "step": 22560 }, { "epoch": 0.27, "learning_rate": 4.571753037729635e-06, "logits/chosen": -3.0955333709716797, "logits/rejected": -2.9146547317504883, "logps/chosen": -57.104896545410156, "logps/rejected": -630.2999267578125, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/chosen": -0.17381514608860016, "rewards/margins": 5.734768867492676, "rewards/rejected": -5.908584117889404, "step": 22570 }, { "epoch": 0.27, "learning_rate": 4.571168191752325e-06, "logits/chosen": -3.0764005184173584, "logits/rejected": -2.791492462158203, "logps/chosen": -52.247406005859375, "logps/rejected": -712.38525390625, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -0.059077225625514984, "rewards/margins": 6.676168918609619, "rewards/rejected": -6.735246181488037, "step": 22580 }, { "epoch": 0.27, "learning_rate": 4.570582984154802e-06, "logits/chosen": -3.1470096111297607, "logits/rejected": -2.964766025543213, "logps/chosen": -46.37123489379883, "logps/rejected": -608.9732055664062, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": 0.0010619431268423796, "rewards/margins": 5.698750019073486, "rewards/rejected": -5.697688102722168, "step": 22590 }, { "epoch": 0.27, "learning_rate": 4.569997415039241e-06, "logits/chosen": -3.123356819152832, "logits/rejected": -2.9070582389831543, "logps/chosen": -54.51714324951172, "logps/rejected": -548.4496459960938, "loss": 0.0344, "rewards/accuracies": 1.0, "rewards/chosen": -0.1070564016699791, "rewards/margins": 4.996806621551514, "rewards/rejected": -5.10386323928833, "step": 22600 }, { "epoch": 0.27, "learning_rate": 4.5694114845078805e-06, "logits/chosen": -3.090562582015991, "logits/rejected": -2.915290355682373, "logps/chosen": -55.93550491333008, "logps/rejected": -565.0260009765625, "loss": 0.0966, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.15051054954528809, "rewards/margins": 5.137541770935059, "rewards/rejected": -5.288052558898926, "step": 22610 }, { "epoch": 0.27, "learning_rate": 4.568825192663022e-06, "logits/chosen": -3.0872464179992676, "logits/rejected": -2.7787249088287354, "logps/chosen": -53.279579162597656, "logps/rejected": -668.2586669921875, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": -0.06108647584915161, "rewards/margins": 6.234662055969238, "rewards/rejected": -6.295748710632324, "step": 22620 }, { "epoch": 0.27, "learning_rate": 4.56823853960703e-06, "logits/chosen": -3.119666576385498, "logits/rejected": -2.8087942600250244, "logps/chosen": -54.27294158935547, "logps/rejected": -611.9080810546875, "loss": 0.0309, "rewards/accuracies": 1.0, "rewards/chosen": -0.08716871589422226, "rewards/margins": 5.651538372039795, "rewards/rejected": -5.738707542419434, "step": 22630 }, { "epoch": 0.27, "learning_rate": 4.567651525442335e-06, "logits/chosen": -3.0750744342803955, "logits/rejected": -2.7180068492889404, "logps/chosen": -80.31818389892578, "logps/rejected": -690.2134399414062, "loss": 0.0479, "rewards/accuracies": 1.0, "rewards/chosen": -0.22033271193504333, "rewards/margins": 6.275387287139893, "rewards/rejected": -6.495719909667969, "step": 22640 }, { "epoch": 0.27, "learning_rate": 4.567064150271425e-06, "logits/chosen": -3.1063735485076904, "logits/rejected": -2.8655567169189453, "logps/chosen": -56.446563720703125, "logps/rejected": -605.8248291015625, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/chosen": -0.13147102296352386, "rewards/margins": 5.542061805725098, "rewards/rejected": -5.673532485961914, "step": 22650 }, { "epoch": 0.27, "learning_rate": 4.566476414196856e-06, "logits/chosen": -3.1103603839874268, "logits/rejected": -2.831111431121826, "logps/chosen": -107.5071029663086, "logps/rejected": -658.6743774414062, "loss": 0.103, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5691009163856506, "rewards/margins": 5.630802631378174, "rewards/rejected": -6.199903964996338, "step": 22660 }, { "epoch": 0.27, "learning_rate": 4.5658883173212445e-06, "logits/chosen": -3.120365619659424, "logits/rejected": -2.857450008392334, "logps/chosen": -66.92469787597656, "logps/rejected": -734.5403442382812, "loss": 0.0348, "rewards/accuracies": 1.0, "rewards/chosen": -0.18949595093727112, "rewards/margins": 6.7642412185668945, "rewards/rejected": -6.953736782073975, "step": 22670 }, { "epoch": 0.27, "learning_rate": 4.56529985974727e-06, "logits/chosen": -3.1085879802703857, "logits/rejected": -2.640212059020996, "logps/chosen": -96.88712310791016, "logps/rejected": -1020.5803833007812, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -0.3924177885055542, "rewards/margins": 9.415289878845215, "rewards/rejected": -9.807708740234375, "step": 22680 }, { "epoch": 0.27, "learning_rate": 4.564711041577677e-06, "logits/chosen": -3.1251983642578125, "logits/rejected": -2.8412249088287354, "logps/chosen": -63.98286819458008, "logps/rejected": -705.1689453125, "loss": 0.0337, "rewards/accuracies": 1.0, "rewards/chosen": -0.19991880655288696, "rewards/margins": 6.468618869781494, "rewards/rejected": -6.6685380935668945, "step": 22690 }, { "epoch": 0.27, "learning_rate": 4.56412186291527e-06, "logits/chosen": -3.122276544570923, "logits/rejected": -2.668353796005249, "logps/chosen": -113.94466400146484, "logps/rejected": -983.3162231445312, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": -0.6100425720214844, "rewards/margins": 8.811323165893555, "rewards/rejected": -9.421365737915039, "step": 22700 }, { "epoch": 0.27, "learning_rate": 4.563532323862918e-06, "logits/chosen": -3.1038880348205566, "logits/rejected": -2.7898545265197754, "logps/chosen": -69.14961242675781, "logps/rejected": -584.1009521484375, "loss": 0.0329, "rewards/accuracies": 1.0, "rewards/chosen": -0.24150300025939941, "rewards/margins": 5.209463119506836, "rewards/rejected": -5.450965881347656, "step": 22710 }, { "epoch": 0.27, "learning_rate": 4.562942424523554e-06, "logits/chosen": -3.0952630043029785, "logits/rejected": -2.82918381690979, "logps/chosen": -97.86202239990234, "logps/rejected": -754.1895141601562, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": -0.4671561121940613, "rewards/margins": 6.671212673187256, "rewards/rejected": -7.138368129730225, "step": 22720 }, { "epoch": 0.27, "learning_rate": 4.5623521650001725e-06, "logits/chosen": -3.1406548023223877, "logits/rejected": -2.898151159286499, "logps/chosen": -70.69344329833984, "logps/rejected": -696.8693237304688, "loss": 0.0397, "rewards/accuracies": 1.0, "rewards/chosen": -0.20948509871959686, "rewards/margins": 6.369440078735352, "rewards/rejected": -6.578926086425781, "step": 22730 }, { "epoch": 0.27, "learning_rate": 4.561761545395831e-06, "logits/chosen": -3.0814366340637207, "logits/rejected": -2.859973669052124, "logps/chosen": -91.23925018310547, "logps/rejected": -668.7178955078125, "loss": 0.1026, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.43669524788856506, "rewards/margins": 5.866851806640625, "rewards/rejected": -6.303547382354736, "step": 22740 }, { "epoch": 0.27, "learning_rate": 4.56117056581365e-06, "logits/chosen": -3.0621190071105957, "logits/rejected": -2.8252315521240234, "logps/chosen": -74.6125717163086, "logps/rejected": -730.05029296875, "loss": 0.1015, "rewards/accuracies": 1.0, "rewards/chosen": -0.2987014353275299, "rewards/margins": 6.621835231781006, "rewards/rejected": -6.92053747177124, "step": 22750 }, { "epoch": 0.27, "learning_rate": 4.5605792263568125e-06, "logits/chosen": -3.135014057159424, "logits/rejected": -2.8594889640808105, "logps/chosen": -73.78239440917969, "logps/rejected": -731.7828369140625, "loss": 0.0391, "rewards/accuracies": 1.0, "rewards/chosen": -0.2696691155433655, "rewards/margins": 6.659680366516113, "rewards/rejected": -6.929349422454834, "step": 22760 }, { "epoch": 0.27, "learning_rate": 4.559987527128566e-06, "logits/chosen": -3.0816338062286377, "logits/rejected": -2.7245545387268066, "logps/chosen": -78.45550537109375, "logps/rejected": -693.1051635742188, "loss": 0.099, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3281888961791992, "rewards/margins": 6.2118940353393555, "rewards/rejected": -6.5400824546813965, "step": 22770 }, { "epoch": 0.27, "learning_rate": 4.5593954682322175e-06, "logits/chosen": -3.058802843093872, "logits/rejected": -2.7877373695373535, "logps/chosen": -89.76179504394531, "logps/rejected": -649.6354370117188, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -0.4061700403690338, "rewards/margins": 5.692797660827637, "rewards/rejected": -6.0989670753479, "step": 22780 }, { "epoch": 0.27, "learning_rate": 4.55880304977114e-06, "logits/chosen": -3.0978171825408936, "logits/rejected": -2.74145245552063, "logps/chosen": -88.74776458740234, "logps/rejected": -1007.4940185546875, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/chosen": -0.3509858250617981, "rewards/margins": 9.31106948852539, "rewards/rejected": -9.662055969238281, "step": 22790 }, { "epoch": 0.27, "learning_rate": 4.558210271848768e-06, "logits/chosen": -3.1023664474487305, "logits/rejected": -2.8325304985046387, "logps/chosen": -55.29860305786133, "logps/rejected": -639.79736328125, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -0.07477273046970367, "rewards/margins": 5.933383941650391, "rewards/rejected": -6.008156776428223, "step": 22800 }, { "epoch": 0.27, "learning_rate": 4.5576171345685996e-06, "logits/chosen": -3.0883564949035645, "logits/rejected": -2.7459359169006348, "logps/chosen": -87.16360473632812, "logps/rejected": -937.3991088867188, "loss": 0.0342, "rewards/accuracies": 1.0, "rewards/chosen": -0.4014441967010498, "rewards/margins": 8.563264846801758, "rewards/rejected": -8.964707374572754, "step": 22810 }, { "epoch": 0.27, "learning_rate": 4.557023638034194e-06, "logits/chosen": -3.092210531234741, "logits/rejected": -2.629220962524414, "logps/chosen": -85.11066436767578, "logps/rejected": -866.4148559570312, "loss": 0.0342, "rewards/accuracies": 1.0, "rewards/chosen": -0.3716605305671692, "rewards/margins": 7.894343376159668, "rewards/rejected": -8.26600456237793, "step": 22820 }, { "epoch": 0.27, "learning_rate": 4.556429782349174e-06, "logits/chosen": -3.090535879135132, "logits/rejected": -2.7824132442474365, "logps/chosen": -82.38392639160156, "logps/rejected": -739.7654418945312, "loss": 0.1155, "rewards/accuracies": 1.0, "rewards/chosen": -0.35325753688812256, "rewards/margins": 6.645443916320801, "rewards/rejected": -6.9987006187438965, "step": 22830 }, { "epoch": 0.27, "learning_rate": 4.555835567617226e-06, "logits/chosen": -3.0762884616851807, "logits/rejected": -2.717372179031372, "logps/chosen": -125.6049575805664, "logps/rejected": -760.2620849609375, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": -0.8007901906967163, "rewards/margins": 6.388542652130127, "rewards/rejected": -7.189333438873291, "step": 22840 }, { "epoch": 0.27, "learning_rate": 4.555240993942098e-06, "logits/chosen": -3.0862321853637695, "logits/rejected": -2.817218065261841, "logps/chosen": -70.42127990722656, "logps/rejected": -759.1617431640625, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": -0.2529293894767761, "rewards/margins": 6.952933311462402, "rewards/rejected": -7.205862998962402, "step": 22850 }, { "epoch": 0.27, "learning_rate": 4.5546460614276005e-06, "logits/chosen": -3.1515681743621826, "logits/rejected": -2.82136869430542, "logps/chosen": -60.91660690307617, "logps/rejected": -692.9627075195312, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": -0.12123823165893555, "rewards/margins": 6.401850700378418, "rewards/rejected": -6.523089408874512, "step": 22860 }, { "epoch": 0.27, "learning_rate": 4.554050770177607e-06, "logits/chosen": -3.108556032180786, "logits/rejected": -2.8537611961364746, "logps/chosen": -48.109798431396484, "logps/rejected": -586.2613525390625, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -0.060023099184036255, "rewards/margins": 5.418180465698242, "rewards/rejected": -5.478203296661377, "step": 22870 }, { "epoch": 0.27, "learning_rate": 4.5534551202960545e-06, "logits/chosen": -3.0608420372009277, "logits/rejected": -2.6038317680358887, "logps/chosen": -64.4658203125, "logps/rejected": -724.6534423828125, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -0.09087498486042023, "rewards/margins": 6.75048303604126, "rewards/rejected": -6.841357231140137, "step": 22880 }, { "epoch": 0.27, "learning_rate": 4.5528591118869406e-06, "logits/chosen": -3.055248737335205, "logits/rejected": -2.8882956504821777, "logps/chosen": -54.31690216064453, "logps/rejected": -571.1541748046875, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -0.1381005048751831, "rewards/margins": 5.196925163269043, "rewards/rejected": -5.335024833679199, "step": 22890 }, { "epoch": 0.27, "learning_rate": 4.5522627450543264e-06, "logits/chosen": -3.05971097946167, "logits/rejected": -2.803011417388916, "logps/chosen": -73.83190155029297, "logps/rejected": -624.6412353515625, "loss": 0.1007, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3562082350254059, "rewards/margins": 5.5104265213012695, "rewards/rejected": -5.866635322570801, "step": 22900 }, { "epoch": 0.27, "learning_rate": 4.551666019902338e-06, "logits/chosen": -3.0592825412750244, "logits/rejected": -2.807551622390747, "logps/chosen": -55.154075622558594, "logps/rejected": -714.4098510742188, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -0.12033107131719589, "rewards/margins": 6.632092475891113, "rewards/rejected": -6.752423286437988, "step": 22910 }, { "epoch": 0.27, "learning_rate": 4.551068936535161e-06, "logits/chosen": -3.048506021499634, "logits/rejected": -2.6585376262664795, "logps/chosen": -64.88656616210938, "logps/rejected": -750.1384887695312, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": -0.1598636358976364, "rewards/margins": 6.943938255310059, "rewards/rejected": -7.103802680969238, "step": 22920 }, { "epoch": 0.27, "learning_rate": 4.550471495057044e-06, "logits/chosen": -3.057666778564453, "logits/rejected": -2.797081232070923, "logps/chosen": -48.69760513305664, "logps/rejected": -561.0244140625, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -0.08708706498146057, "rewards/margins": 5.148760795593262, "rewards/rejected": -5.235846996307373, "step": 22930 }, { "epoch": 0.27, "learning_rate": 4.549873695572298e-06, "logits/chosen": -3.024622678756714, "logits/rejected": -2.5639519691467285, "logps/chosen": -63.01361083984375, "logps/rejected": -722.7438354492188, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -0.11505848169326782, "rewards/margins": 6.726194858551025, "rewards/rejected": -6.841253757476807, "step": 22940 }, { "epoch": 0.27, "learning_rate": 4.549275538185299e-06, "logits/chosen": -3.0235073566436768, "logits/rejected": -2.8851089477539062, "logps/chosen": -43.42146301269531, "logps/rejected": -617.26513671875, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.024106672033667564, "rewards/margins": 5.777769565582275, "rewards/rejected": -5.801876068115234, "step": 22950 }, { "epoch": 0.27, "learning_rate": 4.548677023000483e-06, "logits/chosen": -3.0114216804504395, "logits/rejected": -2.592381000518799, "logps/chosen": -64.48161315917969, "logps/rejected": -713.2682495117188, "loss": 0.1205, "rewards/accuracies": 1.0, "rewards/chosen": -0.13822980225086212, "rewards/margins": 6.60641622543335, "rewards/rejected": -6.7446465492248535, "step": 22960 }, { "epoch": 0.27, "learning_rate": 4.548078150122348e-06, "logits/chosen": -3.0827341079711914, "logits/rejected": -2.875229597091675, "logps/chosen": -47.6142578125, "logps/rejected": -654.9194946289062, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.0641755759716034, "rewards/margins": 6.106684684753418, "rewards/rejected": -6.170859336853027, "step": 22970 }, { "epoch": 0.28, "learning_rate": 4.5474789196554565e-06, "logits/chosen": -3.070740222930908, "logits/rejected": -2.732563018798828, "logps/chosen": -49.34684753417969, "logps/rejected": -703.8934326171875, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.022049570456147194, "rewards/margins": 6.629585266113281, "rewards/rejected": -6.65163516998291, "step": 22980 }, { "epoch": 0.28, "learning_rate": 4.5468793317044325e-06, "logits/chosen": -3.0710244178771973, "logits/rejected": -2.8286080360412598, "logps/chosen": -52.556663513183594, "logps/rejected": -651.1754760742188, "loss": 0.1204, "rewards/accuracies": 1.0, "rewards/chosen": -0.10734958946704865, "rewards/margins": 6.026791572570801, "rewards/rejected": -6.134141445159912, "step": 22990 }, { "epoch": 0.28, "learning_rate": 4.546279386373962e-06, "logits/chosen": -2.9982731342315674, "logits/rejected": -2.5959744453430176, "logps/chosen": -66.91910552978516, "logps/rejected": -846.7991333007812, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.16487517952919006, "rewards/margins": 7.894474983215332, "rewards/rejected": -8.059350967407227, "step": 23000 }, { "epoch": 0.28, "learning_rate": 4.545679083768793e-06, "logits/chosen": -3.0115981101989746, "logits/rejected": -2.713416576385498, "logps/chosen": -55.10523223876953, "logps/rejected": -708.7697143554688, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.07752739638090134, "rewards/margins": 6.619076728820801, "rewards/rejected": -6.696604251861572, "step": 23010 }, { "epoch": 0.28, "learning_rate": 4.545078423993738e-06, "logits/chosen": -3.0110182762145996, "logits/rejected": -2.718935251235962, "logps/chosen": -81.55635833740234, "logps/rejected": -704.8812255859375, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -0.3709753453731537, "rewards/margins": 6.285110950469971, "rewards/rejected": -6.6560869216918945, "step": 23020 }, { "epoch": 0.28, "learning_rate": 4.5444774071536714e-06, "logits/chosen": -2.9822981357574463, "logits/rejected": -2.6341960430145264, "logps/chosen": -85.62696838378906, "logps/rejected": -676.5101318359375, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -0.3978120684623718, "rewards/margins": 5.970673561096191, "rewards/rejected": -6.368485927581787, "step": 23030 }, { "epoch": 0.28, "learning_rate": 4.543876033353527e-06, "logits/chosen": -2.9970498085021973, "logits/rejected": -2.749194383621216, "logps/chosen": -42.349422454833984, "logps/rejected": -534.9144897460938, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 0.004688193555921316, "rewards/margins": 4.974377632141113, "rewards/rejected": -4.969688892364502, "step": 23040 }, { "epoch": 0.28, "learning_rate": 4.543274302698304e-06, "logits/chosen": -3.025008201599121, "logits/rejected": -2.814877986907959, "logps/chosen": -86.04765319824219, "logps/rejected": -617.4050903320312, "loss": 0.1856, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.46785297989845276, "rewards/margins": 5.332581996917725, "rewards/rejected": -5.8004350662231445, "step": 23050 }, { "epoch": 0.28, "learning_rate": 4.542672215293062e-06, "logits/chosen": -2.9641997814178467, "logits/rejected": -2.475130796432495, "logps/chosen": -88.95848083496094, "logps/rejected": -826.5430908203125, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.36842095851898193, "rewards/margins": 7.49365758895874, "rewards/rejected": -7.8620781898498535, "step": 23060 }, { "epoch": 0.28, "learning_rate": 4.5420697712429255e-06, "logits/chosen": -3.0118250846862793, "logits/rejected": -2.500429153442383, "logps/chosen": -87.92466735839844, "logps/rejected": -857.3741455078125, "loss": 0.0856, "rewards/accuracies": 1.0, "rewards/chosen": -0.34336453676223755, "rewards/margins": 7.8375420570373535, "rewards/rejected": -8.180906295776367, "step": 23070 }, { "epoch": 0.28, "learning_rate": 4.5414669706530775e-06, "logits/chosen": -3.008554458618164, "logits/rejected": -2.813176393508911, "logps/chosen": -47.022762298583984, "logps/rejected": -619.2450561523438, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.08005912601947784, "rewards/margins": 5.738627910614014, "rewards/rejected": -5.818687915802002, "step": 23080 }, { "epoch": 0.28, "learning_rate": 4.540863813628767e-06, "logits/chosen": -2.977526903152466, "logits/rejected": -2.521212339401245, "logps/chosen": -67.2664566040039, "logps/rejected": -818.7572021484375, "loss": 0.1291, "rewards/accuracies": 1.0, "rewards/chosen": -0.18239210546016693, "rewards/margins": 7.611503601074219, "rewards/rejected": -7.793896675109863, "step": 23090 }, { "epoch": 0.28, "learning_rate": 4.540260300275303e-06, "logits/chosen": -2.9526352882385254, "logits/rejected": -2.708012819290161, "logps/chosen": -56.795616149902344, "logps/rejected": -711.8842163085938, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.14031998813152313, "rewards/margins": 6.595346927642822, "rewards/rejected": -6.735666751861572, "step": 23100 }, { "epoch": 0.28, "learning_rate": 4.539656430698056e-06, "logits/chosen": -2.9997498989105225, "logits/rejected": -2.7082302570343018, "logps/chosen": -69.63445281982422, "logps/rejected": -779.2605590820312, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": -0.2100212126970291, "rewards/margins": 7.203228950500488, "rewards/rejected": -7.413249969482422, "step": 23110 }, { "epoch": 0.28, "learning_rate": 4.539052205002462e-06, "logits/chosen": -2.9344067573547363, "logits/rejected": -2.606175422668457, "logps/chosen": -89.03213500976562, "logps/rejected": -734.8562622070312, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -0.3942978084087372, "rewards/margins": 6.561933994293213, "rewards/rejected": -6.956231594085693, "step": 23120 }, { "epoch": 0.28, "learning_rate": 4.538447623294017e-06, "logits/chosen": -2.9382243156433105, "logits/rejected": -2.5693109035491943, "logps/chosen": -81.71782684326172, "logps/rejected": -743.973876953125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.4425066411495209, "rewards/margins": 6.621415138244629, "rewards/rejected": -7.063921928405762, "step": 23130 }, { "epoch": 0.28, "learning_rate": 4.5378426856782774e-06, "logits/chosen": -2.899624824523926, "logits/rejected": -2.300678253173828, "logps/chosen": -108.38270568847656, "logps/rejected": -916.103515625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.6228877305984497, "rewards/margins": 8.157382011413574, "rewards/rejected": -8.780269622802734, "step": 23140 }, { "epoch": 0.28, "learning_rate": 4.537237392260865e-06, "logits/chosen": -2.892216920852661, "logits/rejected": -2.4315333366394043, "logps/chosen": -112.64691162109375, "logps/rejected": -906.9498901367188, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.6737239956855774, "rewards/margins": 8.005125045776367, "rewards/rejected": -8.678849220275879, "step": 23150 }, { "epoch": 0.28, "learning_rate": 4.536631743147463e-06, "logits/chosen": -2.8987224102020264, "logits/rejected": -2.3133654594421387, "logps/chosen": -144.38661193847656, "logps/rejected": -854.6887817382812, "loss": 0.1235, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9553545117378235, "rewards/margins": 7.212857246398926, "rewards/rejected": -8.168210983276367, "step": 23160 }, { "epoch": 0.28, "learning_rate": 4.536025738443813e-06, "logits/chosen": -2.869932174682617, "logits/rejected": -2.090514659881592, "logps/chosen": -163.46153259277344, "logps/rejected": -1020.9141845703125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.1286242008209229, "rewards/margins": 8.677214622497559, "rewards/rejected": -9.805838584899902, "step": 23170 }, { "epoch": 0.28, "learning_rate": 4.535419378255726e-06, "logits/chosen": -2.8866498470306396, "logits/rejected": -2.639857769012451, "logps/chosen": -89.06901550292969, "logps/rejected": -708.1326904296875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.49697160720825195, "rewards/margins": 6.202852725982666, "rewards/rejected": -6.699825286865234, "step": 23180 }, { "epoch": 0.28, "learning_rate": 4.534812662689068e-06, "logits/chosen": -2.912022352218628, "logits/rejected": -2.5154271125793457, "logps/chosen": -102.69178771972656, "logps/rejected": -751.8516845703125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.6004520654678345, "rewards/margins": 6.535566806793213, "rewards/rejected": -7.1360182762146, "step": 23190 }, { "epoch": 0.28, "learning_rate": 4.534205591849771e-06, "logits/chosen": -2.9252352714538574, "logits/rejected": -2.2609591484069824, "logps/chosen": -134.41146850585938, "logps/rejected": -1030.87548828125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.8035055994987488, "rewards/margins": 9.089605331420898, "rewards/rejected": -9.893110275268555, "step": 23200 }, { "epoch": 0.28, "learning_rate": 4.533598165843826e-06, "logits/chosen": -2.8922390937805176, "logits/rejected": -2.410249710083008, "logps/chosen": -148.41781616210938, "logps/rejected": -1003.1522216796875, "loss": 0.1394, "rewards/accuracies": 1.0, "rewards/chosen": -0.9736999273300171, "rewards/margins": 8.654867172241211, "rewards/rejected": -9.628565788269043, "step": 23210 }, { "epoch": 0.28, "learning_rate": 4.53299038477729e-06, "logits/chosen": -2.9176101684570312, "logits/rejected": -2.495251417160034, "logps/chosen": -108.57304382324219, "logps/rejected": -881.8526611328125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.5765022039413452, "rewards/margins": 7.859162330627441, "rewards/rejected": -8.435663223266602, "step": 23220 }, { "epoch": 0.28, "learning_rate": 4.532382248756279e-06, "logits/chosen": -2.904595136642456, "logits/rejected": -2.1116175651550293, "logps/chosen": -142.42857360839844, "logps/rejected": -1043.9246826171875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.8823928833007812, "rewards/margins": 9.143301010131836, "rewards/rejected": -10.025693893432617, "step": 23230 }, { "epoch": 0.28, "learning_rate": 4.531773757886974e-06, "logits/chosen": -2.901822090148926, "logits/rejected": -2.538301467895508, "logps/chosen": -109.32572174072266, "logps/rejected": -811.4755859375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.6744692921638489, "rewards/margins": 7.062314033508301, "rewards/rejected": -7.7367844581604, "step": 23240 }, { "epoch": 0.28, "learning_rate": 4.5311649122756115e-06, "logits/chosen": -2.883836269378662, "logits/rejected": -2.3373236656188965, "logps/chosen": -154.1299285888672, "logps/rejected": -941.5797729492188, "loss": 0.2548, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0747649669647217, "rewards/margins": 7.939096927642822, "rewards/rejected": -9.013860702514648, "step": 23250 }, { "epoch": 0.28, "learning_rate": 4.530555712028497e-06, "logits/chosen": -2.910994052886963, "logits/rejected": -2.098632574081421, "logps/chosen": -176.64830017089844, "logps/rejected": -1063.741455078125, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -1.1369712352752686, "rewards/margins": 9.073308944702148, "rewards/rejected": -10.210281372070312, "step": 23260 }, { "epoch": 0.28, "learning_rate": 4.529946157251996e-06, "logits/chosen": -2.9115209579467773, "logits/rejected": -2.4038608074188232, "logps/chosen": -100.04991149902344, "logps/rejected": -738.9891357421875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.5498471260070801, "rewards/margins": 6.4488983154296875, "rewards/rejected": -6.998745918273926, "step": 23270 }, { "epoch": 0.28, "learning_rate": 4.529336248052533e-06, "logits/chosen": -2.911017894744873, "logits/rejected": -2.3478353023529053, "logps/chosen": -114.92195892333984, "logps/rejected": -936.4627075195312, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.6536059379577637, "rewards/margins": 8.315519332885742, "rewards/rejected": -8.969123840332031, "step": 23280 }, { "epoch": 0.28, "learning_rate": 4.528725984536598e-06, "logits/chosen": -2.914402484893799, "logits/rejected": -2.471789836883545, "logps/chosen": -92.395751953125, "logps/rejected": -840.88330078125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.4942072331905365, "rewards/margins": 7.523454189300537, "rewards/rejected": -8.017662048339844, "step": 23290 }, { "epoch": 0.28, "learning_rate": 4.52811536681074e-06, "logits/chosen": -2.9127936363220215, "logits/rejected": -2.5563223361968994, "logps/chosen": -106.05256652832031, "logps/rejected": -748.8832397460938, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.6403288245201111, "rewards/margins": 6.455768585205078, "rewards/rejected": -7.09609842300415, "step": 23300 }, { "epoch": 0.28, "learning_rate": 4.527504394981572e-06, "logits/chosen": -2.856445550918579, "logits/rejected": -2.4185400009155273, "logps/chosen": -113.5982894897461, "logps/rejected": -986.3701171875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.7070451974868774, "rewards/margins": 8.751580238342285, "rewards/rejected": -9.458624839782715, "step": 23310 }, { "epoch": 0.28, "learning_rate": 4.526893069155768e-06, "logits/chosen": -2.9611146450042725, "logits/rejected": -2.589937210083008, "logps/chosen": -99.9456787109375, "logps/rejected": -805.4605712890625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.5382565259933472, "rewards/margins": 7.147372245788574, "rewards/rejected": -7.685629367828369, "step": 23320 }, { "epoch": 0.28, "learning_rate": 4.526281389440063e-06, "logits/chosen": -2.8919410705566406, "logits/rejected": -2.489938974380493, "logps/chosen": -98.10470581054688, "logps/rejected": -792.2764892578125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.5481647253036499, "rewards/margins": 6.988974094390869, "rewards/rejected": -7.53713846206665, "step": 23330 }, { "epoch": 0.28, "learning_rate": 4.525669355941256e-06, "logits/chosen": -2.8910231590270996, "logits/rejected": -2.5659732818603516, "logps/chosen": -80.728271484375, "logps/rejected": -733.7880249023438, "loss": 0.1361, "rewards/accuracies": 1.0, "rewards/chosen": -0.41454753279685974, "rewards/margins": 6.53832483291626, "rewards/rejected": -6.952872276306152, "step": 23340 }, { "epoch": 0.28, "learning_rate": 4.525056968766205e-06, "logits/chosen": -2.8689839839935303, "logits/rejected": -2.4717726707458496, "logps/chosen": -143.32090759277344, "logps/rejected": -893.9612426757812, "loss": 0.0996, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9523137807846069, "rewards/margins": 7.585107326507568, "rewards/rejected": -8.537420272827148, "step": 23350 }, { "epoch": 0.28, "learning_rate": 4.524444228021832e-06, "logits/chosen": -2.8827481269836426, "logits/rejected": -2.110769510269165, "logps/chosen": -138.09417724609375, "logps/rejected": -916.3927612304688, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.8553498983383179, "rewards/margins": 7.927396297454834, "rewards/rejected": -8.782747268676758, "step": 23360 }, { "epoch": 0.28, "learning_rate": 4.523831133815118e-06, "logits/chosen": -2.9373693466186523, "logits/rejected": -2.7250142097473145, "logps/chosen": -73.34427642822266, "logps/rejected": -701.1107177734375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.32960930466651917, "rewards/margins": 6.308867454528809, "rewards/rejected": -6.638476371765137, "step": 23370 }, { "epoch": 0.28, "learning_rate": 4.5232176862531105e-06, "logits/chosen": -2.8366665840148926, "logits/rejected": -2.398683786392212, "logps/chosen": -121.77024841308594, "logps/rejected": -900.7763671875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.7494622468948364, "rewards/margins": 7.8495001792907715, "rewards/rejected": -8.598962783813477, "step": 23380 }, { "epoch": 0.28, "learning_rate": 4.522603885442914e-06, "logits/chosen": -2.8998684883117676, "logits/rejected": -2.3990185260772705, "logps/chosen": -101.27542877197266, "logps/rejected": -906.3961181640625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.577279269695282, "rewards/margins": 8.095779418945312, "rewards/rejected": -8.673059463500977, "step": 23390 }, { "epoch": 0.28, "learning_rate": 4.521989731491696e-06, "logits/chosen": -2.8330931663513184, "logits/rejected": -2.295919418334961, "logps/chosen": -125.61083984375, "logps/rejected": -982.9293823242188, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.7747918367385864, "rewards/margins": 8.652502059936523, "rewards/rejected": -9.42729377746582, "step": 23400 }, { "epoch": 0.28, "learning_rate": 4.521375224506687e-06, "logits/chosen": -2.9272572994232178, "logits/rejected": -2.5849499702453613, "logps/chosen": -112.1739501953125, "logps/rejected": -856.1248779296875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.7053458094596863, "rewards/margins": 7.468052864074707, "rewards/rejected": -8.173398971557617, "step": 23410 }, { "epoch": 0.28, "learning_rate": 4.5207603645951785e-06, "logits/chosen": -2.9098331928253174, "logits/rejected": -2.4688608646392822, "logps/chosen": -98.70226287841797, "logps/rejected": -787.515625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.548157274723053, "rewards/margins": 6.9571051597595215, "rewards/rejected": -7.505262851715088, "step": 23420 }, { "epoch": 0.28, "learning_rate": 4.520145151864523e-06, "logits/chosen": -2.8598198890686035, "logits/rejected": -2.3694088459014893, "logps/chosen": -151.36410522460938, "logps/rejected": -910.3235473632812, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.025982141494751, "rewards/margins": 7.67995548248291, "rewards/rejected": -8.705937385559082, "step": 23430 }, { "epoch": 0.28, "learning_rate": 4.519529586422133e-06, "logits/chosen": -2.7855191230773926, "logits/rejected": -2.139549970626831, "logps/chosen": -186.3612060546875, "logps/rejected": -1019.4791259765625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.3653805255889893, "rewards/margins": 8.433401107788086, "rewards/rejected": -9.798782348632812, "step": 23440 }, { "epoch": 0.28, "learning_rate": 4.518913668375487e-06, "logits/chosen": -2.8713979721069336, "logits/rejected": -2.277693510055542, "logps/chosen": -128.84677124023438, "logps/rejected": -931.0783081054688, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.8148652911186218, "rewards/margins": 8.110733032226562, "rewards/rejected": -8.92559814453125, "step": 23450 }, { "epoch": 0.28, "learning_rate": 4.518297397832122e-06, "logits/chosen": -2.8850388526916504, "logits/rejected": -2.118056535720825, "logps/chosen": -163.4842071533203, "logps/rejected": -1014.0983276367188, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.1308507919311523, "rewards/margins": 8.593100547790527, "rewards/rejected": -9.723952293395996, "step": 23460 }, { "epoch": 0.28, "learning_rate": 4.517680774899638e-06, "logits/chosen": -2.88407039642334, "logits/rejected": -2.2637295722961426, "logps/chosen": -160.88673400878906, "logps/rejected": -1004.5896606445312, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.1393253803253174, "rewards/margins": 8.505979537963867, "rewards/rejected": -9.645304679870605, "step": 23470 }, { "epoch": 0.28, "learning_rate": 4.517063799685693e-06, "logits/chosen": -2.8763985633850098, "logits/rejected": -2.629027843475342, "logps/chosen": -95.96635437011719, "logps/rejected": -850.8864135742188, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.5759844779968262, "rewards/margins": 7.5628790855407715, "rewards/rejected": -8.138863563537598, "step": 23480 }, { "epoch": 0.28, "learning_rate": 4.516446472298011e-06, "logits/chosen": -2.8419482707977295, "logits/rejected": -2.3576159477233887, "logps/chosen": -122.68986511230469, "logps/rejected": -801.6746826171875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.8061152696609497, "rewards/margins": 6.851067543029785, "rewards/rejected": -7.657182216644287, "step": 23490 }, { "epoch": 0.28, "learning_rate": 4.515828792844375e-06, "logits/chosen": -2.8535633087158203, "logits/rejected": -2.3584208488464355, "logps/chosen": -183.27719116210938, "logps/rejected": -827.6116333007812, "loss": 0.126, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4258267879486084, "rewards/margins": 6.4739484786987305, "rewards/rejected": -7.899776458740234, "step": 23500 }, { "epoch": 0.28, "learning_rate": 4.515210761432632e-06, "logits/chosen": -2.8397789001464844, "logits/rejected": -2.336139678955078, "logps/chosen": -122.9322280883789, "logps/rejected": -810.9486083984375, "loss": 0.1283, "rewards/accuracies": 1.0, "rewards/chosen": -0.8204340934753418, "rewards/margins": 6.90386438369751, "rewards/rejected": -7.724298000335693, "step": 23510 }, { "epoch": 0.28, "learning_rate": 4.5145923781706865e-06, "logits/chosen": -2.9022624492645264, "logits/rejected": -2.4155659675598145, "logps/chosen": -142.259033203125, "logps/rejected": -896.2686767578125, "loss": 0.1264, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9408971071243286, "rewards/margins": 7.633919715881348, "rewards/rejected": -8.57481575012207, "step": 23520 }, { "epoch": 0.28, "learning_rate": 4.513973643166506e-06, "logits/chosen": -2.946990489959717, "logits/rejected": -2.585906744003296, "logps/chosen": -88.12177276611328, "logps/rejected": -814.2892456054688, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.46516475081443787, "rewards/margins": 7.29746150970459, "rewards/rejected": -7.762625694274902, "step": 23530 }, { "epoch": 0.28, "learning_rate": 4.513354556528122e-06, "logits/chosen": -2.921945333480835, "logits/rejected": -2.3039450645446777, "logps/chosen": -122.22830963134766, "logps/rejected": -906.9808349609375, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.7302871942520142, "rewards/margins": 7.9468865394592285, "rewards/rejected": -8.677173614501953, "step": 23540 }, { "epoch": 0.28, "learning_rate": 4.512735118363624e-06, "logits/chosen": -2.8815131187438965, "logits/rejected": -2.3485565185546875, "logps/chosen": -104.7455825805664, "logps/rejected": -961.5299682617188, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -0.5824931263923645, "rewards/margins": 8.663198471069336, "rewards/rejected": -9.245692253112793, "step": 23550 }, { "epoch": 0.28, "learning_rate": 4.512115328781164e-06, "logits/chosen": -2.928992509841919, "logits/rejected": -2.5563156604766846, "logps/chosen": -95.67515563964844, "logps/rejected": -819.2828369140625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.5278415083885193, "rewards/margins": 7.288478851318359, "rewards/rejected": -7.816320896148682, "step": 23560 }, { "epoch": 0.28, "learning_rate": 4.511495187888957e-06, "logits/chosen": -2.910996913909912, "logits/rejected": -2.3167243003845215, "logps/chosen": -140.69439697265625, "logps/rejected": -1019.0031127929688, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.828663170337677, "rewards/margins": 8.965929985046387, "rewards/rejected": -9.79459285736084, "step": 23570 }, { "epoch": 0.28, "learning_rate": 4.510874695795276e-06, "logits/chosen": -2.896073341369629, "logits/rejected": -2.5509109497070312, "logps/chosen": -73.1644287109375, "logps/rejected": -772.44482421875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.3452719748020172, "rewards/margins": 7.009642124176025, "rewards/rejected": -7.354914665222168, "step": 23580 }, { "epoch": 0.28, "learning_rate": 4.510253852608459e-06, "logits/chosen": -2.916991710662842, "logits/rejected": -2.4182636737823486, "logps/chosen": -109.42935943603516, "logps/rejected": -874.9349365234375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.6057891249656677, "rewards/margins": 7.750197410583496, "rewards/rejected": -8.355985641479492, "step": 23590 }, { "epoch": 0.28, "learning_rate": 4.509632658436902e-06, "logits/chosen": -2.9109206199645996, "logits/rejected": -2.4124996662139893, "logps/chosen": -113.178955078125, "logps/rejected": -914.3366088867188, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.6579262018203735, "rewards/margins": 8.094051361083984, "rewards/rejected": -8.751977920532227, "step": 23600 }, { "epoch": 0.28, "learning_rate": 4.509011113389065e-06, "logits/chosen": -2.896679162979126, "logits/rejected": -2.5403172969818115, "logps/chosen": -107.8498306274414, "logps/rejected": -955.798828125, "loss": 0.1055, "rewards/accuracies": 1.0, "rewards/chosen": -0.6218674778938293, "rewards/margins": 8.550220489501953, "rewards/rejected": -9.172089576721191, "step": 23610 }, { "epoch": 0.28, "learning_rate": 4.508389217573466e-06, "logits/chosen": -2.8913121223449707, "logits/rejected": -2.3011844158172607, "logps/chosen": -137.4598846435547, "logps/rejected": -925.4031982421875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.8616736531257629, "rewards/margins": 7.991981506347656, "rewards/rejected": -8.853654861450195, "step": 23620 }, { "epoch": 0.28, "learning_rate": 4.507766971098688e-06, "logits/chosen": -2.912950038909912, "logits/rejected": -2.4554696083068848, "logps/chosen": -114.89688873291016, "logps/rejected": -911.6301879882812, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.6677007079124451, "rewards/margins": 8.064923286437988, "rewards/rejected": -8.732625007629395, "step": 23630 }, { "epoch": 0.28, "learning_rate": 4.507144374073374e-06, "logits/chosen": -2.923353672027588, "logits/rejected": -2.646257162094116, "logps/chosen": -94.70161437988281, "logps/rejected": -729.7892456054688, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -0.5429432392120361, "rewards/margins": 6.390920162200928, "rewards/rejected": -6.933863639831543, "step": 23640 }, { "epoch": 0.28, "learning_rate": 4.506521426606226e-06, "logits/chosen": -2.8568990230560303, "logits/rejected": -2.1771388053894043, "logps/chosen": -144.80929565429688, "logps/rejected": -1158.341552734375, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.8801441192626953, "rewards/margins": 10.291738510131836, "rewards/rejected": -11.171882629394531, "step": 23650 }, { "epoch": 0.28, "learning_rate": 4.5058981288060115e-06, "logits/chosen": -2.909104585647583, "logits/rejected": -2.280050754547119, "logps/chosen": -124.41926574707031, "logps/rejected": -865.5753173828125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.7563940286636353, "rewards/margins": 7.50310754776001, "rewards/rejected": -8.259500503540039, "step": 23660 }, { "epoch": 0.28, "learning_rate": 4.505274480781553e-06, "logits/chosen": -2.865924119949341, "logits/rejected": -2.119826316833496, "logps/chosen": -138.6614227294922, "logps/rejected": -1033.356201171875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.8576313257217407, "rewards/margins": 9.085982322692871, "rewards/rejected": -9.943615913391113, "step": 23670 }, { "epoch": 0.28, "learning_rate": 4.50465048264174e-06, "logits/chosen": -2.9061708450317383, "logits/rejected": -2.5003437995910645, "logps/chosen": -120.37782287597656, "logps/rejected": -915.1253662109375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.7389349937438965, "rewards/margins": 8.027016639709473, "rewards/rejected": -8.765951156616211, "step": 23680 }, { "epoch": 0.28, "learning_rate": 4.504026134495521e-06, "logits/chosen": -2.946871280670166, "logits/rejected": -2.5518977642059326, "logps/chosen": -109.03001403808594, "logps/rejected": -897.029296875, "loss": 0.1518, "rewards/accuracies": 1.0, "rewards/chosen": -0.6951596140861511, "rewards/margins": 7.896334171295166, "rewards/rejected": -8.5914945602417, "step": 23690 }, { "epoch": 0.28, "learning_rate": 4.5034014364519044e-06, "logits/chosen": -2.8945322036743164, "logits/rejected": -2.3609747886657715, "logps/chosen": -139.64935302734375, "logps/rejected": -975.8201904296875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9157854914665222, "rewards/margins": 8.448027610778809, "rewards/rejected": -9.363812446594238, "step": 23700 }, { "epoch": 0.28, "learning_rate": 4.502776388619962e-06, "logits/chosen": -2.884524345397949, "logits/rejected": -2.1953206062316895, "logps/chosen": -164.70797729492188, "logps/rejected": -1052.30615234375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.1380853652954102, "rewards/margins": 8.984247207641602, "rewards/rejected": -10.122331619262695, "step": 23710 }, { "epoch": 0.28, "learning_rate": 4.502150991108824e-06, "logits/chosen": -2.938351631164551, "logits/rejected": -2.292006015777588, "logps/chosen": -128.05282592773438, "logps/rejected": -1001.9943237304688, "loss": 0.1499, "rewards/accuracies": 1.0, "rewards/chosen": -0.8117820024490356, "rewards/margins": 8.816335678100586, "rewards/rejected": -9.628119468688965, "step": 23720 }, { "epoch": 0.28, "learning_rate": 4.501525244027683e-06, "logits/chosen": -2.8903610706329346, "logits/rejected": -2.4128775596618652, "logps/chosen": -122.092529296875, "logps/rejected": -952.2891845703125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.7731412053108215, "rewards/margins": 8.366601943969727, "rewards/rejected": -9.139744758605957, "step": 23730 }, { "epoch": 0.28, "learning_rate": 4.500899147485794e-06, "logits/chosen": -2.872612237930298, "logits/rejected": -2.2030396461486816, "logps/chosen": -161.20913696289062, "logps/rejected": -1011.9054565429688, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.0733416080474854, "rewards/margins": 8.662885665893555, "rewards/rejected": -9.736227035522461, "step": 23740 }, { "epoch": 0.28, "learning_rate": 4.500272701592471e-06, "logits/chosen": -2.8374040126800537, "logits/rejected": -1.9595098495483398, "logps/chosen": -230.7265625, "logps/rejected": -1087.802978515625, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.762991189956665, "rewards/margins": 8.713296890258789, "rewards/rejected": -10.476289749145508, "step": 23750 }, { "epoch": 0.28, "learning_rate": 4.49964590645709e-06, "logits/chosen": -2.8485655784606934, "logits/rejected": -2.390667200088501, "logps/chosen": -140.60513305664062, "logps/rejected": -893.2100830078125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9909254312515259, "rewards/margins": 7.558367729187012, "rewards/rejected": -8.54929256439209, "step": 23760 }, { "epoch": 0.28, "learning_rate": 4.499018762189087e-06, "logits/chosen": -2.8153159618377686, "logits/rejected": -2.300776243209839, "logps/chosen": -156.17047119140625, "logps/rejected": -944.56298828125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.1081500053405762, "rewards/margins": 7.936036109924316, "rewards/rejected": -9.044183731079102, "step": 23770 }, { "epoch": 0.28, "learning_rate": 4.498391268897961e-06, "logits/chosen": -2.880660057067871, "logits/rejected": -2.2418293952941895, "logps/chosen": -206.8676300048828, "logps/rejected": -980.1376953125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.5480836629867554, "rewards/margins": 7.848127841949463, "rewards/rejected": -9.396210670471191, "step": 23780 }, { "epoch": 0.28, "learning_rate": 4.497763426693269e-06, "logits/chosen": -2.812971591949463, "logits/rejected": -2.276301383972168, "logps/chosen": -180.52865600585938, "logps/rejected": -991.17578125, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -1.3328590393066406, "rewards/margins": 8.198637008666992, "rewards/rejected": -9.531496047973633, "step": 23790 }, { "epoch": 0.28, "learning_rate": 4.497135235684632e-06, "logits/chosen": -2.8942394256591797, "logits/rejected": -2.3036861419677734, "logps/chosen": -154.23306274414062, "logps/rejected": -881.06201171875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.013071894645691, "rewards/margins": 7.410065650939941, "rewards/rejected": -8.423138618469238, "step": 23800 }, { "epoch": 0.29, "learning_rate": 4.496506695981729e-06, "logits/chosen": -2.8403687477111816, "logits/rejected": -2.3349854946136475, "logps/chosen": -223.0986328125, "logps/rejected": -1007.6483154296875, "loss": 0.1484, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.739072561264038, "rewards/margins": 7.9575514793396, "rewards/rejected": -9.696624755859375, "step": 23810 }, { "epoch": 0.29, "learning_rate": 4.495877807694302e-06, "logits/chosen": -2.8863131999969482, "logits/rejected": -2.5677638053894043, "logps/chosen": -110.9229736328125, "logps/rejected": -756.3619384765625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.720306932926178, "rewards/margins": 6.475197792053223, "rewards/rejected": -7.195504665374756, "step": 23820 }, { "epoch": 0.29, "learning_rate": 4.495248570932153e-06, "logits/chosen": -2.815389633178711, "logits/rejected": -2.0920004844665527, "logps/chosen": -213.8906707763672, "logps/rejected": -1110.2904052734375, "loss": 0.0372, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6367461681365967, "rewards/margins": 9.061772346496582, "rewards/rejected": -10.698518753051758, "step": 23830 }, { "epoch": 0.29, "learning_rate": 4.494618985805146e-06, "logits/chosen": -2.8622896671295166, "logits/rejected": -2.3784549236297607, "logps/chosen": -151.96633911132812, "logps/rejected": -1035.1234130859375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0516451597213745, "rewards/margins": 8.904211044311523, "rewards/rejected": -9.955854415893555, "step": 23840 }, { "epoch": 0.29, "learning_rate": 4.493989052423203e-06, "logits/chosen": -2.8600258827209473, "logits/rejected": -2.1486284732818604, "logps/chosen": -185.34152221679688, "logps/rejected": -1083.0001220703125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.3354854583740234, "rewards/margins": 9.105079650878906, "rewards/rejected": -10.44056510925293, "step": 23850 }, { "epoch": 0.29, "learning_rate": 4.49335877089631e-06, "logits/chosen": -2.8315868377685547, "logits/rejected": -2.419440507888794, "logps/chosen": -160.8179473876953, "logps/rejected": -981.7054443359375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.1557546854019165, "rewards/margins": 8.264741897583008, "rewards/rejected": -9.420495986938477, "step": 23860 }, { "epoch": 0.29, "learning_rate": 4.492728141334512e-06, "logits/chosen": -2.890315532684326, "logits/rejected": -2.4489688873291016, "logps/chosen": -189.75796508789062, "logps/rejected": -971.9806518554688, "loss": 0.1609, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.457087516784668, "rewards/margins": 7.8640851974487305, "rewards/rejected": -9.321172714233398, "step": 23870 }, { "epoch": 0.29, "learning_rate": 4.492097163847914e-06, "logits/chosen": -2.860633134841919, "logits/rejected": -2.5029048919677734, "logps/chosen": -121.16789245605469, "logps/rejected": -814.3958740234375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.8115464448928833, "rewards/margins": 6.966025352478027, "rewards/rejected": -7.777571201324463, "step": 23880 }, { "epoch": 0.29, "learning_rate": 4.491465838546685e-06, "logits/chosen": -2.8556506633758545, "logits/rejected": -2.3272347450256348, "logps/chosen": -152.82083129882812, "logps/rejected": -969.0030517578125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0300194025039673, "rewards/margins": 8.267748832702637, "rewards/rejected": -9.297767639160156, "step": 23890 }, { "epoch": 0.29, "learning_rate": 4.490834165541051e-06, "logits/chosen": -2.8156161308288574, "logits/rejected": -2.1587002277374268, "logps/chosen": -179.09732055664062, "logps/rejected": -1056.62646484375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.3028347492218018, "rewards/margins": 8.867870330810547, "rewards/rejected": -10.170705795288086, "step": 23900 }, { "epoch": 0.29, "learning_rate": 4.490202144941302e-06, "logits/chosen": -2.8427629470825195, "logits/rejected": -2.1927127838134766, "logps/chosen": -166.747314453125, "logps/rejected": -1111.4453125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1556411981582642, "rewards/margins": 9.566999435424805, "rewards/rejected": -10.722640991210938, "step": 23910 }, { "epoch": 0.29, "learning_rate": 4.489569776857785e-06, "logits/chosen": -2.8596203327178955, "logits/rejected": -2.3358519077301025, "logps/chosen": -170.88975524902344, "logps/rejected": -1005.3861083984375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.264808177947998, "rewards/margins": 8.400217056274414, "rewards/rejected": -9.66502571105957, "step": 23920 }, { "epoch": 0.29, "learning_rate": 4.488937061400912e-06, "logits/chosen": -2.8722691535949707, "logits/rejected": -2.4371142387390137, "logps/chosen": -165.7345428466797, "logps/rejected": -1008.1470947265625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.133344292640686, "rewards/margins": 8.569211959838867, "rewards/rejected": -9.702555656433105, "step": 23930 }, { "epoch": 0.29, "learning_rate": 4.488303998681151e-06, "logits/chosen": -2.9090051651000977, "logits/rejected": -2.4145703315734863, "logps/chosen": -147.7571258544922, "logps/rejected": -923.4705200195312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.9757390022277832, "rewards/margins": 7.860106468200684, "rewards/rejected": -8.835845947265625, "step": 23940 }, { "epoch": 0.29, "learning_rate": 4.487670588809034e-06, "logits/chosen": -2.8687469959259033, "logits/rejected": -2.2136993408203125, "logps/chosen": -172.47970581054688, "logps/rejected": -1050.921875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.2197695970535278, "rewards/margins": 8.890508651733398, "rewards/rejected": -10.11027717590332, "step": 23950 }, { "epoch": 0.29, "learning_rate": 4.487036831895154e-06, "logits/chosen": -2.9019227027893066, "logits/rejected": -2.3722612857818604, "logps/chosen": -157.54144287109375, "logps/rejected": -965.88671875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.1352249383926392, "rewards/margins": 8.12467098236084, "rewards/rejected": -9.259896278381348, "step": 23960 }, { "epoch": 0.29, "learning_rate": 4.486402728050161e-06, "logits/chosen": -2.922060251235962, "logits/rejected": -2.039510726928711, "logps/chosen": -175.73428344726562, "logps/rejected": -1120.4464111328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.2200431823730469, "rewards/margins": 9.594863891601562, "rewards/rejected": -10.814908027648926, "step": 23970 }, { "epoch": 0.29, "learning_rate": 4.4857682773847684e-06, "logits/chosen": -2.808260202407837, "logits/rejected": -2.1795125007629395, "logps/chosen": -188.6217498779297, "logps/rejected": -1013.8366088867188, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.3851722478866577, "rewards/margins": 8.364921569824219, "rewards/rejected": -9.750093460083008, "step": 23980 }, { "epoch": 0.29, "learning_rate": 4.48513348000975e-06, "logits/chosen": -2.860495090484619, "logits/rejected": -2.5423319339752197, "logps/chosen": -151.3175048828125, "logps/rejected": -882.9339599609375, "loss": 0.1448, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1317479610443115, "rewards/margins": 7.327818393707275, "rewards/rejected": -8.459567070007324, "step": 23990 }, { "epoch": 0.29, "learning_rate": 4.4844983360359395e-06, "logits/chosen": -2.8685317039489746, "logits/rejected": -2.3544554710388184, "logps/chosen": -184.44009399414062, "logps/rejected": -887.7644653320312, "loss": 0.4534, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3545243740081787, "rewards/margins": 7.129232883453369, "rewards/rejected": -8.483757019042969, "step": 24000 }, { "epoch": 0.29, "eval_logits/chosen": -2.866511583328247, "eval_logits/rejected": -1.6281601190567017, "eval_logps/chosen": -296.2491149902344, "eval_logps/rejected": -1264.9957275390625, "eval_loss": 0.0077033983543515205, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -2.3506884574890137, "eval_rewards/margins": 9.832015991210938, "eval_rewards/rejected": -12.182703971862793, "eval_runtime": 1.2165, "eval_samples_per_second": 4.11, "eval_steps_per_second": 2.466, "step": 24000 }, { "epoch": 0.29, "learning_rate": 4.483862845574232e-06, "logits/chosen": -2.9140381813049316, "logits/rejected": -2.162526845932007, "logps/chosen": -162.3099822998047, "logps/rejected": -1144.6683349609375, "loss": 0.1317, "rewards/accuracies": 1.0, "rewards/chosen": -1.044755458831787, "rewards/margins": 10.001898765563965, "rewards/rejected": -11.046655654907227, "step": 24010 }, { "epoch": 0.29, "learning_rate": 4.4832270087355815e-06, "logits/chosen": -2.8408255577087402, "logits/rejected": -2.321622133255005, "logps/chosen": -114.53680419921875, "logps/rejected": -972.9656372070312, "loss": 0.1413, "rewards/accuracies": 1.0, "rewards/chosen": -0.6761296987533569, "rewards/margins": 8.655096054077148, "rewards/rejected": -9.331226348876953, "step": 24020 }, { "epoch": 0.29, "learning_rate": 4.482590825631002e-06, "logits/chosen": -2.8857884407043457, "logits/rejected": -2.3706486225128174, "logps/chosen": -128.46827697753906, "logps/rejected": -934.3226318359375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.7921255826950073, "rewards/margins": 8.158379554748535, "rewards/rejected": -8.950506210327148, "step": 24030 }, { "epoch": 0.29, "learning_rate": 4.481954296371572e-06, "logits/chosen": -2.9137227535247803, "logits/rejected": -2.2235612869262695, "logps/chosen": -124.28038024902344, "logps/rejected": -1004.8782958984375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.7426666617393494, "rewards/margins": 8.90013313293457, "rewards/rejected": -9.642800331115723, "step": 24040 }, { "epoch": 0.29, "learning_rate": 4.481317421068426e-06, "logits/chosen": -2.869734525680542, "logits/rejected": -2.5111641883850098, "logps/chosen": -101.38790130615234, "logps/rejected": -725.143310546875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.6048827171325684, "rewards/margins": 6.270477771759033, "rewards/rejected": -6.875360012054443, "step": 24050 }, { "epoch": 0.29, "learning_rate": 4.480680199832761e-06, "logits/chosen": -2.8809397220611572, "logits/rejected": -2.1945323944091797, "logps/chosen": -152.81784057617188, "logps/rejected": -1034.878662109375, "loss": 0.256, "rewards/accuracies": 1.0, "rewards/chosen": -1.0025500059127808, "rewards/margins": 8.947794914245605, "rewards/rejected": -9.950343132019043, "step": 24060 }, { "epoch": 0.29, "learning_rate": 4.480042632775835e-06, "logits/chosen": -2.8573529720306396, "logits/rejected": -2.4209742546081543, "logps/chosen": -164.0460205078125, "logps/rejected": -929.6236572265625, "loss": 0.139, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1644309759140015, "rewards/margins": 7.738766670227051, "rewards/rejected": -8.903197288513184, "step": 24070 }, { "epoch": 0.29, "learning_rate": 4.479404720008964e-06, "logits/chosen": -2.8424954414367676, "logits/rejected": -2.2406790256500244, "logps/chosen": -139.879638671875, "logps/rejected": -988.4142456054688, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.8894098401069641, "rewards/margins": 8.606212615966797, "rewards/rejected": -9.495621681213379, "step": 24080 }, { "epoch": 0.29, "learning_rate": 4.478766461643527e-06, "logits/chosen": -2.860231399536133, "logits/rejected": -2.260693073272705, "logps/chosen": -144.3907470703125, "logps/rejected": -876.5945434570312, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.022406816482544, "rewards/margins": 7.371822357177734, "rewards/rejected": -8.394227981567383, "step": 24090 }, { "epoch": 0.29, "learning_rate": 4.478127857790961e-06, "logits/chosen": -2.8710572719573975, "logits/rejected": -2.315927743911743, "logps/chosen": -136.224609375, "logps/rejected": -812.5787963867188, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.8173061609268188, "rewards/margins": 6.91843318939209, "rewards/rejected": -7.735739707946777, "step": 24100 }, { "epoch": 0.29, "learning_rate": 4.477488908562765e-06, "logits/chosen": -2.8156814575195312, "logits/rejected": -2.10267972946167, "logps/chosen": -159.08424377441406, "logps/rejected": -966.1199340820312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.1169002056121826, "rewards/margins": 8.144266128540039, "rewards/rejected": -9.2611665725708, "step": 24110 }, { "epoch": 0.29, "learning_rate": 4.476849614070498e-06, "logits/chosen": -2.892972469329834, "logits/rejected": -2.5916876792907715, "logps/chosen": -99.13163757324219, "logps/rejected": -718.2843627929688, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.6138941645622253, "rewards/margins": 6.203441619873047, "rewards/rejected": -6.817335605621338, "step": 24120 }, { "epoch": 0.29, "learning_rate": 4.476209974425779e-06, "logits/chosen": -2.8616435527801514, "logits/rejected": -2.329023838043213, "logps/chosen": -148.4522705078125, "logps/rejected": -884.2677001953125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.9972545504570007, "rewards/margins": 7.456286430358887, "rewards/rejected": -8.453540802001953, "step": 24130 }, { "epoch": 0.29, "learning_rate": 4.475569989740287e-06, "logits/chosen": -2.839946746826172, "logits/rejected": -2.0231432914733887, "logps/chosen": -185.38504028320312, "logps/rejected": -1051.419189453125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.2840384244918823, "rewards/margins": 8.821548461914062, "rewards/rejected": -10.105588912963867, "step": 24140 }, { "epoch": 0.29, "learning_rate": 4.474929660125762e-06, "logits/chosen": -2.8416683673858643, "logits/rejected": -2.1619958877563477, "logps/chosen": -177.3202362060547, "logps/rejected": -928.2454833984375, "loss": 0.133, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.314429521560669, "rewards/margins": 7.575801849365234, "rewards/rejected": -8.890230178833008, "step": 24150 }, { "epoch": 0.29, "learning_rate": 4.474288985694003e-06, "logits/chosen": -2.8483433723449707, "logits/rejected": -2.5145790576934814, "logps/chosen": -123.25480651855469, "logps/rejected": -829.1262817382812, "loss": 0.0267, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8512059450149536, "rewards/margins": 7.07293176651001, "rewards/rejected": -7.924136161804199, "step": 24160 }, { "epoch": 0.29, "learning_rate": 4.473647966556871e-06, "logits/chosen": -2.8550963401794434, "logits/rejected": -2.321906089782715, "logps/chosen": -177.64743041992188, "logps/rejected": -823.7438354492188, "loss": 0.1316, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3347512483596802, "rewards/margins": 6.512763023376465, "rewards/rejected": -7.8475141525268555, "step": 24170 }, { "epoch": 0.29, "learning_rate": 4.473006602826285e-06, "logits/chosen": -2.865139961242676, "logits/rejected": -2.5370497703552246, "logps/chosen": -134.89158630371094, "logps/rejected": -867.50390625, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -0.9506685137748718, "rewards/margins": 7.350579738616943, "rewards/rejected": -8.301248550415039, "step": 24180 }, { "epoch": 0.29, "learning_rate": 4.4723648946142264e-06, "logits/chosen": -2.858276605606079, "logits/rejected": -2.2960736751556396, "logps/chosen": -158.20779418945312, "logps/rejected": -984.6658935546875, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.150151014328003, "rewards/margins": 8.306692123413086, "rewards/rejected": -9.456843376159668, "step": 24190 }, { "epoch": 0.29, "learning_rate": 4.471722842032735e-06, "logits/chosen": -2.780097484588623, "logits/rejected": -1.807905912399292, "logps/chosen": -192.89498901367188, "logps/rejected": -1093.2755126953125, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.3417078256607056, "rewards/margins": 9.172231674194336, "rewards/rejected": -10.513936996459961, "step": 24200 }, { "epoch": 0.29, "learning_rate": 4.471080445193912e-06, "logits/chosen": -2.8571507930755615, "logits/rejected": -2.19807767868042, "logps/chosen": -145.3432159423828, "logps/rejected": -912.2689208984375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9442561864852905, "rewards/margins": 7.790238857269287, "rewards/rejected": -8.734495162963867, "step": 24210 }, { "epoch": 0.29, "learning_rate": 4.470437704209917e-06, "logits/chosen": -2.8728418350219727, "logits/rejected": -2.472851276397705, "logps/chosen": -138.72406005859375, "logps/rejected": -811.5892333984375, "loss": 0.1228, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9591201543807983, "rewards/margins": 6.774960517883301, "rewards/rejected": -7.7340803146362305, "step": 24220 }, { "epoch": 0.29, "learning_rate": 4.469794619192972e-06, "logits/chosen": -2.9181787967681885, "logits/rejected": -2.432753086090088, "logps/chosen": -118.24796295166016, "logps/rejected": -852.6343994140625, "loss": 0.1564, "rewards/accuracies": 1.0, "rewards/chosen": -0.7660671472549438, "rewards/margins": 7.384127616882324, "rewards/rejected": -8.15019416809082, "step": 24230 }, { "epoch": 0.29, "learning_rate": 4.469151190255357e-06, "logits/chosen": -2.871311664581299, "logits/rejected": -2.1255271434783936, "logps/chosen": -153.62338256835938, "logps/rejected": -1043.769287109375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9743515849113464, "rewards/margins": 9.045948028564453, "rewards/rejected": -10.020299911499023, "step": 24240 }, { "epoch": 0.29, "learning_rate": 4.468507417509413e-06, "logits/chosen": -2.9218995571136475, "logits/rejected": -2.383629322052002, "logps/chosen": -135.0908660888672, "logps/rejected": -947.7595825195312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8573564291000366, "rewards/margins": 8.225740432739258, "rewards/rejected": -9.08309555053711, "step": 24250 }, { "epoch": 0.29, "learning_rate": 4.467863301067542e-06, "logits/chosen": -2.9086954593658447, "logits/rejected": -2.4070098400115967, "logps/chosen": -134.1226348876953, "logps/rejected": -849.8118286132812, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.8447311520576477, "rewards/margins": 7.257652282714844, "rewards/rejected": -8.10238265991211, "step": 24260 }, { "epoch": 0.29, "learning_rate": 4.467218841042204e-06, "logits/chosen": -2.8837571144104004, "logits/rejected": -2.220219135284424, "logps/chosen": -137.1404266357422, "logps/rejected": -874.3839721679688, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -0.8892592191696167, "rewards/margins": 7.456414222717285, "rewards/rejected": -8.345673561096191, "step": 24270 }, { "epoch": 0.29, "learning_rate": 4.46657403754592e-06, "logits/chosen": -2.931734085083008, "logits/rejected": -2.4406898021698, "logps/chosen": -179.58834838867188, "logps/rejected": -824.63427734375, "loss": 0.2963, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.3777192831039429, "rewards/margins": 6.494668483734131, "rewards/rejected": -7.872387886047363, "step": 24280 }, { "epoch": 0.29, "learning_rate": 4.4659288906912715e-06, "logits/chosen": -2.907315492630005, "logits/rejected": -2.323805809020996, "logps/chosen": -126.04273986816406, "logps/rejected": -928.6275634765625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.7805195450782776, "rewards/margins": 8.098840713500977, "rewards/rejected": -8.87936019897461, "step": 24290 }, { "epoch": 0.29, "learning_rate": 4.465283400590897e-06, "logits/chosen": -2.895871162414551, "logits/rejected": -2.2476608753204346, "logps/chosen": -131.11141967773438, "logps/rejected": -942.5579833984375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.8198140859603882, "rewards/margins": 8.21428108215332, "rewards/rejected": -9.034093856811523, "step": 24300 }, { "epoch": 0.29, "learning_rate": 4.464637567357501e-06, "logits/chosen": -2.869724750518799, "logits/rejected": -2.128943681716919, "logps/chosen": -158.0248260498047, "logps/rejected": -937.6766357421875, "loss": 0.0398, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0975090265274048, "rewards/margins": 7.880332946777344, "rewards/rejected": -8.9778413772583, "step": 24310 }, { "epoch": 0.29, "learning_rate": 4.463991391103841e-06, "logits/chosen": -2.848860502243042, "logits/rejected": -2.3175480365753174, "logps/chosen": -131.83448791503906, "logps/rejected": -799.369384765625, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.9049404859542847, "rewards/margins": 6.698909759521484, "rewards/rejected": -7.603850364685059, "step": 24320 }, { "epoch": 0.29, "learning_rate": 4.46334487194274e-06, "logits/chosen": -2.8839287757873535, "logits/rejected": -2.29790997505188, "logps/chosen": -121.24061584472656, "logps/rejected": -857.8448486328125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7426384687423706, "rewards/margins": 7.450047969818115, "rewards/rejected": -8.192686080932617, "step": 24330 }, { "epoch": 0.29, "learning_rate": 4.462698009987077e-06, "logits/chosen": -2.9290404319763184, "logits/rejected": -2.4752187728881836, "logps/chosen": -115.1811294555664, "logps/rejected": -857.4835815429688, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.705842912197113, "rewards/margins": 7.492138862609863, "rewards/rejected": -8.197980880737305, "step": 24340 }, { "epoch": 0.29, "learning_rate": 4.462050805349793e-06, "logits/chosen": -2.903592824935913, "logits/rejected": -2.1063942909240723, "logps/chosen": -158.64247131347656, "logps/rejected": -983.0759887695312, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.0340955257415771, "rewards/margins": 8.390970230102539, "rewards/rejected": -9.425066947937012, "step": 24350 }, { "epoch": 0.29, "learning_rate": 4.461403258143886e-06, "logits/chosen": -2.8495535850524902, "logits/rejected": -2.044142723083496, "logps/chosen": -144.48727416992188, "logps/rejected": -927.9814453125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.9429186582565308, "rewards/margins": 7.9292802810668945, "rewards/rejected": -8.872198104858398, "step": 24360 }, { "epoch": 0.29, "learning_rate": 4.460755368482419e-06, "logits/chosen": -2.8933053016662598, "logits/rejected": -2.501904010772705, "logps/chosen": -114.76025390625, "logps/rejected": -859.27099609375, "loss": 0.1624, "rewards/accuracies": 1.0, "rewards/chosen": -0.7008264660835266, "rewards/margins": 7.504843235015869, "rewards/rejected": -8.205670356750488, "step": 24370 }, { "epoch": 0.29, "learning_rate": 4.4601071364785104e-06, "logits/chosen": -2.899195671081543, "logits/rejected": -2.3775572776794434, "logps/chosen": -112.55765533447266, "logps/rejected": -890.1594848632812, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -0.7278403043746948, "rewards/margins": 7.795326232910156, "rewards/rejected": -8.52316665649414, "step": 24380 }, { "epoch": 0.29, "learning_rate": 4.459458562245339e-06, "logits/chosen": -2.863433837890625, "logits/rejected": -2.3691275119781494, "logps/chosen": -120.44866943359375, "logps/rejected": -881.6273193359375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7518604397773743, "rewards/margins": 7.685037136077881, "rewards/rejected": -8.436899185180664, "step": 24390 }, { "epoch": 0.29, "learning_rate": 4.458809645896146e-06, "logits/chosen": -2.894434690475464, "logits/rejected": -2.20076060295105, "logps/chosen": -144.0785369873047, "logps/rejected": -980.6134033203125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8595350980758667, "rewards/margins": 8.520220756530762, "rewards/rejected": -9.379755020141602, "step": 24400 }, { "epoch": 0.29, "learning_rate": 4.458160387544228e-06, "logits/chosen": -2.92492413520813, "logits/rejected": -2.400176763534546, "logps/chosen": -135.64450073242188, "logps/rejected": -940.1298828125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8571176528930664, "rewards/margins": 8.134928703308105, "rewards/rejected": -8.992047309875488, "step": 24410 }, { "epoch": 0.29, "learning_rate": 4.457510787302946e-06, "logits/chosen": -2.8240838050842285, "logits/rejected": -2.192250967025757, "logps/chosen": -172.72857666015625, "logps/rejected": -976.2054443359375, "loss": 0.1378, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2007098197937012, "rewards/margins": 8.153264999389648, "rewards/rejected": -9.353974342346191, "step": 24420 }, { "epoch": 0.29, "learning_rate": 4.456860845285717e-06, "logits/chosen": -2.8762876987457275, "logits/rejected": -2.51588773727417, "logps/chosen": -118.05303955078125, "logps/rejected": -877.7072143554688, "loss": 0.0998, "rewards/accuracies": 1.0, "rewards/chosen": -0.703037679195404, "rewards/margins": 7.686689853668213, "rewards/rejected": -8.389727592468262, "step": 24430 }, { "epoch": 0.29, "learning_rate": 4.456210561606019e-06, "logits/chosen": -2.895127296447754, "logits/rejected": -2.2946226596832275, "logps/chosen": -164.59046936035156, "logps/rejected": -828.375, "loss": 0.1561, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1429383754730225, "rewards/margins": 6.7596635818481445, "rewards/rejected": -7.902601718902588, "step": 24440 }, { "epoch": 0.29, "learning_rate": 4.45555993637739e-06, "logits/chosen": -2.9255285263061523, "logits/rejected": -2.6735339164733887, "logps/chosen": -93.15947723388672, "logps/rejected": -766.834716796875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5774739980697632, "rewards/margins": 6.727049827575684, "rewards/rejected": -7.304523468017578, "step": 24450 }, { "epoch": 0.29, "learning_rate": 4.454908969713428e-06, "logits/chosen": -2.896923780441284, "logits/rejected": -2.4284353256225586, "logps/chosen": -129.66006469726562, "logps/rejected": -922.1276245117188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8382598757743835, "rewards/margins": 7.986872673034668, "rewards/rejected": -8.825132369995117, "step": 24460 }, { "epoch": 0.29, "learning_rate": 4.454257661727789e-06, "logits/chosen": -2.868795394897461, "logits/rejected": -2.33339262008667, "logps/chosen": -157.8091278076172, "logps/rejected": -859.7101440429688, "loss": 0.1465, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.074580192565918, "rewards/margins": 7.135387420654297, "rewards/rejected": -8.209966659545898, "step": 24470 }, { "epoch": 0.29, "learning_rate": 4.453606012534189e-06, "logits/chosen": -2.8697009086608887, "logits/rejected": -2.2698347568511963, "logps/chosen": -134.6388397216797, "logps/rejected": -953.7766723632812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8640658259391785, "rewards/margins": 8.261055946350098, "rewards/rejected": -9.1251220703125, "step": 24480 }, { "epoch": 0.29, "learning_rate": 4.452954022246407e-06, "logits/chosen": -2.8721718788146973, "logits/rejected": -2.4093151092529297, "logps/chosen": -118.79386138916016, "logps/rejected": -934.0693359375, "loss": 0.1261, "rewards/accuracies": 1.0, "rewards/chosen": -0.7325398325920105, "rewards/margins": 8.216986656188965, "rewards/rejected": -8.9495267868042, "step": 24490 }, { "epoch": 0.29, "learning_rate": 4.452301690978275e-06, "logits/chosen": -2.8652055263519287, "logits/rejected": -2.296140193939209, "logps/chosen": -136.7413787841797, "logps/rejected": -949.0173950195312, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8692213296890259, "rewards/margins": 8.250223159790039, "rewards/rejected": -9.119443893432617, "step": 24500 }, { "epoch": 0.29, "learning_rate": 4.451649018843693e-06, "logits/chosen": -2.8949849605560303, "logits/rejected": -2.180666923522949, "logps/chosen": -142.77284240722656, "logps/rejected": -911.6439208984375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.9552629590034485, "rewards/margins": 7.772032260894775, "rewards/rejected": -8.727293968200684, "step": 24510 }, { "epoch": 0.29, "learning_rate": 4.45099600595661e-06, "logits/chosen": -2.8894920349121094, "logits/rejected": -2.4275341033935547, "logps/chosen": -119.97901916503906, "logps/rejected": -967.8255615234375, "loss": 0.1205, "rewards/accuracies": 1.0, "rewards/chosen": -0.7724272012710571, "rewards/margins": 8.519636154174805, "rewards/rejected": -9.29206371307373, "step": 24520 }, { "epoch": 0.29, "learning_rate": 4.450342652431044e-06, "logits/chosen": -2.8508918285369873, "logits/rejected": -2.185344934463501, "logps/chosen": -177.7515411376953, "logps/rejected": -982.1798095703125, "loss": 0.1223, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2493839263916016, "rewards/margins": 8.163115501403809, "rewards/rejected": -9.412500381469727, "step": 24530 }, { "epoch": 0.29, "learning_rate": 4.449688958381067e-06, "logits/chosen": -2.9038984775543213, "logits/rejected": -2.451444387435913, "logps/chosen": -112.94163513183594, "logps/rejected": -817.2518920898438, "loss": 0.1013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7149707078933716, "rewards/margins": 7.079266548156738, "rewards/rejected": -7.7942376136779785, "step": 24540 }, { "epoch": 0.29, "learning_rate": 4.449034923920814e-06, "logits/chosen": -2.8624234199523926, "logits/rejected": -2.4966113567352295, "logps/chosen": -147.80682373046875, "logps/rejected": -840.1300659179688, "loss": 0.1494, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.067421555519104, "rewards/margins": 6.9565300941467285, "rewards/rejected": -8.023950576782227, "step": 24550 }, { "epoch": 0.29, "learning_rate": 4.448380549164475e-06, "logits/chosen": -2.9273781776428223, "logits/rejected": -2.6059963703155518, "logps/chosen": -110.09375, "logps/rejected": -848.5267333984375, "loss": 0.0233, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6648447513580322, "rewards/margins": 7.440715789794922, "rewards/rejected": -8.105561256408691, "step": 24560 }, { "epoch": 0.29, "learning_rate": 4.447725834226305e-06, "logits/chosen": -2.9054503440856934, "logits/rejected": -2.2827229499816895, "logps/chosen": -131.35931396484375, "logps/rejected": -1009.5491333007812, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.7965529561042786, "rewards/margins": 8.916001319885254, "rewards/rejected": -9.712553024291992, "step": 24570 }, { "epoch": 0.29, "learning_rate": 4.447070779220613e-06, "logits/chosen": -2.9008750915527344, "logits/rejected": -2.4110476970672607, "logps/chosen": -117.3923568725586, "logps/rejected": -940.8126831054688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6941434741020203, "rewards/margins": 8.340907096862793, "rewards/rejected": -9.035050392150879, "step": 24580 }, { "epoch": 0.29, "learning_rate": 4.4464153842617715e-06, "logits/chosen": -2.9248480796813965, "logits/rejected": -2.5405783653259277, "logps/chosen": -135.95425415039062, "logps/rejected": -857.0611572265625, "loss": 0.0941, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.911051869392395, "rewards/margins": 7.282872676849365, "rewards/rejected": -8.193924903869629, "step": 24590 }, { "epoch": 0.29, "learning_rate": 4.44575964946421e-06, "logits/chosen": -2.871333599090576, "logits/rejected": -2.347411632537842, "logps/chosen": -112.48746490478516, "logps/rejected": -915.6373291015625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.699536919593811, "rewards/margins": 8.073236465454102, "rewards/rejected": -8.772771835327148, "step": 24600 }, { "epoch": 0.29, "learning_rate": 4.445103574942416e-06, "logits/chosen": -2.8784923553466797, "logits/rejected": -2.3785345554351807, "logps/chosen": -132.0888214111328, "logps/rejected": -910.0374145507812, "loss": 0.1189, "rewards/accuracies": 1.0, "rewards/chosen": -0.7941867113113403, "rewards/margins": 7.918862819671631, "rewards/rejected": -8.713048934936523, "step": 24610 }, { "epoch": 0.29, "learning_rate": 4.444447160810942e-06, "logits/chosen": -2.878709316253662, "logits/rejected": -2.2970805168151855, "logps/chosen": -118.251953125, "logps/rejected": -939.9560546875, "loss": 0.1294, "rewards/accuracies": 1.0, "rewards/chosen": -0.702900767326355, "rewards/margins": 8.287869453430176, "rewards/rejected": -8.990771293640137, "step": 24620 }, { "epoch": 0.29, "learning_rate": 4.443790407184393e-06, "logits/chosen": -2.857520341873169, "logits/rejected": -2.294790029525757, "logps/chosen": -123.05082702636719, "logps/rejected": -956.0206298828125, "loss": 0.1102, "rewards/accuracies": 1.0, "rewards/chosen": -0.7407565712928772, "rewards/margins": 8.423355102539062, "rewards/rejected": -9.164111137390137, "step": 24630 }, { "epoch": 0.29, "learning_rate": 4.443133314177438e-06, "logits/chosen": -2.868814468383789, "logits/rejected": -2.17976450920105, "logps/chosen": -132.94288635253906, "logps/rejected": -1024.4814453125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7980502247810364, "rewards/margins": 9.05240249633789, "rewards/rejected": -9.850451469421387, "step": 24640 }, { "epoch": 0.3, "learning_rate": 4.442475881904804e-06, "logits/chosen": -2.8875341415405273, "logits/rejected": -2.2401795387268066, "logps/chosen": -143.57521057128906, "logps/rejected": -1051.8343505859375, "loss": 0.0266, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9369525909423828, "rewards/margins": 9.17593002319336, "rewards/rejected": -10.112882614135742, "step": 24650 }, { "epoch": 0.3, "learning_rate": 4.441818110481275e-06, "logits/chosen": -2.90203857421875, "logits/rejected": -2.3618998527526855, "logps/chosen": -133.96070861816406, "logps/rejected": -869.7398681640625, "loss": 0.0986, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.884691596031189, "rewards/margins": 7.441293239593506, "rewards/rejected": -8.325984954833984, "step": 24660 }, { "epoch": 0.3, "learning_rate": 4.441160000021697e-06, "logits/chosen": -2.8954873085021973, "logits/rejected": -2.444678783416748, "logps/chosen": -103.74967956542969, "logps/rejected": -849.0128173828125, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -0.6409894227981567, "rewards/margins": 7.478615760803223, "rewards/rejected": -8.119604110717773, "step": 24670 }, { "epoch": 0.3, "learning_rate": 4.440501550640975e-06, "logits/chosen": -2.8752522468566895, "logits/rejected": -2.523502826690674, "logps/chosen": -132.36614990234375, "logps/rejected": -853.8719482421875, "loss": 0.102, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9240382313728333, "rewards/margins": 7.232178688049316, "rewards/rejected": -8.156217575073242, "step": 24680 }, { "epoch": 0.3, "learning_rate": 4.439842762454072e-06, "logits/chosen": -2.85093355178833, "logits/rejected": -2.1854357719421387, "logps/chosen": -137.19717407226562, "logps/rejected": -1074.554443359375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.878322958946228, "rewards/margins": 9.466150283813477, "rewards/rejected": -10.344472885131836, "step": 24690 }, { "epoch": 0.3, "learning_rate": 4.439183635576009e-06, "logits/chosen": -2.877315044403076, "logits/rejected": -2.352834939956665, "logps/chosen": -127.84468841552734, "logps/rejected": -985.2823486328125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.828940212726593, "rewards/margins": 8.632074356079102, "rewards/rejected": -9.461012840270996, "step": 24700 }, { "epoch": 0.3, "learning_rate": 4.43852417012187e-06, "logits/chosen": -2.882020950317383, "logits/rejected": -2.4497878551483154, "logps/chosen": -122.46099853515625, "logps/rejected": -887.56298828125, "loss": 0.0244, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7681968808174133, "rewards/margins": 7.717521667480469, "rewards/rejected": -8.485719680786133, "step": 24710 }, { "epoch": 0.3, "learning_rate": 4.437864366206795e-06, "logits/chosen": -2.8254432678222656, "logits/rejected": -2.244037389755249, "logps/chosen": -177.10162353515625, "logps/rejected": -882.9339599609375, "loss": 0.124, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2393549680709839, "rewards/margins": 7.205117225646973, "rewards/rejected": -8.44447135925293, "step": 24720 }, { "epoch": 0.3, "learning_rate": 4.4372042239459836e-06, "logits/chosen": -2.900308132171631, "logits/rejected": -2.4167909622192383, "logps/chosen": -139.28927612304688, "logps/rejected": -1011.1607666015625, "loss": 0.022, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9255129098892212, "rewards/margins": 8.792906761169434, "rewards/rejected": -9.71842098236084, "step": 24730 }, { "epoch": 0.3, "learning_rate": 4.4365437434546955e-06, "logits/chosen": -2.910010576248169, "logits/rejected": -2.234208345413208, "logps/chosen": -143.60720825195312, "logps/rejected": -1037.998046875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9308199882507324, "rewards/margins": 9.058371543884277, "rewards/rejected": -9.989191055297852, "step": 24740 }, { "epoch": 0.3, "learning_rate": 4.435882924848248e-06, "logits/chosen": -2.8298158645629883, "logits/rejected": -2.355285167694092, "logps/chosen": -182.6412353515625, "logps/rejected": -990.5013427734375, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.3561580181121826, "rewards/margins": 8.152766227722168, "rewards/rejected": -9.50892448425293, "step": 24750 }, { "epoch": 0.3, "learning_rate": 4.435221768242018e-06, "logits/chosen": -2.874541759490967, "logits/rejected": -2.3509602546691895, "logps/chosen": -124.8755874633789, "logps/rejected": -815.9197387695312, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -0.7805758714675903, "rewards/margins": 7.020749092102051, "rewards/rejected": -7.801324367523193, "step": 24760 }, { "epoch": 0.3, "learning_rate": 4.434560273751443e-06, "logits/chosen": -2.87306547164917, "logits/rejected": -2.2827820777893066, "logps/chosen": -129.68978881835938, "logps/rejected": -1054.6741943359375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8565927743911743, "rewards/margins": 9.301770210266113, "rewards/rejected": -10.15836238861084, "step": 24770 }, { "epoch": 0.3, "learning_rate": 4.433898441492017e-06, "logits/chosen": -2.9323415756225586, "logits/rejected": -2.5210373401641846, "logps/chosen": -130.26376342773438, "logps/rejected": -962.6549072265625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8403841257095337, "rewards/margins": 8.411439895629883, "rewards/rejected": -9.251825332641602, "step": 24780 }, { "epoch": 0.3, "learning_rate": 4.433236271579293e-06, "logits/chosen": -2.8322014808654785, "logits/rejected": -2.458822011947632, "logps/chosen": -128.51072692871094, "logps/rejected": -914.1778564453125, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.8494065403938293, "rewards/margins": 7.913049221038818, "rewards/rejected": -8.762455940246582, "step": 24790 }, { "epoch": 0.3, "learning_rate": 4.432573764128887e-06, "logits/chosen": -2.848580837249756, "logits/rejected": -2.3036751747131348, "logps/chosen": -148.11740112304688, "logps/rejected": -1092.9866943359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9885843992233276, "rewards/margins": 9.54234504699707, "rewards/rejected": -10.530929565429688, "step": 24800 }, { "epoch": 0.3, "learning_rate": 4.431910919256468e-06, "logits/chosen": -2.8713467121124268, "logits/rejected": -2.203472137451172, "logps/chosen": -156.21229553222656, "logps/rejected": -1102.4033203125, "loss": 0.0744, "rewards/accuracies": 1.0, "rewards/chosen": -0.9362654685974121, "rewards/margins": 9.69359016418457, "rewards/rejected": -10.629855155944824, "step": 24810 }, { "epoch": 0.3, "learning_rate": 4.431247737077769e-06, "logits/chosen": -2.8310346603393555, "logits/rejected": -2.246213436126709, "logps/chosen": -133.54832458496094, "logps/rejected": -1043.553955078125, "loss": 0.1156, "rewards/accuracies": 1.0, "rewards/chosen": -0.8235645294189453, "rewards/margins": 9.213201522827148, "rewards/rejected": -10.036766052246094, "step": 24820 }, { "epoch": 0.3, "learning_rate": 4.430584217708579e-06, "logits/chosen": -2.8443422317504883, "logits/rejected": -2.2766458988189697, "logps/chosen": -144.91500854492188, "logps/rejected": -1077.263916015625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9405329823493958, "rewards/margins": 9.452838897705078, "rewards/rejected": -10.39337158203125, "step": 24830 }, { "epoch": 0.3, "learning_rate": 4.429920361264746e-06, "logits/chosen": -2.8859829902648926, "logits/rejected": -2.4697747230529785, "logps/chosen": -150.17428588867188, "logps/rejected": -829.2667236328125, "loss": 0.1571, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0748884677886963, "rewards/margins": 6.828263282775879, "rewards/rejected": -7.903151035308838, "step": 24840 }, { "epoch": 0.3, "learning_rate": 4.429256167862179e-06, "logits/chosen": -2.9631245136260986, "logits/rejected": -2.6377310752868652, "logps/chosen": -115.2367172241211, "logps/rejected": -906.9627075195312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7255967855453491, "rewards/margins": 7.967853546142578, "rewards/rejected": -8.693449974060059, "step": 24850 }, { "epoch": 0.3, "learning_rate": 4.428591637616843e-06, "logits/chosen": -2.9328384399414062, "logits/rejected": -2.593628406524658, "logps/chosen": -116.61634826660156, "logps/rejected": -863.4417724609375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7618021965026855, "rewards/margins": 7.5080246925354, "rewards/rejected": -8.26982593536377, "step": 24860 }, { "epoch": 0.3, "learning_rate": 4.427926770644762e-06, "logits/chosen": -2.8708338737487793, "logits/rejected": -2.2469775676727295, "logps/chosen": -151.77830505371094, "logps/rejected": -1246.6376953125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0424134731292725, "rewards/margins": 11.023834228515625, "rewards/rejected": -12.066247940063477, "step": 24870 }, { "epoch": 0.3, "learning_rate": 4.427261567062023e-06, "logits/chosen": -2.804673671722412, "logits/rejected": -2.3687732219696045, "logps/chosen": -163.50067138671875, "logps/rejected": -887.1438598632812, "loss": 0.2662, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1983503103256226, "rewards/margins": 7.2881317138671875, "rewards/rejected": -8.486482620239258, "step": 24880 }, { "epoch": 0.3, "learning_rate": 4.426596026984767e-06, "logits/chosen": -2.923854351043701, "logits/rejected": -2.457185745239258, "logps/chosen": -116.48646545410156, "logps/rejected": -995.8416748046875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7133123278617859, "rewards/margins": 8.860292434692383, "rewards/rejected": -9.57360553741455, "step": 24890 }, { "epoch": 0.3, "learning_rate": 4.425930150529195e-06, "logits/chosen": -2.8492865562438965, "logits/rejected": -2.2133443355560303, "logps/chosen": -136.37429809570312, "logps/rejected": -1090.947265625, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -0.8587106466293335, "rewards/margins": 9.643041610717773, "rewards/rejected": -10.501751899719238, "step": 24900 }, { "epoch": 0.3, "learning_rate": 4.425263937811568e-06, "logits/chosen": -2.830247402191162, "logits/rejected": -2.4269938468933105, "logps/chosen": -119.6771240234375, "logps/rejected": -910.7337646484375, "loss": 0.1515, "rewards/accuracies": 1.0, "rewards/chosen": -0.7601129412651062, "rewards/margins": 7.963074684143066, "rewards/rejected": -8.723188400268555, "step": 24910 }, { "epoch": 0.3, "learning_rate": 4.424597388948205e-06, "logits/chosen": -2.8564465045928955, "logits/rejected": -2.4543135166168213, "logps/chosen": -114.1666488647461, "logps/rejected": -852.8323364257812, "loss": 0.0269, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7043752074241638, "rewards/margins": 7.443744659423828, "rewards/rejected": -8.14811897277832, "step": 24920 }, { "epoch": 0.3, "learning_rate": 4.423930504055483e-06, "logits/chosen": -2.8553054332733154, "logits/rejected": -2.419797897338867, "logps/chosen": -113.1363525390625, "logps/rejected": -949.4363403320312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6816846132278442, "rewards/margins": 8.429170608520508, "rewards/rejected": -9.110854148864746, "step": 24930 }, { "epoch": 0.3, "learning_rate": 4.423263283249839e-06, "logits/chosen": -2.8794455528259277, "logits/rejected": -2.331275701522827, "logps/chosen": -135.80856323242188, "logps/rejected": -994.6173706054688, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.8135692477226257, "rewards/margins": 8.733545303344727, "rewards/rejected": -9.547114372253418, "step": 24940 }, { "epoch": 0.3, "learning_rate": 4.422595726647766e-06, "logits/chosen": -2.8979949951171875, "logits/rejected": -2.5691049098968506, "logps/chosen": -107.01921081542969, "logps/rejected": -902.8006591796875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6551316976547241, "rewards/margins": 7.99367618560791, "rewards/rejected": -8.648808479309082, "step": 24950 }, { "epoch": 0.3, "learning_rate": 4.42192783436582e-06, "logits/chosen": -2.8620524406433105, "logits/rejected": -2.409510850906372, "logps/chosen": -111.61331939697266, "logps/rejected": -833.45849609375, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.6579101085662842, "rewards/margins": 7.314427852630615, "rewards/rejected": -7.9723381996154785, "step": 24960 }, { "epoch": 0.3, "learning_rate": 4.4212596065206116e-06, "logits/chosen": -2.8841538429260254, "logits/rejected": -2.386568546295166, "logps/chosen": -137.71372985839844, "logps/rejected": -988.6090087890625, "loss": 0.111, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9094719886779785, "rewards/margins": 8.589155197143555, "rewards/rejected": -9.498627662658691, "step": 24970 }, { "epoch": 0.3, "learning_rate": 4.420591043228813e-06, "logits/chosen": -2.8566107749938965, "logits/rejected": -2.252633571624756, "logps/chosen": -122.0357894897461, "logps/rejected": -1016.2420043945312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7439332008361816, "rewards/margins": 9.019449234008789, "rewards/rejected": -9.763382911682129, "step": 24980 }, { "epoch": 0.3, "learning_rate": 4.419922144607152e-06, "logits/chosen": -2.8759870529174805, "logits/rejected": -2.5628609657287598, "logps/chosen": -161.49234008789062, "logps/rejected": -805.8387451171875, "loss": 0.2707, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.1796534061431885, "rewards/margins": 6.492683410644531, "rewards/rejected": -7.672336578369141, "step": 24990 }, { "epoch": 0.3, "learning_rate": 4.419252910772416e-06, "logits/chosen": -2.8838462829589844, "logits/rejected": -2.164505958557129, "logps/chosen": -139.13809204101562, "logps/rejected": -1105.578857421875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8085528612136841, "rewards/margins": 9.827295303344727, "rewards/rejected": -10.635848999023438, "step": 25000 }, { "epoch": 0.3, "learning_rate": 4.418583341841453e-06, "logits/chosen": -2.924107074737549, "logits/rejected": -2.551034927368164, "logps/chosen": -95.6103286743164, "logps/rejected": -851.6466064453125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5655034184455872, "rewards/margins": 7.557781219482422, "rewards/rejected": -8.123283386230469, "step": 25010 }, { "epoch": 0.3, "learning_rate": 4.417913437931166e-06, "logits/chosen": -2.8336501121520996, "logits/rejected": -2.041138172149658, "logps/chosen": -159.33352661132812, "logps/rejected": -1236.263916015625, "loss": 0.2559, "rewards/accuracies": 1.0, "rewards/chosen": -0.9719502329826355, "rewards/margins": 10.966347694396973, "rewards/rejected": -11.938297271728516, "step": 25020 }, { "epoch": 0.3, "learning_rate": 4.417243199158521e-06, "logits/chosen": -2.8733770847320557, "logits/rejected": -2.1988770961761475, "logps/chosen": -127.87400817871094, "logps/rejected": -1162.6961669921875, "loss": 0.1267, "rewards/accuracies": 1.0, "rewards/chosen": -0.7148423194885254, "rewards/margins": 10.519170761108398, "rewards/rejected": -11.234013557434082, "step": 25030 }, { "epoch": 0.3, "learning_rate": 4.416572625640538e-06, "logits/chosen": -2.90714430809021, "logits/rejected": -2.488112688064575, "logps/chosen": -117.81854248046875, "logps/rejected": -824.7548828125, "loss": 0.1088, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7300714254379272, "rewards/margins": 7.13187313079834, "rewards/rejected": -7.861943244934082, "step": 25040 }, { "epoch": 0.3, "learning_rate": 4.415901717494297e-06, "logits/chosen": -2.8845255374908447, "logits/rejected": -2.481651782989502, "logps/chosen": -112.4525146484375, "logps/rejected": -969.9228515625, "loss": 0.0249, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6684327721595764, "rewards/margins": 8.646414756774902, "rewards/rejected": -9.314847946166992, "step": 25050 }, { "epoch": 0.3, "learning_rate": 4.415230474836938e-06, "logits/chosen": -2.857522964477539, "logits/rejected": -2.337484836578369, "logps/chosen": -110.77146911621094, "logps/rejected": -992.8049926757812, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.627016544342041, "rewards/margins": 8.904159545898438, "rewards/rejected": -9.53117561340332, "step": 25060 }, { "epoch": 0.3, "learning_rate": 4.4145588977856564e-06, "logits/chosen": -2.905435800552368, "logits/rejected": -2.392704725265503, "logps/chosen": -119.85289001464844, "logps/rejected": -1018.9296875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6994253396987915, "rewards/margins": 9.102683067321777, "rewards/rejected": -9.802108764648438, "step": 25070 }, { "epoch": 0.3, "learning_rate": 4.4138869864577106e-06, "logits/chosen": -2.879598379135132, "logits/rejected": -2.078951835632324, "logps/chosen": -144.84957885742188, "logps/rejected": -935.2451171875, "loss": 0.0534, "rewards/accuracies": 1.0, "rewards/chosen": -0.8493759036064148, "rewards/margins": 8.102178573608398, "rewards/rejected": -8.951555252075195, "step": 25080 }, { "epoch": 0.3, "learning_rate": 4.413214740970411e-06, "logits/chosen": -2.8652217388153076, "logits/rejected": -2.4792673587799072, "logps/chosen": -107.43037414550781, "logps/rejected": -891.33935546875, "loss": 0.0468, "rewards/accuracies": 1.0, "rewards/chosen": -0.6391773819923401, "rewards/margins": 7.884103298187256, "rewards/rejected": -8.52328109741211, "step": 25090 }, { "epoch": 0.3, "learning_rate": 4.412542161441132e-06, "logits/chosen": -2.8976266384124756, "logits/rejected": -2.3321022987365723, "logps/chosen": -127.37821960449219, "logps/rejected": -1089.2276611328125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.7705778479576111, "rewards/margins": 9.724535942077637, "rewards/rejected": -10.49511432647705, "step": 25100 }, { "epoch": 0.3, "learning_rate": 4.4118692479873024e-06, "logits/chosen": -2.878995180130005, "logits/rejected": -2.3715128898620605, "logps/chosen": -122.61253356933594, "logps/rejected": -899.4609375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7913326025009155, "rewards/margins": 7.813185214996338, "rewards/rejected": -8.604517936706543, "step": 25110 }, { "epoch": 0.3, "learning_rate": 4.411196000726413e-06, "logits/chosen": -2.9058775901794434, "logits/rejected": -2.3557353019714355, "logps/chosen": -117.7867431640625, "logps/rejected": -963.857421875, "loss": 0.0227, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6999245882034302, "rewards/margins": 8.555435180664062, "rewards/rejected": -9.255359649658203, "step": 25120 }, { "epoch": 0.3, "learning_rate": 4.4105224197760106e-06, "logits/chosen": -2.905024528503418, "logits/rejected": -2.1928515434265137, "logps/chosen": -148.18710327148438, "logps/rejected": -1131.822021484375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9198927879333496, "rewards/margins": 9.98685359954834, "rewards/rejected": -10.906745910644531, "step": 25130 }, { "epoch": 0.3, "learning_rate": 4.409848505253699e-06, "logits/chosen": -2.8688395023345947, "logits/rejected": -2.4439539909362793, "logps/chosen": -117.93974304199219, "logps/rejected": -944.16552734375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7431713938713074, "rewards/margins": 8.306650161743164, "rewards/rejected": -9.049821853637695, "step": 25140 }, { "epoch": 0.3, "learning_rate": 4.4091742572771434e-06, "logits/chosen": -2.854602098464966, "logits/rejected": -2.3059723377227783, "logps/chosen": -139.77239990234375, "logps/rejected": -1073.24755859375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.9025083780288696, "rewards/margins": 9.420692443847656, "rewards/rejected": -10.323201179504395, "step": 25150 }, { "epoch": 0.3, "learning_rate": 4.408499675964064e-06, "logits/chosen": -2.876992702484131, "logits/rejected": -2.4077489376068115, "logps/chosen": -123.92134857177734, "logps/rejected": -989.3029174804688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8140594363212585, "rewards/margins": 8.680383682250977, "rewards/rejected": -9.494443893432617, "step": 25160 }, { "epoch": 0.3, "learning_rate": 4.407824761432243e-06, "logits/chosen": -2.896005630493164, "logits/rejected": -2.25596022605896, "logps/chosen": -143.2728271484375, "logps/rejected": -1112.452392578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8882452249526978, "rewards/margins": 9.827156066894531, "rewards/rejected": -10.715402603149414, "step": 25170 }, { "epoch": 0.3, "learning_rate": 4.407149513799517e-06, "logits/chosen": -2.9267919063568115, "logits/rejected": -2.3261184692382812, "logps/chosen": -137.7130126953125, "logps/rejected": -891.3590087890625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.8909357190132141, "rewards/margins": 7.629688262939453, "rewards/rejected": -8.520624160766602, "step": 25180 }, { "epoch": 0.3, "learning_rate": 4.406473933183783e-06, "logits/chosen": -2.8822293281555176, "logits/rejected": -2.3640081882476807, "logps/chosen": -136.9666290283203, "logps/rejected": -1061.485107421875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8541925549507141, "rewards/margins": 9.352137565612793, "rewards/rejected": -10.206330299377441, "step": 25190 }, { "epoch": 0.3, "learning_rate": 4.405798019702995e-06, "logits/chosen": -2.798874616622925, "logits/rejected": -2.0867362022399902, "logps/chosen": -144.36216735839844, "logps/rejected": -1129.636474609375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9605634808540344, "rewards/margins": 9.938825607299805, "rewards/rejected": -10.899389266967773, "step": 25200 }, { "epoch": 0.3, "learning_rate": 4.405121773475165e-06, "logits/chosen": -2.8757808208465576, "logits/rejected": -2.3021819591522217, "logps/chosen": -125.4800796508789, "logps/rejected": -958.8356323242188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7529862523078918, "rewards/margins": 8.436172485351562, "rewards/rejected": -9.189159393310547, "step": 25210 }, { "epoch": 0.3, "learning_rate": 4.404445194618365e-06, "logits/chosen": -2.8773624897003174, "logits/rejected": -2.4481546878814697, "logps/chosen": -111.77809143066406, "logps/rejected": -888.3211059570312, "loss": 0.0875, "rewards/accuracies": 1.0, "rewards/chosen": -0.7011565566062927, "rewards/margins": 7.79782247543335, "rewards/rejected": -8.498979568481445, "step": 25220 }, { "epoch": 0.3, "learning_rate": 4.403768283250725e-06, "logits/chosen": -2.8184103965759277, "logits/rejected": -2.2857887744903564, "logps/chosen": -134.4824676513672, "logps/rejected": -1132.9212646484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8505501747131348, "rewards/margins": 10.089910507202148, "rewards/rejected": -10.940459251403809, "step": 25230 }, { "epoch": 0.3, "learning_rate": 4.403091039490428e-06, "logits/chosen": -2.854269504547119, "logits/rejected": -2.4503955841064453, "logps/chosen": -143.7808837890625, "logps/rejected": -820.7305908203125, "loss": 0.162, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9910133481025696, "rewards/margins": 6.858376979827881, "rewards/rejected": -7.849390506744385, "step": 25240 }, { "epoch": 0.3, "learning_rate": 4.402413463455722e-06, "logits/chosen": -2.8431589603424072, "logits/rejected": -2.279683828353882, "logps/chosen": -130.0699920654297, "logps/rejected": -1043.705810546875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.8043317794799805, "rewards/margins": 9.243673324584961, "rewards/rejected": -10.048006057739258, "step": 25250 }, { "epoch": 0.3, "learning_rate": 4.40173555526491e-06, "logits/chosen": -2.8719420433044434, "logits/rejected": -2.2091152667999268, "logps/chosen": -133.84393310546875, "logps/rejected": -958.9518432617188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8607813119888306, "rewards/margins": 8.334235191345215, "rewards/rejected": -9.195015907287598, "step": 25260 }, { "epoch": 0.3, "learning_rate": 4.401057315036351e-06, "logits/chosen": -2.819377899169922, "logits/rejected": -2.091244697570801, "logps/chosen": -140.9500274658203, "logps/rejected": -1141.5498046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8422871828079224, "rewards/margins": 10.168233871459961, "rewards/rejected": -11.010519981384277, "step": 25270 }, { "epoch": 0.3, "learning_rate": 4.400378742888465e-06, "logits/chosen": -2.8777358531951904, "logits/rejected": -2.3460285663604736, "logps/chosen": -127.72879791259766, "logps/rejected": -975.2628173828125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.8256373405456543, "rewards/margins": 8.540739059448242, "rewards/rejected": -9.366376876831055, "step": 25280 }, { "epoch": 0.3, "learning_rate": 4.399699838939728e-06, "logits/chosen": -2.8490216732025146, "logits/rejected": -2.5378763675689697, "logps/chosen": -112.41349029541016, "logps/rejected": -885.8863525390625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7461585998535156, "rewards/margins": 7.741106986999512, "rewards/rejected": -8.487265586853027, "step": 25290 }, { "epoch": 0.3, "learning_rate": 4.399020603308676e-06, "logits/chosen": -2.8246307373046875, "logits/rejected": -2.2790896892547607, "logps/chosen": -126.95310974121094, "logps/rejected": -872.4796752929688, "loss": 0.0509, "rewards/accuracies": 1.0, "rewards/chosen": -0.8061760067939758, "rewards/margins": 7.553067684173584, "rewards/rejected": -8.359243392944336, "step": 25300 }, { "epoch": 0.3, "learning_rate": 4.3983410361139e-06, "logits/chosen": -2.8749353885650635, "logits/rejected": -2.233415365219116, "logps/chosen": -148.0816650390625, "logps/rejected": -1009.5260620117188, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.9554370641708374, "rewards/margins": 8.756368637084961, "rewards/rejected": -9.71180534362793, "step": 25310 }, { "epoch": 0.3, "learning_rate": 4.3976611374740515e-06, "logits/chosen": -2.882760524749756, "logits/rejected": -2.2918853759765625, "logps/chosen": -167.4017791748047, "logps/rejected": -894.8585205078125, "loss": 0.1573, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1086466312408447, "rewards/margins": 7.4347991943359375, "rewards/rejected": -8.543444633483887, "step": 25320 }, { "epoch": 0.3, "learning_rate": 4.39698090750784e-06, "logits/chosen": -2.8486814498901367, "logits/rejected": -2.3760311603546143, "logps/chosen": -115.44245910644531, "logps/rejected": -954.7520751953125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.7399400472640991, "rewards/margins": 8.440640449523926, "rewards/rejected": -9.18057918548584, "step": 25330 }, { "epoch": 0.3, "learning_rate": 4.396300346334031e-06, "logits/chosen": -2.8664193153381348, "logits/rejected": -2.442152261734009, "logps/chosen": -139.5799560546875, "logps/rejected": -950.2337646484375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.8855635523796082, "rewards/margins": 8.250083923339844, "rewards/rejected": -9.13564682006836, "step": 25340 }, { "epoch": 0.3, "learning_rate": 4.395619454071447e-06, "logits/chosen": -2.8664512634277344, "logits/rejected": -2.306347370147705, "logps/chosen": -145.900634765625, "logps/rejected": -968.8396606445312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8507791757583618, "rewards/margins": 8.448834419250488, "rewards/rejected": -9.299613952636719, "step": 25350 }, { "epoch": 0.3, "learning_rate": 4.394938230838972e-06, "logits/chosen": -2.818535566329956, "logits/rejected": -2.0662262439727783, "logps/chosen": -141.19236755371094, "logps/rejected": -1109.26318359375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.8788067102432251, "rewards/margins": 9.834362030029297, "rewards/rejected": -10.71316909790039, "step": 25360 }, { "epoch": 0.3, "learning_rate": 4.394256676755544e-06, "logits/chosen": -2.847947597503662, "logits/rejected": -2.5775959491729736, "logps/chosen": -120.78006744384766, "logps/rejected": -796.3350830078125, "loss": 0.1513, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7670981287956238, "rewards/margins": 6.833900451660156, "rewards/rejected": -7.600998878479004, "step": 25370 }, { "epoch": 0.3, "learning_rate": 4.393574791940162e-06, "logits/chosen": -2.7939648628234863, "logits/rejected": -2.0468955039978027, "logps/chosen": -152.30551147460938, "logps/rejected": -1152.164306640625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.9880954027175903, "rewards/margins": 10.128702163696289, "rewards/rejected": -11.116796493530273, "step": 25380 }, { "epoch": 0.3, "learning_rate": 4.39289257651188e-06, "logits/chosen": -2.8811440467834473, "logits/rejected": -2.458207607269287, "logps/chosen": -147.17654418945312, "logps/rejected": -1069.317626953125, "loss": 0.1492, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0038392543792725, "rewards/margins": 9.293469429016113, "rewards/rejected": -10.297307968139648, "step": 25390 }, { "epoch": 0.3, "learning_rate": 4.392210030589812e-06, "logits/chosen": -2.8376951217651367, "logits/rejected": -2.3850975036621094, "logps/chosen": -115.6949462890625, "logps/rejected": -1022.0133666992188, "loss": 0.127, "rewards/accuracies": 1.0, "rewards/chosen": -0.7059248685836792, "rewards/margins": 9.122071266174316, "rewards/rejected": -9.827997207641602, "step": 25400 }, { "epoch": 0.3, "learning_rate": 4.391527154293128e-06, "logits/chosen": -2.8827924728393555, "logits/rejected": -2.3432202339172363, "logps/chosen": -104.19273376464844, "logps/rejected": -930.7623901367188, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.6080432534217834, "rewards/margins": 8.314892768859863, "rewards/rejected": -8.92293643951416, "step": 25410 }, { "epoch": 0.3, "learning_rate": 4.390843947741057e-06, "logits/chosen": -2.8517332077026367, "logits/rejected": -2.387465715408325, "logps/chosen": -131.10569763183594, "logps/rejected": -848.76123046875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.829411506652832, "rewards/margins": 7.279838562011719, "rewards/rejected": -8.10925006866455, "step": 25420 }, { "epoch": 0.3, "learning_rate": 4.390160411052883e-06, "logits/chosen": -2.8817076683044434, "logits/rejected": -2.492694616317749, "logps/chosen": -117.8579330444336, "logps/rejected": -861.2265625, "loss": 0.0264, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7719769477844238, "rewards/margins": 7.469899654388428, "rewards/rejected": -8.241876602172852, "step": 25430 }, { "epoch": 0.3, "learning_rate": 4.3894765443479515e-06, "logits/chosen": -2.848449945449829, "logits/rejected": -2.1077158451080322, "logps/chosen": -146.89813232421875, "logps/rejected": -987.2149658203125, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -1.0044329166412354, "rewards/margins": 8.475442886352539, "rewards/rejected": -9.479875564575195, "step": 25440 }, { "epoch": 0.3, "learning_rate": 4.388792347745663e-06, "logits/chosen": -2.8279261589050293, "logits/rejected": -2.3433525562286377, "logps/chosen": -109.70576477050781, "logps/rejected": -953.1531372070312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6139059066772461, "rewards/margins": 8.542166709899902, "rewards/rejected": -9.156072616577148, "step": 25450 }, { "epoch": 0.3, "learning_rate": 4.388107821365477e-06, "logits/chosen": -2.819469690322876, "logits/rejected": -2.457943916320801, "logps/chosen": -113.7479476928711, "logps/rejected": -947.7361450195312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7106624245643616, "rewards/margins": 8.387714385986328, "rewards/rejected": -9.098376274108887, "step": 25460 }, { "epoch": 0.3, "learning_rate": 4.387422965326909e-06, "logits/chosen": -2.891486406326294, "logits/rejected": -2.395489454269409, "logps/chosen": -105.7376480102539, "logps/rejected": -935.6921997070312, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.6237174272537231, "rewards/margins": 8.345062255859375, "rewards/rejected": -8.968780517578125, "step": 25470 }, { "epoch": 0.31, "learning_rate": 4.386737779749533e-06, "logits/chosen": -2.853722333908081, "logits/rejected": -2.1658318042755127, "logps/chosen": -124.70329284667969, "logps/rejected": -1053.399169921875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7284871339797974, "rewards/margins": 9.40239429473877, "rewards/rejected": -10.130881309509277, "step": 25480 }, { "epoch": 0.31, "learning_rate": 4.386052264752982e-06, "logits/chosen": -2.9041171073913574, "logits/rejected": -2.1055655479431152, "logps/chosen": -131.13778686523438, "logps/rejected": -1105.33447265625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7476514577865601, "rewards/margins": 9.911872863769531, "rewards/rejected": -10.659524917602539, "step": 25490 }, { "epoch": 0.31, "learning_rate": 4.385366420456942e-06, "logits/chosen": -2.859224796295166, "logits/rejected": -2.3721134662628174, "logps/chosen": -115.18891906738281, "logps/rejected": -935.1485595703125, "loss": 0.0871, "rewards/accuracies": 1.0, "rewards/chosen": -0.6678058505058289, "rewards/margins": 8.28618049621582, "rewards/rejected": -8.953985214233398, "step": 25500 }, { "epoch": 0.31, "learning_rate": 4.3846802469811626e-06, "logits/chosen": -2.855161428451538, "logits/rejected": -2.5175251960754395, "logps/chosen": -98.38877868652344, "logps/rejected": -857.9074096679688, "loss": 0.3964, "rewards/accuracies": 1.0, "rewards/chosen": -0.5582862496376038, "rewards/margins": 7.643570899963379, "rewards/rejected": -8.201857566833496, "step": 25510 }, { "epoch": 0.31, "learning_rate": 4.383993744445446e-06, "logits/chosen": -2.895556926727295, "logits/rejected": -2.4222819805145264, "logps/chosen": -98.83324432373047, "logps/rejected": -929.0206909179688, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.5467241406440735, "rewards/margins": 8.357897758483887, "rewards/rejected": -8.904623031616211, "step": 25520 }, { "epoch": 0.31, "learning_rate": 4.383306912969655e-06, "logits/chosen": -2.896193742752075, "logits/rejected": -2.460691213607788, "logps/chosen": -113.7043685913086, "logps/rejected": -1005.6741333007812, "loss": 0.028, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6893021464347839, "rewards/margins": 8.963821411132812, "rewards/rejected": -9.65312385559082, "step": 25530 }, { "epoch": 0.31, "learning_rate": 4.382619752673707e-06, "logits/chosen": -2.885470390319824, "logits/rejected": -2.3751893043518066, "logps/chosen": -109.60638427734375, "logps/rejected": -967.0010986328125, "loss": 0.0359, "rewards/accuracies": 1.0, "rewards/chosen": -0.5323373675346375, "rewards/margins": 8.739812850952148, "rewards/rejected": -9.272150039672852, "step": 25540 }, { "epoch": 0.31, "learning_rate": 4.381932263677579e-06, "logits/chosen": -2.901762008666992, "logits/rejected": -2.329777479171753, "logps/chosen": -121.2799072265625, "logps/rejected": -897.5696411132812, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.7334896326065063, "rewards/margins": 7.8498334884643555, "rewards/rejected": -8.583322525024414, "step": 25550 }, { "epoch": 0.31, "learning_rate": 4.3812444461013045e-06, "logits/chosen": -2.9056694507598877, "logits/rejected": -2.353248119354248, "logps/chosen": -121.03633880615234, "logps/rejected": -877.0812377929688, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -0.7082520127296448, "rewards/margins": 7.670393943786621, "rewards/rejected": -8.378645896911621, "step": 25560 }, { "epoch": 0.31, "learning_rate": 4.380556300064975e-06, "logits/chosen": -2.8838775157928467, "logits/rejected": -2.3052685260772705, "logps/chosen": -121.30165100097656, "logps/rejected": -1041.287353515625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.716812014579773, "rewards/margins": 9.29780101776123, "rewards/rejected": -10.01461410522461, "step": 25570 }, { "epoch": 0.31, "learning_rate": 4.3798678256887384e-06, "logits/chosen": -2.8477444648742676, "logits/rejected": -2.31289005279541, "logps/chosen": -112.83842468261719, "logps/rejected": -919.8937377929688, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.6573570966720581, "rewards/margins": 8.156347274780273, "rewards/rejected": -8.813704490661621, "step": 25580 }, { "epoch": 0.31, "learning_rate": 4.379179023092801e-06, "logits/chosen": -2.884082317352295, "logits/rejected": -2.3431389331817627, "logps/chosen": -125.8740234375, "logps/rejected": -972.4259033203125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.7126818895339966, "rewards/margins": 8.58360481262207, "rewards/rejected": -9.296287536621094, "step": 25590 }, { "epoch": 0.31, "learning_rate": 4.378489892397425e-06, "logits/chosen": -2.849315881729126, "logits/rejected": -2.4764719009399414, "logps/chosen": -135.9573974609375, "logps/rejected": -943.67724609375, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -0.9481590390205383, "rewards/margins": 8.109904289245605, "rewards/rejected": -9.058064460754395, "step": 25600 }, { "epoch": 0.31, "learning_rate": 4.377800433722931e-06, "logits/chosen": -2.861287832260132, "logits/rejected": -2.2554309368133545, "logps/chosen": -117.35591125488281, "logps/rejected": -953.8526611328125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7021939158439636, "rewards/margins": 8.455862045288086, "rewards/rejected": -9.158056259155273, "step": 25610 }, { "epoch": 0.31, "learning_rate": 4.377110647189697e-06, "logits/chosen": -2.874803066253662, "logits/rejected": -2.2195327281951904, "logps/chosen": -137.4882354736328, "logps/rejected": -1071.6771240234375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8284287452697754, "rewards/margins": 9.498215675354004, "rewards/rejected": -10.326645851135254, "step": 25620 }, { "epoch": 0.31, "learning_rate": 4.376420532918156e-06, "logits/chosen": -2.8134169578552246, "logits/rejected": -2.174419403076172, "logps/chosen": -143.6593017578125, "logps/rejected": -1107.901611328125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9336010217666626, "rewards/margins": 9.731988906860352, "rewards/rejected": -10.66558837890625, "step": 25630 }, { "epoch": 0.31, "learning_rate": 4.375730091028803e-06, "logits/chosen": -2.8846993446350098, "logits/rejected": -2.4223601818084717, "logps/chosen": -109.02359771728516, "logps/rejected": -948.4298095703125, "loss": 0.1295, "rewards/accuracies": 1.0, "rewards/chosen": -0.6252447962760925, "rewards/margins": 8.472227096557617, "rewards/rejected": -9.097472190856934, "step": 25640 }, { "epoch": 0.31, "learning_rate": 4.375039321642185e-06, "logits/chosen": -2.8476028442382812, "logits/rejected": -2.270137310028076, "logps/chosen": -109.4472427368164, "logps/rejected": -977.2308349609375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6284942030906677, "rewards/margins": 8.755623817443848, "rewards/rejected": -9.38411808013916, "step": 25650 }, { "epoch": 0.31, "learning_rate": 4.3743482248789096e-06, "logits/chosen": -2.8633036613464355, "logits/rejected": -2.5135350227355957, "logps/chosen": -127.65025329589844, "logps/rejected": -787.2622680664062, "loss": 0.1478, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8778699040412903, "rewards/margins": 6.628798484802246, "rewards/rejected": -7.506669044494629, "step": 25660 }, { "epoch": 0.31, "learning_rate": 4.37365680085964e-06, "logits/chosen": -2.822391986846924, "logits/rejected": -2.308108329772949, "logps/chosen": -130.11038208007812, "logps/rejected": -954.560546875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.8247312307357788, "rewards/margins": 8.340229034423828, "rewards/rejected": -9.164960861206055, "step": 25670 }, { "epoch": 0.31, "learning_rate": 4.3729650497050965e-06, "logits/chosen": -2.8387832641601562, "logits/rejected": -2.1779117584228516, "logps/chosen": -178.2975311279297, "logps/rejected": -1055.947509765625, "loss": 0.1139, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2493040561676025, "rewards/margins": 8.908681869506836, "rewards/rejected": -10.157986640930176, "step": 25680 }, { "epoch": 0.31, "learning_rate": 4.3722729715360574e-06, "logits/chosen": -2.848510265350342, "logits/rejected": -2.4295783042907715, "logps/chosen": -101.95653533935547, "logps/rejected": -789.58056640625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5887451767921448, "rewards/margins": 6.932786464691162, "rewards/rejected": -7.521531581878662, "step": 25690 }, { "epoch": 0.31, "learning_rate": 4.371580566473357e-06, "logits/chosen": -2.8729166984558105, "logits/rejected": -2.4653432369232178, "logps/chosen": -110.5628433227539, "logps/rejected": -872.9586181640625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6603286862373352, "rewards/margins": 7.696025848388672, "rewards/rejected": -8.356353759765625, "step": 25700 }, { "epoch": 0.31, "learning_rate": 4.370887834637888e-06, "logits/chosen": -2.870898485183716, "logits/rejected": -2.2399370670318604, "logps/chosen": -134.52377319335938, "logps/rejected": -956.8446044921875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.9191879034042358, "rewards/margins": 8.263012886047363, "rewards/rejected": -9.182199478149414, "step": 25710 }, { "epoch": 0.31, "learning_rate": 4.3701947761506006e-06, "logits/chosen": -2.8737003803253174, "logits/rejected": -2.245032787322998, "logps/chosen": -137.66860961914062, "logps/rejected": -1021.4744262695312, "loss": 0.0984, "rewards/accuracies": 1.0, "rewards/chosen": -0.8699010014533997, "rewards/margins": 8.957683563232422, "rewards/rejected": -9.827585220336914, "step": 25720 }, { "epoch": 0.31, "learning_rate": 4.369501391132498e-06, "logits/chosen": -2.8631680011749268, "logits/rejected": -2.259979009628296, "logps/chosen": -133.88174438476562, "logps/rejected": -1023.4396362304688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8186389803886414, "rewards/margins": 9.018617630004883, "rewards/rejected": -9.837257385253906, "step": 25730 }, { "epoch": 0.31, "learning_rate": 4.368807679704645e-06, "logits/chosen": -2.8355660438537598, "logits/rejected": -2.2224748134613037, "logps/chosen": -136.3138885498047, "logps/rejected": -991.287109375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.8350127935409546, "rewards/margins": 8.694717407226562, "rewards/rejected": -9.529729843139648, "step": 25740 }, { "epoch": 0.31, "learning_rate": 4.368113641988162e-06, "logits/chosen": -2.899308443069458, "logits/rejected": -2.2953200340270996, "logps/chosen": -155.42124938964844, "logps/rejected": -992.3742065429688, "loss": 0.0757, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0833431482315063, "rewards/margins": 8.436923027038574, "rewards/rejected": -9.52026653289795, "step": 25750 }, { "epoch": 0.31, "learning_rate": 4.367419278104225e-06, "logits/chosen": -2.8453102111816406, "logits/rejected": -2.1561007499694824, "logps/chosen": -132.8269500732422, "logps/rejected": -1003.5779418945312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8588545918464661, "rewards/margins": 8.803661346435547, "rewards/rejected": -9.662515640258789, "step": 25760 }, { "epoch": 0.31, "learning_rate": 4.3667245881740685e-06, "logits/chosen": -2.769308567047119, "logits/rejected": -1.9356666803359985, "logps/chosen": -158.8428497314453, "logps/rejected": -1061.426025390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0590282678604126, "rewards/margins": 9.142843246459961, "rewards/rejected": -10.201871871948242, "step": 25770 }, { "epoch": 0.31, "learning_rate": 4.3660295723189845e-06, "logits/chosen": -2.8745648860931396, "logits/rejected": -2.5886974334716797, "logps/chosen": -93.84703063964844, "logps/rejected": -821.1856689453125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5489484071731567, "rewards/margins": 7.293181419372559, "rewards/rejected": -7.8421311378479, "step": 25780 }, { "epoch": 0.31, "learning_rate": 4.365334230660318e-06, "logits/chosen": -2.8575973510742188, "logits/rejected": -2.5728487968444824, "logps/chosen": -109.45365905761719, "logps/rejected": -824.2706298828125, "loss": 0.169, "rewards/accuracies": 1.0, "rewards/chosen": -0.7003733515739441, "rewards/margins": 7.179088592529297, "rewards/rejected": -7.879461765289307, "step": 25790 }, { "epoch": 0.31, "learning_rate": 4.364638563319478e-06, "logits/chosen": -2.9338998794555664, "logits/rejected": -2.5848424434661865, "logps/chosen": -109.20247650146484, "logps/rejected": -882.7271728515625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.6755245923995972, "rewards/margins": 7.782122611999512, "rewards/rejected": -8.457647323608398, "step": 25800 }, { "epoch": 0.31, "learning_rate": 4.3639425704179226e-06, "logits/chosen": -2.8434441089630127, "logits/rejected": -2.2481021881103516, "logps/chosen": -136.55398559570312, "logps/rejected": -1049.700927734375, "loss": 0.0194, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8537734746932983, "rewards/margins": 9.256360054016113, "rewards/rejected": -10.110133171081543, "step": 25810 }, { "epoch": 0.31, "learning_rate": 4.36324625207717e-06, "logits/chosen": -2.877129316329956, "logits/rejected": -2.331974983215332, "logps/chosen": -110.34736633300781, "logps/rejected": -864.75146484375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6387189626693726, "rewards/margins": 7.629149436950684, "rewards/rejected": -8.267868041992188, "step": 25820 }, { "epoch": 0.31, "learning_rate": 4.362549608418799e-06, "logits/chosen": -2.873234748840332, "logits/rejected": -2.4167098999023438, "logps/chosen": -118.8527603149414, "logps/rejected": -894.0256958007812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7054109573364258, "rewards/margins": 7.865988254547119, "rewards/rejected": -8.571398735046387, "step": 25830 }, { "epoch": 0.31, "learning_rate": 4.361852639564438e-06, "logits/chosen": -2.8967108726501465, "logits/rejected": -2.1921377182006836, "logps/chosen": -133.85581970214844, "logps/rejected": -1073.3509521484375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8544057011604309, "rewards/margins": 9.469306945800781, "rewards/rejected": -10.323712348937988, "step": 25840 }, { "epoch": 0.31, "learning_rate": 4.361155345635777e-06, "logits/chosen": -2.8967719078063965, "logits/rejected": -2.4458518028259277, "logps/chosen": -119.95231628417969, "logps/rejected": -948.0260620117188, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.763551652431488, "rewards/margins": 8.329081535339355, "rewards/rejected": -9.092634201049805, "step": 25850 }, { "epoch": 0.31, "learning_rate": 4.3604577267545625e-06, "logits/chosen": -2.8553085327148438, "logits/rejected": -2.131591320037842, "logps/chosen": -158.0055694580078, "logps/rejected": -1184.4930419921875, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -0.9906138181686401, "rewards/margins": 10.454948425292969, "rewards/rejected": -11.445563316345215, "step": 25860 }, { "epoch": 0.31, "learning_rate": 4.359759783042596e-06, "logits/chosen": -2.882673740386963, "logits/rejected": -2.446901798248291, "logps/chosen": -104.6046371459961, "logps/rejected": -907.6497192382812, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": -0.6268624067306519, "rewards/margins": 8.070838928222656, "rewards/rejected": -8.697701454162598, "step": 25870 }, { "epoch": 0.31, "learning_rate": 4.359061514621736e-06, "logits/chosen": -2.832618474960327, "logits/rejected": -2.2095179557800293, "logps/chosen": -143.92007446289062, "logps/rejected": -1054.4598388671875, "loss": 0.0435, "rewards/accuracies": 1.0, "rewards/chosen": -0.9507503509521484, "rewards/margins": 9.198831558227539, "rewards/rejected": -10.149581909179688, "step": 25880 }, { "epoch": 0.31, "learning_rate": 4.3583629216139e-06, "logits/chosen": -2.813105583190918, "logits/rejected": -2.236407518386841, "logps/chosen": -144.43788146972656, "logps/rejected": -1031.3253173828125, "loss": 0.1441, "rewards/accuracies": 1.0, "rewards/chosen": -0.9428094625473022, "rewards/margins": 8.98126220703125, "rewards/rejected": -9.924071311950684, "step": 25890 }, { "epoch": 0.31, "learning_rate": 4.357664004141058e-06, "logits/chosen": -2.8748514652252197, "logits/rejected": -2.3892598152160645, "logps/chosen": -147.03097534179688, "logps/rejected": -1074.9356689453125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9829269647598267, "rewards/margins": 9.371578216552734, "rewards/rejected": -10.354504585266113, "step": 25900 }, { "epoch": 0.31, "learning_rate": 4.356964762325242e-06, "logits/chosen": -2.8549418449401855, "logits/rejected": -2.2058186531066895, "logps/chosen": -153.03057861328125, "logps/rejected": -954.5699462890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0352387428283691, "rewards/margins": 8.111554145812988, "rewards/rejected": -9.1467924118042, "step": 25910 }, { "epoch": 0.31, "learning_rate": 4.356265196288535e-06, "logits/chosen": -2.8374266624450684, "logits/rejected": -2.1403968334198, "logps/chosen": -145.8101806640625, "logps/rejected": -1062.64404296875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9695646166801453, "rewards/margins": 9.270977973937988, "rewards/rejected": -10.240543365478516, "step": 25920 }, { "epoch": 0.31, "learning_rate": 4.3555653061530805e-06, "logits/chosen": -2.8607420921325684, "logits/rejected": -2.2747344970703125, "logps/chosen": -128.5349578857422, "logps/rejected": -1082.498291015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8159918785095215, "rewards/margins": 9.61512565612793, "rewards/rejected": -10.43111801147461, "step": 25930 }, { "epoch": 0.31, "learning_rate": 4.3548650920410765e-06, "logits/chosen": -2.851365566253662, "logits/rejected": -2.255446195602417, "logps/chosen": -155.67005920410156, "logps/rejected": -1028.87255859375, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": -1.1300348043441772, "rewards/margins": 8.770580291748047, "rewards/rejected": -9.900614738464355, "step": 25940 }, { "epoch": 0.31, "learning_rate": 4.354164554074779e-06, "logits/chosen": -2.8359203338623047, "logits/rejected": -2.511098623275757, "logps/chosen": -115.81919860839844, "logps/rejected": -886.6467895507812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7846499681472778, "rewards/margins": 7.702301979064941, "rewards/rejected": -8.486950874328613, "step": 25950 }, { "epoch": 0.31, "learning_rate": 4.353463692376501e-06, "logits/chosen": -2.887570858001709, "logits/rejected": -2.0265109539031982, "logps/chosen": -157.56478881835938, "logps/rejected": -1121.166259765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0251235961914062, "rewards/margins": 9.766530990600586, "rewards/rejected": -10.791654586791992, "step": 25960 }, { "epoch": 0.31, "learning_rate": 4.352762507068609e-06, "logits/chosen": -2.882960319519043, "logits/rejected": -2.305882692337036, "logps/chosen": -125.68257904052734, "logps/rejected": -1114.135009765625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7263770699501038, "rewards/margins": 10.028461456298828, "rewards/rejected": -10.754838943481445, "step": 25970 }, { "epoch": 0.31, "learning_rate": 4.352060998273529e-06, "logits/chosen": -2.862840175628662, "logits/rejected": -2.1131184101104736, "logps/chosen": -154.72689819335938, "logps/rejected": -1037.349853515625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.0172126293182373, "rewards/margins": 8.94247817993164, "rewards/rejected": -9.959692001342773, "step": 25980 }, { "epoch": 0.31, "learning_rate": 4.3513591661137445e-06, "logits/chosen": -2.8047211170196533, "logits/rejected": -2.0450949668884277, "logps/chosen": -154.99166870117188, "logps/rejected": -981.5498046875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.016337513923645, "rewards/margins": 8.400890350341797, "rewards/rejected": -9.417226791381836, "step": 25990 }, { "epoch": 0.31, "learning_rate": 4.35065701071179e-06, "logits/chosen": -2.834103584289551, "logits/rejected": -2.049621105194092, "logps/chosen": -156.29119873046875, "logps/rejected": -1319.513916015625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.035936713218689, "rewards/margins": 11.756731033325195, "rewards/rejected": -12.792668342590332, "step": 26000 }, { "epoch": 0.31, "learning_rate": 4.349954532190262e-06, "logits/chosen": -2.8722643852233887, "logits/rejected": -2.5458409786224365, "logps/chosen": -120.90199279785156, "logps/rejected": -836.1456909179688, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.8108073472976685, "rewards/margins": 7.1795501708984375, "rewards/rejected": -7.990358829498291, "step": 26010 }, { "epoch": 0.31, "learning_rate": 4.34925173067181e-06, "logits/chosen": -2.8437252044677734, "logits/rejected": -2.1902015209198, "logps/chosen": -147.60311889648438, "logps/rejected": -1039.567626953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9624050259590149, "rewards/margins": 9.035548210144043, "rewards/rejected": -9.997954368591309, "step": 26020 }, { "epoch": 0.31, "learning_rate": 4.3485486062791426e-06, "logits/chosen": -2.865818500518799, "logits/rejected": -2.2541446685791016, "logps/chosen": -135.05043029785156, "logps/rejected": -1012.0769653320312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8979272842407227, "rewards/margins": 8.823835372924805, "rewards/rejected": -9.721762657165527, "step": 26030 }, { "epoch": 0.31, "learning_rate": 4.347845159135022e-06, "logits/chosen": -2.8624427318573, "logits/rejected": -2.164421319961548, "logps/chosen": -137.72622680664062, "logps/rejected": -999.8630981445312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8799500465393066, "rewards/margins": 8.734140396118164, "rewards/rejected": -9.614089965820312, "step": 26040 }, { "epoch": 0.31, "learning_rate": 4.34714138936227e-06, "logits/chosen": -2.8955349922180176, "logits/rejected": -2.3803350925445557, "logps/chosen": -129.02880859375, "logps/rejected": -980.3836059570312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7906595468521118, "rewards/margins": 8.623531341552734, "rewards/rejected": -9.414190292358398, "step": 26050 }, { "epoch": 0.31, "learning_rate": 4.346437297083761e-06, "logits/chosen": -2.8902201652526855, "logits/rejected": -2.461883306503296, "logps/chosen": -111.40803527832031, "logps/rejected": -885.4512939453125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6602407097816467, "rewards/margins": 7.815493583679199, "rewards/rejected": -8.475733757019043, "step": 26060 }, { "epoch": 0.31, "learning_rate": 4.345732882422427e-06, "logits/chosen": -2.857516288757324, "logits/rejected": -2.351999044418335, "logps/chosen": -144.0391845703125, "logps/rejected": -938.3599853515625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.9620283842086792, "rewards/margins": 8.029458999633789, "rewards/rejected": -8.991487503051758, "step": 26070 }, { "epoch": 0.31, "learning_rate": 4.34502814550126e-06, "logits/chosen": -2.8652420043945312, "logits/rejected": -2.1789817810058594, "logps/chosen": -130.26937866210938, "logps/rejected": -994.3199462890625, "loss": 0.1968, "rewards/accuracies": 1.0, "rewards/chosen": -0.7763252854347229, "rewards/margins": 8.772176742553711, "rewards/rejected": -9.548503875732422, "step": 26080 }, { "epoch": 0.31, "learning_rate": 4.344323086443302e-06, "logits/chosen": -2.8511929512023926, "logits/rejected": -2.216794729232788, "logps/chosen": -143.00595092773438, "logps/rejected": -991.6728515625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9194871187210083, "rewards/margins": 8.609743118286133, "rewards/rejected": -9.529230117797852, "step": 26090 }, { "epoch": 0.31, "learning_rate": 4.343617705371656e-06, "logits/chosen": -2.897219181060791, "logits/rejected": -2.5112462043762207, "logps/chosen": -114.31742858886719, "logps/rejected": -1021.2088012695312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7036069631576538, "rewards/margins": 9.125357627868652, "rewards/rejected": -9.82896614074707, "step": 26100 }, { "epoch": 0.31, "learning_rate": 4.342912002409478e-06, "logits/chosen": -2.893716812133789, "logits/rejected": -2.3907523155212402, "logps/chosen": -148.32791137695312, "logps/rejected": -996.2242431640625, "loss": 0.1166, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.020073652267456, "rewards/margins": 8.550408363342285, "rewards/rejected": -9.57048225402832, "step": 26110 }, { "epoch": 0.31, "learning_rate": 4.342205977679984e-06, "logits/chosen": -2.849951982498169, "logits/rejected": -2.2692489624023438, "logps/chosen": -125.35267639160156, "logps/rejected": -928.8211059570312, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.7649744153022766, "rewards/margins": 8.123016357421875, "rewards/rejected": -8.887991905212402, "step": 26120 }, { "epoch": 0.31, "learning_rate": 4.341499631306443e-06, "logits/chosen": -2.8403847217559814, "logits/rejected": -2.1741182804107666, "logps/chosen": -138.18736267089844, "logps/rejected": -1001.7546997070312, "loss": 0.1456, "rewards/accuracies": 1.0, "rewards/chosen": -0.8456984758377075, "rewards/margins": 8.769052505493164, "rewards/rejected": -9.614750862121582, "step": 26130 }, { "epoch": 0.31, "learning_rate": 4.340792963412181e-06, "logits/chosen": -2.877267599105835, "logits/rejected": -2.0933985710144043, "logps/chosen": -169.91134643554688, "logps/rejected": -953.2527465820312, "loss": 0.1296, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.184320092201233, "rewards/margins": 7.963408470153809, "rewards/rejected": -9.147729873657227, "step": 26140 }, { "epoch": 0.31, "learning_rate": 4.340085974120578e-06, "logits/chosen": -2.8908965587615967, "logits/rejected": -2.315396308898926, "logps/chosen": -147.0269317626953, "logps/rejected": -948.623046875, "loss": 0.0829, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9654879570007324, "rewards/margins": 8.115943908691406, "rewards/rejected": -9.081430435180664, "step": 26150 }, { "epoch": 0.31, "learning_rate": 4.3393786635550765e-06, "logits/chosen": -2.8382620811462402, "logits/rejected": -2.313910722732544, "logps/chosen": -115.01576232910156, "logps/rejected": -943.9689331054688, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.680419385433197, "rewards/margins": 8.371272087097168, "rewards/rejected": -9.051691055297852, "step": 26160 }, { "epoch": 0.31, "learning_rate": 4.338671031839169e-06, "logits/chosen": -2.8656699657440186, "logits/rejected": -2.6298136711120605, "logps/chosen": -88.16505432128906, "logps/rejected": -837.0711059570312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5379444360733032, "rewards/margins": 7.465717315673828, "rewards/rejected": -8.003661155700684, "step": 26170 }, { "epoch": 0.31, "learning_rate": 4.337963079096405e-06, "logits/chosen": -2.8717234134674072, "logits/rejected": -2.378744602203369, "logps/chosen": -149.20620727539062, "logps/rejected": -955.7735595703125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.0394682884216309, "rewards/margins": 8.141014099121094, "rewards/rejected": -9.180481910705566, "step": 26180 }, { "epoch": 0.31, "learning_rate": 4.337254805450393e-06, "logits/chosen": -2.854438304901123, "logits/rejected": -2.362591505050659, "logps/chosen": -126.5026626586914, "logps/rejected": -963.1281127929688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8173307180404663, "rewards/margins": 8.426717758178711, "rewards/rejected": -9.244049072265625, "step": 26190 }, { "epoch": 0.31, "learning_rate": 4.336546211024794e-06, "logits/chosen": -2.8214073181152344, "logits/rejected": -1.9734996557235718, "logps/chosen": -152.3507537841797, "logps/rejected": -1148.072265625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9486855268478394, "rewards/margins": 10.128824234008789, "rewards/rejected": -11.077508926391602, "step": 26200 }, { "epoch": 0.31, "learning_rate": 4.335837295943328e-06, "logits/chosen": -2.8592734336853027, "logits/rejected": -2.4900670051574707, "logps/chosen": -96.87138366699219, "logps/rejected": -836.7840576171875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.45617184042930603, "rewards/margins": 7.529702186584473, "rewards/rejected": -7.985874176025391, "step": 26210 }, { "epoch": 0.31, "learning_rate": 4.3351280603297685e-06, "logits/chosen": -2.9013259410858154, "logits/rejected": -2.496861696243286, "logps/chosen": -164.52732849121094, "logps/rejected": -928.1358642578125, "loss": 0.2406, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.180460810661316, "rewards/margins": 7.720693111419678, "rewards/rejected": -8.901153564453125, "step": 26220 }, { "epoch": 0.31, "learning_rate": 4.334418504307947e-06, "logits/chosen": -2.8201541900634766, "logits/rejected": -2.2864747047424316, "logps/chosen": -159.12591552734375, "logps/rejected": -1010.2435302734375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.0416715145111084, "rewards/margins": 8.673479080200195, "rewards/rejected": -9.715150833129883, "step": 26230 }, { "epoch": 0.31, "learning_rate": 4.333708628001749e-06, "logits/chosen": -2.877755641937256, "logits/rejected": -2.2441318035125732, "logps/chosen": -143.02565002441406, "logps/rejected": -1162.3785400390625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9732662439346313, "rewards/margins": 10.259725570678711, "rewards/rejected": -11.232992172241211, "step": 26240 }, { "epoch": 0.31, "learning_rate": 4.332998431535118e-06, "logits/chosen": -2.9193015098571777, "logits/rejected": -2.1471495628356934, "logps/chosen": -142.25155639648438, "logps/rejected": -1061.4564208984375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9318424463272095, "rewards/margins": 9.280160903930664, "rewards/rejected": -10.212003707885742, "step": 26250 }, { "epoch": 0.31, "learning_rate": 4.332287915032052e-06, "logits/chosen": -2.8808751106262207, "logits/rejected": -2.2742815017700195, "logps/chosen": -139.6966552734375, "logps/rejected": -1011.1769409179688, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.9044364094734192, "rewards/margins": 8.814738273620605, "rewards/rejected": -9.7191743850708, "step": 26260 }, { "epoch": 0.31, "learning_rate": 4.331577078616605e-06, "logits/chosen": -2.8562510013580322, "logits/rejected": -2.091041326522827, "logps/chosen": -145.89285278320312, "logps/rejected": -991.6654052734375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9546915888786316, "rewards/margins": 8.5645112991333, "rewards/rejected": -9.51920223236084, "step": 26270 }, { "epoch": 0.31, "learning_rate": 4.3308659224128855e-06, "logits/chosen": -2.909390687942505, "logits/rejected": -2.566197395324707, "logps/chosen": -170.56625366210938, "logps/rejected": -897.6980590820312, "loss": 0.1542, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2405229806900024, "rewards/margins": 7.36193323135376, "rewards/rejected": -8.602457046508789, "step": 26280 }, { "epoch": 0.31, "learning_rate": 4.330154446545062e-06, "logits/chosen": -2.8904826641082764, "logits/rejected": -2.472276449203491, "logps/chosen": -114.88667297363281, "logps/rejected": -977.4935302734375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6744096875190735, "rewards/margins": 8.723987579345703, "rewards/rejected": -9.398397445678711, "step": 26290 }, { "epoch": 0.31, "learning_rate": 4.329442651137355e-06, "logits/chosen": -2.8422727584838867, "logits/rejected": -2.4454822540283203, "logps/chosen": -127.2081527709961, "logps/rejected": -795.7586059570312, "loss": 0.1063, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7608423233032227, "rewards/margins": 6.819700717926025, "rewards/rejected": -7.580543518066406, "step": 26300 }, { "epoch": 0.31, "learning_rate": 4.328730536314043e-06, "logits/chosen": -2.902778148651123, "logits/rejected": -2.491816282272339, "logps/chosen": -131.71163940429688, "logps/rejected": -795.4857177734375, "loss": 0.0935, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8899896740913391, "rewards/margins": 6.699132442474365, "rewards/rejected": -7.589122772216797, "step": 26310 }, { "epoch": 0.32, "learning_rate": 4.328018102199457e-06, "logits/chosen": -2.858332395553589, "logits/rejected": -2.208204507827759, "logps/chosen": -141.54612731933594, "logps/rejected": -1038.350830078125, "loss": 0.0346, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8548539876937866, "rewards/margins": 9.13004207611084, "rewards/rejected": -9.984895706176758, "step": 26320 }, { "epoch": 0.32, "learning_rate": 4.327305348917989e-06, "logits/chosen": -2.877957344055176, "logits/rejected": -2.416472911834717, "logps/chosen": -118.9218978881836, "logps/rejected": -908.5322265625, "loss": 0.0904, "rewards/accuracies": 1.0, "rewards/chosen": -0.7093678116798401, "rewards/margins": 7.984743595123291, "rewards/rejected": -8.694111824035645, "step": 26330 }, { "epoch": 0.32, "learning_rate": 4.3265922765940815e-06, "logits/chosen": -2.874994993209839, "logits/rejected": -2.3740694522857666, "logps/chosen": -142.33432006835938, "logps/rejected": -1083.35888671875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9219774007797241, "rewards/margins": 9.505304336547852, "rewards/rejected": -10.427282333374023, "step": 26340 }, { "epoch": 0.32, "learning_rate": 4.325878885352236e-06, "logits/chosen": -2.8720593452453613, "logits/rejected": -2.529017210006714, "logps/chosen": -106.14726257324219, "logps/rejected": -768.9979248046875, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": -0.6935823559761047, "rewards/margins": 6.625855445861816, "rewards/rejected": -7.3194379806518555, "step": 26350 }, { "epoch": 0.32, "learning_rate": 4.325165175317007e-06, "logits/chosen": -2.8912737369537354, "logits/rejected": -2.2827155590057373, "logps/chosen": -164.6175994873047, "logps/rejected": -870.4620971679688, "loss": 0.1036, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0776822566986084, "rewards/margins": 7.2217841148376465, "rewards/rejected": -8.299467086791992, "step": 26360 }, { "epoch": 0.32, "learning_rate": 4.324451146613009e-06, "logits/chosen": -2.872002124786377, "logits/rejected": -2.426400661468506, "logps/chosen": -125.34477233886719, "logps/rejected": -912.9196166992188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8370990753173828, "rewards/margins": 7.903332710266113, "rewards/rejected": -8.74043083190918, "step": 26370 }, { "epoch": 0.32, "learning_rate": 4.323736799364907e-06, "logits/chosen": -2.8731675148010254, "logits/rejected": -2.398150682449341, "logps/chosen": -138.06939697265625, "logps/rejected": -890.279296875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8563949465751648, "rewards/margins": 7.676069736480713, "rewards/rejected": -8.532464981079102, "step": 26380 }, { "epoch": 0.32, "learning_rate": 4.323022133697426e-06, "logits/chosen": -2.891728639602661, "logits/rejected": -2.409140110015869, "logps/chosen": -139.13299560546875, "logps/rejected": -894.8019409179688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9388273358345032, "rewards/margins": 7.624202728271484, "rewards/rejected": -8.563029289245605, "step": 26390 }, { "epoch": 0.32, "learning_rate": 4.322307149735343e-06, "logits/chosen": -2.8895211219787598, "logits/rejected": -2.376626491546631, "logps/chosen": -143.26768493652344, "logps/rejected": -1035.676025390625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9373309016227722, "rewards/margins": 9.022530555725098, "rewards/rejected": -9.959860801696777, "step": 26400 }, { "epoch": 0.32, "learning_rate": 4.321591847603493e-06, "logits/chosen": -2.829862356185913, "logits/rejected": -2.1312928199768066, "logps/chosen": -134.77354431152344, "logps/rejected": -910.2951049804688, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8679337501525879, "rewards/margins": 7.842813968658447, "rewards/rejected": -8.710747718811035, "step": 26410 }, { "epoch": 0.32, "learning_rate": 4.3208762274267655e-06, "logits/chosen": -2.9061856269836426, "logits/rejected": -2.32133412361145, "logps/chosen": -147.55337524414062, "logps/rejected": -957.3629760742188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9712041616439819, "rewards/margins": 8.220364570617676, "rewards/rejected": -9.191568374633789, "step": 26420 }, { "epoch": 0.32, "learning_rate": 4.320160289330107e-06, "logits/chosen": -2.902869701385498, "logits/rejected": -2.1302411556243896, "logps/chosen": -178.322509765625, "logps/rejected": -1085.41796875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.1726716756820679, "rewards/margins": 9.269736289978027, "rewards/rejected": -10.442407608032227, "step": 26430 }, { "epoch": 0.32, "learning_rate": 4.319444033438516e-06, "logits/chosen": -2.897080183029175, "logits/rejected": -2.498478889465332, "logps/chosen": -154.97642517089844, "logps/rejected": -900.1802978515625, "loss": 0.1374, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0845730304718018, "rewards/margins": 7.527738094329834, "rewards/rejected": -8.612311363220215, "step": 26440 }, { "epoch": 0.32, "learning_rate": 4.318727459877051e-06, "logits/chosen": -2.813607931137085, "logits/rejected": -2.307692527770996, "logps/chosen": -148.32809448242188, "logps/rejected": -920.76611328125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0296945571899414, "rewards/margins": 7.786606788635254, "rewards/rejected": -8.816301345825195, "step": 26450 }, { "epoch": 0.32, "learning_rate": 4.318010568770822e-06, "logits/chosen": -2.893770933151245, "logits/rejected": -2.4863972663879395, "logps/chosen": -137.83932495117188, "logps/rejected": -901.6195068359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9594482183456421, "rewards/margins": 7.677313327789307, "rewards/rejected": -8.636760711669922, "step": 26460 }, { "epoch": 0.32, "learning_rate": 4.317293360244997e-06, "logits/chosen": -2.8471250534057617, "logits/rejected": -2.330801486968994, "logps/chosen": -127.8653793334961, "logps/rejected": -954.0548706054688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8572174310684204, "rewards/margins": 8.30483341217041, "rewards/rejected": -9.162050247192383, "step": 26470 }, { "epoch": 0.32, "learning_rate": 4.316575834424799e-06, "logits/chosen": -2.8703575134277344, "logits/rejected": -2.2499096393585205, "logps/chosen": -150.41583251953125, "logps/rejected": -979.4118041992188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0176849365234375, "rewards/margins": 8.375687599182129, "rewards/rejected": -9.39337158203125, "step": 26480 }, { "epoch": 0.32, "learning_rate": 4.315857991435506e-06, "logits/chosen": -2.8548054695129395, "logits/rejected": -2.514566421508789, "logps/chosen": -116.3908920288086, "logps/rejected": -918.1716918945312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7355948090553284, "rewards/margins": 8.066011428833008, "rewards/rejected": -8.801605224609375, "step": 26490 }, { "epoch": 0.32, "learning_rate": 4.3151398314024505e-06, "logits/chosen": -2.8366055488586426, "logits/rejected": -2.4673256874084473, "logps/chosen": -146.79759216308594, "logps/rejected": -856.5377197265625, "loss": 0.0456, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.036656379699707, "rewards/margins": 7.163397312164307, "rewards/rejected": -8.200054168701172, "step": 26500 }, { "epoch": 0.32, "learning_rate": 4.314421354451022e-06, "logits/chosen": -2.8844971656799316, "logits/rejected": -2.432588577270508, "logps/chosen": -158.24978637695312, "logps/rejected": -977.8737182617188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1181588172912598, "rewards/margins": 8.255977630615234, "rewards/rejected": -9.374135971069336, "step": 26510 }, { "epoch": 0.32, "learning_rate": 4.313702560706664e-06, "logits/chosen": -2.908294200897217, "logits/rejected": -2.48262095451355, "logps/chosen": -146.09532165527344, "logps/rejected": -863.3346557617188, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.9757612943649292, "rewards/margins": 7.278098106384277, "rewards/rejected": -8.253860473632812, "step": 26520 }, { "epoch": 0.32, "learning_rate": 4.312983450294877e-06, "logits/chosen": -2.8869032859802246, "logits/rejected": -2.2244668006896973, "logps/chosen": -163.71524047851562, "logps/rejected": -982.6141357421875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0862981081008911, "rewards/margins": 8.349108695983887, "rewards/rejected": -9.435406684875488, "step": 26530 }, { "epoch": 0.32, "learning_rate": 4.312264023341214e-06, "logits/chosen": -2.9000236988067627, "logits/rejected": -2.2058911323547363, "logps/chosen": -221.85336303710938, "logps/rejected": -953.92626953125, "loss": 0.161, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.7139517068862915, "rewards/margins": 7.4093780517578125, "rewards/rejected": -9.123329162597656, "step": 26540 }, { "epoch": 0.32, "learning_rate": 4.311544279971286e-06, "logits/chosen": -2.8788442611694336, "logits/rejected": -2.371551990509033, "logps/chosen": -135.18284606933594, "logps/rejected": -989.6571044921875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8784546852111816, "rewards/margins": 8.619409561157227, "rewards/rejected": -9.497865676879883, "step": 26550 }, { "epoch": 0.32, "learning_rate": 4.310824220310758e-06, "logits/chosen": -2.871553659439087, "logits/rejected": -2.4460694789886475, "logps/chosen": -136.72860717773438, "logps/rejected": -931.7230224609375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8271929025650024, "rewards/margins": 8.109138488769531, "rewards/rejected": -8.936330795288086, "step": 26560 }, { "epoch": 0.32, "learning_rate": 4.310103844485351e-06, "logits/chosen": -2.858877658843994, "logits/rejected": -2.174652576446533, "logps/chosen": -166.62171936035156, "logps/rejected": -1142.667236328125, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.1224459409713745, "rewards/margins": 9.902349472045898, "rewards/rejected": -11.024795532226562, "step": 26570 }, { "epoch": 0.32, "learning_rate": 4.309383152620841e-06, "logits/chosen": -2.85319185256958, "logits/rejected": -2.367663860321045, "logps/chosen": -165.7059783935547, "logps/rejected": -963.69775390625, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -1.1703414916992188, "rewards/margins": 8.084348678588867, "rewards/rejected": -9.254690170288086, "step": 26580 }, { "epoch": 0.32, "learning_rate": 4.308662144843057e-06, "logits/chosen": -2.8496077060699463, "logits/rejected": -2.2449169158935547, "logps/chosen": -177.90902709960938, "logps/rejected": -1037.1234130859375, "loss": 0.1163, "rewards/accuracies": 1.0, "rewards/chosen": -1.2581517696380615, "rewards/margins": 8.704339981079102, "rewards/rejected": -9.962491035461426, "step": 26590 }, { "epoch": 0.32, "learning_rate": 4.307940821277885e-06, "logits/chosen": -2.80566668510437, "logits/rejected": -2.0114500522613525, "logps/chosen": -239.4880828857422, "logps/rejected": -1131.2171630859375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.8691812753677368, "rewards/margins": 9.035964965820312, "rewards/rejected": -10.905147552490234, "step": 26600 }, { "epoch": 0.32, "learning_rate": 4.307219182051268e-06, "logits/chosen": -2.8089818954467773, "logits/rejected": -2.1018266677856445, "logps/chosen": -190.1416473388672, "logps/rejected": -1132.1741943359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.4387671947479248, "rewards/margins": 9.478073120117188, "rewards/rejected": -10.916840553283691, "step": 26610 }, { "epoch": 0.32, "learning_rate": 4.3064972272892e-06, "logits/chosen": -2.7889931201934814, "logits/rejected": -2.010739803314209, "logps/chosen": -189.5944366455078, "logps/rejected": -1168.799560546875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.3600190877914429, "rewards/margins": 9.926948547363281, "rewards/rejected": -11.286968231201172, "step": 26620 }, { "epoch": 0.32, "learning_rate": 4.305774957117735e-06, "logits/chosen": -2.8376336097717285, "logits/rejected": -2.2415881156921387, "logps/chosen": -192.500244140625, "logps/rejected": -1005.4583129882812, "loss": 0.1618, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4561011791229248, "rewards/margins": 8.206470489501953, "rewards/rejected": -9.662571907043457, "step": 26630 }, { "epoch": 0.32, "learning_rate": 4.305052371662977e-06, "logits/chosen": -2.8250184059143066, "logits/rejected": -2.022928237915039, "logps/chosen": -204.58311462402344, "logps/rejected": -1106.0579833984375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.4405300617218018, "rewards/margins": 9.218790054321289, "rewards/rejected": -10.659319877624512, "step": 26640 }, { "epoch": 0.32, "learning_rate": 4.304329471051088e-06, "logits/chosen": -2.8005270957946777, "logits/rejected": -2.169747829437256, "logps/chosen": -178.8899688720703, "logps/rejected": -1138.904052734375, "loss": 0.0818, "rewards/accuracies": 1.0, "rewards/chosen": -1.3251906633377075, "rewards/margins": 9.666698455810547, "rewards/rejected": -10.991888046264648, "step": 26650 }, { "epoch": 0.32, "learning_rate": 4.303606255408285e-06, "logits/chosen": -2.829056978225708, "logits/rejected": -2.3162002563476562, "logps/chosen": -171.2238006591797, "logps/rejected": -1090.169677734375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2588553428649902, "rewards/margins": 9.260350227355957, "rewards/rejected": -10.519205093383789, "step": 26660 }, { "epoch": 0.32, "learning_rate": 4.30288272486084e-06, "logits/chosen": -2.8986222743988037, "logits/rejected": -2.478215456008911, "logps/chosen": -172.50294494628906, "logps/rejected": -922.38037109375, "loss": 0.1135, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3134304285049438, "rewards/margins": 7.518074989318848, "rewards/rejected": -8.831506729125977, "step": 26670 }, { "epoch": 0.32, "learning_rate": 4.302158879535076e-06, "logits/chosen": -2.8925492763519287, "logits/rejected": -2.4683594703674316, "logps/chosen": -150.93228149414062, "logps/rejected": -944.30078125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.0764188766479492, "rewards/margins": 7.981712341308594, "rewards/rejected": -9.058130264282227, "step": 26680 }, { "epoch": 0.32, "learning_rate": 4.301434719557379e-06, "logits/chosen": -2.883079767227173, "logits/rejected": -2.2345705032348633, "logps/chosen": -201.13491821289062, "logps/rejected": -988.2384033203125, "loss": 0.0691, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.5431625843048096, "rewards/margins": 7.9677863121032715, "rewards/rejected": -9.51094913482666, "step": 26690 }, { "epoch": 0.32, "learning_rate": 4.300710245054183e-06, "logits/chosen": -2.836193323135376, "logits/rejected": -2.3564517498016357, "logps/chosen": -192.3487091064453, "logps/rejected": -1012.1732177734375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.4400848150253296, "rewards/margins": 8.285070419311523, "rewards/rejected": -9.7251558303833, "step": 26700 }, { "epoch": 0.32, "learning_rate": 4.299985456151979e-06, "logits/chosen": -2.836515188217163, "logits/rejected": -2.012813091278076, "logps/chosen": -222.4154510498047, "logps/rejected": -1144.154541015625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.6637321710586548, "rewards/margins": 9.36052417755127, "rewards/rejected": -11.024256706237793, "step": 26710 }, { "epoch": 0.32, "learning_rate": 4.299260352977313e-06, "logits/chosen": -2.8466744422912598, "logits/rejected": -2.3736796379089355, "logps/chosen": -170.17446899414062, "logps/rejected": -930.9334716796875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.2138601541519165, "rewards/margins": 7.700470924377441, "rewards/rejected": -8.914331436157227, "step": 26720 }, { "epoch": 0.32, "learning_rate": 4.298534935656788e-06, "logits/chosen": -2.8299190998077393, "logits/rejected": -2.0993716716766357, "logps/chosen": -187.94908142089844, "logps/rejected": -1147.0557861328125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3738592863082886, "rewards/margins": 9.6790771484375, "rewards/rejected": -11.052936553955078, "step": 26730 }, { "epoch": 0.32, "learning_rate": 4.2978092043170585e-06, "logits/chosen": -2.8097245693206787, "logits/rejected": -2.392650842666626, "logps/chosen": -136.74241638183594, "logps/rejected": -928.9464721679688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9564149975776672, "rewards/margins": 7.95181131362915, "rewards/rejected": -8.908226013183594, "step": 26740 }, { "epoch": 0.32, "learning_rate": 4.297083159084834e-06, "logits/chosen": -2.8063249588012695, "logits/rejected": -2.0295567512512207, "logps/chosen": -168.95571899414062, "logps/rejected": -1086.3349609375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1613960266113281, "rewards/margins": 9.302705764770508, "rewards/rejected": -10.464101791381836, "step": 26750 }, { "epoch": 0.32, "learning_rate": 4.296356800086882e-06, "logits/chosen": -2.880103588104248, "logits/rejected": -2.431119441986084, "logps/chosen": -134.0780487060547, "logps/rejected": -1000.0540161132812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8910223245620728, "rewards/margins": 8.732016563415527, "rewards/rejected": -9.623040199279785, "step": 26760 }, { "epoch": 0.32, "learning_rate": 4.2956301274500215e-06, "logits/chosen": -2.8406176567077637, "logits/rejected": -2.1668803691864014, "logps/chosen": -157.7211456298828, "logps/rejected": -967.0857543945312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0967605113983154, "rewards/margins": 8.173023223876953, "rewards/rejected": -9.269783020019531, "step": 26770 }, { "epoch": 0.32, "learning_rate": 4.2949031413011274e-06, "logits/chosen": -2.851508378982544, "logits/rejected": -2.546599864959717, "logps/chosen": -117.70771789550781, "logps/rejected": -887.44189453125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8035515546798706, "rewards/margins": 7.7077836990356445, "rewards/rejected": -8.511335372924805, "step": 26780 }, { "epoch": 0.32, "learning_rate": 4.294175841767131e-06, "logits/chosen": -2.8944694995880127, "logits/rejected": -2.3401918411254883, "logps/chosen": -171.6027374267578, "logps/rejected": -924.8099365234375, "loss": 0.1229, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2694485187530518, "rewards/margins": 7.598977088928223, "rewards/rejected": -8.868425369262695, "step": 26790 }, { "epoch": 0.32, "learning_rate": 4.293448228975015e-06, "logits/chosen": -2.88799786567688, "logits/rejected": -2.246453046798706, "logps/chosen": -171.6615447998047, "logps/rejected": -1101.61962890625, "loss": 0.1175, "rewards/accuracies": 1.0, "rewards/chosen": -1.198119044303894, "rewards/margins": 9.412071228027344, "rewards/rejected": -10.610189437866211, "step": 26800 }, { "epoch": 0.32, "learning_rate": 4.292720303051819e-06, "logits/chosen": -2.8074073791503906, "logits/rejected": -2.1577703952789307, "logps/chosen": -168.83621215820312, "logps/rejected": -994.8648681640625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.2309125661849976, "rewards/margins": 8.32957935333252, "rewards/rejected": -9.560491561889648, "step": 26810 }, { "epoch": 0.32, "learning_rate": 4.291992064124638e-06, "logits/chosen": -2.8466858863830566, "logits/rejected": -2.238569974899292, "logps/chosen": -157.1160430908203, "logps/rejected": -1088.21044921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1214308738708496, "rewards/margins": 9.375816345214844, "rewards/rejected": -10.497247695922852, "step": 26820 }, { "epoch": 0.32, "learning_rate": 4.2912635123206194e-06, "logits/chosen": -2.836949348449707, "logits/rejected": -2.1454017162323, "logps/chosen": -165.4131317138672, "logps/rejected": -1140.6610107421875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.1938059329986572, "rewards/margins": 9.817501068115234, "rewards/rejected": -11.011306762695312, "step": 26830 }, { "epoch": 0.32, "learning_rate": 4.290534647766966e-06, "logits/chosen": -2.8616943359375, "logits/rejected": -2.4062323570251465, "logps/chosen": -157.42909240722656, "logps/rejected": -944.3802490234375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1109100580215454, "rewards/margins": 7.927023887634277, "rewards/rejected": -9.037935256958008, "step": 26840 }, { "epoch": 0.32, "learning_rate": 4.2898054705909365e-06, "logits/chosen": -2.863199234008789, "logits/rejected": -2.452892303466797, "logps/chosen": -166.4605712890625, "logps/rejected": -912.0227661132812, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -1.2466185092926025, "rewards/margins": 7.502072334289551, "rewards/rejected": -8.74869155883789, "step": 26850 }, { "epoch": 0.32, "learning_rate": 4.289075980919843e-06, "logits/chosen": -2.852574110031128, "logits/rejected": -2.074843168258667, "logps/chosen": -199.45401000976562, "logps/rejected": -1062.7547607421875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.4114325046539307, "rewards/margins": 8.797929763793945, "rewards/rejected": -10.20936107635498, "step": 26860 }, { "epoch": 0.32, "learning_rate": 4.288346178881052e-06, "logits/chosen": -2.8399765491485596, "logits/rejected": -2.111527919769287, "logps/chosen": -172.70584106445312, "logps/rejected": -1031.719482421875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.2304332256317139, "rewards/margins": 8.691086769104004, "rewards/rejected": -9.921521186828613, "step": 26870 }, { "epoch": 0.32, "learning_rate": 4.287616064601985e-06, "logits/chosen": -2.8433098793029785, "logits/rejected": -2.2490856647491455, "logps/chosen": -155.30313110351562, "logps/rejected": -1007.5276489257812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.075911045074463, "rewards/margins": 8.611145973205566, "rewards/rejected": -9.687057495117188, "step": 26880 }, { "epoch": 0.32, "learning_rate": 4.2868856382101186e-06, "logits/chosen": -2.881187677383423, "logits/rejected": -2.230266571044922, "logps/chosen": -150.4808807373047, "logps/rejected": -958.6744995117188, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0244364738464355, "rewards/margins": 8.180183410644531, "rewards/rejected": -9.204619407653809, "step": 26890 }, { "epoch": 0.32, "learning_rate": 4.286154899832981e-06, "logits/chosen": -2.834517240524292, "logits/rejected": -2.1500260829925537, "logps/chosen": -159.20480346679688, "logps/rejected": -953.7013549804688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0762449502944946, "rewards/margins": 8.068760871887207, "rewards/rejected": -9.145005226135254, "step": 26900 }, { "epoch": 0.32, "learning_rate": 4.28542384959816e-06, "logits/chosen": -2.903069496154785, "logits/rejected": -2.2424449920654297, "logps/chosen": -159.45379638671875, "logps/rejected": -975.6363525390625, "loss": 0.1677, "rewards/accuracies": 1.0, "rewards/chosen": -1.0933818817138672, "rewards/margins": 8.289738655090332, "rewards/rejected": -9.383118629455566, "step": 26910 }, { "epoch": 0.32, "learning_rate": 4.284692487633294e-06, "logits/chosen": -2.848829984664917, "logits/rejected": -1.9097576141357422, "logps/chosen": -212.33468627929688, "logps/rejected": -1147.387939453125, "loss": 0.1083, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.5624336004257202, "rewards/margins": 9.507284164428711, "rewards/rejected": -11.069717407226562, "step": 26920 }, { "epoch": 0.32, "learning_rate": 4.283960814066076e-06, "logits/chosen": -2.866729259490967, "logits/rejected": -2.1577744483947754, "logps/chosen": -155.64207458496094, "logps/rejected": -999.0550537109375, "loss": 0.1346, "rewards/accuracies": 1.0, "rewards/chosen": -1.0326061248779297, "rewards/margins": 8.56602668762207, "rewards/rejected": -9.598633766174316, "step": 26930 }, { "epoch": 0.32, "learning_rate": 4.283228829024255e-06, "logits/chosen": -2.8579049110412598, "logits/rejected": -2.1593220233917236, "logps/chosen": -148.79312133789062, "logps/rejected": -975.8973388671875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.960955798625946, "rewards/margins": 8.404687881469727, "rewards/rejected": -9.365643501281738, "step": 26940 }, { "epoch": 0.32, "learning_rate": 4.282496532635633e-06, "logits/chosen": -2.8791017532348633, "logits/rejected": -2.4956777095794678, "logps/chosen": -117.0339126586914, "logps/rejected": -846.1019287109375, "loss": 0.1194, "rewards/accuracies": 1.0, "rewards/chosen": -0.7433302998542786, "rewards/margins": 7.335906982421875, "rewards/rejected": -8.079237937927246, "step": 26950 }, { "epoch": 0.32, "learning_rate": 4.281763925028066e-06, "logits/chosen": -2.849565029144287, "logits/rejected": -2.3479180335998535, "logps/chosen": -154.8985137939453, "logps/rejected": -974.0719604492188, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.0200040340423584, "rewards/margins": 8.32102108001709, "rewards/rejected": -9.341026306152344, "step": 26960 }, { "epoch": 0.32, "learning_rate": 4.281031006329467e-06, "logits/chosen": -2.8774595260620117, "logits/rejected": -2.353679656982422, "logps/chosen": -131.50173950195312, "logps/rejected": -914.4328002929688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8752440214157104, "rewards/margins": 7.896660804748535, "rewards/rejected": -8.771903991699219, "step": 26970 }, { "epoch": 0.32, "learning_rate": 4.2802977766678015e-06, "logits/chosen": -2.901932954788208, "logits/rejected": -2.45334792137146, "logps/chosen": -148.0310821533203, "logps/rejected": -873.5787963867188, "loss": 0.0261, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0091559886932373, "rewards/margins": 7.353582859039307, "rewards/rejected": -8.362738609313965, "step": 26980 }, { "epoch": 0.32, "learning_rate": 4.279564236171088e-06, "logits/chosen": -2.8455655574798584, "logits/rejected": -2.5126636028289795, "logps/chosen": -166.16409301757812, "logps/rejected": -751.947021484375, "loss": 0.2459, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.2757253646850586, "rewards/margins": 5.8683271408081055, "rewards/rejected": -7.144052028656006, "step": 26990 }, { "epoch": 0.32, "learning_rate": 4.278830384967402e-06, "logits/chosen": -2.895920515060425, "logits/rejected": -2.4961771965026855, "logps/chosen": -128.16549682617188, "logps/rejected": -902.2908325195312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8752721548080444, "rewards/margins": 7.769583225250244, "rewards/rejected": -8.644854545593262, "step": 27000 }, { "epoch": 0.32, "eval_logits/chosen": -2.860414505004883, "eval_logits/rejected": -1.7613921165466309, "eval_logps/chosen": -275.9768371582031, "eval_logps/rejected": -1113.1796875, "eval_loss": 0.0018522178288549185, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -2.147965431213379, "eval_rewards/margins": 8.516576766967773, "eval_rewards/rejected": -10.664542198181152, "eval_runtime": 1.2159, "eval_samples_per_second": 4.112, "eval_steps_per_second": 2.467, "step": 27000 }, { "epoch": 0.32, "learning_rate": 4.278096223184871e-06, "logits/chosen": -2.8184103965759277, "logits/rejected": -1.917778730392456, "logps/chosen": -166.48489379882812, "logps/rejected": -1044.0047607421875, "loss": 0.1166, "rewards/accuracies": 1.0, "rewards/chosen": -1.166245698928833, "rewards/margins": 8.877954483032227, "rewards/rejected": -10.04419994354248, "step": 27010 }, { "epoch": 0.32, "learning_rate": 4.277361750951677e-06, "logits/chosen": -2.846794366836548, "logits/rejected": -2.2574474811553955, "logps/chosen": -148.43753051757812, "logps/rejected": -973.22119140625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0497303009033203, "rewards/margins": 8.298447608947754, "rewards/rejected": -9.348177909851074, "step": 27020 }, { "epoch": 0.32, "learning_rate": 4.276626968396059e-06, "logits/chosen": -2.850644588470459, "logits/rejected": -2.162432909011841, "logps/chosen": -205.0295867919922, "logps/rejected": -1056.5394287109375, "loss": 0.0592, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.56100594997406, "rewards/margins": 8.601457595825195, "rewards/rejected": -10.16246509552002, "step": 27030 }, { "epoch": 0.32, "learning_rate": 4.275891875646307e-06, "logits/chosen": -2.8732168674468994, "logits/rejected": -2.29927134513855, "logps/chosen": -150.34829711914062, "logps/rejected": -961.7911987304688, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.0391043424606323, "rewards/margins": 8.201830863952637, "rewards/rejected": -9.240934371948242, "step": 27040 }, { "epoch": 0.32, "learning_rate": 4.275156472830765e-06, "logits/chosen": -2.838001012802124, "logits/rejected": -2.3130478858947754, "logps/chosen": -166.35964965820312, "logps/rejected": -1037.589111328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.2037489414215088, "rewards/margins": 8.784011840820312, "rewards/rejected": -9.987760543823242, "step": 27050 }, { "epoch": 0.32, "learning_rate": 4.274420760077832e-06, "logits/chosen": -2.866668701171875, "logits/rejected": -2.3725316524505615, "logps/chosen": -152.4726104736328, "logps/rejected": -930.2235107421875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.091593861579895, "rewards/margins": 7.824734687805176, "rewards/rejected": -8.916328430175781, "step": 27060 }, { "epoch": 0.32, "learning_rate": 4.273684737515964e-06, "logits/chosen": -2.864370822906494, "logits/rejected": -2.041508197784424, "logps/chosen": -194.99293518066406, "logps/rejected": -1055.7415771484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.3344436883926392, "rewards/margins": 8.810834884643555, "rewards/rejected": -10.14527702331543, "step": 27070 }, { "epoch": 0.32, "learning_rate": 4.2729484052736665e-06, "logits/chosen": -2.8430798053741455, "logits/rejected": -2.336369037628174, "logps/chosen": -165.815185546875, "logps/rejected": -879.5901489257812, "loss": 0.1141, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2007038593292236, "rewards/margins": 7.2121992111206055, "rewards/rejected": -8.41290283203125, "step": 27080 }, { "epoch": 0.32, "learning_rate": 4.272211763479503e-06, "logits/chosen": -2.778357982635498, "logits/rejected": -2.0723342895507812, "logps/chosen": -192.99661254882812, "logps/rejected": -1019.0211791992188, "loss": 0.1203, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4229075908660889, "rewards/margins": 8.367267608642578, "rewards/rejected": -9.790175437927246, "step": 27090 }, { "epoch": 0.32, "learning_rate": 4.271474812262087e-06, "logits/chosen": -2.855503559112549, "logits/rejected": -2.3140087127685547, "logps/chosen": -184.35757446289062, "logps/rejected": -994.84521484375, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.3473451137542725, "rewards/margins": 8.209895133972168, "rewards/rejected": -9.55724048614502, "step": 27100 }, { "epoch": 0.32, "learning_rate": 4.2707375517500875e-06, "logits/chosen": -2.896712303161621, "logits/rejected": -2.4870009422302246, "logps/chosen": -119.09187316894531, "logps/rejected": -821.6912231445312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7926130294799805, "rewards/margins": 7.057408809661865, "rewards/rejected": -7.850022315979004, "step": 27110 }, { "epoch": 0.32, "learning_rate": 4.269999982072231e-06, "logits/chosen": -2.8936688899993896, "logits/rejected": -2.4181838035583496, "logps/chosen": -138.36300659179688, "logps/rejected": -822.8558349609375, "loss": 0.0364, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9434863328933716, "rewards/margins": 6.913725852966309, "rewards/rejected": -7.857213020324707, "step": 27120 }, { "epoch": 0.32, "learning_rate": 4.269262103357292e-06, "logits/chosen": -2.856968402862549, "logits/rejected": -2.0275416374206543, "logps/chosen": -195.11019897460938, "logps/rejected": -1149.589111328125, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.3272498846054077, "rewards/margins": 9.741581916809082, "rewards/rejected": -11.068831443786621, "step": 27130 }, { "epoch": 0.32, "learning_rate": 4.268523915734105e-06, "logits/chosen": -2.871340751647949, "logits/rejected": -2.400292158126831, "logps/chosen": -172.4004669189453, "logps/rejected": -948.4861450195312, "loss": 0.1644, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2593798637390137, "rewards/margins": 7.827649116516113, "rewards/rejected": -9.087028503417969, "step": 27140 }, { "epoch": 0.32, "learning_rate": 4.267785419331554e-06, "logits/chosen": -2.876814842224121, "logits/rejected": -2.499746799468994, "logps/chosen": -131.20162963867188, "logps/rejected": -905.7420654296875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.9243763089179993, "rewards/margins": 7.749610900878906, "rewards/rejected": -8.673988342285156, "step": 27150 }, { "epoch": 0.33, "learning_rate": 4.267046614278578e-06, "logits/chosen": -2.9065022468566895, "logits/rejected": -2.6669888496398926, "logps/chosen": -97.9891128540039, "logps/rejected": -754.9495849609375, "loss": 0.0201, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6232994198799133, "rewards/margins": 6.565244197845459, "rewards/rejected": -7.188543796539307, "step": 27160 }, { "epoch": 0.33, "learning_rate": 4.266307500704171e-06, "logits/chosen": -2.8813414573669434, "logits/rejected": -2.372817039489746, "logps/chosen": -168.77516174316406, "logps/rejected": -994.6203002929688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.1961495876312256, "rewards/margins": 8.366128921508789, "rewards/rejected": -9.562278747558594, "step": 27170 }, { "epoch": 0.33, "learning_rate": 4.265568078737381e-06, "logits/chosen": -2.855201482772827, "logits/rejected": -2.410050868988037, "logps/chosen": -140.10433959960938, "logps/rejected": -907.7955322265625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.9396840333938599, "rewards/margins": 7.764674186706543, "rewards/rejected": -8.704358100891113, "step": 27180 }, { "epoch": 0.33, "learning_rate": 4.264828348507307e-06, "logits/chosen": -2.8562800884246826, "logits/rejected": -2.557523012161255, "logps/chosen": -116.83475494384766, "logps/rejected": -807.5606689453125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8027432560920715, "rewards/margins": 6.89209508895874, "rewards/rejected": -7.694838523864746, "step": 27190 }, { "epoch": 0.33, "learning_rate": 4.264088310143105e-06, "logits/chosen": -2.8389334678649902, "logits/rejected": -2.3519349098205566, "logps/chosen": -165.89175415039062, "logps/rejected": -1078.268798828125, "loss": 0.1549, "rewards/accuracies": 1.0, "rewards/chosen": -1.1321935653686523, "rewards/margins": 9.259446144104004, "rewards/rejected": -10.391639709472656, "step": 27200 }, { "epoch": 0.33, "learning_rate": 4.263347963773985e-06, "logits/chosen": -2.844254970550537, "logits/rejected": -2.1961326599121094, "logps/chosen": -148.15524291992188, "logps/rejected": -1046.0513916015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9773964881896973, "rewards/margins": 9.073667526245117, "rewards/rejected": -10.051063537597656, "step": 27210 }, { "epoch": 0.33, "learning_rate": 4.2626073095292065e-06, "logits/chosen": -2.8727993965148926, "logits/rejected": -2.617828845977783, "logps/chosen": -111.83621978759766, "logps/rejected": -864.0408935546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7465523481369019, "rewards/margins": 7.522193908691406, "rewards/rejected": -8.268746376037598, "step": 27220 }, { "epoch": 0.33, "learning_rate": 4.2618663475380885e-06, "logits/chosen": -2.8019371032714844, "logits/rejected": -2.189444065093994, "logps/chosen": -171.8207550048828, "logps/rejected": -1148.688232421875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.141353726387024, "rewards/margins": 9.943995475769043, "rewards/rejected": -11.085350036621094, "step": 27230 }, { "epoch": 0.33, "learning_rate": 4.261125077929998e-06, "logits/chosen": -2.8075618743896484, "logits/rejected": -2.2934274673461914, "logps/chosen": -144.82302856445312, "logps/rejected": -1058.322998046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0082050561904907, "rewards/margins": 9.189851760864258, "rewards/rejected": -10.1980562210083, "step": 27240 }, { "epoch": 0.33, "learning_rate": 4.260383500834361e-06, "logits/chosen": -2.8966281414031982, "logits/rejected": -2.36426043510437, "logps/chosen": -147.5418243408203, "logps/rejected": -974.8698120117188, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -0.9870814085006714, "rewards/margins": 8.364965438842773, "rewards/rejected": -9.352045059204102, "step": 27250 }, { "epoch": 0.33, "learning_rate": 4.259641616380654e-06, "logits/chosen": -2.8798470497131348, "logits/rejected": -2.552015781402588, "logps/chosen": -133.65829467773438, "logps/rejected": -865.5886840820312, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -0.9294564127922058, "rewards/margins": 7.346545219421387, "rewards/rejected": -8.276002883911133, "step": 27260 }, { "epoch": 0.33, "learning_rate": 4.258899424698408e-06, "logits/chosen": -2.8349032402038574, "logits/rejected": -2.150448799133301, "logps/chosen": -170.81912231445312, "logps/rejected": -1143.7467041015625, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -1.2629824876785278, "rewards/margins": 9.803110122680664, "rewards/rejected": -11.066092491149902, "step": 27270 }, { "epoch": 0.33, "learning_rate": 4.2581569259172084e-06, "logits/chosen": -2.865307331085205, "logits/rejected": -2.0969479084014893, "logps/chosen": -176.60720825195312, "logps/rejected": -1165.2762451171875, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.2494535446166992, "rewards/margins": 9.989062309265137, "rewards/rejected": -11.23851490020752, "step": 27280 }, { "epoch": 0.33, "learning_rate": 4.2574141201666915e-06, "logits/chosen": -2.917107343673706, "logits/rejected": -2.23760724067688, "logps/chosen": -188.4457550048828, "logps/rejected": -912.9691162109375, "loss": 0.1453, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3731552362442017, "rewards/margins": 7.355914115905762, "rewards/rejected": -8.7290678024292, "step": 27290 }, { "epoch": 0.33, "learning_rate": 4.2566710075765506e-06, "logits/chosen": -2.866147518157959, "logits/rejected": -2.312101125717163, "logps/chosen": -145.1536865234375, "logps/rejected": -944.6597900390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0382031202316284, "rewards/margins": 8.010607719421387, "rewards/rejected": -9.048810958862305, "step": 27300 }, { "epoch": 0.33, "learning_rate": 4.255927588276531e-06, "logits/chosen": -2.8399147987365723, "logits/rejected": -2.1644387245178223, "logps/chosen": -175.57351684570312, "logps/rejected": -1102.3526611328125, "loss": 0.1447, "rewards/accuracies": 1.0, "rewards/chosen": -1.1974124908447266, "rewards/margins": 9.42703628540039, "rewards/rejected": -10.624448776245117, "step": 27310 }, { "epoch": 0.33, "learning_rate": 4.2551838623964305e-06, "logits/chosen": -2.84576416015625, "logits/rejected": -2.40639328956604, "logps/chosen": -140.84219360351562, "logps/rejected": -917.3987426757812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9684313535690308, "rewards/margins": 7.821045875549316, "rewards/rejected": -8.789477348327637, "step": 27320 }, { "epoch": 0.33, "learning_rate": 4.2544398300661034e-06, "logits/chosen": -2.884486436843872, "logits/rejected": -2.3618412017822266, "logps/chosen": -135.88037109375, "logps/rejected": -934.0924072265625, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -0.9145196676254272, "rewards/margins": 8.04994010925293, "rewards/rejected": -8.964460372924805, "step": 27330 }, { "epoch": 0.33, "learning_rate": 4.253695491415455e-06, "logits/chosen": -2.8239126205444336, "logits/rejected": -2.2046027183532715, "logps/chosen": -167.87033081054688, "logps/rejected": -1000.8216552734375, "loss": 0.1601, "rewards/accuracies": 1.0, "rewards/chosen": -1.1821482181549072, "rewards/margins": 8.426358222961426, "rewards/rejected": -9.608506202697754, "step": 27340 }, { "epoch": 0.33, "learning_rate": 4.252950846574443e-06, "logits/chosen": -2.87569522857666, "logits/rejected": -2.2714412212371826, "logps/chosen": -166.81387329101562, "logps/rejected": -882.4035034179688, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.1490552425384521, "rewards/margins": 7.295505523681641, "rewards/rejected": -8.444561958312988, "step": 27350 }, { "epoch": 0.33, "learning_rate": 4.2522058956730825e-06, "logits/chosen": -2.89985990524292, "logits/rejected": -2.3475403785705566, "logps/chosen": -153.19859313964844, "logps/rejected": -959.7423095703125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.0341894626617432, "rewards/margins": 8.160520553588867, "rewards/rejected": -9.194710731506348, "step": 27360 }, { "epoch": 0.33, "learning_rate": 4.25146063884144e-06, "logits/chosen": -2.840686321258545, "logits/rejected": -2.1255602836608887, "logps/chosen": -170.2237548828125, "logps/rejected": -1102.1435546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.1729154586791992, "rewards/margins": 9.45509147644043, "rewards/rejected": -10.628005981445312, "step": 27370 }, { "epoch": 0.33, "learning_rate": 4.250715076209633e-06, "logits/chosen": -2.875462770462036, "logits/rejected": -2.507850170135498, "logps/chosen": -145.09469604492188, "logps/rejected": -905.7911376953125, "loss": 0.2502, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.042332410812378, "rewards/margins": 7.63573694229126, "rewards/rejected": -8.678068161010742, "step": 27380 }, { "epoch": 0.33, "learning_rate": 4.249969207907837e-06, "logits/chosen": -2.8762331008911133, "logits/rejected": -2.169006824493408, "logps/chosen": -179.69677734375, "logps/rejected": -1106.015380859375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.2032862901687622, "rewards/margins": 9.454562187194824, "rewards/rejected": -10.657849311828613, "step": 27390 }, { "epoch": 0.33, "learning_rate": 4.249223034066276e-06, "logits/chosen": -2.851716995239258, "logits/rejected": -2.169548749923706, "logps/chosen": -179.78131103515625, "logps/rejected": -1090.4853515625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.241375207901001, "rewards/margins": 9.266326904296875, "rewards/rejected": -10.507702827453613, "step": 27400 }, { "epoch": 0.33, "learning_rate": 4.248476554815234e-06, "logits/chosen": -2.8084399700164795, "logits/rejected": -2.4315404891967773, "logps/chosen": -128.1970977783203, "logps/rejected": -863.4696044921875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8565271496772766, "rewards/margins": 7.402491569519043, "rewards/rejected": -8.259017944335938, "step": 27410 }, { "epoch": 0.33, "learning_rate": 4.24772977028504e-06, "logits/chosen": -2.797679901123047, "logits/rejected": -2.2365174293518066, "logps/chosen": -212.14852905273438, "logps/rejected": -925.9762573242188, "loss": 0.258, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.642068862915039, "rewards/margins": 7.2291998863220215, "rewards/rejected": -8.871269226074219, "step": 27420 }, { "epoch": 0.33, "learning_rate": 4.2469826806060834e-06, "logits/chosen": -2.862795829772949, "logits/rejected": -2.2077343463897705, "logps/chosen": -137.29385375976562, "logps/rejected": -979.9442138671875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9185732007026672, "rewards/margins": 8.495084762573242, "rewards/rejected": -9.41365909576416, "step": 27430 }, { "epoch": 0.33, "learning_rate": 4.246235285908802e-06, "logits/chosen": -2.91808819770813, "logits/rejected": -2.109738826751709, "logps/chosen": -154.69187927246094, "logps/rejected": -1020.8707275390625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0254216194152832, "rewards/margins": 8.787006378173828, "rewards/rejected": -9.812426567077637, "step": 27440 }, { "epoch": 0.33, "learning_rate": 4.24548758632369e-06, "logits/chosen": -2.881347894668579, "logits/rejected": -2.4514732360839844, "logps/chosen": -134.6140899658203, "logps/rejected": -998.5220947265625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8848705291748047, "rewards/margins": 8.706355094909668, "rewards/rejected": -9.591226577758789, "step": 27450 }, { "epoch": 0.33, "learning_rate": 4.244739581981293e-06, "logits/chosen": -2.879004716873169, "logits/rejected": -2.26674747467041, "logps/chosen": -161.65011596679688, "logps/rejected": -959.5535278320312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.1188623905181885, "rewards/margins": 8.083963394165039, "rewards/rejected": -9.202826499938965, "step": 27460 }, { "epoch": 0.33, "learning_rate": 4.243991273012211e-06, "logits/chosen": -2.8803956508636475, "logits/rejected": -2.678116798400879, "logps/chosen": -104.0801010131836, "logps/rejected": -835.6173706054688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6690118312835693, "rewards/margins": 7.334500312805176, "rewards/rejected": -8.003511428833008, "step": 27470 }, { "epoch": 0.33, "learning_rate": 4.243242659547096e-06, "logits/chosen": -2.9090750217437744, "logits/rejected": -2.5229153633117676, "logps/chosen": -115.5831298828125, "logps/rejected": -936.2445068359375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7430830001831055, "rewards/margins": 8.230489730834961, "rewards/rejected": -8.973572731018066, "step": 27480 }, { "epoch": 0.33, "learning_rate": 4.242493741716656e-06, "logits/chosen": -2.8935203552246094, "logits/rejected": -2.6183929443359375, "logps/chosen": -108.8540267944336, "logps/rejected": -854.1842041015625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.679848313331604, "rewards/margins": 7.4929704666137695, "rewards/rejected": -8.172819137573242, "step": 27490 }, { "epoch": 0.33, "learning_rate": 4.241744519651647e-06, "logits/chosen": -2.8783926963806152, "logits/rejected": -2.284998655319214, "logps/chosen": -138.7719268798828, "logps/rejected": -1039.509521484375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9175409078598022, "rewards/margins": 9.091510772705078, "rewards/rejected": -10.009051322937012, "step": 27500 }, { "epoch": 0.33, "learning_rate": 4.240994993482882e-06, "logits/chosen": -2.8824617862701416, "logits/rejected": -2.3400707244873047, "logps/chosen": -134.2917022705078, "logps/rejected": -884.6477661132812, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -0.8353185653686523, "rewards/margins": 7.634271144866943, "rewards/rejected": -8.469589233398438, "step": 27510 }, { "epoch": 0.33, "learning_rate": 4.2402451633412265e-06, "logits/chosen": -2.8664920330047607, "logits/rejected": -2.603546619415283, "logps/chosen": -113.25297546386719, "logps/rejected": -860.8363037109375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7513056993484497, "rewards/margins": 7.482306003570557, "rewards/rejected": -8.233612060546875, "step": 27520 }, { "epoch": 0.33, "learning_rate": 4.2394950293576e-06, "logits/chosen": -2.8084795475006104, "logits/rejected": -2.355236530303955, "logps/chosen": -150.52224731445312, "logps/rejected": -985.3582763671875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0357182025909424, "rewards/margins": 8.430511474609375, "rewards/rejected": -9.466229438781738, "step": 27530 }, { "epoch": 0.33, "learning_rate": 4.238744591662971e-06, "logits/chosen": -2.846478223800659, "logits/rejected": -2.134976625442505, "logps/chosen": -147.8943634033203, "logps/rejected": -1016.4368286132812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.0360556840896606, "rewards/margins": 8.739145278930664, "rewards/rejected": -9.775201797485352, "step": 27540 }, { "epoch": 0.33, "learning_rate": 4.2379938503883666e-06, "logits/chosen": -2.8613357543945312, "logits/rejected": -2.413268804550171, "logps/chosen": -139.54725646972656, "logps/rejected": -896.7174072265625, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.9119018316268921, "rewards/margins": 7.650628566741943, "rewards/rejected": -8.562530517578125, "step": 27550 }, { "epoch": 0.33, "learning_rate": 4.237242805664863e-06, "logits/chosen": -2.863285541534424, "logits/rejected": -2.3682522773742676, "logps/chosen": -154.61439514160156, "logps/rejected": -945.7825317382812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.0504090785980225, "rewards/margins": 8.010553359985352, "rewards/rejected": -9.060961723327637, "step": 27560 }, { "epoch": 0.33, "learning_rate": 4.2364914576235906e-06, "logits/chosen": -2.828030586242676, "logits/rejected": -2.153226613998413, "logps/chosen": -155.0169677734375, "logps/rejected": -975.1486206054688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.086335301399231, "rewards/margins": 8.26413345336914, "rewards/rejected": -9.350468635559082, "step": 27570 }, { "epoch": 0.33, "learning_rate": 4.235739806395732e-06, "logits/chosen": -2.825148344039917, "logits/rejected": -2.299643039703369, "logps/chosen": -165.5185546875, "logps/rejected": -1031.951904296875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.2092803716659546, "rewards/margins": 8.718694686889648, "rewards/rejected": -9.927974700927734, "step": 27580 }, { "epoch": 0.33, "learning_rate": 4.234987852112524e-06, "logits/chosen": -2.8840858936309814, "logits/rejected": -2.5294651985168457, "logps/chosen": -174.8788299560547, "logps/rejected": -886.7904052734375, "loss": 0.1249, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3386478424072266, "rewards/margins": 7.158747673034668, "rewards/rejected": -8.497395515441895, "step": 27590 }, { "epoch": 0.33, "learning_rate": 4.234235594905256e-06, "logits/chosen": -2.8270015716552734, "logits/rejected": -2.322627544403076, "logps/chosen": -143.04408264160156, "logps/rejected": -894.3502807617188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9671632647514343, "rewards/margins": 7.603279113769531, "rewards/rejected": -8.570442199707031, "step": 27600 }, { "epoch": 0.33, "learning_rate": 4.233483034905269e-06, "logits/chosen": -2.8687503337860107, "logits/rejected": -2.3823461532592773, "logps/chosen": -150.36781311035156, "logps/rejected": -956.1917724609375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0509649515151978, "rewards/margins": 8.121445655822754, "rewards/rejected": -9.172409057617188, "step": 27610 }, { "epoch": 0.33, "learning_rate": 4.23273017224396e-06, "logits/chosen": -2.847991943359375, "logits/rejected": -2.3206124305725098, "logps/chosen": -177.21737670898438, "logps/rejected": -1022.0333251953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.2979304790496826, "rewards/margins": 8.5224609375, "rewards/rejected": -9.820391654968262, "step": 27620 }, { "epoch": 0.33, "learning_rate": 4.231977007052774e-06, "logits/chosen": -2.8588061332702637, "logits/rejected": -2.351173162460327, "logps/chosen": -179.22996520996094, "logps/rejected": -967.9446411132812, "loss": 0.0847, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3327362537384033, "rewards/margins": 7.967179298400879, "rewards/rejected": -9.299914360046387, "step": 27630 }, { "epoch": 0.33, "learning_rate": 4.231223539463214e-06, "logits/chosen": -2.868149995803833, "logits/rejected": -2.488250255584717, "logps/chosen": -168.96737670898438, "logps/rejected": -949.0309448242188, "loss": 0.115, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2042486667633057, "rewards/margins": 7.893517971038818, "rewards/rejected": -9.097765922546387, "step": 27640 }, { "epoch": 0.33, "learning_rate": 4.230469769606832e-06, "logits/chosen": -2.8451778888702393, "logits/rejected": -2.2207274436950684, "logps/chosen": -181.74188232421875, "logps/rejected": -1011.4172973632812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.2733145952224731, "rewards/margins": 8.455906867980957, "rewards/rejected": -9.72922134399414, "step": 27650 }, { "epoch": 0.33, "learning_rate": 4.229715697615234e-06, "logits/chosen": -2.8354530334472656, "logits/rejected": -2.204545021057129, "logps/chosen": -186.11416625976562, "logps/rejected": -1096.638916015625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.331974983215332, "rewards/margins": 9.241400718688965, "rewards/rejected": -10.573373794555664, "step": 27660 }, { "epoch": 0.33, "learning_rate": 4.22896132362008e-06, "logits/chosen": -2.8546884059906006, "logits/rejected": -2.4855237007141113, "logps/chosen": -167.36428833007812, "logps/rejected": -874.1521606445312, "loss": 0.1539, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2005083560943604, "rewards/margins": 7.160821437835693, "rewards/rejected": -8.361330032348633, "step": 27670 }, { "epoch": 0.33, "learning_rate": 4.2282066477530806e-06, "logits/chosen": -2.8395981788635254, "logits/rejected": -2.446019172668457, "logps/chosen": -138.6258544921875, "logps/rejected": -977.8128662109375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9597203135490417, "rewards/margins": 8.425275802612305, "rewards/rejected": -9.38499641418457, "step": 27680 }, { "epoch": 0.33, "learning_rate": 4.2274516701460005e-06, "logits/chosen": -2.8966116905212402, "logits/rejected": -2.3782474994659424, "logps/chosen": -155.75820922851562, "logps/rejected": -988.5823974609375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.1418507099151611, "rewards/margins": 8.343647003173828, "rewards/rejected": -9.485498428344727, "step": 27690 }, { "epoch": 0.33, "learning_rate": 4.226696390930657e-06, "logits/chosen": -2.873805522918701, "logits/rejected": -2.2722644805908203, "logps/chosen": -170.18789672851562, "logps/rejected": -1176.19873046875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.2060283422470093, "rewards/margins": 10.138749122619629, "rewards/rejected": -11.344776153564453, "step": 27700 }, { "epoch": 0.33, "learning_rate": 4.225940810238918e-06, "logits/chosen": -2.89685320854187, "logits/rejected": -2.4425859451293945, "logps/chosen": -142.67520141601562, "logps/rejected": -994.60888671875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.9931705594062805, "rewards/margins": 8.565263748168945, "rewards/rejected": -9.558435440063477, "step": 27710 }, { "epoch": 0.33, "learning_rate": 4.225184928202708e-06, "logits/chosen": -2.8729846477508545, "logits/rejected": -2.4066500663757324, "logps/chosen": -152.13369750976562, "logps/rejected": -940.3917846679688, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.0435985326766968, "rewards/margins": 7.954093933105469, "rewards/rejected": -8.997693061828613, "step": 27720 }, { "epoch": 0.33, "learning_rate": 4.224428744954001e-06, "logits/chosen": -2.8430047035217285, "logits/rejected": -2.2428207397460938, "logps/chosen": -155.06968688964844, "logps/rejected": -1025.0123291015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0775456428527832, "rewards/margins": 8.778306007385254, "rewards/rejected": -9.855851173400879, "step": 27730 }, { "epoch": 0.33, "learning_rate": 4.2236722606248245e-06, "logits/chosen": -2.8870933055877686, "logits/rejected": -2.280410051345825, "logps/chosen": -156.83200073242188, "logps/rejected": -962.2814331054688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.0937626361846924, "rewards/margins": 8.1448335647583, "rewards/rejected": -9.238595962524414, "step": 27740 }, { "epoch": 0.33, "learning_rate": 4.222915475347258e-06, "logits/chosen": -2.8190269470214844, "logits/rejected": -2.1631131172180176, "logps/chosen": -163.20474243164062, "logps/rejected": -1014.8983154296875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0695165395736694, "rewards/margins": 8.683935165405273, "rewards/rejected": -9.753451347351074, "step": 27750 }, { "epoch": 0.33, "learning_rate": 4.222158389253435e-06, "logits/chosen": -2.853996992111206, "logits/rejected": -2.2737767696380615, "logps/chosen": -157.63490295410156, "logps/rejected": -1103.7447509765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1031521558761597, "rewards/margins": 9.535606384277344, "rewards/rejected": -10.638758659362793, "step": 27760 }, { "epoch": 0.33, "learning_rate": 4.221401002475541e-06, "logits/chosen": -2.8839564323425293, "logits/rejected": -2.3731963634490967, "logps/chosen": -180.9061279296875, "logps/rejected": -979.0550537109375, "loss": 0.1348, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3612041473388672, "rewards/margins": 8.025152206420898, "rewards/rejected": -9.38635540008545, "step": 27770 }, { "epoch": 0.33, "learning_rate": 4.220643315145813e-06, "logits/chosen": -2.800234317779541, "logits/rejected": -1.903624176979065, "logps/chosen": -222.0723876953125, "logps/rejected": -1143.5267333984375, "loss": 0.0918, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6944103240966797, "rewards/margins": 9.324445724487305, "rewards/rejected": -11.018856048583984, "step": 27780 }, { "epoch": 0.33, "learning_rate": 4.21988532739654e-06, "logits/chosen": -2.842714548110962, "logits/rejected": -2.3810267448425293, "logps/chosen": -145.955810546875, "logps/rejected": -1029.259521484375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0020751953125, "rewards/margins": 8.896617889404297, "rewards/rejected": -9.898693084716797, "step": 27790 }, { "epoch": 0.33, "learning_rate": 4.2191270393600656e-06, "logits/chosen": -2.863189220428467, "logits/rejected": -2.4201295375823975, "logps/chosen": -132.13571166992188, "logps/rejected": -994.5716552734375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.9097695350646973, "rewards/margins": 8.651670455932617, "rewards/rejected": -9.561440467834473, "step": 27800 }, { "epoch": 0.33, "learning_rate": 4.218368451168786e-06, "logits/chosen": -2.8647446632385254, "logits/rejected": -2.383817434310913, "logps/chosen": -153.30538940429688, "logps/rejected": -1000.3668212890625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0703155994415283, "rewards/margins": 8.534473419189453, "rewards/rejected": -9.604789733886719, "step": 27810 }, { "epoch": 0.33, "learning_rate": 4.217609562955146e-06, "logits/chosen": -2.8795523643493652, "logits/rejected": -2.5106585025787354, "logps/chosen": -123.36006927490234, "logps/rejected": -845.6832275390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8240712881088257, "rewards/margins": 7.261805057525635, "rewards/rejected": -8.08587646484375, "step": 27820 }, { "epoch": 0.33, "learning_rate": 4.216850374851648e-06, "logits/chosen": -2.8313441276550293, "logits/rejected": -2.236114263534546, "logps/chosen": -155.72531127929688, "logps/rejected": -1088.6717529296875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.004342794418335, "rewards/margins": 9.47977352142334, "rewards/rejected": -10.48411750793457, "step": 27830 }, { "epoch": 0.33, "learning_rate": 4.2160908869908426e-06, "logits/chosen": -2.853370189666748, "logits/rejected": -2.1810152530670166, "logps/chosen": -147.72921752929688, "logps/rejected": -1097.7410888671875, "loss": 0.1462, "rewards/accuracies": 1.0, "rewards/chosen": -0.9577093124389648, "rewards/margins": 9.632498741149902, "rewards/rejected": -10.590208053588867, "step": 27840 }, { "epoch": 0.33, "learning_rate": 4.215331099505334e-06, "logits/chosen": -2.837510108947754, "logits/rejected": -2.2334399223327637, "logps/chosen": -145.15338134765625, "logps/rejected": -1070.1497802734375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0032243728637695, "rewards/margins": 9.285146713256836, "rewards/rejected": -10.288371086120605, "step": 27850 }, { "epoch": 0.33, "learning_rate": 4.21457101252778e-06, "logits/chosen": -2.830439329147339, "logits/rejected": -2.176298141479492, "logps/chosen": -141.1463623046875, "logps/rejected": -967.6891479492188, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9050313830375671, "rewards/margins": 8.380156517028809, "rewards/rejected": -9.285187721252441, "step": 27860 }, { "epoch": 0.33, "learning_rate": 4.2138106261908895e-06, "logits/chosen": -2.8612265586853027, "logits/rejected": -2.486933469772339, "logps/chosen": -157.4488067626953, "logps/rejected": -855.6915893554688, "loss": 0.2463, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.1525022983551025, "rewards/margins": 7.032189846038818, "rewards/rejected": -8.184691429138184, "step": 27870 }, { "epoch": 0.33, "learning_rate": 4.213049940627423e-06, "logits/chosen": -2.878352165222168, "logits/rejected": -2.0797340869903564, "logps/chosen": -141.7783966064453, "logps/rejected": -1031.155029296875, "loss": 0.0416, "rewards/accuracies": 1.0, "rewards/chosen": -0.9322888255119324, "rewards/margins": 8.98314094543457, "rewards/rejected": -9.91542911529541, "step": 27880 }, { "epoch": 0.33, "learning_rate": 4.212288955970195e-06, "logits/chosen": -2.874206066131592, "logits/rejected": -2.387045383453369, "logps/chosen": -127.02320861816406, "logps/rejected": -943.2510986328125, "loss": 0.1185, "rewards/accuracies": 1.0, "rewards/chosen": -0.8101357221603394, "rewards/margins": 8.24156379699707, "rewards/rejected": -9.051700592041016, "step": 27890 }, { "epoch": 0.33, "learning_rate": 4.211527672352071e-06, "logits/chosen": -2.8841662406921387, "logits/rejected": -2.3111672401428223, "logps/chosen": -124.59690856933594, "logps/rejected": -1016.767578125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7096593379974365, "rewards/margins": 9.046900749206543, "rewards/rejected": -9.756559371948242, "step": 27900 }, { "epoch": 0.33, "learning_rate": 4.210766089905969e-06, "logits/chosen": -2.8837764263153076, "logits/rejected": -2.71476674079895, "logps/chosen": -86.86759185791016, "logps/rejected": -740.3260498046875, "loss": 0.0997, "rewards/accuracies": 1.0, "rewards/chosen": -0.5204012393951416, "rewards/margins": 6.51797342300415, "rewards/rejected": -7.038374423980713, "step": 27910 }, { "epoch": 0.33, "learning_rate": 4.210004208764858e-06, "logits/chosen": -2.885986804962158, "logits/rejected": -2.2672600746154785, "logps/chosen": -129.68850708007812, "logps/rejected": -964.2587890625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8584462404251099, "rewards/margins": 8.385957717895508, "rewards/rejected": -9.244405746459961, "step": 27920 }, { "epoch": 0.33, "learning_rate": 4.209242029061763e-06, "logits/chosen": -2.8938164710998535, "logits/rejected": -2.45918607711792, "logps/chosen": -110.02040100097656, "logps/rejected": -894.5218505859375, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/chosen": -0.6601782441139221, "rewards/margins": 7.902838706970215, "rewards/rejected": -8.563017845153809, "step": 27930 }, { "epoch": 0.33, "learning_rate": 4.208479550929756e-06, "logits/chosen": -2.8193256855010986, "logits/rejected": -2.279693365097046, "logps/chosen": -121.4762191772461, "logps/rejected": -994.1774291992188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7838600873947144, "rewards/margins": 8.773513793945312, "rewards/rejected": -9.557374000549316, "step": 27940 }, { "epoch": 0.33, "learning_rate": 4.207716774501966e-06, "logits/chosen": -2.8410303592681885, "logits/rejected": -2.3799004554748535, "logps/chosen": -137.0364532470703, "logps/rejected": -967.9967041015625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.9414355158805847, "rewards/margins": 8.348092079162598, "rewards/rejected": -9.28952693939209, "step": 27950 }, { "epoch": 0.33, "learning_rate": 4.206953699911569e-06, "logits/chosen": -2.8523786067962646, "logits/rejected": -2.3650405406951904, "logps/chosen": -156.3009796142578, "logps/rejected": -922.8123779296875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0907859802246094, "rewards/margins": 7.7524919509887695, "rewards/rejected": -8.843278884887695, "step": 27960 }, { "epoch": 0.33, "learning_rate": 4.206190327291797e-06, "logits/chosen": -2.840423583984375, "logits/rejected": -2.18991756439209, "logps/chosen": -187.9404754638672, "logps/rejected": -1047.8392333984375, "loss": 0.1184, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4233574867248535, "rewards/margins": 8.66666030883789, "rewards/rejected": -10.090017318725586, "step": 27970 }, { "epoch": 0.33, "learning_rate": 4.205426656775932e-06, "logits/chosen": -2.8575408458709717, "logits/rejected": -2.2891769409179688, "logps/chosen": -155.1347198486328, "logps/rejected": -997.9778442382812, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1075313091278076, "rewards/margins": 8.471973419189453, "rewards/rejected": -9.579504013061523, "step": 27980 }, { "epoch": 0.34, "learning_rate": 4.20466268849731e-06, "logits/chosen": -2.864319324493408, "logits/rejected": -2.4100680351257324, "logps/chosen": -143.7706756591797, "logps/rejected": -943.2371826171875, "loss": 0.1002, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.957955539226532, "rewards/margins": 8.083344459533691, "rewards/rejected": -9.041299819946289, "step": 27990 }, { "epoch": 0.34, "learning_rate": 4.2038984225893165e-06, "logits/chosen": -2.8282432556152344, "logits/rejected": -2.0913760662078857, "logps/chosen": -166.92318725585938, "logps/rejected": -1000.0110473632812, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -1.1664352416992188, "rewards/margins": 8.432541847229004, "rewards/rejected": -9.598978042602539, "step": 28000 }, { "epoch": 0.34, "learning_rate": 4.203133859185391e-06, "logits/chosen": -2.8753421306610107, "logits/rejected": -2.4506828784942627, "logps/chosen": -115.52938079833984, "logps/rejected": -834.2216796875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6928824782371521, "rewards/margins": 7.264540672302246, "rewards/rejected": -7.957423210144043, "step": 28010 }, { "epoch": 0.34, "learning_rate": 4.202368998419024e-06, "logits/chosen": -2.9068686962127686, "logits/rejected": -2.4385008811950684, "logps/chosen": -135.02157592773438, "logps/rejected": -887.07861328125, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -0.8605610132217407, "rewards/margins": 7.638319969177246, "rewards/rejected": -8.498880386352539, "step": 28020 }, { "epoch": 0.34, "learning_rate": 4.201603840423759e-06, "logits/chosen": -2.8373820781707764, "logits/rejected": -2.1123738288879395, "logps/chosen": -156.17819213867188, "logps/rejected": -1091.73828125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0122472047805786, "rewards/margins": 9.507705688476562, "rewards/rejected": -10.519952774047852, "step": 28030 }, { "epoch": 0.34, "learning_rate": 4.200838385333188e-06, "logits/chosen": -2.835764169692993, "logits/rejected": -2.3133633136749268, "logps/chosen": -139.73509216308594, "logps/rejected": -1058.6298828125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9747819900512695, "rewards/margins": 9.22055721282959, "rewards/rejected": -10.195340156555176, "step": 28040 }, { "epoch": 0.34, "learning_rate": 4.200072633280959e-06, "logits/chosen": -2.854060173034668, "logits/rejected": -2.193927049636841, "logps/chosen": -153.89108276367188, "logps/rejected": -1065.3138427734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0367729663848877, "rewards/margins": 9.211912155151367, "rewards/rejected": -10.248684883117676, "step": 28050 }, { "epoch": 0.34, "learning_rate": 4.199306584400771e-06, "logits/chosen": -2.8951656818389893, "logits/rejected": -1.9130741357803345, "logps/chosen": -186.67005920410156, "logps/rejected": -1113.7325439453125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.2816632986068726, "rewards/margins": 9.451475143432617, "rewards/rejected": -10.733139038085938, "step": 28060 }, { "epoch": 0.34, "learning_rate": 4.198540238826372e-06, "logits/chosen": -2.853922128677368, "logits/rejected": -2.342238187789917, "logps/chosen": -134.71815490722656, "logps/rejected": -941.0919799804688, "loss": 0.0189, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8564281463623047, "rewards/margins": 8.175840377807617, "rewards/rejected": -9.032267570495605, "step": 28070 }, { "epoch": 0.34, "learning_rate": 4.197773596691565e-06, "logits/chosen": -2.8633675575256348, "logits/rejected": -2.4364490509033203, "logps/chosen": -117.8747329711914, "logps/rejected": -891.86279296875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7589808106422424, "rewards/margins": 7.77963924407959, "rewards/rejected": -8.538619041442871, "step": 28080 }, { "epoch": 0.34, "learning_rate": 4.197006658130204e-06, "logits/chosen": -2.800138473510742, "logits/rejected": -2.5032050609588623, "logps/chosen": -106.41862487792969, "logps/rejected": -803.8717041015625, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/chosen": -0.6566644906997681, "rewards/margins": 7.014919281005859, "rewards/rejected": -7.671584129333496, "step": 28090 }, { "epoch": 0.34, "learning_rate": 4.196239423276194e-06, "logits/chosen": -2.873279094696045, "logits/rejected": -2.0629236698150635, "logps/chosen": -182.49900817871094, "logps/rejected": -1209.8843994140625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.3043458461761475, "rewards/margins": 10.372697830200195, "rewards/rejected": -11.677042961120605, "step": 28100 }, { "epoch": 0.34, "learning_rate": 4.195471892263491e-06, "logits/chosen": -2.8409476280212402, "logits/rejected": -2.407139301300049, "logps/chosen": -129.21817016601562, "logps/rejected": -881.2899169921875, "loss": 0.029, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7796553373336792, "rewards/margins": 7.652136325836182, "rewards/rejected": -8.431792259216309, "step": 28110 }, { "epoch": 0.34, "learning_rate": 4.194704065226105e-06, "logits/chosen": -2.8508694171905518, "logits/rejected": -2.260457992553711, "logps/chosen": -166.44346618652344, "logps/rejected": -1088.509033203125, "loss": 0.1563, "rewards/accuracies": 1.0, "rewards/chosen": -1.1464754343032837, "rewards/margins": 9.347298622131348, "rewards/rejected": -10.4937744140625, "step": 28120 }, { "epoch": 0.34, "learning_rate": 4.193935942298095e-06, "logits/chosen": -2.864330768585205, "logits/rejected": -2.505110263824463, "logps/chosen": -113.24007415771484, "logps/rejected": -822.7718505859375, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -0.7274002432823181, "rewards/margins": 7.1267218589782715, "rewards/rejected": -7.854123115539551, "step": 28130 }, { "epoch": 0.34, "learning_rate": 4.1931675236135745e-06, "logits/chosen": -2.850153684616089, "logits/rejected": -2.3960981369018555, "logps/chosen": -134.849365234375, "logps/rejected": -1009.9114379882812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.906576931476593, "rewards/margins": 8.805379867553711, "rewards/rejected": -9.711955070495605, "step": 28140 }, { "epoch": 0.34, "learning_rate": 4.192398809306708e-06, "logits/chosen": -2.9235541820526123, "logits/rejected": -2.651637554168701, "logps/chosen": -105.59332275390625, "logps/rejected": -868.2347412109375, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -0.6675414443016052, "rewards/margins": 7.652823448181152, "rewards/rejected": -8.320364952087402, "step": 28150 }, { "epoch": 0.34, "learning_rate": 4.1916297995117096e-06, "logits/chosen": -2.8190817832946777, "logits/rejected": -2.1018309593200684, "logps/chosen": -150.542236328125, "logps/rejected": -1020.4319458007812, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0046590566635132, "rewards/margins": 8.800541877746582, "rewards/rejected": -9.805200576782227, "step": 28160 }, { "epoch": 0.34, "learning_rate": 4.190860494362846e-06, "logits/chosen": -2.8831474781036377, "logits/rejected": -2.444448232650757, "logps/chosen": -127.68985748291016, "logps/rejected": -946.3778076171875, "loss": 0.1218, "rewards/accuracies": 1.0, "rewards/chosen": -0.8566063642501831, "rewards/margins": 8.208784103393555, "rewards/rejected": -9.065389633178711, "step": 28170 }, { "epoch": 0.34, "learning_rate": 4.190090893994436e-06, "logits/chosen": -2.8462252616882324, "logits/rejected": -2.3056552410125732, "logps/chosen": -142.8496551513672, "logps/rejected": -844.4645385742188, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -0.9852257966995239, "rewards/margins": 7.086447238922119, "rewards/rejected": -8.071673393249512, "step": 28180 }, { "epoch": 0.34, "learning_rate": 4.18932099854085e-06, "logits/chosen": -2.834359645843506, "logits/rejected": -2.2340903282165527, "logps/chosen": -151.6586151123047, "logps/rejected": -1036.4063720703125, "loss": 0.1557, "rewards/accuracies": 1.0, "rewards/chosen": -1.0287034511566162, "rewards/margins": 8.951766967773438, "rewards/rejected": -9.980470657348633, "step": 28190 }, { "epoch": 0.34, "learning_rate": 4.18855080813651e-06, "logits/chosen": -2.8980021476745605, "logits/rejected": -2.3738224506378174, "logps/chosen": -134.82818603515625, "logps/rejected": -951.2296752929688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8777016401290894, "rewards/margins": 8.251357078552246, "rewards/rejected": -9.129058837890625, "step": 28200 }, { "epoch": 0.34, "learning_rate": 4.187780322915889e-06, "logits/chosen": -2.8180859088897705, "logits/rejected": -1.9831609725952148, "logps/chosen": -181.25283813476562, "logps/rejected": -1147.548095703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2521517276763916, "rewards/margins": 9.826297760009766, "rewards/rejected": -11.078449249267578, "step": 28210 }, { "epoch": 0.34, "learning_rate": 4.18700954301351e-06, "logits/chosen": -2.8839149475097656, "logits/rejected": -2.533151149749756, "logps/chosen": -128.99072265625, "logps/rejected": -1005.9627685546875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.865693211555481, "rewards/margins": 8.798611640930176, "rewards/rejected": -9.664304733276367, "step": 28220 }, { "epoch": 0.34, "learning_rate": 4.186238468563951e-06, "logits/chosen": -2.858393669128418, "logits/rejected": -2.1546902656555176, "logps/chosen": -158.7013702392578, "logps/rejected": -1207.2728271484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0813977718353271, "rewards/margins": 10.591772079467773, "rewards/rejected": -11.673171043395996, "step": 28230 }, { "epoch": 0.34, "learning_rate": 4.185467099701839e-06, "logits/chosen": -2.8416788578033447, "logits/rejected": -2.5203869342803955, "logps/chosen": -137.83006286621094, "logps/rejected": -911.6062622070312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.9257171750068665, "rewards/margins": 7.807340145111084, "rewards/rejected": -8.733057022094727, "step": 28240 }, { "epoch": 0.34, "learning_rate": 4.184695436561852e-06, "logits/chosen": -2.8643953800201416, "logits/rejected": -2.188532829284668, "logps/chosen": -165.7711639404297, "logps/rejected": -1130.1072998046875, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.14705491065979, "rewards/margins": 9.735913276672363, "rewards/rejected": -10.882967948913574, "step": 28250 }, { "epoch": 0.34, "learning_rate": 4.183923479278721e-06, "logits/chosen": -2.8934197425842285, "logits/rejected": -2.3706066608428955, "logps/chosen": -124.47684478759766, "logps/rejected": -980.3347778320312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8062186241149902, "rewards/margins": 8.615904808044434, "rewards/rejected": -9.422123908996582, "step": 28260 }, { "epoch": 0.34, "learning_rate": 4.183151227987227e-06, "logits/chosen": -2.8464369773864746, "logits/rejected": -2.1990058422088623, "logps/chosen": -151.0460205078125, "logps/rejected": -972.1461791992188, "loss": 0.1265, "rewards/accuracies": 1.0, "rewards/chosen": -0.9996660947799683, "rewards/margins": 8.320221900939941, "rewards/rejected": -9.3198881149292, "step": 28270 }, { "epoch": 0.34, "learning_rate": 4.182378682822204e-06, "logits/chosen": -2.8908445835113525, "logits/rejected": -2.286821126937866, "logps/chosen": -135.75497436523438, "logps/rejected": -1078.3819580078125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9504184722900391, "rewards/margins": 9.439238548278809, "rewards/rejected": -10.389656066894531, "step": 28280 }, { "epoch": 0.34, "learning_rate": 4.181605843918536e-06, "logits/chosen": -2.8840630054473877, "logits/rejected": -2.2386908531188965, "logps/chosen": -171.44285583496094, "logps/rejected": -1200.66943359375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0959441661834717, "rewards/margins": 10.499041557312012, "rewards/rejected": -11.594985961914062, "step": 28290 }, { "epoch": 0.34, "learning_rate": 4.180832711411157e-06, "logits/chosen": -2.801881790161133, "logits/rejected": -2.1020848751068115, "logps/chosen": -177.9874725341797, "logps/rejected": -1167.4794921875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.2165982723236084, "rewards/margins": 10.052513122558594, "rewards/rejected": -11.269111633300781, "step": 28300 }, { "epoch": 0.34, "learning_rate": 4.180059285435056e-06, "logits/chosen": -2.906628370285034, "logits/rejected": -2.3956828117370605, "logps/chosen": -157.87539672851562, "logps/rejected": -1068.332763671875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1018192768096924, "rewards/margins": 9.173317909240723, "rewards/rejected": -10.275136947631836, "step": 28310 }, { "epoch": 0.34, "learning_rate": 4.17928556612527e-06, "logits/chosen": -2.85874605178833, "logits/rejected": -2.33134126663208, "logps/chosen": -141.13893127441406, "logps/rejected": -996.5870971679688, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.9569504857063293, "rewards/margins": 8.611534118652344, "rewards/rejected": -9.568483352661133, "step": 28320 }, { "epoch": 0.34, "learning_rate": 4.178511553616887e-06, "logits/chosen": -2.889277935028076, "logits/rejected": -2.4343793392181396, "logps/chosen": -127.2776870727539, "logps/rejected": -1004.2132568359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8247989416122437, "rewards/margins": 8.83563232421875, "rewards/rejected": -9.660430908203125, "step": 28330 }, { "epoch": 0.34, "learning_rate": 4.177737248045049e-06, "logits/chosen": -2.9003732204437256, "logits/rejected": -2.378932476043701, "logps/chosen": -129.52696228027344, "logps/rejected": -966.0343017578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8872798681259155, "rewards/margins": 8.383915901184082, "rewards/rejected": -9.271196365356445, "step": 28340 }, { "epoch": 0.34, "learning_rate": 4.1769626495449475e-06, "logits/chosen": -2.837369918823242, "logits/rejected": -2.145265579223633, "logps/chosen": -176.40151977539062, "logps/rejected": -1179.661865234375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2322070598602295, "rewards/margins": 10.144784927368164, "rewards/rejected": -11.376993179321289, "step": 28350 }, { "epoch": 0.34, "learning_rate": 4.176187758251824e-06, "logits/chosen": -2.852870225906372, "logits/rejected": -2.2395777702331543, "logps/chosen": -149.63967895507812, "logps/rejected": -1096.52197265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0406744480133057, "rewards/margins": 9.525589942932129, "rewards/rejected": -10.566264152526855, "step": 28360 }, { "epoch": 0.34, "learning_rate": 4.1754125743009745e-06, "logits/chosen": -2.8516273498535156, "logits/rejected": -2.2085485458374023, "logps/chosen": -146.667724609375, "logps/rejected": -1001.5224609375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9903373718261719, "rewards/margins": 8.641427993774414, "rewards/rejected": -9.631765365600586, "step": 28370 }, { "epoch": 0.34, "learning_rate": 4.174637097827743e-06, "logits/chosen": -2.8862972259521484, "logits/rejected": -2.2285654544830322, "logps/chosen": -154.20730590820312, "logps/rejected": -1058.284423828125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0163694620132446, "rewards/margins": 9.158441543579102, "rewards/rejected": -10.174810409545898, "step": 28380 }, { "epoch": 0.34, "learning_rate": 4.1738613289675245e-06, "logits/chosen": -2.836076021194458, "logits/rejected": -2.313436508178711, "logps/chosen": -164.77906799316406, "logps/rejected": -1025.994873046875, "loss": 0.2607, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.205329179763794, "rewards/margins": 8.661287307739258, "rewards/rejected": -9.866618156433105, "step": 28390 }, { "epoch": 0.34, "learning_rate": 4.1730852678557675e-06, "logits/chosen": -2.8783411979675293, "logits/rejected": -2.3461947441101074, "logps/chosen": -137.08731079101562, "logps/rejected": -1050.138916015625, "loss": 0.1403, "rewards/accuracies": 1.0, "rewards/chosen": -0.8569399118423462, "rewards/margins": 9.2320556640625, "rewards/rejected": -10.088994979858398, "step": 28400 }, { "epoch": 0.34, "learning_rate": 4.172308914627968e-06, "logits/chosen": -2.916478157043457, "logits/rejected": -2.4770092964172363, "logps/chosen": -130.0752410888672, "logps/rejected": -893.1578979492188, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.8738859295845032, "rewards/margins": 7.6527276039123535, "rewards/rejected": -8.526613235473633, "step": 28410 }, { "epoch": 0.34, "learning_rate": 4.171532269419678e-06, "logits/chosen": -2.9332189559936523, "logits/rejected": -2.4037394523620605, "logps/chosen": -114.59513092041016, "logps/rejected": -985.7486572265625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.695578932762146, "rewards/margins": 8.767091751098633, "rewards/rejected": -9.46267032623291, "step": 28420 }, { "epoch": 0.34, "learning_rate": 4.170755332366496e-06, "logits/chosen": -2.8644747734069824, "logits/rejected": -2.479012966156006, "logps/chosen": -102.02571105957031, "logps/rejected": -939.4176635742188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5937352776527405, "rewards/margins": 8.407649993896484, "rewards/rejected": -9.001386642456055, "step": 28430 }, { "epoch": 0.34, "learning_rate": 4.169978103604074e-06, "logits/chosen": -2.9312965869903564, "logits/rejected": -2.673318862915039, "logps/chosen": -83.11814880371094, "logps/rejected": -792.5406494140625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4329395294189453, "rewards/margins": 7.118808746337891, "rewards/rejected": -7.551747798919678, "step": 28440 }, { "epoch": 0.34, "learning_rate": 4.169200583268113e-06, "logits/chosen": -2.8674826622009277, "logits/rejected": -2.458465337753296, "logps/chosen": -102.13255310058594, "logps/rejected": -1003.54736328125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6196569204330444, "rewards/margins": 9.027203559875488, "rewards/rejected": -9.646860122680664, "step": 28450 }, { "epoch": 0.34, "learning_rate": 4.168422771494365e-06, "logits/chosen": -2.848522901535034, "logits/rejected": -2.3596203327178955, "logps/chosen": -117.8344955444336, "logps/rejected": -994.3561401367188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7545747756958008, "rewards/margins": 8.802572250366211, "rewards/rejected": -9.557147026062012, "step": 28460 }, { "epoch": 0.34, "learning_rate": 4.167644668418635e-06, "logits/chosen": -2.8762543201446533, "logits/rejected": -2.1782782077789307, "logps/chosen": -151.03482055664062, "logps/rejected": -1149.605224609375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9818640947341919, "rewards/margins": 10.118537902832031, "rewards/rejected": -11.100401878356934, "step": 28470 }, { "epoch": 0.34, "learning_rate": 4.166866274176779e-06, "logits/chosen": -2.8480210304260254, "logits/rejected": -2.3218741416931152, "logps/chosen": -118.77195739746094, "logps/rejected": -928.16015625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7554672360420227, "rewards/margins": 8.14904499053955, "rewards/rejected": -8.904512405395508, "step": 28480 }, { "epoch": 0.34, "learning_rate": 4.166087588904701e-06, "logits/chosen": -2.8983230590820312, "logits/rejected": -2.4186718463897705, "logps/chosen": -132.25469970703125, "logps/rejected": -841.1189575195312, "loss": 0.0909, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8841312527656555, "rewards/margins": 7.153872013092041, "rewards/rejected": -8.038002967834473, "step": 28490 }, { "epoch": 0.34, "learning_rate": 4.165308612738356e-06, "logits/chosen": -2.814166784286499, "logits/rejected": -2.2273213863372803, "logps/chosen": -165.49813842773438, "logps/rejected": -1066.8876953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1394894123077393, "rewards/margins": 9.13726806640625, "rewards/rejected": -10.276758193969727, "step": 28500 }, { "epoch": 0.34, "learning_rate": 4.164529345813754e-06, "logits/chosen": -2.8249599933624268, "logits/rejected": -2.3363006114959717, "logps/chosen": -124.24345397949219, "logps/rejected": -985.4332885742188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7774824500083923, "rewards/margins": 8.700121879577637, "rewards/rejected": -9.477605819702148, "step": 28510 }, { "epoch": 0.34, "learning_rate": 4.1637497882669494e-06, "logits/chosen": -2.8533289432525635, "logits/rejected": -2.1093578338623047, "logps/chosen": -157.10678100585938, "logps/rejected": -1122.115966796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0438082218170166, "rewards/margins": 9.781984329223633, "rewards/rejected": -10.825793266296387, "step": 28520 }, { "epoch": 0.34, "learning_rate": 4.162969940234054e-06, "logits/chosen": -2.8625781536102295, "logits/rejected": -2.4470038414001465, "logps/chosen": -125.39273834228516, "logps/rejected": -1004.3277587890625, "loss": 0.1516, "rewards/accuracies": 1.0, "rewards/chosen": -0.8364923596382141, "rewards/margins": 8.825689315795898, "rewards/rejected": -9.662181854248047, "step": 28530 }, { "epoch": 0.34, "learning_rate": 4.162189801851225e-06, "logits/chosen": -2.8607723712921143, "logits/rejected": -2.481997013092041, "logps/chosen": -141.19302368164062, "logps/rejected": -888.94140625, "loss": 0.1098, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9276762008666992, "rewards/margins": 7.595375061035156, "rewards/rejected": -8.523050308227539, "step": 28540 }, { "epoch": 0.34, "learning_rate": 4.161409373254674e-06, "logits/chosen": -2.816019058227539, "logits/rejected": -2.5032849311828613, "logps/chosen": -123.577880859375, "logps/rejected": -734.2301025390625, "loss": 0.1284, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8635820150375366, "rewards/margins": 6.126115798950195, "rewards/rejected": -6.9896979331970215, "step": 28550 }, { "epoch": 0.34, "learning_rate": 4.16062865458066e-06, "logits/chosen": -2.8879752159118652, "logits/rejected": -2.368285894393921, "logps/chosen": -137.06488037109375, "logps/rejected": -1142.097900390625, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.9072192311286926, "rewards/margins": 10.102452278137207, "rewards/rejected": -11.009672164916992, "step": 28560 }, { "epoch": 0.34, "learning_rate": 4.159847645965496e-06, "logits/chosen": -2.9311935901641846, "logits/rejected": -2.2841033935546875, "logps/chosen": -155.91500854492188, "logps/rejected": -1176.4559326171875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9890868067741394, "rewards/margins": 10.365130424499512, "rewards/rejected": -11.354218482971191, "step": 28570 }, { "epoch": 0.34, "learning_rate": 4.1590663475455414e-06, "logits/chosen": -2.8605384826660156, "logits/rejected": -2.4476895332336426, "logps/chosen": -95.86512756347656, "logps/rejected": -791.7171630859375, "loss": 0.0535, "rewards/accuracies": 1.0, "rewards/chosen": -0.5835856795310974, "rewards/margins": 6.968293190002441, "rewards/rejected": -7.551878452301025, "step": 28580 }, { "epoch": 0.34, "learning_rate": 4.158284759457211e-06, "logits/chosen": -2.8698570728302, "logits/rejected": -2.268778085708618, "logps/chosen": -150.5800323486328, "logps/rejected": -1118.3245849609375, "loss": 0.1244, "rewards/accuracies": 1.0, "rewards/chosen": -0.9430235028266907, "rewards/margins": 9.81999683380127, "rewards/rejected": -10.763020515441895, "step": 28590 }, { "epoch": 0.34, "learning_rate": 4.157502881836969e-06, "logits/chosen": -2.896181106567383, "logits/rejected": -2.2942590713500977, "logps/chosen": -154.5753173828125, "logps/rejected": -1085.5006103515625, "loss": 0.0755, "rewards/accuracies": 1.0, "rewards/chosen": -1.0174928903579712, "rewards/margins": 9.439143180847168, "rewards/rejected": -10.456636428833008, "step": 28600 }, { "epoch": 0.34, "learning_rate": 4.156720714821325e-06, "logits/chosen": -2.888235330581665, "logits/rejected": -2.506268262863159, "logps/chosen": -97.01377868652344, "logps/rejected": -941.0205078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5729541778564453, "rewards/margins": 8.462140083312988, "rewards/rejected": -9.035093307495117, "step": 28610 }, { "epoch": 0.34, "learning_rate": 4.155938258546847e-06, "logits/chosen": -2.8699419498443604, "logits/rejected": -2.3958470821380615, "logps/chosen": -125.08089447021484, "logps/rejected": -1060.5003662109375, "loss": 0.1073, "rewards/accuracies": 1.0, "rewards/chosen": -0.7312955856323242, "rewards/margins": 9.49245548248291, "rewards/rejected": -10.223750114440918, "step": 28620 }, { "epoch": 0.34, "learning_rate": 4.155155513150148e-06, "logits/chosen": -2.8949813842773438, "logits/rejected": -2.3852970600128174, "logps/chosen": -122.0856704711914, "logps/rejected": -1021.0574340820312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7871131300926208, "rewards/margins": 9.030299186706543, "rewards/rejected": -9.817413330078125, "step": 28630 }, { "epoch": 0.34, "learning_rate": 4.154372478767893e-06, "logits/chosen": -2.8493130207061768, "logits/rejected": -2.604959487915039, "logps/chosen": -90.8420639038086, "logps/rejected": -808.6243896484375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.546757161617279, "rewards/margins": 7.154473781585693, "rewards/rejected": -7.701230525970459, "step": 28640 }, { "epoch": 0.34, "learning_rate": 4.153589155536798e-06, "logits/chosen": -2.876166582107544, "logits/rejected": -2.4261937141418457, "logps/chosen": -108.59757995605469, "logps/rejected": -898.1100463867188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6328473687171936, "rewards/margins": 7.96523904800415, "rewards/rejected": -8.5980863571167, "step": 28650 }, { "epoch": 0.34, "learning_rate": 4.15280554359363e-06, "logits/chosen": -2.90618896484375, "logits/rejected": -2.654550313949585, "logps/chosen": -90.31689453125, "logps/rejected": -805.5198974609375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5301141738891602, "rewards/margins": 7.160614967346191, "rewards/rejected": -7.690728664398193, "step": 28660 }, { "epoch": 0.34, "learning_rate": 4.152021643075204e-06, "logits/chosen": -2.870981454849243, "logits/rejected": -2.1530871391296387, "logps/chosen": -142.6092987060547, "logps/rejected": -1101.5697021484375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9346744418144226, "rewards/margins": 9.681814193725586, "rewards/rejected": -10.61648941040039, "step": 28670 }, { "epoch": 0.34, "learning_rate": 4.151237454118386e-06, "logits/chosen": -2.9085237979888916, "logits/rejected": -2.241166353225708, "logps/chosen": -150.85145568847656, "logps/rejected": -910.9436645507812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0368554592132568, "rewards/margins": 7.670359134674072, "rewards/rejected": -8.707215309143066, "step": 28680 }, { "epoch": 0.34, "learning_rate": 4.150452976860097e-06, "logits/chosen": -2.822147846221924, "logits/rejected": -2.416534423828125, "logps/chosen": -133.98318481445312, "logps/rejected": -975.5802001953125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.9140518307685852, "rewards/margins": 8.470491409301758, "rewards/rejected": -9.384543418884277, "step": 28690 }, { "epoch": 0.34, "learning_rate": 4.149668211437301e-06, "logits/chosen": -2.8786590099334717, "logits/rejected": -2.495628833770752, "logps/chosen": -111.31428527832031, "logps/rejected": -960.0269775390625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7208133339881897, "rewards/margins": 8.505191802978516, "rewards/rejected": -9.226005554199219, "step": 28700 }, { "epoch": 0.34, "learning_rate": 4.148883157987017e-06, "logits/chosen": -2.8665900230407715, "logits/rejected": -2.419865369796753, "logps/chosen": -155.91551208496094, "logps/rejected": -962.0695190429688, "loss": 0.2489, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1162402629852295, "rewards/margins": 8.12592887878418, "rewards/rejected": -9.242170333862305, "step": 28710 }, { "epoch": 0.34, "learning_rate": 4.148097816646314e-06, "logits/chosen": -2.8313002586364746, "logits/rejected": -2.0809130668640137, "logps/chosen": -172.25538635253906, "logps/rejected": -1234.2373046875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.1561466455459595, "rewards/margins": 10.790061950683594, "rewards/rejected": -11.946208000183105, "step": 28720 }, { "epoch": 0.34, "learning_rate": 4.147312187552308e-06, "logits/chosen": -2.850465774536133, "logits/rejected": -2.0546112060546875, "logps/chosen": -160.55816650390625, "logps/rejected": -1219.7889404296875, "loss": 0.1217, "rewards/accuracies": 1.0, "rewards/chosen": -1.0665714740753174, "rewards/margins": 10.726400375366211, "rewards/rejected": -11.792970657348633, "step": 28730 }, { "epoch": 0.34, "learning_rate": 4.146526270842171e-06, "logits/chosen": -2.8383612632751465, "logits/rejected": -2.1637914180755615, "logps/chosen": -144.22610473632812, "logps/rejected": -1080.0123291015625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9346534609794617, "rewards/margins": 9.471938133239746, "rewards/rejected": -10.406591415405273, "step": 28740 }, { "epoch": 0.34, "learning_rate": 4.1457400666531186e-06, "logits/chosen": -2.8554930686950684, "logits/rejected": -2.1912922859191895, "logps/chosen": -145.53399658203125, "logps/rejected": -1008.8410034179688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.945770263671875, "rewards/margins": 8.752603530883789, "rewards/rejected": -9.698373794555664, "step": 28750 }, { "epoch": 0.34, "learning_rate": 4.1449535751224225e-06, "logits/chosen": -2.8767900466918945, "logits/rejected": -2.1026015281677246, "logps/chosen": -161.42535400390625, "logps/rejected": -1105.5506591796875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.079328179359436, "rewards/margins": 9.592269897460938, "rewards/rejected": -10.671598434448242, "step": 28760 }, { "epoch": 0.34, "learning_rate": 4.1441667963874e-06, "logits/chosen": -2.8953256607055664, "logits/rejected": -2.399174213409424, "logps/chosen": -130.25714111328125, "logps/rejected": -977.9963989257812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8929640054702759, "rewards/margins": 8.513154983520508, "rewards/rejected": -9.406118392944336, "step": 28770 }, { "epoch": 0.34, "learning_rate": 4.1433797305854225e-06, "logits/chosen": -2.8502960205078125, "logits/rejected": -2.3055994510650635, "logps/chosen": -124.0104751586914, "logps/rejected": -994.9152221679688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8072817921638489, "rewards/margins": 8.767763137817383, "rewards/rejected": -9.575044631958008, "step": 28780 }, { "epoch": 0.34, "learning_rate": 4.142592377853908e-06, "logits/chosen": -2.8508994579315186, "logits/rejected": -2.0827462673187256, "logps/chosen": -148.7830810546875, "logps/rejected": -1178.1890869140625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.9465996623039246, "rewards/margins": 10.419453620910645, "rewards/rejected": -11.366050720214844, "step": 28790 }, { "epoch": 0.34, "learning_rate": 4.141804738330326e-06, "logits/chosen": -2.889505624771118, "logits/rejected": -2.3243305683135986, "logps/chosen": -138.6336669921875, "logps/rejected": -1043.8681640625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.9037731289863586, "rewards/margins": 9.143799781799316, "rewards/rejected": -10.04757308959961, "step": 28800 }, { "epoch": 0.34, "learning_rate": 4.141016812152197e-06, "logits/chosen": -2.8878836631774902, "logits/rejected": -2.453569173812866, "logps/chosen": -118.6556625366211, "logps/rejected": -981.5460205078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7766878008842468, "rewards/margins": 8.656949996948242, "rewards/rejected": -9.433637619018555, "step": 28810 }, { "epoch": 0.34, "learning_rate": 4.140228599457089e-06, "logits/chosen": -2.9125657081604004, "logits/rejected": -2.332376718521118, "logps/chosen": -145.78550720214844, "logps/rejected": -1083.43212890625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9561694860458374, "rewards/margins": 9.492303848266602, "rewards/rejected": -10.448474884033203, "step": 28820 }, { "epoch": 0.35, "learning_rate": 4.139440100382624e-06, "logits/chosen": -2.8257253170013428, "logits/rejected": -2.270261764526367, "logps/chosen": -142.22323608398438, "logps/rejected": -971.76171875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9667621850967407, "rewards/margins": 8.362199783325195, "rewards/rejected": -9.328962326049805, "step": 28830 }, { "epoch": 0.35, "learning_rate": 4.138651315066471e-06, "logits/chosen": -2.86064076423645, "logits/rejected": -2.3147470951080322, "logps/chosen": -163.31748962402344, "logps/rejected": -1047.267822265625, "loss": 0.0959, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2032420635223389, "rewards/margins": 8.889863967895508, "rewards/rejected": -10.093106269836426, "step": 28840 }, { "epoch": 0.35, "learning_rate": 4.13786224364635e-06, "logits/chosen": -2.776554584503174, "logits/rejected": -2.1800084114074707, "logps/chosen": -167.22549438476562, "logps/rejected": -1077.882080078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1978533267974854, "rewards/margins": 9.187877655029297, "rewards/rejected": -10.385730743408203, "step": 28850 }, { "epoch": 0.35, "learning_rate": 4.13707288626003e-06, "logits/chosen": -2.8894314765930176, "logits/rejected": -2.2525246143341064, "logps/chosen": -148.019287109375, "logps/rejected": -1034.4078369140625, "loss": 0.0936, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0013957023620605, "rewards/margins": 8.939939498901367, "rewards/rejected": -9.94133472442627, "step": 28860 }, { "epoch": 0.35, "learning_rate": 4.13628324304533e-06, "logits/chosen": -2.873432159423828, "logits/rejected": -2.1521732807159424, "logps/chosen": -170.33447265625, "logps/rejected": -1161.0528564453125, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": -1.1529569625854492, "rewards/margins": 10.033071517944336, "rewards/rejected": -11.186027526855469, "step": 28870 }, { "epoch": 0.35, "learning_rate": 4.135493314140121e-06, "logits/chosen": -2.875183582305908, "logits/rejected": -2.375049591064453, "logps/chosen": -167.30819702148438, "logps/rejected": -908.2391357421875, "loss": 0.1717, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1903687715530396, "rewards/margins": 7.511181831359863, "rewards/rejected": -8.701550483703613, "step": 28880 }, { "epoch": 0.35, "learning_rate": 4.134703099682321e-06, "logits/chosen": -2.8262245655059814, "logits/rejected": -2.3466973304748535, "logps/chosen": -139.67274475097656, "logps/rejected": -1083.038818359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8880773782730103, "rewards/margins": 9.549588203430176, "rewards/rejected": -10.437664031982422, "step": 28890 }, { "epoch": 0.35, "learning_rate": 4.133912599809901e-06, "logits/chosen": -2.86234188079834, "logits/rejected": -2.290985107421875, "logps/chosen": -164.46847534179688, "logps/rejected": -1023.8175048828125, "loss": 0.1002, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.156954050064087, "rewards/margins": 8.687792778015137, "rewards/rejected": -9.844746589660645, "step": 28900 }, { "epoch": 0.35, "learning_rate": 4.133121814660879e-06, "logits/chosen": -2.932281732559204, "logits/rejected": -2.5513415336608887, "logps/chosen": -114.6536865234375, "logps/rejected": -912.2029418945312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7366880178451538, "rewards/margins": 8.007661819458008, "rewards/rejected": -8.744349479675293, "step": 28910 }, { "epoch": 0.35, "learning_rate": 4.132330744373324e-06, "logits/chosen": -2.8510634899139404, "logits/rejected": -2.4731602668762207, "logps/chosen": -99.66793060302734, "logps/rejected": -808.4557495117188, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.595958948135376, "rewards/margins": 7.116552829742432, "rewards/rejected": -7.712512016296387, "step": 28920 }, { "epoch": 0.35, "learning_rate": 4.131539389085354e-06, "logits/chosen": -2.874959945678711, "logits/rejected": -2.194185495376587, "logps/chosen": -144.18711853027344, "logps/rejected": -1099.1082763671875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8714305758476257, "rewards/margins": 9.717093467712402, "rewards/rejected": -10.588523864746094, "step": 28930 }, { "epoch": 0.35, "learning_rate": 4.130747748935138e-06, "logits/chosen": -2.8336844444274902, "logits/rejected": -2.4546046257019043, "logps/chosen": -101.7095718383789, "logps/rejected": -825.4862060546875, "loss": 0.1588, "rewards/accuracies": 1.0, "rewards/chosen": -0.6410614252090454, "rewards/margins": 7.246225833892822, "rewards/rejected": -7.887287139892578, "step": 28940 }, { "epoch": 0.35, "learning_rate": 4.129955824060895e-06, "logits/chosen": -2.818227767944336, "logits/rejected": -2.3081984519958496, "logps/chosen": -130.49923706054688, "logps/rejected": -1031.168212890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8593565225601196, "rewards/margins": 9.054107666015625, "rewards/rejected": -9.913464546203613, "step": 28950 }, { "epoch": 0.35, "learning_rate": 4.129163614600892e-06, "logits/chosen": -2.8999500274658203, "logits/rejected": -2.417361259460449, "logps/chosen": -139.50779724121094, "logps/rejected": -1008.2882080078125, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.9489455223083496, "rewards/margins": 8.737072944641113, "rewards/rejected": -9.686018943786621, "step": 28960 }, { "epoch": 0.35, "learning_rate": 4.128371120693446e-06, "logits/chosen": -2.827833652496338, "logits/rejected": -2.366814136505127, "logps/chosen": -122.33392333984375, "logps/rejected": -988.5992431640625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7651048898696899, "rewards/margins": 8.732647895812988, "rewards/rejected": -9.497754096984863, "step": 28970 }, { "epoch": 0.35, "learning_rate": 4.127578342476926e-06, "logits/chosen": -2.8967156410217285, "logits/rejected": -2.349700927734375, "logps/chosen": -156.5720672607422, "logps/rejected": -1175.887939453125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.074559211730957, "rewards/margins": 10.28908634185791, "rewards/rejected": -11.36364459991455, "step": 28980 }, { "epoch": 0.35, "learning_rate": 4.1267852800897465e-06, "logits/chosen": -2.801326274871826, "logits/rejected": -2.1185545921325684, "logps/chosen": -146.47293090820312, "logps/rejected": -1000.82080078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9575715065002441, "rewards/margins": 8.662413597106934, "rewards/rejected": -9.619985580444336, "step": 28990 }, { "epoch": 0.35, "learning_rate": 4.125991933670376e-06, "logits/chosen": -2.8368732929229736, "logits/rejected": -2.3624672889709473, "logps/chosen": -118.7857437133789, "logps/rejected": -874.1048583984375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.8039243817329407, "rewards/margins": 7.5621161460876465, "rewards/rejected": -8.366040229797363, "step": 29000 }, { "epoch": 0.35, "learning_rate": 4.12519830335733e-06, "logits/chosen": -2.8639261722564697, "logits/rejected": -2.246809959411621, "logps/chosen": -130.94793701171875, "logps/rejected": -991.0941162109375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8608067631721497, "rewards/margins": 8.668355941772461, "rewards/rejected": -9.52916145324707, "step": 29010 }, { "epoch": 0.35, "learning_rate": 4.124404389289174e-06, "logits/chosen": -2.834660530090332, "logits/rejected": -2.474546432495117, "logps/chosen": -109.7819595336914, "logps/rejected": -823.6231689453125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6978191137313843, "rewards/margins": 7.175013542175293, "rewards/rejected": -7.872833251953125, "step": 29020 }, { "epoch": 0.35, "learning_rate": 4.123610191604523e-06, "logits/chosen": -2.8741958141326904, "logits/rejected": -2.430870771408081, "logps/chosen": -140.58657836914062, "logps/rejected": -899.0950927734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9513980746269226, "rewards/margins": 7.661428928375244, "rewards/rejected": -8.61282730102539, "step": 29030 }, { "epoch": 0.35, "learning_rate": 4.122815710442042e-06, "logits/chosen": -2.817382574081421, "logits/rejected": -2.270596504211426, "logps/chosen": -139.6961669921875, "logps/rejected": -984.8972778320312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.943880558013916, "rewards/margins": 8.517671585083008, "rewards/rejected": -9.46155071258545, "step": 29040 }, { "epoch": 0.35, "learning_rate": 4.122020945940445e-06, "logits/chosen": -2.899348258972168, "logits/rejected": -2.2484700679779053, "logps/chosen": -162.56845092773438, "logps/rejected": -1021.5027465820312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.081925392150879, "rewards/margins": 8.743706703186035, "rewards/rejected": -9.825632095336914, "step": 29050 }, { "epoch": 0.35, "learning_rate": 4.121225898238496e-06, "logits/chosen": -2.912039279937744, "logits/rejected": -2.4982962608337402, "logps/chosen": -158.0211639404297, "logps/rejected": -834.3649291992188, "loss": 0.2557, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1444424390792847, "rewards/margins": 6.8332929611206055, "rewards/rejected": -7.977735996246338, "step": 29060 }, { "epoch": 0.35, "learning_rate": 4.120430567475008e-06, "logits/chosen": -2.8288979530334473, "logits/rejected": -2.133043050765991, "logps/chosen": -156.1840362548828, "logps/rejected": -1031.1387939453125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0590126514434814, "rewards/margins": 8.85428237915039, "rewards/rejected": -9.91329574584961, "step": 29070 }, { "epoch": 0.35, "learning_rate": 4.119634953788843e-06, "logits/chosen": -2.857469081878662, "logits/rejected": -2.3874382972717285, "logps/chosen": -121.8730239868164, "logps/rejected": -999.9091796875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8238698244094849, "rewards/margins": 8.80022144317627, "rewards/rejected": -9.624090194702148, "step": 29080 }, { "epoch": 0.35, "learning_rate": 4.118839057318914e-06, "logits/chosen": -2.8378829956054688, "logits/rejected": -2.374636173248291, "logps/chosen": -123.13671875, "logps/rejected": -933.3615112304688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7861449122428894, "rewards/margins": 8.176786422729492, "rewards/rejected": -8.962930679321289, "step": 29090 }, { "epoch": 0.35, "learning_rate": 4.118042878204182e-06, "logits/chosen": -2.849730968475342, "logits/rejected": -2.1084325313568115, "logps/chosen": -145.11041259765625, "logps/rejected": -1077.1595458984375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9226900339126587, "rewards/margins": 9.464078903198242, "rewards/rejected": -10.38676929473877, "step": 29100 }, { "epoch": 0.35, "learning_rate": 4.1172464165836565e-06, "logits/chosen": -2.838991641998291, "logits/rejected": -1.9484096765518188, "logps/chosen": -199.48170471191406, "logps/rejected": -1173.0709228515625, "loss": 0.1138, "rewards/accuracies": 1.0, "rewards/chosen": -1.4445652961730957, "rewards/margins": 9.874334335327148, "rewards/rejected": -11.318899154663086, "step": 29110 }, { "epoch": 0.35, "learning_rate": 4.116449672596398e-06, "logits/chosen": -2.896341323852539, "logits/rejected": -2.228048801422119, "logps/chosen": -149.61949157714844, "logps/rejected": -988.7188720703125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9909707307815552, "rewards/margins": 8.49658489227295, "rewards/rejected": -9.487554550170898, "step": 29120 }, { "epoch": 0.35, "learning_rate": 4.115652646381517e-06, "logits/chosen": -2.9031755924224854, "logits/rejected": -2.4737942218780518, "logps/chosen": -143.81039428710938, "logps/rejected": -963.2257690429688, "loss": 0.141, "rewards/accuracies": 1.0, "rewards/chosen": -0.9777595400810242, "rewards/margins": 8.249812126159668, "rewards/rejected": -9.227571487426758, "step": 29130 }, { "epoch": 0.35, "learning_rate": 4.114855338078172e-06, "logits/chosen": -2.86450457572937, "logits/rejected": -2.316222667694092, "logps/chosen": -137.5086669921875, "logps/rejected": -1083.6019287109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9229595065116882, "rewards/margins": 9.52777099609375, "rewards/rejected": -10.450729370117188, "step": 29140 }, { "epoch": 0.35, "learning_rate": 4.1140577478255686e-06, "logits/chosen": -2.7873990535736084, "logits/rejected": -2.0205397605895996, "logps/chosen": -183.6920623779297, "logps/rejected": -1229.2811279296875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.2912623882293701, "rewards/margins": 10.593619346618652, "rewards/rejected": -11.884881973266602, "step": 29150 }, { "epoch": 0.35, "learning_rate": 4.113259875762966e-06, "logits/chosen": -2.7831597328186035, "logits/rejected": -2.1863598823547363, "logps/chosen": -143.58506774902344, "logps/rejected": -934.7027587890625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.9396113157272339, "rewards/margins": 8.01533317565918, "rewards/rejected": -8.954943656921387, "step": 29160 }, { "epoch": 0.35, "learning_rate": 4.112461722029671e-06, "logits/chosen": -2.8603432178497314, "logits/rejected": -2.465939998626709, "logps/chosen": -153.04312133789062, "logps/rejected": -851.5474853515625, "loss": 0.1314, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0819549560546875, "rewards/margins": 7.06545877456665, "rewards/rejected": -8.14741325378418, "step": 29170 }, { "epoch": 0.35, "learning_rate": 4.111663286765035e-06, "logits/chosen": -2.8702874183654785, "logits/rejected": -2.294637680053711, "logps/chosen": -133.95352172851562, "logps/rejected": -940.0514526367188, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.8823984861373901, "rewards/margins": 8.135499954223633, "rewards/rejected": -9.017898559570312, "step": 29180 }, { "epoch": 0.35, "learning_rate": 4.110864570108467e-06, "logits/chosen": -2.8550753593444824, "logits/rejected": -2.3195886611938477, "logps/chosen": -130.13182067871094, "logps/rejected": -1047.2117919921875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8253582715988159, "rewards/margins": 9.242847442626953, "rewards/rejected": -10.068205833435059, "step": 29190 }, { "epoch": 0.35, "learning_rate": 4.1100655721994185e-06, "logits/chosen": -2.9259910583496094, "logits/rejected": -2.6575160026550293, "logps/chosen": -92.81314849853516, "logps/rejected": -844.5223388671875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5812787413597107, "rewards/margins": 7.490097999572754, "rewards/rejected": -8.07137680053711, "step": 29200 }, { "epoch": 0.35, "learning_rate": 4.109266293177393e-06, "logits/chosen": -2.917531728744507, "logits/rejected": -2.380113363265991, "logps/chosen": -122.7935562133789, "logps/rejected": -912.6173706054688, "loss": 0.1197, "rewards/accuracies": 1.0, "rewards/chosen": -0.7946887016296387, "rewards/margins": 7.953388214111328, "rewards/rejected": -8.748077392578125, "step": 29210 }, { "epoch": 0.35, "learning_rate": 4.108466733181943e-06, "logits/chosen": -2.8991341590881348, "logits/rejected": -2.5014359951019287, "logps/chosen": -143.10177612304688, "logps/rejected": -899.2249755859375, "loss": 0.1432, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9992507696151733, "rewards/margins": 7.596624851226807, "rewards/rejected": -8.595874786376953, "step": 29220 }, { "epoch": 0.35, "learning_rate": 4.1076668923526675e-06, "logits/chosen": -2.851256847381592, "logits/rejected": -2.3819947242736816, "logps/chosen": -128.02793884277344, "logps/rejected": -820.9757690429688, "loss": 0.1012, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8678500056266785, "rewards/margins": 6.965366363525391, "rewards/rejected": -7.833217620849609, "step": 29230 }, { "epoch": 0.35, "learning_rate": 4.106866770829219e-06, "logits/chosen": -2.8051695823669434, "logits/rejected": -2.2551865577697754, "logps/chosen": -143.2668914794922, "logps/rejected": -909.9854736328125, "loss": 0.0981, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9331271052360535, "rewards/margins": 7.7809247970581055, "rewards/rejected": -8.714052200317383, "step": 29240 }, { "epoch": 0.35, "learning_rate": 4.106066368751294e-06, "logits/chosen": -2.9041521549224854, "logits/rejected": -2.4740357398986816, "logps/chosen": -127.83526611328125, "logps/rejected": -932.0482177734375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7985695600509644, "rewards/margins": 8.146231651306152, "rewards/rejected": -8.944801330566406, "step": 29250 }, { "epoch": 0.35, "learning_rate": 4.105265686258643e-06, "logits/chosen": -2.8847389221191406, "logits/rejected": -2.229191303253174, "logps/chosen": -134.61660766601562, "logps/rejected": -893.4671020507812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.817497730255127, "rewards/margins": 7.732647895812988, "rewards/rejected": -8.550146102905273, "step": 29260 }, { "epoch": 0.35, "learning_rate": 4.104464723491059e-06, "logits/chosen": -2.868323802947998, "logits/rejected": -2.081275463104248, "logps/chosen": -151.58267211914062, "logps/rejected": -995.3610229492188, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.9929534792900085, "rewards/margins": 8.576835632324219, "rewards/rejected": -9.569788932800293, "step": 29270 }, { "epoch": 0.35, "learning_rate": 4.103663480588393e-06, "logits/chosen": -2.8530938625335693, "logits/rejected": -2.112861156463623, "logps/chosen": -151.48471069335938, "logps/rejected": -913.1949462890625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.9970272183418274, "rewards/margins": 7.741335868835449, "rewards/rejected": -8.738362312316895, "step": 29280 }, { "epoch": 0.35, "learning_rate": 4.102861957690537e-06, "logits/chosen": -2.8887810707092285, "logits/rejected": -2.411745071411133, "logps/chosen": -119.25887298583984, "logps/rejected": -907.1046142578125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7445276975631714, "rewards/margins": 7.932066440582275, "rewards/rejected": -8.676594734191895, "step": 29290 }, { "epoch": 0.35, "learning_rate": 4.1020601549374336e-06, "logits/chosen": -2.874619722366333, "logits/rejected": -2.272094488143921, "logps/chosen": -139.7052459716797, "logps/rejected": -931.4505004882812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9288040995597839, "rewards/margins": 7.986729621887207, "rewards/rejected": -8.915533065795898, "step": 29300 }, { "epoch": 0.35, "learning_rate": 4.101258072469078e-06, "logits/chosen": -2.8784279823303223, "logits/rejected": -2.1832571029663086, "logps/chosen": -142.9617919921875, "logps/rejected": -1026.614990234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.947475254535675, "rewards/margins": 8.927438735961914, "rewards/rejected": -9.874913215637207, "step": 29310 }, { "epoch": 0.35, "learning_rate": 4.100455710425509e-06, "logits/chosen": -2.911165952682495, "logits/rejected": -2.5793709754943848, "logps/chosen": -112.4802474975586, "logps/rejected": -898.5247192382812, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.6901901960372925, "rewards/margins": 7.917112827301025, "rewards/rejected": -8.607303619384766, "step": 29320 }, { "epoch": 0.35, "learning_rate": 4.09965306894682e-06, "logits/chosen": -2.8641490936279297, "logits/rejected": -2.26420259475708, "logps/chosen": -183.5069580078125, "logps/rejected": -941.9034423828125, "loss": 0.1368, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.315344214439392, "rewards/margins": 7.713741302490234, "rewards/rejected": -9.029085159301758, "step": 29330 }, { "epoch": 0.35, "learning_rate": 4.098850148173146e-06, "logits/chosen": -2.886538505554199, "logits/rejected": -2.582227945327759, "logps/chosen": -120.4660415649414, "logps/rejected": -862.3302001953125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.8143731355667114, "rewards/margins": 7.413285732269287, "rewards/rejected": -8.227659225463867, "step": 29340 }, { "epoch": 0.35, "learning_rate": 4.098046948244678e-06, "logits/chosen": -2.806835651397705, "logits/rejected": -2.2749361991882324, "logps/chosen": -147.81466674804688, "logps/rejected": -932.513671875, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.0121654272079468, "rewards/margins": 7.933542728424072, "rewards/rejected": -8.945707321166992, "step": 29350 }, { "epoch": 0.35, "learning_rate": 4.097243469301651e-06, "logits/chosen": -2.8418548107147217, "logits/rejected": -2.4647376537323, "logps/chosen": -133.0880584716797, "logps/rejected": -847.09033203125, "loss": 0.199, "rewards/accuracies": 1.0, "rewards/chosen": -0.9391983151435852, "rewards/margins": 7.157954216003418, "rewards/rejected": -8.097151756286621, "step": 29360 }, { "epoch": 0.35, "learning_rate": 4.09643971148435e-06, "logits/chosen": -2.828680992126465, "logits/rejected": -2.054884195327759, "logps/chosen": -178.25802612304688, "logps/rejected": -1094.8055419921875, "loss": 0.1531, "rewards/accuracies": 1.0, "rewards/chosen": -1.242305874824524, "rewards/margins": 9.307446479797363, "rewards/rejected": -10.549753189086914, "step": 29370 }, { "epoch": 0.35, "learning_rate": 4.095635674933109e-06, "logits/chosen": -2.8069870471954346, "logits/rejected": -2.1959874629974365, "logps/chosen": -223.8590850830078, "logps/rejected": -1062.558837890625, "loss": 0.0229, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.7543106079101562, "rewards/margins": 8.46496295928955, "rewards/rejected": -10.219273567199707, "step": 29380 }, { "epoch": 0.35, "learning_rate": 4.094831359788312e-06, "logits/chosen": -2.8049440383911133, "logits/rejected": -2.001821279525757, "logps/chosen": -223.3571319580078, "logps/rejected": -1193.642333984375, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -1.6999590396881104, "rewards/margins": 9.816499710083008, "rewards/rejected": -11.516458511352539, "step": 29390 }, { "epoch": 0.35, "learning_rate": 4.0940267661903884e-06, "logits/chosen": -2.843993663787842, "logits/rejected": -2.1361379623413086, "logps/chosen": -190.2041778564453, "logps/rejected": -1131.8341064453125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.3574721813201904, "rewards/margins": 9.556501388549805, "rewards/rejected": -10.913973808288574, "step": 29400 }, { "epoch": 0.35, "learning_rate": 4.093221894279818e-06, "logits/chosen": -2.824552297592163, "logits/rejected": -2.048394203186035, "logps/chosen": -182.18592834472656, "logps/rejected": -1061.5794677734375, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.2479549646377563, "rewards/margins": 8.97160816192627, "rewards/rejected": -10.219561576843262, "step": 29410 }, { "epoch": 0.35, "learning_rate": 4.0924167441971306e-06, "logits/chosen": -2.885866403579712, "logits/rejected": -2.2946348190307617, "logps/chosen": -144.3824920654297, "logps/rejected": -959.9755859375, "loss": 0.1175, "rewards/accuracies": 1.0, "rewards/chosen": -0.9347718358039856, "rewards/margins": 8.261850357055664, "rewards/rejected": -9.196622848510742, "step": 29420 }, { "epoch": 0.35, "learning_rate": 4.091611316082903e-06, "logits/chosen": -2.939368724822998, "logits/rejected": -2.4157235622406006, "logps/chosen": -138.88296508789062, "logps/rejected": -888.0155029296875, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.8783615827560425, "rewards/margins": 7.605108737945557, "rewards/rejected": -8.48346996307373, "step": 29430 }, { "epoch": 0.35, "learning_rate": 4.090805610077759e-06, "logits/chosen": -2.8580403327941895, "logits/rejected": -2.2984070777893066, "logps/chosen": -165.2490692138672, "logps/rejected": -960.5631713867188, "loss": 0.0214, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1521011590957642, "rewards/margins": 8.054617881774902, "rewards/rejected": -9.206718444824219, "step": 29440 }, { "epoch": 0.35, "learning_rate": 4.089999626322375e-06, "logits/chosen": -2.81703782081604, "logits/rejected": -2.2479724884033203, "logps/chosen": -180.59375, "logps/rejected": -963.0877075195312, "loss": 0.1151, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.327979326248169, "rewards/margins": 7.93912410736084, "rewards/rejected": -9.26710319519043, "step": 29450 }, { "epoch": 0.35, "learning_rate": 4.0891933649574715e-06, "logits/chosen": -2.870882987976074, "logits/rejected": -2.3027305603027344, "logps/chosen": -165.65322875976562, "logps/rejected": -1038.325927734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1802842617034912, "rewards/margins": 8.802689552307129, "rewards/rejected": -9.982973098754883, "step": 29460 }, { "epoch": 0.35, "learning_rate": 4.088386826123819e-06, "logits/chosen": -2.8344407081604004, "logits/rejected": -2.138404369354248, "logps/chosen": -173.39749145507812, "logps/rejected": -1060.71240234375, "loss": 0.0991, "rewards/accuracies": 1.0, "rewards/chosen": -1.2721116542816162, "rewards/margins": 8.938047409057617, "rewards/rejected": -10.210158348083496, "step": 29470 }, { "epoch": 0.35, "learning_rate": 4.08758000996224e-06, "logits/chosen": -2.8698930740356445, "logits/rejected": -2.5284183025360107, "logps/chosen": -121.3070297241211, "logps/rejected": -893.4114379882812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8062708973884583, "rewards/margins": 7.748509407043457, "rewards/rejected": -8.554780960083008, "step": 29480 }, { "epoch": 0.35, "learning_rate": 4.0867729166136e-06, "logits/chosen": -2.8684182167053223, "logits/rejected": -2.4449658393859863, "logps/chosen": -125.50962829589844, "logps/rejected": -898.0447387695312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8372182846069336, "rewards/margins": 7.774387359619141, "rewards/rejected": -8.611604690551758, "step": 29490 }, { "epoch": 0.35, "learning_rate": 4.085965546218815e-06, "logits/chosen": -2.822066068649292, "logits/rejected": -2.2983462810516357, "logps/chosen": -151.4875946044922, "logps/rejected": -1006.2579956054688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.1094664335250854, "rewards/margins": 8.574023246765137, "rewards/rejected": -9.683489799499512, "step": 29500 }, { "epoch": 0.35, "learning_rate": 4.085157898918853e-06, "logits/chosen": -2.8888511657714844, "logits/rejected": -2.6687188148498535, "logps/chosen": -86.85728454589844, "logps/rejected": -807.34716796875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5050806403160095, "rewards/margins": 7.200032711029053, "rewards/rejected": -7.705112457275391, "step": 29510 }, { "epoch": 0.35, "learning_rate": 4.084349974854722e-06, "logits/chosen": -2.897824764251709, "logits/rejected": -2.2208499908447266, "logps/chosen": -165.59814453125, "logps/rejected": -993.2374267578125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1504825353622437, "rewards/margins": 8.392698287963867, "rewards/rejected": -9.543182373046875, "step": 29520 }, { "epoch": 0.35, "learning_rate": 4.0835417741674864e-06, "logits/chosen": -2.8079171180725098, "logits/rejected": -2.078826904296875, "logps/chosen": -193.60154724121094, "logps/rejected": -1109.2816162109375, "loss": 0.0783, "rewards/accuracies": 1.0, "rewards/chosen": -1.4048571586608887, "rewards/margins": 9.275979995727539, "rewards/rejected": -10.680837631225586, "step": 29530 }, { "epoch": 0.35, "learning_rate": 4.082733296998255e-06, "logits/chosen": -2.8454196453094482, "logits/rejected": -2.359290361404419, "logps/chosen": -146.69952392578125, "logps/rejected": -967.2815551757812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9794334173202515, "rewards/margins": 8.312887191772461, "rewards/rejected": -9.292320251464844, "step": 29540 }, { "epoch": 0.35, "learning_rate": 4.081924543488186e-06, "logits/chosen": -2.892617702484131, "logits/rejected": -2.526859998703003, "logps/chosen": -120.02659606933594, "logps/rejected": -965.8952026367188, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8017982244491577, "rewards/margins": 8.480890274047852, "rewards/rejected": -9.282687187194824, "step": 29550 }, { "epoch": 0.35, "learning_rate": 4.0811155137784856e-06, "logits/chosen": -2.8576407432556152, "logits/rejected": -2.4925715923309326, "logps/chosen": -113.53163146972656, "logps/rejected": -904.17822265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7218992710113525, "rewards/margins": 7.940262794494629, "rewards/rejected": -8.662161827087402, "step": 29560 }, { "epoch": 0.35, "learning_rate": 4.080306208010407e-06, "logits/chosen": -2.8816044330596924, "logits/rejected": -2.630552291870117, "logps/chosen": -126.36405181884766, "logps/rejected": -778.1295166015625, "loss": 0.1679, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.887816309928894, "rewards/margins": 6.517928123474121, "rewards/rejected": -7.405745029449463, "step": 29570 }, { "epoch": 0.35, "learning_rate": 4.079496626325255e-06, "logits/chosen": -2.895969867706299, "logits/rejected": -2.2584664821624756, "logps/chosen": -133.95919799804688, "logps/rejected": -1031.455322265625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8688009977340698, "rewards/margins": 9.038497924804688, "rewards/rejected": -9.90729808807373, "step": 29580 }, { "epoch": 0.35, "learning_rate": 4.0786867688643775e-06, "logits/chosen": -2.8319151401519775, "logits/rejected": -2.1146655082702637, "logps/chosen": -184.60470581054688, "logps/rejected": -1150.109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.255425214767456, "rewards/margins": 9.839686393737793, "rewards/rejected": -11.095109939575195, "step": 29590 }, { "epoch": 0.35, "learning_rate": 4.077876635769176e-06, "logits/chosen": -2.780376672744751, "logits/rejected": -2.026463270187378, "logps/chosen": -173.47015380859375, "logps/rejected": -1096.105224609375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.2191317081451416, "rewards/margins": 9.331770896911621, "rewards/rejected": -10.5509033203125, "step": 29600 }, { "epoch": 0.35, "learning_rate": 4.0770662271810955e-06, "logits/chosen": -2.807340145111084, "logits/rejected": -2.1317293643951416, "logps/chosen": -131.95401000976562, "logps/rejected": -1005.8328247070312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8873509168624878, "rewards/margins": 8.784729957580566, "rewards/rejected": -9.672080039978027, "step": 29610 }, { "epoch": 0.35, "learning_rate": 4.076255543241632e-06, "logits/chosen": -2.8412654399871826, "logits/rejected": -1.9252179861068726, "logps/chosen": -181.9255828857422, "logps/rejected": -1192.2022705078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.2757713794708252, "rewards/margins": 10.239069938659668, "rewards/rejected": -11.514841079711914, "step": 29620 }, { "epoch": 0.35, "learning_rate": 4.075444584092328e-06, "logits/chosen": -2.8228564262390137, "logits/rejected": -2.2796199321746826, "logps/chosen": -156.2971954345703, "logps/rejected": -1035.99609375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.0639383792877197, "rewards/margins": 8.907224655151367, "rewards/rejected": -9.971163749694824, "step": 29630 }, { "epoch": 0.35, "learning_rate": 4.0746333498747755e-06, "logits/chosen": -2.85432767868042, "logits/rejected": -2.4175522327423096, "logps/chosen": -131.99713134765625, "logps/rejected": -1007.0396728515625, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.9103025197982788, "rewards/margins": 8.771295547485352, "rewards/rejected": -9.681596755981445, "step": 29640 }, { "epoch": 0.35, "learning_rate": 4.073821840730614e-06, "logits/chosen": -2.8676211833953857, "logits/rejected": -2.1855216026306152, "logps/chosen": -150.01731872558594, "logps/rejected": -1040.514892578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9374359250068665, "rewards/margins": 9.079261779785156, "rewards/rejected": -10.016698837280273, "step": 29650 }, { "epoch": 0.36, "learning_rate": 4.073010056801529e-06, "logits/chosen": -2.837362289428711, "logits/rejected": -2.40274715423584, "logps/chosen": -124.00028991699219, "logps/rejected": -856.2926025390625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7819658517837524, "rewards/margins": 7.4042205810546875, "rewards/rejected": -8.186185836791992, "step": 29660 }, { "epoch": 0.36, "learning_rate": 4.072197998229258e-06, "logits/chosen": -2.9039790630340576, "logits/rejected": -2.4674253463745117, "logps/chosen": -125.25175476074219, "logps/rejected": -887.2140502929688, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7571123242378235, "rewards/margins": 7.738768577575684, "rewards/rejected": -8.495881080627441, "step": 29670 }, { "epoch": 0.36, "learning_rate": 4.0713856651555835e-06, "logits/chosen": -2.8481812477111816, "logits/rejected": -2.230123281478882, "logps/chosen": -152.0886688232422, "logps/rejected": -1057.1331787109375, "loss": 0.1267, "rewards/accuracies": 1.0, "rewards/chosen": -1.028011679649353, "rewards/margins": 9.151813507080078, "rewards/rejected": -10.179826736450195, "step": 29680 }, { "epoch": 0.36, "learning_rate": 4.070573057722336e-06, "logits/chosen": -2.876059055328369, "logits/rejected": -2.212639331817627, "logps/chosen": -145.35818481445312, "logps/rejected": -1100.344970703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9955331087112427, "rewards/margins": 9.617193222045898, "rewards/rejected": -10.612726211547852, "step": 29690 }, { "epoch": 0.36, "learning_rate": 4.069760176071394e-06, "logits/chosen": -2.885298252105713, "logits/rejected": -2.3022959232330322, "logps/chosen": -156.97532653808594, "logps/rejected": -951.2224731445312, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.0468336343765259, "rewards/margins": 8.072661399841309, "rewards/rejected": -9.11949634552002, "step": 29700 }, { "epoch": 0.36, "learning_rate": 4.068947020344685e-06, "logits/chosen": -2.8909037113189697, "logits/rejected": -2.362504482269287, "logps/chosen": -171.0702667236328, "logps/rejected": -933.9485473632812, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.2480626106262207, "rewards/margins": 7.701844215393066, "rewards/rejected": -8.949905395507812, "step": 29710 }, { "epoch": 0.36, "learning_rate": 4.068133590684184e-06, "logits/chosen": -2.825666904449463, "logits/rejected": -2.1272644996643066, "logps/chosen": -168.0935516357422, "logps/rejected": -1090.544921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1393238306045532, "rewards/margins": 9.3617582321167, "rewards/rejected": -10.501082420349121, "step": 29720 }, { "epoch": 0.36, "learning_rate": 4.067319887231914e-06, "logits/chosen": -2.822129249572754, "logits/rejected": -2.3174545764923096, "logps/chosen": -165.75950622558594, "logps/rejected": -1054.251220703125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.1899900436401367, "rewards/margins": 8.941305160522461, "rewards/rejected": -10.131296157836914, "step": 29730 }, { "epoch": 0.36, "learning_rate": 4.066505910129945e-06, "logits/chosen": -2.9005284309387207, "logits/rejected": -2.6490931510925293, "logps/chosen": -114.39341735839844, "logps/rejected": -824.7652587890625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7749820947647095, "rewards/margins": 7.108321189880371, "rewards/rejected": -7.883303642272949, "step": 29740 }, { "epoch": 0.36, "learning_rate": 4.0656916595203936e-06, "logits/chosen": -2.801839828491211, "logits/rejected": -2.324518918991089, "logps/chosen": -130.0945587158203, "logps/rejected": -914.3814697265625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.8547048568725586, "rewards/margins": 7.90444278717041, "rewards/rejected": -8.759147644042969, "step": 29750 }, { "epoch": 0.36, "learning_rate": 4.064877135545428e-06, "logits/chosen": -2.8694214820861816, "logits/rejected": -2.4119808673858643, "logps/chosen": -128.85342407226562, "logps/rejected": -918.1222534179688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8589984178543091, "rewards/margins": 7.9315595626831055, "rewards/rejected": -8.790557861328125, "step": 29760 }, { "epoch": 0.36, "learning_rate": 4.06406233834726e-06, "logits/chosen": -2.862574577331543, "logits/rejected": -2.5204215049743652, "logps/chosen": -128.7662353515625, "logps/rejected": -958.41943359375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8404890298843384, "rewards/margins": 8.349997520446777, "rewards/rejected": -9.190485954284668, "step": 29770 }, { "epoch": 0.36, "learning_rate": 4.063247268068152e-06, "logits/chosen": -2.8685364723205566, "logits/rejected": -2.4339919090270996, "logps/chosen": -178.63316345214844, "logps/rejected": -1043.838134765625, "loss": 0.2462, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2807539701461792, "rewards/margins": 8.755585670471191, "rewards/rejected": -10.03633975982666, "step": 29780 }, { "epoch": 0.36, "learning_rate": 4.062431924850413e-06, "logits/chosen": -2.8446054458618164, "logits/rejected": -2.2996017932891846, "logps/chosen": -156.37045288085938, "logps/rejected": -892.0989990234375, "loss": 0.1083, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.124462366104126, "rewards/margins": 7.408804416656494, "rewards/rejected": -8.533266067504883, "step": 29790 }, { "epoch": 0.36, "learning_rate": 4.061616308836399e-06, "logits/chosen": -2.851302146911621, "logits/rejected": -2.2109177112579346, "logps/chosen": -142.1667938232422, "logps/rejected": -1031.039794921875, "loss": 0.0857, "rewards/accuracies": 1.0, "rewards/chosen": -0.9282752871513367, "rewards/margins": 8.993257522583008, "rewards/rejected": -9.921533584594727, "step": 29800 }, { "epoch": 0.36, "learning_rate": 4.060800420168515e-06, "logits/chosen": -2.824826717376709, "logits/rejected": -2.368544101715088, "logps/chosen": -122.56956481933594, "logps/rejected": -870.4593505859375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8219034075737, "rewards/margins": 7.515493869781494, "rewards/rejected": -8.337396621704102, "step": 29810 }, { "epoch": 0.36, "learning_rate": 4.059984258989212e-06, "logits/chosen": -2.871730089187622, "logits/rejected": -2.286072254180908, "logps/chosen": -156.73348999023438, "logps/rejected": -1106.726806640625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0949610471725464, "rewards/margins": 9.573282241821289, "rewards/rejected": -10.668244361877441, "step": 29820 }, { "epoch": 0.36, "learning_rate": 4.0591678254409904e-06, "logits/chosen": -2.831454038619995, "logits/rejected": -2.2513575553894043, "logps/chosen": -143.8210906982422, "logps/rejected": -1056.454833984375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9204400777816772, "rewards/margins": 9.256158828735352, "rewards/rejected": -10.176597595214844, "step": 29830 }, { "epoch": 0.36, "learning_rate": 4.0583511196663975e-06, "logits/chosen": -2.8893375396728516, "logits/rejected": -2.5119290351867676, "logps/chosen": -128.0638885498047, "logps/rejected": -864.8585205078125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8632670640945435, "rewards/margins": 7.4212517738342285, "rewards/rejected": -8.28451919555664, "step": 29840 }, { "epoch": 0.36, "learning_rate": 4.057534141808027e-06, "logits/chosen": -2.86077880859375, "logits/rejected": -2.204646587371826, "logps/chosen": -171.31063842773438, "logps/rejected": -1040.275634765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.172881007194519, "rewards/margins": 8.841414451599121, "rewards/rejected": -10.01429557800293, "step": 29850 }, { "epoch": 0.36, "learning_rate": 4.056716892008521e-06, "logits/chosen": -2.872765302658081, "logits/rejected": -2.263606309890747, "logps/chosen": -161.31503295898438, "logps/rejected": -1122.9427490234375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.1480438709259033, "rewards/margins": 9.672999382019043, "rewards/rejected": -10.821043968200684, "step": 29860 }, { "epoch": 0.36, "learning_rate": 4.055899370410568e-06, "logits/chosen": -2.8437561988830566, "logits/rejected": -2.2989609241485596, "logps/chosen": -158.09884643554688, "logps/rejected": -896.5133056640625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1236212253570557, "rewards/margins": 7.465950012207031, "rewards/rejected": -8.589570045471191, "step": 29870 }, { "epoch": 0.36, "learning_rate": 4.055081577156908e-06, "logits/chosen": -2.845470905303955, "logits/rejected": -2.2220911979675293, "logps/chosen": -212.0629425048828, "logps/rejected": -1025.5166015625, "loss": 0.1306, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.583808183670044, "rewards/margins": 8.282888412475586, "rewards/rejected": -9.866697311401367, "step": 29880 }, { "epoch": 0.36, "learning_rate": 4.054263512390323e-06, "logits/chosen": -2.8299012184143066, "logits/rejected": -2.2310473918914795, "logps/chosen": -178.69674682617188, "logps/rejected": -1086.5770263671875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.292299509048462, "rewards/margins": 9.198907852172852, "rewards/rejected": -10.491209030151367, "step": 29890 }, { "epoch": 0.36, "learning_rate": 4.053445176253645e-06, "logits/chosen": -2.794926881790161, "logits/rejected": -2.048229694366455, "logps/chosen": -164.5497283935547, "logps/rejected": -1125.19189453125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.1883447170257568, "rewards/margins": 9.666925430297852, "rewards/rejected": -10.855270385742188, "step": 29900 }, { "epoch": 0.36, "learning_rate": 4.0526265688897545e-06, "logits/chosen": -2.864830732345581, "logits/rejected": -2.480891466140747, "logps/chosen": -174.2861328125, "logps/rejected": -940.5968627929688, "loss": 0.0919, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2937662601470947, "rewards/margins": 7.733537197113037, "rewards/rejected": -9.027303695678711, "step": 29910 }, { "epoch": 0.36, "learning_rate": 4.051807690441577e-06, "logits/chosen": -2.833083391189575, "logits/rejected": -2.0227084159851074, "logps/chosen": -192.242431640625, "logps/rejected": -1087.542236328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.340015172958374, "rewards/margins": 9.15163516998291, "rewards/rejected": -10.491650581359863, "step": 29920 }, { "epoch": 0.36, "learning_rate": 4.050988541052088e-06, "logits/chosen": -2.8035247325897217, "logits/rejected": -2.081386089324951, "logps/chosen": -184.62294006347656, "logps/rejected": -1133.123291015625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.2883249521255493, "rewards/margins": 9.648985862731934, "rewards/rejected": -10.937311172485352, "step": 29930 }, { "epoch": 0.36, "learning_rate": 4.050169120864306e-06, "logits/chosen": -2.815208673477173, "logits/rejected": -2.1648898124694824, "logps/chosen": -176.13778686523438, "logps/rejected": -1106.693603515625, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -1.2427315711975098, "rewards/margins": 9.43492603302002, "rewards/rejected": -10.677656173706055, "step": 29940 }, { "epoch": 0.36, "learning_rate": 4.049349430021301e-06, "logits/chosen": -2.812713146209717, "logits/rejected": -2.356606960296631, "logps/chosen": -141.83078002929688, "logps/rejected": -1022.6107177734375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9825863838195801, "rewards/margins": 8.858789443969727, "rewards/rejected": -9.841375350952148, "step": 29950 }, { "epoch": 0.36, "learning_rate": 4.048529468666189e-06, "logits/chosen": -2.8450710773468018, "logits/rejected": -2.2153491973876953, "logps/chosen": -176.089111328125, "logps/rejected": -1034.356689453125, "loss": 0.133, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3214352130889893, "rewards/margins": 8.637155532836914, "rewards/rejected": -9.95859146118164, "step": 29960 }, { "epoch": 0.36, "learning_rate": 4.047709236942132e-06, "logits/chosen": -2.857919216156006, "logits/rejected": -2.4319498538970947, "logps/chosen": -135.45700073242188, "logps/rejected": -934.0960693359375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9574702382087708, "rewards/margins": 8.003863334655762, "rewards/rejected": -8.961334228515625, "step": 29970 }, { "epoch": 0.36, "learning_rate": 4.046888734992342e-06, "logits/chosen": -2.8122501373291016, "logits/rejected": -2.0965957641601562, "logps/chosen": -161.46058654785156, "logps/rejected": -1115.9398193359375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.130631685256958, "rewards/margins": 9.631868362426758, "rewards/rejected": -10.76249885559082, "step": 29980 }, { "epoch": 0.36, "learning_rate": 4.046067962960074e-06, "logits/chosen": -2.8040640354156494, "logits/rejected": -2.414027690887451, "logps/chosen": -193.27464294433594, "logps/rejected": -855.6357421875, "loss": 0.2324, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.5246741771697998, "rewards/margins": 6.649544715881348, "rewards/rejected": -8.174219131469727, "step": 29990 }, { "epoch": 0.36, "learning_rate": 4.045246920988634e-06, "logits/chosen": -2.871974468231201, "logits/rejected": -2.5249505043029785, "logps/chosen": -101.83731079101562, "logps/rejected": -895.4842529296875, "loss": 0.1404, "rewards/accuracies": 1.0, "rewards/chosen": -0.6521726846694946, "rewards/margins": 7.921012878417969, "rewards/rejected": -8.573185920715332, "step": 30000 }, { "epoch": 0.36, "eval_logits/chosen": -2.8352091312408447, "eval_logits/rejected": -1.5906795263290405, "eval_logps/chosen": -310.8155212402344, "eval_logps/rejected": -1287.7384033203125, "eval_loss": 0.0001787526998668909, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -2.4963526725769043, "eval_rewards/margins": 9.913776397705078, "eval_rewards/rejected": -12.41012954711914, "eval_runtime": 1.2165, "eval_samples_per_second": 4.11, "eval_steps_per_second": 2.466, "step": 30000 }, { "epoch": 0.36, "learning_rate": 4.044425609221374e-06, "logits/chosen": -2.9259371757507324, "logits/rejected": -2.550546169281006, "logps/chosen": -128.18606567382812, "logps/rejected": -888.8206787109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8325536847114563, "rewards/margins": 7.651612281799316, "rewards/rejected": -8.484167098999023, "step": 30010 }, { "epoch": 0.36, "learning_rate": 4.043604027801691e-06, "logits/chosen": -2.753920555114746, "logits/rejected": -1.7189184427261353, "logps/chosen": -208.04977416992188, "logps/rejected": -1263.915283203125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.451521635055542, "rewards/margins": 10.773628234863281, "rewards/rejected": -12.225150108337402, "step": 30020 }, { "epoch": 0.36, "learning_rate": 4.042782176873033e-06, "logits/chosen": -2.8231890201568604, "logits/rejected": -2.2560806274414062, "logps/chosen": -179.4003143310547, "logps/rejected": -954.8126831054688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.3098514080047607, "rewards/margins": 7.8618364334106445, "rewards/rejected": -9.171688079833984, "step": 30030 }, { "epoch": 0.36, "learning_rate": 4.041960056578891e-06, "logits/chosen": -2.900726795196533, "logits/rejected": -2.568756580352783, "logps/chosen": -125.01881408691406, "logps/rejected": -990.6234130859375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8436461687088013, "rewards/margins": 8.678909301757812, "rewards/rejected": -9.52255630493164, "step": 30040 }, { "epoch": 0.36, "learning_rate": 4.041137667062806e-06, "logits/chosen": -2.8032214641571045, "logits/rejected": -2.2447614669799805, "logps/chosen": -147.16627502441406, "logps/rejected": -1067.23291015625, "loss": 0.1699, "rewards/accuracies": 1.0, "rewards/chosen": -1.0270965099334717, "rewards/margins": 9.26608943939209, "rewards/rejected": -10.29318618774414, "step": 30050 }, { "epoch": 0.36, "learning_rate": 4.0403150084683655e-06, "logits/chosen": -2.867271900177002, "logits/rejected": -2.272611141204834, "logps/chosen": -149.45297241210938, "logps/rejected": -1029.833984375, "loss": 0.0248, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9351451992988586, "rewards/margins": 8.969160079956055, "rewards/rejected": -9.904305458068848, "step": 30060 }, { "epoch": 0.36, "learning_rate": 4.039492080939201e-06, "logits/chosen": -2.898414134979248, "logits/rejected": -2.200148105621338, "logps/chosen": -132.6900177001953, "logps/rejected": -881.7589721679688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8434394001960754, "rewards/margins": 7.587389945983887, "rewards/rejected": -8.430829048156738, "step": 30070 }, { "epoch": 0.36, "learning_rate": 4.038668884618996e-06, "logits/chosen": -2.9142253398895264, "logits/rejected": -2.368577003479004, "logps/chosen": -149.5153045654297, "logps/rejected": -937.0634765625, "loss": 0.1079, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0535250902175903, "rewards/margins": 7.927011966705322, "rewards/rejected": -8.980538368225098, "step": 30080 }, { "epoch": 0.36, "learning_rate": 4.037845419651477e-06, "logits/chosen": -2.871846914291382, "logits/rejected": -2.1374104022979736, "logps/chosen": -148.74002075195312, "logps/rejected": -975.4763793945312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.9444762468338013, "rewards/margins": 8.414827346801758, "rewards/rejected": -9.359304428100586, "step": 30090 }, { "epoch": 0.36, "learning_rate": 4.037021686180419e-06, "logits/chosen": -2.8504977226257324, "logits/rejected": -2.132707118988037, "logps/chosen": -146.66244506835938, "logps/rejected": -1074.5931396484375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.935197651386261, "rewards/margins": 9.415977478027344, "rewards/rejected": -10.351176261901855, "step": 30100 }, { "epoch": 0.36, "learning_rate": 4.0361976843496426e-06, "logits/chosen": -2.8483150005340576, "logits/rejected": -2.345158338546753, "logps/chosen": -134.114990234375, "logps/rejected": -1029.246337890625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8933641314506531, "rewards/margins": 8.990220069885254, "rewards/rejected": -9.883584976196289, "step": 30110 }, { "epoch": 0.36, "learning_rate": 4.035373414303017e-06, "logits/chosen": -2.836071729660034, "logits/rejected": -2.130971908569336, "logps/chosen": -194.55349731445312, "logps/rejected": -1071.1640625, "loss": 0.1161, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4920240640640259, "rewards/margins": 8.815710067749023, "rewards/rejected": -10.307733535766602, "step": 30120 }, { "epoch": 0.36, "learning_rate": 4.034548876184459e-06, "logits/chosen": -2.8517160415649414, "logits/rejected": -2.4001505374908447, "logps/chosen": -135.84127807617188, "logps/rejected": -932.4035034179688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8669630289077759, "rewards/margins": 8.074654579162598, "rewards/rejected": -8.941617965698242, "step": 30130 }, { "epoch": 0.36, "learning_rate": 4.033724070137929e-06, "logits/chosen": -2.859647035598755, "logits/rejected": -2.3495118618011475, "logps/chosen": -137.4516143798828, "logps/rejected": -1018.52197265625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.8628755807876587, "rewards/margins": 8.93474292755127, "rewards/rejected": -9.797618865966797, "step": 30140 }, { "epoch": 0.36, "learning_rate": 4.032898996307436e-06, "logits/chosen": -2.8479809761047363, "logits/rejected": -2.3413243293762207, "logps/chosen": -158.77896118164062, "logps/rejected": -965.5379028320312, "loss": 0.1227, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1209228038787842, "rewards/margins": 8.15034294128418, "rewards/rejected": -9.271265029907227, "step": 30150 }, { "epoch": 0.36, "learning_rate": 4.032073654837036e-06, "logits/chosen": -2.88735032081604, "logits/rejected": -2.2819292545318604, "logps/chosen": -153.53375244140625, "logps/rejected": -1071.405517578125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0735533237457275, "rewards/margins": 9.247358322143555, "rewards/rejected": -10.32091236114502, "step": 30160 }, { "epoch": 0.36, "learning_rate": 4.03124804587083e-06, "logits/chosen": -2.8382022380828857, "logits/rejected": -2.304965019226074, "logps/chosen": -154.55801391601562, "logps/rejected": -1058.116455078125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.1090307235717773, "rewards/margins": 9.0833101272583, "rewards/rejected": -10.192339897155762, "step": 30170 }, { "epoch": 0.36, "learning_rate": 4.0304221695529675e-06, "logits/chosen": -2.8231539726257324, "logits/rejected": -2.3901379108428955, "logps/chosen": -126.0860595703125, "logps/rejected": -921.8268432617188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8559178113937378, "rewards/margins": 7.990964412689209, "rewards/rejected": -8.846881866455078, "step": 30180 }, { "epoch": 0.36, "learning_rate": 4.0295960260276464e-06, "logits/chosen": -2.7761495113372803, "logits/rejected": -2.072286605834961, "logps/chosen": -200.7161102294922, "logps/rejected": -1208.3509521484375, "loss": 0.1026, "rewards/accuracies": 1.0, "rewards/chosen": -1.5093291997909546, "rewards/margins": 10.185949325561523, "rewards/rejected": -11.695279121398926, "step": 30190 }, { "epoch": 0.36, "learning_rate": 4.028769615439107e-06, "logits/chosen": -2.8533120155334473, "logits/rejected": -2.308107852935791, "logps/chosen": -173.81463623046875, "logps/rejected": -1041.5673828125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.2875462770462036, "rewards/margins": 8.724843978881836, "rewards/rejected": -10.012392044067383, "step": 30200 }, { "epoch": 0.36, "learning_rate": 4.027942937931638e-06, "logits/chosen": -2.8176915645599365, "logits/rejected": -2.2619142532348633, "logps/chosen": -178.79750061035156, "logps/rejected": -1102.5042724609375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.330484390258789, "rewards/margins": 9.308137893676758, "rewards/rejected": -10.638622283935547, "step": 30210 }, { "epoch": 0.36, "learning_rate": 4.027115993649577e-06, "logits/chosen": -2.902130603790283, "logits/rejected": -2.3732619285583496, "logps/chosen": -163.93167114257812, "logps/rejected": -1032.802001953125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1512588262557983, "rewards/margins": 8.776993751525879, "rewards/rejected": -9.928253173828125, "step": 30220 }, { "epoch": 0.36, "learning_rate": 4.026288782737304e-06, "logits/chosen": -2.8667285442352295, "logits/rejected": -2.1751022338867188, "logps/chosen": -182.30699157714844, "logps/rejected": -1119.503173828125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3758416175842285, "rewards/margins": 9.434327125549316, "rewards/rejected": -10.81016731262207, "step": 30230 }, { "epoch": 0.36, "learning_rate": 4.025461305339249e-06, "logits/chosen": -2.8435847759246826, "logits/rejected": -2.51409912109375, "logps/chosen": -111.38932800292969, "logps/rejected": -857.3543090820312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7057012915611267, "rewards/margins": 7.491950988769531, "rewards/rejected": -8.197651863098145, "step": 30240 }, { "epoch": 0.36, "learning_rate": 4.024633561599886e-06, "logits/chosen": -2.867187023162842, "logits/rejected": -2.293466567993164, "logps/chosen": -174.25656127929688, "logps/rejected": -1106.974365234375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.2643754482269287, "rewards/margins": 9.406652450561523, "rewards/rejected": -10.671028137207031, "step": 30250 }, { "epoch": 0.36, "learning_rate": 4.023805551663738e-06, "logits/chosen": -2.8152012825012207, "logits/rejected": -2.2548012733459473, "logps/chosen": -142.0057830810547, "logps/rejected": -984.7369384765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9778871536254883, "rewards/margins": 8.486311912536621, "rewards/rejected": -9.46419906616211, "step": 30260 }, { "epoch": 0.36, "learning_rate": 4.022977275675373e-06, "logits/chosen": -2.8896484375, "logits/rejected": -2.327854871749878, "logps/chosen": -210.94577026367188, "logps/rejected": -919.3162841796875, "loss": 0.1032, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6060940027236938, "rewards/margins": 7.195218086242676, "rewards/rejected": -8.801312446594238, "step": 30270 }, { "epoch": 0.36, "learning_rate": 4.022148733779406e-06, "logits/chosen": -2.801754951477051, "logits/rejected": -2.1517624855041504, "logps/chosen": -177.81234741210938, "logps/rejected": -1042.334228515625, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.2911096811294556, "rewards/margins": 8.731073379516602, "rewards/rejected": -10.022183418273926, "step": 30280 }, { "epoch": 0.36, "learning_rate": 4.021319926120497e-06, "logits/chosen": -2.899669647216797, "logits/rejected": -2.094867467880249, "logps/chosen": -180.48068237304688, "logps/rejected": -1043.9251708984375, "loss": 0.1215, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.328393816947937, "rewards/margins": 8.717866897583008, "rewards/rejected": -10.046260833740234, "step": 30290 }, { "epoch": 0.36, "learning_rate": 4.020490852843355e-06, "logits/chosen": -2.810110330581665, "logits/rejected": -2.057389736175537, "logps/chosen": -202.98294067382812, "logps/rejected": -1237.483154296875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.5364611148834229, "rewards/margins": 10.435853958129883, "rewards/rejected": -11.972312927246094, "step": 30300 }, { "epoch": 0.36, "learning_rate": 4.019661514092733e-06, "logits/chosen": -2.84177565574646, "logits/rejected": -2.2284514904022217, "logps/chosen": -156.76309204101562, "logps/rejected": -1153.600341796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.066187858581543, "rewards/margins": 10.060620307922363, "rewards/rejected": -11.126809120178223, "step": 30310 }, { "epoch": 0.36, "learning_rate": 4.01883191001343e-06, "logits/chosen": -2.841604232788086, "logits/rejected": -2.3358559608459473, "logps/chosen": -147.26458740234375, "logps/rejected": -1034.0418701171875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.9957646131515503, "rewards/margins": 8.942593574523926, "rewards/rejected": -9.93835735321045, "step": 30320 }, { "epoch": 0.36, "learning_rate": 4.018002040750295e-06, "logits/chosen": -2.873779058456421, "logits/rejected": -2.415173053741455, "logps/chosen": -141.77133178710938, "logps/rejected": -888.8797607421875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.9764531254768372, "rewards/margins": 7.53563928604126, "rewards/rejected": -8.512093544006348, "step": 30330 }, { "epoch": 0.36, "learning_rate": 4.017171906448219e-06, "logits/chosen": -2.8913159370422363, "logits/rejected": -2.4345858097076416, "logps/chosen": -153.81011962890625, "logps/rejected": -909.9236450195312, "loss": 0.1081, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1247153282165527, "rewards/margins": 7.602403163909912, "rewards/rejected": -8.727119445800781, "step": 30340 }, { "epoch": 0.36, "learning_rate": 4.016341507252143e-06, "logits/chosen": -2.871549129486084, "logits/rejected": -2.387918472290039, "logps/chosen": -144.69125366210938, "logps/rejected": -895.0755004882812, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.980684757232666, "rewards/margins": 7.59298038482666, "rewards/rejected": -8.573663711547852, "step": 30350 }, { "epoch": 0.36, "learning_rate": 4.015510843307051e-06, "logits/chosen": -2.8399548530578613, "logits/rejected": -2.4825825691223145, "logps/chosen": -139.37989807128906, "logps/rejected": -959.5363159179688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9772016406059265, "rewards/margins": 8.245668411254883, "rewards/rejected": -9.222870826721191, "step": 30360 }, { "epoch": 0.36, "learning_rate": 4.014679914757974e-06, "logits/chosen": -2.837554454803467, "logits/rejected": -1.978632926940918, "logps/chosen": -199.11708068847656, "logps/rejected": -1099.0950927734375, "loss": 0.0289, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.490200161933899, "rewards/margins": 9.109952926635742, "rewards/rejected": -10.600152969360352, "step": 30370 }, { "epoch": 0.36, "learning_rate": 4.013848721749992e-06, "logits/chosen": -2.8729944229125977, "logits/rejected": -2.1999754905700684, "logps/chosen": -141.1526336669922, "logps/rejected": -1018.5535888671875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.9242919683456421, "rewards/margins": 8.850942611694336, "rewards/rejected": -9.775235176086426, "step": 30380 }, { "epoch": 0.36, "learning_rate": 4.013017264428227e-06, "logits/chosen": -2.8635032176971436, "logits/rejected": -2.2148308753967285, "logps/chosen": -170.3861083984375, "logps/rejected": -1181.774658203125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.182145595550537, "rewards/margins": 10.23646354675293, "rewards/rejected": -11.418608665466309, "step": 30390 }, { "epoch": 0.36, "learning_rate": 4.01218554293785e-06, "logits/chosen": -2.816784381866455, "logits/rejected": -2.141448497772217, "logps/chosen": -191.5861358642578, "logps/rejected": -1131.976806640625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3469007015228271, "rewards/margins": 9.576367378234863, "rewards/rejected": -10.923269271850586, "step": 30400 }, { "epoch": 0.36, "learning_rate": 4.011353557424077e-06, "logits/chosen": -2.786952018737793, "logits/rejected": -2.0631296634674072, "logps/chosen": -176.47592163085938, "logps/rejected": -1142.5435791015625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.2104551792144775, "rewards/margins": 9.814736366271973, "rewards/rejected": -11.025192260742188, "step": 30410 }, { "epoch": 0.36, "learning_rate": 4.01052130803217e-06, "logits/chosen": -2.8522660732269287, "logits/rejected": -2.3172640800476074, "logps/chosen": -144.8547821044922, "logps/rejected": -1044.4951171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9708884954452515, "rewards/margins": 9.084632873535156, "rewards/rejected": -10.055521011352539, "step": 30420 }, { "epoch": 0.36, "learning_rate": 4.009688794907439e-06, "logits/chosen": -2.8346240520477295, "logits/rejected": -2.1691067218780518, "logps/chosen": -180.32420349121094, "logps/rejected": -1071.731689453125, "loss": 0.1351, "rewards/accuracies": 1.0, "rewards/chosen": -1.288600206375122, "rewards/margins": 9.032957077026367, "rewards/rejected": -10.321557998657227, "step": 30430 }, { "epoch": 0.36, "learning_rate": 4.008856018195237e-06, "logits/chosen": -2.849053144454956, "logits/rejected": -2.538076877593994, "logps/chosen": -113.54496765136719, "logps/rejected": -937.1048583984375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7423156499862671, "rewards/margins": 8.243245124816895, "rewards/rejected": -8.985560417175293, "step": 30440 }, { "epoch": 0.36, "learning_rate": 4.008022978040965e-06, "logits/chosen": -2.8547329902648926, "logits/rejected": -2.2821455001831055, "logps/chosen": -173.59156799316406, "logps/rejected": -966.3402099609375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.286773443222046, "rewards/margins": 8.003499984741211, "rewards/rejected": -9.290273666381836, "step": 30450 }, { "epoch": 0.36, "learning_rate": 4.0071896745900685e-06, "logits/chosen": -2.8153786659240723, "logits/rejected": -2.062469959259033, "logps/chosen": -185.6283416748047, "logps/rejected": -1131.62646484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.272116780281067, "rewards/margins": 9.630932807922363, "rewards/rejected": -10.903048515319824, "step": 30460 }, { "epoch": 0.36, "learning_rate": 4.006356107988042e-06, "logits/chosen": -2.8635334968566895, "logits/rejected": -2.443896532058716, "logps/chosen": -125.19035339355469, "logps/rejected": -1040.807373046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.825281023979187, "rewards/margins": 9.198504447937012, "rewards/rejected": -10.023784637451172, "step": 30470 }, { "epoch": 0.36, "learning_rate": 4.005522278380422e-06, "logits/chosen": -2.852186679840088, "logits/rejected": -2.5338644981384277, "logps/chosen": -96.07049560546875, "logps/rejected": -833.4015502929688, "loss": 0.171, "rewards/accuracies": 1.0, "rewards/chosen": -0.5878051519393921, "rewards/margins": 7.384461879730225, "rewards/rejected": -7.972267150878906, "step": 30480 }, { "epoch": 0.36, "learning_rate": 4.004688185912794e-06, "logits/chosen": -2.8572123050689697, "logits/rejected": -2.1151938438415527, "logps/chosen": -147.31680297851562, "logps/rejected": -1154.948486328125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.9113383293151855, "rewards/margins": 10.230451583862305, "rewards/rejected": -11.141788482666016, "step": 30490 }, { "epoch": 0.37, "learning_rate": 4.003853830730787e-06, "logits/chosen": -2.8931832313537598, "logits/rejected": -2.3908469676971436, "logps/chosen": -113.962646484375, "logps/rejected": -953.8309326171875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6826528310775757, "rewards/margins": 8.47120475769043, "rewards/rejected": -9.153857231140137, "step": 30500 }, { "epoch": 0.37, "learning_rate": 4.003019212980079e-06, "logits/chosen": -2.848581314086914, "logits/rejected": -2.1698861122131348, "logps/chosen": -157.8196563720703, "logps/rejected": -1005.8240966796875, "loss": 0.0584, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9600106477737427, "rewards/margins": 8.692405700683594, "rewards/rejected": -9.652416229248047, "step": 30510 }, { "epoch": 0.37, "learning_rate": 4.0021843328063905e-06, "logits/chosen": -2.839613676071167, "logits/rejected": -2.331594944000244, "logps/chosen": -124.52339172363281, "logps/rejected": -1004.5294799804688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7638401389122009, "rewards/margins": 8.883334159851074, "rewards/rejected": -9.647174835205078, "step": 30520 }, { "epoch": 0.37, "learning_rate": 4.0013491903554905e-06, "logits/chosen": -2.859361171722412, "logits/rejected": -2.447200298309326, "logps/chosen": -117.02315521240234, "logps/rejected": -915.6246948242188, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7073370814323425, "rewards/margins": 8.075948715209961, "rewards/rejected": -8.783286094665527, "step": 30530 }, { "epoch": 0.37, "learning_rate": 4.00051378577319e-06, "logits/chosen": -2.925366163253784, "logits/rejected": -2.5120551586151123, "logps/chosen": -146.33419799804688, "logps/rejected": -950.5572509765625, "loss": 0.2921, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0224101543426514, "rewards/margins": 8.101585388183594, "rewards/rejected": -9.123994827270508, "step": 30540 }, { "epoch": 0.37, "learning_rate": 3.99967811920535e-06, "logits/chosen": -2.856778621673584, "logits/rejected": -2.0334649085998535, "logps/chosen": -159.86251831054688, "logps/rejected": -1175.247314453125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.9611592292785645, "rewards/margins": 10.353971481323242, "rewards/rejected": -11.315130233764648, "step": 30550 }, { "epoch": 0.37, "learning_rate": 3.998842190797877e-06, "logits/chosen": -2.8244919776916504, "logits/rejected": -2.3814303874969482, "logps/chosen": -115.23655700683594, "logps/rejected": -845.70703125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.727310299873352, "rewards/margins": 7.355551242828369, "rewards/rejected": -8.08286190032959, "step": 30560 }, { "epoch": 0.37, "learning_rate": 3.9980060006967204e-06, "logits/chosen": -2.8433432579040527, "logits/rejected": -2.215257406234741, "logps/chosen": -134.6202850341797, "logps/rejected": -1066.0634765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8161349296569824, "rewards/margins": 9.445867538452148, "rewards/rejected": -10.262002944946289, "step": 30570 }, { "epoch": 0.37, "learning_rate": 3.997169549047876e-06, "logits/chosen": -2.8943073749542236, "logits/rejected": -2.451348066329956, "logps/chosen": -133.084716796875, "logps/rejected": -979.708984375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.8744800686836243, "rewards/margins": 8.52291488647461, "rewards/rejected": -9.397394180297852, "step": 30580 }, { "epoch": 0.37, "learning_rate": 3.996332835997386e-06, "logits/chosen": -2.8946499824523926, "logits/rejected": -2.373845100402832, "logps/chosen": -127.54109191894531, "logps/rejected": -988.3406372070312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7799742221832275, "rewards/margins": 8.688294410705566, "rewards/rejected": -9.468269348144531, "step": 30590 }, { "epoch": 0.37, "learning_rate": 3.995495861691338e-06, "logits/chosen": -2.844564914703369, "logits/rejected": -2.397561550140381, "logps/chosen": -117.9669418334961, "logps/rejected": -906.4486083984375, "loss": 0.1345, "rewards/accuracies": 1.0, "rewards/chosen": -0.6982757449150085, "rewards/margins": 7.9913010597229, "rewards/rejected": -8.689576148986816, "step": 30600 }, { "epoch": 0.37, "learning_rate": 3.994658626275868e-06, "logits/chosen": -2.8565285205841064, "logits/rejected": -2.4288437366485596, "logps/chosen": -110.15576171875, "logps/rejected": -873.0792236328125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6507124304771423, "rewards/margins": 7.70599889755249, "rewards/rejected": -8.356710433959961, "step": 30610 }, { "epoch": 0.37, "learning_rate": 3.993821129897153e-06, "logits/chosen": -2.8856301307678223, "logits/rejected": -2.4381766319274902, "logps/chosen": -122.37188720703125, "logps/rejected": -1009.0875244140625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7436491250991821, "rewards/margins": 8.956830024719238, "rewards/rejected": -9.700479507446289, "step": 30620 }, { "epoch": 0.37, "learning_rate": 3.992983372701417e-06, "logits/chosen": -2.872562885284424, "logits/rejected": -2.296952724456787, "logps/chosen": -134.31434631347656, "logps/rejected": -1114.777099609375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8779300451278687, "rewards/margins": 9.8651123046875, "rewards/rejected": -10.7430419921875, "step": 30630 }, { "epoch": 0.37, "learning_rate": 3.992145354834931e-06, "logits/chosen": -2.8703744411468506, "logits/rejected": -2.402758836746216, "logps/chosen": -147.95114135742188, "logps/rejected": -998.2960815429688, "loss": 0.1345, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.024452567100525, "rewards/margins": 8.58251667022705, "rewards/rejected": -9.606968879699707, "step": 30640 }, { "epoch": 0.37, "learning_rate": 3.99130707644401e-06, "logits/chosen": -2.9002978801727295, "logits/rejected": -2.526418685913086, "logps/chosen": -104.10733795166016, "logps/rejected": -947.75244140625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5974917411804199, "rewards/margins": 8.50369930267334, "rewards/rejected": -9.101190567016602, "step": 30650 }, { "epoch": 0.37, "learning_rate": 3.990468537675015e-06, "logits/chosen": -2.8451483249664307, "logits/rejected": -2.1518077850341797, "logps/chosen": -158.87771606445312, "logps/rejected": -1139.90234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0357844829559326, "rewards/margins": 9.970184326171875, "rewards/rejected": -11.00596809387207, "step": 30660 }, { "epoch": 0.37, "learning_rate": 3.989629738674353e-06, "logits/chosen": -2.819547653198242, "logits/rejected": -2.0287792682647705, "logps/chosen": -150.62570190429688, "logps/rejected": -1082.5853271484375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9675824046134949, "rewards/margins": 9.451704025268555, "rewards/rejected": -10.419286727905273, "step": 30670 }, { "epoch": 0.37, "learning_rate": 3.9887906795884766e-06, "logits/chosen": -2.8962855339050293, "logits/rejected": -2.426668167114258, "logps/chosen": -128.80674743652344, "logps/rejected": -1040.0250244140625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8208327293395996, "rewards/margins": 9.173076629638672, "rewards/rejected": -9.99390983581543, "step": 30680 }, { "epoch": 0.37, "learning_rate": 3.987951360563882e-06, "logits/chosen": -2.882380962371826, "logits/rejected": -2.3665592670440674, "logps/chosen": -117.88338470458984, "logps/rejected": -944.98876953125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7208682894706726, "rewards/margins": 8.341985702514648, "rewards/rejected": -9.062853813171387, "step": 30690 }, { "epoch": 0.37, "learning_rate": 3.9871117817471125e-06, "logits/chosen": -2.9012362957000732, "logits/rejected": -2.473912477493286, "logps/chosen": -115.16927337646484, "logps/rejected": -941.9221801757812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7342844605445862, "rewards/margins": 8.303667068481445, "rewards/rejected": -9.03795051574707, "step": 30700 }, { "epoch": 0.37, "learning_rate": 3.986271943284756e-06, "logits/chosen": -2.8341562747955322, "logits/rejected": -2.308499813079834, "logps/chosen": -131.95065307617188, "logps/rejected": -836.6524658203125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8337417840957642, "rewards/margins": 7.145423889160156, "rewards/rejected": -7.979165554046631, "step": 30710 }, { "epoch": 0.37, "learning_rate": 3.985431845323447e-06, "logits/chosen": -2.8592824935913086, "logits/rejected": -2.0207905769348145, "logps/chosen": -165.41616821289062, "logps/rejected": -1145.967041015625, "loss": 0.1135, "rewards/accuracies": 1.0, "rewards/chosen": -1.1065056324005127, "rewards/margins": 9.949918746948242, "rewards/rejected": -11.056424140930176, "step": 30720 }, { "epoch": 0.37, "learning_rate": 3.984591488009863e-06, "logits/chosen": -2.890568494796753, "logits/rejected": -2.2414801120758057, "logps/chosen": -155.72640991210938, "logps/rejected": -1090.747314453125, "loss": 0.0538, "rewards/accuracies": 1.0, "rewards/chosen": -1.0014808177947998, "rewards/margins": 9.503037452697754, "rewards/rejected": -10.504518508911133, "step": 30730 }, { "epoch": 0.37, "learning_rate": 3.983750871490728e-06, "logits/chosen": -2.8654913902282715, "logits/rejected": -2.274982452392578, "logps/chosen": -128.54981994628906, "logps/rejected": -913.5515747070312, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -0.7685356140136719, "rewards/margins": 7.9716644287109375, "rewards/rejected": -8.740200996398926, "step": 30740 }, { "epoch": 0.37, "learning_rate": 3.982909995912812e-06, "logits/chosen": -2.869305372238159, "logits/rejected": -2.348451852798462, "logps/chosen": -118.42256927490234, "logps/rejected": -958.0109252929688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7470406889915466, "rewards/margins": 8.431536674499512, "rewards/rejected": -9.178576469421387, "step": 30750 }, { "epoch": 0.37, "learning_rate": 3.9820688614229295e-06, "logits/chosen": -2.8469059467315674, "logits/rejected": -2.2835116386413574, "logps/chosen": -124.21339416503906, "logps/rejected": -1063.400634765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8113448023796082, "rewards/margins": 9.40074348449707, "rewards/rejected": -10.212089538574219, "step": 30760 }, { "epoch": 0.37, "learning_rate": 3.98122746816794e-06, "logits/chosen": -2.881042242050171, "logits/rejected": -2.4992804527282715, "logps/chosen": -103.1847152709961, "logps/rejected": -872.8533935546875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6204521059989929, "rewards/margins": 7.731003761291504, "rewards/rejected": -8.351455688476562, "step": 30770 }, { "epoch": 0.37, "learning_rate": 3.980385816294748e-06, "logits/chosen": -2.8880317211151123, "logits/rejected": -2.4040637016296387, "logps/chosen": -127.64300537109375, "logps/rejected": -902.8431396484375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7736323475837708, "rewards/margins": 7.868894100189209, "rewards/rejected": -8.642526626586914, "step": 30780 }, { "epoch": 0.37, "learning_rate": 3.979543905950305e-06, "logits/chosen": -2.9013333320617676, "logits/rejected": -2.130686044692993, "logps/chosen": -149.29823303222656, "logps/rejected": -1023.8648681640625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9560238718986511, "rewards/margins": 8.870136260986328, "rewards/rejected": -9.826159477233887, "step": 30790 }, { "epoch": 0.37, "learning_rate": 3.978701737281605e-06, "logits/chosen": -2.8745570182800293, "logits/rejected": -2.413168430328369, "logps/chosen": -110.73614501953125, "logps/rejected": -948.0061645507812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6605747938156128, "rewards/margins": 8.446496963500977, "rewards/rejected": -9.107070922851562, "step": 30800 }, { "epoch": 0.37, "learning_rate": 3.977859310435688e-06, "logits/chosen": -2.7976672649383545, "logits/rejected": -2.1048741340637207, "logps/chosen": -129.01321411132812, "logps/rejected": -1041.524169921875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.7984343767166138, "rewards/margins": 9.243867874145508, "rewards/rejected": -10.042302131652832, "step": 30810 }, { "epoch": 0.37, "learning_rate": 3.9770166255596404e-06, "logits/chosen": -2.822765827178955, "logits/rejected": -2.3858542442321777, "logps/chosen": -95.30064392089844, "logps/rejected": -906.4219970703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5329908132553101, "rewards/margins": 8.163529396057129, "rewards/rejected": -8.69651985168457, "step": 30820 }, { "epoch": 0.37, "learning_rate": 3.976173682800593e-06, "logits/chosen": -2.916719436645508, "logits/rejected": -2.2736639976501465, "logps/chosen": -129.80877685546875, "logps/rejected": -1033.553955078125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8380451202392578, "rewards/margins": 9.085359573364258, "rewards/rejected": -9.923404693603516, "step": 30830 }, { "epoch": 0.37, "learning_rate": 3.975330482305719e-06, "logits/chosen": -2.873253107070923, "logits/rejected": -2.3322529792785645, "logps/chosen": -114.06964111328125, "logps/rejected": -980.8045654296875, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7228467464447021, "rewards/margins": 8.71323299407959, "rewards/rejected": -9.436080932617188, "step": 30840 }, { "epoch": 0.37, "learning_rate": 3.974487024222241e-06, "logits/chosen": -2.86896014213562, "logits/rejected": -2.379739284515381, "logps/chosen": -122.38372802734375, "logps/rejected": -1057.635498046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7413030862808228, "rewards/margins": 9.453280448913574, "rewards/rejected": -10.194583892822266, "step": 30850 }, { "epoch": 0.37, "learning_rate": 3.9736433086974236e-06, "logits/chosen": -2.8629326820373535, "logits/rejected": -2.1902801990509033, "logps/chosen": -130.46054077148438, "logps/rejected": -1026.8487548828125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7311946749687195, "rewards/margins": 9.151731491088867, "rewards/rejected": -9.882926940917969, "step": 30860 }, { "epoch": 0.37, "learning_rate": 3.9727993358785774e-06, "logits/chosen": -2.8905200958251953, "logits/rejected": -2.3546640872955322, "logps/chosen": -128.82693481445312, "logps/rejected": -937.4654541015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.79844731092453, "rewards/margins": 8.201614379882812, "rewards/rejected": -9.000062942504883, "step": 30870 }, { "epoch": 0.37, "learning_rate": 3.971955105913058e-06, "logits/chosen": -2.9077677726745605, "logits/rejected": -2.4019088745117188, "logps/chosen": -123.19412994384766, "logps/rejected": -1033.472412109375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7955222129821777, "rewards/margins": 9.147661209106445, "rewards/rejected": -9.943183898925781, "step": 30880 }, { "epoch": 0.37, "learning_rate": 3.971110618948265e-06, "logits/chosen": -2.8471927642822266, "logits/rejected": -2.3723742961883545, "logps/chosen": -124.5479736328125, "logps/rejected": -944.1710815429688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7775744199752808, "rewards/margins": 8.284610748291016, "rewards/rejected": -9.062186241149902, "step": 30890 }, { "epoch": 0.37, "learning_rate": 3.970265875131644e-06, "logits/chosen": -2.8150458335876465, "logits/rejected": -2.214707136154175, "logps/chosen": -119.9740219116211, "logps/rejected": -910.4518432617188, "loss": 0.1047, "rewards/accuracies": 1.0, "rewards/chosen": -0.771077036857605, "rewards/margins": 7.934139251708984, "rewards/rejected": -8.705216407775879, "step": 30900 }, { "epoch": 0.37, "learning_rate": 3.969420874610684e-06, "logits/chosen": -2.844198703765869, "logits/rejected": -1.842551827430725, "logps/chosen": -169.73135375976562, "logps/rejected": -1143.203125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.0417253971099854, "rewards/margins": 9.967037200927734, "rewards/rejected": -11.008761405944824, "step": 30910 }, { "epoch": 0.37, "learning_rate": 3.968575617532921e-06, "logits/chosen": -2.8746707439422607, "logits/rejected": -2.344517469406128, "logps/chosen": -131.036376953125, "logps/rejected": -1015.2091674804688, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.778598964214325, "rewards/margins": 8.981001853942871, "rewards/rejected": -9.759600639343262, "step": 30920 }, { "epoch": 0.37, "learning_rate": 3.967730104045935e-06, "logits/chosen": -2.8450136184692383, "logits/rejected": -2.4279582500457764, "logps/chosen": -111.89107513427734, "logps/rejected": -874.5665283203125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.6956592202186584, "rewards/margins": 7.670037269592285, "rewards/rejected": -8.365696907043457, "step": 30930 }, { "epoch": 0.37, "learning_rate": 3.966884334297348e-06, "logits/chosen": -2.831481456756592, "logits/rejected": -2.3950607776641846, "logps/chosen": -110.39241027832031, "logps/rejected": -838.9307861328125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7124820947647095, "rewards/margins": 7.310302734375, "rewards/rejected": -8.022784233093262, "step": 30940 }, { "epoch": 0.37, "learning_rate": 3.9660383084348306e-06, "logits/chosen": -2.8649566173553467, "logits/rejected": -2.10810923576355, "logps/chosen": -170.57180786132812, "logps/rejected": -987.30810546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.2729604244232178, "rewards/margins": 8.192646026611328, "rewards/rejected": -9.465605735778809, "step": 30950 }, { "epoch": 0.37, "learning_rate": 3.965192026606096e-06, "logits/chosen": -2.8258252143859863, "logits/rejected": -2.3180184364318848, "logps/chosen": -146.9258270263672, "logps/rejected": -999.2122802734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0093202590942383, "rewards/margins": 8.59770679473877, "rewards/rejected": -9.607026100158691, "step": 30960 }, { "epoch": 0.37, "learning_rate": 3.9643454889589024e-06, "logits/chosen": -2.851327419281006, "logits/rejected": -2.054222583770752, "logps/chosen": -161.78668212890625, "logps/rejected": -1052.3594970703125, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -1.0827878713607788, "rewards/margins": 9.04196834564209, "rewards/rejected": -10.124757766723633, "step": 30970 }, { "epoch": 0.37, "learning_rate": 3.963498695641055e-06, "logits/chosen": -2.855894088745117, "logits/rejected": -2.038025379180908, "logps/chosen": -173.97935485839844, "logps/rejected": -1045.201416015625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1991289854049683, "rewards/margins": 8.841911315917969, "rewards/rejected": -10.041040420532227, "step": 30980 }, { "epoch": 0.37, "learning_rate": 3.9626516468004e-06, "logits/chosen": -2.906658172607422, "logits/rejected": -2.1879210472106934, "logps/chosen": -153.71627807617188, "logps/rejected": -972.4627075195312, "loss": 0.1456, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0597774982452393, "rewards/margins": 8.272936820983887, "rewards/rejected": -9.332715034484863, "step": 30990 }, { "epoch": 0.37, "learning_rate": 3.961804342584829e-06, "logits/chosen": -2.859726667404175, "logits/rejected": -2.540595293045044, "logps/chosen": -92.6646728515625, "logps/rejected": -787.2091064453125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5311723947525024, "rewards/margins": 6.973498344421387, "rewards/rejected": -7.504671573638916, "step": 31000 }, { "epoch": 0.37, "learning_rate": 3.96095678314228e-06, "logits/chosen": -2.8658268451690674, "logits/rejected": -2.4903697967529297, "logps/chosen": -96.3727798461914, "logps/rejected": -850.1925659179688, "loss": 0.1624, "rewards/accuracies": 1.0, "rewards/chosen": -0.5529135465621948, "rewards/margins": 7.570807456970215, "rewards/rejected": -8.123720169067383, "step": 31010 }, { "epoch": 0.37, "learning_rate": 3.960108968620734e-06, "logits/chosen": -2.883201837539673, "logits/rejected": -2.3602428436279297, "logps/chosen": -159.2306365966797, "logps/rejected": -1003.05517578125, "loss": 0.04, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.109915018081665, "rewards/margins": 8.519317626953125, "rewards/rejected": -9.629232406616211, "step": 31020 }, { "epoch": 0.37, "learning_rate": 3.9592608991682174e-06, "logits/chosen": -2.8554654121398926, "logits/rejected": -2.3442983627319336, "logps/chosen": -115.57438659667969, "logps/rejected": -919.3179931640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7519338726997375, "rewards/margins": 8.053853988647461, "rewards/rejected": -8.805788040161133, "step": 31030 }, { "epoch": 0.37, "learning_rate": 3.958412574932801e-06, "logits/chosen": -2.8741648197174072, "logits/rejected": -2.3509953022003174, "logps/chosen": -108.05826568603516, "logps/rejected": -912.81396484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6267738938331604, "rewards/margins": 8.11313533782959, "rewards/rejected": -8.739909172058105, "step": 31040 }, { "epoch": 0.37, "learning_rate": 3.957563996062599e-06, "logits/chosen": -2.8756585121154785, "logits/rejected": -2.4634757041931152, "logps/chosen": -127.0878677368164, "logps/rejected": -851.3571166992188, "loss": 0.1592, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8705394864082336, "rewards/margins": 7.273043632507324, "rewards/rejected": -8.143583297729492, "step": 31050 }, { "epoch": 0.37, "learning_rate": 3.956715162705773e-06, "logits/chosen": -2.8736939430236816, "logits/rejected": -2.399967908859253, "logps/chosen": -148.5421600341797, "logps/rejected": -937.5642700195312, "loss": 0.1304, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9792853593826294, "rewards/margins": 7.999818325042725, "rewards/rejected": -8.979104995727539, "step": 31060 }, { "epoch": 0.37, "learning_rate": 3.955866075010524e-06, "logits/chosen": -2.875195264816284, "logits/rejected": -2.4037489891052246, "logps/chosen": -117.82551574707031, "logps/rejected": -973.3243408203125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7009459733963013, "rewards/margins": 8.644433975219727, "rewards/rejected": -9.345380783081055, "step": 31070 }, { "epoch": 0.37, "learning_rate": 3.955016733125102e-06, "logits/chosen": -2.857250213623047, "logits/rejected": -2.0966758728027344, "logps/chosen": -146.80014038085938, "logps/rejected": -1254.138916015625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.901119589805603, "rewards/margins": 11.232244491577148, "rewards/rejected": -12.1333646774292, "step": 31080 }, { "epoch": 0.37, "learning_rate": 3.9541671371978e-06, "logits/chosen": -2.8222947120666504, "logits/rejected": -2.293140411376953, "logps/chosen": -148.25660705566406, "logps/rejected": -1045.526123046875, "loss": 0.1036, "rewards/accuracies": 1.0, "rewards/chosen": -0.9782193899154663, "rewards/margins": 9.059653282165527, "rewards/rejected": -10.037874221801758, "step": 31090 }, { "epoch": 0.37, "learning_rate": 3.953317287376955e-06, "logits/chosen": -2.8770511150360107, "logits/rejected": -2.3141884803771973, "logps/chosen": -152.9336395263672, "logps/rejected": -1056.184814453125, "loss": 0.1193, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9686394929885864, "rewards/margins": 9.196928024291992, "rewards/rejected": -10.165568351745605, "step": 31100 }, { "epoch": 0.37, "learning_rate": 3.9524671838109476e-06, "logits/chosen": -2.843196392059326, "logits/rejected": -2.023106098175049, "logps/chosen": -155.98318481445312, "logps/rejected": -1199.822021484375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.9976803064346313, "rewards/margins": 10.58841323852539, "rewards/rejected": -11.58609390258789, "step": 31110 }, { "epoch": 0.37, "learning_rate": 3.951616826648203e-06, "logits/chosen": -2.911543846130371, "logits/rejected": -2.464517593383789, "logps/chosen": -108.02571105957031, "logps/rejected": -943.5037841796875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5997445583343506, "rewards/margins": 8.428984642028809, "rewards/rejected": -9.028729438781738, "step": 31120 }, { "epoch": 0.37, "learning_rate": 3.950766216037193e-06, "logits/chosen": -2.845858573913574, "logits/rejected": -2.200071334838867, "logps/chosen": -146.6768341064453, "logps/rejected": -1145.980712890625, "loss": 0.1065, "rewards/accuracies": 1.0, "rewards/chosen": -0.9293681979179382, "rewards/margins": 10.120161056518555, "rewards/rejected": -11.049530029296875, "step": 31130 }, { "epoch": 0.37, "learning_rate": 3.949915352126431e-06, "logits/chosen": -2.8611364364624023, "logits/rejected": -2.468596935272217, "logps/chosen": -134.10174560546875, "logps/rejected": -1044.5789794921875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8850655555725098, "rewards/margins": 9.185359954833984, "rewards/rejected": -10.070425033569336, "step": 31140 }, { "epoch": 0.37, "learning_rate": 3.949064235064476e-06, "logits/chosen": -2.8407909870147705, "logits/rejected": -2.404723644256592, "logps/chosen": -149.08526611328125, "logps/rejected": -889.04150390625, "loss": 0.1665, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0502712726593018, "rewards/margins": 7.455628871917725, "rewards/rejected": -8.505900382995605, "step": 31150 }, { "epoch": 0.37, "learning_rate": 3.94821286499993e-06, "logits/chosen": -2.875206708908081, "logits/rejected": -2.273970127105713, "logps/chosen": -130.0867919921875, "logps/rejected": -1062.1181640625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7554080486297607, "rewards/margins": 9.476112365722656, "rewards/rejected": -10.231520652770996, "step": 31160 }, { "epoch": 0.37, "learning_rate": 3.947361242081439e-06, "logits/chosen": -2.83918833732605, "logits/rejected": -2.0483856201171875, "logps/chosen": -139.81398010253906, "logps/rejected": -1034.465087890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8474612236022949, "rewards/margins": 9.107099533081055, "rewards/rejected": -9.954559326171875, "step": 31170 }, { "epoch": 0.37, "learning_rate": 3.946509366457695e-06, "logits/chosen": -2.8379006385803223, "logits/rejected": -2.206371545791626, "logps/chosen": -118.5379638671875, "logps/rejected": -873.6868896484375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7032082080841064, "rewards/margins": 7.656927585601807, "rewards/rejected": -8.360135078430176, "step": 31180 }, { "epoch": 0.37, "learning_rate": 3.945657238277434e-06, "logits/chosen": -2.8857052326202393, "logits/rejected": -2.4705810546875, "logps/chosen": -118.64630126953125, "logps/rejected": -808.5891723632812, "loss": 0.0817, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7817886471748352, "rewards/margins": 6.936747074127197, "rewards/rejected": -7.718535423278809, "step": 31190 }, { "epoch": 0.37, "learning_rate": 3.944804857689434e-06, "logits/chosen": -2.881049871444702, "logits/rejected": -2.420900583267212, "logps/chosen": -108.90254211425781, "logps/rejected": -924.2861328125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6742708086967468, "rewards/margins": 8.176600456237793, "rewards/rejected": -8.850871086120605, "step": 31200 }, { "epoch": 0.37, "learning_rate": 3.9439522248425184e-06, "logits/chosen": -2.8540830612182617, "logits/rejected": -2.2278378009796143, "logps/chosen": -119.04276275634766, "logps/rejected": -992.125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6885947585105896, "rewards/margins": 8.841641426086426, "rewards/rejected": -9.530237197875977, "step": 31210 }, { "epoch": 0.37, "learning_rate": 3.943099339885556e-06, "logits/chosen": -2.8739144802093506, "logits/rejected": -2.2772154808044434, "logps/chosen": -162.85105895996094, "logps/rejected": -1021.8600463867188, "loss": 0.1376, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1396257877349854, "rewards/margins": 8.689789772033691, "rewards/rejected": -9.829414367675781, "step": 31220 }, { "epoch": 0.37, "learning_rate": 3.9422462029674554e-06, "logits/chosen": -2.909554958343506, "logits/rejected": -2.3842616081237793, "logps/chosen": -108.51118469238281, "logps/rejected": -951.3465576171875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6415196657180786, "rewards/margins": 8.488149642944336, "rewards/rejected": -9.129669189453125, "step": 31230 }, { "epoch": 0.37, "learning_rate": 3.941392814237174e-06, "logits/chosen": -2.895123243331909, "logits/rejected": -2.0943281650543213, "logps/chosen": -126.341796875, "logps/rejected": -966.9366455078125, "loss": 0.0904, "rewards/accuracies": 1.0, "rewards/chosen": -0.7499261498451233, "rewards/margins": 8.537245750427246, "rewards/rejected": -9.287172317504883, "step": 31240 }, { "epoch": 0.37, "learning_rate": 3.94053917384371e-06, "logits/chosen": -2.8174753189086914, "logits/rejected": -2.3213531970977783, "logps/chosen": -129.54330444335938, "logps/rejected": -973.783203125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8714226484298706, "rewards/margins": 8.469550132751465, "rewards/rejected": -9.340972900390625, "step": 31250 }, { "epoch": 0.37, "learning_rate": 3.939685281936108e-06, "logits/chosen": -2.874189853668213, "logits/rejected": -2.2270214557647705, "logps/chosen": -129.0608367919922, "logps/rejected": -978.9749755859375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.7491179704666138, "rewards/margins": 8.634786605834961, "rewards/rejected": -9.383903503417969, "step": 31260 }, { "epoch": 0.37, "learning_rate": 3.938831138663454e-06, "logits/chosen": -2.847743272781372, "logits/rejected": -1.9978796243667603, "logps/chosen": -140.7433319091797, "logps/rejected": -1162.1107177734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8383606672286987, "rewards/margins": 10.382784843444824, "rewards/rejected": -11.221145629882812, "step": 31270 }, { "epoch": 0.37, "learning_rate": 3.93797674417488e-06, "logits/chosen": -2.863201379776001, "logits/rejected": -2.270329236984253, "logps/chosen": -143.18313598632812, "logps/rejected": -1055.0908203125, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -0.9614168405532837, "rewards/margins": 9.183844566345215, "rewards/rejected": -10.145261764526367, "step": 31280 }, { "epoch": 0.37, "learning_rate": 3.937122098619559e-06, "logits/chosen": -2.8493106365203857, "logits/rejected": -2.5163283348083496, "logps/chosen": -97.77381134033203, "logps/rejected": -872.5813598632812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5838338732719421, "rewards/margins": 7.762662410736084, "rewards/rejected": -8.346494674682617, "step": 31290 }, { "epoch": 0.37, "learning_rate": 3.936267202146712e-06, "logits/chosen": -2.8479764461517334, "logits/rejected": -2.2580859661102295, "logps/chosen": -129.04457092285156, "logps/rejected": -1052.517822265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7981202006340027, "rewards/margins": 9.339273452758789, "rewards/rejected": -10.137393951416016, "step": 31300 }, { "epoch": 0.37, "learning_rate": 3.9354120549056006e-06, "logits/chosen": -2.8577988147735596, "logits/rejected": -2.0113070011138916, "logps/chosen": -138.55606079101562, "logps/rejected": -1090.228515625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8633920550346375, "rewards/margins": 9.641897201538086, "rewards/rejected": -10.505289077758789, "step": 31310 }, { "epoch": 0.37, "learning_rate": 3.934556657045531e-06, "logits/chosen": -2.9055631160736084, "logits/rejected": -2.2666001319885254, "logps/chosen": -152.3577423095703, "logps/rejected": -1181.812255859375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.9683696627616882, "rewards/margins": 10.462777137756348, "rewards/rejected": -11.431147575378418, "step": 31320 }, { "epoch": 0.38, "learning_rate": 3.9337010087158525e-06, "logits/chosen": -2.8815948963165283, "logits/rejected": -2.084930896759033, "logps/chosen": -165.2855987548828, "logps/rejected": -1129.1524658203125, "loss": 0.1677, "rewards/accuracies": 1.0, "rewards/chosen": -1.0380074977874756, "rewards/margins": 9.851842880249023, "rewards/rejected": -10.889850616455078, "step": 31330 }, { "epoch": 0.38, "learning_rate": 3.93284511006596e-06, "logits/chosen": -2.8554935455322266, "logits/rejected": -2.3409759998321533, "logps/chosen": -109.47422790527344, "logps/rejected": -947.2048950195312, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.6040589213371277, "rewards/margins": 8.481551170349121, "rewards/rejected": -9.085610389709473, "step": 31340 }, { "epoch": 0.38, "learning_rate": 3.931988961245292e-06, "logits/chosen": -2.869846820831299, "logits/rejected": -2.3146610260009766, "logps/chosen": -138.11898803710938, "logps/rejected": -986.8859252929688, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.9543516039848328, "rewards/margins": 8.524868965148926, "rewards/rejected": -9.479220390319824, "step": 31350 }, { "epoch": 0.38, "learning_rate": 3.931132562403328e-06, "logits/chosen": -2.8582446575164795, "logits/rejected": -2.2991819381713867, "logps/chosen": -123.27760314941406, "logps/rejected": -1016.0006103515625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7586316466331482, "rewards/margins": 9.010858535766602, "rewards/rejected": -9.769490242004395, "step": 31360 }, { "epoch": 0.38, "learning_rate": 3.930275913689593e-06, "logits/chosen": -2.863163471221924, "logits/rejected": -2.5419673919677734, "logps/chosen": -93.24127960205078, "logps/rejected": -856.6853637695312, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.5539905428886414, "rewards/margins": 7.647668361663818, "rewards/rejected": -8.20166015625, "step": 31370 }, { "epoch": 0.38, "learning_rate": 3.929419015253656e-06, "logits/chosen": -2.8859996795654297, "logits/rejected": -2.1636087894439697, "logps/chosen": -141.99476623535156, "logps/rejected": -1079.8370361328125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9453876614570618, "rewards/margins": 9.452882766723633, "rewards/rejected": -10.398271560668945, "step": 31380 }, { "epoch": 0.38, "learning_rate": 3.928561867245129e-06, "logits/chosen": -2.8537914752960205, "logits/rejected": -2.342790126800537, "logps/chosen": -157.63525390625, "logps/rejected": -1050.1839599609375, "loss": 0.0716, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1285086870193481, "rewards/margins": 8.968118667602539, "rewards/rejected": -10.096627235412598, "step": 31390 }, { "epoch": 0.38, "learning_rate": 3.927704469813668e-06, "logits/chosen": -2.8868744373321533, "logits/rejected": -2.5261783599853516, "logps/chosen": -94.04238891601562, "logps/rejected": -814.0087280273438, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5676549077033997, "rewards/margins": 7.20569372177124, "rewards/rejected": -7.773348331451416, "step": 31400 }, { "epoch": 0.38, "learning_rate": 3.926846823108972e-06, "logits/chosen": -2.8807833194732666, "logits/rejected": -2.426802158355713, "logps/chosen": -107.978271484375, "logps/rejected": -920.7110595703125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6170765161514282, "rewards/margins": 8.195782661437988, "rewards/rejected": -8.812859535217285, "step": 31410 }, { "epoch": 0.38, "learning_rate": 3.9259889272807825e-06, "logits/chosen": -2.8306291103363037, "logits/rejected": -2.4503912925720215, "logps/chosen": -121.33992004394531, "logps/rejected": -983.716796875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7902069091796875, "rewards/margins": 8.667545318603516, "rewards/rejected": -9.45775318145752, "step": 31420 }, { "epoch": 0.38, "learning_rate": 3.925130782478888e-06, "logits/chosen": -2.850970506668091, "logits/rejected": -2.054676055908203, "logps/chosen": -155.39694213867188, "logps/rejected": -1162.1610107421875, "loss": 0.0375, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.032193660736084, "rewards/margins": 10.186128616333008, "rewards/rejected": -11.218321800231934, "step": 31430 }, { "epoch": 0.38, "learning_rate": 3.9242723888531156e-06, "logits/chosen": -2.8682656288146973, "logits/rejected": -2.4159324169158936, "logps/chosen": -152.51119995117188, "logps/rejected": -1023.2216796875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.9982807040214539, "rewards/margins": 8.857051849365234, "rewards/rejected": -9.855332374572754, "step": 31440 }, { "epoch": 0.38, "learning_rate": 3.923413746553341e-06, "logits/chosen": -2.8236680030822754, "logits/rejected": -2.0937082767486572, "logps/chosen": -153.53440856933594, "logps/rejected": -1067.9315185546875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9582105875015259, "rewards/margins": 9.31690788269043, "rewards/rejected": -10.275120735168457, "step": 31450 }, { "epoch": 0.38, "learning_rate": 3.92255485572948e-06, "logits/chosen": -2.860276699066162, "logits/rejected": -2.3833484649658203, "logps/chosen": -138.48098754882812, "logps/rejected": -1059.814697265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.947398841381073, "rewards/margins": 9.253324508666992, "rewards/rejected": -10.200723648071289, "step": 31460 }, { "epoch": 0.38, "learning_rate": 3.921695716531491e-06, "logits/chosen": -2.8515381813049316, "logits/rejected": -2.3802754878997803, "logps/chosen": -110.57258605957031, "logps/rejected": -834.3046875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6486480832099915, "rewards/margins": 7.32565450668335, "rewards/rejected": -7.974303245544434, "step": 31470 }, { "epoch": 0.38, "learning_rate": 3.92083632910938e-06, "logits/chosen": -2.9051151275634766, "logits/rejected": -2.6126673221588135, "logps/chosen": -126.3029556274414, "logps/rejected": -849.5299682617188, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8107396960258484, "rewards/margins": 7.297253608703613, "rewards/rejected": -8.107994079589844, "step": 31480 }, { "epoch": 0.38, "learning_rate": 3.91997669361319e-06, "logits/chosen": -2.94618558883667, "logits/rejected": -2.2451233863830566, "logps/chosen": -135.39186096191406, "logps/rejected": -1041.5279541015625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8581299781799316, "rewards/margins": 9.162530899047852, "rewards/rejected": -10.020660400390625, "step": 31490 }, { "epoch": 0.38, "learning_rate": 3.919116810193014e-06, "logits/chosen": -2.875420093536377, "logits/rejected": -2.2285497188568115, "logps/chosen": -144.6129608154297, "logps/rejected": -1012.28515625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9389975666999817, "rewards/margins": 8.793067932128906, "rewards/rejected": -9.73206615447998, "step": 31500 }, { "epoch": 0.38, "learning_rate": 3.918256678998985e-06, "logits/chosen": -2.859243869781494, "logits/rejected": -2.393436908721924, "logps/chosen": -124.479248046875, "logps/rejected": -917.2705078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7885807752609253, "rewards/margins": 7.996277809143066, "rewards/rejected": -8.784858703613281, "step": 31510 }, { "epoch": 0.38, "learning_rate": 3.917396300181278e-06, "logits/chosen": -2.8580644130706787, "logits/rejected": -2.031221866607666, "logps/chosen": -152.3767547607422, "logps/rejected": -1173.741455078125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0226192474365234, "rewards/margins": 10.304807662963867, "rewards/rejected": -11.327427864074707, "step": 31520 }, { "epoch": 0.38, "learning_rate": 3.916535673890115e-06, "logits/chosen": -2.9126505851745605, "logits/rejected": -2.6016664505004883, "logps/chosen": -103.96415710449219, "logps/rejected": -787.1171875, "loss": 0.0554, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6146665811538696, "rewards/margins": 6.879422664642334, "rewards/rejected": -7.494088649749756, "step": 31530 }, { "epoch": 0.38, "learning_rate": 3.915674800275756e-06, "logits/chosen": -2.828312397003174, "logits/rejected": -2.1781022548675537, "logps/chosen": -141.30795288085938, "logps/rejected": -1084.1201171875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8947529792785645, "rewards/margins": 9.540834426879883, "rewards/rejected": -10.435588836669922, "step": 31540 }, { "epoch": 0.38, "learning_rate": 3.9148136794885105e-06, "logits/chosen": -2.880358934402466, "logits/rejected": -2.4481358528137207, "logps/chosen": -124.12773132324219, "logps/rejected": -999.8695068359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.763116180896759, "rewards/margins": 8.850953102111816, "rewards/rejected": -9.614068984985352, "step": 31550 }, { "epoch": 0.38, "learning_rate": 3.913952311678725e-06, "logits/chosen": -2.884450674057007, "logits/rejected": -2.446537733078003, "logps/chosen": -114.83631896972656, "logps/rejected": -1032.740234375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6951103806495667, "rewards/margins": 9.250234603881836, "rewards/rejected": -9.945344924926758, "step": 31560 }, { "epoch": 0.38, "learning_rate": 3.913090696996793e-06, "logits/chosen": -2.8908703327178955, "logits/rejected": -2.468257427215576, "logps/chosen": -94.39466857910156, "logps/rejected": -859.4212646484375, "loss": 0.1547, "rewards/accuracies": 1.0, "rewards/chosen": -0.5352733731269836, "rewards/margins": 7.692286491394043, "rewards/rejected": -8.227560043334961, "step": 31570 }, { "epoch": 0.38, "learning_rate": 3.91222883559315e-06, "logits/chosen": -2.8574209213256836, "logits/rejected": -2.15836763381958, "logps/chosen": -167.32736206054688, "logps/rejected": -1146.855224609375, "loss": 0.1604, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1576783657073975, "rewards/margins": 9.911508560180664, "rewards/rejected": -11.06918716430664, "step": 31580 }, { "epoch": 0.38, "learning_rate": 3.911366727618274e-06, "logits/chosen": -2.863830089569092, "logits/rejected": -2.302027463912964, "logps/chosen": -141.64358520507812, "logps/rejected": -918.22705078125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8979777097702026, "rewards/margins": 7.890724182128906, "rewards/rejected": -8.788701057434082, "step": 31590 }, { "epoch": 0.38, "learning_rate": 3.910504373222688e-06, "logits/chosen": -2.86923885345459, "logits/rejected": -2.505864143371582, "logps/chosen": -118.81974029541016, "logps/rejected": -796.7587890625, "loss": 0.1156, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.805096447467804, "rewards/margins": 6.789209842681885, "rewards/rejected": -7.594305515289307, "step": 31600 }, { "epoch": 0.38, "learning_rate": 3.909641772556956e-06, "logits/chosen": -2.8545238971710205, "logits/rejected": -2.24043345451355, "logps/chosen": -112.22599029541016, "logps/rejected": -888.3712768554688, "loss": 0.1013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6967067122459412, "rewards/margins": 7.804445743560791, "rewards/rejected": -8.501152038574219, "step": 31610 }, { "epoch": 0.38, "learning_rate": 3.908778925771685e-06, "logits/chosen": -2.9350178241729736, "logits/rejected": -2.526057720184326, "logps/chosen": -116.58082580566406, "logps/rejected": -930.4348754882812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6994043588638306, "rewards/margins": 8.214316368103027, "rewards/rejected": -8.913721084594727, "step": 31620 }, { "epoch": 0.38, "learning_rate": 3.907915833017527e-06, "logits/chosen": -2.8491299152374268, "logits/rejected": -2.3032798767089844, "logps/chosen": -122.78578186035156, "logps/rejected": -1065.576416015625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7283012866973877, "rewards/margins": 9.528711318969727, "rewards/rejected": -10.257013320922852, "step": 31630 }, { "epoch": 0.38, "learning_rate": 3.907052494445175e-06, "logits/chosen": -2.828540325164795, "logits/rejected": -2.3505935668945312, "logps/chosen": -156.64891052246094, "logps/rejected": -883.7560424804688, "loss": 0.2348, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.1449735164642334, "rewards/margins": 7.311293125152588, "rewards/rejected": -8.456266403198242, "step": 31640 }, { "epoch": 0.38, "learning_rate": 3.906188910205365e-06, "logits/chosen": -2.9051320552825928, "logits/rejected": -2.308847427368164, "logps/chosen": -143.5491180419922, "logps/rejected": -1096.6954345703125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.959882378578186, "rewards/margins": 9.601287841796875, "rewards/rejected": -10.56117057800293, "step": 31650 }, { "epoch": 0.38, "learning_rate": 3.905325080448877e-06, "logits/chosen": -2.8426153659820557, "logits/rejected": -2.438082456588745, "logps/chosen": -93.75993347167969, "logps/rejected": -939.9989013671875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.5215052366256714, "rewards/margins": 8.493277549743652, "rewards/rejected": -9.01478385925293, "step": 31660 }, { "epoch": 0.38, "learning_rate": 3.904461005326532e-06, "logits/chosen": -2.8767220973968506, "logits/rejected": -2.32498836517334, "logps/chosen": -121.08375549316406, "logps/rejected": -974.8406982421875, "loss": 0.0254, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7149168848991394, "rewards/margins": 8.650715827941895, "rewards/rejected": -9.365633010864258, "step": 31670 }, { "epoch": 0.38, "learning_rate": 3.903596684989197e-06, "logits/chosen": -2.873610258102417, "logits/rejected": -2.6022448539733887, "logps/chosen": -146.32179260253906, "logps/rejected": -776.3074951171875, "loss": 0.2565, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.061808705329895, "rewards/margins": 6.323770046234131, "rewards/rejected": -7.385578155517578, "step": 31680 }, { "epoch": 0.38, "learning_rate": 3.90273211958778e-06, "logits/chosen": -2.8486886024475098, "logits/rejected": -2.3072381019592285, "logps/chosen": -139.76170349121094, "logps/rejected": -1039.0936279296875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9170258641242981, "rewards/margins": 9.065778732299805, "rewards/rejected": -9.982806205749512, "step": 31690 }, { "epoch": 0.38, "learning_rate": 3.90186730927323e-06, "logits/chosen": -2.8699376583099365, "logits/rejected": -2.32123064994812, "logps/chosen": -121.06453704833984, "logps/rejected": -1023.5103759765625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.730624794960022, "rewards/margins": 9.11681079864502, "rewards/rejected": -9.84743595123291, "step": 31700 }, { "epoch": 0.38, "learning_rate": 3.901002254196541e-06, "logits/chosen": -2.8648171424865723, "logits/rejected": -2.2051568031311035, "logps/chosen": -125.17744445800781, "logps/rejected": -992.0833740234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.736562967300415, "rewards/margins": 8.786370277404785, "rewards/rejected": -9.522933959960938, "step": 31710 }, { "epoch": 0.38, "learning_rate": 3.900136954508749e-06, "logits/chosen": -2.8106861114501953, "logits/rejected": -2.20198392868042, "logps/chosen": -163.99594116210938, "logps/rejected": -1038.114501953125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.0721969604492188, "rewards/margins": 8.895275115966797, "rewards/rejected": -9.967473030090332, "step": 31720 }, { "epoch": 0.38, "learning_rate": 3.8992714103609355e-06, "logits/chosen": -2.8354086875915527, "logits/rejected": -2.1838107109069824, "logps/chosen": -155.935791015625, "logps/rejected": -1138.025634765625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0374963283538818, "rewards/margins": 9.934907913208008, "rewards/rejected": -10.972402572631836, "step": 31730 }, { "epoch": 0.38, "learning_rate": 3.8984056219042184e-06, "logits/chosen": -2.8274106979370117, "logits/rejected": -2.337935209274292, "logps/chosen": -149.1072235107422, "logps/rejected": -932.64990234375, "loss": 0.0912, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0658046007156372, "rewards/margins": 7.881128787994385, "rewards/rejected": -8.946934700012207, "step": 31740 }, { "epoch": 0.38, "learning_rate": 3.897539589289765e-06, "logits/chosen": -2.8413281440734863, "logits/rejected": -2.3752777576446533, "logps/chosen": -115.4068374633789, "logps/rejected": -1003.4558715820312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6958876848220825, "rewards/margins": 8.958736419677734, "rewards/rejected": -9.65462589263916, "step": 31750 }, { "epoch": 0.38, "learning_rate": 3.896673312668779e-06, "logits/chosen": -2.841257095336914, "logits/rejected": -2.2556843757629395, "logps/chosen": -157.29373168945312, "logps/rejected": -1212.790283203125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0568691492080688, "rewards/margins": 10.654876708984375, "rewards/rejected": -11.711746215820312, "step": 31760 }, { "epoch": 0.38, "learning_rate": 3.895806792192513e-06, "logits/chosen": -2.8612887859344482, "logits/rejected": -2.3612544536590576, "logps/chosen": -133.71530151367188, "logps/rejected": -1002.2581787109375, "loss": 0.1485, "rewards/accuracies": 1.0, "rewards/chosen": -0.9145331382751465, "rewards/margins": 8.721324920654297, "rewards/rejected": -9.635858535766602, "step": 31770 }, { "epoch": 0.38, "learning_rate": 3.894940028012257e-06, "logits/chosen": -2.8312807083129883, "logits/rejected": -1.940163016319275, "logps/chosen": -192.6482391357422, "logps/rejected": -1226.27197265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.2928435802459717, "rewards/margins": 10.565217971801758, "rewards/rejected": -11.858061790466309, "step": 31780 }, { "epoch": 0.38, "learning_rate": 3.894073020279348e-06, "logits/chosen": -2.845933198928833, "logits/rejected": -2.6206374168395996, "logps/chosen": -88.31398010253906, "logps/rejected": -723.4906616210938, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.518436074256897, "rewards/margins": 6.358093738555908, "rewards/rejected": -6.876530647277832, "step": 31790 }, { "epoch": 0.38, "learning_rate": 3.893205769145159e-06, "logits/chosen": -2.822355270385742, "logits/rejected": -2.3954415321350098, "logps/chosen": -123.47941589355469, "logps/rejected": -977.6931762695312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7923177480697632, "rewards/margins": 8.594743728637695, "rewards/rejected": -9.387060165405273, "step": 31800 }, { "epoch": 0.38, "learning_rate": 3.892338274761114e-06, "logits/chosen": -2.8400492668151855, "logits/rejected": -2.3153421878814697, "logps/chosen": -144.95790100097656, "logps/rejected": -906.7478637695312, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.9789970517158508, "rewards/margins": 7.695188999176025, "rewards/rejected": -8.674185752868652, "step": 31810 }, { "epoch": 0.38, "learning_rate": 3.891470537278672e-06, "logits/chosen": -2.8511815071105957, "logits/rejected": -2.3218445777893066, "logps/chosen": -134.84068298339844, "logps/rejected": -1071.2730712890625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8300089836120605, "rewards/margins": 9.495267868041992, "rewards/rejected": -10.325277328491211, "step": 31820 }, { "epoch": 0.38, "learning_rate": 3.89060255684934e-06, "logits/chosen": -2.850742816925049, "logits/rejected": -2.237950086593628, "logps/chosen": -134.75994873046875, "logps/rejected": -999.3709106445312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.806967556476593, "rewards/margins": 8.787324905395508, "rewards/rejected": -9.594291687011719, "step": 31830 }, { "epoch": 0.38, "learning_rate": 3.889734333624663e-06, "logits/chosen": -2.8629372119903564, "logits/rejected": -2.3112549781799316, "logps/chosen": -115.43254089355469, "logps/rejected": -950.1932373046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7088245153427124, "rewards/margins": 8.4119234085083, "rewards/rejected": -9.120747566223145, "step": 31840 }, { "epoch": 0.38, "learning_rate": 3.888865867756232e-06, "logits/chosen": -2.849045991897583, "logits/rejected": -2.147409439086914, "logps/chosen": -146.57632446289062, "logps/rejected": -1101.5284423828125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8946933746337891, "rewards/margins": 9.709738731384277, "rewards/rejected": -10.604432106018066, "step": 31850 }, { "epoch": 0.38, "learning_rate": 3.887997159395678e-06, "logits/chosen": -2.8570122718811035, "logits/rejected": -2.355294704437256, "logps/chosen": -155.42471313476562, "logps/rejected": -932.3946533203125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.063273310661316, "rewards/margins": 7.892806053161621, "rewards/rejected": -8.956079483032227, "step": 31860 }, { "epoch": 0.38, "learning_rate": 3.887128208694674e-06, "logits/chosen": -2.8507354259490967, "logits/rejected": -2.1767261028289795, "logps/chosen": -140.94223022460938, "logps/rejected": -1055.820068359375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.8764346241950989, "rewards/margins": 9.281169891357422, "rewards/rejected": -10.157602310180664, "step": 31870 }, { "epoch": 0.38, "learning_rate": 3.8862590158049376e-06, "logits/chosen": -2.8336126804351807, "logits/rejected": -2.281632661819458, "logps/chosen": -147.037109375, "logps/rejected": -1061.8056640625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9782211184501648, "rewards/margins": 9.217042922973633, "rewards/rejected": -10.19526481628418, "step": 31880 }, { "epoch": 0.38, "learning_rate": 3.8853895808782275e-06, "logits/chosen": -2.9005870819091797, "logits/rejected": -2.209899425506592, "logps/chosen": -149.32106018066406, "logps/rejected": -1110.2220458984375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9563854336738586, "rewards/margins": 9.735295295715332, "rewards/rejected": -10.691680908203125, "step": 31890 }, { "epoch": 0.38, "learning_rate": 3.884519904066345e-06, "logits/chosen": -2.8401315212249756, "logits/rejected": -2.1499907970428467, "logps/chosen": -168.7283935546875, "logps/rejected": -1124.768310546875, "loss": 0.1172, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1463181972503662, "rewards/margins": 9.708612442016602, "rewards/rejected": -10.854930877685547, "step": 31900 }, { "epoch": 0.38, "learning_rate": 3.883649985521131e-06, "logits/chosen": -2.8730130195617676, "logits/rejected": -2.563068389892578, "logps/chosen": -98.80353546142578, "logps/rejected": -896.4197387695312, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5915366411209106, "rewards/margins": 8.007875442504883, "rewards/rejected": -8.59941291809082, "step": 31910 }, { "epoch": 0.38, "learning_rate": 3.882779825394474e-06, "logits/chosen": -2.837372303009033, "logits/rejected": -2.3161745071411133, "logps/chosen": -140.98863220214844, "logps/rejected": -1015.1759643554688, "loss": 0.1671, "rewards/accuracies": 1.0, "rewards/chosen": -0.9453901052474976, "rewards/margins": 8.814632415771484, "rewards/rejected": -9.76002311706543, "step": 31920 }, { "epoch": 0.38, "learning_rate": 3.881909423838299e-06, "logits/chosen": -2.815173864364624, "logits/rejected": -2.2603564262390137, "logps/chosen": -148.9246368408203, "logps/rejected": -1074.352783203125, "loss": 0.2374, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0162054300308228, "rewards/margins": 9.3384428024292, "rewards/rejected": -10.354650497436523, "step": 31930 }, { "epoch": 0.38, "learning_rate": 3.881038781004577e-06, "logits/chosen": -2.8548760414123535, "logits/rejected": -2.4954042434692383, "logps/chosen": -102.8625717163086, "logps/rejected": -915.7025146484375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5948793292045593, "rewards/margins": 8.18185043334961, "rewards/rejected": -8.776728630065918, "step": 31940 }, { "epoch": 0.38, "learning_rate": 3.880167897045319e-06, "logits/chosen": -2.9052186012268066, "logits/rejected": -2.4188194274902344, "logps/chosen": -107.67093658447266, "logps/rejected": -931.9290161132812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6545271277427673, "rewards/margins": 8.28626537322998, "rewards/rejected": -8.94079303741455, "step": 31950 }, { "epoch": 0.38, "learning_rate": 3.87929677211258e-06, "logits/chosen": -2.8363091945648193, "logits/rejected": -2.1497604846954346, "logps/chosen": -124.3491439819336, "logps/rejected": -986.08056640625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7715919613838196, "rewards/margins": 8.69169807434082, "rewards/rejected": -9.463289260864258, "step": 31960 }, { "epoch": 0.38, "learning_rate": 3.878425406358456e-06, "logits/chosen": -2.8418781757354736, "logits/rejected": -2.1200976371765137, "logps/chosen": -125.77156829833984, "logps/rejected": -1064.433349609375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7249535322189331, "rewards/margins": 9.526762008666992, "rewards/rejected": -10.251714706420898, "step": 31970 }, { "epoch": 0.38, "learning_rate": 3.8775537999350835e-06, "logits/chosen": -2.857038974761963, "logits/rejected": -2.175792694091797, "logps/chosen": -158.89439392089844, "logps/rejected": -963.4664916992188, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.0259228944778442, "rewards/margins": 8.222589492797852, "rewards/rejected": -9.248510360717773, "step": 31980 }, { "epoch": 0.38, "learning_rate": 3.876681952994644e-06, "logits/chosen": -2.8512821197509766, "logits/rejected": -2.355839252471924, "logps/chosen": -135.1194305419922, "logps/rejected": -980.7939453125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8778467178344727, "rewards/margins": 8.532308578491211, "rewards/rejected": -9.410155296325684, "step": 31990 }, { "epoch": 0.38, "learning_rate": 3.8758098656893585e-06, "logits/chosen": -2.8901448249816895, "logits/rejected": -2.6334280967712402, "logps/chosen": -102.44041442871094, "logps/rejected": -772.1617431640625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.626318097114563, "rewards/margins": 6.721220970153809, "rewards/rejected": -7.347538948059082, "step": 32000 }, { "epoch": 0.38, "learning_rate": 3.8749375381714915e-06, "logits/chosen": -2.8532702922821045, "logits/rejected": -2.171642780303955, "logps/chosen": -153.44314575195312, "logps/rejected": -1035.589111328125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.033299207687378, "rewards/margins": 8.926616668701172, "rewards/rejected": -9.959916114807129, "step": 32010 }, { "epoch": 0.38, "learning_rate": 3.87406497059335e-06, "logits/chosen": -2.851375102996826, "logits/rejected": -1.9470256567001343, "logps/chosen": -197.42422485351562, "logps/rejected": -1152.0775146484375, "loss": 0.1145, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3300623893737793, "rewards/margins": 9.772130966186523, "rewards/rejected": -11.102193832397461, "step": 32020 }, { "epoch": 0.38, "learning_rate": 3.87319216310728e-06, "logits/chosen": -2.8133544921875, "logits/rejected": -2.021533489227295, "logps/chosen": -158.30291748046875, "logps/rejected": -1111.9798583984375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0490059852600098, "rewards/margins": 9.668371200561523, "rewards/rejected": -10.717377662658691, "step": 32030 }, { "epoch": 0.38, "learning_rate": 3.872319115865673e-06, "logits/chosen": -2.8757786750793457, "logits/rejected": -2.378788471221924, "logps/chosen": -158.3203125, "logps/rejected": -908.84814453125, "loss": 0.1307, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0938745737075806, "rewards/margins": 7.590606689453125, "rewards/rejected": -8.684481620788574, "step": 32040 }, { "epoch": 0.38, "learning_rate": 3.87144582902096e-06, "logits/chosen": -2.8313686847686768, "logits/rejected": -2.181471109390259, "logps/chosen": -115.55183410644531, "logps/rejected": -1097.928466796875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.683755099773407, "rewards/margins": 9.910384178161621, "rewards/rejected": -10.594138145446777, "step": 32050 }, { "epoch": 0.38, "learning_rate": 3.870572302725613e-06, "logits/chosen": -2.813692569732666, "logits/rejected": -2.2123970985412598, "logps/chosen": -157.35208129882812, "logps/rejected": -985.4368896484375, "loss": 0.1018, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.087618112564087, "rewards/margins": 8.361495971679688, "rewards/rejected": -9.449114799499512, "step": 32060 }, { "epoch": 0.38, "learning_rate": 3.8696985371321495e-06, "logits/chosen": -2.866046667098999, "logits/rejected": -2.1044230461120605, "logps/chosen": -123.24824523925781, "logps/rejected": -1011.6260986328125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7425860166549683, "rewards/margins": 8.98518180847168, "rewards/rejected": -9.727766990661621, "step": 32070 }, { "epoch": 0.38, "learning_rate": 3.868824532393125e-06, "logits/chosen": -2.846595525741577, "logits/rejected": -2.2967350482940674, "logps/chosen": -154.4574432373047, "logps/rejected": -962.1884765625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.094165325164795, "rewards/margins": 8.148100852966309, "rewards/rejected": -9.242266654968262, "step": 32080 }, { "epoch": 0.38, "learning_rate": 3.86795028866114e-06, "logits/chosen": -2.844484329223633, "logits/rejected": -2.327178955078125, "logps/chosen": -111.19193267822266, "logps/rejected": -920.1990966796875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6811956763267517, "rewards/margins": 8.13879680633545, "rewards/rejected": -8.819992065429688, "step": 32090 }, { "epoch": 0.38, "learning_rate": 3.867075806088833e-06, "logits/chosen": -2.8532116413116455, "logits/rejected": -2.369352102279663, "logps/chosen": -123.31510925292969, "logps/rejected": -953.8131103515625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.7784653902053833, "rewards/margins": 8.36114501953125, "rewards/rejected": -9.13961124420166, "step": 32100 }, { "epoch": 0.38, "learning_rate": 3.8662010848288874e-06, "logits/chosen": -2.8171792030334473, "logits/rejected": -2.189650297164917, "logps/chosen": -158.36642456054688, "logps/rejected": -1131.2567138671875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.0379694700241089, "rewards/margins": 9.861349105834961, "rewards/rejected": -10.899320602416992, "step": 32110 }, { "epoch": 0.38, "learning_rate": 3.865326125034027e-06, "logits/chosen": -2.8398802280426025, "logits/rejected": -2.3271644115448, "logps/chosen": -131.65255737304688, "logps/rejected": -1015.1148681640625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.8595322370529175, "rewards/margins": 8.908570289611816, "rewards/rejected": -9.768102645874023, "step": 32120 }, { "epoch": 0.38, "learning_rate": 3.864450926857017e-06, "logits/chosen": -2.838022232055664, "logits/rejected": -2.32511830329895, "logps/chosen": -117.4525146484375, "logps/rejected": -990.1813354492188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7617067098617554, "rewards/margins": 8.753612518310547, "rewards/rejected": -9.515317916870117, "step": 32130 }, { "epoch": 0.38, "learning_rate": 3.863575490450665e-06, "logits/chosen": -2.8899941444396973, "logits/rejected": -2.372241497039795, "logps/chosen": -118.42236328125, "logps/rejected": -1010.2813720703125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6989623308181763, "rewards/margins": 9.005056381225586, "rewards/rejected": -9.704018592834473, "step": 32140 }, { "epoch": 0.38, "learning_rate": 3.862699815967818e-06, "logits/chosen": -2.871619701385498, "logits/rejected": -2.460310697555542, "logps/chosen": -115.8957290649414, "logps/rejected": -946.0721435546875, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": -0.676772952079773, "rewards/margins": 8.400187492370605, "rewards/rejected": -9.076960563659668, "step": 32150 }, { "epoch": 0.38, "learning_rate": 3.86182390356137e-06, "logits/chosen": -2.821486711502075, "logits/rejected": -2.309356451034546, "logps/chosen": -111.9522705078125, "logps/rejected": -985.9134521484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6729966402053833, "rewards/margins": 8.78612232208252, "rewards/rejected": -9.459117889404297, "step": 32160 }, { "epoch": 0.39, "learning_rate": 3.86094775338425e-06, "logits/chosen": -2.874340534210205, "logits/rejected": -2.4396395683288574, "logps/chosen": -135.72787475585938, "logps/rejected": -955.7987060546875, "loss": 0.1595, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9215067625045776, "rewards/margins": 8.247716903686523, "rewards/rejected": -9.169224739074707, "step": 32170 }, { "epoch": 0.39, "learning_rate": 3.860071365589432e-06, "logits/chosen": -2.861651659011841, "logits/rejected": -2.422818422317505, "logps/chosen": -113.89717864990234, "logps/rejected": -1019.4664916992188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6448542475700378, "rewards/margins": 9.163827896118164, "rewards/rejected": -9.808683395385742, "step": 32180 }, { "epoch": 0.39, "learning_rate": 3.859194740329931e-06, "logits/chosen": -2.832192897796631, "logits/rejected": -2.292357921600342, "logps/chosen": -132.66419982910156, "logps/rejected": -1056.8668212890625, "loss": 0.0768, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8478231430053711, "rewards/margins": 9.322676658630371, "rewards/rejected": -10.170499801635742, "step": 32190 }, { "epoch": 0.39, "learning_rate": 3.858317877758804e-06, "logits/chosen": -2.8601784706115723, "logits/rejected": -2.1230380535125732, "logps/chosen": -136.0660858154297, "logps/rejected": -1102.5767822265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7743038535118103, "rewards/margins": 9.838112831115723, "rewards/rejected": -10.612417221069336, "step": 32200 }, { "epoch": 0.39, "learning_rate": 3.857440778029147e-06, "logits/chosen": -2.801725387573242, "logits/rejected": -2.084568500518799, "logps/chosen": -150.6627960205078, "logps/rejected": -1126.31689453125, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": -0.9598224759101868, "rewards/margins": 9.896084785461426, "rewards/rejected": -10.855905532836914, "step": 32210 }, { "epoch": 0.39, "learning_rate": 3.856563441294101e-06, "logits/chosen": -2.8933753967285156, "logits/rejected": -2.569108009338379, "logps/chosen": -85.65943145751953, "logps/rejected": -813.0988159179688, "loss": 0.0227, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.4854450821876526, "rewards/margins": 7.2742919921875, "rewards/rejected": -7.75973653793335, "step": 32220 }, { "epoch": 0.39, "learning_rate": 3.8556858677068455e-06, "logits/chosen": -2.820010185241699, "logits/rejected": -2.2872424125671387, "logps/chosen": -119.39030456542969, "logps/rejected": -1017.9305419921875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6988458037376404, "rewards/margins": 9.087789535522461, "rewards/rejected": -9.786635398864746, "step": 32230 }, { "epoch": 0.39, "learning_rate": 3.8548080574206025e-06, "logits/chosen": -2.862114191055298, "logits/rejected": -2.24090576171875, "logps/chosen": -122.48020935058594, "logps/rejected": -967.4854736328125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7023281455039978, "rewards/margins": 8.587121963500977, "rewards/rejected": -9.289449691772461, "step": 32240 }, { "epoch": 0.39, "learning_rate": 3.853930010588636e-06, "logits/chosen": -2.8693737983703613, "logits/rejected": -2.47261381149292, "logps/chosen": -99.36997985839844, "logps/rejected": -917.6651611328125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5686281323432922, "rewards/margins": 8.22203254699707, "rewards/rejected": -8.79065990447998, "step": 32250 }, { "epoch": 0.39, "learning_rate": 3.85305172736425e-06, "logits/chosen": -2.8303754329681396, "logits/rejected": -2.158522129058838, "logps/chosen": -131.7655792236328, "logps/rejected": -1126.4171142578125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7915476560592651, "rewards/margins": 10.07734489440918, "rewards/rejected": -10.868891716003418, "step": 32260 }, { "epoch": 0.39, "learning_rate": 3.852173207900791e-06, "logits/chosen": -2.864236354827881, "logits/rejected": -2.470254421234131, "logps/chosen": -129.81150817871094, "logps/rejected": -986.5567626953125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8167428970336914, "rewards/margins": 8.647968292236328, "rewards/rejected": -9.46471118927002, "step": 32270 }, { "epoch": 0.39, "learning_rate": 3.851294452351644e-06, "logits/chosen": -2.8651010990142822, "logits/rejected": -2.4823741912841797, "logps/chosen": -118.36932373046875, "logps/rejected": -975.3499755859375, "loss": 0.0531, "rewards/accuracies": 1.0, "rewards/chosen": -0.6982989311218262, "rewards/margins": 8.659932136535645, "rewards/rejected": -9.358230590820312, "step": 32280 }, { "epoch": 0.39, "learning_rate": 3.850415460870239e-06, "logits/chosen": -2.851184606552124, "logits/rejected": -2.304041862487793, "logps/chosen": -103.0980224609375, "logps/rejected": -846.57958984375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6025571227073669, "rewards/margins": 7.493583679199219, "rewards/rejected": -8.096141815185547, "step": 32290 }, { "epoch": 0.39, "learning_rate": 3.8495362336100465e-06, "logits/chosen": -2.820132255554199, "logits/rejected": -2.097429037094116, "logps/chosen": -134.5751495361328, "logps/rejected": -1062.0758056640625, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.8042244911193848, "rewards/margins": 9.412344932556152, "rewards/rejected": -10.216569900512695, "step": 32300 }, { "epoch": 0.39, "learning_rate": 3.848656770724575e-06, "logits/chosen": -2.920056104660034, "logits/rejected": -2.4799704551696777, "logps/chosen": -102.37236022949219, "logps/rejected": -870.3341064453125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.591608464717865, "rewards/margins": 7.727357387542725, "rewards/rejected": -8.31896686553955, "step": 32310 }, { "epoch": 0.39, "learning_rate": 3.847777072367376e-06, "logits/chosen": -2.871796131134033, "logits/rejected": -2.3561818599700928, "logps/chosen": -142.47921752929688, "logps/rejected": -1028.1068115234375, "loss": 0.1588, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9778965711593628, "rewards/margins": 8.90510368347168, "rewards/rejected": -9.882999420166016, "step": 32320 }, { "epoch": 0.39, "learning_rate": 3.846897138692045e-06, "logits/chosen": -2.8487119674682617, "logits/rejected": -2.364480495452881, "logps/chosen": -143.65420532226562, "logps/rejected": -895.79150390625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.001626968383789, "rewards/margins": 7.5641279220581055, "rewards/rejected": -8.565754890441895, "step": 32330 }, { "epoch": 0.39, "learning_rate": 3.8460169698522135e-06, "logits/chosen": -2.81941556930542, "logits/rejected": -1.9780397415161133, "logps/chosen": -203.88719177246094, "logps/rejected": -1088.4686279296875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.4262380599975586, "rewards/margins": 9.053464889526367, "rewards/rejected": -10.479702949523926, "step": 32340 }, { "epoch": 0.39, "learning_rate": 3.845136566001557e-06, "logits/chosen": -2.8485612869262695, "logits/rejected": -2.2854409217834473, "logps/chosen": -172.09967041015625, "logps/rejected": -895.6019287109375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.2549924850463867, "rewards/margins": 7.329827308654785, "rewards/rejected": -8.584819793701172, "step": 32350 }, { "epoch": 0.39, "learning_rate": 3.844255927293794e-06, "logits/chosen": -2.852808952331543, "logits/rejected": -2.236196994781494, "logps/chosen": -191.31993103027344, "logps/rejected": -1098.710693359375, "loss": 0.1013, "rewards/accuracies": 1.0, "rewards/chosen": -1.4432542324066162, "rewards/margins": 9.157867431640625, "rewards/rejected": -10.60112190246582, "step": 32360 }, { "epoch": 0.39, "learning_rate": 3.843375053882677e-06, "logits/chosen": -2.8764328956604004, "logits/rejected": -2.4879486560821533, "logps/chosen": -161.81077575683594, "logps/rejected": -881.41552734375, "loss": 0.2692, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2264641523361206, "rewards/margins": 7.211177825927734, "rewards/rejected": -8.437642097473145, "step": 32370 }, { "epoch": 0.39, "learning_rate": 3.842493945922009e-06, "logits/chosen": -2.944108486175537, "logits/rejected": -2.2647886276245117, "logps/chosen": -156.80532836914062, "logps/rejected": -978.5593872070312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.139922857284546, "rewards/margins": 8.268399238586426, "rewards/rejected": -9.408320426940918, "step": 32380 }, { "epoch": 0.39, "learning_rate": 3.841612603565626e-06, "logits/chosen": -2.903642416000366, "logits/rejected": -2.323399305343628, "logps/chosen": -176.77407836914062, "logps/rejected": -933.9827880859375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.271186351776123, "rewards/margins": 7.682246208190918, "rewards/rejected": -8.953432083129883, "step": 32390 }, { "epoch": 0.39, "learning_rate": 3.840731026967408e-06, "logits/chosen": -2.8547072410583496, "logits/rejected": -2.2146334648132324, "logps/chosen": -171.11911010742188, "logps/rejected": -1127.07373046875, "loss": 0.1453, "rewards/accuracies": 1.0, "rewards/chosen": -1.1993271112442017, "rewards/margins": 9.671178817749023, "rewards/rejected": -10.870504379272461, "step": 32400 }, { "epoch": 0.39, "learning_rate": 3.839849216281276e-06, "logits/chosen": -2.866851329803467, "logits/rejected": -2.3451151847839355, "logps/chosen": -191.94224548339844, "logps/rejected": -924.0928955078125, "loss": 0.1182, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4203386306762695, "rewards/margins": 7.434126377105713, "rewards/rejected": -8.854464530944824, "step": 32410 }, { "epoch": 0.39, "learning_rate": 3.8389671716611935e-06, "logits/chosen": -2.828634738922119, "logits/rejected": -2.3648478984832764, "logps/chosen": -186.39036560058594, "logps/rejected": -948.8386840820312, "loss": 0.147, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.362051248550415, "rewards/margins": 7.743176460266113, "rewards/rejected": -9.105226516723633, "step": 32420 }, { "epoch": 0.39, "learning_rate": 3.8380848932611605e-06, "logits/chosen": -2.8603568077087402, "logits/rejected": -2.3329508304595947, "logps/chosen": -123.86860656738281, "logps/rejected": -893.52197265625, "loss": 0.134, "rewards/accuracies": 1.0, "rewards/chosen": -0.7604683637619019, "rewards/margins": 7.800985813140869, "rewards/rejected": -8.561452865600586, "step": 32430 }, { "epoch": 0.39, "learning_rate": 3.837202381235221e-06, "logits/chosen": -2.8761236667633057, "logits/rejected": -2.609241008758545, "logps/chosen": -85.64232635498047, "logps/rejected": -814.6127319335938, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.4933987557888031, "rewards/margins": 7.2892937660217285, "rewards/rejected": -7.782692909240723, "step": 32440 }, { "epoch": 0.39, "learning_rate": 3.836319635737462e-06, "logits/chosen": -2.8278801441192627, "logits/rejected": -2.315800189971924, "logps/chosen": -165.27174377441406, "logps/rejected": -995.72314453125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.2225888967514038, "rewards/margins": 8.34480094909668, "rewards/rejected": -9.567390441894531, "step": 32450 }, { "epoch": 0.39, "learning_rate": 3.8354366569220036e-06, "logits/chosen": -2.8396284580230713, "logits/rejected": -2.27188777923584, "logps/chosen": -160.11819458007812, "logps/rejected": -1083.128662109375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.102787971496582, "rewards/margins": 9.337956428527832, "rewards/rejected": -10.440744400024414, "step": 32460 }, { "epoch": 0.39, "learning_rate": 3.834553444943015e-06, "logits/chosen": -2.8663978576660156, "logits/rejected": -2.3809256553649902, "logps/chosen": -159.49038696289062, "logps/rejected": -978.2755737304688, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.1748641729354858, "rewards/margins": 8.216432571411133, "rewards/rejected": -9.391298294067383, "step": 32470 }, { "epoch": 0.39, "learning_rate": 3.833669999954701e-06, "logits/chosen": -2.8516147136688232, "logits/rejected": -2.2631032466888428, "logps/chosen": -194.35240173339844, "logps/rejected": -1109.622802734375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.4947311878204346, "rewards/margins": 9.211477279663086, "rewards/rejected": -10.706209182739258, "step": 32480 }, { "epoch": 0.39, "learning_rate": 3.83278632211131e-06, "logits/chosen": -2.8399384021759033, "logits/rejected": -2.326906204223633, "logps/chosen": -150.68772888183594, "logps/rejected": -977.6156005859375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.0722510814666748, "rewards/margins": 8.330488204956055, "rewards/rejected": -9.402738571166992, "step": 32490 }, { "epoch": 0.39, "learning_rate": 3.8319024115671284e-06, "logits/chosen": -2.851186513900757, "logits/rejected": -2.4128406047821045, "logps/chosen": -167.6039276123047, "logps/rejected": -991.5450439453125, "loss": 0.2915, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2568460702896118, "rewards/margins": 8.281291007995605, "rewards/rejected": -9.538137435913086, "step": 32500 }, { "epoch": 0.39, "learning_rate": 3.831018268476485e-06, "logits/chosen": -2.8709628582000732, "logits/rejected": -2.4407167434692383, "logps/chosen": -149.6250457763672, "logps/rejected": -952.0265502929688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0668412446975708, "rewards/margins": 8.054364204406738, "rewards/rejected": -9.12120532989502, "step": 32510 }, { "epoch": 0.39, "learning_rate": 3.830133892993749e-06, "logits/chosen": -2.8984901905059814, "logits/rejected": -2.3852477073669434, "logps/chosen": -151.85848999023438, "logps/rejected": -899.310546875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.096724271774292, "rewards/margins": 7.5160813331604, "rewards/rejected": -8.612805366516113, "step": 32520 }, { "epoch": 0.39, "learning_rate": 3.8292492852733296e-06, "logits/chosen": -2.8292717933654785, "logits/rejected": -2.37770676612854, "logps/chosen": -137.8844757080078, "logps/rejected": -920.0828247070312, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.8996620178222656, "rewards/margins": 7.92104959487915, "rewards/rejected": -8.820712089538574, "step": 32530 }, { "epoch": 0.39, "learning_rate": 3.828364445469676e-06, "logits/chosen": -2.8438143730163574, "logits/rejected": -2.2834367752075195, "logps/chosen": -150.37680053710938, "logps/rejected": -894.9548950195312, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.029129981994629, "rewards/margins": 7.529448509216309, "rewards/rejected": -8.558577537536621, "step": 32540 }, { "epoch": 0.39, "learning_rate": 3.827479373737279e-06, "logits/chosen": -2.8238439559936523, "logits/rejected": -2.381174325942993, "logps/chosen": -124.47896575927734, "logps/rejected": -909.0338745117188, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -0.8155431747436523, "rewards/margins": 7.8962883949279785, "rewards/rejected": -8.711831092834473, "step": 32550 }, { "epoch": 0.39, "learning_rate": 3.826594070230672e-06, "logits/chosen": -2.8276121616363525, "logits/rejected": -2.2762298583984375, "logps/chosen": -164.27279663085938, "logps/rejected": -1049.460205078125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.128237009048462, "rewards/margins": 8.98685073852539, "rewards/rejected": -10.115087509155273, "step": 32560 }, { "epoch": 0.39, "learning_rate": 3.8257085351044246e-06, "logits/chosen": -2.8327794075012207, "logits/rejected": -2.144618511199951, "logps/chosen": -177.166259765625, "logps/rejected": -1118.0809326171875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.2368565797805786, "rewards/margins": 9.543916702270508, "rewards/rejected": -10.780773162841797, "step": 32570 }, { "epoch": 0.39, "learning_rate": 3.824822768513148e-06, "logits/chosen": -2.8397164344787598, "logits/rejected": -2.1190168857574463, "logps/chosen": -164.84634399414062, "logps/rejected": -1005.6658935546875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.1761994361877441, "rewards/margins": 8.497200012207031, "rewards/rejected": -9.6733980178833, "step": 32580 }, { "epoch": 0.39, "learning_rate": 3.823936770611496e-06, "logits/chosen": -2.815194606781006, "logits/rejected": -2.2819290161132812, "logps/chosen": -173.74169921875, "logps/rejected": -1076.6290283203125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.2890739440917969, "rewards/margins": 9.071172714233398, "rewards/rejected": -10.360246658325195, "step": 32590 }, { "epoch": 0.39, "learning_rate": 3.823050541554161e-06, "logits/chosen": -2.804095983505249, "logits/rejected": -2.0961177349090576, "logps/chosen": -170.87197875976562, "logps/rejected": -1097.913330078125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1807410717010498, "rewards/margins": 9.398977279663086, "rewards/rejected": -10.579719543457031, "step": 32600 }, { "epoch": 0.39, "learning_rate": 3.822164081495876e-06, "logits/chosen": -2.8118934631347656, "logits/rejected": -2.3203301429748535, "logps/chosen": -213.0812530517578, "logps/rejected": -934.6354370117188, "loss": 0.1607, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6777064800262451, "rewards/margins": 7.276059627532959, "rewards/rejected": -8.953766822814941, "step": 32610 }, { "epoch": 0.39, "learning_rate": 3.821277390591415e-06, "logits/chosen": -2.9285194873809814, "logits/rejected": -2.4706807136535645, "logps/chosen": -139.78472900390625, "logps/rejected": -900.9602661132812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.930916965007782, "rewards/margins": 7.707518577575684, "rewards/rejected": -8.638435363769531, "step": 32620 }, { "epoch": 0.39, "learning_rate": 3.82039046899559e-06, "logits/chosen": -2.8683528900146484, "logits/rejected": -2.352558135986328, "logps/chosen": -144.6925811767578, "logps/rejected": -944.3414306640625, "loss": 0.0435, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9514150619506836, "rewards/margins": 8.11053466796875, "rewards/rejected": -9.061949729919434, "step": 32630 }, { "epoch": 0.39, "learning_rate": 3.819503316863256e-06, "logits/chosen": -2.8149430751800537, "logits/rejected": -2.373549222946167, "logps/chosen": -120.27473449707031, "logps/rejected": -884.6903076171875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7920671701431274, "rewards/margins": 7.690371513366699, "rewards/rejected": -8.482439041137695, "step": 32640 }, { "epoch": 0.39, "learning_rate": 3.81861593434931e-06, "logits/chosen": -2.8934237957000732, "logits/rejected": -2.262728452682495, "logps/chosen": -165.02664184570312, "logps/rejected": -1029.7147216796875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.1769365072250366, "rewards/margins": 8.723367691040039, "rewards/rejected": -9.90030288696289, "step": 32650 }, { "epoch": 0.39, "learning_rate": 3.8177283216086826e-06, "logits/chosen": -2.849771022796631, "logits/rejected": -2.418980121612549, "logps/chosen": -158.69467163085938, "logps/rejected": -1052.481201171875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.1266785860061646, "rewards/margins": 9.0037260055542, "rewards/rejected": -10.13040542602539, "step": 32660 }, { "epoch": 0.39, "learning_rate": 3.81684047879635e-06, "logits/chosen": -2.855173349380493, "logits/rejected": -2.4111196994781494, "logps/chosen": -152.0334930419922, "logps/rejected": -954.5985107421875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0887130498886108, "rewards/margins": 8.08228588104248, "rewards/rejected": -9.170999526977539, "step": 32670 }, { "epoch": 0.39, "learning_rate": 3.815952406067328e-06, "logits/chosen": -2.837256908416748, "logits/rejected": -2.2983548641204834, "logps/chosen": -150.7821044921875, "logps/rejected": -925.2581176757812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.0770273208618164, "rewards/margins": 7.795363426208496, "rewards/rejected": -8.872392654418945, "step": 32680 }, { "epoch": 0.39, "learning_rate": 3.81506410357667e-06, "logits/chosen": -2.8134961128234863, "logits/rejected": -2.475498676300049, "logps/chosen": -108.9641342163086, "logps/rejected": -831.7379150390625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7104811072349548, "rewards/margins": 7.243869781494141, "rewards/rejected": -7.954350471496582, "step": 32690 }, { "epoch": 0.39, "learning_rate": 3.8141755714794726e-06, "logits/chosen": -2.8395626544952393, "logits/rejected": -2.45137882232666, "logps/chosen": -139.5896453857422, "logps/rejected": -888.2501831054688, "loss": 0.1241, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9568082094192505, "rewards/margins": 7.545867919921875, "rewards/rejected": -8.502676010131836, "step": 32700 }, { "epoch": 0.39, "learning_rate": 3.8132868099308705e-06, "logits/chosen": -2.868483543395996, "logits/rejected": -1.9442672729492188, "logps/chosen": -245.14089965820312, "logps/rejected": -1114.9866943359375, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -1.9137732982635498, "rewards/margins": 8.832928657531738, "rewards/rejected": -10.746702194213867, "step": 32710 }, { "epoch": 0.39, "learning_rate": 3.812397819086038e-06, "logits/chosen": -2.832953691482544, "logits/rejected": -2.1499829292297363, "logps/chosen": -182.02728271484375, "logps/rejected": -1104.638916015625, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -1.261316180229187, "rewards/margins": 9.383905410766602, "rewards/rejected": -10.645220756530762, "step": 32720 }, { "epoch": 0.39, "learning_rate": 3.811508599100192e-06, "logits/chosen": -2.8921892642974854, "logits/rejected": -2.4339194297790527, "logps/chosen": -130.59690856933594, "logps/rejected": -925.6898193359375, "loss": 0.0215, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8542805910110474, "rewards/margins": 8.014659881591797, "rewards/rejected": -8.868940353393555, "step": 32730 }, { "epoch": 0.39, "learning_rate": 3.810619150128587e-06, "logits/chosen": -2.7937521934509277, "logits/rejected": -2.130307674407959, "logps/chosen": -205.39846801757812, "logps/rejected": -1117.31005859375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.5407308340072632, "rewards/margins": 9.232293128967285, "rewards/rejected": -10.773022651672363, "step": 32740 }, { "epoch": 0.39, "learning_rate": 3.809729472326519e-06, "logits/chosen": -2.8759617805480957, "logits/rejected": -2.4700353145599365, "logps/chosen": -158.5503692626953, "logps/rejected": -958.0426025390625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1378151178359985, "rewards/margins": 8.054243087768555, "rewards/rejected": -9.192057609558105, "step": 32750 }, { "epoch": 0.39, "learning_rate": 3.8088395658493218e-06, "logits/chosen": -2.8670220375061035, "logits/rejected": -2.2320656776428223, "logps/chosen": -137.21612548828125, "logps/rejected": -1017.5968017578125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8356670141220093, "rewards/margins": 8.942865371704102, "rewards/rejected": -9.778532028198242, "step": 32760 }, { "epoch": 0.39, "learning_rate": 3.8079494308523717e-06, "logits/chosen": -2.859365463256836, "logits/rejected": -2.323028087615967, "logps/chosen": -135.85299682617188, "logps/rejected": -927.5789794921875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.9064275026321411, "rewards/margins": 7.9902544021606445, "rewards/rejected": -8.89668083190918, "step": 32770 }, { "epoch": 0.39, "learning_rate": 3.8070590674910845e-06, "logits/chosen": -2.8505451679229736, "logits/rejected": -2.1026999950408936, "logps/chosen": -209.2323760986328, "logps/rejected": -1144.447265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.5642001628875732, "rewards/margins": 9.468476295471191, "rewards/rejected": -11.032675743103027, "step": 32780 }, { "epoch": 0.39, "learning_rate": 3.8061684759209135e-06, "logits/chosen": -2.8964736461639404, "logits/rejected": -2.410244941711426, "logps/chosen": -155.83328247070312, "logps/rejected": -914.7761840820312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.1286259889602661, "rewards/margins": 7.6390204429626465, "rewards/rejected": -8.767646789550781, "step": 32790 }, { "epoch": 0.39, "learning_rate": 3.8052776562973544e-06, "logits/chosen": -2.8852648735046387, "logits/rejected": -2.4194607734680176, "logps/chosen": -157.9014434814453, "logps/rejected": -949.7918090820312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.1449987888336182, "rewards/margins": 7.956869602203369, "rewards/rejected": -9.10186767578125, "step": 32800 }, { "epoch": 0.39, "learning_rate": 3.804386608775942e-06, "logits/chosen": -2.822158098220825, "logits/rejected": -2.379218578338623, "logps/chosen": -165.1958465576172, "logps/rejected": -958.7330932617188, "loss": 0.1098, "rewards/accuracies": 1.0, "rewards/chosen": -1.2197457551956177, "rewards/margins": 7.983401298522949, "rewards/rejected": -9.203146934509277, "step": 32810 }, { "epoch": 0.39, "learning_rate": 3.8034953335122504e-06, "logits/chosen": -2.8228912353515625, "logits/rejected": -2.2597053050994873, "logps/chosen": -158.13223266601562, "logps/rejected": -987.6168823242188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.0934010744094849, "rewards/margins": 8.396806716918945, "rewards/rejected": -9.490206718444824, "step": 32820 }, { "epoch": 0.39, "learning_rate": 3.802603830661894e-06, "logits/chosen": -2.810117244720459, "logits/rejected": -2.313472270965576, "logps/chosen": -167.37974548339844, "logps/rejected": -1047.568603515625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.2219582796096802, "rewards/margins": 8.864115715026855, "rewards/rejected": -10.086074829101562, "step": 32830 }, { "epoch": 0.39, "learning_rate": 3.8017121003805255e-06, "logits/chosen": -2.8688807487487793, "logits/rejected": -2.1594247817993164, "logps/chosen": -178.13218688964844, "logps/rejected": -1074.302978515625, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -1.2708618640899658, "rewards/margins": 9.072896003723145, "rewards/rejected": -10.343757629394531, "step": 32840 }, { "epoch": 0.39, "learning_rate": 3.8008201428238413e-06, "logits/chosen": -2.838778018951416, "logits/rejected": -2.429511785507202, "logps/chosen": -139.1396484375, "logps/rejected": -998.25146484375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.9231016039848328, "rewards/margins": 8.683287620544434, "rewards/rejected": -9.606389999389648, "step": 32850 }, { "epoch": 0.39, "learning_rate": 3.7999279581475716e-06, "logits/chosen": -2.8553249835968018, "logits/rejected": -2.2203073501586914, "logps/chosen": -137.96409606933594, "logps/rejected": -1027.603759765625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9379909634590149, "rewards/margins": 8.945764541625977, "rewards/rejected": -9.883755683898926, "step": 32860 }, { "epoch": 0.39, "learning_rate": 3.7990355465074925e-06, "logits/chosen": -2.893167495727539, "logits/rejected": -2.138329267501831, "logps/chosen": -197.6990203857422, "logps/rejected": -1155.1845703125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.4612294435501099, "rewards/margins": 9.684525489807129, "rewards/rejected": -11.145753860473633, "step": 32870 }, { "epoch": 0.39, "learning_rate": 3.7981429080594133e-06, "logits/chosen": -2.815924882888794, "logits/rejected": -2.467862606048584, "logps/chosen": -134.15269470214844, "logps/rejected": -900.1329345703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9462852478027344, "rewards/margins": 7.6797919273376465, "rewards/rejected": -8.626077651977539, "step": 32880 }, { "epoch": 0.39, "learning_rate": 3.7972500429591885e-06, "logits/chosen": -2.8476710319519043, "logits/rejected": -2.501211643218994, "logps/chosen": -156.34219360351562, "logps/rejected": -875.4078369140625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.1575229167938232, "rewards/margins": 7.226267337799072, "rewards/rejected": -8.3837890625, "step": 32890 }, { "epoch": 0.39, "learning_rate": 3.7963569513627084e-06, "logits/chosen": -2.851897716522217, "logits/rejected": -2.344691753387451, "logps/chosen": -178.8645782470703, "logps/rejected": -978.8665161132812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.2950860261917114, "rewards/margins": 8.10539722442627, "rewards/rejected": -9.400484085083008, "step": 32900 }, { "epoch": 0.39, "learning_rate": 3.7954636334259064e-06, "logits/chosen": -2.869523525238037, "logits/rejected": -2.4876174926757812, "logps/chosen": -116.7326431274414, "logps/rejected": -872.54052734375, "loss": 0.0603, "rewards/accuracies": 1.0, "rewards/chosen": -0.7297607660293579, "rewards/margins": 7.612143039703369, "rewards/rejected": -8.341903686523438, "step": 32910 }, { "epoch": 0.39, "learning_rate": 3.794570089304751e-06, "logits/chosen": -2.7870125770568848, "logits/rejected": -2.1582295894622803, "logps/chosen": -203.08348083496094, "logps/rejected": -1083.218505859375, "loss": 0.1465, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.533665418624878, "rewards/margins": 8.919461250305176, "rewards/rejected": -10.453126907348633, "step": 32920 }, { "epoch": 0.39, "learning_rate": 3.793676319155254e-06, "logits/chosen": -2.8765816688537598, "logits/rejected": -2.3285560607910156, "logps/chosen": -152.9999237060547, "logps/rejected": -1036.290771484375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1090940237045288, "rewards/margins": 8.855607986450195, "rewards/rejected": -9.964703559875488, "step": 32930 }, { "epoch": 0.39, "learning_rate": 3.7927823231334647e-06, "logits/chosen": -2.8355705738067627, "logits/rejected": -2.2599947452545166, "logps/chosen": -149.5023193359375, "logps/rejected": -1005.7111206054688, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.0300447940826416, "rewards/margins": 8.636136054992676, "rewards/rejected": -9.666180610656738, "step": 32940 }, { "epoch": 0.39, "learning_rate": 3.791888101395472e-06, "logits/chosen": -2.8781771659851074, "logits/rejected": -2.3572025299072266, "logps/chosen": -181.0421142578125, "logps/rejected": -1012.7833251953125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3364604711532593, "rewards/margins": 8.415980339050293, "rewards/rejected": -9.752440452575684, "step": 32950 }, { "epoch": 0.39, "learning_rate": 3.7909936540974052e-06, "logits/chosen": -2.8315579891204834, "logits/rejected": -2.1936607360839844, "logps/chosen": -193.01048278808594, "logps/rejected": -1022.4075317382812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.4024460315704346, "rewards/margins": 8.43514347076416, "rewards/rejected": -9.837590217590332, "step": 32960 }, { "epoch": 0.39, "learning_rate": 3.7900989813954318e-06, "logits/chosen": -2.8275067806243896, "logits/rejected": -2.114340305328369, "logps/chosen": -177.97715759277344, "logps/rejected": -1008.2971801757812, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.2580573558807373, "rewards/margins": 8.45206069946289, "rewards/rejected": -9.710118293762207, "step": 32970 }, { "epoch": 0.39, "learning_rate": 3.7892040834457593e-06, "logits/chosen": -2.877157688140869, "logits/rejected": -2.2649950981140137, "logps/chosen": -167.0144805908203, "logps/rejected": -1078.905029296875, "loss": 0.3049, "rewards/accuracies": 1.0, "rewards/chosen": -1.2364686727523804, "rewards/margins": 9.160337448120117, "rewards/rejected": -10.396806716918945, "step": 32980 }, { "epoch": 0.39, "learning_rate": 3.7883089604046345e-06, "logits/chosen": -2.873547077178955, "logits/rejected": -2.3165056705474854, "logps/chosen": -176.5134735107422, "logps/rejected": -904.9298095703125, "loss": 0.1505, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.328758955001831, "rewards/margins": 7.3313140869140625, "rewards/rejected": -8.660073280334473, "step": 32990 }, { "epoch": 0.4, "learning_rate": 3.7874136124283435e-06, "logits/chosen": -2.8732504844665527, "logits/rejected": -2.4004387855529785, "logps/chosen": -135.23696899414062, "logps/rejected": -849.1142578125, "loss": 0.0198, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8981987833976746, "rewards/margins": 7.203125, "rewards/rejected": -8.101323127746582, "step": 33000 }, { "epoch": 0.4, "eval_logits/chosen": -2.8371589183807373, "eval_logits/rejected": -1.6627854108810425, "eval_logps/chosen": -369.1990966796875, "eval_logps/rejected": -1380.1964111328125, "eval_loss": 0.0008540212293155491, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -3.080188512802124, "eval_rewards/margins": 10.254522323608398, "eval_rewards/rejected": -13.334710121154785, "eval_runtime": 1.2161, "eval_samples_per_second": 4.112, "eval_steps_per_second": 2.467, "step": 33000 }, { "epoch": 0.4, "learning_rate": 3.7865180396732116e-06, "logits/chosen": -2.880133867263794, "logits/rejected": -2.5352160930633545, "logps/chosen": -140.33663940429688, "logps/rejected": -853.81005859375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9581306576728821, "rewards/margins": 7.192389488220215, "rewards/rejected": -8.150520324707031, "step": 33010 }, { "epoch": 0.4, "learning_rate": 3.7856222422956024e-06, "logits/chosen": -2.8450045585632324, "logits/rejected": -2.171567916870117, "logps/chosen": -190.48306274414062, "logps/rejected": -1165.924072265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.3286393880844116, "rewards/margins": 9.917198181152344, "rewards/rejected": -11.245837211608887, "step": 33020 }, { "epoch": 0.4, "learning_rate": 3.7847262204519206e-06, "logits/chosen": -2.825949192047119, "logits/rejected": -2.1388676166534424, "logps/chosen": -177.57318115234375, "logps/rejected": -1019.6173706054688, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.182140588760376, "rewards/margins": 8.615095138549805, "rewards/rejected": -9.797235488891602, "step": 33030 }, { "epoch": 0.4, "learning_rate": 3.7838299742986094e-06, "logits/chosen": -2.8636560440063477, "logits/rejected": -2.4855856895446777, "logps/chosen": -153.77664184570312, "logps/rejected": -857.380859375, "loss": 0.1175, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1066100597381592, "rewards/margins": 7.082883358001709, "rewards/rejected": -8.189493179321289, "step": 33040 }, { "epoch": 0.4, "learning_rate": 3.78293350399215e-06, "logits/chosen": -2.867171049118042, "logits/rejected": -2.3027265071868896, "logps/chosen": -157.3863983154297, "logps/rejected": -957.2687377929688, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.100428819656372, "rewards/margins": 8.077383041381836, "rewards/rejected": -9.177810668945312, "step": 33050 }, { "epoch": 0.4, "learning_rate": 3.782036809689064e-06, "logits/chosen": -2.8881523609161377, "logits/rejected": -2.42809796333313, "logps/chosen": -126.8236083984375, "logps/rejected": -962.8294677734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7790042161941528, "rewards/margins": 8.462605476379395, "rewards/rejected": -9.241607666015625, "step": 33060 }, { "epoch": 0.4, "learning_rate": 3.7811398915459115e-06, "logits/chosen": -2.8752639293670654, "logits/rejected": -2.403200626373291, "logps/chosen": -148.7676544189453, "logps/rejected": -896.3541870117188, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.0165717601776123, "rewards/margins": 7.538824558258057, "rewards/rejected": -8.555397033691406, "step": 33070 }, { "epoch": 0.4, "learning_rate": 3.7802427497192917e-06, "logits/chosen": -2.8556056022644043, "logits/rejected": -2.561201810836792, "logps/chosen": -112.38453674316406, "logps/rejected": -870.1724853515625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7676750421524048, "rewards/margins": 7.5679192543029785, "rewards/rejected": -8.33559513092041, "step": 33080 }, { "epoch": 0.4, "learning_rate": 3.7793453843658444e-06, "logits/chosen": -2.869978427886963, "logits/rejected": -2.2478020191192627, "logps/chosen": -253.9155731201172, "logps/rejected": -1012.9401245117188, "loss": 0.121, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.984130859375, "rewards/margins": 7.725883483886719, "rewards/rejected": -9.710014343261719, "step": 33090 }, { "epoch": 0.4, "learning_rate": 3.778447795642246e-06, "logits/chosen": -2.8742918968200684, "logits/rejected": -2.6252036094665527, "logps/chosen": -155.73890686035156, "logps/rejected": -839.5055541992188, "loss": 0.1065, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1812973022460938, "rewards/margins": 6.854480743408203, "rewards/rejected": -8.035778045654297, "step": 33100 }, { "epoch": 0.4, "learning_rate": 3.7775499837052134e-06, "logits/chosen": -2.834167003631592, "logits/rejected": -2.2401251792907715, "logps/chosen": -216.3916473388672, "logps/rejected": -1045.3680419921875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.647735595703125, "rewards/margins": 8.415053367614746, "rewards/rejected": -10.062789916992188, "step": 33110 }, { "epoch": 0.4, "learning_rate": 3.7766519487115017e-06, "logits/chosen": -2.849093198776245, "logits/rejected": -2.36073899269104, "logps/chosen": -193.5852508544922, "logps/rejected": -966.7887573242188, "loss": 0.0197, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.5015184879302979, "rewards/margins": 7.774783134460449, "rewards/rejected": -9.276301383972168, "step": 33120 }, { "epoch": 0.4, "learning_rate": 3.775753690817906e-06, "logits/chosen": -2.790587902069092, "logits/rejected": -2.235114336013794, "logps/chosen": -177.36744689941406, "logps/rejected": -897.7686767578125, "loss": 0.0199, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3457931280136108, "rewards/margins": 7.247551918029785, "rewards/rejected": -8.593344688415527, "step": 33130 }, { "epoch": 0.4, "learning_rate": 3.7748552101812587e-06, "logits/chosen": -2.7927165031433105, "logits/rejected": -1.8063280582427979, "logps/chosen": -283.2340087890625, "logps/rejected": -1268.5513916015625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.2006051540374756, "rewards/margins": 10.061203002929688, "rewards/rejected": -12.261808395385742, "step": 33140 }, { "epoch": 0.4, "learning_rate": 3.7739565069584324e-06, "logits/chosen": -2.817699909210205, "logits/rejected": -2.324453353881836, "logps/chosen": -186.1438446044922, "logps/rejected": -951.11669921875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.3780969381332397, "rewards/margins": 7.748912811279297, "rewards/rejected": -9.127009391784668, "step": 33150 }, { "epoch": 0.4, "learning_rate": 3.7730575813063376e-06, "logits/chosen": -2.851804256439209, "logits/rejected": -2.5273776054382324, "logps/chosen": -163.0340576171875, "logps/rejected": -907.3016357421875, "loss": 0.1062, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2270134687423706, "rewards/margins": 7.475198268890381, "rewards/rejected": -8.7022123336792, "step": 33160 }, { "epoch": 0.4, "learning_rate": 3.7721584333819263e-06, "logits/chosen": -2.7787792682647705, "logits/rejected": -2.024567127227783, "logps/chosen": -241.60440063476562, "logps/rejected": -1116.603271484375, "loss": 0.0996, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.9556150436401367, "rewards/margins": 8.827994346618652, "rewards/rejected": -10.783609390258789, "step": 33170 }, { "epoch": 0.4, "learning_rate": 3.771259063342185e-06, "logits/chosen": -2.864278793334961, "logits/rejected": -2.2780566215515137, "logps/chosen": -214.27389526367188, "logps/rejected": -1027.80615234375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.5616506338119507, "rewards/margins": 8.326292037963867, "rewards/rejected": -9.887943267822266, "step": 33180 }, { "epoch": 0.4, "learning_rate": 3.7703594713441414e-06, "logits/chosen": -2.842559337615967, "logits/rejected": -2.341585636138916, "logps/chosen": -196.37428283691406, "logps/rejected": -957.6959228515625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.5097782611846924, "rewards/margins": 7.6785569190979, "rewards/rejected": -9.188335418701172, "step": 33190 }, { "epoch": 0.4, "learning_rate": 3.769459657544864e-06, "logits/chosen": -2.835749387741089, "logits/rejected": -2.2848963737487793, "logps/chosen": -225.8023223876953, "logps/rejected": -1117.2315673828125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.7843818664550781, "rewards/margins": 8.980813980102539, "rewards/rejected": -10.765195846557617, "step": 33200 }, { "epoch": 0.4, "learning_rate": 3.7685596221014547e-06, "logits/chosen": -2.7545297145843506, "logits/rejected": -2.2957603931427, "logps/chosen": -176.9827880859375, "logps/rejected": -1091.8338623046875, "loss": 0.1096, "rewards/accuracies": 1.0, "rewards/chosen": -1.331418752670288, "rewards/margins": 9.190265655517578, "rewards/rejected": -10.521684646606445, "step": 33210 }, { "epoch": 0.4, "learning_rate": 3.767659365171059e-06, "logits/chosen": -2.81463885307312, "logits/rejected": -2.188502073287964, "logps/chosen": -209.00595092773438, "logps/rejected": -1016.3972778320312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.5976287126541138, "rewards/margins": 8.183141708374023, "rewards/rejected": -9.780770301818848, "step": 33220 }, { "epoch": 0.4, "learning_rate": 3.7667588869108597e-06, "logits/chosen": -2.856934070587158, "logits/rejected": -2.3276829719543457, "logps/chosen": -205.75888061523438, "logps/rejected": -978.4537963867188, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -1.6151275634765625, "rewards/margins": 7.797614097595215, "rewards/rejected": -9.412741661071777, "step": 33230 }, { "epoch": 0.4, "learning_rate": 3.765858187478076e-06, "logits/chosen": -2.8607044219970703, "logits/rejected": -2.4520905017852783, "logps/chosen": -144.60153198242188, "logps/rejected": -935.3844604492188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0268208980560303, "rewards/margins": 7.944612979888916, "rewards/rejected": -8.971433639526367, "step": 33240 }, { "epoch": 0.4, "learning_rate": 3.764957267029969e-06, "logits/chosen": -2.899887800216675, "logits/rejected": -2.372126817703247, "logps/chosen": -160.40701293945312, "logps/rejected": -928.6204223632812, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.1997737884521484, "rewards/margins": 7.7144293785095215, "rewards/rejected": -8.914202690124512, "step": 33250 }, { "epoch": 0.4, "learning_rate": 3.7640561257238363e-06, "logits/chosen": -2.8333308696746826, "logits/rejected": -2.3890795707702637, "logps/chosen": -189.7158203125, "logps/rejected": -981.8087768554688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.4769469499588013, "rewards/margins": 7.964356422424316, "rewards/rejected": -9.441304206848145, "step": 33260 }, { "epoch": 0.4, "learning_rate": 3.763154763717014e-06, "logits/chosen": -2.831247568130493, "logits/rejected": -2.168795108795166, "logps/chosen": -256.29339599609375, "logps/rejected": -939.4224853515625, "loss": 0.1721, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.0155959129333496, "rewards/margins": 6.986252784729004, "rewards/rejected": -9.001848220825195, "step": 33270 }, { "epoch": 0.4, "learning_rate": 3.7622531811668793e-06, "logits/chosen": -2.8588602542877197, "logits/rejected": -2.1047322750091553, "logps/chosen": -261.1983642578125, "logps/rejected": -1131.7181396484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.063432216644287, "rewards/margins": 8.859847068786621, "rewards/rejected": -10.923280715942383, "step": 33280 }, { "epoch": 0.4, "learning_rate": 3.7613513782308454e-06, "logits/chosen": -2.8596601486206055, "logits/rejected": -2.430534839630127, "logps/chosen": -175.28216552734375, "logps/rejected": -969.0325317382812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.3217992782592773, "rewards/margins": 7.99770450592041, "rewards/rejected": -9.319502830505371, "step": 33290 }, { "epoch": 0.4, "learning_rate": 3.7604493550663634e-06, "logits/chosen": -2.783271312713623, "logits/rejected": -2.3793091773986816, "logps/chosen": -211.86288452148438, "logps/rejected": -984.0861206054688, "loss": 0.1594, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6811187267303467, "rewards/margins": 7.77942419052124, "rewards/rejected": -9.460542678833008, "step": 33300 }, { "epoch": 0.4, "learning_rate": 3.7595471118309246e-06, "logits/chosen": -2.814246654510498, "logits/rejected": -2.1062912940979004, "logps/chosen": -272.0345764160156, "logps/rejected": -1248.711669921875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.180184841156006, "rewards/margins": 9.90774154663086, "rewards/rejected": -12.087926864624023, "step": 33310 }, { "epoch": 0.4, "learning_rate": 3.758644648682058e-06, "logits/chosen": -2.8178420066833496, "logits/rejected": -2.2340052127838135, "logps/chosen": -197.8603973388672, "logps/rejected": -1018.6712036132812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.5077462196350098, "rewards/margins": 8.288796424865723, "rewards/rejected": -9.796541213989258, "step": 33320 }, { "epoch": 0.4, "learning_rate": 3.757741965777333e-06, "logits/chosen": -2.8693690299987793, "logits/rejected": -2.390345573425293, "logps/chosen": -175.94497680664062, "logps/rejected": -925.7640380859375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.3267301321029663, "rewards/margins": 7.5568528175354, "rewards/rejected": -8.883583068847656, "step": 33330 }, { "epoch": 0.4, "learning_rate": 3.7568390632743524e-06, "logits/chosen": -2.807246685028076, "logits/rejected": -2.0903677940368652, "logps/chosen": -248.6110382080078, "logps/rejected": -1083.4063720703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.9839950799942017, "rewards/margins": 8.444438934326172, "rewards/rejected": -10.428434371948242, "step": 33340 }, { "epoch": 0.4, "learning_rate": 3.7559359413307627e-06, "logits/chosen": -2.8149027824401855, "logits/rejected": -2.4386324882507324, "logps/chosen": -209.4242401123047, "logps/rejected": -878.7560424804688, "loss": 0.1342, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.698971152305603, "rewards/margins": 6.722006320953369, "rewards/rejected": -8.420976638793945, "step": 33350 }, { "epoch": 0.4, "learning_rate": 3.7550326001042458e-06, "logits/chosen": -2.812744140625, "logits/rejected": -2.2392420768737793, "logps/chosen": -287.4289855957031, "logps/rejected": -1136.009033203125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.349562406539917, "rewards/margins": 8.629786491394043, "rewards/rejected": -10.979349136352539, "step": 33360 }, { "epoch": 0.4, "learning_rate": 3.7541290397525225e-06, "logits/chosen": -2.8479151725769043, "logits/rejected": -2.583644390106201, "logps/chosen": -152.5054931640625, "logps/rejected": -960.3995361328125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.1419163942337036, "rewards/margins": 8.078019142150879, "rewards/rejected": -9.219935417175293, "step": 33370 }, { "epoch": 0.4, "learning_rate": 3.753225260433352e-06, "logits/chosen": -2.7993948459625244, "logits/rejected": -1.9268171787261963, "logps/chosen": -254.8163299560547, "logps/rejected": -1065.1861572265625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.0031535625457764, "rewards/margins": 8.265700340270996, "rewards/rejected": -10.268854141235352, "step": 33380 }, { "epoch": 0.4, "learning_rate": 3.7523212623045324e-06, "logits/chosen": -2.8587188720703125, "logits/rejected": -2.302243232727051, "logps/chosen": -187.52926635742188, "logps/rejected": -1089.93310546875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3804914951324463, "rewards/margins": 9.134798049926758, "rewards/rejected": -10.515287399291992, "step": 33390 }, { "epoch": 0.4, "learning_rate": 3.751417045523898e-06, "logits/chosen": -2.7932920455932617, "logits/rejected": -2.0992774963378906, "logps/chosen": -218.199951171875, "logps/rejected": -1081.971435546875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.7466061115264893, "rewards/margins": 8.679779052734375, "rewards/rejected": -10.426385879516602, "step": 33400 }, { "epoch": 0.4, "learning_rate": 3.7505126102493226e-06, "logits/chosen": -2.818535327911377, "logits/rejected": -1.832564115524292, "logps/chosen": -275.1846923828125, "logps/rejected": -1355.9080810546875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.2399911880493164, "rewards/margins": 10.915493965148926, "rewards/rejected": -13.155487060546875, "step": 33410 }, { "epoch": 0.4, "learning_rate": 3.7496079566387193e-06, "logits/chosen": -2.8772265911102295, "logits/rejected": -2.5061533451080322, "logps/chosen": -118.6159896850586, "logps/rejected": -884.3966064453125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7503321766853333, "rewards/margins": 7.726827144622803, "rewards/rejected": -8.477160453796387, "step": 33420 }, { "epoch": 0.4, "learning_rate": 3.7487030848500378e-06, "logits/chosen": -2.801649570465088, "logits/rejected": -1.9630050659179688, "logps/chosen": -257.591796875, "logps/rejected": -1143.040283203125, "loss": 0.1604, "rewards/accuracies": 1.0, "rewards/chosen": -2.0469985008239746, "rewards/margins": 8.985068321228027, "rewards/rejected": -11.032066345214844, "step": 33430 }, { "epoch": 0.4, "learning_rate": 3.7477979950412653e-06, "logits/chosen": -2.7814857959747314, "logits/rejected": -2.1917312145233154, "logps/chosen": -167.779296875, "logps/rejected": -982.4319458007812, "loss": 0.1144, "rewards/accuracies": 1.0, "rewards/chosen": -1.2413692474365234, "rewards/margins": 8.18665885925293, "rewards/rejected": -9.42802906036377, "step": 33440 }, { "epoch": 0.4, "learning_rate": 3.746892687370428e-06, "logits/chosen": -2.80479097366333, "logits/rejected": -2.3889262676239014, "logps/chosen": -152.892333984375, "logps/rejected": -921.189453125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.09091317653656, "rewards/margins": 7.739428520202637, "rewards/rejected": -8.830341339111328, "step": 33450 }, { "epoch": 0.4, "learning_rate": 3.7459871619955916e-06, "logits/chosen": -2.8121254444122314, "logits/rejected": -2.326659917831421, "logps/chosen": -194.56686401367188, "logps/rejected": -966.2879028320312, "loss": 0.1559, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.5219680070877075, "rewards/margins": 7.760704040527344, "rewards/rejected": -9.282671928405762, "step": 33460 }, { "epoch": 0.4, "learning_rate": 3.745081419074857e-06, "logits/chosen": -2.810720682144165, "logits/rejected": -2.245959758758545, "logps/chosen": -214.2635498046875, "logps/rejected": -1003.2720947265625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.6910327672958374, "rewards/margins": 7.949720859527588, "rewards/rejected": -9.640753746032715, "step": 33470 }, { "epoch": 0.4, "learning_rate": 3.744175458766364e-06, "logits/chosen": -2.7978906631469727, "logits/rejected": -2.500380516052246, "logps/chosen": -149.2782440185547, "logps/rejected": -818.7060546875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.0954951047897339, "rewards/margins": 6.722492218017578, "rewards/rejected": -7.817986965179443, "step": 33480 }, { "epoch": 0.4, "learning_rate": 3.7432692812282915e-06, "logits/chosen": -2.7583329677581787, "logits/rejected": -2.073366165161133, "logps/chosen": -253.34848022460938, "logps/rejected": -1056.968994140625, "loss": 0.1277, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.0066375732421875, "rewards/margins": 8.169225692749023, "rewards/rejected": -10.175863265991211, "step": 33490 }, { "epoch": 0.4, "learning_rate": 3.742362886618856e-06, "logits/chosen": -2.832157611846924, "logits/rejected": -2.2324557304382324, "logps/chosen": -183.14674377441406, "logps/rejected": -998.9700927734375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.3584264516830444, "rewards/margins": 8.247218132019043, "rewards/rejected": -9.605645179748535, "step": 33500 }, { "epoch": 0.4, "learning_rate": 3.7414562750963114e-06, "logits/chosen": -2.832473039627075, "logits/rejected": -2.3088812828063965, "logps/chosen": -172.93780517578125, "logps/rejected": -871.8395385742188, "loss": 0.1435, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3012632131576538, "rewards/margins": 7.039280891418457, "rewards/rejected": -8.340543746948242, "step": 33510 }, { "epoch": 0.4, "learning_rate": 3.7405494468189483e-06, "logits/chosen": -2.8270039558410645, "logits/rejected": -2.2481741905212402, "logps/chosen": -166.0845489501953, "logps/rejected": -970.9308471679688, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.1954247951507568, "rewards/margins": 8.139967918395996, "rewards/rejected": -9.335393905639648, "step": 33520 }, { "epoch": 0.4, "learning_rate": 3.739642401945098e-06, "logits/chosen": -2.834588050842285, "logits/rejected": -2.2790465354919434, "logps/chosen": -226.4752655029297, "logps/rejected": -1011.4329833984375, "loss": 0.1085, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.7629257440567017, "rewards/margins": 7.964678764343262, "rewards/rejected": -9.727605819702148, "step": 33530 }, { "epoch": 0.4, "learning_rate": 3.738735140633127e-06, "logits/chosen": -2.830745220184326, "logits/rejected": -2.3582465648651123, "logps/chosen": -176.56179809570312, "logps/rejected": -946.6510009765625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.2832396030426025, "rewards/margins": 7.80508279800415, "rewards/rejected": -9.088322639465332, "step": 33540 }, { "epoch": 0.4, "learning_rate": 3.737827663041441e-06, "logits/chosen": -2.805126905441284, "logits/rejected": -2.210503101348877, "logps/chosen": -177.5099639892578, "logps/rejected": -860.2432861328125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.3093417882919312, "rewards/margins": 6.930985927581787, "rewards/rejected": -8.240327835083008, "step": 33550 }, { "epoch": 0.4, "learning_rate": 3.736919969328483e-06, "logits/chosen": -2.8501689434051514, "logits/rejected": -2.283073663711548, "logps/chosen": -212.4102020263672, "logps/rejected": -1116.908447265625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.6525354385375977, "rewards/margins": 9.129232406616211, "rewards/rejected": -10.781766891479492, "step": 33560 }, { "epoch": 0.4, "learning_rate": 3.7360120596527334e-06, "logits/chosen": -2.8109045028686523, "logits/rejected": -2.297213315963745, "logps/chosen": -174.2841339111328, "logps/rejected": -1073.883056640625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.3423823118209839, "rewards/margins": 9.01378059387207, "rewards/rejected": -10.356161117553711, "step": 33570 }, { "epoch": 0.4, "learning_rate": 3.735103934172711e-06, "logits/chosen": -2.8393688201904297, "logits/rejected": -2.2058331966400146, "logps/chosen": -237.7396240234375, "logps/rejected": -1078.161376953125, "loss": 0.262, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.8632891178131104, "rewards/margins": 8.52612590789795, "rewards/rejected": -10.389415740966797, "step": 33580 }, { "epoch": 0.4, "learning_rate": 3.7341955930469724e-06, "logits/chosen": -2.8361804485321045, "logits/rejected": -2.423367738723755, "logps/chosen": -209.5580291748047, "logps/rejected": -978.3978271484375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.6185846328735352, "rewards/margins": 7.767490386962891, "rewards/rejected": -9.38607406616211, "step": 33590 }, { "epoch": 0.4, "learning_rate": 3.7332870364341106e-06, "logits/chosen": -2.812206745147705, "logits/rejected": -1.8961312770843506, "logps/chosen": -268.0103759765625, "logps/rejected": -1240.7412109375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.090270519256592, "rewards/margins": 9.904935836791992, "rewards/rejected": -11.995206832885742, "step": 33600 }, { "epoch": 0.4, "learning_rate": 3.732378264492758e-06, "logits/chosen": -2.869408130645752, "logits/rejected": -2.5753893852233887, "logps/chosen": -125.9310531616211, "logps/rejected": -856.5661010742188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8813593983650208, "rewards/margins": 7.313893795013428, "rewards/rejected": -8.195253372192383, "step": 33610 }, { "epoch": 0.4, "learning_rate": 3.7314692773815837e-06, "logits/chosen": -2.872122287750244, "logits/rejected": -2.4152615070343018, "logps/chosen": -150.51097106933594, "logps/rejected": -863.9876708984375, "loss": 0.0216, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.113515019416809, "rewards/margins": 7.154450416564941, "rewards/rejected": -8.267965316772461, "step": 33620 }, { "epoch": 0.4, "learning_rate": 3.730560075259293e-06, "logits/chosen": -2.826936721801758, "logits/rejected": -2.4594554901123047, "logps/chosen": -157.12387084960938, "logps/rejected": -928.4656982421875, "loss": 0.0209, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1274211406707764, "rewards/margins": 7.77166223526001, "rewards/rejected": -8.899083137512207, "step": 33630 }, { "epoch": 0.4, "learning_rate": 3.729650658284631e-06, "logits/chosen": -2.8052115440368652, "logits/rejected": -2.194892406463623, "logps/chosen": -227.45407104492188, "logps/rejected": -1090.510009765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.8355400562286377, "rewards/margins": 8.67466926574707, "rewards/rejected": -10.510209083557129, "step": 33640 }, { "epoch": 0.4, "learning_rate": 3.7287410266163795e-06, "logits/chosen": -2.7540442943573, "logits/rejected": -2.199986457824707, "logps/chosen": -191.9583282470703, "logps/rejected": -1052.305908203125, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -1.4675391912460327, "rewards/margins": 8.664994239807129, "rewards/rejected": -10.132533073425293, "step": 33650 }, { "epoch": 0.4, "learning_rate": 3.7278311804133573e-06, "logits/chosen": -2.8617196083068848, "logits/rejected": -2.3662731647491455, "logps/chosen": -176.71865844726562, "logps/rejected": -1050.029541015625, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -1.2658647298812866, "rewards/margins": 8.837950706481934, "rewards/rejected": -10.103816032409668, "step": 33660 }, { "epoch": 0.4, "learning_rate": 3.7269211198344214e-06, "logits/chosen": -2.884089708328247, "logits/rejected": -2.2906460762023926, "logps/chosen": -186.54234313964844, "logps/rejected": -996.8551025390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.3846123218536377, "rewards/margins": 8.189699172973633, "rewards/rejected": -9.574311256408691, "step": 33670 }, { "epoch": 0.4, "learning_rate": 3.726010845038465e-06, "logits/chosen": -2.8316080570220947, "logits/rejected": -2.3000264167785645, "logps/chosen": -184.52877807617188, "logps/rejected": -1080.258056640625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.3872498273849487, "rewards/margins": 9.029830932617188, "rewards/rejected": -10.417081832885742, "step": 33680 }, { "epoch": 0.4, "learning_rate": 3.72510035618442e-06, "logits/chosen": -2.854010820388794, "logits/rejected": -2.508852958679199, "logps/chosen": -157.59304809570312, "logps/rejected": -932.00927734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0950273275375366, "rewards/margins": 7.843360900878906, "rewards/rejected": -8.938387870788574, "step": 33690 }, { "epoch": 0.4, "learning_rate": 3.724189653431256e-06, "logits/chosen": -2.8182485103607178, "logits/rejected": -2.2014694213867188, "logps/chosen": -189.9137420654297, "logps/rejected": -1029.7701416015625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.436266541481018, "rewards/margins": 8.473395347595215, "rewards/rejected": -9.909662246704102, "step": 33700 }, { "epoch": 0.4, "learning_rate": 3.7232787369379784e-06, "logits/chosen": -2.8026444911956787, "logits/rejected": -2.3241617679595947, "logps/chosen": -176.51617431640625, "logps/rejected": -949.0919799804688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.3256381750106812, "rewards/margins": 7.780881404876709, "rewards/rejected": -9.10651969909668, "step": 33710 }, { "epoch": 0.4, "learning_rate": 3.7223676068636316e-06, "logits/chosen": -2.8668065071105957, "logits/rejected": -2.3121581077575684, "logps/chosen": -188.13949584960938, "logps/rejected": -968.3211669921875, "loss": 0.1332, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3923494815826416, "rewards/margins": 7.907665252685547, "rewards/rejected": -9.300015449523926, "step": 33720 }, { "epoch": 0.4, "learning_rate": 3.721456263367295e-06, "logits/chosen": -2.8317604064941406, "logits/rejected": -2.332688093185425, "logps/chosen": -181.5986328125, "logps/rejected": -1010.5950317382812, "loss": 0.0418, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.347500205039978, "rewards/margins": 8.371577262878418, "rewards/rejected": -9.719077110290527, "step": 33730 }, { "epoch": 0.4, "learning_rate": 3.720544706608087e-06, "logits/chosen": -2.8687376976013184, "logits/rejected": -2.196601152420044, "logps/chosen": -220.11648559570312, "logps/rejected": -1079.1861572265625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.7411830425262451, "rewards/margins": 8.644091606140137, "rewards/rejected": -10.385274887084961, "step": 33740 }, { "epoch": 0.4, "learning_rate": 3.7196329367451632e-06, "logits/chosen": -2.8121306896209717, "logits/rejected": -2.1525096893310547, "logps/chosen": -197.9635009765625, "logps/rejected": -1060.3863525390625, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": -1.4918709993362427, "rewards/margins": 8.7217435836792, "rewards/rejected": -10.213615417480469, "step": 33750 }, { "epoch": 0.4, "learning_rate": 3.718720953937716e-06, "logits/chosen": -2.8682548999786377, "logits/rejected": -2.377790927886963, "logps/chosen": -175.0745086669922, "logps/rejected": -922.2325439453125, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -1.3419997692108154, "rewards/margins": 7.508854866027832, "rewards/rejected": -8.85085391998291, "step": 33760 }, { "epoch": 0.4, "learning_rate": 3.717808758344975e-06, "logits/chosen": -2.808008909225464, "logits/rejected": -2.1789116859436035, "logps/chosen": -215.28189086914062, "logps/rejected": -1208.623291015625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.6647045612335205, "rewards/margins": 10.022052764892578, "rewards/rejected": -11.686758041381836, "step": 33770 }, { "epoch": 0.4, "learning_rate": 3.716896350126208e-06, "logits/chosen": -2.8114173412323, "logits/rejected": -2.0747618675231934, "logps/chosen": -242.75131225585938, "logps/rejected": -1116.693115234375, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -1.8325653076171875, "rewards/margins": 8.943808555603027, "rewards/rejected": -10.776372909545898, "step": 33780 }, { "epoch": 0.4, "learning_rate": 3.7159837294407174e-06, "logits/chosen": -2.834444046020508, "logits/rejected": -2.2521328926086426, "logps/chosen": -225.6796417236328, "logps/rejected": -1032.635986328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.771264672279358, "rewards/margins": 8.15191650390625, "rewards/rejected": -9.923181533813477, "step": 33790 }, { "epoch": 0.4, "learning_rate": 3.715070896447845e-06, "logits/chosen": -2.8391306400299072, "logits/rejected": -2.237771511077881, "logps/chosen": -222.5473175048828, "logps/rejected": -1077.61962890625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.7701870203018188, "rewards/margins": 8.622843742370605, "rewards/rejected": -10.393031120300293, "step": 33800 }, { "epoch": 0.4, "learning_rate": 3.7141578513069686e-06, "logits/chosen": -2.787498950958252, "logits/rejected": -2.238004207611084, "logps/chosen": -214.7902374267578, "logps/rejected": -1147.547119140625, "loss": 0.0425, "rewards/accuracies": 1.0, "rewards/chosen": -1.579807996749878, "rewards/margins": 9.498322486877441, "rewards/rejected": -11.078129768371582, "step": 33810 }, { "epoch": 0.4, "learning_rate": 3.7132445941775034e-06, "logits/chosen": -2.8507893085479736, "logits/rejected": -2.3375778198242188, "logps/chosen": -227.2764129638672, "logps/rejected": -1090.844482421875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.7960774898529053, "rewards/margins": 8.717844009399414, "rewards/rejected": -10.513921737670898, "step": 33820 }, { "epoch": 0.4, "learning_rate": 3.712331125218902e-06, "logits/chosen": -2.8780369758605957, "logits/rejected": -2.2762153148651123, "logps/chosen": -158.26254272460938, "logps/rejected": -1017.2526245117188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1840521097183228, "rewards/margins": 8.610220909118652, "rewards/rejected": -9.794272422790527, "step": 33830 }, { "epoch": 0.41, "learning_rate": 3.711417444590653e-06, "logits/chosen": -2.799753427505493, "logits/rejected": -2.0098230838775635, "logps/chosen": -266.5409240722656, "logps/rejected": -1198.1201171875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.138701915740967, "rewards/margins": 9.432214736938477, "rewards/rejected": -11.570917129516602, "step": 33840 }, { "epoch": 0.41, "learning_rate": 3.7105035524522835e-06, "logits/chosen": -2.8276758193969727, "logits/rejected": -2.2481470108032227, "logps/chosen": -172.55043029785156, "logps/rejected": -1008.2752685546875, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -1.240575909614563, "rewards/margins": 8.449021339416504, "rewards/rejected": -9.689596176147461, "step": 33850 }, { "epoch": 0.41, "learning_rate": 3.7095894489633554e-06, "logits/chosen": -2.824831485748291, "logits/rejected": -2.287966251373291, "logps/chosen": -150.61534118652344, "logps/rejected": -906.9609375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.0704596042633057, "rewards/margins": 7.614956855773926, "rewards/rejected": -8.685417175292969, "step": 33860 }, { "epoch": 0.41, "learning_rate": 3.708675134283468e-06, "logits/chosen": -2.872824192047119, "logits/rejected": -2.358323574066162, "logps/chosen": -188.7220001220703, "logps/rejected": -954.5422973632812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.4273449182510376, "rewards/margins": 7.724429130554199, "rewards/rejected": -9.151774406433105, "step": 33870 }, { "epoch": 0.41, "learning_rate": 3.7077606085722607e-06, "logits/chosen": -2.8634345531463623, "logits/rejected": -2.2881157398223877, "logps/chosen": -166.15138244628906, "logps/rejected": -925.3232421875, "loss": 0.156, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2210419178009033, "rewards/margins": 7.656652927398682, "rewards/rejected": -8.877695083618164, "step": 33880 }, { "epoch": 0.41, "learning_rate": 3.7068458719894047e-06, "logits/chosen": -2.790198802947998, "logits/rejected": -1.916914939880371, "logps/chosen": -284.4891052246094, "logps/rejected": -1218.200927734375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.283602476119995, "rewards/margins": 9.4835205078125, "rewards/rejected": -11.767123222351074, "step": 33890 }, { "epoch": 0.41, "learning_rate": 3.7059309246946114e-06, "logits/chosen": -2.8432106971740723, "logits/rejected": -2.3435332775115967, "logps/chosen": -220.413818359375, "logps/rejected": -884.4625244140625, "loss": 0.2508, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.753320336341858, "rewards/margins": 6.713014125823975, "rewards/rejected": -8.466333389282227, "step": 33900 }, { "epoch": 0.41, "learning_rate": 3.705015766847628e-06, "logits/chosen": -2.81545090675354, "logits/rejected": -2.233187198638916, "logps/chosen": -205.9581756591797, "logps/rejected": -1042.372802734375, "loss": 0.1131, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.5764497518539429, "rewards/margins": 8.447348594665527, "rewards/rejected": -10.023798942565918, "step": 33910 }, { "epoch": 0.41, "learning_rate": 3.704100398608238e-06, "logits/chosen": -2.792539119720459, "logits/rejected": -2.1013004779815674, "logps/chosen": -226.630126953125, "logps/rejected": -1193.0731201171875, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -1.7621780633926392, "rewards/margins": 9.729389190673828, "rewards/rejected": -11.49156665802002, "step": 33920 }, { "epoch": 0.41, "learning_rate": 3.703184820136263e-06, "logits/chosen": -2.8876969814300537, "logits/rejected": -2.521389961242676, "logps/chosen": -118.03367614746094, "logps/rejected": -859.3211059570312, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7466031312942505, "rewards/margins": 7.4655585289001465, "rewards/rejected": -8.21216106414795, "step": 33930 }, { "epoch": 0.41, "learning_rate": 3.7022690315915603e-06, "logits/chosen": -2.8573853969573975, "logits/rejected": -2.3648431301116943, "logps/chosen": -146.15518188476562, "logps/rejected": -929.6159057617188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.0263621807098389, "rewards/margins": 7.895484924316406, "rewards/rejected": -8.921846389770508, "step": 33940 }, { "epoch": 0.41, "learning_rate": 3.7013530331340244e-06, "logits/chosen": -2.882056951522827, "logits/rejected": -2.4730002880096436, "logps/chosen": -129.7207794189453, "logps/rejected": -932.3719482421875, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -0.8466474413871765, "rewards/margins": 8.081624984741211, "rewards/rejected": -8.928271293640137, "step": 33950 }, { "epoch": 0.41, "learning_rate": 3.7004368249235843e-06, "logits/chosen": -2.8328402042388916, "logits/rejected": -2.374220848083496, "logps/chosen": -140.01927185058594, "logps/rejected": -954.90283203125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.9829839468002319, "rewards/margins": 8.170467376708984, "rewards/rejected": -9.153451919555664, "step": 33960 }, { "epoch": 0.41, "learning_rate": 3.6995204071202096e-06, "logits/chosen": -2.8658511638641357, "logits/rejected": -2.3224973678588867, "logps/chosen": -181.5069122314453, "logps/rejected": -1019.4749145507812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.3578901290893555, "rewards/margins": 8.446150779724121, "rewards/rejected": -9.804040908813477, "step": 33970 }, { "epoch": 0.41, "learning_rate": 3.698603779883903e-06, "logits/chosen": -2.848261594772339, "logits/rejected": -1.7376827001571655, "logps/chosen": -305.9453125, "logps/rejected": -1247.8505859375, "loss": 0.1732, "rewards/accuracies": 1.0, "rewards/chosen": -2.4667789936065674, "rewards/margins": 9.593491554260254, "rewards/rejected": -12.060270309448242, "step": 33980 }, { "epoch": 0.41, "learning_rate": 3.6976869433747053e-06, "logits/chosen": -2.84385085105896, "logits/rejected": -2.459606170654297, "logps/chosen": -140.00743103027344, "logps/rejected": -913.0372924804688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9791421890258789, "rewards/margins": 7.775829315185547, "rewards/rejected": -8.754971504211426, "step": 33990 }, { "epoch": 0.41, "learning_rate": 3.6967698977526933e-06, "logits/chosen": -2.7874841690063477, "logits/rejected": -2.020738363265991, "logps/chosen": -273.39239501953125, "logps/rejected": -1016.0217895507812, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.2372207641601562, "rewards/margins": 7.5278143882751465, "rewards/rejected": -9.765035629272461, "step": 34000 }, { "epoch": 0.41, "learning_rate": 3.6958526431779823e-06, "logits/chosen": -2.8422374725341797, "logits/rejected": -2.0552334785461426, "logps/chosen": -226.6020050048828, "logps/rejected": -1161.498291015625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.7168638706207275, "rewards/margins": 9.489072799682617, "rewards/rejected": -11.20593547821045, "step": 34010 }, { "epoch": 0.41, "learning_rate": 3.6949351798107204e-06, "logits/chosen": -2.8448269367218018, "logits/rejected": -2.1170310974121094, "logps/chosen": -207.5953369140625, "logps/rejected": -1081.862548828125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.5374423265457153, "rewards/margins": 8.865285873413086, "rewards/rejected": -10.402729988098145, "step": 34020 }, { "epoch": 0.41, "learning_rate": 3.6940175078110944e-06, "logits/chosen": -2.8516178131103516, "logits/rejected": -2.060373306274414, "logps/chosen": -214.21044921875, "logps/rejected": -1151.806884765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.619051218032837, "rewards/margins": 9.496034622192383, "rewards/rejected": -11.115084648132324, "step": 34030 }, { "epoch": 0.41, "learning_rate": 3.693099627339329e-06, "logits/chosen": -2.8259148597717285, "logits/rejected": -1.915157675743103, "logps/chosen": -219.67294311523438, "logps/rejected": -1103.609619140625, "loss": 0.0241, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6513960361480713, "rewards/margins": 8.988612174987793, "rewards/rejected": -10.640008926391602, "step": 34040 }, { "epoch": 0.41, "learning_rate": 3.6921815385556813e-06, "logits/chosen": -2.823082208633423, "logits/rejected": -2.240736961364746, "logps/chosen": -168.3030548095703, "logps/rejected": -998.4400634765625, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -1.2018449306488037, "rewards/margins": 8.374151229858398, "rewards/rejected": -9.575997352600098, "step": 34050 }, { "epoch": 0.41, "learning_rate": 3.6912632416204488e-06, "logits/chosen": -2.812938928604126, "logits/rejected": -2.3562474250793457, "logps/chosen": -154.59567260742188, "logps/rejected": -981.1737060546875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.1422451734542847, "rewards/margins": 8.29238224029541, "rewards/rejected": -9.434626579284668, "step": 34060 }, { "epoch": 0.41, "learning_rate": 3.6903447366939628e-06, "logits/chosen": -2.8419530391693115, "logits/rejected": -1.899390459060669, "logps/chosen": -273.4407043457031, "logps/rejected": -1239.681884765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.186352014541626, "rewards/margins": 9.800905227661133, "rewards/rejected": -11.98725700378418, "step": 34070 }, { "epoch": 0.41, "learning_rate": 3.689426023936591e-06, "logits/chosen": -2.7805802822113037, "logits/rejected": -2.0564401149749756, "logps/chosen": -198.79563903808594, "logps/rejected": -1179.227294921875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.483533263206482, "rewards/margins": 9.914375305175781, "rewards/rejected": -11.397908210754395, "step": 34080 }, { "epoch": 0.41, "learning_rate": 3.6885071035087394e-06, "logits/chosen": -2.783268928527832, "logits/rejected": -2.074246406555176, "logps/chosen": -201.22482299804688, "logps/rejected": -971.7965087890625, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": -1.5135809183120728, "rewards/margins": 7.814120292663574, "rewards/rejected": -9.3277006149292, "step": 34090 }, { "epoch": 0.41, "learning_rate": 3.6875879755708483e-06, "logits/chosen": -2.8264501094818115, "logits/rejected": -2.2008769512176514, "logps/chosen": -197.0595703125, "logps/rejected": -1109.3265380859375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.4533591270446777, "rewards/margins": 9.242748260498047, "rewards/rejected": -10.69610595703125, "step": 34100 }, { "epoch": 0.41, "learning_rate": 3.6866686402833956e-06, "logits/chosen": -2.795071840286255, "logits/rejected": -2.1732118129730225, "logps/chosen": -225.2750244140625, "logps/rejected": -1123.2677001953125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.7644836902618408, "rewards/margins": 9.075891494750977, "rewards/rejected": -10.840374946594238, "step": 34110 }, { "epoch": 0.41, "learning_rate": 3.685749097806893e-06, "logits/chosen": -2.8789281845092773, "logits/rejected": -2.3096907138824463, "logps/chosen": -206.2819061279297, "logps/rejected": -946.7600708007812, "loss": 0.1285, "rewards/accuracies": 1.0, "rewards/chosen": -1.5811595916748047, "rewards/margins": 7.495946407318115, "rewards/rejected": -9.077106475830078, "step": 34120 }, { "epoch": 0.41, "learning_rate": 3.6848293483018927e-06, "logits/chosen": -2.8316903114318848, "logits/rejected": -2.4252164363861084, "logps/chosen": -163.01895141601562, "logps/rejected": -1018.1248168945312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.2094173431396484, "rewards/margins": 8.579912185668945, "rewards/rejected": -9.789331436157227, "step": 34130 }, { "epoch": 0.41, "learning_rate": 3.683909391928978e-06, "logits/chosen": -2.7780659198760986, "logits/rejected": -1.8100427389144897, "logps/chosen": -269.2161865234375, "logps/rejected": -1401.5601806640625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.041430950164795, "rewards/margins": 11.562496185302734, "rewards/rejected": -13.603927612304688, "step": 34140 }, { "epoch": 0.41, "learning_rate": 3.682989228848772e-06, "logits/chosen": -2.792567729949951, "logits/rejected": -2.1524224281311035, "logps/chosen": -201.53170776367188, "logps/rejected": -993.5979614257812, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -1.54677414894104, "rewards/margins": 8.017107009887695, "rewards/rejected": -9.563880920410156, "step": 34150 }, { "epoch": 0.41, "learning_rate": 3.6820688592219316e-06, "logits/chosen": -2.8061485290527344, "logits/rejected": -2.2800984382629395, "logps/chosen": -187.77268981933594, "logps/rejected": -1050.5189208984375, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -1.4393572807312012, "rewards/margins": 8.678357124328613, "rewards/rejected": -10.117715835571289, "step": 34160 }, { "epoch": 0.41, "learning_rate": 3.681148283209153e-06, "logits/chosen": -2.804304838180542, "logits/rejected": -2.061997175216675, "logps/chosen": -177.03163146972656, "logps/rejected": -1074.8604736328125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.2736213207244873, "rewards/margins": 9.078907012939453, "rewards/rejected": -10.352527618408203, "step": 34170 }, { "epoch": 0.41, "learning_rate": 3.680227500971163e-06, "logits/chosen": -2.8944520950317383, "logits/rejected": -2.072802782058716, "logps/chosen": -198.11947631835938, "logps/rejected": -1062.681396484375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.4628182649612427, "rewards/margins": 8.762811660766602, "rewards/rejected": -10.225628852844238, "step": 34180 }, { "epoch": 0.41, "learning_rate": 3.67930651266873e-06, "logits/chosen": -2.821540355682373, "logits/rejected": -2.3232007026672363, "logps/chosen": -173.03176879882812, "logps/rejected": -938.1849365234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.3122012615203857, "rewards/margins": 7.701999664306641, "rewards/rejected": -9.014201164245605, "step": 34190 }, { "epoch": 0.41, "learning_rate": 3.678385318462655e-06, "logits/chosen": -2.845499277114868, "logits/rejected": -2.3105151653289795, "logps/chosen": -145.83824157714844, "logps/rejected": -964.666015625, "loss": 0.1594, "rewards/accuracies": 1.0, "rewards/chosen": -1.0154321193695068, "rewards/margins": 8.24905776977539, "rewards/rejected": -9.26448917388916, "step": 34200 }, { "epoch": 0.41, "learning_rate": 3.677463918513777e-06, "logits/chosen": -2.8408894538879395, "logits/rejected": -2.556427001953125, "logps/chosen": -114.48246002197266, "logps/rejected": -866.7853393554688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7985178232192993, "rewards/margins": 7.501927852630615, "rewards/rejected": -8.300445556640625, "step": 34210 }, { "epoch": 0.41, "learning_rate": 3.6765423129829682e-06, "logits/chosen": -2.8277318477630615, "logits/rejected": -2.1527740955352783, "logps/chosen": -220.8113555908203, "logps/rejected": -1120.104248046875, "loss": 0.0217, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.730017900466919, "rewards/margins": 9.068763732910156, "rewards/rejected": -10.79878044128418, "step": 34220 }, { "epoch": 0.41, "learning_rate": 3.6756205020311395e-06, "logits/chosen": -2.7834830284118652, "logits/rejected": -2.0982022285461426, "logps/chosen": -219.93081665039062, "logps/rejected": -1198.878173828125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.740931510925293, "rewards/margins": 9.841747283935547, "rewards/rejected": -11.582677841186523, "step": 34230 }, { "epoch": 0.41, "learning_rate": 3.6746984858192363e-06, "logits/chosen": -2.859832286834717, "logits/rejected": -2.5231881141662598, "logps/chosen": -112.80562591552734, "logps/rejected": -864.1536254882812, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -0.7270470261573792, "rewards/margins": 7.551322937011719, "rewards/rejected": -8.27837085723877, "step": 34240 }, { "epoch": 0.41, "learning_rate": 3.67377626450824e-06, "logits/chosen": -2.8410074710845947, "logits/rejected": -2.386401414871216, "logps/chosen": -138.204833984375, "logps/rejected": -919.2219848632812, "loss": 0.0214, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.992304801940918, "rewards/margins": 7.808711051940918, "rewards/rejected": -8.801015853881836, "step": 34250 }, { "epoch": 0.41, "learning_rate": 3.6728538382591676e-06, "logits/chosen": -2.81388521194458, "logits/rejected": -2.1489176750183105, "logps/chosen": -179.23255920410156, "logps/rejected": -1046.1163330078125, "loss": 0.099, "rewards/accuracies": 1.0, "rewards/chosen": -1.2828912734985352, "rewards/margins": 8.779400825500488, "rewards/rejected": -10.062291145324707, "step": 34260 }, { "epoch": 0.41, "learning_rate": 3.6719312072330736e-06, "logits/chosen": -2.8475804328918457, "logits/rejected": -2.104801654815674, "logps/chosen": -178.66409301757812, "logps/rejected": -1003.2640380859375, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -1.2820104360580444, "rewards/margins": 8.344679832458496, "rewards/rejected": -9.626691818237305, "step": 34270 }, { "epoch": 0.41, "learning_rate": 3.671008371591045e-06, "logits/chosen": -2.798093318939209, "logits/rejected": -1.922154426574707, "logps/chosen": -195.81814575195312, "logps/rejected": -1112.8765869140625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.3246331214904785, "rewards/margins": 9.41310977935791, "rewards/rejected": -10.73774242401123, "step": 34280 }, { "epoch": 0.41, "learning_rate": 3.6700853314942065e-06, "logits/chosen": -2.825686454772949, "logits/rejected": -2.1530396938323975, "logps/chosen": -147.25856018066406, "logps/rejected": -913.9622802734375, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0049920082092285, "rewards/margins": 7.747808933258057, "rewards/rejected": -8.752800941467285, "step": 34290 }, { "epoch": 0.41, "learning_rate": 3.66916208710372e-06, "logits/chosen": -2.8308637142181396, "logits/rejected": -2.3004138469696045, "logps/chosen": -152.44564819335938, "logps/rejected": -1028.8721923828125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.0868110656738281, "rewards/margins": 8.814888000488281, "rewards/rejected": -9.901698112487793, "step": 34300 }, { "epoch": 0.41, "learning_rate": 3.66823863858078e-06, "logits/chosen": -2.873037576675415, "logits/rejected": -2.294893503189087, "logps/chosen": -137.8103790283203, "logps/rejected": -912.857421875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9592658877372742, "rewards/margins": 7.786257743835449, "rewards/rejected": -8.745524406433105, "step": 34310 }, { "epoch": 0.41, "learning_rate": 3.6673149860866176e-06, "logits/chosen": -2.7888615131378174, "logits/rejected": -2.2669994831085205, "logps/chosen": -159.91348266601562, "logps/rejected": -969.5982666015625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.179378867149353, "rewards/margins": 8.13746452331543, "rewards/rejected": -9.316843032836914, "step": 34320 }, { "epoch": 0.41, "learning_rate": 3.666391129782501e-06, "logits/chosen": -2.8637304306030273, "logits/rejected": -2.3687424659729004, "logps/chosen": -153.90103149414062, "logps/rejected": -891.0770263671875, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": -1.0599969625473022, "rewards/margins": 7.479578971862793, "rewards/rejected": -8.539575576782227, "step": 34330 }, { "epoch": 0.41, "learning_rate": 3.665467069829733e-06, "logits/chosen": -2.810253381729126, "logits/rejected": -2.1619555950164795, "logps/chosen": -187.3565673828125, "logps/rejected": -1094.057373046875, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -1.3913934230804443, "rewards/margins": 9.145989418029785, "rewards/rejected": -10.537383079528809, "step": 34340 }, { "epoch": 0.41, "learning_rate": 3.6645428063896515e-06, "logits/chosen": -2.8203184604644775, "logits/rejected": -2.225782871246338, "logps/chosen": -146.13632202148438, "logps/rejected": -924.68408203125, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.981157660484314, "rewards/margins": 7.890301704406738, "rewards/rejected": -8.871459007263184, "step": 34350 }, { "epoch": 0.41, "learning_rate": 3.6636183396236302e-06, "logits/chosen": -2.7816002368927, "logits/rejected": -2.217359781265259, "logps/chosen": -153.98129272460938, "logps/rejected": -995.6092529296875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0627025365829468, "rewards/margins": 8.495884895324707, "rewards/rejected": -9.558588027954102, "step": 34360 }, { "epoch": 0.41, "learning_rate": 3.662693669693079e-06, "logits/chosen": -2.8072948455810547, "logits/rejected": -2.0832152366638184, "logps/chosen": -173.73178100585938, "logps/rejected": -950.9619140625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.2451236248016357, "rewards/margins": 7.887960910797119, "rewards/rejected": -9.133085250854492, "step": 34370 }, { "epoch": 0.41, "learning_rate": 3.661768796759442e-06, "logits/chosen": -2.8398561477661133, "logits/rejected": -2.073636531829834, "logps/chosen": -168.55941772460938, "logps/rejected": -1052.651611328125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.167978048324585, "rewards/margins": 8.960336685180664, "rewards/rejected": -10.128313064575195, "step": 34380 }, { "epoch": 0.41, "learning_rate": 3.6608437209842e-06, "logits/chosen": -2.8020994663238525, "logits/rejected": -2.313464641571045, "logps/chosen": -162.9152374267578, "logps/rejected": -883.7261962890625, "loss": 0.1556, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2068376541137695, "rewards/margins": 7.248194217681885, "rewards/rejected": -8.455031394958496, "step": 34390 }, { "epoch": 0.41, "learning_rate": 3.659918442528868e-06, "logits/chosen": -2.817465305328369, "logits/rejected": -2.0972201824188232, "logps/chosen": -202.6859130859375, "logps/rejected": -1116.5230712890625, "loss": 0.1078, "rewards/accuracies": 1.0, "rewards/chosen": -1.459437370300293, "rewards/margins": 9.303529739379883, "rewards/rejected": -10.762968063354492, "step": 34400 }, { "epoch": 0.41, "learning_rate": 3.658992961554998e-06, "logits/chosen": -2.8431615829467773, "logits/rejected": -2.3578898906707764, "logps/chosen": -139.44522094726562, "logps/rejected": -944.1795043945312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9840308427810669, "rewards/margins": 8.060900688171387, "rewards/rejected": -9.044931411743164, "step": 34410 }, { "epoch": 0.41, "learning_rate": 3.6580672782241756e-06, "logits/chosen": -2.7922725677490234, "logits/rejected": -2.0627803802490234, "logps/chosen": -191.4573974609375, "logps/rejected": -1055.687744140625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.391089677810669, "rewards/margins": 8.774157524108887, "rewards/rejected": -10.16524887084961, "step": 34420 }, { "epoch": 0.41, "learning_rate": 3.6571413926980224e-06, "logits/chosen": -2.890456199645996, "logits/rejected": -2.595834255218506, "logps/chosen": -100.77781677246094, "logps/rejected": -817.5843505859375, "loss": 0.0487, "rewards/accuracies": 1.0, "rewards/chosen": -0.6100149750709534, "rewards/margins": 7.18706750869751, "rewards/rejected": -7.797083377838135, "step": 34430 }, { "epoch": 0.41, "learning_rate": 3.6562153051381966e-06, "logits/chosen": -2.8178915977478027, "logits/rejected": -2.4051594734191895, "logps/chosen": -133.85903930664062, "logps/rejected": -943.31884765625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.931541919708252, "rewards/margins": 8.118375778198242, "rewards/rejected": -9.049918174743652, "step": 34440 }, { "epoch": 0.41, "learning_rate": 3.655289015706389e-06, "logits/chosen": -2.839099884033203, "logits/rejected": -2.1576523780822754, "logps/chosen": -169.10487365722656, "logps/rejected": -953.0794067382812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.2464940547943115, "rewards/margins": 7.905646324157715, "rewards/rejected": -9.152140617370605, "step": 34450 }, { "epoch": 0.41, "learning_rate": 3.6543625245643286e-06, "logits/chosen": -2.863466739654541, "logits/rejected": -2.3737521171569824, "logps/chosen": -154.2891387939453, "logps/rejected": -945.5853271484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0451433658599854, "rewards/margins": 8.021421432495117, "rewards/rejected": -9.066564559936523, "step": 34460 }, { "epoch": 0.41, "learning_rate": 3.6534358318737773e-06, "logits/chosen": -2.830472469329834, "logits/rejected": -2.153618812561035, "logps/chosen": -158.07595825195312, "logps/rejected": -1052.3055419921875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.085979700088501, "rewards/margins": 9.037094116210938, "rewards/rejected": -10.123074531555176, "step": 34470 }, { "epoch": 0.41, "learning_rate": 3.6525089377965336e-06, "logits/chosen": -2.8091378211975098, "logits/rejected": -2.1840567588806152, "logps/chosen": -148.40814208984375, "logps/rejected": -977.5880737304688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9886379241943359, "rewards/margins": 8.394742012023926, "rewards/rejected": -9.383380889892578, "step": 34480 }, { "epoch": 0.41, "learning_rate": 3.6515818424944304e-06, "logits/chosen": -2.8145205974578857, "logits/rejected": -2.521848201751709, "logps/chosen": -137.9136962890625, "logps/rejected": -782.777099609375, "loss": 0.1072, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9389583468437195, "rewards/margins": 6.522151947021484, "rewards/rejected": -7.4611101150512695, "step": 34490 }, { "epoch": 0.41, "learning_rate": 3.6506545461293353e-06, "logits/chosen": -2.8787474632263184, "logits/rejected": -2.201923131942749, "logps/chosen": -159.8608856201172, "logps/rejected": -1118.7401123046875, "loss": 0.021, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1271412372589111, "rewards/margins": 9.67378044128418, "rewards/rejected": -10.800921440124512, "step": 34500 }, { "epoch": 0.41, "learning_rate": 3.649727048863152e-06, "logits/chosen": -2.795511245727539, "logits/rejected": -2.1027438640594482, "logps/chosen": -172.36337280273438, "logps/rejected": -1135.4130859375, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.2361977100372314, "rewards/margins": 9.727700233459473, "rewards/rejected": -10.963896751403809, "step": 34510 }, { "epoch": 0.41, "learning_rate": 3.6487993508578196e-06, "logits/chosen": -2.7971084117889404, "logits/rejected": -2.165332078933716, "logps/chosen": -163.77508544921875, "logps/rejected": -968.3644409179688, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -1.11893630027771, "rewards/margins": 8.176759719848633, "rewards/rejected": -9.295696258544922, "step": 34520 }, { "epoch": 0.41, "learning_rate": 3.6478714522753116e-06, "logits/chosen": -2.860109567642212, "logits/rejected": -2.385401964187622, "logps/chosen": -118.46234130859375, "logps/rejected": -920.3834838867188, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.772404134273529, "rewards/margins": 8.050785064697266, "rewards/rejected": -8.823188781738281, "step": 34530 }, { "epoch": 0.41, "learning_rate": 3.6469433532776356e-06, "logits/chosen": -2.7979273796081543, "logits/rejected": -2.157729148864746, "logps/chosen": -167.2542724609375, "logps/rejected": -1072.5201416015625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.2032026052474976, "rewards/margins": 9.120772361755371, "rewards/rejected": -10.323974609375, "step": 34540 }, { "epoch": 0.41, "learning_rate": 3.646015054026836e-06, "logits/chosen": -2.798261880874634, "logits/rejected": -2.217629909515381, "logps/chosen": -142.08871459960938, "logps/rejected": -989.2775268554688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9345016479492188, "rewards/margins": 8.557684898376465, "rewards/rejected": -9.4921875, "step": 34550 }, { "epoch": 0.41, "learning_rate": 3.6450865546849902e-06, "logits/chosen": -2.7607853412628174, "logits/rejected": -1.940765619277954, "logps/chosen": -163.3284454345703, "logps/rejected": -1163.9178466796875, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -1.0787891149520874, "rewards/margins": 10.16617202758789, "rewards/rejected": -11.244961738586426, "step": 34560 }, { "epoch": 0.41, "learning_rate": 3.6441578554142125e-06, "logits/chosen": -2.826927900314331, "logits/rejected": -2.1949541568756104, "logps/chosen": -174.75326538085938, "logps/rejected": -1134.680419921875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.2671537399291992, "rewards/margins": 9.675294876098633, "rewards/rejected": -10.942449569702148, "step": 34570 }, { "epoch": 0.41, "learning_rate": 3.643228956376651e-06, "logits/chosen": -2.8173611164093018, "logits/rejected": -2.1783080101013184, "logps/chosen": -179.06515502929688, "logps/rejected": -1073.3675537109375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.3196334838867188, "rewards/margins": 9.027544975280762, "rewards/rejected": -10.34717845916748, "step": 34580 }, { "epoch": 0.41, "learning_rate": 3.64229985773449e-06, "logits/chosen": -2.865111827850342, "logits/rejected": -2.433793067932129, "logps/chosen": -128.53823852539062, "logps/rejected": -1029.965087890625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8975502848625183, "rewards/margins": 9.029680252075195, "rewards/rejected": -9.927229881286621, "step": 34590 }, { "epoch": 0.41, "learning_rate": 3.641370559649945e-06, "logits/chosen": -2.8717215061187744, "logits/rejected": -2.225508213043213, "logps/chosen": -210.0164031982422, "logps/rejected": -1112.2874755859375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6073739528656006, "rewards/margins": 9.125898361206055, "rewards/rejected": -10.73327350616455, "step": 34600 }, { "epoch": 0.41, "learning_rate": 3.640441062285271e-06, "logits/chosen": -2.829948663711548, "logits/rejected": -2.185655117034912, "logps/chosen": -189.3984832763672, "logps/rejected": -1034.810791015625, "loss": 0.1451, "rewards/accuracies": 1.0, "rewards/chosen": -1.3828388452529907, "rewards/margins": 8.567952156066895, "rewards/rejected": -9.95079231262207, "step": 34610 }, { "epoch": 0.41, "learning_rate": 3.639511365802755e-06, "logits/chosen": -2.8075788021087646, "logits/rejected": -2.2753167152404785, "logps/chosen": -174.20005798339844, "logps/rejected": -1045.702392578125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.281227946281433, "rewards/margins": 8.769811630249023, "rewards/rejected": -10.051040649414062, "step": 34620 }, { "epoch": 0.41, "learning_rate": 3.6385814703647194e-06, "logits/chosen": -2.813535690307617, "logits/rejected": -2.193556070327759, "logps/chosen": -183.72372436523438, "logps/rejected": -1113.2530517578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.3581759929656982, "rewards/margins": 9.386919975280762, "rewards/rejected": -10.745096206665039, "step": 34630 }, { "epoch": 0.41, "learning_rate": 3.637651376133521e-06, "logits/chosen": -2.8081717491149902, "logits/rejected": -2.079035758972168, "logps/chosen": -202.9134521484375, "logps/rejected": -1145.5419921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.5163748264312744, "rewards/margins": 9.539298057556152, "rewards/rejected": -11.055673599243164, "step": 34640 }, { "epoch": 0.41, "learning_rate": 3.6367210832715526e-06, "logits/chosen": -2.8073313236236572, "logits/rejected": -2.2198643684387207, "logps/chosen": -148.98638916015625, "logps/rejected": -958.4381713867188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0451886653900146, "rewards/margins": 8.173638343811035, "rewards/rejected": -9.218828201293945, "step": 34650 }, { "epoch": 0.41, "learning_rate": 3.6357905919412394e-06, "logits/chosen": -2.896179437637329, "logits/rejected": -2.574286937713623, "logps/chosen": -132.61460876464844, "logps/rejected": -797.901611328125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9484061002731323, "rewards/margins": 6.658184051513672, "rewards/rejected": -7.606590270996094, "step": 34660 }, { "epoch": 0.42, "learning_rate": 3.6348599023050434e-06, "logits/chosen": -2.8112235069274902, "logits/rejected": -2.261775255203247, "logps/chosen": -200.07412719726562, "logps/rejected": -1035.433837890625, "loss": 0.0984, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.5611934661865234, "rewards/margins": 8.405787467956543, "rewards/rejected": -9.966981887817383, "step": 34670 }, { "epoch": 0.42, "learning_rate": 3.6339290145254608e-06, "logits/chosen": -2.8445863723754883, "logits/rejected": -2.0993494987487793, "logps/chosen": -193.95217895507812, "logps/rejected": -1068.8648681640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.4347970485687256, "rewards/margins": 8.875432014465332, "rewards/rejected": -10.31022834777832, "step": 34680 }, { "epoch": 0.42, "learning_rate": 3.632997928765022e-06, "logits/chosen": -2.802473306655884, "logits/rejected": -2.1205716133117676, "logps/chosen": -210.51766967773438, "logps/rejected": -1181.5048828125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.5886485576629639, "rewards/margins": 9.833474159240723, "rewards/rejected": -11.42212200164795, "step": 34690 }, { "epoch": 0.42, "learning_rate": 3.632066645186291e-06, "logits/chosen": -2.8556935787200928, "logits/rejected": -2.5392377376556396, "logps/chosen": -120.73835754394531, "logps/rejected": -902.4791259765625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7988625168800354, "rewards/margins": 7.844045162200928, "rewards/rejected": -8.642908096313477, "step": 34700 }, { "epoch": 0.42, "learning_rate": 3.6311351639518677e-06, "logits/chosen": -2.880563735961914, "logits/rejected": -2.105384588241577, "logps/chosen": -198.33734130859375, "logps/rejected": -1108.0108642578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.5202727317810059, "rewards/margins": 9.172426223754883, "rewards/rejected": -10.69269847869873, "step": 34710 }, { "epoch": 0.42, "learning_rate": 3.6302034852243873e-06, "logits/chosen": -2.8047657012939453, "logits/rejected": -2.181652307510376, "logps/chosen": -165.78863525390625, "logps/rejected": -1007.7306518554688, "loss": 0.138, "rewards/accuracies": 1.0, "rewards/chosen": -1.1585164070129395, "rewards/margins": 8.537137985229492, "rewards/rejected": -9.695656776428223, "step": 34720 }, { "epoch": 0.42, "learning_rate": 3.629271609166517e-06, "logits/chosen": -2.8074164390563965, "logits/rejected": -2.2064836025238037, "logps/chosen": -155.66763305664062, "logps/rejected": -1087.9334716796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0635658502578735, "rewards/margins": 9.428860664367676, "rewards/rejected": -10.492425918579102, "step": 34730 }, { "epoch": 0.42, "learning_rate": 3.6283395359409596e-06, "logits/chosen": -2.815830707550049, "logits/rejected": -2.1928000450134277, "logps/chosen": -181.4318389892578, "logps/rejected": -1105.6995849609375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.3555375337600708, "rewards/margins": 9.305932998657227, "rewards/rejected": -10.661470413208008, "step": 34740 }, { "epoch": 0.42, "learning_rate": 3.627407265710453e-06, "logits/chosen": -2.841251850128174, "logits/rejected": -2.3548426628112793, "logps/chosen": -159.54440307617188, "logps/rejected": -997.4378051757812, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.1515549421310425, "rewards/margins": 8.430363655090332, "rewards/rejected": -9.581918716430664, "step": 34750 }, { "epoch": 0.42, "learning_rate": 3.62647479863777e-06, "logits/chosen": -2.8562140464782715, "logits/rejected": -2.3053603172302246, "logps/chosen": -198.1571807861328, "logps/rejected": -1070.8955078125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.4823777675628662, "rewards/margins": 8.83335018157959, "rewards/rejected": -10.315728187561035, "step": 34760 }, { "epoch": 0.42, "learning_rate": 3.6255421348857153e-06, "logits/chosen": -2.8827338218688965, "logits/rejected": -2.4021947383880615, "logps/chosen": -158.65560913085938, "logps/rejected": -934.4669799804688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1711848974227905, "rewards/margins": 7.800090789794922, "rewards/rejected": -8.971277236938477, "step": 34770 }, { "epoch": 0.42, "learning_rate": 3.6246092746171304e-06, "logits/chosen": -2.8336424827575684, "logits/rejected": -2.3107104301452637, "logps/chosen": -180.4916534423828, "logps/rejected": -977.1837158203125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.3345520496368408, "rewards/margins": 8.063337326049805, "rewards/rejected": -9.397890090942383, "step": 34780 }, { "epoch": 0.42, "learning_rate": 3.6236762179948886e-06, "logits/chosen": -2.7746846675872803, "logits/rejected": -2.0083298683166504, "logps/chosen": -187.2056884765625, "logps/rejected": -1074.922607421875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.3443472385406494, "rewards/margins": 9.015296936035156, "rewards/rejected": -10.359643936157227, "step": 34790 }, { "epoch": 0.42, "learning_rate": 3.6227429651819003e-06, "logits/chosen": -2.8355472087860107, "logits/rejected": -2.238751173019409, "logps/chosen": -224.1301727294922, "logps/rejected": -988.2469482421875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.7461332082748413, "rewards/margins": 7.7368292808532715, "rewards/rejected": -9.482963562011719, "step": 34800 }, { "epoch": 0.42, "learning_rate": 3.6218095163411095e-06, "logits/chosen": -2.7941811084747314, "logits/rejected": -2.0928149223327637, "logps/chosen": -175.61630249023438, "logps/rejected": -1040.130126953125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2596420049667358, "rewards/margins": 8.753835678100586, "rewards/rejected": -10.01347827911377, "step": 34810 }, { "epoch": 0.42, "learning_rate": 3.6208758716354917e-06, "logits/chosen": -2.8291990756988525, "logits/rejected": -2.376054286956787, "logps/chosen": -186.27731323242188, "logps/rejected": -1022.40380859375, "loss": 0.1266, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3980005979537964, "rewards/margins": 8.440081596374512, "rewards/rejected": -9.838081359863281, "step": 34820 }, { "epoch": 0.42, "learning_rate": 3.61994203122806e-06, "logits/chosen": -2.826671838760376, "logits/rejected": -1.9470030069351196, "logps/chosen": -249.1355438232422, "logps/rejected": -1184.3907470703125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.931785225868225, "rewards/margins": 9.494012832641602, "rewards/rejected": -11.425797462463379, "step": 34830 }, { "epoch": 0.42, "learning_rate": 3.6190079952818603e-06, "logits/chosen": -2.8048923015594482, "logits/rejected": -2.1486411094665527, "logps/chosen": -191.41600036621094, "logps/rejected": -1040.025390625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.451967477798462, "rewards/margins": 8.560717582702637, "rewards/rejected": -10.012685775756836, "step": 34840 }, { "epoch": 0.42, "learning_rate": 3.6180737639599733e-06, "logits/chosen": -2.7931065559387207, "logits/rejected": -2.1334524154663086, "logps/chosen": -178.05197143554688, "logps/rejected": -984.9567260742188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.3202588558197021, "rewards/margins": 8.141145706176758, "rewards/rejected": -9.461404800415039, "step": 34850 }, { "epoch": 0.42, "learning_rate": 3.6171393374255116e-06, "logits/chosen": -2.7704217433929443, "logits/rejected": -2.0187902450561523, "logps/chosen": -200.88961791992188, "logps/rejected": -1020.3797607421875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.5469703674316406, "rewards/margins": 8.247662544250488, "rewards/rejected": -9.794632911682129, "step": 34860 }, { "epoch": 0.42, "learning_rate": 3.6162047158416243e-06, "logits/chosen": -2.854346513748169, "logits/rejected": -2.199134349822998, "logps/chosen": -210.94650268554688, "logps/rejected": -1095.2105712890625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.5681259632110596, "rewards/margins": 8.997122764587402, "rewards/rejected": -10.565248489379883, "step": 34870 }, { "epoch": 0.42, "learning_rate": 3.615269899371494e-06, "logits/chosen": -2.7929084300994873, "logits/rejected": -2.1774086952209473, "logps/chosen": -170.02471923828125, "logps/rejected": -1070.711669921875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.2490627765655518, "rewards/margins": 9.067575454711914, "rewards/rejected": -10.316640853881836, "step": 34880 }, { "epoch": 0.42, "learning_rate": 3.614334888178337e-06, "logits/chosen": -2.8011889457702637, "logits/rejected": -2.338491201400757, "logps/chosen": -157.6764373779297, "logps/rejected": -962.6829223632812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1589123010635376, "rewards/margins": 8.077817916870117, "rewards/rejected": -9.236730575561523, "step": 34890 }, { "epoch": 0.42, "learning_rate": 3.613399682425403e-06, "logits/chosen": -2.8372044563293457, "logits/rejected": -2.3139212131500244, "logps/chosen": -203.80691528320312, "logps/rejected": -928.5185546875, "loss": 0.1122, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.5978835821151733, "rewards/margins": 7.320339202880859, "rewards/rejected": -8.918222427368164, "step": 34900 }, { "epoch": 0.42, "learning_rate": 3.612464282275977e-06, "logits/chosen": -2.868009090423584, "logits/rejected": -2.0907092094421387, "logps/chosen": -204.57948303222656, "logps/rejected": -1033.3883056640625, "loss": 0.1273, "rewards/accuracies": 1.0, "rewards/chosen": -1.5107544660568237, "rewards/margins": 8.411932945251465, "rewards/rejected": -9.922689437866211, "step": 34910 }, { "epoch": 0.42, "learning_rate": 3.6115286878933776e-06, "logits/chosen": -2.8109006881713867, "logits/rejected": -2.2462708950042725, "logps/chosen": -181.89035034179688, "logps/rejected": -976.8577270507812, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.383434534072876, "rewards/margins": 7.988525390625, "rewards/rejected": -9.371960639953613, "step": 34920 }, { "epoch": 0.42, "learning_rate": 3.6105928994409556e-06, "logits/chosen": -2.8642756938934326, "logits/rejected": -2.4570250511169434, "logps/chosen": -116.69085693359375, "logps/rejected": -893.7840576171875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7561830282211304, "rewards/margins": 7.794604301452637, "rewards/rejected": -8.550786972045898, "step": 34930 }, { "epoch": 0.42, "learning_rate": 3.6096569170820976e-06, "logits/chosen": -2.823585271835327, "logits/rejected": -2.109114646911621, "logps/chosen": -178.7008819580078, "logps/rejected": -1045.135009765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.275162696838379, "rewards/margins": 8.785500526428223, "rewards/rejected": -10.060663223266602, "step": 34940 }, { "epoch": 0.42, "learning_rate": 3.6087207409802248e-06, "logits/chosen": -2.817619800567627, "logits/rejected": -2.2817397117614746, "logps/chosen": -171.7192840576172, "logps/rejected": -962.8751220703125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.2507859468460083, "rewards/margins": 7.985523223876953, "rewards/rejected": -9.236308097839355, "step": 34950 }, { "epoch": 0.42, "learning_rate": 3.6077843712987893e-06, "logits/chosen": -2.8075082302093506, "logits/rejected": -2.089176893234253, "logps/chosen": -206.8737335205078, "logps/rejected": -1090.2427978515625, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -1.573991298675537, "rewards/margins": 8.929533004760742, "rewards/rejected": -10.503522872924805, "step": 34960 }, { "epoch": 0.42, "learning_rate": 3.6068478082012797e-06, "logits/chosen": -2.856614112854004, "logits/rejected": -2.298250675201416, "logps/chosen": -177.56936645507812, "logps/rejected": -976.5218505859375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.306409478187561, "rewards/margins": 8.04616641998291, "rewards/rejected": -9.35257625579834, "step": 34970 }, { "epoch": 0.42, "learning_rate": 3.6059110518512165e-06, "logits/chosen": -2.884535312652588, "logits/rejected": -2.4132602214813232, "logps/chosen": -156.61756896972656, "logps/rejected": -988.9021606445312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1001397371292114, "rewards/margins": 8.394085884094238, "rewards/rejected": -9.494226455688477, "step": 34980 }, { "epoch": 0.42, "learning_rate": 3.6049741024121553e-06, "logits/chosen": -2.8740389347076416, "logits/rejected": -2.2960562705993652, "logps/chosen": -131.76779174804688, "logps/rejected": -880.70458984375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8748533129692078, "rewards/margins": 7.55566930770874, "rewards/rejected": -8.430522918701172, "step": 34990 }, { "epoch": 0.42, "learning_rate": 3.604036960047685e-06, "logits/chosen": -2.8870809078216553, "logits/rejected": -2.3598556518554688, "logps/chosen": -156.22703552246094, "logps/rejected": -844.64013671875, "loss": 0.1151, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0901694297790527, "rewards/margins": 6.976644039154053, "rewards/rejected": -8.066812515258789, "step": 35000 }, { "epoch": 0.42, "learning_rate": 3.603099624921428e-06, "logits/chosen": -2.80389142036438, "logits/rejected": -2.2290244102478027, "logps/chosen": -145.8316192626953, "logps/rejected": -966.4181518554688, "loss": 0.1156, "rewards/accuracies": 1.0, "rewards/chosen": -0.9420437812805176, "rewards/margins": 8.332018852233887, "rewards/rejected": -9.27406120300293, "step": 35010 }, { "epoch": 0.42, "learning_rate": 3.6021620971970394e-06, "logits/chosen": -2.90421986579895, "logits/rejected": -2.292074680328369, "logps/chosen": -130.4989013671875, "logps/rejected": -923.4255981445312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7891799211502075, "rewards/margins": 8.063901901245117, "rewards/rejected": -8.853080749511719, "step": 35020 }, { "epoch": 0.42, "learning_rate": 3.6012243770382107e-06, "logits/chosen": -2.8399529457092285, "logits/rejected": -2.3203511238098145, "logps/chosen": -174.27464294433594, "logps/rejected": -971.9281005859375, "loss": 0.1529, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2847793102264404, "rewards/margins": 8.045957565307617, "rewards/rejected": -9.33073616027832, "step": 35030 }, { "epoch": 0.42, "learning_rate": 3.6002864646086644e-06, "logits/chosen": -2.9324309825897217, "logits/rejected": -2.389078140258789, "logps/chosen": -112.54893493652344, "logps/rejected": -923.5594482421875, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -0.657425045967102, "rewards/margins": 8.186551094055176, "rewards/rejected": -8.843976020812988, "step": 35040 }, { "epoch": 0.42, "learning_rate": 3.5993483600721563e-06, "logits/chosen": -2.8484883308410645, "logits/rejected": -2.4268696308135986, "logps/chosen": -124.8656234741211, "logps/rejected": -880.8785400390625, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.8561732172966003, "rewards/margins": 7.563568115234375, "rewards/rejected": -8.419740676879883, "step": 35050 }, { "epoch": 0.42, "learning_rate": 3.598410063592478e-06, "logits/chosen": -2.8884525299072266, "logits/rejected": -2.3751840591430664, "logps/chosen": -148.5589599609375, "logps/rejected": -961.3663940429688, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.023810625076294, "rewards/margins": 8.18799114227295, "rewards/rejected": -9.211801528930664, "step": 35060 }, { "epoch": 0.42, "learning_rate": 3.597471575333454e-06, "logits/chosen": -2.834813356399536, "logits/rejected": -2.3205373287200928, "logps/chosen": -144.2298583984375, "logps/rejected": -995.5885620117188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9876428842544556, "rewards/margins": 8.583745956420898, "rewards/rejected": -9.571388244628906, "step": 35070 }, { "epoch": 0.42, "learning_rate": 3.596532895458941e-06, "logits/chosen": -2.859978437423706, "logits/rejected": -2.371595621109009, "logps/chosen": -128.46495056152344, "logps/rejected": -915.3571166992188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8503177762031555, "rewards/margins": 7.9199419021606445, "rewards/rejected": -8.770259857177734, "step": 35080 }, { "epoch": 0.42, "learning_rate": 3.595594024132829e-06, "logits/chosen": -2.863349437713623, "logits/rejected": -2.4096004962921143, "logps/chosen": -139.54544067382812, "logps/rejected": -1017.5823974609375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9458395838737488, "rewards/margins": 8.827038764953613, "rewards/rejected": -9.772878646850586, "step": 35090 }, { "epoch": 0.42, "learning_rate": 3.594654961519044e-06, "logits/chosen": -2.8533623218536377, "logits/rejected": -2.3031952381134033, "logps/chosen": -151.70687866210938, "logps/rejected": -946.2936401367188, "loss": 0.0824, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0973191261291504, "rewards/margins": 7.970943450927734, "rewards/rejected": -9.068263053894043, "step": 35100 }, { "epoch": 0.42, "learning_rate": 3.5937157077815432e-06, "logits/chosen": -2.8731703758239746, "logits/rejected": -2.305382251739502, "logps/chosen": -178.06809997558594, "logps/rejected": -991.8837890625, "loss": 0.1311, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.282676100730896, "rewards/margins": 8.248217582702637, "rewards/rejected": -9.53089427947998, "step": 35110 }, { "epoch": 0.42, "learning_rate": 3.592776263084317e-06, "logits/chosen": -2.8442351818084717, "logits/rejected": -2.346315860748291, "logps/chosen": -168.39649963378906, "logps/rejected": -900.4782104492188, "loss": 0.0926, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2537609338760376, "rewards/margins": 7.371588230133057, "rewards/rejected": -8.625349998474121, "step": 35120 }, { "epoch": 0.42, "learning_rate": 3.59183662759139e-06, "logits/chosen": -2.8549916744232178, "logits/rejected": -2.1700093746185303, "logps/chosen": -176.70896911621094, "logps/rejected": -1121.10693359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.306555151939392, "rewards/margins": 9.508227348327637, "rewards/rejected": -10.814783096313477, "step": 35130 }, { "epoch": 0.42, "learning_rate": 3.590896801466821e-06, "logits/chosen": -2.879870891571045, "logits/rejected": -2.3348121643066406, "logps/chosen": -174.34024047851562, "logps/rejected": -1043.54052734375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.225740671157837, "rewards/margins": 8.807647705078125, "rewards/rejected": -10.033388137817383, "step": 35140 }, { "epoch": 0.42, "learning_rate": 3.5899567848747003e-06, "logits/chosen": -2.843533515930176, "logits/rejected": -2.196735382080078, "logps/chosen": -177.9673614501953, "logps/rejected": -1002.9141845703125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.2377418279647827, "rewards/margins": 8.378015518188477, "rewards/rejected": -9.615757942199707, "step": 35150 }, { "epoch": 0.42, "learning_rate": 3.589016577979152e-06, "logits/chosen": -2.7430319786071777, "logits/rejected": -1.8420488834381104, "logps/chosen": -221.25344848632812, "logps/rejected": -1114.975830078125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.6629890203475952, "rewards/margins": 9.07852554321289, "rewards/rejected": -10.741515159606934, "step": 35160 }, { "epoch": 0.42, "learning_rate": 3.5880761809443333e-06, "logits/chosen": -2.7525389194488525, "logits/rejected": -2.089862108230591, "logps/chosen": -255.5714111328125, "logps/rejected": -1198.2589111328125, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": -2.0196104049682617, "rewards/margins": 9.558527946472168, "rewards/rejected": -11.578141212463379, "step": 35170 }, { "epoch": 0.42, "learning_rate": 3.5871355939344355e-06, "logits/chosen": -2.8070731163024902, "logits/rejected": -2.0964958667755127, "logps/chosen": -227.22232055664062, "logps/rejected": -1130.1895751953125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.732442855834961, "rewards/margins": 9.150467872619629, "rewards/rejected": -10.882911682128906, "step": 35180 }, { "epoch": 0.42, "learning_rate": 3.586194817113683e-06, "logits/chosen": -2.8535521030426025, "logits/rejected": -2.487457513809204, "logps/chosen": -105.165283203125, "logps/rejected": -845.5519409179688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6395365595817566, "rewards/margins": 7.453866004943848, "rewards/rejected": -8.093401908874512, "step": 35190 }, { "epoch": 0.42, "learning_rate": 3.5852538506463318e-06, "logits/chosen": -2.8205230236053467, "logits/rejected": -2.1897225379943848, "logps/chosen": -188.98451232910156, "logps/rejected": -1107.384521484375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.3792058229446411, "rewards/margins": 9.28300666809082, "rewards/rejected": -10.662212371826172, "step": 35200 }, { "epoch": 0.42, "learning_rate": 3.5843126946966726e-06, "logits/chosen": -2.8388962745666504, "logits/rejected": -2.148263454437256, "logps/chosen": -235.90377807617188, "logps/rejected": -962.2801513671875, "loss": 0.2655, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.824345350265503, "rewards/margins": 7.413863182067871, "rewards/rejected": -9.23820972442627, "step": 35210 }, { "epoch": 0.42, "learning_rate": 3.5833713494290285e-06, "logits/chosen": -2.855795383453369, "logits/rejected": -2.298434019088745, "logps/chosen": -164.13693237304688, "logps/rejected": -960.1728515625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.1797360181808472, "rewards/margins": 8.03390121459961, "rewards/rejected": -9.213637351989746, "step": 35220 }, { "epoch": 0.42, "learning_rate": 3.5824298150077563e-06, "logits/chosen": -2.813621997833252, "logits/rejected": -2.3340582847595215, "logps/chosen": -143.69528198242188, "logps/rejected": -937.6488037109375, "loss": 0.0303, "rewards/accuracies": 1.0, "rewards/chosen": -1.0154048204421997, "rewards/margins": 7.965829372406006, "rewards/rejected": -8.981234550476074, "step": 35230 }, { "epoch": 0.42, "learning_rate": 3.581488091597244e-06, "logits/chosen": -2.824002981185913, "logits/rejected": -2.0807838439941406, "logps/chosen": -171.4739990234375, "logps/rejected": -1023.3502197265625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.2576143741607666, "rewards/margins": 8.578093528747559, "rewards/rejected": -9.835708618164062, "step": 35240 }, { "epoch": 0.42, "learning_rate": 3.5805461793619145e-06, "logits/chosen": -2.877155065536499, "logits/rejected": -2.1675777435302734, "logps/chosen": -196.2163543701172, "logps/rejected": -971.8536376953125, "loss": 0.1105, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4968746900558472, "rewards/margins": 7.836045265197754, "rewards/rejected": -9.332921028137207, "step": 35250 }, { "epoch": 0.42, "learning_rate": 3.5796040784662228e-06, "logits/chosen": -2.7719035148620605, "logits/rejected": -2.2806389331817627, "logps/chosen": -184.73458862304688, "logps/rejected": -1043.196044921875, "loss": 0.0281, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4183423519134521, "rewards/margins": 8.618631362915039, "rewards/rejected": -10.03697395324707, "step": 35260 }, { "epoch": 0.42, "learning_rate": 3.5786617890746582e-06, "logits/chosen": -2.848127841949463, "logits/rejected": -2.118051767349243, "logps/chosen": -167.8643035888672, "logps/rejected": -1033.902587890625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.1429316997528076, "rewards/margins": 8.802308082580566, "rewards/rejected": -9.945240020751953, "step": 35270 }, { "epoch": 0.42, "learning_rate": 3.577719311351741e-06, "logits/chosen": -2.8301196098327637, "logits/rejected": -2.1059515476226807, "logps/chosen": -214.3427734375, "logps/rejected": -986.4453125, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.6785026788711548, "rewards/margins": 7.811875820159912, "rewards/rejected": -9.490378379821777, "step": 35280 }, { "epoch": 0.42, "learning_rate": 3.5767766454620252e-06, "logits/chosen": -2.8376126289367676, "logits/rejected": -2.3147943019866943, "logps/chosen": -157.962890625, "logps/rejected": -1010.5548706054688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.1025859117507935, "rewards/margins": 8.612083435058594, "rewards/rejected": -9.714669227600098, "step": 35290 }, { "epoch": 0.42, "learning_rate": 3.575833791570098e-06, "logits/chosen": -2.810209035873413, "logits/rejected": -2.047895908355713, "logps/chosen": -208.11376953125, "logps/rejected": -1110.7958984375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.5249760150909424, "rewards/margins": 9.172327995300293, "rewards/rejected": -10.697303771972656, "step": 35300 }, { "epoch": 0.42, "learning_rate": 3.574890749840578e-06, "logits/chosen": -2.8385872840881348, "logits/rejected": -1.9440838098526, "logps/chosen": -202.36984252929688, "logps/rejected": -1108.0751953125, "loss": 0.0776, "rewards/accuracies": 1.0, "rewards/chosen": -1.4691920280456543, "rewards/margins": 9.19078540802002, "rewards/rejected": -10.659977912902832, "step": 35310 }, { "epoch": 0.42, "learning_rate": 3.573947520438119e-06, "logits/chosen": -2.841021776199341, "logits/rejected": -1.9880809783935547, "logps/chosen": -206.60952758789062, "logps/rejected": -1113.057373046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.4599716663360596, "rewards/margins": 9.25885009765625, "rewards/rejected": -10.71882152557373, "step": 35320 }, { "epoch": 0.42, "learning_rate": 3.5730041035274064e-06, "logits/chosen": -2.820674419403076, "logits/rejected": -2.488024950027466, "logps/chosen": -144.78286743164062, "logps/rejected": -811.1311645507812, "loss": 0.0248, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0505859851837158, "rewards/margins": 6.684765815734863, "rewards/rejected": -7.735350608825684, "step": 35330 }, { "epoch": 0.42, "learning_rate": 3.5720604992731567e-06, "logits/chosen": -2.848781108856201, "logits/rejected": -2.2032814025878906, "logps/chosen": -149.00479125976562, "logps/rejected": -1066.53271484375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0143473148345947, "rewards/margins": 9.258783340454102, "rewards/rejected": -10.273130416870117, "step": 35340 }, { "epoch": 0.42, "learning_rate": 3.571116707840121e-06, "logits/chosen": -2.7974820137023926, "logits/rejected": -2.2028157711029053, "logps/chosen": -187.0413360595703, "logps/rejected": -1054.9610595703125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.410618543624878, "rewards/margins": 8.751504898071289, "rewards/rejected": -10.16212272644043, "step": 35350 }, { "epoch": 0.42, "learning_rate": 3.5701727293930832e-06, "logits/chosen": -2.8671422004699707, "logits/rejected": -2.184948682785034, "logps/chosen": -176.08937072753906, "logps/rejected": -1026.072998046875, "loss": 0.1028, "rewards/accuracies": 1.0, "rewards/chosen": -1.2332141399383545, "rewards/margins": 8.623885154724121, "rewards/rejected": -9.857097625732422, "step": 35360 }, { "epoch": 0.42, "learning_rate": 3.5692285640968594e-06, "logits/chosen": -2.7917466163635254, "logits/rejected": -2.09832763671875, "logps/chosen": -213.29360961914062, "logps/rejected": -1249.9991455078125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.604113221168518, "rewards/margins": 10.487678527832031, "rewards/rejected": -12.091792106628418, "step": 35370 }, { "epoch": 0.42, "learning_rate": 3.5682842121162973e-06, "logits/chosen": -2.8602991104125977, "logits/rejected": -2.3106820583343506, "logps/chosen": -155.2224884033203, "logps/rejected": -1015.7752075195312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.1209110021591187, "rewards/margins": 8.645739555358887, "rewards/rejected": -9.766651153564453, "step": 35380 }, { "epoch": 0.42, "learning_rate": 3.5673396736162804e-06, "logits/chosen": -2.837925434112549, "logits/rejected": -2.3121120929718018, "logps/chosen": -147.65744018554688, "logps/rejected": -952.7694091796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0351035594940186, "rewards/margins": 8.118085861206055, "rewards/rejected": -9.153188705444336, "step": 35390 }, { "epoch": 0.42, "learning_rate": 3.5663949487617195e-06, "logits/chosen": -2.844451427459717, "logits/rejected": -2.249171495437622, "logps/chosen": -213.4397430419922, "logps/rejected": -1016.5079956054688, "loss": 0.2647, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.6161243915557861, "rewards/margins": 8.164536476135254, "rewards/rejected": -9.780662536621094, "step": 35400 }, { "epoch": 0.42, "learning_rate": 3.565450037717562e-06, "logits/chosen": -2.8446898460388184, "logits/rejected": -2.421851873397827, "logps/chosen": -143.236083984375, "logps/rejected": -1046.2513427734375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9815024137496948, "rewards/margins": 9.08446216583252, "rewards/rejected": -10.065964698791504, "step": 35410 }, { "epoch": 0.42, "learning_rate": 3.564504940648788e-06, "logits/chosen": -2.9235923290252686, "logits/rejected": -2.154951572418213, "logps/chosen": -165.64007568359375, "logps/rejected": -1006.6346435546875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.1803982257843018, "rewards/margins": 8.498294830322266, "rewards/rejected": -9.678691864013672, "step": 35420 }, { "epoch": 0.42, "learning_rate": 3.563559657720408e-06, "logits/chosen": -2.8574700355529785, "logits/rejected": -2.214089870452881, "logps/chosen": -160.56961059570312, "logps/rejected": -1071.9791259765625, "loss": 0.1015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0905786752700806, "rewards/margins": 9.237500190734863, "rewards/rejected": -10.328079223632812, "step": 35430 }, { "epoch": 0.42, "learning_rate": 3.5626141890974652e-06, "logits/chosen": -2.8299031257629395, "logits/rejected": -2.243462324142456, "logps/chosen": -153.528564453125, "logps/rejected": -1016.2166137695312, "loss": 0.1131, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.092637062072754, "rewards/margins": 8.696500778198242, "rewards/rejected": -9.78913688659668, "step": 35440 }, { "epoch": 0.42, "learning_rate": 3.5616685349450365e-06, "logits/chosen": -2.8073182106018066, "logits/rejected": -2.1556131839752197, "logps/chosen": -152.8472137451172, "logps/rejected": -1030.4083251953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0693747997283936, "rewards/margins": 8.843603134155273, "rewards/rejected": -9.912978172302246, "step": 35450 }, { "epoch": 0.42, "learning_rate": 3.5607226954282305e-06, "logits/chosen": -2.861576795578003, "logits/rejected": -2.4219937324523926, "logps/chosen": -122.67948913574219, "logps/rejected": -954.8304443359375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7783765196800232, "rewards/margins": 8.378687858581543, "rewards/rejected": -9.157065391540527, "step": 35460 }, { "epoch": 0.42, "learning_rate": 3.559776670712187e-06, "logits/chosen": -2.792863368988037, "logits/rejected": -2.100360870361328, "logps/chosen": -178.79872131347656, "logps/rejected": -1132.4619140625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.2387137413024902, "rewards/margins": 9.678796768188477, "rewards/rejected": -10.917510986328125, "step": 35470 }, { "epoch": 0.42, "learning_rate": 3.5588304609620804e-06, "logits/chosen": -2.8371853828430176, "logits/rejected": -2.23455810546875, "logps/chosen": -184.69241333007812, "logps/rejected": -1113.6285400390625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.3342880010604858, "rewards/margins": 9.397668838500977, "rewards/rejected": -10.731958389282227, "step": 35480 }, { "epoch": 0.42, "learning_rate": 3.5578840663431177e-06, "logits/chosen": -2.876981258392334, "logits/rejected": -2.27280855178833, "logps/chosen": -168.42181396484375, "logps/rejected": -1038.6597900390625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.2031075954437256, "rewards/margins": 8.784555435180664, "rewards/rejected": -9.987663269042969, "step": 35490 }, { "epoch": 0.42, "learning_rate": 3.5569374870205333e-06, "logits/chosen": -2.8782432079315186, "logits/rejected": -2.202357053756714, "logps/chosen": -152.6694793701172, "logps/rejected": -1075.49658203125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.0623342990875244, "rewards/margins": 9.299166679382324, "rewards/rejected": -10.36150074005127, "step": 35500 }, { "epoch": 0.43, "learning_rate": 3.5559907231596e-06, "logits/chosen": -2.9098517894744873, "logits/rejected": -2.4001667499542236, "logps/chosen": -147.24168395996094, "logps/rejected": -1041.314453125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9822317957878113, "rewards/margins": 9.045062065124512, "rewards/rejected": -10.027294158935547, "step": 35510 }, { "epoch": 0.43, "learning_rate": 3.5550437749256186e-06, "logits/chosen": -2.829329252243042, "logits/rejected": -2.3808305263519287, "logps/chosen": -177.9983367919922, "logps/rejected": -933.7044677734375, "loss": 0.161, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3743853569030762, "rewards/margins": 7.5806450843811035, "rewards/rejected": -8.95503044128418, "step": 35520 }, { "epoch": 0.43, "learning_rate": 3.554096642483925e-06, "logits/chosen": -2.81781268119812, "logits/rejected": -1.7953822612762451, "logps/chosen": -194.98817443847656, "logps/rejected": -1231.053955078125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.39195716381073, "rewards/margins": 10.501445770263672, "rewards/rejected": -11.893404006958008, "step": 35530 }, { "epoch": 0.43, "learning_rate": 3.553149325999885e-06, "logits/chosen": -2.8656954765319824, "logits/rejected": -2.4727883338928223, "logps/chosen": -148.3072509765625, "logps/rejected": -939.1436767578125, "loss": 0.1294, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0617492198944092, "rewards/margins": 7.9475297927856445, "rewards/rejected": -9.009279251098633, "step": 35540 }, { "epoch": 0.43, "learning_rate": 3.552201825638898e-06, "logits/chosen": -2.83343243598938, "logits/rejected": -2.142578125, "logps/chosen": -150.0762481689453, "logps/rejected": -982.3695068359375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.0108449459075928, "rewards/margins": 8.430109024047852, "rewards/rejected": -9.440954208374023, "step": 35550 }, { "epoch": 0.43, "learning_rate": 3.5512541415663946e-06, "logits/chosen": -2.832585096359253, "logits/rejected": -2.2389330863952637, "logps/chosen": -208.1522979736328, "logps/rejected": -1015.0716552734375, "loss": 0.0816, "rewards/accuracies": 1.0, "rewards/chosen": -1.5798479318618774, "rewards/margins": 8.16804313659668, "rewards/rejected": -9.747891426086426, "step": 35560 }, { "epoch": 0.43, "learning_rate": 3.550306273947838e-06, "logits/chosen": -2.8333632946014404, "logits/rejected": -2.0099852085113525, "logps/chosen": -178.65733337402344, "logps/rejected": -1188.7730712890625, "loss": 0.0787, "rewards/accuracies": 1.0, "rewards/chosen": -1.2341431379318237, "rewards/margins": 10.238115310668945, "rewards/rejected": -11.472257614135742, "step": 35570 }, { "epoch": 0.43, "learning_rate": 3.5493582229487223e-06, "logits/chosen": -2.848126173019409, "logits/rejected": -2.2631611824035645, "logps/chosen": -131.48367309570312, "logps/rejected": -919.23828125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8877779841423035, "rewards/margins": 7.924224853515625, "rewards/rejected": -8.812002182006836, "step": 35580 }, { "epoch": 0.43, "learning_rate": 3.5484099887345758e-06, "logits/chosen": -2.8078513145446777, "logits/rejected": -2.3180127143859863, "logps/chosen": -135.30250549316406, "logps/rejected": -1000.8775634765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9236966967582703, "rewards/margins": 8.695025444030762, "rewards/rejected": -9.618721008300781, "step": 35590 }, { "epoch": 0.43, "learning_rate": 3.547461571470957e-06, "logits/chosen": -2.8482041358947754, "logits/rejected": -2.1959877014160156, "logps/chosen": -174.5605010986328, "logps/rejected": -1059.6483154296875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.241194248199463, "rewards/margins": 8.970890998840332, "rewards/rejected": -10.212084770202637, "step": 35600 }, { "epoch": 0.43, "learning_rate": 3.546512971323457e-06, "logits/chosen": -2.8644649982452393, "logits/rejected": -2.0816946029663086, "logps/chosen": -183.51449584960938, "logps/rejected": -1102.663330078125, "loss": 0.1215, "rewards/accuracies": 1.0, "rewards/chosen": -1.2907147407531738, "rewards/margins": 9.345968246459961, "rewards/rejected": -10.636682510375977, "step": 35610 }, { "epoch": 0.43, "learning_rate": 3.5455641884576986e-06, "logits/chosen": -2.8662831783294678, "logits/rejected": -2.4109067916870117, "logps/chosen": -152.802978515625, "logps/rejected": -961.3720703125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0865525007247925, "rewards/margins": 8.149334907531738, "rewards/rejected": -9.23588752746582, "step": 35620 }, { "epoch": 0.43, "learning_rate": 3.5446152230393372e-06, "logits/chosen": -2.835767984390259, "logits/rejected": -2.0689985752105713, "logps/chosen": -179.9556884765625, "logps/rejected": -1109.7989501953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.325301170349121, "rewards/margins": 9.368292808532715, "rewards/rejected": -10.693593978881836, "step": 35630 }, { "epoch": 0.43, "learning_rate": 3.543666075234059e-06, "logits/chosen": -2.8087055683135986, "logits/rejected": -2.481867551803589, "logps/chosen": -109.04615783691406, "logps/rejected": -809.5763549804688, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6989554762840271, "rewards/margins": 7.044347286224365, "rewards/rejected": -7.743302822113037, "step": 35640 }, { "epoch": 0.43, "learning_rate": 3.542716745207583e-06, "logits/chosen": -2.840075969696045, "logits/rejected": -2.298994779586792, "logps/chosen": -168.898193359375, "logps/rejected": -995.94580078125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.2147009372711182, "rewards/margins": 8.353118896484375, "rewards/rejected": -9.56782054901123, "step": 35650 }, { "epoch": 0.43, "learning_rate": 3.541767233125659e-06, "logits/chosen": -2.8694701194763184, "logits/rejected": -2.1200637817382812, "logps/chosen": -177.56399536132812, "logps/rejected": -1138.3502197265625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.228214979171753, "rewards/margins": 9.748651504516602, "rewards/rejected": -10.976865768432617, "step": 35660 }, { "epoch": 0.43, "learning_rate": 3.5408175391540696e-06, "logits/chosen": -2.854907989501953, "logits/rejected": -2.3584840297698975, "logps/chosen": -136.5789794921875, "logps/rejected": -921.83642578125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9032353162765503, "rewards/margins": 7.939211845397949, "rewards/rejected": -8.842447280883789, "step": 35670 }, { "epoch": 0.43, "learning_rate": 3.539867663458628e-06, "logits/chosen": -2.8202006816864014, "logits/rejected": -2.24080228805542, "logps/chosen": -162.3863525390625, "logps/rejected": -1027.573974609375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.1305882930755615, "rewards/margins": 8.755300521850586, "rewards/rejected": -9.885889053344727, "step": 35680 }, { "epoch": 0.43, "learning_rate": 3.5389176062051817e-06, "logits/chosen": -2.8371505737304688, "logits/rejected": -2.2772164344787598, "logps/chosen": -146.77590942382812, "logps/rejected": -984.0360107421875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.905993640422821, "rewards/margins": 8.541722297668457, "rewards/rejected": -9.447715759277344, "step": 35690 }, { "epoch": 0.43, "learning_rate": 3.537967367559606e-06, "logits/chosen": -2.824450969696045, "logits/rejected": -2.3389244079589844, "logps/chosen": -112.6371078491211, "logps/rejected": -886.259765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.705595850944519, "rewards/margins": 7.789720058441162, "rewards/rejected": -8.495316505432129, "step": 35700 }, { "epoch": 0.43, "learning_rate": 3.5370169476878106e-06, "logits/chosen": -2.8224358558654785, "logits/rejected": -2.331226348876953, "logps/chosen": -149.20143127441406, "logps/rejected": -941.7703857421875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0903828144073486, "rewards/margins": 7.9419403076171875, "rewards/rejected": -9.032323837280273, "step": 35710 }, { "epoch": 0.43, "learning_rate": 3.536066346755737e-06, "logits/chosen": -2.846714735031128, "logits/rejected": -2.2133164405822754, "logps/chosen": -167.2526092529297, "logps/rejected": -1019.5120239257812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2144134044647217, "rewards/margins": 8.588605880737305, "rewards/rejected": -9.803019523620605, "step": 35720 }, { "epoch": 0.43, "learning_rate": 3.535115564929356e-06, "logits/chosen": -2.855950117111206, "logits/rejected": -2.305159091949463, "logps/chosen": -197.46493530273438, "logps/rejected": -1014.0905151367188, "loss": 0.0798, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.5081264972686768, "rewards/margins": 8.229107856750488, "rewards/rejected": -9.737235069274902, "step": 35730 }, { "epoch": 0.43, "learning_rate": 3.534164602374673e-06, "logits/chosen": -2.8685426712036133, "logits/rejected": -2.2638092041015625, "logps/chosen": -147.98570251464844, "logps/rejected": -963.0418090820312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0161830186843872, "rewards/margins": 8.221271514892578, "rewards/rejected": -9.237455368041992, "step": 35740 }, { "epoch": 0.43, "learning_rate": 3.5332134592577226e-06, "logits/chosen": -2.855161190032959, "logits/rejected": -2.292785167694092, "logps/chosen": -138.263671875, "logps/rejected": -958.3806762695312, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.9476863741874695, "rewards/margins": 8.259936332702637, "rewards/rejected": -9.207623481750488, "step": 35750 }, { "epoch": 0.43, "learning_rate": 3.532262135744572e-06, "logits/chosen": -2.824321746826172, "logits/rejected": -1.9579639434814453, "logps/chosen": -219.3304443359375, "logps/rejected": -1224.383056640625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.5995126962661743, "rewards/margins": 10.23630142211914, "rewards/rejected": -11.835814476013184, "step": 35760 }, { "epoch": 0.43, "learning_rate": 3.5313106320013184e-06, "logits/chosen": -2.8427956104278564, "logits/rejected": -2.3860929012298584, "logps/chosen": -135.08303833007812, "logps/rejected": -1013.703125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9166976809501648, "rewards/margins": 8.832353591918945, "rewards/rejected": -9.749052047729492, "step": 35770 }, { "epoch": 0.43, "learning_rate": 3.5303589481940936e-06, "logits/chosen": -2.826277017593384, "logits/rejected": -2.261343002319336, "logps/chosen": -172.91831970214844, "logps/rejected": -1047.132568359375, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -1.2677348852157593, "rewards/margins": 8.809497833251953, "rewards/rejected": -10.077234268188477, "step": 35780 }, { "epoch": 0.43, "learning_rate": 3.529407084489058e-06, "logits/chosen": -2.823957920074463, "logits/rejected": -2.1206302642822266, "logps/chosen": -154.04110717773438, "logps/rejected": -1191.7093505859375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0212472677230835, "rewards/margins": 10.500096321105957, "rewards/rejected": -11.521342277526855, "step": 35790 }, { "epoch": 0.43, "learning_rate": 3.5284550410524043e-06, "logits/chosen": -2.832811117172241, "logits/rejected": -2.094555616378784, "logps/chosen": -195.10748291015625, "logps/rejected": -1067.7667236328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.3872134685516357, "rewards/margins": 8.890693664550781, "rewards/rejected": -10.277907371520996, "step": 35800 }, { "epoch": 0.43, "learning_rate": 3.5275028180503573e-06, "logits/chosen": -2.8142364025115967, "logits/rejected": -2.01326584815979, "logps/chosen": -169.76614379882812, "logps/rejected": -1070.4852294921875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.144879937171936, "rewards/margins": 9.160379409790039, "rewards/rejected": -10.305257797241211, "step": 35810 }, { "epoch": 0.43, "learning_rate": 3.526550415649172e-06, "logits/chosen": -2.8460850715637207, "logits/rejected": -2.529634714126587, "logps/chosen": -113.5857162475586, "logps/rejected": -799.6378173828125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7680196762084961, "rewards/margins": 6.865581512451172, "rewards/rejected": -7.63360071182251, "step": 35820 }, { "epoch": 0.43, "learning_rate": 3.525597834015135e-06, "logits/chosen": -2.871654987335205, "logits/rejected": -2.4402146339416504, "logps/chosen": -158.13284301757812, "logps/rejected": -924.1212158203125, "loss": 0.1039, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1462081670761108, "rewards/margins": 7.712719917297363, "rewards/rejected": -8.858928680419922, "step": 35830 }, { "epoch": 0.43, "learning_rate": 3.5246450733145643e-06, "logits/chosen": -2.823660373687744, "logits/rejected": -2.3483407497406006, "logps/chosen": -121.39320373535156, "logps/rejected": -939.0906982421875, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.7962546944618225, "rewards/margins": 8.221405982971191, "rewards/rejected": -9.017660140991211, "step": 35840 }, { "epoch": 0.43, "learning_rate": 3.5236921337138107e-06, "logits/chosen": -2.870453357696533, "logits/rejected": -2.2807133197784424, "logps/chosen": -139.1483612060547, "logps/rejected": -1000.9899291992188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9241293668746948, "rewards/margins": 8.709505081176758, "rewards/rejected": -9.633634567260742, "step": 35850 }, { "epoch": 0.43, "learning_rate": 3.522739015379253e-06, "logits/chosen": -2.8494224548339844, "logits/rejected": -2.329354763031006, "logps/chosen": -176.208740234375, "logps/rejected": -962.7841796875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2596238851547241, "rewards/margins": 7.979836940765381, "rewards/rejected": -9.239460945129395, "step": 35860 }, { "epoch": 0.43, "learning_rate": 3.5217857184773035e-06, "logits/chosen": -2.8455605506896973, "logits/rejected": -1.9648218154907227, "logps/chosen": -176.41798400878906, "logps/rejected": -1028.971435546875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.1817388534545898, "rewards/margins": 8.718250274658203, "rewards/rejected": -9.899989128112793, "step": 35870 }, { "epoch": 0.43, "learning_rate": 3.520832243174406e-06, "logits/chosen": -2.796186923980713, "logits/rejected": -2.0943055152893066, "logps/chosen": -180.53018188476562, "logps/rejected": -1182.2056884765625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3079599142074585, "rewards/margins": 10.1080961227417, "rewards/rejected": -11.416055679321289, "step": 35880 }, { "epoch": 0.43, "learning_rate": 3.519878589637034e-06, "logits/chosen": -2.842339038848877, "logits/rejected": -2.413083553314209, "logps/chosen": -157.64730834960938, "logps/rejected": -828.0838623046875, "loss": 0.1723, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1743696928024292, "rewards/margins": 6.740894317626953, "rewards/rejected": -7.915264129638672, "step": 35890 }, { "epoch": 0.43, "learning_rate": 3.5189247580316927e-06, "logits/chosen": -2.8624799251556396, "logits/rejected": -2.4572272300720215, "logps/chosen": -106.0838851928711, "logps/rejected": -867.6310424804688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6635807752609253, "rewards/margins": 7.622140407562256, "rewards/rejected": -8.285720825195312, "step": 35900 }, { "epoch": 0.43, "learning_rate": 3.5179707485249192e-06, "logits/chosen": -2.832812547683716, "logits/rejected": -2.344400405883789, "logps/chosen": -148.02651977539062, "logps/rejected": -956.0110473632812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.052610993385315, "rewards/margins": 8.121213912963867, "rewards/rejected": -9.17382526397705, "step": 35910 }, { "epoch": 0.43, "learning_rate": 3.51701656128328e-06, "logits/chosen": -2.8238205909729004, "logits/rejected": -2.097494125366211, "logps/chosen": -168.86477661132812, "logps/rejected": -1086.193115234375, "loss": 0.0816, "rewards/accuracies": 1.0, "rewards/chosen": -1.222399115562439, "rewards/margins": 9.235562324523926, "rewards/rejected": -10.45796012878418, "step": 35920 }, { "epoch": 0.43, "learning_rate": 3.5160621964733726e-06, "logits/chosen": -2.8562674522399902, "logits/rejected": -2.122690200805664, "logps/chosen": -170.99356079101562, "logps/rejected": -879.1348876953125, "loss": 0.0881, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2264139652252197, "rewards/margins": 7.166894435882568, "rewards/rejected": -8.393308639526367, "step": 35930 }, { "epoch": 0.43, "learning_rate": 3.515107654261829e-06, "logits/chosen": -2.8605403900146484, "logits/rejected": -2.371250629425049, "logps/chosen": -156.27859497070312, "logps/rejected": -1040.785400390625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.0749752521514893, "rewards/margins": 8.945809364318848, "rewards/rejected": -10.020784378051758, "step": 35940 }, { "epoch": 0.43, "learning_rate": 3.514152934815308e-06, "logits/chosen": -2.863403797149658, "logits/rejected": -2.24395489692688, "logps/chosen": -149.9030303955078, "logps/rejected": -1012.3724365234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.041770339012146, "rewards/margins": 8.696390151977539, "rewards/rejected": -9.7381591796875, "step": 35950 }, { "epoch": 0.43, "learning_rate": 3.513198038300501e-06, "logits/chosen": -2.8470492362976074, "logits/rejected": -2.493532419204712, "logps/chosen": -117.76863098144531, "logps/rejected": -874.3128662109375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7589207291603088, "rewards/margins": 7.612212181091309, "rewards/rejected": -8.371133804321289, "step": 35960 }, { "epoch": 0.43, "learning_rate": 3.5122429648841305e-06, "logits/chosen": -2.882072687149048, "logits/rejected": -2.258513927459717, "logps/chosen": -135.12644958496094, "logps/rejected": -1057.278564453125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.851439356803894, "rewards/margins": 9.325495719909668, "rewards/rejected": -10.176935195922852, "step": 35970 }, { "epoch": 0.43, "learning_rate": 3.5112877147329506e-06, "logits/chosen": -2.844242811203003, "logits/rejected": -2.136507749557495, "logps/chosen": -196.5873260498047, "logps/rejected": -1068.0439453125, "loss": 0.0273, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4297467470169067, "rewards/margins": 8.845266342163086, "rewards/rejected": -10.27501392364502, "step": 35980 }, { "epoch": 0.43, "learning_rate": 3.5103322880137434e-06, "logits/chosen": -2.822385549545288, "logits/rejected": -2.187047004699707, "logps/chosen": -189.72035217285156, "logps/rejected": -1089.617919921875, "loss": 0.0962, "rewards/accuracies": 1.0, "rewards/chosen": -1.3970191478729248, "rewards/margins": 9.08775520324707, "rewards/rejected": -10.484774589538574, "step": 35990 }, { "epoch": 0.43, "learning_rate": 3.5093766848933253e-06, "logits/chosen": -2.8551976680755615, "logits/rejected": -2.415975332260132, "logps/chosen": -145.037841796875, "logps/rejected": -961.7623291015625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.0350277423858643, "rewards/margins": 8.19888687133789, "rewards/rejected": -9.233915328979492, "step": 36000 }, { "epoch": 0.43, "eval_logits/chosen": -2.824150800704956, "eval_logits/rejected": -1.628164291381836, "eval_logps/chosen": -339.1852111816406, "eval_logps/rejected": -1304.8731689453125, "eval_loss": 0.0003760583349503577, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -2.7800493240356445, "eval_rewards/margins": 9.80142879486084, "eval_rewards/rejected": -12.581477165222168, "eval_runtime": 1.2152, "eval_samples_per_second": 4.115, "eval_steps_per_second": 2.469, "step": 36000 }, { "epoch": 0.43, "learning_rate": 3.5084209055385415e-06, "logits/chosen": -2.7828011512756348, "logits/rejected": -2.0834927558898926, "logps/chosen": -183.77297973632812, "logps/rejected": -1161.0992431640625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.3336246013641357, "rewards/margins": 9.864982604980469, "rewards/rejected": -11.198606491088867, "step": 36010 }, { "epoch": 0.43, "learning_rate": 3.5074649501162676e-06, "logits/chosen": -2.8008952140808105, "logits/rejected": -1.8491811752319336, "logps/chosen": -196.10589599609375, "logps/rejected": -1087.5562744140625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.3072340488433838, "rewards/margins": 9.159341812133789, "rewards/rejected": -10.466575622558594, "step": 36020 }, { "epoch": 0.43, "learning_rate": 3.5065088187934126e-06, "logits/chosen": -2.8392691612243652, "logits/rejected": -2.1436829566955566, "logps/chosen": -152.8093719482422, "logps/rejected": -970.9641723632812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0201674699783325, "rewards/margins": 8.300945281982422, "rewards/rejected": -9.321113586425781, "step": 36030 }, { "epoch": 0.43, "learning_rate": 3.505552511736913e-06, "logits/chosen": -2.8677451610565186, "logits/rejected": -2.309098482131958, "logps/chosen": -139.10218811035156, "logps/rejected": -1065.0301513671875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.9221677780151367, "rewards/margins": 9.353044509887695, "rewards/rejected": -10.2752103805542, "step": 36040 }, { "epoch": 0.43, "learning_rate": 3.5045960291137372e-06, "logits/chosen": -2.846308469772339, "logits/rejected": -1.867640495300293, "logps/chosen": -182.28713989257812, "logps/rejected": -1222.48095703125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.227383017539978, "rewards/margins": 10.579680442810059, "rewards/rejected": -11.807063102722168, "step": 36050 }, { "epoch": 0.43, "learning_rate": 3.503639371090885e-06, "logits/chosen": -2.8057756423950195, "logits/rejected": -2.0599169731140137, "logps/chosen": -174.88462829589844, "logps/rejected": -1068.7679443359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1920359134674072, "rewards/margins": 9.100476264953613, "rewards/rejected": -10.292510986328125, "step": 36060 }, { "epoch": 0.43, "learning_rate": 3.5026825378353867e-06, "logits/chosen": -2.860083818435669, "logits/rejected": -2.535043716430664, "logps/chosen": -110.74068450927734, "logps/rejected": -935.66748046875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7187614440917969, "rewards/margins": 8.249094009399414, "rewards/rejected": -8.967855453491211, "step": 36070 }, { "epoch": 0.43, "learning_rate": 3.5017255295143015e-06, "logits/chosen": -2.794811487197876, "logits/rejected": -2.2293174266815186, "logps/chosen": -164.71719360351562, "logps/rejected": -1120.9373779296875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1666231155395508, "rewards/margins": 9.634271621704102, "rewards/rejected": -10.800894737243652, "step": 36080 }, { "epoch": 0.43, "learning_rate": 3.500768346294721e-06, "logits/chosen": -2.8332440853118896, "logits/rejected": -2.288867235183716, "logps/chosen": -142.63290405273438, "logps/rejected": -1110.52001953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9937431216239929, "rewards/margins": 9.713444709777832, "rewards/rejected": -10.70718765258789, "step": 36090 }, { "epoch": 0.43, "learning_rate": 3.499810988343767e-06, "logits/chosen": -2.847893238067627, "logits/rejected": -2.139526844024658, "logps/chosen": -149.18930053710938, "logps/rejected": -1095.48974609375, "loss": 0.1485, "rewards/accuracies": 1.0, "rewards/chosen": -0.9856404066085815, "rewards/margins": 9.56566333770752, "rewards/rejected": -10.551301956176758, "step": 36100 }, { "epoch": 0.43, "learning_rate": 3.4988534558285912e-06, "logits/chosen": -2.8356332778930664, "logits/rejected": -2.3162059783935547, "logps/chosen": -144.06588745117188, "logps/rejected": -972.1930541992188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9961265325546265, "rewards/margins": 8.33177375793457, "rewards/rejected": -9.327900886535645, "step": 36110 }, { "epoch": 0.43, "learning_rate": 3.497895748916377e-06, "logits/chosen": -2.9099178314208984, "logits/rejected": -2.4970548152923584, "logps/chosen": -120.4365463256836, "logps/rejected": -889.23388671875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8019098043441772, "rewards/margins": 7.7158098220825195, "rewards/rejected": -8.517720222473145, "step": 36120 }, { "epoch": 0.43, "learning_rate": 3.4969378677743352e-06, "logits/chosen": -2.813882350921631, "logits/rejected": -2.2188985347747803, "logps/chosen": -166.59007263183594, "logps/rejected": -1045.8548583984375, "loss": 0.1737, "rewards/accuracies": 1.0, "rewards/chosen": -1.1729694604873657, "rewards/margins": 8.880038261413574, "rewards/rejected": -10.053009033203125, "step": 36130 }, { "epoch": 0.43, "learning_rate": 3.495979812569712e-06, "logits/chosen": -2.8491508960723877, "logits/rejected": -2.542757511138916, "logps/chosen": -151.6588897705078, "logps/rejected": -787.4182739257812, "loss": 0.254, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.1358988285064697, "rewards/margins": 6.379666328430176, "rewards/rejected": -7.515565395355225, "step": 36140 }, { "epoch": 0.43, "learning_rate": 3.495021583469779e-06, "logits/chosen": -2.8493828773498535, "logits/rejected": -2.552865982055664, "logps/chosen": -115.93560791015625, "logps/rejected": -846.7786865234375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7582176923751831, "rewards/margins": 7.3206939697265625, "rewards/rejected": -8.078911781311035, "step": 36150 }, { "epoch": 0.43, "learning_rate": 3.49406318064184e-06, "logits/chosen": -2.856473445892334, "logits/rejected": -2.4142701625823975, "logps/chosen": -162.9839324951172, "logps/rejected": -947.3416748046875, "loss": 0.1373, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1863681077957153, "rewards/margins": 7.905173301696777, "rewards/rejected": -9.091541290283203, "step": 36160 }, { "epoch": 0.43, "learning_rate": 3.493104604253232e-06, "logits/chosen": -2.871084690093994, "logits/rejected": -2.6065633296966553, "logps/chosen": -125.10311126708984, "logps/rejected": -789.0618896484375, "loss": 0.1021, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.872046172618866, "rewards/margins": 6.637911319732666, "rewards/rejected": -7.509957313537598, "step": 36170 }, { "epoch": 0.43, "learning_rate": 3.4921458544713184e-06, "logits/chosen": -2.872922897338867, "logits/rejected": -2.440380811691284, "logps/chosen": -160.33071899414062, "logps/rejected": -844.3137817382812, "loss": 0.2466, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.1685123443603516, "rewards/margins": 6.897483825683594, "rewards/rejected": -8.065996170043945, "step": 36180 }, { "epoch": 0.43, "learning_rate": 3.4911869314634933e-06, "logits/chosen": -2.8431804180145264, "logits/rejected": -2.2931947708129883, "logps/chosen": -143.42359924316406, "logps/rejected": -959.3567504882812, "loss": 0.1095, "rewards/accuracies": 1.0, "rewards/chosen": -0.9801626205444336, "rewards/margins": 8.229952812194824, "rewards/rejected": -9.210115432739258, "step": 36190 }, { "epoch": 0.43, "learning_rate": 3.490227835397183e-06, "logits/chosen": -2.8626906871795654, "logits/rejected": -2.2153823375701904, "logps/chosen": -176.06488037109375, "logps/rejected": -1055.163330078125, "loss": 0.1473, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.254744291305542, "rewards/margins": 8.903841972351074, "rewards/rejected": -10.158586502075195, "step": 36200 }, { "epoch": 0.43, "learning_rate": 3.4892685664398433e-06, "logits/chosen": -2.8573989868164062, "logits/rejected": -2.4385714530944824, "logps/chosen": -113.52748107910156, "logps/rejected": -966.4185791015625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7204570174217224, "rewards/margins": 8.56159782409668, "rewards/rejected": -9.28205394744873, "step": 36210 }, { "epoch": 0.43, "learning_rate": 3.488309124758959e-06, "logits/chosen": -2.8628787994384766, "logits/rejected": -2.5531020164489746, "logps/chosen": -99.76203155517578, "logps/rejected": -860.9978637695312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5953884124755859, "rewards/margins": 7.640622615814209, "rewards/rejected": -8.23601245880127, "step": 36220 }, { "epoch": 0.43, "learning_rate": 3.4873495105220466e-06, "logits/chosen": -2.812581777572632, "logits/rejected": -2.0826239585876465, "logps/chosen": -194.27105712890625, "logps/rejected": -1118.4140625, "loss": 0.1028, "rewards/accuracies": 1.0, "rewards/chosen": -1.4564462900161743, "rewards/margins": 9.345768928527832, "rewards/rejected": -10.802213668823242, "step": 36230 }, { "epoch": 0.43, "learning_rate": 3.486389723896653e-06, "logits/chosen": -2.8903565406799316, "logits/rejected": -2.4180755615234375, "logps/chosen": -134.06814575195312, "logps/rejected": -956.5284423828125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9009743928909302, "rewards/margins": 8.270204544067383, "rewards/rejected": -9.171178817749023, "step": 36240 }, { "epoch": 0.43, "learning_rate": 3.4854297650503514e-06, "logits/chosen": -2.8587114810943604, "logits/rejected": -2.399665594100952, "logps/chosen": -122.7525863647461, "logps/rejected": -941.8571166992188, "loss": 0.0692, "rewards/accuracies": 1.0, "rewards/chosen": -0.777091383934021, "rewards/margins": 8.25822925567627, "rewards/rejected": -9.035320281982422, "step": 36250 }, { "epoch": 0.43, "learning_rate": 3.484469634150751e-06, "logits/chosen": -2.8956055641174316, "logits/rejected": -2.419609308242798, "logps/chosen": -143.1798553466797, "logps/rejected": -992.9334106445312, "loss": 0.1603, "rewards/accuracies": 1.0, "rewards/chosen": -0.9022936820983887, "rewards/margins": 8.632747650146484, "rewards/rejected": -9.535041809082031, "step": 36260 }, { "epoch": 0.43, "learning_rate": 3.483509331365486e-06, "logits/chosen": -2.853041172027588, "logits/rejected": -2.5594146251678467, "logps/chosen": -112.56196594238281, "logps/rejected": -805.55126953125, "loss": 0.1036, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.757107138633728, "rewards/margins": 6.924432277679443, "rewards/rejected": -7.681540489196777, "step": 36270 }, { "epoch": 0.43, "learning_rate": 3.4825488568622235e-06, "logits/chosen": -2.9000465869903564, "logits/rejected": -2.4517550468444824, "logps/chosen": -96.5441665649414, "logps/rejected": -849.34375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5640098452568054, "rewards/margins": 7.546544075012207, "rewards/rejected": -8.110553741455078, "step": 36280 }, { "epoch": 0.43, "learning_rate": 3.4815882108086594e-06, "logits/chosen": -2.8684332370758057, "logits/rejected": -2.45983624458313, "logps/chosen": -127.00785064697266, "logps/rejected": -934.1546630859375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8316377401351929, "rewards/margins": 8.127019882202148, "rewards/rejected": -8.958658218383789, "step": 36290 }, { "epoch": 0.43, "learning_rate": 3.4806273933725208e-06, "logits/chosen": -2.9168951511383057, "logits/rejected": -2.5413851737976074, "logps/chosen": -103.9629898071289, "logps/rejected": -877.8259887695312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6538920998573303, "rewards/margins": 7.743563652038574, "rewards/rejected": -8.397455215454102, "step": 36300 }, { "epoch": 0.43, "learning_rate": 3.4796664047215624e-06, "logits/chosen": -2.804527759552002, "logits/rejected": -2.2265818119049072, "logps/chosen": -115.50422668457031, "logps/rejected": -951.365234375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6730011105537415, "rewards/margins": 8.451690673828125, "rewards/rejected": -9.124692916870117, "step": 36310 }, { "epoch": 0.43, "learning_rate": 3.4787052450235697e-06, "logits/chosen": -2.856049060821533, "logits/rejected": -2.238543748855591, "logps/chosen": -123.60831451416016, "logps/rejected": -980.8248291015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7294420003890991, "rewards/margins": 8.69359302520752, "rewards/rejected": -9.423036575317383, "step": 36320 }, { "epoch": 0.43, "learning_rate": 3.477743914446361e-06, "logits/chosen": -2.871337652206421, "logits/rejected": -2.2598228454589844, "logps/chosen": -156.30259704589844, "logps/rejected": -1038.40234375, "loss": 0.1587, "rewards/accuracies": 1.0, "rewards/chosen": -1.1039270162582397, "rewards/margins": 8.896812438964844, "rewards/rejected": -10.000739097595215, "step": 36330 }, { "epoch": 0.44, "learning_rate": 3.4767824131577793e-06, "logits/chosen": -2.864629030227661, "logits/rejected": -2.2622151374816895, "logps/chosen": -146.45480346679688, "logps/rejected": -995.6066284179688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9288593530654907, "rewards/margins": 8.64913558959961, "rewards/rejected": -9.577995300292969, "step": 36340 }, { "epoch": 0.44, "learning_rate": 3.4758207413257015e-06, "logits/chosen": -2.803515672683716, "logits/rejected": -1.9977586269378662, "logps/chosen": -170.70132446289062, "logps/rejected": -1148.0814208984375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1949011087417603, "rewards/margins": 9.887910842895508, "rewards/rejected": -11.08281135559082, "step": 36350 }, { "epoch": 0.44, "learning_rate": 3.474858899118033e-06, "logits/chosen": -2.868521213531494, "logits/rejected": -2.511443614959717, "logps/chosen": -96.71669006347656, "logps/rejected": -888.05224609375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5799463987350464, "rewards/margins": 7.912977695465088, "rewards/rejected": -8.492925643920898, "step": 36360 }, { "epoch": 0.44, "learning_rate": 3.4738968867027072e-06, "logits/chosen": -2.860703229904175, "logits/rejected": -2.388221263885498, "logps/chosen": -151.79296875, "logps/rejected": -967.4713745117188, "loss": 0.2119, "rewards/accuracies": 1.0, "rewards/chosen": -1.0593937635421753, "rewards/margins": 8.206038475036621, "rewards/rejected": -9.265432357788086, "step": 36370 }, { "epoch": 0.44, "learning_rate": 3.4729347042476906e-06, "logits/chosen": -2.866057872772217, "logits/rejected": -2.416961908340454, "logps/chosen": -142.00595092773438, "logps/rejected": -951.6253051757812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0012657642364502, "rewards/margins": 8.125711441040039, "rewards/rejected": -9.126976013183594, "step": 36380 }, { "epoch": 0.44, "learning_rate": 3.4719723519209765e-06, "logits/chosen": -2.8338704109191895, "logits/rejected": -2.521352767944336, "logps/chosen": -115.8935775756836, "logps/rejected": -966.0607299804688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7693896889686584, "rewards/margins": 8.502891540527344, "rewards/rejected": -9.272281646728516, "step": 36390 }, { "epoch": 0.44, "learning_rate": 3.4710098298905898e-06, "logits/chosen": -2.8243212699890137, "logits/rejected": -2.1945555210113525, "logps/chosen": -158.79586791992188, "logps/rejected": -1062.0184326171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0753713846206665, "rewards/margins": 9.142923355102539, "rewards/rejected": -10.218295097351074, "step": 36400 }, { "epoch": 0.44, "learning_rate": 3.4700471383245834e-06, "logits/chosen": -2.8808929920196533, "logits/rejected": -2.303104877471924, "logps/chosen": -152.0747833251953, "logps/rejected": -1018.7032470703125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.023861289024353, "rewards/margins": 8.749043464660645, "rewards/rejected": -9.772904396057129, "step": 36410 }, { "epoch": 0.44, "learning_rate": 3.4690842773910404e-06, "logits/chosen": -2.860330581665039, "logits/rejected": -2.1335017681121826, "logps/chosen": -135.44540405273438, "logps/rejected": -1052.4615478515625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8639191389083862, "rewards/margins": 9.25938606262207, "rewards/rejected": -10.123305320739746, "step": 36420 }, { "epoch": 0.44, "learning_rate": 3.4681212472580744e-06, "logits/chosen": -2.8257346153259277, "logits/rejected": -2.3322386741638184, "logps/chosen": -146.1584930419922, "logps/rejected": -949.0966796875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0211325883865356, "rewards/margins": 8.07833194732666, "rewards/rejected": -9.099464416503906, "step": 36430 }, { "epoch": 0.44, "learning_rate": 3.467158048093827e-06, "logits/chosen": -2.8117542266845703, "logits/rejected": -2.313852548599243, "logps/chosen": -154.76089477539062, "logps/rejected": -1193.1165771484375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.1050363779067993, "rewards/margins": 10.424295425415039, "rewards/rejected": -11.529333114624023, "step": 36440 }, { "epoch": 0.44, "learning_rate": 3.4661946800664715e-06, "logits/chosen": -2.8480021953582764, "logits/rejected": -2.0678935050964355, "logps/chosen": -176.8106231689453, "logps/rejected": -1122.235107421875, "loss": 0.0668, "rewards/accuracies": 1.0, "rewards/chosen": -1.179955244064331, "rewards/margins": 9.630359649658203, "rewards/rejected": -10.81031608581543, "step": 36450 }, { "epoch": 0.44, "learning_rate": 3.4652311433442082e-06, "logits/chosen": -2.870495319366455, "logits/rejected": -2.4389395713806152, "logps/chosen": -157.44564819335938, "logps/rejected": -843.5516357421875, "loss": 0.1924, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.1449013948440552, "rewards/margins": 6.920158386230469, "rewards/rejected": -8.065059661865234, "step": 36460 }, { "epoch": 0.44, "learning_rate": 3.4642674380952678e-06, "logits/chosen": -2.8142266273498535, "logits/rejected": -2.1809070110321045, "logps/chosen": -129.0203399658203, "logps/rejected": -913.3883666992188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8289103507995605, "rewards/margins": 7.914041042327881, "rewards/rejected": -8.742952346801758, "step": 36470 }, { "epoch": 0.44, "learning_rate": 3.463303564487911e-06, "logits/chosen": -2.8891441822052, "logits/rejected": -2.3657310009002686, "logps/chosen": -138.61846923828125, "logps/rejected": -857.0505981445312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8841639757156372, "rewards/margins": 7.3114519119262695, "rewards/rejected": -8.195615768432617, "step": 36480 }, { "epoch": 0.44, "learning_rate": 3.462339522690427e-06, "logits/chosen": -2.832319736480713, "logits/rejected": -2.2846930027008057, "logps/chosen": -161.15444946289062, "logps/rejected": -981.2321166992188, "loss": 0.152, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1212437152862549, "rewards/margins": 8.30409049987793, "rewards/rejected": -9.425334930419922, "step": 36490 }, { "epoch": 0.44, "learning_rate": 3.461375312871135e-06, "logits/chosen": -2.851088285446167, "logits/rejected": -2.1079554557800293, "logps/chosen": -165.22012329101562, "logps/rejected": -935.5490112304688, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.1813480854034424, "rewards/margins": 7.786523342132568, "rewards/rejected": -8.96787166595459, "step": 36500 }, { "epoch": 0.44, "learning_rate": 3.460410935198384e-06, "logits/chosen": -2.8437163829803467, "logits/rejected": -2.099459648132324, "logps/chosen": -185.4303741455078, "logps/rejected": -1145.52392578125, "loss": 0.2443, "rewards/accuracies": 1.0, "rewards/chosen": -1.3013036251068115, "rewards/margins": 9.746404647827148, "rewards/rejected": -11.047708511352539, "step": 36510 }, { "epoch": 0.44, "learning_rate": 3.4594463898405518e-06, "logits/chosen": -2.867985486984253, "logits/rejected": -2.402768611907959, "logps/chosen": -150.38650512695312, "logps/rejected": -968.095703125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0791860818862915, "rewards/margins": 8.217735290527344, "rewards/rejected": -9.29692268371582, "step": 36520 }, { "epoch": 0.44, "learning_rate": 3.4584816769660432e-06, "logits/chosen": -2.815249443054199, "logits/rejected": -2.1750433444976807, "logps/chosen": -173.11663818359375, "logps/rejected": -1060.4149169921875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.229346513748169, "rewards/margins": 8.961050033569336, "rewards/rejected": -10.190397262573242, "step": 36530 }, { "epoch": 0.44, "learning_rate": 3.4575167967432967e-06, "logits/chosen": -2.8393969535827637, "logits/rejected": -2.2673487663269043, "logps/chosen": -151.1456298828125, "logps/rejected": -946.6471557617188, "loss": 0.1481, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0299880504608154, "rewards/margins": 8.04565143585205, "rewards/rejected": -9.075639724731445, "step": 36540 }, { "epoch": 0.44, "learning_rate": 3.4565517493407765e-06, "logits/chosen": -2.820176839828491, "logits/rejected": -2.289287567138672, "logps/chosen": -144.30218505859375, "logps/rejected": -959.2027587890625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.9367614984512329, "rewards/margins": 8.26638412475586, "rewards/rejected": -9.203145980834961, "step": 36550 }, { "epoch": 0.44, "learning_rate": 3.455586534926978e-06, "logits/chosen": -2.8426976203918457, "logits/rejected": -2.341765880584717, "logps/chosen": -149.00868225097656, "logps/rejected": -1002.5013427734375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0335098505020142, "rewards/margins": 8.593354225158691, "rewards/rejected": -9.62686538696289, "step": 36560 }, { "epoch": 0.44, "learning_rate": 3.454621153670424e-06, "logits/chosen": -2.8439228534698486, "logits/rejected": -2.4323365688323975, "logps/chosen": -105.16922760009766, "logps/rejected": -923.6375732421875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6805343627929688, "rewards/margins": 8.185497283935547, "rewards/rejected": -8.866031646728516, "step": 36570 }, { "epoch": 0.44, "learning_rate": 3.453655605739668e-06, "logits/chosen": -2.8736062049865723, "logits/rejected": -2.289795398712158, "logps/chosen": -128.96170043945312, "logps/rejected": -980.9846801757812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8046357035636902, "rewards/margins": 8.61786937713623, "rewards/rejected": -9.422506332397461, "step": 36580 }, { "epoch": 0.44, "learning_rate": 3.4526898913032923e-06, "logits/chosen": -2.800231456756592, "logits/rejected": -2.074232816696167, "logps/chosen": -157.6381378173828, "logps/rejected": -1083.914306640625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0593931674957275, "rewards/margins": 9.382204055786133, "rewards/rejected": -10.441597938537598, "step": 36590 }, { "epoch": 0.44, "learning_rate": 3.4517240105299076e-06, "logits/chosen": -2.8396923542022705, "logits/rejected": -2.401840925216675, "logps/chosen": -103.15736389160156, "logps/rejected": -933.0914916992188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5962678790092468, "rewards/margins": 8.34691333770752, "rewards/rejected": -8.943181037902832, "step": 36600 }, { "epoch": 0.44, "learning_rate": 3.450757963588153e-06, "logits/chosen": -2.883495330810547, "logits/rejected": -2.650930881500244, "logps/chosen": -81.46037292480469, "logps/rejected": -770.7631225585938, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -0.4705498218536377, "rewards/margins": 6.8804521560668945, "rewards/rejected": -7.351001739501953, "step": 36610 }, { "epoch": 0.44, "learning_rate": 3.4497917506467e-06, "logits/chosen": -2.883916139602661, "logits/rejected": -2.406388282775879, "logps/chosen": -125.3983383178711, "logps/rejected": -964.66552734375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7978317141532898, "rewards/margins": 8.475751876831055, "rewards/rejected": -9.273584365844727, "step": 36620 }, { "epoch": 0.44, "learning_rate": 3.448825371874245e-06, "logits/chosen": -2.8010287284851074, "logits/rejected": -2.393789291381836, "logps/chosen": -107.1709213256836, "logps/rejected": -852.5953979492188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6737688183784485, "rewards/margins": 7.48907470703125, "rewards/rejected": -8.162843704223633, "step": 36630 }, { "epoch": 0.44, "learning_rate": 3.4478588274395134e-06, "logits/chosen": -2.8077664375305176, "logits/rejected": -2.197887420654297, "logps/chosen": -147.9922332763672, "logps/rejected": -1011.1144409179688, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.9864640235900879, "rewards/margins": 8.75214958190918, "rewards/rejected": -9.738612174987793, "step": 36640 }, { "epoch": 0.44, "learning_rate": 3.4468921175112654e-06, "logits/chosen": -2.836329936981201, "logits/rejected": -2.427905559539795, "logps/chosen": -113.3798599243164, "logps/rejected": -877.6189575195312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6843786835670471, "rewards/margins": 7.711691379547119, "rewards/rejected": -8.39607048034668, "step": 36650 }, { "epoch": 0.44, "learning_rate": 3.445925242258282e-06, "logits/chosen": -2.864978790283203, "logits/rejected": -2.0628232955932617, "logps/chosen": -171.80967712402344, "logps/rejected": -1124.4940185546875, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -1.2140594720840454, "rewards/margins": 9.627952575683594, "rewards/rejected": -10.842012405395508, "step": 36660 }, { "epoch": 0.44, "learning_rate": 3.444958201849379e-06, "logits/chosen": -2.836946487426758, "logits/rejected": -2.3180718421936035, "logps/chosen": -145.9908905029297, "logps/rejected": -1076.7801513671875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9836190938949585, "rewards/margins": 9.387557029724121, "rewards/rejected": -10.371175765991211, "step": 36670 }, { "epoch": 0.44, "learning_rate": 3.4439909964533997e-06, "logits/chosen": -2.775926113128662, "logits/rejected": -2.127540111541748, "logps/chosen": -154.12466430664062, "logps/rejected": -1142.38720703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0363188982009888, "rewards/margins": 9.999540328979492, "rewards/rejected": -11.035860061645508, "step": 36680 }, { "epoch": 0.44, "learning_rate": 3.443023626239213e-06, "logits/chosen": -2.7801883220672607, "logits/rejected": -2.10491943359375, "logps/chosen": -187.9136199951172, "logps/rejected": -1064.887451171875, "loss": 0.0233, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3312714099884033, "rewards/margins": 8.908397674560547, "rewards/rejected": -10.23967170715332, "step": 36690 }, { "epoch": 0.44, "learning_rate": 3.4420560913757206e-06, "logits/chosen": -2.8668177127838135, "logits/rejected": -2.5411038398742676, "logps/chosen": -111.2612075805664, "logps/rejected": -855.92822265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7021812796592712, "rewards/margins": 7.480035305023193, "rewards/rejected": -8.18221664428711, "step": 36700 }, { "epoch": 0.44, "learning_rate": 3.4410883920318503e-06, "logits/chosen": -2.8622539043426514, "logits/rejected": -2.3013036251068115, "logps/chosen": -162.10525512695312, "logps/rejected": -936.3160400390625, "loss": 0.0748, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1381356716156006, "rewards/margins": 7.81997537612915, "rewards/rejected": -8.958111763000488, "step": 36710 }, { "epoch": 0.44, "learning_rate": 3.440120528376562e-06, "logits/chosen": -2.8106722831726074, "logits/rejected": -2.450226068496704, "logps/chosen": -135.7144775390625, "logps/rejected": -855.8650512695312, "loss": 0.1046, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9365406036376953, "rewards/margins": 7.248265743255615, "rewards/rejected": -8.184805870056152, "step": 36720 }, { "epoch": 0.44, "learning_rate": 3.4391525005788395e-06, "logits/chosen": -2.8431332111358643, "logits/rejected": -2.189152717590332, "logps/chosen": -158.002197265625, "logps/rejected": -1050.3968505859375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.0461139678955078, "rewards/margins": 9.07397174835205, "rewards/rejected": -10.120084762573242, "step": 36730 }, { "epoch": 0.44, "learning_rate": 3.4381843088076995e-06, "logits/chosen": -2.833514451980591, "logits/rejected": -2.171299695968628, "logps/chosen": -162.90628051757812, "logps/rejected": -1067.6038818359375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.0698531866073608, "rewards/margins": 9.207807540893555, "rewards/rejected": -10.277661323547363, "step": 36740 }, { "epoch": 0.44, "learning_rate": 3.437215953232184e-06, "logits/chosen": -2.868945598602295, "logits/rejected": -2.294175863265991, "logps/chosen": -175.39974975585938, "logps/rejected": -1145.51318359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.3062350749969482, "rewards/margins": 9.75031852722168, "rewards/rejected": -11.056553840637207, "step": 36750 }, { "epoch": 0.44, "learning_rate": 3.436247434021367e-06, "logits/chosen": -2.8220417499542236, "logits/rejected": -2.1781675815582275, "logps/chosen": -172.62069702148438, "logps/rejected": -1108.599365234375, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.2281242609024048, "rewards/margins": 9.443177223205566, "rewards/rejected": -10.671302795410156, "step": 36760 }, { "epoch": 0.44, "learning_rate": 3.4352787513443473e-06, "logits/chosen": -2.860922336578369, "logits/rejected": -2.582064390182495, "logps/chosen": -132.24249267578125, "logps/rejected": -947.47265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9208696484565735, "rewards/margins": 8.179600715637207, "rewards/rejected": -9.100470542907715, "step": 36770 }, { "epoch": 0.44, "learning_rate": 3.434309905370256e-06, "logits/chosen": -2.858236074447632, "logits/rejected": -2.473905086517334, "logps/chosen": -143.84555053710938, "logps/rejected": -963.0535278320312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9913387298583984, "rewards/margins": 8.253595352172852, "rewards/rejected": -9.24493408203125, "step": 36780 }, { "epoch": 0.44, "learning_rate": 3.43334089626825e-06, "logits/chosen": -2.8497135639190674, "logits/rejected": -1.9059851169586182, "logps/chosen": -170.14810180664062, "logps/rejected": -1166.0228271484375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.1498857736587524, "rewards/margins": 10.104586601257324, "rewards/rejected": -11.254472732543945, "step": 36790 }, { "epoch": 0.44, "learning_rate": 3.4323717242075153e-06, "logits/chosen": -2.851292371749878, "logits/rejected": -2.397397518157959, "logps/chosen": -169.8014678955078, "logps/rejected": -890.2318115234375, "loss": 0.1283, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2390519380569458, "rewards/margins": 7.304884433746338, "rewards/rejected": -8.543935775756836, "step": 36800 }, { "epoch": 0.44, "learning_rate": 3.431402389357268e-06, "logits/chosen": -2.82436203956604, "logits/rejected": -2.243095874786377, "logps/chosen": -158.57614135742188, "logps/rejected": -1109.379150390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.0759456157684326, "rewards/margins": 9.621274948120117, "rewards/rejected": -10.697220802307129, "step": 36810 }, { "epoch": 0.44, "learning_rate": 3.4304328918867497e-06, "logits/chosen": -2.8456249237060547, "logits/rejected": -2.2335073947906494, "logps/chosen": -179.10015869140625, "logps/rejected": -1078.2391357421875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.3246040344238281, "rewards/margins": 9.040021896362305, "rewards/rejected": -10.364625930786133, "step": 36820 }, { "epoch": 0.44, "learning_rate": 3.4294632319652327e-06, "logits/chosen": -2.856685161590576, "logits/rejected": -2.1856391429901123, "logps/chosen": -155.46807861328125, "logps/rejected": -1075.5181884765625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.026342511177063, "rewards/margins": 9.343240737915039, "rewards/rejected": -10.369583129882812, "step": 36830 }, { "epoch": 0.44, "learning_rate": 3.428493409762017e-06, "logits/chosen": -2.850433826446533, "logits/rejected": -1.9207611083984375, "logps/chosen": -190.0975799560547, "logps/rejected": -1212.5291748046875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3254163265228271, "rewards/margins": 10.376200675964355, "rewards/rejected": -11.701617240905762, "step": 36840 }, { "epoch": 0.44, "learning_rate": 3.427523425446431e-06, "logits/chosen": -2.8508803844451904, "logits/rejected": -2.119231700897217, "logps/chosen": -169.68698120117188, "logps/rejected": -1079.2587890625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.1689183712005615, "rewards/margins": 9.215152740478516, "rewards/rejected": -10.384072303771973, "step": 36850 }, { "epoch": 0.44, "learning_rate": 3.42655327918783e-06, "logits/chosen": -2.8426291942596436, "logits/rejected": -2.227721929550171, "logps/chosen": -136.6001434326172, "logps/rejected": -887.3712158203125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.874710202217102, "rewards/margins": 7.6312408447265625, "rewards/rejected": -8.505950927734375, "step": 36860 }, { "epoch": 0.44, "learning_rate": 3.4255829711556004e-06, "logits/chosen": -2.8421237468719482, "logits/rejected": -2.222240447998047, "logps/chosen": -158.40072631835938, "logps/rejected": -956.6525268554688, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -1.1018126010894775, "rewards/margins": 8.068059921264648, "rewards/rejected": -9.169872283935547, "step": 36870 }, { "epoch": 0.44, "learning_rate": 3.4246125015191555e-06, "logits/chosen": -2.8668179512023926, "logits/rejected": -2.3706817626953125, "logps/chosen": -162.54519653320312, "logps/rejected": -942.2136840820312, "loss": 0.1165, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1651712656021118, "rewards/margins": 7.882822513580322, "rewards/rejected": -9.047994613647461, "step": 36880 }, { "epoch": 0.44, "learning_rate": 3.4236418704479354e-06, "logits/chosen": -2.817279100418091, "logits/rejected": -2.3838963508605957, "logps/chosen": -139.3191375732422, "logps/rejected": -942.728515625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9645838737487793, "rewards/margins": 8.082040786743164, "rewards/rejected": -9.046625137329102, "step": 36890 }, { "epoch": 0.44, "learning_rate": 3.422671078111409e-06, "logits/chosen": -2.855189561843872, "logits/rejected": -2.4046289920806885, "logps/chosen": -147.19544982910156, "logps/rejected": -965.5540161132812, "loss": 0.1262, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0445799827575684, "rewards/margins": 8.221362113952637, "rewards/rejected": -9.26594352722168, "step": 36900 }, { "epoch": 0.44, "learning_rate": 3.4217001246790767e-06, "logits/chosen": -2.8130221366882324, "logits/rejected": -2.167325258255005, "logps/chosen": -162.1162567138672, "logps/rejected": -989.9050903320312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.116165041923523, "rewards/margins": 8.382390975952148, "rewards/rejected": -9.498555183410645, "step": 36910 }, { "epoch": 0.44, "learning_rate": 3.4207290103204615e-06, "logits/chosen": -2.8819477558135986, "logits/rejected": -2.5253753662109375, "logps/chosen": -95.20622253417969, "logps/rejected": -847.8458862304688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5699328184127808, "rewards/margins": 7.53845739364624, "rewards/rejected": -8.108390808105469, "step": 36920 }, { "epoch": 0.44, "learning_rate": 3.4197577352051183e-06, "logits/chosen": -2.836547374725342, "logits/rejected": -2.2139103412628174, "logps/chosen": -163.7213897705078, "logps/rejected": -1076.26123046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.1745589971542358, "rewards/margins": 9.199063301086426, "rewards/rejected": -10.37362289428711, "step": 36930 }, { "epoch": 0.44, "learning_rate": 3.4187862995026295e-06, "logits/chosen": -2.7903239727020264, "logits/rejected": -2.083303213119507, "logps/chosen": -164.70448303222656, "logps/rejected": -1057.887939453125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.183377981185913, "rewards/margins": 9.008482933044434, "rewards/rejected": -10.191861152648926, "step": 36940 }, { "epoch": 0.44, "learning_rate": 3.417814703382604e-06, "logits/chosen": -2.8170571327209473, "logits/rejected": -2.3199305534362793, "logps/chosen": -137.15065002441406, "logps/rejected": -951.615234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9232546091079712, "rewards/margins": 8.212593078613281, "rewards/rejected": -9.135847091674805, "step": 36950 }, { "epoch": 0.44, "learning_rate": 3.416842947014681e-06, "logits/chosen": -2.8428683280944824, "logits/rejected": -2.393014430999756, "logps/chosen": -153.7288360595703, "logps/rejected": -903.2498168945312, "loss": 0.0983, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0879765748977661, "rewards/margins": 7.56795597076416, "rewards/rejected": -8.655932426452637, "step": 36960 }, { "epoch": 0.44, "learning_rate": 3.4158710305685255e-06, "logits/chosen": -2.867140293121338, "logits/rejected": -2.3423123359680176, "logps/chosen": -139.7147979736328, "logps/rejected": -1061.426513671875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9391278028488159, "rewards/margins": 9.282011032104492, "rewards/rejected": -10.221138000488281, "step": 36970 }, { "epoch": 0.44, "learning_rate": 3.414898954213832e-06, "logits/chosen": -2.8502910137176514, "logits/rejected": -2.262141227722168, "logps/chosen": -164.19810485839844, "logps/rejected": -1177.5955810546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.154721975326538, "rewards/margins": 10.21773624420166, "rewards/rejected": -11.372457504272461, "step": 36980 }, { "epoch": 0.44, "learning_rate": 3.4139267181203216e-06, "logits/chosen": -2.8362619876861572, "logits/rejected": -2.3153674602508545, "logps/chosen": -139.68258666992188, "logps/rejected": -966.6256103515625, "loss": 0.1319, "rewards/accuracies": 1.0, "rewards/chosen": -0.9542640447616577, "rewards/margins": 8.317359924316406, "rewards/rejected": -9.271625518798828, "step": 36990 }, { "epoch": 0.44, "learning_rate": 3.4129543224577454e-06, "logits/chosen": -2.873162031173706, "logits/rejected": -2.5863430500030518, "logps/chosen": -106.47978210449219, "logps/rejected": -862.8571166992188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6437837481498718, "rewards/margins": 7.607438564300537, "rewards/rejected": -8.251222610473633, "step": 37000 }, { "epoch": 0.44, "learning_rate": 3.41198176739588e-06, "logits/chosen": -2.87031888961792, "logits/rejected": -2.2499477863311768, "logps/chosen": -173.5327911376953, "logps/rejected": -951.3377075195312, "loss": 0.1503, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2902324199676514, "rewards/margins": 7.8246355056762695, "rewards/rejected": -9.114869117736816, "step": 37010 }, { "epoch": 0.44, "learning_rate": 3.4110090531045304e-06, "logits/chosen": -2.8027853965759277, "logits/rejected": -2.1342155933380127, "logps/chosen": -177.85183715820312, "logps/rejected": -1030.2904052734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.298342227935791, "rewards/margins": 8.6133451461792, "rewards/rejected": -9.911687850952148, "step": 37020 }, { "epoch": 0.44, "learning_rate": 3.4100361797535307e-06, "logits/chosen": -2.837830066680908, "logits/rejected": -2.2879798412323, "logps/chosen": -161.057861328125, "logps/rejected": -1058.5484619140625, "loss": 0.0906, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1414910554885864, "rewards/margins": 9.044387817382812, "rewards/rejected": -10.18587875366211, "step": 37030 }, { "epoch": 0.44, "learning_rate": 3.4090631475127416e-06, "logits/chosen": -2.795294761657715, "logits/rejected": -2.2113327980041504, "logps/chosen": -208.89892578125, "logps/rejected": -1015.3359375, "loss": 0.1229, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6125571727752686, "rewards/margins": 8.133808135986328, "rewards/rejected": -9.746365547180176, "step": 37040 }, { "epoch": 0.44, "learning_rate": 3.4080899565520516e-06, "logits/chosen": -2.901883602142334, "logits/rejected": -2.342907667160034, "logps/chosen": -133.1141815185547, "logps/rejected": -986.5653076171875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.8611367344856262, "rewards/margins": 8.612480163574219, "rewards/rejected": -9.473616600036621, "step": 37050 }, { "epoch": 0.44, "learning_rate": 3.4071166070413765e-06, "logits/chosen": -2.85368013381958, "logits/rejected": -2.247753858566284, "logps/chosen": -151.48265075683594, "logps/rejected": -989.06298828125, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.9582587480545044, "rewards/margins": 8.545907974243164, "rewards/rejected": -9.504166603088379, "step": 37060 }, { "epoch": 0.44, "learning_rate": 3.4061430991506622e-06, "logits/chosen": -2.844780206680298, "logits/rejected": -2.22023868560791, "logps/chosen": -172.30984497070312, "logps/rejected": -953.97607421875, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -1.2323758602142334, "rewards/margins": 7.927872657775879, "rewards/rejected": -9.160248756408691, "step": 37070 }, { "epoch": 0.44, "learning_rate": 3.4051694330498792e-06, "logits/chosen": -2.8397841453552246, "logits/rejected": -2.3184397220611572, "logps/chosen": -151.2587432861328, "logps/rejected": -955.0548095703125, "loss": 0.0354, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0227646827697754, "rewards/margins": 8.144929885864258, "rewards/rejected": -9.167694091796875, "step": 37080 }, { "epoch": 0.44, "learning_rate": 3.4041956089090274e-06, "logits/chosen": -2.8444926738739014, "logits/rejected": -2.3297085762023926, "logps/chosen": -142.4736785888672, "logps/rejected": -920.4945068359375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9785359501838684, "rewards/margins": 7.848919868469238, "rewards/rejected": -8.827455520629883, "step": 37090 }, { "epoch": 0.44, "learning_rate": 3.4032216268981338e-06, "logits/chosen": -2.86843204498291, "logits/rejected": -2.4269514083862305, "logps/chosen": -147.62484741210938, "logps/rejected": -901.1911010742188, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9865685701370239, "rewards/margins": 7.658916473388672, "rewards/rejected": -8.645484924316406, "step": 37100 }, { "epoch": 0.44, "learning_rate": 3.4022474871872525e-06, "logits/chosen": -2.864847421646118, "logits/rejected": -2.318650245666504, "logps/chosen": -184.23684692382812, "logps/rejected": -1081.0986328125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3027948141098022, "rewards/margins": 9.11939525604248, "rewards/rejected": -10.422189712524414, "step": 37110 }, { "epoch": 0.44, "learning_rate": 3.4012731899464653e-06, "logits/chosen": -2.8649609088897705, "logits/rejected": -2.257150173187256, "logps/chosen": -189.05331420898438, "logps/rejected": -1163.150390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.3713810443878174, "rewards/margins": 9.858770370483398, "rewards/rejected": -11.230152130126953, "step": 37120 }, { "epoch": 0.44, "learning_rate": 3.4002987353458826e-06, "logits/chosen": -2.7964348793029785, "logits/rejected": -1.7774686813354492, "logps/chosen": -251.2821807861328, "logps/rejected": -1243.4925537109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.8537555932998657, "rewards/margins": 10.168207168579102, "rewards/rejected": -12.021963119506836, "step": 37130 }, { "epoch": 0.44, "learning_rate": 3.399324123555642e-06, "logits/chosen": -2.8422982692718506, "logits/rejected": -1.9908653497695923, "logps/chosen": -182.45651245117188, "logps/rejected": -1075.09765625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.3096990585327148, "rewards/margins": 9.038368225097656, "rewards/rejected": -10.348066329956055, "step": 37140 }, { "epoch": 0.44, "learning_rate": 3.3983493547459055e-06, "logits/chosen": -2.8172926902770996, "logits/rejected": -2.3869054317474365, "logps/chosen": -160.92433166503906, "logps/rejected": -941.0841064453125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.1901370286941528, "rewards/margins": 7.839153289794922, "rewards/rejected": -9.029291152954102, "step": 37150 }, { "epoch": 0.44, "learning_rate": 3.397374429086868e-06, "logits/chosen": -2.8144912719726562, "logits/rejected": -2.155468463897705, "logps/chosen": -204.79116821289062, "logps/rejected": -1057.454345703125, "loss": 0.0196, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.5416429042816162, "rewards/margins": 8.626935005187988, "rewards/rejected": -10.1685791015625, "step": 37160 }, { "epoch": 0.44, "learning_rate": 3.3963993467487476e-06, "logits/chosen": -2.8611679077148438, "logits/rejected": -2.4284379482269287, "logps/chosen": -124.38871765136719, "logps/rejected": -872.0969848632812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8439159393310547, "rewards/margins": 7.4989519119262695, "rewards/rejected": -8.342867851257324, "step": 37170 }, { "epoch": 0.45, "learning_rate": 3.3954241079017903e-06, "logits/chosen": -2.8115906715393066, "logits/rejected": -1.8265079259872437, "logps/chosen": -229.7362823486328, "logps/rejected": -1212.22021484375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.6931740045547485, "rewards/margins": 9.9990873336792, "rewards/rejected": -11.6922607421875, "step": 37180 }, { "epoch": 0.45, "learning_rate": 3.394448712716271e-06, "logits/chosen": -2.86883807182312, "logits/rejected": -2.5882203578948975, "logps/chosen": -84.26618194580078, "logps/rejected": -819.1593627929688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.4708612561225891, "rewards/margins": 7.366737365722656, "rewards/rejected": -7.837599277496338, "step": 37190 }, { "epoch": 0.45, "learning_rate": 3.393473161362491e-06, "logits/chosen": -2.845336437225342, "logits/rejected": -2.094006299972534, "logps/chosen": -167.60018920898438, "logps/rejected": -905.7068481445312, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.1917335987091064, "rewards/margins": 7.49993371963501, "rewards/rejected": -8.691668510437012, "step": 37200 }, { "epoch": 0.45, "learning_rate": 3.392497454010778e-06, "logits/chosen": -2.820033311843872, "logits/rejected": -2.091625452041626, "logps/chosen": -234.64566040039062, "logps/rejected": -1203.345703125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.7904478311538696, "rewards/margins": 9.827474594116211, "rewards/rejected": -11.617923736572266, "step": 37210 }, { "epoch": 0.45, "learning_rate": 3.3915215908314884e-06, "logits/chosen": -2.8531227111816406, "logits/rejected": -2.1299920082092285, "logps/chosen": -157.4677734375, "logps/rejected": -1020.4454956054688, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.0759656429290771, "rewards/margins": 8.750085830688477, "rewards/rejected": -9.826051712036133, "step": 37220 }, { "epoch": 0.45, "learning_rate": 3.390545571995005e-06, "logits/chosen": -2.8169095516204834, "logits/rejected": -2.1375157833099365, "logps/chosen": -172.2823944091797, "logps/rejected": -1073.316162109375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.2130405902862549, "rewards/margins": 9.136390686035156, "rewards/rejected": -10.349431991577148, "step": 37230 }, { "epoch": 0.45, "learning_rate": 3.389569397671739e-06, "logits/chosen": -2.8220832347869873, "logits/rejected": -2.026562452316284, "logps/chosen": -240.1860809326172, "logps/rejected": -1175.130126953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.7982313632965088, "rewards/margins": 9.549551963806152, "rewards/rejected": -11.347784042358398, "step": 37240 }, { "epoch": 0.45, "learning_rate": 3.388593068032126e-06, "logits/chosen": -2.7854037284851074, "logits/rejected": -2.1691977977752686, "logps/chosen": -197.76953125, "logps/rejected": -1175.5860595703125, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -1.470922589302063, "rewards/margins": 9.88029670715332, "rewards/rejected": -11.35122013092041, "step": 37250 }, { "epoch": 0.45, "learning_rate": 3.387616583246631e-06, "logits/chosen": -2.9013867378234863, "logits/rejected": -2.2824788093566895, "logps/chosen": -203.4356231689453, "logps/rejected": -1077.13134765625, "loss": 0.1649, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.5544679164886475, "rewards/margins": 8.817298889160156, "rewards/rejected": -10.371767044067383, "step": 37260 }, { "epoch": 0.45, "learning_rate": 3.3866399434857468e-06, "logits/chosen": -2.8080029487609863, "logits/rejected": -2.1396172046661377, "logps/chosen": -193.79539489746094, "logps/rejected": -1110.9591064453125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.4386932849884033, "rewards/margins": 9.27753734588623, "rewards/rejected": -10.716231346130371, "step": 37270 }, { "epoch": 0.45, "learning_rate": 3.3856631489199898e-06, "logits/chosen": -2.8716466426849365, "logits/rejected": -2.219705581665039, "logps/chosen": -220.3063507080078, "logps/rejected": -1134.672119140625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.6463844776153564, "rewards/margins": 9.283571243286133, "rewards/rejected": -10.92995548248291, "step": 37280 }, { "epoch": 0.45, "learning_rate": 3.3846861997199077e-06, "logits/chosen": -2.820046901702881, "logits/rejected": -2.1633050441741943, "logps/chosen": -174.857666015625, "logps/rejected": -1080.2586669921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.251772165298462, "rewards/margins": 9.164637565612793, "rewards/rejected": -10.416410446166992, "step": 37290 }, { "epoch": 0.45, "learning_rate": 3.3837090960560726e-06, "logits/chosen": -2.8031623363494873, "logits/rejected": -2.250466823577881, "logps/chosen": -186.02297973632812, "logps/rejected": -977.6192626953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.3751779794692993, "rewards/margins": 8.008598327636719, "rewards/rejected": -9.38377571105957, "step": 37300 }, { "epoch": 0.45, "learning_rate": 3.3827318380990824e-06, "logits/chosen": -2.847055673599243, "logits/rejected": -2.2179534435272217, "logps/chosen": -186.29025268554688, "logps/rejected": -1085.7158203125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.364990472793579, "rewards/margins": 9.11182689666748, "rewards/rejected": -10.47681713104248, "step": 37310 }, { "epoch": 0.45, "learning_rate": 3.381754426019566e-06, "logits/chosen": -2.8894829750061035, "logits/rejected": -2.340117931365967, "logps/chosen": -185.4428253173828, "logps/rejected": -1006.5341796875, "loss": 0.1141, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3691304922103882, "rewards/margins": 8.30241870880127, "rewards/rejected": -9.671548843383789, "step": 37320 }, { "epoch": 0.45, "learning_rate": 3.3807768599881765e-06, "logits/chosen": -2.870425224304199, "logits/rejected": -2.2286200523376465, "logps/chosen": -184.2877655029297, "logps/rejected": -1067.5712890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.32205069065094, "rewards/margins": 8.945318222045898, "rewards/rejected": -10.267369270324707, "step": 37330 }, { "epoch": 0.45, "learning_rate": 3.379799140175592e-06, "logits/chosen": -2.8511598110198975, "logits/rejected": -2.193328857421875, "logps/chosen": -191.20477294921875, "logps/rejected": -1122.893310546875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.3807135820388794, "rewards/margins": 9.437963485717773, "rewards/rejected": -10.81867790222168, "step": 37340 }, { "epoch": 0.45, "learning_rate": 3.378821266752521e-06, "logits/chosen": -2.8548502922058105, "logits/rejected": -2.100947380065918, "logps/chosen": -185.85842895507812, "logps/rejected": -1180.946044921875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.3681787252426147, "rewards/margins": 10.042994499206543, "rewards/rejected": -11.411171913146973, "step": 37350 }, { "epoch": 0.45, "learning_rate": 3.377843239889699e-06, "logits/chosen": -2.804037570953369, "logits/rejected": -2.184419870376587, "logps/chosen": -252.3379669189453, "logps/rejected": -1163.4068603515625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.002274990081787, "rewards/margins": 9.223470687866211, "rewards/rejected": -11.225747108459473, "step": 37360 }, { "epoch": 0.45, "learning_rate": 3.3768650597578846e-06, "logits/chosen": -2.855114459991455, "logits/rejected": -2.533398389816284, "logps/chosen": -112.70194244384766, "logps/rejected": -901.7701416015625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7193372249603271, "rewards/margins": 7.923573970794678, "rewards/rejected": -8.642911911010742, "step": 37370 }, { "epoch": 0.45, "learning_rate": 3.375886726527866e-06, "logits/chosen": -2.8383371829986572, "logits/rejected": -2.4185826778411865, "logps/chosen": -163.8059539794922, "logps/rejected": -916.3697509765625, "loss": 0.1419, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2196028232574463, "rewards/margins": 7.563150882720947, "rewards/rejected": -8.782753944396973, "step": 37380 }, { "epoch": 0.45, "learning_rate": 3.3749082403704582e-06, "logits/chosen": -2.7792086601257324, "logits/rejected": -2.318002223968506, "logps/chosen": -158.46640014648438, "logps/rejected": -1034.9031982421875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.1434834003448486, "rewards/margins": 8.836118698120117, "rewards/rejected": -9.979601860046387, "step": 37390 }, { "epoch": 0.45, "learning_rate": 3.3739296014565013e-06, "logits/chosen": -2.857485055923462, "logits/rejected": -2.303774356842041, "logps/chosen": -136.49746704101562, "logps/rejected": -1034.327880859375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9201390147209167, "rewards/margins": 9.031693458557129, "rewards/rejected": -9.95183277130127, "step": 37400 }, { "epoch": 0.45, "learning_rate": 3.372950809956863e-06, "logits/chosen": -2.8732523918151855, "logits/rejected": -2.389212131500244, "logps/chosen": -169.76475524902344, "logps/rejected": -955.1666259765625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.2780992984771729, "rewards/margins": 7.897047996520996, "rewards/rejected": -9.175146102905273, "step": 37410 }, { "epoch": 0.45, "learning_rate": 3.3719718660424383e-06, "logits/chosen": -2.883265256881714, "logits/rejected": -2.548684597015381, "logps/chosen": -174.24574279785156, "logps/rejected": -906.0628662109375, "loss": 0.1019, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.322542428970337, "rewards/margins": 7.3625078201293945, "rewards/rejected": -8.685049057006836, "step": 37420 }, { "epoch": 0.45, "learning_rate": 3.3709927698841477e-06, "logits/chosen": -2.8646507263183594, "logits/rejected": -2.2768075466156006, "logps/chosen": -183.24295043945312, "logps/rejected": -1044.58251953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.3752024173736572, "rewards/margins": 8.680330276489258, "rewards/rejected": -10.055532455444336, "step": 37430 }, { "epoch": 0.45, "learning_rate": 3.3700135216529385e-06, "logits/chosen": -2.838135242462158, "logits/rejected": -2.4185588359832764, "logps/chosen": -138.84536743164062, "logps/rejected": -969.0836181640625, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -0.989843487739563, "rewards/margins": 8.313703536987305, "rewards/rejected": -9.303548812866211, "step": 37440 }, { "epoch": 0.45, "learning_rate": 3.369034121519784e-06, "logits/chosen": -2.8393006324768066, "logits/rejected": -2.1590065956115723, "logps/chosen": -205.73153686523438, "logps/rejected": -1173.4810791015625, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": -1.5601732730865479, "rewards/margins": 9.769338607788086, "rewards/rejected": -11.329511642456055, "step": 37450 }, { "epoch": 0.45, "learning_rate": 3.3680545696556868e-06, "logits/chosen": -2.8254289627075195, "logits/rejected": -2.3636891841888428, "logps/chosen": -184.6701202392578, "logps/rejected": -880.9865112304688, "loss": 0.1025, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3847274780273438, "rewards/margins": 7.053018093109131, "rewards/rejected": -8.437746047973633, "step": 37460 }, { "epoch": 0.45, "learning_rate": 3.367074866231671e-06, "logits/chosen": -2.855909824371338, "logits/rejected": -2.1646690368652344, "logps/chosen": -192.58181762695312, "logps/rejected": -1144.5841064453125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.4071232080459595, "rewards/margins": 9.624700546264648, "rewards/rejected": -11.031824111938477, "step": 37470 }, { "epoch": 0.45, "learning_rate": 3.366095011418793e-06, "logits/chosen": -2.819768190383911, "logits/rejected": -2.4072463512420654, "logps/chosen": -180.50955200195312, "logps/rejected": -987.1419677734375, "loss": 0.096, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3626760244369507, "rewards/margins": 8.118661880493164, "rewards/rejected": -9.48133659362793, "step": 37480 }, { "epoch": 0.45, "learning_rate": 3.3651150053881303e-06, "logits/chosen": -2.8158671855926514, "logits/rejected": -2.058642625808716, "logps/chosen": -168.55780029296875, "logps/rejected": -1075.847412109375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.2049674987792969, "rewards/margins": 9.162113189697266, "rewards/rejected": -10.367080688476562, "step": 37490 }, { "epoch": 0.45, "learning_rate": 3.3641348483107915e-06, "logits/chosen": -2.8133769035339355, "logits/rejected": -1.860472321510315, "logps/chosen": -225.34213256835938, "logps/rejected": -1141.8779296875, "loss": 0.0583, "rewards/accuracies": 1.0, "rewards/chosen": -1.7077754735946655, "rewards/margins": 9.309350967407227, "rewards/rejected": -11.017126083374023, "step": 37500 }, { "epoch": 0.45, "learning_rate": 3.3631545403579075e-06, "logits/chosen": -2.8419718742370605, "logits/rejected": -2.3690061569213867, "logps/chosen": -158.07862854003906, "logps/rejected": -977.06396484375, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -1.1563321352005005, "rewards/margins": 8.216797828674316, "rewards/rejected": -9.373129844665527, "step": 37510 }, { "epoch": 0.45, "learning_rate": 3.3621740817006386e-06, "logits/chosen": -2.788604259490967, "logits/rejected": -1.884639024734497, "logps/chosen": -256.2830505371094, "logps/rejected": -1208.9107666015625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.0268375873565674, "rewards/margins": 9.651142120361328, "rewards/rejected": -11.677978515625, "step": 37520 }, { "epoch": 0.45, "learning_rate": 3.3611934725101698e-06, "logits/chosen": -2.851803779602051, "logits/rejected": -2.0692286491394043, "logps/chosen": -236.87271118164062, "logps/rejected": -1134.8251953125, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -1.820021390914917, "rewards/margins": 9.139965057373047, "rewards/rejected": -10.959986686706543, "step": 37530 }, { "epoch": 0.45, "learning_rate": 3.360212712957712e-06, "logits/chosen": -2.7884631156921387, "logits/rejected": -2.212432622909546, "logps/chosen": -223.3857879638672, "logps/rejected": -1069.8486328125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.7736804485321045, "rewards/margins": 8.528166770935059, "rewards/rejected": -10.301846504211426, "step": 37540 }, { "epoch": 0.45, "learning_rate": 3.3592318032145037e-06, "logits/chosen": -2.823859453201294, "logits/rejected": -2.3195748329162598, "logps/chosen": -164.23251342773438, "logps/rejected": -1006.3521728515625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.1979652643203735, "rewards/margins": 8.473116874694824, "rewards/rejected": -9.671083450317383, "step": 37550 }, { "epoch": 0.45, "learning_rate": 3.3582507434518097e-06, "logits/chosen": -2.8281338214874268, "logits/rejected": -2.2576680183410645, "logps/chosen": -189.6920166015625, "logps/rejected": -1099.463623046875, "loss": 0.0906, "rewards/accuracies": 1.0, "rewards/chosen": -1.4338486194610596, "rewards/margins": 9.17713737487793, "rewards/rejected": -10.61098575592041, "step": 37560 }, { "epoch": 0.45, "learning_rate": 3.357269533840919e-06, "logits/chosen": -2.8608505725860596, "logits/rejected": -2.2713723182678223, "logps/chosen": -168.9383087158203, "logps/rejected": -931.1423950195312, "loss": 0.0415, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2613914012908936, "rewards/margins": 7.670156002044678, "rewards/rejected": -8.931547164916992, "step": 37570 }, { "epoch": 0.45, "learning_rate": 3.3562881745531486e-06, "logits/chosen": -2.8630549907684326, "logits/rejected": -2.3283309936523438, "logps/chosen": -189.94427490234375, "logps/rejected": -1173.93603515625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.4734865427017212, "rewards/margins": 9.875327110290527, "rewards/rejected": -11.348814010620117, "step": 37580 }, { "epoch": 0.45, "learning_rate": 3.3553066657598417e-06, "logits/chosen": -2.832235813140869, "logits/rejected": -2.0770676136016846, "logps/chosen": -274.8958435058594, "logps/rejected": -995.1637573242188, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -2.216031789779663, "rewards/margins": 7.343949317932129, "rewards/rejected": -9.559981346130371, "step": 37590 }, { "epoch": 0.45, "learning_rate": 3.354325007632366e-06, "logits/chosen": -2.85195255279541, "logits/rejected": -2.1803011894226074, "logps/chosen": -180.5742645263672, "logps/rejected": -998.8306884765625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.3217217922210693, "rewards/margins": 8.288817405700684, "rewards/rejected": -9.610540390014648, "step": 37600 }, { "epoch": 0.45, "learning_rate": 3.3533432003421163e-06, "logits/chosen": -2.8799571990966797, "logits/rejected": -2.481336832046509, "logps/chosen": -132.2906036376953, "logps/rejected": -924.9814453125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8987300992012024, "rewards/margins": 7.972906589508057, "rewards/rejected": -8.871637344360352, "step": 37610 }, { "epoch": 0.45, "learning_rate": 3.3523612440605145e-06, "logits/chosen": -2.7932419776916504, "logits/rejected": -2.2392709255218506, "logps/chosen": -133.8256072998047, "logps/rejected": -929.15869140625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8981121778488159, "rewards/margins": 8.02460765838623, "rewards/rejected": -8.922719955444336, "step": 37620 }, { "epoch": 0.45, "learning_rate": 3.3513791389590066e-06, "logits/chosen": -2.8413913249969482, "logits/rejected": -2.2131989002227783, "logps/chosen": -211.9867706298828, "logps/rejected": -1036.06201171875, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -1.6181461811065674, "rewards/margins": 8.355487823486328, "rewards/rejected": -9.973634719848633, "step": 37630 }, { "epoch": 0.45, "learning_rate": 3.3503968852090657e-06, "logits/chosen": -2.837235927581787, "logits/rejected": -2.554138660430908, "logps/chosen": -186.86903381347656, "logps/rejected": -918.20263671875, "loss": 0.1517, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4894622564315796, "rewards/margins": 7.325051307678223, "rewards/rejected": -8.814513206481934, "step": 37640 }, { "epoch": 0.45, "learning_rate": 3.3494144829821905e-06, "logits/chosen": -2.807133197784424, "logits/rejected": -2.077280282974243, "logps/chosen": -198.9453887939453, "logps/rejected": -1020.1403198242188, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -1.4536798000335693, "rewards/margins": 8.357495307922363, "rewards/rejected": -9.811175346374512, "step": 37650 }, { "epoch": 0.45, "learning_rate": 3.348431932449906e-06, "logits/chosen": -2.850942611694336, "logits/rejected": -2.0721747875213623, "logps/chosen": -205.75228881835938, "logps/rejected": -1063.950927734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.5424429178237915, "rewards/margins": 8.711533546447754, "rewards/rejected": -10.253975868225098, "step": 37660 }, { "epoch": 0.45, "learning_rate": 3.3474492337837622e-06, "logits/chosen": -2.8432509899139404, "logits/rejected": -2.2897000312805176, "logps/chosen": -147.3599090576172, "logps/rejected": -971.1937255859375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0201364755630493, "rewards/margins": 8.314386367797852, "rewards/rejected": -9.334522247314453, "step": 37670 }, { "epoch": 0.45, "learning_rate": 3.346466387155336e-06, "logits/chosen": -2.9260354042053223, "logits/rejected": -2.5381016731262207, "logps/chosen": -131.77102661132812, "logps/rejected": -908.52197265625, "loss": 0.0222, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8603798151016235, "rewards/margins": 7.842525482177734, "rewards/rejected": -8.702905654907227, "step": 37680 }, { "epoch": 0.45, "learning_rate": 3.3454833927362295e-06, "logits/chosen": -2.856214761734009, "logits/rejected": -2.3379123210906982, "logps/chosen": -190.6143035888672, "logps/rejected": -1086.576171875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.4026930332183838, "rewards/margins": 9.065936088562012, "rewards/rejected": -10.4686279296875, "step": 37690 }, { "epoch": 0.45, "learning_rate": 3.3445002506980705e-06, "logits/chosen": -2.860781192779541, "logits/rejected": -2.132997989654541, "logps/chosen": -187.08067321777344, "logps/rejected": -1140.199462890625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.3749815225601196, "rewards/margins": 9.614248275756836, "rewards/rejected": -10.989229202270508, "step": 37700 }, { "epoch": 0.45, "learning_rate": 3.3435169612125136e-06, "logits/chosen": -2.8209574222564697, "logits/rejected": -2.2529962062835693, "logps/chosen": -159.62892150878906, "logps/rejected": -1075.14501953125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.1168184280395508, "rewards/margins": 9.250612258911133, "rewards/rejected": -10.367430686950684, "step": 37710 }, { "epoch": 0.45, "learning_rate": 3.3425335244512384e-06, "logits/chosen": -2.8526127338409424, "logits/rejected": -2.25978422164917, "logps/chosen": -144.25405883789062, "logps/rejected": -997.9305419921875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.0114762783050537, "rewards/margins": 8.578104019165039, "rewards/rejected": -9.589579582214355, "step": 37720 }, { "epoch": 0.45, "learning_rate": 3.34154994058595e-06, "logits/chosen": -2.8144259452819824, "logits/rejected": -2.077706813812256, "logps/chosen": -191.66024780273438, "logps/rejected": -1058.6951904296875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.4404869079589844, "rewards/margins": 8.762646675109863, "rewards/rejected": -10.203132629394531, "step": 37730 }, { "epoch": 0.45, "learning_rate": 3.3405662097883784e-06, "logits/chosen": -2.8380656242370605, "logits/rejected": -2.15372896194458, "logps/chosen": -188.95822143554688, "logps/rejected": -1131.9598388671875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.3381651639938354, "rewards/margins": 9.578546524047852, "rewards/rejected": -10.916711807250977, "step": 37740 }, { "epoch": 0.45, "learning_rate": 3.3395823322302816e-06, "logits/chosen": -2.836702346801758, "logits/rejected": -2.03324294090271, "logps/chosen": -218.6448211669922, "logps/rejected": -1161.016357421875, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -1.7038275003433228, "rewards/margins": 9.496360778808594, "rewards/rejected": -11.200186729431152, "step": 37750 }, { "epoch": 0.45, "learning_rate": 3.338598308083441e-06, "logits/chosen": -2.854738712310791, "logits/rejected": -2.2442376613616943, "logps/chosen": -203.3309783935547, "logps/rejected": -1065.33642578125, "loss": 0.1527, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.5092737674713135, "rewards/margins": 8.765810012817383, "rewards/rejected": -10.275083541870117, "step": 37760 }, { "epoch": 0.45, "learning_rate": 3.3376141375196653e-06, "logits/chosen": -2.866654634475708, "logits/rejected": -2.236421585083008, "logps/chosen": -153.78553771972656, "logps/rejected": -973.2664794921875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0950486660003662, "rewards/margins": 8.265706062316895, "rewards/rejected": -9.360754013061523, "step": 37770 }, { "epoch": 0.45, "learning_rate": 3.3366298207107882e-06, "logits/chosen": -2.859003782272339, "logits/rejected": -2.225743055343628, "logps/chosen": -153.70413208007812, "logps/rejected": -1012.09033203125, "loss": 0.2164, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0925520658493042, "rewards/margins": 8.638211250305176, "rewards/rejected": -9.73076343536377, "step": 37780 }, { "epoch": 0.45, "learning_rate": 3.3356453578286676e-06, "logits/chosen": -2.8952980041503906, "logits/rejected": -2.2123937606811523, "logps/chosen": -198.561767578125, "logps/rejected": -1211.8763427734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4359365701675415, "rewards/margins": 10.28128433227539, "rewards/rejected": -11.7172212600708, "step": 37790 }, { "epoch": 0.45, "learning_rate": 3.3346607490451878e-06, "logits/chosen": -2.835548162460327, "logits/rejected": -2.2345166206359863, "logps/chosen": -188.62879943847656, "logps/rejected": -1106.967041015625, "loss": 0.0974, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.333290934562683, "rewards/margins": 9.339014053344727, "rewards/rejected": -10.6723051071167, "step": 37800 }, { "epoch": 0.45, "learning_rate": 3.3336759945322593e-06, "logits/chosen": -2.870884656906128, "logits/rejected": -2.344200372695923, "logps/chosen": -179.72763061523438, "logps/rejected": -1144.4945068359375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.2261393070220947, "rewards/margins": 9.80739974975586, "rewards/rejected": -11.033539772033691, "step": 37810 }, { "epoch": 0.45, "learning_rate": 3.3326910944618184e-06, "logits/chosen": -2.897535800933838, "logits/rejected": -2.295194149017334, "logps/chosen": -125.91993713378906, "logps/rejected": -999.4075927734375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8279749155044556, "rewards/margins": 8.769967079162598, "rewards/rejected": -9.597941398620605, "step": 37820 }, { "epoch": 0.45, "learning_rate": 3.331706049005825e-06, "logits/chosen": -2.877483367919922, "logits/rejected": -2.354365587234497, "logps/chosen": -142.02821350097656, "logps/rejected": -957.8323364257812, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.9651579856872559, "rewards/margins": 8.221487998962402, "rewards/rejected": -9.1866455078125, "step": 37830 }, { "epoch": 0.45, "learning_rate": 3.330720858336265e-06, "logits/chosen": -2.870344638824463, "logits/rejected": -2.3837084770202637, "logps/chosen": -127.95225524902344, "logps/rejected": -1000.3831176757812, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8207968473434448, "rewards/margins": 8.77960205078125, "rewards/rejected": -9.6003999710083, "step": 37840 }, { "epoch": 0.45, "learning_rate": 3.329735522625151e-06, "logits/chosen": -2.791417121887207, "logits/rejected": -2.245215892791748, "logps/chosen": -167.8745574951172, "logps/rejected": -1088.00244140625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.2217037677764893, "rewards/margins": 9.261517524719238, "rewards/rejected": -10.483221054077148, "step": 37850 }, { "epoch": 0.45, "learning_rate": 3.3287500420445184e-06, "logits/chosen": -2.844200611114502, "logits/rejected": -2.236417770385742, "logps/chosen": -150.23098754882812, "logps/rejected": -987.58251953125, "loss": 0.1328, "rewards/accuracies": 1.0, "rewards/chosen": -1.0253911018371582, "rewards/margins": 8.469950675964355, "rewards/rejected": -9.495341300964355, "step": 37860 }, { "epoch": 0.45, "learning_rate": 3.327764416766431e-06, "logits/chosen": -2.8999040126800537, "logits/rejected": -2.496415138244629, "logps/chosen": -119.69660949707031, "logps/rejected": -949.0554809570312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7948631644248962, "rewards/margins": 8.310214042663574, "rewards/rejected": -9.105077743530273, "step": 37870 }, { "epoch": 0.45, "learning_rate": 3.3267786469629756e-06, "logits/chosen": -2.8497421741485596, "logits/rejected": -2.4188950061798096, "logps/chosen": -140.52694702148438, "logps/rejected": -1025.5499267578125, "loss": 0.1602, "rewards/accuracies": 1.0, "rewards/chosen": -0.9588449597358704, "rewards/margins": 8.915658950805664, "rewards/rejected": -9.874504089355469, "step": 37880 }, { "epoch": 0.45, "learning_rate": 3.3257927328062634e-06, "logits/chosen": -2.829742908477783, "logits/rejected": -2.219223976135254, "logps/chosen": -135.44903564453125, "logps/rejected": -1065.94921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9002478718757629, "rewards/margins": 9.380292892456055, "rewards/rejected": -10.280542373657227, "step": 37890 }, { "epoch": 0.45, "learning_rate": 3.324806674468435e-06, "logits/chosen": -2.8426764011383057, "logits/rejected": -2.3629231452941895, "logps/chosen": -135.6930694580078, "logps/rejected": -914.2861328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9354158639907837, "rewards/margins": 7.833096504211426, "rewards/rejected": -8.768512725830078, "step": 37900 }, { "epoch": 0.45, "learning_rate": 3.323820472121651e-06, "logits/chosen": -2.849186658859253, "logits/rejected": -2.406862258911133, "logps/chosen": -165.1432342529297, "logps/rejected": -1067.690673828125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.1880466938018799, "rewards/margins": 9.097406387329102, "rewards/rejected": -10.285454750061035, "step": 37910 }, { "epoch": 0.45, "learning_rate": 3.322834125938101e-06, "logits/chosen": -2.8320631980895996, "logits/rejected": -2.366616725921631, "logps/chosen": -146.1759796142578, "logps/rejected": -978.3211669921875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.962289035320282, "rewards/margins": 8.427963256835938, "rewards/rejected": -9.390253067016602, "step": 37920 }, { "epoch": 0.45, "learning_rate": 3.321847636089998e-06, "logits/chosen": -2.8325040340423584, "logits/rejected": -2.1122994422912598, "logps/chosen": -205.1544952392578, "logps/rejected": -1159.4404296875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.5199475288391113, "rewards/margins": 9.657583236694336, "rewards/rejected": -11.177530288696289, "step": 37930 }, { "epoch": 0.45, "learning_rate": 3.32086100274958e-06, "logits/chosen": -2.8328521251678467, "logits/rejected": -2.2548811435699463, "logps/chosen": -177.9635467529297, "logps/rejected": -1096.7042236328125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.2905175685882568, "rewards/margins": 9.278111457824707, "rewards/rejected": -10.56862735748291, "step": 37940 }, { "epoch": 0.45, "learning_rate": 3.3198742260891113e-06, "logits/chosen": -2.8623178005218506, "logits/rejected": -2.4199841022491455, "logps/chosen": -162.67909240722656, "logps/rejected": -993.5099487304688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.2004643678665161, "rewards/margins": 8.352850914001465, "rewards/rejected": -9.553316116333008, "step": 37950 }, { "epoch": 0.45, "learning_rate": 3.3188873062808795e-06, "logits/chosen": -2.8330419063568115, "logits/rejected": -2.491011142730713, "logps/chosen": -182.70425415039062, "logps/rejected": -968.9791870117188, "loss": 0.1835, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3993116617202759, "rewards/margins": 7.896519660949707, "rewards/rejected": -9.295831680297852, "step": 37960 }, { "epoch": 0.45, "learning_rate": 3.3179002434971983e-06, "logits/chosen": -2.8544647693634033, "logits/rejected": -2.3934874534606934, "logps/chosen": -189.58529663085938, "logps/rejected": -1062.0560302734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.4184478521347046, "rewards/margins": 8.805856704711914, "rewards/rejected": -10.22430419921875, "step": 37970 }, { "epoch": 0.45, "learning_rate": 3.316913037910407e-06, "logits/chosen": -2.83414363861084, "logits/rejected": -2.3513872623443604, "logps/chosen": -180.14976501464844, "logps/rejected": -1049.515869140625, "loss": 0.1324, "rewards/accuracies": 1.0, "rewards/chosen": -1.3379911184310913, "rewards/margins": 8.765729904174805, "rewards/rejected": -10.103721618652344, "step": 37980 }, { "epoch": 0.45, "learning_rate": 3.3159256896928672e-06, "logits/chosen": -2.8265767097473145, "logits/rejected": -2.1131913661956787, "logps/chosen": -195.50961303710938, "logps/rejected": -1145.241943359375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.4656521081924438, "rewards/margins": 9.579296112060547, "rewards/rejected": -11.044947624206543, "step": 37990 }, { "epoch": 0.45, "learning_rate": 3.314938199016968e-06, "logits/chosen": -2.819091320037842, "logits/rejected": -2.336883068084717, "logps/chosen": -180.44403076171875, "logps/rejected": -1077.556396484375, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -1.373195767402649, "rewards/margins": 9.005842208862305, "rewards/rejected": -10.379037857055664, "step": 38000 }, { "epoch": 0.45, "learning_rate": 3.313950566055124e-06, "logits/chosen": -2.8264400959014893, "logits/rejected": -2.27006196975708, "logps/chosen": -170.71311950683594, "logps/rejected": -1052.0338134765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.2705931663513184, "rewards/margins": 8.861085891723633, "rewards/rejected": -10.131677627563477, "step": 38010 }, { "epoch": 0.46, "learning_rate": 3.312962790979771e-06, "logits/chosen": -2.8740127086639404, "logits/rejected": -2.4843554496765137, "logps/chosen": -119.76094818115234, "logps/rejected": -914.7164306640625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7731634974479675, "rewards/margins": 7.991721153259277, "rewards/rejected": -8.764884948730469, "step": 38020 }, { "epoch": 0.46, "learning_rate": 3.311974873963372e-06, "logits/chosen": -2.8630897998809814, "logits/rejected": -2.364475727081299, "logps/chosen": -128.35606384277344, "logps/rejected": -979.7291870117188, "loss": 0.0903, "rewards/accuracies": 1.0, "rewards/chosen": -0.8402711749076843, "rewards/margins": 8.565096855163574, "rewards/rejected": -9.405369758605957, "step": 38030 }, { "epoch": 0.46, "learning_rate": 3.3109868151784163e-06, "logits/chosen": -2.7803797721862793, "logits/rejected": -2.1200921535491943, "logps/chosen": -213.6295928955078, "logps/rejected": -1210.032958984375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.6208398342132568, "rewards/margins": 10.074654579162598, "rewards/rejected": -11.695493698120117, "step": 38040 }, { "epoch": 0.46, "learning_rate": 3.3099986147974145e-06, "logits/chosen": -2.821165084838867, "logits/rejected": -2.237776517868042, "logps/chosen": -202.17236328125, "logps/rejected": -967.673828125, "loss": 0.1467, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.5859240293502808, "rewards/margins": 7.703432559967041, "rewards/rejected": -9.289355278015137, "step": 38050 }, { "epoch": 0.46, "learning_rate": 3.3090102729929043e-06, "logits/chosen": -2.842437744140625, "logits/rejected": -2.3076670169830322, "logps/chosen": -157.3260955810547, "logps/rejected": -937.3582763671875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1190935373306274, "rewards/margins": 7.863360404968262, "rewards/rejected": -8.982454299926758, "step": 38060 }, { "epoch": 0.46, "learning_rate": 3.3080217899374466e-06, "logits/chosen": -2.8144850730895996, "logits/rejected": -2.3836190700531006, "logps/chosen": -175.92605590820312, "logps/rejected": -1009.6242065429688, "loss": 0.0497, "rewards/accuracies": 1.0, "rewards/chosen": -1.3125379085540771, "rewards/margins": 8.385542869567871, "rewards/rejected": -9.698081016540527, "step": 38070 }, { "epoch": 0.46, "learning_rate": 3.30703316580363e-06, "logits/chosen": -2.8670005798339844, "logits/rejected": -2.3307464122772217, "logps/chosen": -178.00167846679688, "logps/rejected": -1147.0911865234375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3171563148498535, "rewards/margins": 9.7545166015625, "rewards/rejected": -11.071672439575195, "step": 38080 }, { "epoch": 0.46, "learning_rate": 3.306044400764063e-06, "logits/chosen": -2.8746280670166016, "logits/rejected": -1.9243500232696533, "logps/chosen": -190.5938262939453, "logps/rejected": -1123.0194091796875, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -1.377722978591919, "rewards/margins": 9.442522048950195, "rewards/rejected": -10.820245742797852, "step": 38090 }, { "epoch": 0.46, "learning_rate": 3.3050554949913827e-06, "logits/chosen": -2.808685779571533, "logits/rejected": -1.959139108657837, "logps/chosen": -242.87142944335938, "logps/rejected": -1109.288818359375, "loss": 0.2755, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.8301271200180054, "rewards/margins": 8.853950500488281, "rewards/rejected": -10.684078216552734, "step": 38100 }, { "epoch": 0.46, "learning_rate": 3.3040664486582493e-06, "logits/chosen": -2.839256525039673, "logits/rejected": -2.3712573051452637, "logps/chosen": -143.3567352294922, "logps/rejected": -993.0232543945312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.9631635546684265, "rewards/margins": 8.58248519897461, "rewards/rejected": -9.545649528503418, "step": 38110 }, { "epoch": 0.46, "learning_rate": 3.3030772619373463e-06, "logits/chosen": -2.8327107429504395, "logits/rejected": -2.374788999557495, "logps/chosen": -159.89456176757812, "logps/rejected": -935.4528198242188, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.165673851966858, "rewards/margins": 7.801817893981934, "rewards/rejected": -8.96749210357666, "step": 38120 }, { "epoch": 0.46, "learning_rate": 3.302087935001384e-06, "logits/chosen": -2.8421003818511963, "logits/rejected": -2.052978277206421, "logps/chosen": -173.22872924804688, "logps/rejected": -1124.923828125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2493891716003418, "rewards/margins": 9.606374740600586, "rewards/rejected": -10.855762481689453, "step": 38130 }, { "epoch": 0.46, "learning_rate": 3.301098468023096e-06, "logits/chosen": -2.839728832244873, "logits/rejected": -2.2926905155181885, "logps/chosen": -134.0223388671875, "logps/rejected": -899.3200073242188, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.917587399482727, "rewards/margins": 7.6964898109436035, "rewards/rejected": -8.614076614379883, "step": 38140 }, { "epoch": 0.46, "learning_rate": 3.3001088611752408e-06, "logits/chosen": -2.845820665359497, "logits/rejected": -2.336927890777588, "logps/chosen": -173.41444396972656, "logps/rejected": -1064.4361572265625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.253630518913269, "rewards/margins": 9.006935119628906, "rewards/rejected": -10.260566711425781, "step": 38150 }, { "epoch": 0.46, "learning_rate": 3.2991191146306008e-06, "logits/chosen": -2.81562876701355, "logits/rejected": -2.3125343322753906, "logps/chosen": -134.43484497070312, "logps/rejected": -1017.1101684570312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9166097640991211, "rewards/margins": 8.873673439025879, "rewards/rejected": -9.790284156799316, "step": 38160 }, { "epoch": 0.46, "learning_rate": 3.2981292285619826e-06, "logits/chosen": -2.849085569381714, "logits/rejected": -2.4900879859924316, "logps/chosen": -120.83164978027344, "logps/rejected": -900.66552734375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7874013185501099, "rewards/margins": 7.8515825271606445, "rewards/rejected": -8.638982772827148, "step": 38170 }, { "epoch": 0.46, "learning_rate": 3.297139203142218e-06, "logits/chosen": -2.866487741470337, "logits/rejected": -2.39050555229187, "logps/chosen": -131.42747497558594, "logps/rejected": -976.2529296875, "loss": 0.0954, "rewards/accuracies": 1.0, "rewards/chosen": -0.8786888122558594, "rewards/margins": 8.499889373779297, "rewards/rejected": -9.378578186035156, "step": 38180 }, { "epoch": 0.46, "learning_rate": 3.296149038544162e-06, "logits/chosen": -2.843073606491089, "logits/rejected": -2.26769757270813, "logps/chosen": -193.3769989013672, "logps/rejected": -1039.0594482421875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.4071271419525146, "rewards/margins": 8.609115600585938, "rewards/rejected": -10.016242980957031, "step": 38190 }, { "epoch": 0.46, "learning_rate": 3.2951587349406956e-06, "logits/chosen": -2.8673298358917236, "logits/rejected": -2.1972389221191406, "logps/chosen": -160.40634155273438, "logps/rejected": -969.7678833007812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.086795687675476, "rewards/margins": 8.231858253479004, "rewards/rejected": -9.318655014038086, "step": 38200 }, { "epoch": 0.46, "learning_rate": 3.2941682925047223e-06, "logits/chosen": -2.828770160675049, "logits/rejected": -2.1178035736083984, "logps/chosen": -196.97549438476562, "logps/rejected": -1027.327392578125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.3944034576416016, "rewards/margins": 8.483818054199219, "rewards/rejected": -9.878222465515137, "step": 38210 }, { "epoch": 0.46, "learning_rate": 3.2931777114091705e-06, "logits/chosen": -2.849170446395874, "logits/rejected": -2.2303404808044434, "logps/chosen": -183.9539794921875, "logps/rejected": -1186.6407470703125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.379216194152832, "rewards/margins": 10.091268539428711, "rewards/rejected": -11.470483779907227, "step": 38220 }, { "epoch": 0.46, "learning_rate": 3.2921869918269934e-06, "logits/chosen": -2.8912193775177, "logits/rejected": -2.4408202171325684, "logps/chosen": -114.85723876953125, "logps/rejected": -900.228515625, "loss": 0.098, "rewards/accuracies": 1.0, "rewards/chosen": -0.7381735444068909, "rewards/margins": 7.896505832672119, "rewards/rejected": -8.634679794311523, "step": 38230 }, { "epoch": 0.46, "learning_rate": 3.291196133931168e-06, "logits/chosen": -2.8161158561706543, "logits/rejected": -2.0820415019989014, "logps/chosen": -204.81546020507812, "logps/rejected": -1205.21630859375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.5477204322814941, "rewards/margins": 10.097986221313477, "rewards/rejected": -11.645706176757812, "step": 38240 }, { "epoch": 0.46, "learning_rate": 3.2902051378946952e-06, "logits/chosen": -2.86834454536438, "logits/rejected": -2.1944522857666016, "logps/chosen": -184.39202880859375, "logps/rejected": -980.4841918945312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.4040024280548096, "rewards/margins": 8.016641616821289, "rewards/rejected": -9.420642852783203, "step": 38250 }, { "epoch": 0.46, "learning_rate": 3.2892140038905995e-06, "logits/chosen": -2.839717149734497, "logits/rejected": -2.265815019607544, "logps/chosen": -121.38138580322266, "logps/rejected": -1059.0040283203125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7868622541427612, "rewards/margins": 9.421369552612305, "rewards/rejected": -10.208230972290039, "step": 38260 }, { "epoch": 0.46, "learning_rate": 3.2882227320919315e-06, "logits/chosen": -2.848998546600342, "logits/rejected": -2.266934394836426, "logps/chosen": -185.99661254882812, "logps/rejected": -1100.625244140625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.3881409168243408, "rewards/margins": 9.203752517700195, "rewards/rejected": -10.591893196105957, "step": 38270 }, { "epoch": 0.46, "learning_rate": 3.2872313226717627e-06, "logits/chosen": -2.8435778617858887, "logits/rejected": -2.2976737022399902, "logps/chosen": -193.16287231445312, "logps/rejected": -1057.8935546875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.4512522220611572, "rewards/margins": 8.721263885498047, "rewards/rejected": -10.172517776489258, "step": 38280 }, { "epoch": 0.46, "learning_rate": 3.2862397758031917e-06, "logits/chosen": -2.8711016178131104, "logits/rejected": -2.4020392894744873, "logps/chosen": -131.6126251220703, "logps/rejected": -977.1819458007812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9051011800765991, "rewards/margins": 8.469156265258789, "rewards/rejected": -9.374258041381836, "step": 38290 }, { "epoch": 0.46, "learning_rate": 3.2852480916593397e-06, "logits/chosen": -2.84281587600708, "logits/rejected": -2.1949641704559326, "logps/chosen": -167.7205352783203, "logps/rejected": -1069.827880859375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.1964302062988281, "rewards/margins": 9.112261772155762, "rewards/rejected": -10.30869197845459, "step": 38300 }, { "epoch": 0.46, "learning_rate": 3.2842562704133517e-06, "logits/chosen": -2.8712844848632812, "logits/rejected": -2.3775582313537598, "logps/chosen": -120.7071304321289, "logps/rejected": -887.9185791015625, "loss": 0.1582, "rewards/accuracies": 1.0, "rewards/chosen": -0.7986602783203125, "rewards/margins": 7.6961541175842285, "rewards/rejected": -8.494813919067383, "step": 38310 }, { "epoch": 0.46, "learning_rate": 3.283264312238398e-06, "logits/chosen": -2.8425958156585693, "logits/rejected": -2.431464433670044, "logps/chosen": -141.5484161376953, "logps/rejected": -939.7498779296875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9809405207633972, "rewards/margins": 8.037649154663086, "rewards/rejected": -9.018588066101074, "step": 38320 }, { "epoch": 0.46, "learning_rate": 3.2822722173076693e-06, "logits/chosen": -2.8181517124176025, "logits/rejected": -2.4236183166503906, "logps/chosen": -145.60848999023438, "logps/rejected": -919.8638916015625, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -1.042267084121704, "rewards/margins": 7.762246608734131, "rewards/rejected": -8.804513931274414, "step": 38330 }, { "epoch": 0.46, "learning_rate": 3.2812799857943843e-06, "logits/chosen": -2.881762742996216, "logits/rejected": -2.288318157196045, "logps/chosen": -132.9899139404297, "logps/rejected": -965.8580932617188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8691131472587585, "rewards/margins": 8.399892807006836, "rewards/rejected": -9.26900577545166, "step": 38340 }, { "epoch": 0.46, "learning_rate": 3.2802876178717846e-06, "logits/chosen": -2.845921516418457, "logits/rejected": -2.353595018386841, "logps/chosen": -170.89820861816406, "logps/rejected": -1030.341064453125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.2143570184707642, "rewards/margins": 8.685748100280762, "rewards/rejected": -9.900105476379395, "step": 38350 }, { "epoch": 0.46, "learning_rate": 3.2792951137131336e-06, "logits/chosen": -2.7781472206115723, "logits/rejected": -2.091362237930298, "logps/chosen": -184.41671752929688, "logps/rejected": -1074.4912109375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.3480827808380127, "rewards/margins": 8.996675491333008, "rewards/rejected": -10.344758987426758, "step": 38360 }, { "epoch": 0.46, "learning_rate": 3.27830247349172e-06, "logits/chosen": -2.895902395248413, "logits/rejected": -2.271732807159424, "logps/chosen": -148.500732421875, "logps/rejected": -1088.1019287109375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0188570022583008, "rewards/margins": 9.469999313354492, "rewards/rejected": -10.488856315612793, "step": 38370 }, { "epoch": 0.46, "learning_rate": 3.277309697380856e-06, "logits/chosen": -2.806445598602295, "logits/rejected": -2.111063241958618, "logps/chosen": -192.3894805908203, "logps/rejected": -1094.9921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.4161300659179688, "rewards/margins": 9.137401580810547, "rewards/rejected": -10.553532600402832, "step": 38380 }, { "epoch": 0.46, "learning_rate": 3.276316785553878e-06, "logits/chosen": -2.8312454223632812, "logits/rejected": -2.3334524631500244, "logps/chosen": -154.41213989257812, "logps/rejected": -973.4791259765625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.0713098049163818, "rewards/margins": 8.295854568481445, "rewards/rejected": -9.367164611816406, "step": 38390 }, { "epoch": 0.46, "learning_rate": 3.2753237381841455e-06, "logits/chosen": -2.8046693801879883, "logits/rejected": -2.291623830795288, "logps/chosen": -182.42034912109375, "logps/rejected": -1071.177001953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.3252242803573608, "rewards/margins": 8.993574142456055, "rewards/rejected": -10.31879997253418, "step": 38400 }, { "epoch": 0.46, "learning_rate": 3.274330555445042e-06, "logits/chosen": -2.8398828506469727, "logits/rejected": -2.176948308944702, "logps/chosen": -155.66485595703125, "logps/rejected": -1107.6475830078125, "loss": 0.1161, "rewards/accuracies": 1.0, "rewards/chosen": -1.0836155414581299, "rewards/margins": 9.605941772460938, "rewards/rejected": -10.689558029174805, "step": 38410 }, { "epoch": 0.46, "learning_rate": 3.273337237509973e-06, "logits/chosen": -2.8361313343048096, "logits/rejected": -2.3952059745788574, "logps/chosen": -134.92623901367188, "logps/rejected": -954.7861328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9162739515304565, "rewards/margins": 8.252683639526367, "rewards/rejected": -9.16895866394043, "step": 38420 }, { "epoch": 0.46, "learning_rate": 3.2723437845523714e-06, "logits/chosen": -2.8518123626708984, "logits/rejected": -2.438477039337158, "logps/chosen": -137.322509765625, "logps/rejected": -964.728515625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9577625393867493, "rewards/margins": 8.292634963989258, "rewards/rejected": -9.250397682189941, "step": 38430 }, { "epoch": 0.46, "learning_rate": 3.2713501967456896e-06, "logits/chosen": -2.8468971252441406, "logits/rejected": -2.286332607269287, "logps/chosen": -180.71922302246094, "logps/rejected": -999.5140380859375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.362347960472107, "rewards/margins": 8.232303619384766, "rewards/rejected": -9.59465217590332, "step": 38440 }, { "epoch": 0.46, "learning_rate": 3.270356474263405e-06, "logits/chosen": -2.843923807144165, "logits/rejected": -2.2538509368896484, "logps/chosen": -183.22372436523438, "logps/rejected": -1173.0108642578125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.3138766288757324, "rewards/margins": 10.005029678344727, "rewards/rejected": -11.318906784057617, "step": 38450 }, { "epoch": 0.46, "learning_rate": 3.269362617279021e-06, "logits/chosen": -2.822787284851074, "logits/rejected": -2.1807243824005127, "logps/chosen": -180.57164001464844, "logps/rejected": -1084.151123046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.319414734840393, "rewards/margins": 9.123745918273926, "rewards/rejected": -10.443161964416504, "step": 38460 }, { "epoch": 0.46, "learning_rate": 3.26836862596606e-06, "logits/chosen": -2.9093940258026123, "logits/rejected": -2.4919791221618652, "logps/chosen": -146.55160522460938, "logps/rejected": -892.7267456054688, "loss": 0.1012, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0001555681228638, "rewards/margins": 7.554490089416504, "rewards/rejected": -8.554646492004395, "step": 38470 }, { "epoch": 0.46, "learning_rate": 3.267374500498071e-06, "logits/chosen": -2.866746425628662, "logits/rejected": -2.4081625938415527, "logps/chosen": -124.97647857666016, "logps/rejected": -974.0179443359375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8315127491950989, "rewards/margins": 8.530570983886719, "rewards/rejected": -9.362085342407227, "step": 38480 }, { "epoch": 0.46, "learning_rate": 3.2663802410486256e-06, "logits/chosen": -2.7785732746124268, "logits/rejected": -2.1056504249572754, "logps/chosen": -206.31741333007812, "logps/rejected": -1214.216796875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5402960777282715, "rewards/margins": 10.199732780456543, "rewards/rejected": -11.740030288696289, "step": 38490 }, { "epoch": 0.46, "learning_rate": 3.265385847791319e-06, "logits/chosen": -2.8744773864746094, "logits/rejected": -2.334179162979126, "logps/chosen": -148.1166534423828, "logps/rejected": -1036.163330078125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.0264496803283691, "rewards/margins": 8.934895515441895, "rewards/rejected": -9.961345672607422, "step": 38500 }, { "epoch": 0.46, "learning_rate": 3.264391320899769e-06, "logits/chosen": -2.792996644973755, "logits/rejected": -2.305640935897827, "logps/chosen": -214.76217651367188, "logps/rejected": -1124.279052734375, "loss": 0.0831, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6654794216156006, "rewards/margins": 9.18052864074707, "rewards/rejected": -10.846007347106934, "step": 38510 }, { "epoch": 0.46, "learning_rate": 3.263396660547617e-06, "logits/chosen": -2.8526573181152344, "logits/rejected": -2.4265401363372803, "logps/chosen": -139.08377075195312, "logps/rejected": -952.0827026367188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9488943815231323, "rewards/margins": 8.194025993347168, "rewards/rejected": -9.142919540405273, "step": 38520 }, { "epoch": 0.46, "learning_rate": 3.262401866908529e-06, "logits/chosen": -2.843369722366333, "logits/rejected": -2.281137704849243, "logps/chosen": -169.12533569335938, "logps/rejected": -1098.6998291015625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1944589614868164, "rewards/margins": 9.400899887084961, "rewards/rejected": -10.595357894897461, "step": 38530 }, { "epoch": 0.46, "learning_rate": 3.261406940156191e-06, "logits/chosen": -2.814697742462158, "logits/rejected": -2.333199977874756, "logps/chosen": -145.10218811035156, "logps/rejected": -991.1253051757812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9998713731765747, "rewards/margins": 8.520025253295898, "rewards/rejected": -9.5198974609375, "step": 38540 }, { "epoch": 0.46, "learning_rate": 3.260411880464317e-06, "logits/chosen": -2.877378225326538, "logits/rejected": -2.3272602558135986, "logps/chosen": -177.339599609375, "logps/rejected": -1113.7333984375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2645939588546753, "rewards/margins": 9.47540283203125, "rewards/rejected": -10.739995956420898, "step": 38550 }, { "epoch": 0.46, "learning_rate": 3.259416688006641e-06, "logits/chosen": -2.8845736980438232, "logits/rejected": -2.48005747795105, "logps/chosen": -135.7949981689453, "logps/rejected": -939.9142456054688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9484812021255493, "rewards/margins": 8.085047721862793, "rewards/rejected": -9.033529281616211, "step": 38560 }, { "epoch": 0.46, "learning_rate": 3.2584213629569196e-06, "logits/chosen": -2.86370587348938, "logits/rejected": -2.3255410194396973, "logps/chosen": -153.83554077148438, "logps/rejected": -1104.6339111328125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.031256914138794, "rewards/margins": 9.606647491455078, "rewards/rejected": -10.63790512084961, "step": 38570 }, { "epoch": 0.46, "learning_rate": 3.257425905488935e-06, "logits/chosen": -2.7741189002990723, "logits/rejected": -2.166647434234619, "logps/chosen": -140.3098602294922, "logps/rejected": -1091.4249267578125, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -0.9211140871047974, "rewards/margins": 9.603761672973633, "rewards/rejected": -10.524876594543457, "step": 38580 }, { "epoch": 0.46, "learning_rate": 3.2564303157764914e-06, "logits/chosen": -2.7917568683624268, "logits/rejected": -2.260925531387329, "logps/chosen": -181.0507354736328, "logps/rejected": -1016.1554565429688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.3293195962905884, "rewards/margins": 8.432604789733887, "rewards/rejected": -9.76192569732666, "step": 38590 }, { "epoch": 0.46, "learning_rate": 3.2554345939934153e-06, "logits/chosen": -2.8863232135772705, "logits/rejected": -2.1729276180267334, "logps/chosen": -147.53268432617188, "logps/rejected": -1025.69091796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9920374751091003, "rewards/margins": 8.866601943969727, "rewards/rejected": -9.858638763427734, "step": 38600 }, { "epoch": 0.46, "learning_rate": 3.254438740313557e-06, "logits/chosen": -2.8373985290527344, "logits/rejected": -2.258939743041992, "logps/chosen": -175.53976440429688, "logps/rejected": -1041.1888427734375, "loss": 0.1436, "rewards/accuracies": 1.0, "rewards/chosen": -1.2407658100128174, "rewards/margins": 8.782329559326172, "rewards/rejected": -10.023096084594727, "step": 38610 }, { "epoch": 0.46, "learning_rate": 3.2534427549107907e-06, "logits/chosen": -2.8256611824035645, "logits/rejected": -2.23762583732605, "logps/chosen": -174.77745056152344, "logps/rejected": -1119.12060546875, "loss": 0.1013, "rewards/accuracies": 1.0, "rewards/chosen": -1.2652688026428223, "rewards/margins": 9.543596267700195, "rewards/rejected": -10.808863639831543, "step": 38620 }, { "epoch": 0.46, "learning_rate": 3.252446637959011e-06, "logits/chosen": -2.827545166015625, "logits/rejected": -2.1787333488464355, "logps/chosen": -181.06936645507812, "logps/rejected": -1115.0269775390625, "loss": 0.1199, "rewards/accuracies": 1.0, "rewards/chosen": -1.2443087100982666, "rewards/margins": 9.510854721069336, "rewards/rejected": -10.755165100097656, "step": 38630 }, { "epoch": 0.46, "learning_rate": 3.2514503896321386e-06, "logits/chosen": -2.7755463123321533, "logits/rejected": -2.0606942176818848, "logps/chosen": -191.927490234375, "logps/rejected": -1113.945068359375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.373410940170288, "rewards/margins": 9.393658638000488, "rewards/rejected": -10.767068862915039, "step": 38640 }, { "epoch": 0.46, "learning_rate": 3.250454010104115e-06, "logits/chosen": -2.8367056846618652, "logits/rejected": -2.179719924926758, "logps/chosen": -192.35159301757812, "logps/rejected": -1037.2257080078125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.4121711254119873, "rewards/margins": 8.571943283081055, "rewards/rejected": -9.984115600585938, "step": 38650 }, { "epoch": 0.46, "learning_rate": 3.2494574995489066e-06, "logits/chosen": -2.7904250621795654, "logits/rejected": -2.116455554962158, "logps/chosen": -173.59934997558594, "logps/rejected": -1036.6094970703125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.2603707313537598, "rewards/margins": 8.706210136413574, "rewards/rejected": -9.966580390930176, "step": 38660 }, { "epoch": 0.46, "learning_rate": 3.248460858140499e-06, "logits/chosen": -2.8302738666534424, "logits/rejected": -2.0901036262512207, "logps/chosen": -177.74977111816406, "logps/rejected": -1253.2601318359375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.2597239017486572, "rewards/margins": 10.87623405456543, "rewards/rejected": -12.135958671569824, "step": 38670 }, { "epoch": 0.46, "learning_rate": 3.2474640860529046e-06, "logits/chosen": -2.8081717491149902, "logits/rejected": -2.1922214031219482, "logps/chosen": -165.735595703125, "logps/rejected": -1167.968017578125, "loss": 0.1237, "rewards/accuracies": 1.0, "rewards/chosen": -1.207684874534607, "rewards/margins": 10.072526931762695, "rewards/rejected": -11.280210494995117, "step": 38680 }, { "epoch": 0.46, "learning_rate": 3.2464671834601572e-06, "logits/chosen": -2.8088386058807373, "logits/rejected": -2.139158248901367, "logps/chosen": -167.62771606445312, "logps/rejected": -1154.12548828125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.1896151304244995, "rewards/margins": 9.960565567016602, "rewards/rejected": -11.15018081665039, "step": 38690 }, { "epoch": 0.46, "learning_rate": 3.245470150536313e-06, "logits/chosen": -2.8768928050994873, "logits/rejected": -2.5913548469543457, "logps/chosen": -102.98063659667969, "logps/rejected": -875.9129028320312, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6672395467758179, "rewards/margins": 7.722010612487793, "rewards/rejected": -8.389249801635742, "step": 38700 }, { "epoch": 0.46, "learning_rate": 3.24447298745545e-06, "logits/chosen": -2.874873638153076, "logits/rejected": -2.23667049407959, "logps/chosen": -169.3359832763672, "logps/rejected": -1115.27783203125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.215749979019165, "rewards/margins": 9.541158676147461, "rewards/rejected": -10.75690746307373, "step": 38710 }, { "epoch": 0.46, "learning_rate": 3.2434756943916724e-06, "logits/chosen": -2.8499562740325928, "logits/rejected": -2.1867947578430176, "logps/chosen": -216.2727813720703, "logps/rejected": -1165.6529541015625, "loss": 0.0554, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6413885354995728, "rewards/margins": 9.621805191040039, "rewards/rejected": -11.263193130493164, "step": 38720 }, { "epoch": 0.46, "learning_rate": 3.2424782715191024e-06, "logits/chosen": -2.8901517391204834, "logits/rejected": -2.471160411834717, "logps/chosen": -120.90205383300781, "logps/rejected": -962.6419677734375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7778642177581787, "rewards/margins": 8.46617603302002, "rewards/rejected": -9.244038581848145, "step": 38730 }, { "epoch": 0.46, "learning_rate": 3.241480719011888e-06, "logits/chosen": -2.8519980907440186, "logits/rejected": -2.148127794265747, "logps/chosen": -207.45614624023438, "logps/rejected": -1285.0008544921875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.5661485195159912, "rewards/margins": 10.869616508483887, "rewards/rejected": -12.43576431274414, "step": 38740 }, { "epoch": 0.46, "learning_rate": 3.2404830370442e-06, "logits/chosen": -2.8350281715393066, "logits/rejected": -2.232499361038208, "logps/chosen": -226.51669311523438, "logps/rejected": -1098.9273681640625, "loss": 0.3207, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.7396043539047241, "rewards/margins": 8.862207412719727, "rewards/rejected": -10.601812362670898, "step": 38750 }, { "epoch": 0.46, "learning_rate": 3.2394852257902302e-06, "logits/chosen": -2.847224712371826, "logits/rejected": -2.356827974319458, "logps/chosen": -147.11801147460938, "logps/rejected": -1066.086669921875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0025920867919922, "rewards/margins": 9.264852523803711, "rewards/rejected": -10.26744556427002, "step": 38760 }, { "epoch": 0.46, "learning_rate": 3.238487285424194e-06, "logits/chosen": -2.8597331047058105, "logits/rejected": -2.1664795875549316, "logps/chosen": -180.9124755859375, "logps/rejected": -1148.85107421875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.325637936592102, "rewards/margins": 9.751836776733398, "rewards/rejected": -11.077474594116211, "step": 38770 }, { "epoch": 0.46, "learning_rate": 3.2374892161203293e-06, "logits/chosen": -2.8471527099609375, "logits/rejected": -2.2085442543029785, "logps/chosen": -166.6773223876953, "logps/rejected": -1048.363525390625, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.1577298641204834, "rewards/margins": 8.926097869873047, "rewards/rejected": -10.083827018737793, "step": 38780 }, { "epoch": 0.46, "learning_rate": 3.236491018052895e-06, "logits/chosen": -2.8260488510131836, "logits/rejected": -2.165944814682007, "logps/chosen": -211.987060546875, "logps/rejected": -1033.1925048828125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.5746046304702759, "rewards/margins": 8.348493576049805, "rewards/rejected": -9.923095703125, "step": 38790 }, { "epoch": 0.46, "learning_rate": 3.2354926913961747e-06, "logits/chosen": -2.8319737911224365, "logits/rejected": -2.4942054748535156, "logps/chosen": -138.64111328125, "logps/rejected": -869.3352661132812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9715455770492554, "rewards/margins": 7.339973449707031, "rewards/rejected": -8.311518669128418, "step": 38800 }, { "epoch": 0.46, "learning_rate": 3.234494236324473e-06, "logits/chosen": -2.8895621299743652, "logits/rejected": -2.3740267753601074, "logps/chosen": -136.10147094726562, "logps/rejected": -983.0218505859375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9106643795967102, "rewards/margins": 8.538064956665039, "rewards/rejected": -9.448729515075684, "step": 38810 }, { "epoch": 0.46, "learning_rate": 3.2334956530121188e-06, "logits/chosen": -2.8242104053497314, "logits/rejected": -2.297992706298828, "logps/chosen": -171.23165893554688, "logps/rejected": -1076.002685546875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.2635278701782227, "rewards/margins": 9.104805946350098, "rewards/rejected": -10.36833381652832, "step": 38820 }, { "epoch": 0.46, "learning_rate": 3.2324969416334593e-06, "logits/chosen": -2.923633575439453, "logits/rejected": -2.483940362930298, "logps/chosen": -121.9642562866211, "logps/rejected": -870.2800903320312, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -0.775212287902832, "rewards/margins": 7.559614658355713, "rewards/rejected": -8.334827423095703, "step": 38830 }, { "epoch": 0.46, "learning_rate": 3.231498102362869e-06, "logits/chosen": -2.878755569458008, "logits/rejected": -2.461824417114258, "logps/chosen": -125.84004211425781, "logps/rejected": -987.1814575195312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8137100338935852, "rewards/margins": 8.660833358764648, "rewards/rejected": -9.474542617797852, "step": 38840 }, { "epoch": 0.47, "learning_rate": 3.2304991353747415e-06, "logits/chosen": -2.8341867923736572, "logits/rejected": -2.3713645935058594, "logps/chosen": -171.56092834472656, "logps/rejected": -1106.84423828125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2151883840560913, "rewards/margins": 9.449601173400879, "rewards/rejected": -10.664789199829102, "step": 38850 }, { "epoch": 0.47, "learning_rate": 3.2295000408434935e-06, "logits/chosen": -2.8316264152526855, "logits/rejected": -2.215506076812744, "logps/chosen": -189.87283325195312, "logps/rejected": -1100.926513671875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.3852264881134033, "rewards/margins": 9.221176147460938, "rewards/rejected": -10.606404304504395, "step": 38860 }, { "epoch": 0.47, "learning_rate": 3.2285008189435645e-06, "logits/chosen": -2.8861706256866455, "logits/rejected": -2.540897846221924, "logps/chosen": -116.09754943847656, "logps/rejected": -960.6950073242188, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7531237006187439, "rewards/margins": 8.476908683776855, "rewards/rejected": -9.230031967163086, "step": 38870 }, { "epoch": 0.47, "learning_rate": 3.2275014698494167e-06, "logits/chosen": -2.842136859893799, "logits/rejected": -2.470824956893921, "logps/chosen": -150.57870483398438, "logps/rejected": -1020.1943359375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0222562551498413, "rewards/margins": 8.784614562988281, "rewards/rejected": -9.806870460510254, "step": 38880 }, { "epoch": 0.47, "learning_rate": 3.226501993735532e-06, "logits/chosen": -2.844466209411621, "logits/rejected": -2.581578493118286, "logps/chosen": -134.06393432617188, "logps/rejected": -796.3292846679688, "loss": 0.1885, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.989175021648407, "rewards/margins": 6.609312534332275, "rewards/rejected": -7.59848690032959, "step": 38890 }, { "epoch": 0.47, "learning_rate": 3.225502390776417e-06, "logits/chosen": -2.824944019317627, "logits/rejected": -2.0372536182403564, "logps/chosen": -210.43215942382812, "logps/rejected": -1153.011962890625, "loss": 0.1216, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4913581609725952, "rewards/margins": 9.626947402954102, "rewards/rejected": -11.118307113647461, "step": 38900 }, { "epoch": 0.47, "learning_rate": 3.2245026611465994e-06, "logits/chosen": -2.839573383331299, "logits/rejected": -2.2074015140533447, "logps/chosen": -142.63107299804688, "logps/rejected": -940.8273315429688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9259702563285828, "rewards/margins": 8.102333068847656, "rewards/rejected": -9.028303146362305, "step": 38910 }, { "epoch": 0.47, "learning_rate": 3.2235028050206304e-06, "logits/chosen": -2.8753857612609863, "logits/rejected": -2.390132427215576, "logps/chosen": -148.32730102539062, "logps/rejected": -1088.543701171875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9797574877738953, "rewards/margins": 9.50471305847168, "rewards/rejected": -10.484471321105957, "step": 38920 }, { "epoch": 0.47, "learning_rate": 3.2225028225730804e-06, "logits/chosen": -2.840402364730835, "logits/rejected": -2.315040349960327, "logps/chosen": -169.82325744628906, "logps/rejected": -985.3734130859375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.2388440370559692, "rewards/margins": 8.220052719116211, "rewards/rejected": -9.458897590637207, "step": 38930 }, { "epoch": 0.47, "learning_rate": 3.2215027139785437e-06, "logits/chosen": -2.8306052684783936, "logits/rejected": -2.136110305786133, "logps/chosen": -172.4607696533203, "logps/rejected": -1058.132568359375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1902482509613037, "rewards/margins": 9.007756233215332, "rewards/rejected": -10.198005676269531, "step": 38940 }, { "epoch": 0.47, "learning_rate": 3.220502479411638e-06, "logits/chosen": -2.8454155921936035, "logits/rejected": -2.1990809440612793, "logps/chosen": -206.1549530029297, "logps/rejected": -1228.32275390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.4886828660964966, "rewards/margins": 10.379433631896973, "rewards/rejected": -11.86811637878418, "step": 38950 }, { "epoch": 0.47, "learning_rate": 3.2195021190469997e-06, "logits/chosen": -2.8840723037719727, "logits/rejected": -2.482835531234741, "logps/chosen": -136.97276306152344, "logps/rejected": -980.0035400390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9612058401107788, "rewards/margins": 8.46159553527832, "rewards/rejected": -9.422801971435547, "step": 38960 }, { "epoch": 0.47, "learning_rate": 3.21850163305929e-06, "logits/chosen": -2.811668872833252, "logits/rejected": -2.243431806564331, "logps/chosen": -165.8543701171875, "logps/rejected": -1099.5518798828125, "loss": 0.1326, "rewards/accuracies": 1.0, "rewards/chosen": -1.1680915355682373, "rewards/margins": 9.42074203491211, "rewards/rejected": -10.58883285522461, "step": 38970 }, { "epoch": 0.47, "learning_rate": 3.2175010216231913e-06, "logits/chosen": -2.8537986278533936, "logits/rejected": -2.185746669769287, "logps/chosen": -211.91964721679688, "logps/rejected": -1082.1160888671875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.5708459615707397, "rewards/margins": 8.83907413482666, "rewards/rejected": -10.409918785095215, "step": 38980 }, { "epoch": 0.47, "learning_rate": 3.2165002849134062e-06, "logits/chosen": -2.879807233810425, "logits/rejected": -2.6219303607940674, "logps/chosen": -87.97693634033203, "logps/rejected": -835.0494995117188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5200470685958862, "rewards/margins": 7.460509300231934, "rewards/rejected": -7.980557441711426, "step": 38990 }, { "epoch": 0.47, "learning_rate": 3.2154994231046616e-06, "logits/chosen": -2.851015567779541, "logits/rejected": -2.0711333751678467, "logps/chosen": -195.34706115722656, "logps/rejected": -1195.071533203125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.4219911098480225, "rewards/margins": 10.119071960449219, "rewards/rejected": -11.54106330871582, "step": 39000 }, { "epoch": 0.47, "eval_logits/chosen": -2.8402750492095947, "eval_logits/rejected": -1.667192816734314, "eval_logps/chosen": -360.3922119140625, "eval_logps/rejected": -1367.6129150390625, "eval_loss": 0.0006924488116055727, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -2.992119550704956, "eval_rewards/margins": 10.216755867004395, "eval_rewards/rejected": -13.208874702453613, "eval_runtime": 1.2168, "eval_samples_per_second": 4.109, "eval_steps_per_second": 2.465, "step": 39000 }, { "epoch": 0.47, "learning_rate": 3.2144984363717053e-06, "logits/chosen": -2.875757932662964, "logits/rejected": -2.196087598800659, "logps/chosen": -158.6478729248047, "logps/rejected": -1074.4483642578125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.028151273727417, "rewards/margins": 9.303728103637695, "rewards/rejected": -10.331877708435059, "step": 39010 }, { "epoch": 0.47, "learning_rate": 3.2134973248893065e-06, "logits/chosen": -2.86523699760437, "logits/rejected": -2.376521587371826, "logps/chosen": -149.74488830566406, "logps/rejected": -991.2844848632812, "loss": 0.1531, "rewards/accuracies": 1.0, "rewards/chosen": -1.0757849216461182, "rewards/margins": 8.457499504089355, "rewards/rejected": -9.533285140991211, "step": 39020 }, { "epoch": 0.47, "learning_rate": 3.2124960888322556e-06, "logits/chosen": -2.8595385551452637, "logits/rejected": -2.424363613128662, "logps/chosen": -116.434814453125, "logps/rejected": -1007.3406982421875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7160892486572266, "rewards/margins": 8.973588943481445, "rewards/rejected": -9.689677238464355, "step": 39030 }, { "epoch": 0.47, "learning_rate": 3.2114947283753673e-06, "logits/chosen": -2.8555691242218018, "logits/rejected": -2.1285388469696045, "logps/chosen": -198.12098693847656, "logps/rejected": -1080.4681396484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.3952219486236572, "rewards/margins": 9.006317138671875, "rewards/rejected": -10.401538848876953, "step": 39040 }, { "epoch": 0.47, "learning_rate": 3.2104932436934754e-06, "logits/chosen": -2.8814492225646973, "logits/rejected": -2.422538995742798, "logps/chosen": -124.4350357055664, "logps/rejected": -971.3143310546875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.7962058186531067, "rewards/margins": 8.534327507019043, "rewards/rejected": -9.330533027648926, "step": 39050 }, { "epoch": 0.47, "learning_rate": 3.2094916349614356e-06, "logits/chosen": -2.847869396209717, "logits/rejected": -2.403933048248291, "logps/chosen": -152.43417358398438, "logps/rejected": -957.9049072265625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.068269968032837, "rewards/margins": 8.130961418151855, "rewards/rejected": -9.19923210144043, "step": 39060 }, { "epoch": 0.47, "learning_rate": 3.208489902354128e-06, "logits/chosen": -2.745326280593872, "logits/rejected": -1.999293565750122, "logps/chosen": -269.90167236328125, "logps/rejected": -1287.3681640625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.1416735649108887, "rewards/margins": 10.339760780334473, "rewards/rejected": -12.481435775756836, "step": 39070 }, { "epoch": 0.47, "learning_rate": 3.2074880460464507e-06, "logits/chosen": -2.7970690727233887, "logits/rejected": -2.003246545791626, "logps/chosen": -246.0345916748047, "logps/rejected": -1291.4354248046875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.9205188751220703, "rewards/margins": 10.588005065917969, "rewards/rejected": -12.508523941040039, "step": 39080 }, { "epoch": 0.47, "learning_rate": 3.206486066213326e-06, "logits/chosen": -2.8063201904296875, "logits/rejected": -2.3117470741271973, "logps/chosen": -186.619384765625, "logps/rejected": -1017.2931518554688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.4045923948287964, "rewards/margins": 8.373527526855469, "rewards/rejected": -9.778120040893555, "step": 39090 }, { "epoch": 0.47, "learning_rate": 3.2054839630296953e-06, "logits/chosen": -2.8119094371795654, "logits/rejected": -2.1474075317382812, "logps/chosen": -193.42236328125, "logps/rejected": -981.9845581054688, "loss": 0.0867, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4747451543807983, "rewards/margins": 7.955779075622559, "rewards/rejected": -9.430524826049805, "step": 39100 }, { "epoch": 0.47, "learning_rate": 3.204481736670525e-06, "logits/chosen": -2.805839776992798, "logits/rejected": -2.097085475921631, "logps/chosen": -204.09814453125, "logps/rejected": -1112.99462890625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.560988187789917, "rewards/margins": 9.171533584594727, "rewards/rejected": -10.732522010803223, "step": 39110 }, { "epoch": 0.47, "learning_rate": 3.2034793873107995e-06, "logits/chosen": -2.785386800765991, "logits/rejected": -2.0966291427612305, "logps/chosen": -197.87728881835938, "logps/rejected": -1124.4111328125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5230278968811035, "rewards/margins": 9.31324577331543, "rewards/rejected": -10.836273193359375, "step": 39120 }, { "epoch": 0.47, "learning_rate": 3.202476915125527e-06, "logits/chosen": -2.7926740646362305, "logits/rejected": -2.210648536682129, "logps/chosen": -203.68154907226562, "logps/rejected": -976.5389404296875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.572274923324585, "rewards/margins": 7.805392265319824, "rewards/rejected": -9.377668380737305, "step": 39130 }, { "epoch": 0.47, "learning_rate": 3.2014743202897363e-06, "logits/chosen": -2.798917055130005, "logits/rejected": -2.216592311859131, "logps/chosen": -227.3325958251953, "logps/rejected": -1035.287353515625, "loss": 0.1108, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.8218328952789307, "rewards/margins": 8.151820182800293, "rewards/rejected": -9.973652839660645, "step": 39140 }, { "epoch": 0.47, "learning_rate": 3.200471602978477e-06, "logits/chosen": -2.715869188308716, "logits/rejected": -1.9437954425811768, "logps/chosen": -299.4216003417969, "logps/rejected": -1188.944580078125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -2.4588465690612793, "rewards/margins": 9.021781921386719, "rewards/rejected": -11.48062801361084, "step": 39150 }, { "epoch": 0.47, "learning_rate": 3.199468763366822e-06, "logits/chosen": -2.833261489868164, "logits/rejected": -2.491349458694458, "logps/chosen": -155.33761596679688, "logps/rejected": -971.43212890625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1419469118118286, "rewards/margins": 8.204601287841797, "rewards/rejected": -9.346548080444336, "step": 39160 }, { "epoch": 0.47, "learning_rate": 3.198465801629863e-06, "logits/chosen": -2.7806782722473145, "logits/rejected": -2.3933017253875732, "logps/chosen": -162.17884826660156, "logps/rejected": -967.7589721679688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2362596988677979, "rewards/margins": 8.071764945983887, "rewards/rejected": -9.308025360107422, "step": 39170 }, { "epoch": 0.47, "learning_rate": 3.1974627179427155e-06, "logits/chosen": -2.7966647148132324, "logits/rejected": -2.3457465171813965, "logps/chosen": -207.727783203125, "logps/rejected": -1090.5333251953125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.6553623676300049, "rewards/margins": 8.860037803649902, "rewards/rejected": -10.515399932861328, "step": 39180 }, { "epoch": 0.47, "learning_rate": 3.1964595124805136e-06, "logits/chosen": -2.8278963565826416, "logits/rejected": -2.161994695663452, "logps/chosen": -200.6087188720703, "logps/rejected": -1154.548583984375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5150139331817627, "rewards/margins": 9.635234832763672, "rewards/rejected": -11.150249481201172, "step": 39190 }, { "epoch": 0.47, "learning_rate": 3.1954561854184164e-06, "logits/chosen": -2.8284504413604736, "logits/rejected": -1.9222240447998047, "logps/chosen": -232.26577758789062, "logps/rejected": -1216.47900390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.7275301218032837, "rewards/margins": 10.035983085632324, "rewards/rejected": -11.763513565063477, "step": 39200 }, { "epoch": 0.47, "learning_rate": 3.1944527369316004e-06, "logits/chosen": -2.7587084770202637, "logits/rejected": -2.0186820030212402, "logps/chosen": -222.23684692382812, "logps/rejected": -1146.380859375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.7030999660491943, "rewards/margins": 9.373677253723145, "rewards/rejected": -11.076776504516602, "step": 39210 }, { "epoch": 0.47, "learning_rate": 3.193449167195265e-06, "logits/chosen": -2.8427317142486572, "logits/rejected": -2.54695463180542, "logps/chosen": -123.00337982177734, "logps/rejected": -964.8516845703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8481804728507996, "rewards/margins": 8.426894187927246, "rewards/rejected": -9.275075912475586, "step": 39220 }, { "epoch": 0.47, "learning_rate": 3.192445476384631e-06, "logits/chosen": -2.8319027423858643, "logits/rejected": -2.298490285873413, "logps/chosen": -135.28488159179688, "logps/rejected": -922.4122924804688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9366421699523926, "rewards/margins": 7.909201622009277, "rewards/rejected": -8.845845222473145, "step": 39230 }, { "epoch": 0.47, "learning_rate": 3.1914416646749404e-06, "logits/chosen": -2.8264331817626953, "logits/rejected": -2.049470901489258, "logps/chosen": -198.2686767578125, "logps/rejected": -1179.728515625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.4465819597244263, "rewards/margins": 9.960562705993652, "rewards/rejected": -11.407144546508789, "step": 39240 }, { "epoch": 0.47, "learning_rate": 3.190437732241455e-06, "logits/chosen": -2.7897565364837646, "logits/rejected": -2.1012845039367676, "logps/chosen": -209.0184783935547, "logps/rejected": -1033.252685546875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.5925801992416382, "rewards/margins": 8.379476547241211, "rewards/rejected": -9.97205638885498, "step": 39250 }, { "epoch": 0.47, "learning_rate": 3.1894336792594595e-06, "logits/chosen": -2.801314353942871, "logits/rejected": -2.139566659927368, "logps/chosen": -264.49444580078125, "logps/rejected": -1229.582763671875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.1117537021636963, "rewards/margins": 9.790441513061523, "rewards/rejected": -11.90219497680664, "step": 39260 }, { "epoch": 0.47, "learning_rate": 3.1884295059042585e-06, "logits/chosen": -2.8308510780334473, "logits/rejected": -2.139106035232544, "logps/chosen": -261.03424072265625, "logps/rejected": -1040.5452880859375, "loss": 0.1068, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.136211633682251, "rewards/margins": 7.869429588317871, "rewards/rejected": -10.005640983581543, "step": 39270 }, { "epoch": 0.47, "learning_rate": 3.1874252123511775e-06, "logits/chosen": -2.8403258323669434, "logits/rejected": -2.1087779998779297, "logps/chosen": -229.0601043701172, "logps/rejected": -1203.9935302734375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.7376394271850586, "rewards/margins": 9.896855354309082, "rewards/rejected": -11.634495735168457, "step": 39280 }, { "epoch": 0.47, "learning_rate": 3.1864207987755636e-06, "logits/chosen": -2.810351610183716, "logits/rejected": -2.1650993824005127, "logps/chosen": -216.7449493408203, "logps/rejected": -1147.610595703125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.6461206674575806, "rewards/margins": 9.434735298156738, "rewards/rejected": -11.080855369567871, "step": 39290 }, { "epoch": 0.47, "learning_rate": 3.1854162653527855e-06, "logits/chosen": -2.8513264656066895, "logits/rejected": -2.365572929382324, "logps/chosen": -158.0353240966797, "logps/rejected": -1008.1221923828125, "loss": 0.1526, "rewards/accuracies": 1.0, "rewards/chosen": -1.175784945487976, "rewards/margins": 8.533517837524414, "rewards/rejected": -9.70930290222168, "step": 39300 }, { "epoch": 0.47, "learning_rate": 3.1844116122582304e-06, "logits/chosen": -2.8245363235473633, "logits/rejected": -2.3270816802978516, "logps/chosen": -154.22171020507812, "logps/rejected": -944.7478637695312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.1219258308410645, "rewards/margins": 7.93071985244751, "rewards/rejected": -9.052645683288574, "step": 39310 }, { "epoch": 0.47, "learning_rate": 3.1834068396673085e-06, "logits/chosen": -2.8683342933654785, "logits/rejected": -2.2051193714141846, "logps/chosen": -165.9799346923828, "logps/rejected": -1093.14794921875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1977840662002563, "rewards/margins": 9.337265014648438, "rewards/rejected": -10.535049438476562, "step": 39320 }, { "epoch": 0.47, "learning_rate": 3.1824019477554507e-06, "logits/chosen": -2.8374314308166504, "logits/rejected": -2.2866649627685547, "logps/chosen": -180.96273803710938, "logps/rejected": -1109.4383544921875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.3227920532226562, "rewards/margins": 9.371162414550781, "rewards/rejected": -10.693952560424805, "step": 39330 }, { "epoch": 0.47, "learning_rate": 3.181396936698109e-06, "logits/chosen": -2.8526787757873535, "logits/rejected": -1.9724361896514893, "logps/chosen": -206.2121124267578, "logps/rejected": -1135.644775390625, "loss": 0.157, "rewards/accuracies": 1.0, "rewards/chosen": -1.5521814823150635, "rewards/margins": 9.40638256072998, "rewards/rejected": -10.958564758300781, "step": 39340 }, { "epoch": 0.47, "learning_rate": 3.1803918066707533e-06, "logits/chosen": -2.860823631286621, "logits/rejected": -2.391754627227783, "logps/chosen": -166.78750610351562, "logps/rejected": -997.6535034179688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1737868785858154, "rewards/margins": 8.410857200622559, "rewards/rejected": -9.584644317626953, "step": 39350 }, { "epoch": 0.47, "learning_rate": 3.179386557848878e-06, "logits/chosen": -2.840182065963745, "logits/rejected": -2.2284038066864014, "logps/chosen": -198.0294189453125, "logps/rejected": -1083.1998291015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.4788657426834106, "rewards/margins": 8.960592269897461, "rewards/rejected": -10.439457893371582, "step": 39360 }, { "epoch": 0.47, "learning_rate": 3.1783811904079977e-06, "logits/chosen": -2.842623710632324, "logits/rejected": -2.2092010974884033, "logps/chosen": -193.22679138183594, "logps/rejected": -1092.3321533203125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.4067225456237793, "rewards/margins": 9.118200302124023, "rewards/rejected": -10.524923324584961, "step": 39370 }, { "epoch": 0.47, "learning_rate": 3.177375704523644e-06, "logits/chosen": -2.8334555625915527, "logits/rejected": -2.191551685333252, "logps/chosen": -193.62869262695312, "logps/rejected": -1117.3577880859375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.4815983772277832, "rewards/margins": 9.293556213378906, "rewards/rejected": -10.775156021118164, "step": 39380 }, { "epoch": 0.47, "learning_rate": 3.176370100371375e-06, "logits/chosen": -2.7787718772888184, "logits/rejected": -2.153294801712036, "logps/chosen": -197.88299560546875, "logps/rejected": -1172.134521484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.4877415895462036, "rewards/margins": 9.82953929901123, "rewards/rejected": -11.317280769348145, "step": 39390 }, { "epoch": 0.47, "learning_rate": 3.175364378126765e-06, "logits/chosen": -2.8190979957580566, "logits/rejected": -2.3868205547332764, "logps/chosen": -185.16015625, "logps/rejected": -994.5914306640625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4466512203216553, "rewards/margins": 8.123405456542969, "rewards/rejected": -9.570055961608887, "step": 39400 }, { "epoch": 0.47, "learning_rate": 3.1743585379654095e-06, "logits/chosen": -2.8390774726867676, "logits/rejected": -2.2456088066101074, "logps/chosen": -180.2454833984375, "logps/rejected": -1061.693115234375, "loss": 0.0998, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.34958815574646, "rewards/margins": 8.870882034301758, "rewards/rejected": -10.220468521118164, "step": 39410 }, { "epoch": 0.47, "learning_rate": 3.173352580062927e-06, "logits/chosen": -2.896688938140869, "logits/rejected": -2.2225306034088135, "logps/chosen": -228.96884155273438, "logps/rejected": -1165.9703369140625, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -1.8098732233047485, "rewards/margins": 9.45915699005127, "rewards/rejected": -11.269031524658203, "step": 39420 }, { "epoch": 0.47, "learning_rate": 3.172346504594954e-06, "logits/chosen": -2.810978412628174, "logits/rejected": -2.1868088245391846, "logps/chosen": -194.00692749023438, "logps/rejected": -1202.623291015625, "loss": 0.2472, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4466110467910767, "rewards/margins": 10.191108703613281, "rewards/rejected": -11.637721061706543, "step": 39430 }, { "epoch": 0.47, "learning_rate": 3.1713403117371485e-06, "logits/chosen": -2.8179638385772705, "logits/rejected": -2.1855149269104004, "logps/chosen": -200.42959594726562, "logps/rejected": -1164.9366455078125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4804508686065674, "rewards/margins": 9.754638671875, "rewards/rejected": -11.235089302062988, "step": 39440 }, { "epoch": 0.47, "learning_rate": 3.170334001665189e-06, "logits/chosen": -2.8824572563171387, "logits/rejected": -2.4895801544189453, "logps/chosen": -111.87290954589844, "logps/rejected": -945.7919921875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7170838117599487, "rewards/margins": 8.361085891723633, "rewards/rejected": -9.078169822692871, "step": 39450 }, { "epoch": 0.47, "learning_rate": 3.1693275745547754e-06, "logits/chosen": -2.864645481109619, "logits/rejected": -2.432734966278076, "logps/chosen": -131.89889526367188, "logps/rejected": -909.2642822265625, "loss": 0.117, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8635137677192688, "rewards/margins": 7.858515739440918, "rewards/rejected": -8.722030639648438, "step": 39460 }, { "epoch": 0.47, "learning_rate": 3.1683210305816254e-06, "logits/chosen": -2.893763780593872, "logits/rejected": -2.547915458679199, "logps/chosen": -101.65472412109375, "logps/rejected": -905.1383666992188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6031093001365662, "rewards/margins": 8.061108589172363, "rewards/rejected": -8.664217948913574, "step": 39470 }, { "epoch": 0.47, "learning_rate": 3.167314369921479e-06, "logits/chosen": -2.8483123779296875, "logits/rejected": -2.2465627193450928, "logps/chosen": -141.4933624267578, "logps/rejected": -1031.216064453125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.904890239238739, "rewards/margins": 9.01974105834961, "rewards/rejected": -9.924631118774414, "step": 39480 }, { "epoch": 0.47, "learning_rate": 3.166307592750098e-06, "logits/chosen": -2.8629086017608643, "logits/rejected": -2.569450855255127, "logps/chosen": -128.700439453125, "logps/rejected": -956.7611083984375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8684031367301941, "rewards/margins": 8.310163497924805, "rewards/rejected": -9.178565979003906, "step": 39490 }, { "epoch": 0.47, "learning_rate": 3.165300699243262e-06, "logits/chosen": -2.8443758487701416, "logits/rejected": -2.5687954425811768, "logps/chosen": -102.65872955322266, "logps/rejected": -992.97998046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6511741876602173, "rewards/margins": 8.903182983398438, "rewards/rejected": -9.554357528686523, "step": 39500 }, { "epoch": 0.47, "learning_rate": 3.1642936895767716e-06, "logits/chosen": -2.8603529930114746, "logits/rejected": -2.3479084968566895, "logps/chosen": -175.07180786132812, "logps/rejected": -1120.984619140625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2536303997039795, "rewards/margins": 9.564458847045898, "rewards/rejected": -10.818090438842773, "step": 39510 }, { "epoch": 0.47, "learning_rate": 3.1632865639264477e-06, "logits/chosen": -2.844651699066162, "logits/rejected": -2.363905429840088, "logps/chosen": -144.6046600341797, "logps/rejected": -1117.442138671875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9308940768241882, "rewards/margins": 9.85293960571289, "rewards/rejected": -10.783834457397461, "step": 39520 }, { "epoch": 0.47, "learning_rate": 3.1622793224681326e-06, "logits/chosen": -2.794194459915161, "logits/rejected": -2.250009775161743, "logps/chosen": -170.20852661132812, "logps/rejected": -1072.3184814453125, "loss": 0.1128, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.250154733657837, "rewards/margins": 9.092077255249023, "rewards/rejected": -10.342232704162598, "step": 39530 }, { "epoch": 0.47, "learning_rate": 3.1612719653776865e-06, "logits/chosen": -2.87284517288208, "logits/rejected": -2.304469585418701, "logps/chosen": -124.1990737915039, "logps/rejected": -951.9697265625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.7006875872612, "rewards/margins": 8.426888465881348, "rewards/rejected": -9.12757682800293, "step": 39540 }, { "epoch": 0.47, "learning_rate": 3.160264492830992e-06, "logits/chosen": -2.877385377883911, "logits/rejected": -2.3253984451293945, "logps/chosen": -107.54435729980469, "logps/rejected": -934.0975341796875, "loss": 0.1901, "rewards/accuracies": 1.0, "rewards/chosen": -0.5886451601982117, "rewards/margins": 8.380115509033203, "rewards/rejected": -8.96876049041748, "step": 39550 }, { "epoch": 0.47, "learning_rate": 3.1592569050039512e-06, "logits/chosen": -2.8868460655212402, "logits/rejected": -2.3840417861938477, "logps/chosen": -117.80912780761719, "logps/rejected": -956.84765625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7030202150344849, "rewards/margins": 8.47569465637207, "rewards/rejected": -9.17871379852295, "step": 39560 }, { "epoch": 0.47, "learning_rate": 3.1582492020724853e-06, "logits/chosen": -2.9083151817321777, "logits/rejected": -2.483825206756592, "logps/chosen": -108.99169921875, "logps/rejected": -828.0548706054688, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6393635869026184, "rewards/margins": 7.262545108795166, "rewards/rejected": -7.901909828186035, "step": 39570 }, { "epoch": 0.47, "learning_rate": 3.1572413842125373e-06, "logits/chosen": -2.8706297874450684, "logits/rejected": -2.223947525024414, "logps/chosen": -142.3879852294922, "logps/rejected": -1055.27490234375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.85722416639328, "rewards/margins": 9.287920951843262, "rewards/rejected": -10.14514446258545, "step": 39580 }, { "epoch": 0.47, "learning_rate": 3.156233451600068e-06, "logits/chosen": -2.8981947898864746, "logits/rejected": -2.295172929763794, "logps/chosen": -112.99169921875, "logps/rejected": -896.8723754882812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6492644548416138, "rewards/margins": 7.931482791900635, "rewards/rejected": -8.5807466506958, "step": 39590 }, { "epoch": 0.47, "learning_rate": 3.1552254044110614e-06, "logits/chosen": -2.899451732635498, "logits/rejected": -2.3465564250946045, "logps/chosen": -119.01710510253906, "logps/rejected": -1092.615966796875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.7029748558998108, "rewards/margins": 9.817707061767578, "rewards/rejected": -10.520682334899902, "step": 39600 }, { "epoch": 0.47, "learning_rate": 3.154217242821519e-06, "logits/chosen": -2.9006733894348145, "logits/rejected": -2.5569052696228027, "logps/chosen": -101.72017669677734, "logps/rejected": -905.2652587890625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5181452035903931, "rewards/margins": 8.14728832244873, "rewards/rejected": -8.665433883666992, "step": 39610 }, { "epoch": 0.47, "learning_rate": 3.153208967007464e-06, "logits/chosen": -2.897181272506714, "logits/rejected": -2.2956395149230957, "logps/chosen": -134.44891357421875, "logps/rejected": -992.8029174804688, "loss": 0.0807, "rewards/accuracies": 1.0, "rewards/chosen": -0.8053510785102844, "rewards/margins": 8.716337203979492, "rewards/rejected": -9.521688461303711, "step": 39620 }, { "epoch": 0.47, "learning_rate": 3.152200577144936e-06, "logits/chosen": -2.8902928829193115, "logits/rejected": -2.3472774028778076, "logps/chosen": -115.43354797363281, "logps/rejected": -998.4930419921875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6331021189689636, "rewards/margins": 8.963098526000977, "rewards/rejected": -9.596200942993164, "step": 39630 }, { "epoch": 0.47, "learning_rate": 3.1511920734099995e-06, "logits/chosen": -2.882178783416748, "logits/rejected": -2.4651219844818115, "logps/chosen": -97.53626251220703, "logps/rejected": -937.9562377929688, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.5183827877044678, "rewards/margins": 8.479047775268555, "rewards/rejected": -8.997430801391602, "step": 39640 }, { "epoch": 0.47, "learning_rate": 3.1501834559787353e-06, "logits/chosen": -2.881126642227173, "logits/rejected": -2.5014963150024414, "logps/chosen": -91.19293212890625, "logps/rejected": -892.8868408203125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5009805560112, "rewards/margins": 8.04319953918457, "rewards/rejected": -8.544179916381836, "step": 39650 }, { "epoch": 0.47, "learning_rate": 3.149174725027246e-06, "logits/chosen": -2.9055464267730713, "logits/rejected": -2.3938145637512207, "logps/chosen": -129.64132690429688, "logps/rejected": -956.1356201171875, "loss": 0.0653, "rewards/accuracies": 1.0, "rewards/chosen": -0.7583301663398743, "rewards/margins": 8.42167854309082, "rewards/rejected": -9.180009841918945, "step": 39660 }, { "epoch": 0.47, "learning_rate": 3.148165880731652e-06, "logits/chosen": -2.8294126987457275, "logits/rejected": -2.221498489379883, "logps/chosen": -133.82960510253906, "logps/rejected": -1023.21923828125, "loss": 0.0335, "rewards/accuracies": 1.0, "rewards/chosen": -0.8408204317092896, "rewards/margins": 8.995532989501953, "rewards/rejected": -9.836353302001953, "step": 39670 }, { "epoch": 0.47, "learning_rate": 3.1471569232680955e-06, "logits/chosen": -2.855804204940796, "logits/rejected": -2.614443302154541, "logps/chosen": -89.58292388916016, "logps/rejected": -775.84423828125, "loss": 0.1912, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5456705093383789, "rewards/margins": 6.848987579345703, "rewards/rejected": -7.394658088684082, "step": 39680 }, { "epoch": 0.48, "learning_rate": 3.146147852812738e-06, "logits/chosen": -2.8913254737854004, "logits/rejected": -2.2407758235931396, "logps/chosen": -151.94454956054688, "logps/rejected": -988.7178955078125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9424858093261719, "rewards/margins": 8.531221389770508, "rewards/rejected": -9.47370719909668, "step": 39690 }, { "epoch": 0.48, "learning_rate": 3.1451386695417603e-06, "logits/chosen": -2.876943826675415, "logits/rejected": -2.2150115966796875, "logps/chosen": -159.75753784179688, "logps/rejected": -1012.2302856445312, "loss": 0.0667, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0700551271438599, "rewards/margins": 8.647648811340332, "rewards/rejected": -9.717703819274902, "step": 39700 }, { "epoch": 0.48, "learning_rate": 3.1441293736313617e-06, "logits/chosen": -2.88671875, "logits/rejected": -2.4988315105438232, "logps/chosen": -127.05622863769531, "logps/rejected": -913.7755126953125, "loss": 0.031, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8141683340072632, "rewards/margins": 7.919226169586182, "rewards/rejected": -8.73339557647705, "step": 39710 }, { "epoch": 0.48, "learning_rate": 3.1431199652577648e-06, "logits/chosen": -2.8799686431884766, "logits/rejected": -2.2881627082824707, "logps/chosen": -116.06787109375, "logps/rejected": -996.5863037109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6905525326728821, "rewards/margins": 8.870736122131348, "rewards/rejected": -9.561288833618164, "step": 39720 }, { "epoch": 0.48, "learning_rate": 3.142110444597207e-06, "logits/chosen": -2.8783762454986572, "logits/rejected": -2.300504446029663, "logps/chosen": -124.05406188964844, "logps/rejected": -1008.4967651367188, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7622409462928772, "rewards/margins": 8.932554244995117, "rewards/rejected": -9.694796562194824, "step": 39730 }, { "epoch": 0.48, "learning_rate": 3.141100811825949e-06, "logits/chosen": -2.889251232147217, "logits/rejected": -2.2982380390167236, "logps/chosen": -126.2799301147461, "logps/rejected": -939.2496337890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7886072993278503, "rewards/margins": 8.207741737365723, "rewards/rejected": -8.996349334716797, "step": 39740 }, { "epoch": 0.48, "learning_rate": 3.14009106712027e-06, "logits/chosen": -2.863036632537842, "logits/rejected": -2.4133670330047607, "logps/chosen": -109.6854476928711, "logps/rejected": -930.2263793945312, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6663210391998291, "rewards/margins": 8.255139350891113, "rewards/rejected": -8.921459197998047, "step": 39750 }, { "epoch": 0.48, "learning_rate": 3.1390812106564696e-06, "logits/chosen": -2.887348175048828, "logits/rejected": -2.422332763671875, "logps/chosen": -117.01029968261719, "logps/rejected": -927.0057373046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7343708872795105, "rewards/margins": 8.15443229675293, "rewards/rejected": -8.888803482055664, "step": 39760 }, { "epoch": 0.48, "learning_rate": 3.138071242610863e-06, "logits/chosen": -2.8747737407684326, "logits/rejected": -2.324444532394409, "logps/chosen": -131.08755493164062, "logps/rejected": -936.5118408203125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8114193677902222, "rewards/margins": 8.145745277404785, "rewards/rejected": -8.957164764404297, "step": 39770 }, { "epoch": 0.48, "learning_rate": 3.13706116315979e-06, "logits/chosen": -2.8927390575408936, "logits/rejected": -2.3615622520446777, "logps/chosen": -115.2569351196289, "logps/rejected": -1033.649169921875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7079039812088013, "rewards/margins": 9.236292839050293, "rewards/rejected": -9.944197654724121, "step": 39780 }, { "epoch": 0.48, "learning_rate": 3.1360509724796086e-06, "logits/chosen": -2.870802402496338, "logits/rejected": -2.3762359619140625, "logps/chosen": -123.16361236572266, "logps/rejected": -913.9573364257812, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.7402955293655396, "rewards/margins": 8.002378463745117, "rewards/rejected": -8.742674827575684, "step": 39790 }, { "epoch": 0.48, "learning_rate": 3.1350406707466926e-06, "logits/chosen": -2.8242616653442383, "logits/rejected": -2.380991220474243, "logps/chosen": -133.30224609375, "logps/rejected": -869.0548706054688, "loss": 0.0273, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8822706341743469, "rewards/margins": 7.4220290184021, "rewards/rejected": -8.304300308227539, "step": 39800 }, { "epoch": 0.48, "learning_rate": 3.13403025813744e-06, "logits/chosen": -2.8986098766326904, "logits/rejected": -2.4010977745056152, "logps/chosen": -119.06983947753906, "logps/rejected": -937.1633911132812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7354077100753784, "rewards/margins": 8.248968124389648, "rewards/rejected": -8.984376907348633, "step": 39810 }, { "epoch": 0.48, "learning_rate": 3.133019734828264e-06, "logits/chosen": -2.878751039505005, "logits/rejected": -2.3709394931793213, "logps/chosen": -135.4638214111328, "logps/rejected": -872.1649169921875, "loss": 0.091, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.866962730884552, "rewards/margins": 7.471412658691406, "rewards/rejected": -8.338374137878418, "step": 39820 }, { "epoch": 0.48, "learning_rate": 3.1320091009956015e-06, "logits/chosen": -2.869882822036743, "logits/rejected": -2.062575578689575, "logps/chosen": -168.31185913085938, "logps/rejected": -1148.0452880859375, "loss": 0.1686, "rewards/accuracies": 1.0, "rewards/chosen": -1.0908174514770508, "rewards/margins": 9.968141555786133, "rewards/rejected": -11.058958053588867, "step": 39830 }, { "epoch": 0.48, "learning_rate": 3.130998356815905e-06, "logits/chosen": -2.8520007133483887, "logits/rejected": -2.172616481781006, "logps/chosen": -172.0139923095703, "logps/rejected": -1130.449951171875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1530001163482666, "rewards/margins": 9.7278470993042, "rewards/rejected": -10.880846977233887, "step": 39840 }, { "epoch": 0.48, "learning_rate": 3.129987502465649e-06, "logits/chosen": -2.865429639816284, "logits/rejected": -2.4230637550354004, "logps/chosen": -139.6229705810547, "logps/rejected": -1041.464111328125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8866075277328491, "rewards/margins": 9.126481056213379, "rewards/rejected": -10.013089179992676, "step": 39850 }, { "epoch": 0.48, "learning_rate": 3.1289765381213233e-06, "logits/chosen": -2.8388772010803223, "logits/rejected": -2.46382474899292, "logps/chosen": -140.3583526611328, "logps/rejected": -950.9318237304688, "loss": 0.0439, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9468053579330444, "rewards/margins": 8.17180061340332, "rewards/rejected": -9.11860466003418, "step": 39860 }, { "epoch": 0.48, "learning_rate": 3.1279654639594416e-06, "logits/chosen": -2.873345375061035, "logits/rejected": -2.525858163833618, "logps/chosen": -105.76866149902344, "logps/rejected": -899.8685302734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6475944519042969, "rewards/margins": 7.968022346496582, "rewards/rejected": -8.615616798400879, "step": 39870 }, { "epoch": 0.48, "learning_rate": 3.1269542801565343e-06, "logits/chosen": -2.8515892028808594, "logits/rejected": -2.293921947479248, "logps/chosen": -128.50865173339844, "logps/rejected": -972.59423828125, "loss": 0.0209, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7841429114341736, "rewards/margins": 8.558996200561523, "rewards/rejected": -9.343138694763184, "step": 39880 }, { "epoch": 0.48, "learning_rate": 3.1259429868891507e-06, "logits/chosen": -2.851634979248047, "logits/rejected": -2.346764087677002, "logps/chosen": -125.27542877197266, "logps/rejected": -996.9728393554688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6994463801383972, "rewards/margins": 8.881528854370117, "rewards/rejected": -9.580973625183105, "step": 39890 }, { "epoch": 0.48, "learning_rate": 3.1249315843338595e-06, "logits/chosen": -2.900926113128662, "logits/rejected": -2.1982691287994385, "logps/chosen": -144.9410400390625, "logps/rejected": -1016.9295654296875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8751069903373718, "rewards/margins": 8.900044441223145, "rewards/rejected": -9.775151252746582, "step": 39900 }, { "epoch": 0.48, "learning_rate": 3.1239200726672496e-06, "logits/chosen": -2.909754514694214, "logits/rejected": -2.3171467781066895, "logps/chosen": -122.54833984375, "logps/rejected": -960.9749755859375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.7764630317687988, "rewards/margins": 8.445714950561523, "rewards/rejected": -9.22217845916748, "step": 39910 }, { "epoch": 0.48, "learning_rate": 3.1229084520659288e-06, "logits/chosen": -2.87420916557312, "logits/rejected": -2.352416515350342, "logps/chosen": -132.3212127685547, "logps/rejected": -1032.5291748046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8608909845352173, "rewards/margins": 9.065999984741211, "rewards/rejected": -9.926891326904297, "step": 39920 }, { "epoch": 0.48, "learning_rate": 3.121896722706521e-06, "logits/chosen": -2.875448703765869, "logits/rejected": -2.386685848236084, "logps/chosen": -131.72767639160156, "logps/rejected": -941.7996215820312, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.8669220805168152, "rewards/margins": 8.175834655761719, "rewards/rejected": -9.042757034301758, "step": 39930 }, { "epoch": 0.48, "learning_rate": 3.1208848847656736e-06, "logits/chosen": -2.8645381927490234, "logits/rejected": -2.2191803455352783, "logps/chosen": -131.8549346923828, "logps/rejected": -1019.80419921875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8296968340873718, "rewards/margins": 8.979586601257324, "rewards/rejected": -9.809283256530762, "step": 39940 }, { "epoch": 0.48, "learning_rate": 3.11987293842005e-06, "logits/chosen": -2.7986960411071777, "logits/rejected": -2.007046699523926, "logps/chosen": -188.60594177246094, "logps/rejected": -1077.611572265625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3874114751815796, "rewards/margins": 8.992666244506836, "rewards/rejected": -10.380077362060547, "step": 39950 }, { "epoch": 0.48, "learning_rate": 3.118860883846332e-06, "logits/chosen": -2.8407821655273438, "logits/rejected": -1.9256356954574585, "logps/chosen": -180.49241638183594, "logps/rejected": -1080.5706787109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2289224863052368, "rewards/margins": 9.16771125793457, "rewards/rejected": -10.396634101867676, "step": 39960 }, { "epoch": 0.48, "learning_rate": 3.1178487212212226e-06, "logits/chosen": -2.8699963092803955, "logits/rejected": -2.1672635078430176, "logps/chosen": -150.876953125, "logps/rejected": -1028.3642578125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.9889694452285767, "rewards/margins": 8.89990234375, "rewards/rejected": -9.888871192932129, "step": 39970 }, { "epoch": 0.48, "learning_rate": 3.116836450721443e-06, "logits/chosen": -2.865630626678467, "logits/rejected": -2.253511428833008, "logps/chosen": -136.05453491210938, "logps/rejected": -949.3382568359375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9115120768547058, "rewards/margins": 8.180425643920898, "rewards/rejected": -9.091937065124512, "step": 39980 }, { "epoch": 0.48, "learning_rate": 3.1158240725237323e-06, "logits/chosen": -2.8673746585845947, "logits/rejected": -2.3529324531555176, "logps/chosen": -156.50697326660156, "logps/rejected": -885.7130126953125, "loss": 0.1215, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.023432970046997, "rewards/margins": 7.4301862716674805, "rewards/rejected": -8.453619003295898, "step": 39990 }, { "epoch": 0.48, "learning_rate": 3.1148115868048483e-06, "logits/chosen": -2.84138560295105, "logits/rejected": -2.4454684257507324, "logps/chosen": -140.49081420898438, "logps/rejected": -877.7703247070312, "loss": 0.1577, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0017915964126587, "rewards/margins": 7.4183030128479, "rewards/rejected": -8.420095443725586, "step": 40000 }, { "epoch": 0.48, "learning_rate": 3.113798993741569e-06, "logits/chosen": -2.9115917682647705, "logits/rejected": -1.9809764623641968, "logps/chosen": -164.702392578125, "logps/rejected": -1033.73095703125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.1087702512741089, "rewards/margins": 8.821587562561035, "rewards/rejected": -9.930356979370117, "step": 40010 }, { "epoch": 0.48, "learning_rate": 3.112786293510691e-06, "logits/chosen": -2.8111395835876465, "logits/rejected": -2.026905059814453, "logps/chosen": -173.0191192626953, "logps/rejected": -1145.77197265625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.1972068548202515, "rewards/margins": 9.838772773742676, "rewards/rejected": -11.035978317260742, "step": 40020 }, { "epoch": 0.48, "learning_rate": 3.111773486289027e-06, "logits/chosen": -2.8570001125335693, "logits/rejected": -2.208378314971924, "logps/chosen": -141.7888641357422, "logps/rejected": -1045.9686279296875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8849408030509949, "rewards/margins": 9.179056167602539, "rewards/rejected": -10.063996315002441, "step": 40030 }, { "epoch": 0.48, "learning_rate": 3.1107605722534124e-06, "logits/chosen": -2.863187074661255, "logits/rejected": -2.366302490234375, "logps/chosen": -120.6576156616211, "logps/rejected": -871.1008911132812, "loss": 0.0803, "rewards/accuracies": 1.0, "rewards/chosen": -0.7825535535812378, "rewards/margins": 7.543178558349609, "rewards/rejected": -8.32573127746582, "step": 40040 }, { "epoch": 0.48, "learning_rate": 3.1097475515806973e-06, "logits/chosen": -2.916656017303467, "logits/rejected": -2.2932307720184326, "logps/chosen": -130.63682556152344, "logps/rejected": -998.490234375, "loss": 0.1007, "rewards/accuracies": 1.0, "rewards/chosen": -0.800247311592102, "rewards/margins": 8.804367065429688, "rewards/rejected": -9.6046142578125, "step": 40050 }, { "epoch": 0.48, "learning_rate": 3.1087344244477534e-06, "logits/chosen": -2.8609814643859863, "logits/rejected": -2.453956365585327, "logps/chosen": -110.4376449584961, "logps/rejected": -910.0368041992188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6673017740249634, "rewards/margins": 8.04764175415039, "rewards/rejected": -8.714942932128906, "step": 40060 }, { "epoch": 0.48, "learning_rate": 3.1077211910314705e-06, "logits/chosen": -2.890076160430908, "logits/rejected": -2.4180855751037598, "logps/chosen": -112.82710266113281, "logps/rejected": -914.6575927734375, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.7033356428146362, "rewards/margins": 8.073155403137207, "rewards/rejected": -8.776491165161133, "step": 40070 }, { "epoch": 0.48, "learning_rate": 3.1067078515087547e-06, "logits/chosen": -2.8618578910827637, "logits/rejected": -2.279101610183716, "logps/chosen": -136.23318481445312, "logps/rejected": -998.6018676757812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9116157293319702, "rewards/margins": 8.68718147277832, "rewards/rejected": -9.598797798156738, "step": 40080 }, { "epoch": 0.48, "learning_rate": 3.105694406056533e-06, "logits/chosen": -2.8452000617980957, "logits/rejected": -2.429701805114746, "logps/chosen": -106.15660095214844, "logps/rejected": -895.9013671875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5973354578018188, "rewards/margins": 7.976217746734619, "rewards/rejected": -8.573553085327148, "step": 40090 }, { "epoch": 0.48, "learning_rate": 3.1046808548517504e-06, "logits/chosen": -2.8402185440063477, "logits/rejected": -2.0888190269470215, "logps/chosen": -156.57241821289062, "logps/rejected": -1057.127197265625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9975894093513489, "rewards/margins": 9.169675827026367, "rewards/rejected": -10.167266845703125, "step": 40100 }, { "epoch": 0.48, "learning_rate": 3.10366719807137e-06, "logits/chosen": -2.830137252807617, "logits/rejected": -2.248652935028076, "logps/chosen": -153.69044494628906, "logps/rejected": -1006.435546875, "loss": 0.0948, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0518639087677002, "rewards/margins": 8.615891456604004, "rewards/rejected": -9.667754173278809, "step": 40110 }, { "epoch": 0.48, "learning_rate": 3.102653435892374e-06, "logits/chosen": -2.87330961227417, "logits/rejected": -2.286221981048584, "logps/chosen": -137.7262725830078, "logps/rejected": -935.2136840820312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9064399600028992, "rewards/margins": 8.05791187286377, "rewards/rejected": -8.964351654052734, "step": 40120 }, { "epoch": 0.48, "learning_rate": 3.101639568491761e-06, "logits/chosen": -2.9322330951690674, "logits/rejected": -2.7281811237335205, "logps/chosen": -83.48158264160156, "logps/rejected": -793.4346923828125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.47972577810287476, "rewards/margins": 7.078790187835693, "rewards/rejected": -7.558516025543213, "step": 40130 }, { "epoch": 0.48, "learning_rate": 3.100625596046551e-06, "logits/chosen": -2.84269118309021, "logits/rejected": -2.21886944770813, "logps/chosen": -144.38525390625, "logps/rejected": -988.8094482421875, "loss": 0.1604, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9873839616775513, "rewards/margins": 8.519112586975098, "rewards/rejected": -9.506495475769043, "step": 40140 }, { "epoch": 0.48, "learning_rate": 3.0996115187337796e-06, "logits/chosen": -2.864565372467041, "logits/rejected": -2.3560714721679688, "logps/chosen": -109.3364028930664, "logps/rejected": -901.6151123046875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6488001346588135, "rewards/margins": 7.989743232727051, "rewards/rejected": -8.638543128967285, "step": 40150 }, { "epoch": 0.48, "learning_rate": 3.0985973367305023e-06, "logits/chosen": -2.886810779571533, "logits/rejected": -2.544445753097534, "logps/chosen": -89.06460571289062, "logps/rejected": -845.2825317382812, "loss": 0.0978, "rewards/accuracies": 1.0, "rewards/chosen": -0.5010775327682495, "rewards/margins": 7.583953857421875, "rewards/rejected": -8.085031509399414, "step": 40160 }, { "epoch": 0.48, "learning_rate": 3.097583050213793e-06, "logits/chosen": -2.895413637161255, "logits/rejected": -2.2158403396606445, "logps/chosen": -159.70547485351562, "logps/rejected": -900.70849609375, "loss": 0.1301, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1455070972442627, "rewards/margins": 7.478243827819824, "rewards/rejected": -8.623750686645508, "step": 40170 }, { "epoch": 0.48, "learning_rate": 3.096568659360743e-06, "logits/chosen": -2.814664363861084, "logits/rejected": -2.295182943344116, "logps/chosen": -124.5439682006836, "logps/rejected": -972.3626708984375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7998842000961304, "rewards/margins": 8.536909103393555, "rewards/rejected": -9.336793899536133, "step": 40180 }, { "epoch": 0.48, "learning_rate": 3.0955541643484615e-06, "logits/chosen": -2.831355333328247, "logits/rejected": -2.3151638507843018, "logps/chosen": -123.0036849975586, "logps/rejected": -918.4879150390625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.763797402381897, "rewards/margins": 8.031538009643555, "rewards/rejected": -8.79533576965332, "step": 40190 }, { "epoch": 0.48, "learning_rate": 3.0945395653540767e-06, "logits/chosen": -2.8804516792297363, "logits/rejected": -2.490669012069702, "logps/chosen": -112.3538589477539, "logps/rejected": -936.8367919921875, "loss": 0.1451, "rewards/accuracies": 1.0, "rewards/chosen": -0.7149341702461243, "rewards/margins": 8.268424034118652, "rewards/rejected": -8.983358383178711, "step": 40200 }, { "epoch": 0.48, "learning_rate": 3.0935248625547357e-06, "logits/chosen": -2.9016685485839844, "logits/rejected": -2.1187667846679688, "logps/chosen": -166.54437255859375, "logps/rejected": -1094.3436279296875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.1030921936035156, "rewards/margins": 9.443429946899414, "rewards/rejected": -10.54652214050293, "step": 40210 }, { "epoch": 0.48, "learning_rate": 3.092510056127602e-06, "logits/chosen": -2.8374972343444824, "logits/rejected": -2.270540952682495, "logps/chosen": -138.10643005371094, "logps/rejected": -1081.7012939453125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8996322751045227, "rewards/margins": 9.524888038635254, "rewards/rejected": -10.424520492553711, "step": 40220 }, { "epoch": 0.48, "learning_rate": 3.091495146249858e-06, "logits/chosen": -2.8775687217712402, "logits/rejected": -2.378899574279785, "logps/chosen": -113.90706634521484, "logps/rejected": -936.21044921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6611319780349731, "rewards/margins": 8.323343276977539, "rewards/rejected": -8.984475135803223, "step": 40230 }, { "epoch": 0.48, "learning_rate": 3.0904801330987045e-06, "logits/chosen": -2.879209518432617, "logits/rejected": -2.2710812091827393, "logps/chosen": -132.44468688964844, "logps/rejected": -979.2918090820312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7978708148002625, "rewards/margins": 8.611736297607422, "rewards/rejected": -9.409607887268066, "step": 40240 }, { "epoch": 0.48, "learning_rate": 3.089465016851359e-06, "logits/chosen": -2.8682973384857178, "logits/rejected": -2.4059767723083496, "logps/chosen": -131.5323486328125, "logps/rejected": -985.2692260742188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8542981147766113, "rewards/margins": 8.614645957946777, "rewards/rejected": -9.468942642211914, "step": 40250 }, { "epoch": 0.48, "learning_rate": 3.088449797685059e-06, "logits/chosen": -2.883625030517578, "logits/rejected": -2.527247428894043, "logps/chosen": -100.90335845947266, "logps/rejected": -909.1046142578125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5911697149276733, "rewards/margins": 8.117376327514648, "rewards/rejected": -8.70854663848877, "step": 40260 }, { "epoch": 0.48, "learning_rate": 3.0874344757770596e-06, "logits/chosen": -2.89353609085083, "logits/rejected": -2.198730945587158, "logps/chosen": -160.56752014160156, "logps/rejected": -1042.233642578125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9463257789611816, "rewards/margins": 9.081735610961914, "rewards/rejected": -10.028061866760254, "step": 40270 }, { "epoch": 0.48, "learning_rate": 3.0864190513046316e-06, "logits/chosen": -2.887012004852295, "logits/rejected": -2.350477695465088, "logps/chosen": -134.27450561523438, "logps/rejected": -959.4217529296875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8298087120056152, "rewards/margins": 8.363468170166016, "rewards/rejected": -9.193277359008789, "step": 40280 }, { "epoch": 0.48, "learning_rate": 3.085403524445066e-06, "logits/chosen": -2.8453688621520996, "logits/rejected": -2.2772536277770996, "logps/chosen": -132.19912719726562, "logps/rejected": -1007.8045043945312, "loss": 0.16, "rewards/accuracies": 1.0, "rewards/chosen": -0.8429725766181946, "rewards/margins": 8.838179588317871, "rewards/rejected": -9.68115234375, "step": 40290 }, { "epoch": 0.48, "learning_rate": 3.0843878953756713e-06, "logits/chosen": -2.886094570159912, "logits/rejected": -2.304121255874634, "logps/chosen": -129.40457153320312, "logps/rejected": -1023.6793823242188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8049567341804504, "rewards/margins": 9.048234939575195, "rewards/rejected": -9.853192329406738, "step": 40300 }, { "epoch": 0.48, "learning_rate": 3.0833721642737723e-06, "logits/chosen": -2.8491029739379883, "logits/rejected": -2.2441115379333496, "logps/chosen": -128.15101623535156, "logps/rejected": -955.9329223632812, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7814013957977295, "rewards/margins": 8.386310577392578, "rewards/rejected": -9.16771125793457, "step": 40310 }, { "epoch": 0.48, "learning_rate": 3.082356331316714e-06, "logits/chosen": -2.8663387298583984, "logits/rejected": -2.350844144821167, "logps/chosen": -108.11592864990234, "logps/rejected": -927.2449340820312, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6796005964279175, "rewards/margins": 8.217287063598633, "rewards/rejected": -8.89688777923584, "step": 40320 }, { "epoch": 0.48, "learning_rate": 3.081340396681858e-06, "logits/chosen": -2.859884262084961, "logits/rejected": -2.443881034851074, "logps/chosen": -123.4979476928711, "logps/rejected": -917.9967651367188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8065655827522278, "rewards/margins": 7.990392208099365, "rewards/rejected": -8.796957969665527, "step": 40330 }, { "epoch": 0.48, "learning_rate": 3.0803243605465834e-06, "logits/chosen": -2.848527431488037, "logits/rejected": -2.290194034576416, "logps/chosen": -128.3804473876953, "logps/rejected": -1022.2902221679688, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8085096478462219, "rewards/margins": 9.006604194641113, "rewards/rejected": -9.81511402130127, "step": 40340 }, { "epoch": 0.48, "learning_rate": 3.0793082230882864e-06, "logits/chosen": -2.8891029357910156, "logits/rejected": -2.266232490539551, "logps/chosen": -134.3442840576172, "logps/rejected": -1017.8001708984375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8042021989822388, "rewards/margins": 8.969118118286133, "rewards/rejected": -9.773320198059082, "step": 40350 }, { "epoch": 0.48, "learning_rate": 3.0782919844843834e-06, "logits/chosen": -2.844836711883545, "logits/rejected": -2.284237861633301, "logps/chosen": -116.66031646728516, "logps/rejected": -950.9710083007812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7249956727027893, "rewards/margins": 8.404356956481934, "rewards/rejected": -9.129354476928711, "step": 40360 }, { "epoch": 0.48, "learning_rate": 3.077275644912306e-06, "logits/chosen": -2.857020616531372, "logits/rejected": -2.285228967666626, "logps/chosen": -133.90869140625, "logps/rejected": -983.1187744140625, "loss": 0.0222, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8671789169311523, "rewards/margins": 8.575403213500977, "rewards/rejected": -9.442583084106445, "step": 40370 }, { "epoch": 0.48, "learning_rate": 3.0762592045495045e-06, "logits/chosen": -2.864211320877075, "logits/rejected": -2.2685940265655518, "logps/chosen": -145.2040252685547, "logps/rejected": -971.1726684570312, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.9949928522109985, "rewards/margins": 8.317804336547852, "rewards/rejected": -9.312796592712402, "step": 40380 }, { "epoch": 0.48, "learning_rate": 3.0752426635734456e-06, "logits/chosen": -2.8577511310577393, "logits/rejected": -2.5656299591064453, "logps/chosen": -137.05711364746094, "logps/rejected": -844.8361206054688, "loss": 0.1653, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9625255465507507, "rewards/margins": 7.111957550048828, "rewards/rejected": -8.074482917785645, "step": 40390 }, { "epoch": 0.48, "learning_rate": 3.074226022161616e-06, "logits/chosen": -2.858342170715332, "logits/rejected": -2.4477319717407227, "logps/chosen": -129.15884399414062, "logps/rejected": -783.0576782226562, "loss": 0.1658, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8921300768852234, "rewards/margins": 6.5652055740356445, "rewards/rejected": -7.457335472106934, "step": 40400 }, { "epoch": 0.48, "learning_rate": 3.073209280491517e-06, "logits/chosen": -2.879735231399536, "logits/rejected": -2.2256691455841064, "logps/chosen": -165.2672576904297, "logps/rejected": -1008.7355346679688, "loss": 0.0962, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1589157581329346, "rewards/margins": 8.529712677001953, "rewards/rejected": -9.688628196716309, "step": 40410 }, { "epoch": 0.48, "learning_rate": 3.0721924387406705e-06, "logits/chosen": -2.8635544776916504, "logits/rejected": -2.212435245513916, "logps/chosen": -154.44210815429688, "logps/rejected": -961.470703125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.033355951309204, "rewards/margins": 8.200065612792969, "rewards/rejected": -9.233421325683594, "step": 40420 }, { "epoch": 0.48, "learning_rate": 3.0711754970866124e-06, "logits/chosen": -2.814483404159546, "logits/rejected": -2.0818867683410645, "logps/chosen": -174.0628662109375, "logps/rejected": -1041.773681640625, "loss": 0.0783, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2245399951934814, "rewards/margins": 8.795114517211914, "rewards/rejected": -10.019655227661133, "step": 40430 }, { "epoch": 0.48, "learning_rate": 3.0701584557069e-06, "logits/chosen": -2.9302945137023926, "logits/rejected": -2.576442241668701, "logps/chosen": -136.0111541748047, "logps/rejected": -847.8326416015625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.9268911480903625, "rewards/margins": 7.1593499183654785, "rewards/rejected": -8.086240768432617, "step": 40440 }, { "epoch": 0.48, "learning_rate": 3.069141314779104e-06, "logits/chosen": -2.9051525592803955, "logits/rejected": -2.3652491569519043, "logps/chosen": -126.02889251708984, "logps/rejected": -942.1611328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7573686242103577, "rewards/margins": 8.280942916870117, "rewards/rejected": -9.038312911987305, "step": 40450 }, { "epoch": 0.48, "learning_rate": 3.0681240744808155e-06, "logits/chosen": -2.85866117477417, "logits/rejected": -2.1833529472351074, "logps/chosen": -157.62539672851562, "logps/rejected": -886.1641845703125, "loss": 0.1361, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.074965000152588, "rewards/margins": 7.4008588790893555, "rewards/rejected": -8.475824356079102, "step": 40460 }, { "epoch": 0.48, "learning_rate": 3.0671067349896414e-06, "logits/chosen": -2.9049525260925293, "logits/rejected": -2.4635064601898193, "logps/chosen": -93.25125885009766, "logps/rejected": -872.3060302734375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5042322874069214, "rewards/margins": 7.846030235290527, "rewards/rejected": -8.350262641906738, "step": 40470 }, { "epoch": 0.48, "learning_rate": 3.0660892964832063e-06, "logits/chosen": -2.8567094802856445, "logits/rejected": -2.3490407466888428, "logps/chosen": -120.60008239746094, "logps/rejected": -865.7765502929688, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -0.7051612138748169, "rewards/margins": 7.53530216217041, "rewards/rejected": -8.240462303161621, "step": 40480 }, { "epoch": 0.48, "learning_rate": 3.0650717591391525e-06, "logits/chosen": -2.85884952545166, "logits/rejected": -2.1521718502044678, "logps/chosen": -153.60467529296875, "logps/rejected": -1026.5003662109375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.0588277578353882, "rewards/margins": 8.799504280090332, "rewards/rejected": -9.858332633972168, "step": 40490 }, { "epoch": 0.48, "learning_rate": 3.0640541231351396e-06, "logits/chosen": -2.8063087463378906, "logits/rejected": -2.2508037090301514, "logps/chosen": -131.93357849121094, "logps/rejected": -1030.438720703125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8734769821166992, "rewards/margins": 9.021103858947754, "rewards/rejected": -9.89458179473877, "step": 40500 }, { "epoch": 0.48, "learning_rate": 3.0630363886488425e-06, "logits/chosen": -2.8520493507385254, "logits/rejected": -2.3950729370117188, "logps/chosen": -108.89466857910156, "logps/rejected": -836.8155517578125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.6554235816001892, "rewards/margins": 7.3412981033325195, "rewards/rejected": -7.996720790863037, "step": 40510 }, { "epoch": 0.49, "learning_rate": 3.0620185558579556e-06, "logits/chosen": -2.88016676902771, "logits/rejected": -2.4926228523254395, "logps/chosen": -131.96926879882812, "logps/rejected": -941.8318481445312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9008775949478149, "rewards/margins": 8.123581886291504, "rewards/rejected": -9.024458885192871, "step": 40520 }, { "epoch": 0.49, "learning_rate": 3.06100062494019e-06, "logits/chosen": -2.8412413597106934, "logits/rejected": -2.4445958137512207, "logps/chosen": -112.544189453125, "logps/rejected": -823.3718872070312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6863248348236084, "rewards/margins": 7.164790153503418, "rewards/rejected": -7.8511152267456055, "step": 40530 }, { "epoch": 0.49, "learning_rate": 3.0599825960732738e-06, "logits/chosen": -2.871368408203125, "logits/rejected": -2.343414783477783, "logps/chosen": -147.40377807617188, "logps/rejected": -851.9412841796875, "loss": 0.1161, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0044552087783813, "rewards/margins": 7.134436130523682, "rewards/rejected": -8.138890266418457, "step": 40540 }, { "epoch": 0.49, "learning_rate": 3.0589644694349514e-06, "logits/chosen": -2.8895351886749268, "logits/rejected": -2.520014762878418, "logps/chosen": -114.09393310546875, "logps/rejected": -954.7579956054688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7006633877754211, "rewards/margins": 8.453529357910156, "rewards/rejected": -9.154192924499512, "step": 40550 }, { "epoch": 0.49, "learning_rate": 3.057946245202985e-06, "logits/chosen": -2.817511558532715, "logits/rejected": -2.2404210567474365, "logps/chosen": -119.1873550415039, "logps/rejected": -980.7691650390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.7691472768783569, "rewards/margins": 8.644603729248047, "rewards/rejected": -9.413751602172852, "step": 40560 }, { "epoch": 0.49, "learning_rate": 3.056927923555154e-06, "logits/chosen": -2.8516952991485596, "logits/rejected": -2.3144097328186035, "logps/chosen": -150.09275817871094, "logps/rejected": -945.5909423828125, "loss": 0.0683, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0620018243789673, "rewards/margins": 8.000494956970215, "rewards/rejected": -9.06249713897705, "step": 40570 }, { "epoch": 0.49, "learning_rate": 3.055909504669254e-06, "logits/chosen": -2.8606536388397217, "logits/rejected": -2.3196749687194824, "logps/chosen": -142.82473754882812, "logps/rejected": -998.3361206054688, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -0.972525954246521, "rewards/margins": 8.612702369689941, "rewards/rejected": -9.585227966308594, "step": 40580 }, { "epoch": 0.49, "learning_rate": 3.0548909887230994e-06, "logits/chosen": -2.846381664276123, "logits/rejected": -2.2712948322296143, "logps/chosen": -138.00732421875, "logps/rejected": -977.6062622070312, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9285805821418762, "rewards/margins": 8.442197799682617, "rewards/rejected": -9.370779037475586, "step": 40590 }, { "epoch": 0.49, "learning_rate": 3.0538723758945184e-06, "logits/chosen": -2.8375072479248047, "logits/rejected": -2.2909748554229736, "logps/chosen": -133.6136474609375, "logps/rejected": -933.6142578125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.8837841153144836, "rewards/margins": 8.0653657913208, "rewards/rejected": -8.949150085449219, "step": 40600 }, { "epoch": 0.49, "learning_rate": 3.0528536663613585e-06, "logits/chosen": -2.9185433387756348, "logits/rejected": -2.600667715072632, "logps/chosen": -99.45135498046875, "logps/rejected": -863.1331787109375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5944146513938904, "rewards/margins": 7.676963806152344, "rewards/rejected": -8.271378517150879, "step": 40610 }, { "epoch": 0.49, "learning_rate": 3.051834860301484e-06, "logits/chosen": -2.865281820297241, "logits/rejected": -1.900408148765564, "logps/chosen": -183.21246337890625, "logps/rejected": -1116.5576171875, "loss": 0.1569, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2916494607925415, "rewards/margins": 9.4744291305542, "rewards/rejected": -10.766079902648926, "step": 40620 }, { "epoch": 0.49, "learning_rate": 3.0508159578927764e-06, "logits/chosen": -2.8375649452209473, "logits/rejected": -2.310776710510254, "logps/chosen": -113.53791809082031, "logps/rejected": -907.6793823242188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.685112714767456, "rewards/margins": 8.003865242004395, "rewards/rejected": -8.688976287841797, "step": 40630 }, { "epoch": 0.49, "learning_rate": 3.0497969593131317e-06, "logits/chosen": -2.8859362602233887, "logits/rejected": -2.101442813873291, "logps/chosen": -173.18544006347656, "logps/rejected": -1085.526123046875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.163927435874939, "rewards/margins": 9.2783203125, "rewards/rejected": -10.44224739074707, "step": 40640 }, { "epoch": 0.49, "learning_rate": 3.0487778647404653e-06, "logits/chosen": -2.851226329803467, "logits/rejected": -2.393444538116455, "logps/chosen": -101.7668228149414, "logps/rejected": -845.0349731445312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6147980690002441, "rewards/margins": 7.455562591552734, "rewards/rejected": -8.07036018371582, "step": 40650 }, { "epoch": 0.49, "learning_rate": 3.0477586743527076e-06, "logits/chosen": -2.8582024574279785, "logits/rejected": -2.4264349937438965, "logps/chosen": -121.2823257446289, "logps/rejected": -906.8746337890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7648338675498962, "rewards/margins": 7.908563137054443, "rewards/rejected": -8.673396110534668, "step": 40660 }, { "epoch": 0.49, "learning_rate": 3.0467393883278074e-06, "logits/chosen": -2.8242712020874023, "logits/rejected": -2.295184373855591, "logps/chosen": -137.03717041015625, "logps/rejected": -955.42236328125, "loss": 0.0413, "rewards/accuracies": 1.0, "rewards/chosen": -0.9324377775192261, "rewards/margins": 8.244138717651367, "rewards/rejected": -9.176576614379883, "step": 40670 }, { "epoch": 0.49, "learning_rate": 3.0457200068437277e-06, "logits/chosen": -2.8887720108032227, "logits/rejected": -2.5077011585235596, "logps/chosen": -120.3170166015625, "logps/rejected": -884.234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7720333337783813, "rewards/margins": 7.680296421051025, "rewards/rejected": -8.4523286819458, "step": 40680 }, { "epoch": 0.49, "learning_rate": 3.044700530078451e-06, "logits/chosen": -2.8417863845825195, "logits/rejected": -2.1128792762756348, "logps/chosen": -154.24462890625, "logps/rejected": -1138.384033203125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0169073343276978, "rewards/margins": 9.9417724609375, "rewards/rejected": -10.958680152893066, "step": 40690 }, { "epoch": 0.49, "learning_rate": 3.0436809582099743e-06, "logits/chosen": -2.8665847778320312, "logits/rejected": -2.0377490520477295, "logps/chosen": -160.64288330078125, "logps/rejected": -1078.55224609375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.035942554473877, "rewards/margins": 9.357168197631836, "rewards/rejected": -10.393110275268555, "step": 40700 }, { "epoch": 0.49, "learning_rate": 3.0426612914163124e-06, "logits/chosen": -2.8552041053771973, "logits/rejected": -2.2298293113708496, "logps/chosen": -121.9701919555664, "logps/rejected": -966.7138671875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7765935659408569, "rewards/margins": 8.498388290405273, "rewards/rejected": -9.274980545043945, "step": 40710 }, { "epoch": 0.49, "learning_rate": 3.0416415298754962e-06, "logits/chosen": -2.895089864730835, "logits/rejected": -2.27766752243042, "logps/chosen": -128.53860473632812, "logps/rejected": -938.6735229492188, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8119398951530457, "rewards/margins": 8.175932884216309, "rewards/rejected": -8.987874031066895, "step": 40720 }, { "epoch": 0.49, "learning_rate": 3.040621673765573e-06, "logits/chosen": -2.9200873374938965, "logits/rejected": -2.197251796722412, "logps/chosen": -135.294921875, "logps/rejected": -1049.461669921875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.8375388383865356, "rewards/margins": 9.252429962158203, "rewards/rejected": -10.08996868133545, "step": 40730 }, { "epoch": 0.49, "learning_rate": 3.0396017232646073e-06, "logits/chosen": -2.890105724334717, "logits/rejected": -2.228100538253784, "logps/chosen": -148.17538452148438, "logps/rejected": -1002.7830200195312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9829796552658081, "rewards/margins": 8.648077011108398, "rewards/rejected": -9.63105583190918, "step": 40740 }, { "epoch": 0.49, "learning_rate": 3.0385816785506792e-06, "logits/chosen": -2.8724312782287598, "logits/rejected": -1.9630836248397827, "logps/chosen": -171.0369873046875, "logps/rejected": -1104.502197265625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1637805700302124, "rewards/margins": 9.47104549407959, "rewards/rejected": -10.634825706481934, "step": 40750 }, { "epoch": 0.49, "learning_rate": 3.0375615398018864e-06, "logits/chosen": -2.837472438812256, "logits/rejected": -2.4258198738098145, "logps/chosen": -114.36662292480469, "logps/rejected": -877.8318481445312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7339057326316833, "rewards/margins": 7.660697937011719, "rewards/rejected": -8.39460277557373, "step": 40760 }, { "epoch": 0.49, "learning_rate": 3.036541307196341e-06, "logits/chosen": -2.8392891883850098, "logits/rejected": -2.272036075592041, "logps/chosen": -160.39663696289062, "logps/rejected": -905.7637939453125, "loss": 0.1053, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.087186336517334, "rewards/margins": 7.575888156890869, "rewards/rejected": -8.66307544708252, "step": 40770 }, { "epoch": 0.49, "learning_rate": 3.035520980912174e-06, "logits/chosen": -2.8219754695892334, "logits/rejected": -2.118436813354492, "logps/chosen": -136.11082458496094, "logps/rejected": -998.87109375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8691538572311401, "rewards/margins": 8.722683906555176, "rewards/rejected": -9.591837882995605, "step": 40780 }, { "epoch": 0.49, "learning_rate": 3.034500561127531e-06, "logits/chosen": -2.856229305267334, "logits/rejected": -2.3898048400878906, "logps/chosen": -114.73591613769531, "logps/rejected": -1006.2918090820312, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.7034280896186829, "rewards/margins": 8.957483291625977, "rewards/rejected": -9.660911560058594, "step": 40790 }, { "epoch": 0.49, "learning_rate": 3.033480048020575e-06, "logits/chosen": -2.875345468521118, "logits/rejected": -2.436068058013916, "logps/chosen": -119.90214538574219, "logps/rejected": -895.3720703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7633280158042908, "rewards/margins": 7.7986884117126465, "rewards/rejected": -8.562015533447266, "step": 40800 }, { "epoch": 0.49, "learning_rate": 3.0324594417694836e-06, "logits/chosen": -2.8775367736816406, "logits/rejected": -2.674838066101074, "logps/chosen": -78.53042602539062, "logps/rejected": -746.4503173828125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.42887720465660095, "rewards/margins": 6.681785583496094, "rewards/rejected": -7.110662937164307, "step": 40810 }, { "epoch": 0.49, "learning_rate": 3.031438742552453e-06, "logits/chosen": -2.8626019954681396, "logits/rejected": -2.189910888671875, "logps/chosen": -137.88815307617188, "logps/rejected": -1011.9432373046875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.9132882356643677, "rewards/margins": 8.803974151611328, "rewards/rejected": -9.717263221740723, "step": 40820 }, { "epoch": 0.49, "learning_rate": 3.030417950547694e-06, "logits/chosen": -2.869297742843628, "logits/rejected": -2.1266074180603027, "logps/chosen": -150.73890686035156, "logps/rejected": -1019.2088012695312, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9869253039360046, "rewards/margins": 8.820610046386719, "rewards/rejected": -9.807535171508789, "step": 40830 }, { "epoch": 0.49, "learning_rate": 3.0293970659334333e-06, "logits/chosen": -2.8321938514709473, "logits/rejected": -2.252892255783081, "logps/chosen": -130.18035888671875, "logps/rejected": -886.11181640625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8174501657485962, "rewards/margins": 7.6616034507751465, "rewards/rejected": -8.479053497314453, "step": 40840 }, { "epoch": 0.49, "learning_rate": 3.0283760888879156e-06, "logits/chosen": -2.921273708343506, "logits/rejected": -2.4584028720855713, "logps/chosen": -107.85789489746094, "logps/rejected": -919.61181640625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6581001281738281, "rewards/margins": 8.165040969848633, "rewards/rejected": -8.823141098022461, "step": 40850 }, { "epoch": 0.49, "learning_rate": 3.0273550195894e-06, "logits/chosen": -2.8114590644836426, "logits/rejected": -2.14046049118042, "logps/chosen": -124.001953125, "logps/rejected": -935.8834838867188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7876402139663696, "rewards/margins": 8.171032905578613, "rewards/rejected": -8.958673477172852, "step": 40860 }, { "epoch": 0.49, "learning_rate": 3.0263338582161627e-06, "logits/chosen": -2.8370048999786377, "logits/rejected": -2.4136900901794434, "logps/chosen": -136.8712921142578, "logps/rejected": -849.0738525390625, "loss": 0.0691, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9455030560493469, "rewards/margins": 7.167639255523682, "rewards/rejected": -8.113142013549805, "step": 40870 }, { "epoch": 0.49, "learning_rate": 3.0253126049464967e-06, "logits/chosen": -2.8692328929901123, "logits/rejected": -2.4164199829101562, "logps/chosen": -104.64665222167969, "logps/rejected": -887.3726806640625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6365005373954773, "rewards/margins": 7.867271423339844, "rewards/rejected": -8.50377368927002, "step": 40880 }, { "epoch": 0.49, "learning_rate": 3.0242912599587076e-06, "logits/chosen": -2.8613791465759277, "logits/rejected": -2.4811184406280518, "logps/chosen": -99.96186828613281, "logps/rejected": -813.7516479492188, "loss": 0.0184, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6024389266967773, "rewards/margins": 7.152321815490723, "rewards/rejected": -7.754761695861816, "step": 40890 }, { "epoch": 0.49, "learning_rate": 3.023269823431121e-06, "logits/chosen": -2.8579611778259277, "logits/rejected": -2.443110942840576, "logps/chosen": -126.77961730957031, "logps/rejected": -891.9693603515625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8078278303146362, "rewards/margins": 7.722739219665527, "rewards/rejected": -8.530567169189453, "step": 40900 }, { "epoch": 0.49, "learning_rate": 3.022248295542077e-06, "logits/chosen": -2.8883605003356934, "logits/rejected": -2.4191529750823975, "logps/chosen": -137.96751403808594, "logps/rejected": -917.0338745117188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9128260612487793, "rewards/margins": 7.88326358795166, "rewards/rejected": -8.796089172363281, "step": 40910 }, { "epoch": 0.49, "learning_rate": 3.0212266764699307e-06, "logits/chosen": -2.864948034286499, "logits/rejected": -2.3820769786834717, "logps/chosen": -113.82655334472656, "logps/rejected": -982.1744384765625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.7266505360603333, "rewards/margins": 8.6965913772583, "rewards/rejected": -9.42324161529541, "step": 40920 }, { "epoch": 0.49, "learning_rate": 3.0202049663930537e-06, "logits/chosen": -2.8756425380706787, "logits/rejected": -2.3876826763153076, "logps/chosen": -112.752685546875, "logps/rejected": -967.4010009765625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6855894327163696, "rewards/margins": 8.610010147094727, "rewards/rejected": -9.295598983764648, "step": 40930 }, { "epoch": 0.49, "learning_rate": 3.019183165489835e-06, "logits/chosen": -2.888251543045044, "logits/rejected": -2.3675696849823, "logps/chosen": -117.9036865234375, "logps/rejected": -960.9072265625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7354979515075684, "rewards/margins": 8.488811492919922, "rewards/rejected": -9.224309921264648, "step": 40940 }, { "epoch": 0.49, "learning_rate": 3.0181612739386783e-06, "logits/chosen": -2.8356566429138184, "logits/rejected": -2.2049460411071777, "logps/chosen": -175.03982543945312, "logps/rejected": -925.09375, "loss": 0.0777, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2195966243743896, "rewards/margins": 7.6256866455078125, "rewards/rejected": -8.845281600952148, "step": 40950 }, { "epoch": 0.49, "learning_rate": 3.0171392919180014e-06, "logits/chosen": -2.834665298461914, "logits/rejected": -2.3966376781463623, "logps/chosen": -107.3597640991211, "logps/rejected": -944.7669677734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.640462338924408, "rewards/margins": 8.427995681762695, "rewards/rejected": -9.068456649780273, "step": 40960 }, { "epoch": 0.49, "learning_rate": 3.0161172196062407e-06, "logits/chosen": -2.8738160133361816, "logits/rejected": -2.3570590019226074, "logps/chosen": -118.42927551269531, "logps/rejected": -906.9837036132812, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -0.751390814781189, "rewards/margins": 7.937554359436035, "rewards/rejected": -8.688944816589355, "step": 40970 }, { "epoch": 0.49, "learning_rate": 3.0150950571818473e-06, "logits/chosen": -2.9096598625183105, "logits/rejected": -2.337559223175049, "logps/chosen": -206.78573608398438, "logps/rejected": -912.1154174804688, "loss": 0.1341, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.5055917501449585, "rewards/margins": 7.2246294021606445, "rewards/rejected": -8.730220794677734, "step": 40980 }, { "epoch": 0.49, "learning_rate": 3.0140728048232865e-06, "logits/chosen": -2.845179319381714, "logits/rejected": -2.334623336791992, "logps/chosen": -135.8502960205078, "logps/rejected": -987.0008544921875, "loss": 0.0634, "rewards/accuracies": 1.0, "rewards/chosen": -0.8931536674499512, "rewards/margins": 8.589895248413086, "rewards/rejected": -9.483049392700195, "step": 40990 }, { "epoch": 0.49, "learning_rate": 3.0130504627090425e-06, "logits/chosen": -2.82080078125, "logits/rejected": -2.5829384326934814, "logps/chosen": -84.75005340576172, "logps/rejected": -767.3696899414062, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4968138337135315, "rewards/margins": 6.810273170471191, "rewards/rejected": -7.307086944580078, "step": 41000 }, { "epoch": 0.49, "learning_rate": 3.0120280310176126e-06, "logits/chosen": -2.820117473602295, "logits/rejected": -2.3364033699035645, "logps/chosen": -124.0632095336914, "logps/rejected": -943.9197387695312, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.786409854888916, "rewards/margins": 8.266572952270508, "rewards/rejected": -9.052983283996582, "step": 41010 }, { "epoch": 0.49, "learning_rate": 3.0110055099275104e-06, "logits/chosen": -2.824516773223877, "logits/rejected": -2.1098859310150146, "logps/chosen": -177.07199096679688, "logps/rejected": -1156.276123046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2391252517700195, "rewards/margins": 9.917610168457031, "rewards/rejected": -11.156734466552734, "step": 41020 }, { "epoch": 0.49, "learning_rate": 3.0099828996172654e-06, "logits/chosen": -2.841189384460449, "logits/rejected": -2.396393299102783, "logps/chosen": -140.34657287597656, "logps/rejected": -1019.3298950195312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9551149606704712, "rewards/margins": 8.85020637512207, "rewards/rejected": -9.805319786071777, "step": 41030 }, { "epoch": 0.49, "learning_rate": 3.0089602002654224e-06, "logits/chosen": -2.8829498291015625, "logits/rejected": -2.1805598735809326, "logps/chosen": -150.16079711914062, "logps/rejected": -912.1214599609375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9689191579818726, "rewards/margins": 7.767147064208984, "rewards/rejected": -8.736065864562988, "step": 41040 }, { "epoch": 0.49, "learning_rate": 3.0079374120505427e-06, "logits/chosen": -2.800351858139038, "logits/rejected": -1.970587134361267, "logps/chosen": -173.48587036132812, "logps/rejected": -1116.4149169921875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.1857709884643555, "rewards/margins": 9.566943168640137, "rewards/rejected": -10.752714157104492, "step": 41050 }, { "epoch": 0.49, "learning_rate": 3.0069145351512007e-06, "logits/chosen": -2.784496784210205, "logits/rejected": -2.095949172973633, "logps/chosen": -152.20982360839844, "logps/rejected": -1129.59326171875, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -1.0513007640838623, "rewards/margins": 9.848540306091309, "rewards/rejected": -10.899840354919434, "step": 41060 }, { "epoch": 0.49, "learning_rate": 3.0058915697459897e-06, "logits/chosen": -2.7794299125671387, "logits/rejected": -2.0292232036590576, "logps/chosen": -165.56178283691406, "logps/rejected": -1102.91650390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.118927001953125, "rewards/margins": 9.510871887207031, "rewards/rejected": -10.62979793548584, "step": 41070 }, { "epoch": 0.49, "learning_rate": 3.0048685160135148e-06, "logits/chosen": -2.8003151416778564, "logits/rejected": -1.905822992324829, "logps/chosen": -164.3187255859375, "logps/rejected": -1109.755126953125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0466572046279907, "rewards/margins": 9.636335372924805, "rewards/rejected": -10.682992935180664, "step": 41080 }, { "epoch": 0.49, "learning_rate": 3.0038453741323993e-06, "logits/chosen": -2.8189356327056885, "logits/rejected": -2.2727019786834717, "logps/chosen": -143.00669860839844, "logps/rejected": -975.9396362304688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9472231864929199, "rewards/margins": 8.412187576293945, "rewards/rejected": -9.359410285949707, "step": 41090 }, { "epoch": 0.49, "learning_rate": 3.002822144281281e-06, "logits/chosen": -2.864546298980713, "logits/rejected": -2.4445908069610596, "logps/chosen": -129.09542846679688, "logps/rejected": -968.3610229492188, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.8444811701774597, "rewards/margins": 8.469938278198242, "rewards/rejected": -9.31441879272461, "step": 41100 }, { "epoch": 0.49, "learning_rate": 3.001798826638813e-06, "logits/chosen": -2.8642659187316895, "logits/rejected": -2.6819324493408203, "logps/chosen": -93.15373992919922, "logps/rejected": -836.1686401367188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5958638191223145, "rewards/margins": 7.412384033203125, "rewards/rejected": -8.008247375488281, "step": 41110 }, { "epoch": 0.49, "learning_rate": 3.000775421383664e-06, "logits/chosen": -2.873906373977661, "logits/rejected": -2.3757176399230957, "logps/chosen": -103.82596588134766, "logps/rejected": -909.9478759765625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6487157344818115, "rewards/margins": 8.070582389831543, "rewards/rejected": -8.7192964553833, "step": 41120 }, { "epoch": 0.49, "learning_rate": 2.9997519286945164e-06, "logits/chosen": -2.8311448097229004, "logits/rejected": -1.9076673984527588, "logps/chosen": -219.885009765625, "logps/rejected": -1281.8546142578125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.6446670293807983, "rewards/margins": 10.766424179077148, "rewards/rejected": -12.411089897155762, "step": 41130 }, { "epoch": 0.49, "learning_rate": 2.9987283487500707e-06, "logits/chosen": -2.8770604133605957, "logits/rejected": -2.498070001602173, "logps/chosen": -113.60420989990234, "logps/rejected": -898.3328247070312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.739896297454834, "rewards/margins": 7.8749680519104, "rewards/rejected": -8.614863395690918, "step": 41140 }, { "epoch": 0.49, "learning_rate": 2.99770468172904e-06, "logits/chosen": -2.808530807495117, "logits/rejected": -2.2928850650787354, "logps/chosen": -158.36187744140625, "logps/rejected": -979.0367431640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1208014488220215, "rewards/margins": 8.290545463562012, "rewards/rejected": -9.411348342895508, "step": 41150 }, { "epoch": 0.49, "learning_rate": 2.9966809278101544e-06, "logits/chosen": -2.908705234527588, "logits/rejected": -2.2616209983825684, "logps/chosen": -140.51596069335938, "logps/rejected": -974.3199462890625, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": -0.9699597358703613, "rewards/margins": 8.3782320022583, "rewards/rejected": -9.348191261291504, "step": 41160 }, { "epoch": 0.49, "learning_rate": 2.9956570871721584e-06, "logits/chosen": -2.838057279586792, "logits/rejected": -2.330026865005493, "logps/chosen": -134.01939392089844, "logps/rejected": -994.4320068359375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8641765713691711, "rewards/margins": 8.69267749786377, "rewards/rejected": -9.556853294372559, "step": 41170 }, { "epoch": 0.49, "learning_rate": 2.9946331599938117e-06, "logits/chosen": -2.875605344772339, "logits/rejected": -2.397742748260498, "logps/chosen": -140.71664428710938, "logps/rejected": -975.3810424804688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9236105680465698, "rewards/margins": 8.43175983428955, "rewards/rejected": -9.355369567871094, "step": 41180 }, { "epoch": 0.49, "learning_rate": 2.9936091464538885e-06, "logits/chosen": -2.8729355335235596, "logits/rejected": -2.4458422660827637, "logps/chosen": -129.86448669433594, "logps/rejected": -914.2267456054688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.856694221496582, "rewards/margins": 7.907456874847412, "rewards/rejected": -8.764151573181152, "step": 41190 }, { "epoch": 0.49, "learning_rate": 2.9925850467311797e-06, "logits/chosen": -2.8486218452453613, "logits/rejected": -2.2507805824279785, "logps/chosen": -158.85812377929688, "logps/rejected": -921.708984375, "loss": 0.0425, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1590849161148071, "rewards/margins": 7.682137966156006, "rewards/rejected": -8.841222763061523, "step": 41200 }, { "epoch": 0.49, "learning_rate": 2.9915608610044898e-06, "logits/chosen": -2.8732106685638428, "logits/rejected": -2.490483522415161, "logps/chosen": -115.1361083984375, "logps/rejected": -948.5988159179688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7464905977249146, "rewards/margins": 8.352680206298828, "rewards/rejected": -9.099169731140137, "step": 41210 }, { "epoch": 0.49, "learning_rate": 2.990536589452639e-06, "logits/chosen": -2.88769268989563, "logits/rejected": -2.3149666786193848, "logps/chosen": -155.80331420898438, "logps/rejected": -1007.9894409179688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0854657888412476, "rewards/margins": 8.605852127075195, "rewards/rejected": -9.691316604614258, "step": 41220 }, { "epoch": 0.49, "learning_rate": 2.9895122322544612e-06, "logits/chosen": -2.8765177726745605, "logits/rejected": -2.356896162033081, "logps/chosen": -139.03948974609375, "logps/rejected": -1067.847900390625, "loss": 0.3041, "rewards/accuracies": 1.0, "rewards/chosen": -0.9067920446395874, "rewards/margins": 9.372636795043945, "rewards/rejected": -10.27942943572998, "step": 41230 }, { "epoch": 0.49, "learning_rate": 2.9884877895888085e-06, "logits/chosen": -2.836484432220459, "logits/rejected": -2.2408816814422607, "logps/chosen": -151.23959350585938, "logps/rejected": -989.4378662109375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.036791205406189, "rewards/margins": 8.462575912475586, "rewards/rejected": -9.499366760253906, "step": 41240 }, { "epoch": 0.49, "learning_rate": 2.9874632616345435e-06, "logits/chosen": -2.8557236194610596, "logits/rejected": -2.1177902221679688, "logps/chosen": -166.5412139892578, "logps/rejected": -1087.80224609375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.0877654552459717, "rewards/margins": 9.382636070251465, "rewards/rejected": -10.4704008102417, "step": 41250 }, { "epoch": 0.49, "learning_rate": 2.986438648570547e-06, "logits/chosen": -2.8319272994995117, "logits/rejected": -2.24762225151062, "logps/chosen": -143.8888397216797, "logps/rejected": -978.6516723632812, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.9461467862129211, "rewards/margins": 8.45515251159668, "rewards/rejected": -9.401300430297852, "step": 41260 }, { "epoch": 0.49, "learning_rate": 2.985413950575714e-06, "logits/chosen": -2.8997278213500977, "logits/rejected": -2.3119990825653076, "logps/chosen": -152.19981384277344, "logps/rejected": -1021.4207153320312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.067381501197815, "rewards/margins": 8.756425857543945, "rewards/rejected": -9.823808670043945, "step": 41270 }, { "epoch": 0.49, "learning_rate": 2.9843891678289526e-06, "logits/chosen": -2.8857569694519043, "logits/rejected": -2.2316396236419678, "logps/chosen": -142.93695068359375, "logps/rejected": -1050.2105712890625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9456119537353516, "rewards/margins": 9.167841911315918, "rewards/rejected": -10.113454818725586, "step": 41280 }, { "epoch": 0.49, "learning_rate": 2.9833643005091885e-06, "logits/chosen": -2.838683605194092, "logits/rejected": -2.1500346660614014, "logps/chosen": -179.6587677001953, "logps/rejected": -1099.7440185546875, "loss": 0.1266, "rewards/accuracies": 1.0, "rewards/chosen": -1.289250373840332, "rewards/margins": 9.308027267456055, "rewards/rejected": -10.59727668762207, "step": 41290 }, { "epoch": 0.49, "learning_rate": 2.98233934879536e-06, "logits/chosen": -2.8567616939544678, "logits/rejected": -2.4671130180358887, "logps/chosen": -108.27262878417969, "logps/rejected": -894.650390625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6761083006858826, "rewards/margins": 7.894402980804443, "rewards/rejected": -8.570510864257812, "step": 41300 }, { "epoch": 0.49, "learning_rate": 2.9813143128664207e-06, "logits/chosen": -2.8566176891326904, "logits/rejected": -2.4341654777526855, "logps/chosen": -114.27192687988281, "logps/rejected": -952.36083984375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7185840606689453, "rewards/margins": 8.4082612991333, "rewards/rejected": -9.126845359802246, "step": 41310 }, { "epoch": 0.49, "learning_rate": 2.980289192901339e-06, "logits/chosen": -2.8122594356536865, "logits/rejected": -2.4015822410583496, "logps/chosen": -106.8113021850586, "logps/rejected": -885.5930786132812, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6394336223602295, "rewards/margins": 7.833498477935791, "rewards/rejected": -8.472932815551758, "step": 41320 }, { "epoch": 0.49, "learning_rate": 2.9792639890790986e-06, "logits/chosen": -2.89357590675354, "logits/rejected": -2.465427875518799, "logps/chosen": -121.3507308959961, "logps/rejected": -996.001953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7690643072128296, "rewards/margins": 8.799559593200684, "rewards/rejected": -9.568624496459961, "step": 41330 }, { "epoch": 0.49, "learning_rate": 2.978238701578696e-06, "logits/chosen": -2.8062856197357178, "logits/rejected": -2.077111005783081, "logps/chosen": -166.80044555664062, "logps/rejected": -1096.796142578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.1367467641830444, "rewards/margins": 9.429625511169434, "rewards/rejected": -10.56637191772461, "step": 41340 }, { "epoch": 0.49, "learning_rate": 2.977213330579145e-06, "logits/chosen": -2.827932119369507, "logits/rejected": -2.2851614952087402, "logps/chosen": -156.14382934570312, "logps/rejected": -1043.7646484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1028329133987427, "rewards/margins": 8.947687149047852, "rewards/rejected": -10.050520896911621, "step": 41350 }, { "epoch": 0.5, "learning_rate": 2.9761878762594717e-06, "logits/chosen": -2.8698787689208984, "logits/rejected": -2.6202526092529297, "logps/chosen": -89.22371673583984, "logps/rejected": -848.0421752929688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5106884241104126, "rewards/margins": 7.59198522567749, "rewards/rejected": -8.10267448425293, "step": 41360 }, { "epoch": 0.5, "learning_rate": 2.9751623387987185e-06, "logits/chosen": -2.8432199954986572, "logits/rejected": -2.4146080017089844, "logps/chosen": -140.27993774414062, "logps/rejected": -844.2247314453125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.9676167368888855, "rewards/margins": 7.10282039642334, "rewards/rejected": -8.070436477661133, "step": 41370 }, { "epoch": 0.5, "learning_rate": 2.9741367183759395e-06, "logits/chosen": -2.82784366607666, "logits/rejected": -2.410457134246826, "logps/chosen": -117.27848815917969, "logps/rejected": -909.31005859375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7613162994384766, "rewards/margins": 7.943158149719238, "rewards/rejected": -8.704473495483398, "step": 41380 }, { "epoch": 0.5, "learning_rate": 2.973111015170207e-06, "logits/chosen": -2.8462185859680176, "logits/rejected": -2.345463275909424, "logps/chosen": -113.77203369140625, "logps/rejected": -1023.3976440429688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7103739380836487, "rewards/margins": 9.12901496887207, "rewards/rejected": -9.839387893676758, "step": 41390 }, { "epoch": 0.5, "learning_rate": 2.972085229360605e-06, "logits/chosen": -2.8912723064422607, "logits/rejected": -2.61568021774292, "logps/chosen": -129.03025817871094, "logps/rejected": -864.298828125, "loss": 0.123, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8999068140983582, "rewards/margins": 7.365779876708984, "rewards/rejected": -8.26568603515625, "step": 41400 }, { "epoch": 0.5, "learning_rate": 2.9710593611262334e-06, "logits/chosen": -2.9061763286590576, "logits/rejected": -2.591146469116211, "logps/chosen": -107.8279037475586, "logps/rejected": -894.1730346679688, "loss": 0.1363, "rewards/accuracies": 1.0, "rewards/chosen": -0.653650164604187, "rewards/margins": 7.8998284339904785, "rewards/rejected": -8.553479194641113, "step": 41410 }, { "epoch": 0.5, "learning_rate": 2.9700334106462046e-06, "logits/chosen": -2.855318546295166, "logits/rejected": -2.32348370552063, "logps/chosen": -167.45413208007812, "logps/rejected": -963.1915893554688, "loss": 0.1225, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2192124128341675, "rewards/margins": 8.014144897460938, "rewards/rejected": -9.233357429504395, "step": 41420 }, { "epoch": 0.5, "learning_rate": 2.9690073780996498e-06, "logits/chosen": -2.904387950897217, "logits/rejected": -2.178037166595459, "logps/chosen": -151.82989501953125, "logps/rejected": -1085.0721435546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0270016193389893, "rewards/margins": 9.425265312194824, "rewards/rejected": -10.45226764678955, "step": 41430 }, { "epoch": 0.5, "learning_rate": 2.967981263665708e-06, "logits/chosen": -2.8655619621276855, "logits/rejected": -2.1838297843933105, "logps/chosen": -149.8107452392578, "logps/rejected": -1033.35009765625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.9430648684501648, "rewards/margins": 9.001802444458008, "rewards/rejected": -9.944867134094238, "step": 41440 }, { "epoch": 0.5, "learning_rate": 2.966955067523537e-06, "logits/chosen": -2.893981456756592, "logits/rejected": -2.2859725952148438, "logps/chosen": -173.56655883789062, "logps/rejected": -1077.5064697265625, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -1.2071189880371094, "rewards/margins": 9.15794849395752, "rewards/rejected": -10.365066528320312, "step": 41450 }, { "epoch": 0.5, "learning_rate": 2.9659287898523087e-06, "logits/chosen": -2.8215408325195312, "logits/rejected": -2.1852364540100098, "logps/chosen": -182.3054656982422, "logps/rejected": -1026.073486328125, "loss": 0.0599, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3369100093841553, "rewards/margins": 8.523279190063477, "rewards/rejected": -9.860188484191895, "step": 41460 }, { "epoch": 0.5, "learning_rate": 2.964902430831208e-06, "logits/chosen": -2.7666337490081787, "logits/rejected": -2.1950788497924805, "logps/chosen": -181.48306274414062, "logps/rejected": -1018.4591674804688, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3557196855545044, "rewards/margins": 8.448415756225586, "rewards/rejected": -9.804136276245117, "step": 41470 }, { "epoch": 0.5, "learning_rate": 2.9638759906394337e-06, "logits/chosen": -2.7695531845092773, "logits/rejected": -2.2269060611724854, "logps/chosen": -192.29745483398438, "logps/rejected": -1106.8330078125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.4733912944793701, "rewards/margins": 9.19933795928955, "rewards/rejected": -10.6727294921875, "step": 41480 }, { "epoch": 0.5, "learning_rate": 2.9628494694561997e-06, "logits/chosen": -2.862382173538208, "logits/rejected": -2.5203969478607178, "logps/chosen": -120.70344543457031, "logps/rejected": -883.7760620117188, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.8165820240974426, "rewards/margins": 7.645615577697754, "rewards/rejected": -8.462197303771973, "step": 41490 }, { "epoch": 0.5, "learning_rate": 2.961822867460734e-06, "logits/chosen": -2.8276100158691406, "logits/rejected": -2.3341946601867676, "logps/chosen": -150.00555419921875, "logps/rejected": -965.2267456054688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.998176097869873, "rewards/margins": 8.275033950805664, "rewards/rejected": -9.273210525512695, "step": 41500 }, { "epoch": 0.5, "learning_rate": 2.960796184832278e-06, "logits/chosen": -2.830874443054199, "logits/rejected": -2.285115957260132, "logps/chosen": -134.75155639648438, "logps/rejected": -966.7503051757812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9125251770019531, "rewards/margins": 8.376331329345703, "rewards/rejected": -9.288857460021973, "step": 41510 }, { "epoch": 0.5, "learning_rate": 2.9597694217500883e-06, "logits/chosen": -2.7974350452423096, "logits/rejected": -2.195128917694092, "logps/chosen": -150.67642211914062, "logps/rejected": -968.4779052734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0401207208633423, "rewards/margins": 8.265223503112793, "rewards/rejected": -9.305344581604004, "step": 41520 }, { "epoch": 0.5, "learning_rate": 2.9587425783934348e-06, "logits/chosen": -2.8364665508270264, "logits/rejected": -2.340512990951538, "logps/chosen": -125.9188461303711, "logps/rejected": -932.7801513671875, "loss": 0.0204, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7887707948684692, "rewards/margins": 8.144311904907227, "rewards/rejected": -8.933082580566406, "step": 41530 }, { "epoch": 0.5, "learning_rate": 2.9577156549416007e-06, "logits/chosen": -2.899904251098633, "logits/rejected": -2.305891513824463, "logps/chosen": -169.55520629882812, "logps/rejected": -1009.2982177734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.235223650932312, "rewards/margins": 8.464694023132324, "rewards/rejected": -9.699918746948242, "step": 41540 }, { "epoch": 0.5, "learning_rate": 2.9566886515738847e-06, "logits/chosen": -2.8451523780822754, "logits/rejected": -2.483976364135742, "logps/chosen": -129.9611358642578, "logps/rejected": -897.9920043945312, "loss": 0.0759, "rewards/accuracies": 1.0, "rewards/chosen": -0.847606360912323, "rewards/margins": 7.764647483825684, "rewards/rejected": -8.612255096435547, "step": 41550 }, { "epoch": 0.5, "learning_rate": 2.9556615684695987e-06, "logits/chosen": -2.8133366107940674, "logits/rejected": -1.9244937896728516, "logps/chosen": -197.92739868164062, "logps/rejected": -1201.985595703125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.453383445739746, "rewards/margins": 10.149224281311035, "rewards/rejected": -11.602606773376465, "step": 41560 }, { "epoch": 0.5, "learning_rate": 2.954634405808068e-06, "logits/chosen": -2.850238561630249, "logits/rejected": -2.5732738971710205, "logps/chosen": -104.80961608886719, "logps/rejected": -898.4974365234375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6779561638832092, "rewards/margins": 7.9361066818237305, "rewards/rejected": -8.614062309265137, "step": 41570 }, { "epoch": 0.5, "learning_rate": 2.953607163768633e-06, "logits/chosen": -2.8637537956237793, "logits/rejected": -2.5880987644195557, "logps/chosen": -100.96119689941406, "logps/rejected": -886.7279052734375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6291521787643433, "rewards/margins": 7.862941741943359, "rewards/rejected": -8.492093086242676, "step": 41580 }, { "epoch": 0.5, "learning_rate": 2.9525798425306473e-06, "logits/chosen": -2.86112380027771, "logits/rejected": -2.349409341812134, "logps/chosen": -128.9974365234375, "logps/rejected": -942.9474487304688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8634252548217773, "rewards/margins": 8.180943489074707, "rewards/rejected": -9.044368743896484, "step": 41590 }, { "epoch": 0.5, "learning_rate": 2.9515524422734776e-06, "logits/chosen": -2.8230488300323486, "logits/rejected": -2.077518939971924, "logps/chosen": -183.06088256835938, "logps/rejected": -1210.71337890625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.2851773500442505, "rewards/margins": 10.418951988220215, "rewards/rejected": -11.704130172729492, "step": 41600 }, { "epoch": 0.5, "learning_rate": 2.9505249631765056e-06, "logits/chosen": -2.854222059249878, "logits/rejected": -2.3375325202941895, "logps/chosen": -151.41635131835938, "logps/rejected": -1073.7958984375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.097940444946289, "rewards/margins": 9.230459213256836, "rewards/rejected": -10.328400611877441, "step": 41610 }, { "epoch": 0.5, "learning_rate": 2.949497405419126e-06, "logits/chosen": -2.833346128463745, "logits/rejected": -2.6262216567993164, "logps/chosen": -91.67680358886719, "logps/rejected": -868.2618408203125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5472410321235657, "rewards/margins": 7.751858711242676, "rewards/rejected": -8.299099922180176, "step": 41620 }, { "epoch": 0.5, "learning_rate": 2.948469769180749e-06, "logits/chosen": -2.838771343231201, "logits/rejected": -2.142505168914795, "logps/chosen": -163.3514404296875, "logps/rejected": -1059.988525390625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.063493013381958, "rewards/margins": 9.138764381408691, "rewards/rejected": -10.202256202697754, "step": 41630 }, { "epoch": 0.5, "learning_rate": 2.947442054640795e-06, "logits/chosen": -2.811474323272705, "logits/rejected": -2.294022798538208, "logps/chosen": -144.0270233154297, "logps/rejected": -1005.1365356445312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0207302570343018, "rewards/margins": 8.642104148864746, "rewards/rejected": -9.662835121154785, "step": 41640 }, { "epoch": 0.5, "learning_rate": 2.946414261978701e-06, "logits/chosen": -2.8065414428710938, "logits/rejected": -2.0629723072052, "logps/chosen": -184.31700134277344, "logps/rejected": -1114.16796875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3253767490386963, "rewards/margins": 9.421188354492188, "rewards/rejected": -10.746563911437988, "step": 41650 }, { "epoch": 0.5, "learning_rate": 2.9453863913739177e-06, "logits/chosen": -2.852607011795044, "logits/rejected": -2.4418747425079346, "logps/chosen": -124.2032470703125, "logps/rejected": -932.9522705078125, "loss": 0.1273, "rewards/accuracies": 1.0, "rewards/chosen": -0.8568958044052124, "rewards/margins": 8.087506294250488, "rewards/rejected": -8.944401741027832, "step": 41660 }, { "epoch": 0.5, "learning_rate": 2.944358443005906e-06, "logits/chosen": -2.834577798843384, "logits/rejected": -2.2792277336120605, "logps/chosen": -120.52561950683594, "logps/rejected": -999.54248046875, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.7395012974739075, "rewards/margins": 8.86058235168457, "rewards/rejected": -9.600083351135254, "step": 41670 }, { "epoch": 0.5, "learning_rate": 2.9433304170541453e-06, "logits/chosen": -2.8581767082214355, "logits/rejected": -2.3493492603302, "logps/chosen": -155.7626190185547, "logps/rejected": -944.806640625, "loss": 0.0186, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0565603971481323, "rewards/margins": 8.008936882019043, "rewards/rejected": -9.065497398376465, "step": 41680 }, { "epoch": 0.5, "learning_rate": 2.942302313698125e-06, "logits/chosen": -2.808806896209717, "logits/rejected": -1.9838182926177979, "logps/chosen": -203.88682556152344, "logps/rejected": -1049.7734375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.5198103189468384, "rewards/margins": 8.580053329467773, "rewards/rejected": -10.099863052368164, "step": 41690 }, { "epoch": 0.5, "learning_rate": 2.941274133117348e-06, "logits/chosen": -2.8408219814300537, "logits/rejected": -2.2934467792510986, "logps/chosen": -167.3534698486328, "logps/rejected": -1060.392333984375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.158940315246582, "rewards/margins": 9.063011169433594, "rewards/rejected": -10.221951484680176, "step": 41700 }, { "epoch": 0.5, "learning_rate": 2.9402458754913345e-06, "logits/chosen": -2.868438959121704, "logits/rejected": -2.375880002975464, "logps/chosen": -123.0304946899414, "logps/rejected": -996.5695190429688, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7955440282821655, "rewards/margins": 8.79343318939209, "rewards/rejected": -9.588976860046387, "step": 41710 }, { "epoch": 0.5, "learning_rate": 2.939217540999613e-06, "logits/chosen": -2.880321502685547, "logits/rejected": -2.384953737258911, "logps/chosen": -149.5255126953125, "logps/rejected": -964.58544921875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.05672287940979, "rewards/margins": 8.198710441589355, "rewards/rejected": -9.255434036254883, "step": 41720 }, { "epoch": 0.5, "learning_rate": 2.938189129821729e-06, "logits/chosen": -2.8201682567596436, "logits/rejected": -2.2056427001953125, "logps/chosen": -158.2982940673828, "logps/rejected": -1059.755859375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1119388341903687, "rewards/margins": 9.080989837646484, "rewards/rejected": -10.1929292678833, "step": 41730 }, { "epoch": 0.5, "learning_rate": 2.93716064213724e-06, "logits/chosen": -2.8047890663146973, "logits/rejected": -2.230799436569214, "logps/chosen": -129.4680633544922, "logps/rejected": -1027.6851806640625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.7987362146377563, "rewards/margins": 9.08050537109375, "rewards/rejected": -9.879240989685059, "step": 41740 }, { "epoch": 0.5, "learning_rate": 2.9361320781257173e-06, "logits/chosen": -2.801602363586426, "logits/rejected": -2.0596299171447754, "logps/chosen": -178.51431274414062, "logps/rejected": -1087.874755859375, "loss": 0.1492, "rewards/accuracies": 1.0, "rewards/chosen": -1.1457186937332153, "rewards/margins": 9.322103500366211, "rewards/rejected": -10.46782112121582, "step": 41750 }, { "epoch": 0.5, "learning_rate": 2.9351034379667443e-06, "logits/chosen": -2.889622449874878, "logits/rejected": -2.167670726776123, "logps/chosen": -149.90255737304688, "logps/rejected": -898.8224487304688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9751474261283875, "rewards/margins": 7.620999336242676, "rewards/rejected": -8.596144676208496, "step": 41760 }, { "epoch": 0.5, "learning_rate": 2.93407472183992e-06, "logits/chosen": -2.886223316192627, "logits/rejected": -2.223673105239868, "logps/chosen": -134.59298706054688, "logps/rejected": -1109.5220947265625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8347653150558472, "rewards/margins": 9.866714477539062, "rewards/rejected": -10.701478958129883, "step": 41770 }, { "epoch": 0.5, "learning_rate": 2.9330459299248547e-06, "logits/chosen": -2.833432674407959, "logits/rejected": -2.422626256942749, "logps/chosen": -99.16529846191406, "logps/rejected": -879.9752197265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5843225717544556, "rewards/margins": 7.830253601074219, "rewards/rejected": -8.414576530456543, "step": 41780 }, { "epoch": 0.5, "learning_rate": 2.932017062401173e-06, "logits/chosen": -2.817917585372925, "logits/rejected": -2.259269952774048, "logps/chosen": -141.01181030273438, "logps/rejected": -1023.7311401367188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9289242625236511, "rewards/margins": 8.916269302368164, "rewards/rejected": -9.845193862915039, "step": 41790 }, { "epoch": 0.5, "learning_rate": 2.9309881194485117e-06, "logits/chosen": -2.847367525100708, "logits/rejected": -2.253126621246338, "logps/chosen": -142.06814575195312, "logps/rejected": -1019.9949951171875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9051774740219116, "rewards/margins": 8.898876190185547, "rewards/rejected": -9.80405330657959, "step": 41800 }, { "epoch": 0.5, "learning_rate": 2.9299591012465222e-06, "logits/chosen": -2.8527770042419434, "logits/rejected": -2.237805128097534, "logps/chosen": -149.6547088623047, "logps/rejected": -985.4556884765625, "loss": 0.0381, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9944249987602234, "rewards/margins": 8.471660614013672, "rewards/rejected": -9.466085433959961, "step": 41810 }, { "epoch": 0.5, "learning_rate": 2.9289300079748677e-06, "logits/chosen": -2.8933730125427246, "logits/rejected": -2.2177422046661377, "logps/chosen": -127.30842590332031, "logps/rejected": -975.4989013671875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8140283823013306, "rewards/margins": 8.559020042419434, "rewards/rejected": -9.373048782348633, "step": 41820 }, { "epoch": 0.5, "learning_rate": 2.9279008398132246e-06, "logits/chosen": -2.8631699085235596, "logits/rejected": -2.4012157917022705, "logps/chosen": -136.52853393554688, "logps/rejected": -1047.855224609375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8817535638809204, "rewards/margins": 9.194738388061523, "rewards/rejected": -10.076493263244629, "step": 41830 }, { "epoch": 0.5, "learning_rate": 2.9268715969412838e-06, "logits/chosen": -2.8180243968963623, "logits/rejected": -2.227849245071411, "logps/chosen": -147.0569610595703, "logps/rejected": -988.2521362304688, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.9834804534912109, "rewards/margins": 8.51293659210205, "rewards/rejected": -9.496416091918945, "step": 41840 }, { "epoch": 0.5, "learning_rate": 2.9258422795387483e-06, "logits/chosen": -2.828099489212036, "logits/rejected": -2.15948224067688, "logps/chosen": -132.15158081054688, "logps/rejected": -1075.220458984375, "loss": 0.1708, "rewards/accuracies": 1.0, "rewards/chosen": -0.8164883852005005, "rewards/margins": 9.552046775817871, "rewards/rejected": -10.368534088134766, "step": 41850 }, { "epoch": 0.5, "learning_rate": 2.9248128877853327e-06, "logits/chosen": -2.8884904384613037, "logits/rejected": -2.32165789604187, "logps/chosen": -169.28919982910156, "logps/rejected": -988.7151489257812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.228365182876587, "rewards/margins": 8.27620792388916, "rewards/rejected": -9.504571914672852, "step": 41860 }, { "epoch": 0.5, "learning_rate": 2.9237834218607665e-06, "logits/chosen": -2.8184924125671387, "logits/rejected": -2.4067866802215576, "logps/chosen": -104.90879821777344, "logps/rejected": -902.3029174804688, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5977513790130615, "rewards/margins": 8.050182342529297, "rewards/rejected": -8.647934913635254, "step": 41870 }, { "epoch": 0.5, "learning_rate": 2.922753881944792e-06, "logits/chosen": -2.815009355545044, "logits/rejected": -2.1174798011779785, "logps/chosen": -173.45176696777344, "logps/rejected": -1085.6279296875, "loss": 0.1467, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2089815139770508, "rewards/margins": 9.25293254852295, "rewards/rejected": -10.4619140625, "step": 41880 }, { "epoch": 0.5, "learning_rate": 2.9217242682171647e-06, "logits/chosen": -2.823174238204956, "logits/rejected": -2.1604437828063965, "logps/chosen": -133.92930603027344, "logps/rejected": -923.93212890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8655182719230652, "rewards/margins": 7.9921135902404785, "rewards/rejected": -8.857632637023926, "step": 41890 }, { "epoch": 0.5, "learning_rate": 2.920694580857651e-06, "logits/chosen": -2.8676183223724365, "logits/rejected": -2.3343920707702637, "logps/chosen": -135.59744262695312, "logps/rejected": -1063.910888671875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8350116014480591, "rewards/margins": 9.408167839050293, "rewards/rejected": -10.243179321289062, "step": 41900 }, { "epoch": 0.5, "learning_rate": 2.9196648200460316e-06, "logits/chosen": -2.8535468578338623, "logits/rejected": -2.51721453666687, "logps/chosen": -80.3296127319336, "logps/rejected": -771.123779296875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.4307963252067566, "rewards/margins": 6.912186622619629, "rewards/rejected": -7.342982292175293, "step": 41910 }, { "epoch": 0.5, "learning_rate": 2.9186349859621e-06, "logits/chosen": -2.8155605792999268, "logits/rejected": -2.3066933155059814, "logps/chosen": -125.0068588256836, "logps/rejected": -929.0208129882812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8308083415031433, "rewards/margins": 8.07040023803711, "rewards/rejected": -8.901208877563477, "step": 41920 }, { "epoch": 0.5, "learning_rate": 2.917605078785663e-06, "logits/chosen": -2.849794626235962, "logits/rejected": -2.472602128982544, "logps/chosen": -89.89253997802734, "logps/rejected": -898.4881591796875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4790368974208832, "rewards/margins": 8.125811576843262, "rewards/rejected": -8.604848861694336, "step": 41930 }, { "epoch": 0.5, "learning_rate": 2.9165750986965387e-06, "logits/chosen": -2.8171958923339844, "logits/rejected": -2.1841092109680176, "logps/chosen": -133.9302520751953, "logps/rejected": -1039.8037109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8726711273193359, "rewards/margins": 9.137657165527344, "rewards/rejected": -10.01032829284668, "step": 41940 }, { "epoch": 0.5, "learning_rate": 2.91554504587456e-06, "logits/chosen": -2.799959182739258, "logits/rejected": -2.2697205543518066, "logps/chosen": -160.18211364746094, "logps/rejected": -906.7353515625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1260830163955688, "rewards/margins": 7.544116020202637, "rewards/rejected": -8.670198440551758, "step": 41950 }, { "epoch": 0.5, "learning_rate": 2.91451492049957e-06, "logits/chosen": -2.8589088916778564, "logits/rejected": -1.995973825454712, "logps/chosen": -172.8059539794922, "logps/rejected": -1171.5435791015625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1544562578201294, "rewards/margins": 10.160613059997559, "rewards/rejected": -11.315070152282715, "step": 41960 }, { "epoch": 0.5, "learning_rate": 2.9134847227514258e-06, "logits/chosen": -2.8543198108673096, "logits/rejected": -2.5038697719573975, "logps/chosen": -94.58264923095703, "logps/rejected": -855.2052001953125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5608333945274353, "rewards/margins": 7.611001014709473, "rewards/rejected": -8.171833992004395, "step": 41970 }, { "epoch": 0.5, "learning_rate": 2.912454452809998e-06, "logits/chosen": -2.850719928741455, "logits/rejected": -2.2196438312530518, "logps/chosen": -155.54745483398438, "logps/rejected": -1047.074462890625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9362722635269165, "rewards/margins": 9.131612777709961, "rewards/rejected": -10.067885398864746, "step": 41980 }, { "epoch": 0.5, "learning_rate": 2.9114241108551676e-06, "logits/chosen": -2.9012386798858643, "logits/rejected": -2.597168445587158, "logps/chosen": -85.16287231445312, "logps/rejected": -858.4459838867188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.47831669449806213, "rewards/margins": 7.733572959899902, "rewards/rejected": -8.211889266967773, "step": 41990 }, { "epoch": 0.5, "learning_rate": 2.9103936970668305e-06, "logits/chosen": -2.8990371227264404, "logits/rejected": -2.147277355194092, "logps/chosen": -155.44976806640625, "logps/rejected": -1106.620849609375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.0002912282943726, "rewards/margins": 9.66693115234375, "rewards/rejected": -10.66722297668457, "step": 42000 }, { "epoch": 0.5, "eval_logits/chosen": -2.840019941329956, "eval_logits/rejected": -1.6475192308425903, "eval_logps/chosen": -292.2453918457031, "eval_logps/rejected": -1234.2608642578125, "eval_loss": 0.001300981268286705, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -2.3106515407562256, "eval_rewards/margins": 9.564702033996582, "eval_rewards/rejected": -11.875354766845703, "eval_runtime": 1.2147, "eval_samples_per_second": 4.116, "eval_steps_per_second": 2.47, "step": 42000 }, { "epoch": 0.5, "learning_rate": 2.9093632116248936e-06, "logits/chosen": -2.8994476795196533, "logits/rejected": -2.2626993656158447, "logps/chosen": -134.60987854003906, "logps/rejected": -1039.73681640625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.825607180595398, "rewards/margins": 9.177937507629395, "rewards/rejected": -10.003544807434082, "step": 42010 }, { "epoch": 0.5, "learning_rate": 2.9083326547092773e-06, "logits/chosen": -2.82564377784729, "logits/rejected": -2.3415780067443848, "logps/chosen": -119.8065185546875, "logps/rejected": -1017.4342651367188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7502686381340027, "rewards/margins": 9.045557975769043, "rewards/rejected": -9.79582691192627, "step": 42020 }, { "epoch": 0.5, "learning_rate": 2.907302026499913e-06, "logits/chosen": -2.8402459621429443, "logits/rejected": -2.1882834434509277, "logps/chosen": -123.56568908691406, "logps/rejected": -1040.60791015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7883288860321045, "rewards/margins": 9.234331130981445, "rewards/rejected": -10.022658348083496, "step": 42030 }, { "epoch": 0.5, "learning_rate": 2.906271327176746e-06, "logits/chosen": -2.785062313079834, "logits/rejected": -2.097301959991455, "logps/chosen": -161.5580291748047, "logps/rejected": -1037.4169921875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1164271831512451, "rewards/margins": 8.863439559936523, "rewards/rejected": -9.979865074157715, "step": 42040 }, { "epoch": 0.5, "learning_rate": 2.905240556919735e-06, "logits/chosen": -2.7915141582489014, "logits/rejected": -1.9087356328964233, "logps/chosen": -151.93617248535156, "logps/rejected": -1102.5028076171875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.042720079421997, "rewards/margins": 9.577995300292969, "rewards/rejected": -10.62071418762207, "step": 42050 }, { "epoch": 0.5, "learning_rate": 2.9042097159088467e-06, "logits/chosen": -2.8371522426605225, "logits/rejected": -2.309011936187744, "logps/chosen": -122.37711334228516, "logps/rejected": -921.93603515625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7827643156051636, "rewards/margins": 8.050346374511719, "rewards/rejected": -8.833110809326172, "step": 42060 }, { "epoch": 0.5, "learning_rate": 2.9031788043240657e-06, "logits/chosen": -2.8484604358673096, "logits/rejected": -2.180502414703369, "logps/chosen": -209.60232543945312, "logps/rejected": -1150.4058837890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.572496771812439, "rewards/margins": 9.515517234802246, "rewards/rejected": -11.088014602661133, "step": 42070 }, { "epoch": 0.5, "learning_rate": 2.9021478223453846e-06, "logits/chosen": -2.8268322944641113, "logits/rejected": -2.4299445152282715, "logps/chosen": -106.7199935913086, "logps/rejected": -866.42138671875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6253587603569031, "rewards/margins": 7.6628737449646, "rewards/rejected": -8.288232803344727, "step": 42080 }, { "epoch": 0.5, "learning_rate": 2.901116770152811e-06, "logits/chosen": -2.8914010524749756, "logits/rejected": -2.276724338531494, "logps/chosen": -126.108154296875, "logps/rejected": -1025.9996337890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8223276138305664, "rewards/margins": 9.049917221069336, "rewards/rejected": -9.872243881225586, "step": 42090 }, { "epoch": 0.5, "learning_rate": 2.9000856479263628e-06, "logits/chosen": -2.85810923576355, "logits/rejected": -2.3584368228912354, "logps/chosen": -138.01222229003906, "logps/rejected": -871.2364501953125, "loss": 0.0884, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9585093259811401, "rewards/margins": 7.381806373596191, "rewards/rejected": -8.340314865112305, "step": 42100 }, { "epoch": 0.5, "learning_rate": 2.8990544558460724e-06, "logits/chosen": -2.8649673461914062, "logits/rejected": -2.623922824859619, "logps/chosen": -70.13176727294922, "logps/rejected": -810.7017822265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.3494275212287903, "rewards/margins": 7.391992092132568, "rewards/rejected": -7.741419792175293, "step": 42110 }, { "epoch": 0.5, "learning_rate": 2.8980231940919815e-06, "logits/chosen": -2.8450052738189697, "logits/rejected": -2.466113805770874, "logps/chosen": -110.48130798339844, "logps/rejected": -930.2654418945312, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.697556734085083, "rewards/margins": 8.230757713317871, "rewards/rejected": -8.928314208984375, "step": 42120 }, { "epoch": 0.5, "learning_rate": 2.896991862844147e-06, "logits/chosen": -2.8578896522521973, "logits/rejected": -2.4590179920196533, "logps/chosen": -108.04341888427734, "logps/rejected": -963.4371948242188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.606407105922699, "rewards/margins": 8.637351036071777, "rewards/rejected": -9.243758201599121, "step": 42130 }, { "epoch": 0.5, "learning_rate": 2.895960462282636e-06, "logits/chosen": -2.868267059326172, "logits/rejected": -2.4438815116882324, "logps/chosen": -140.0236053466797, "logps/rejected": -955.1705932617188, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.965090274810791, "rewards/margins": 8.206018447875977, "rewards/rejected": -9.171109199523926, "step": 42140 }, { "epoch": 0.5, "learning_rate": 2.894928992587528e-06, "logits/chosen": -2.9088454246520996, "logits/rejected": -2.348055601119995, "logps/chosen": -130.6787109375, "logps/rejected": -1007.6394653320312, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8483743667602539, "rewards/margins": 8.818907737731934, "rewards/rejected": -9.667282104492188, "step": 42150 }, { "epoch": 0.5, "learning_rate": 2.8938974539389143e-06, "logits/chosen": -2.8455307483673096, "logits/rejected": -2.526076555252075, "logps/chosen": -98.60310363769531, "logps/rejected": -854.3486328125, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -0.6164063215255737, "rewards/margins": 7.551621913909912, "rewards/rejected": -8.168027877807617, "step": 42160 }, { "epoch": 0.5, "learning_rate": 2.8928658465168996e-06, "logits/chosen": -2.883350372314453, "logits/rejected": -2.4711270332336426, "logps/chosen": -131.21975708007812, "logps/rejected": -953.6639404296875, "loss": 0.1396, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8648093938827515, "rewards/margins": 8.297703742980957, "rewards/rejected": -9.162511825561523, "step": 42170 }, { "epoch": 0.5, "learning_rate": 2.8918341705015986e-06, "logits/chosen": -2.8847317695617676, "logits/rejected": -2.224034547805786, "logps/chosen": -127.29075622558594, "logps/rejected": -987.859375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8101488351821899, "rewards/margins": 8.67247200012207, "rewards/rejected": -9.482622146606445, "step": 42180 }, { "epoch": 0.51, "learning_rate": 2.89080242607314e-06, "logits/chosen": -2.8585565090179443, "logits/rejected": -2.36887788772583, "logps/chosen": -131.63763427734375, "logps/rejected": -889.9296875, "loss": 0.1359, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8578287959098816, "rewards/margins": 7.660687446594238, "rewards/rejected": -8.518515586853027, "step": 42190 }, { "epoch": 0.51, "learning_rate": 2.8897706134116634e-06, "logits/chosen": -2.826432704925537, "logits/rejected": -2.2600226402282715, "logps/chosen": -134.7710418701172, "logps/rejected": -982.2926025390625, "loss": 0.2144, "rewards/accuracies": 1.0, "rewards/chosen": -0.9024569392204285, "rewards/margins": 8.537482261657715, "rewards/rejected": -9.439939498901367, "step": 42200 }, { "epoch": 0.51, "learning_rate": 2.8887387326973206e-06, "logits/chosen": -2.8138270378112793, "logits/rejected": -2.2094826698303223, "logps/chosen": -168.0941925048828, "logps/rejected": -990.0689697265625, "loss": 0.1615, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1604125499725342, "rewards/margins": 8.345621109008789, "rewards/rejected": -9.506032943725586, "step": 42210 }, { "epoch": 0.51, "learning_rate": 2.887706784110274e-06, "logits/chosen": -2.841925621032715, "logits/rejected": -2.3329594135284424, "logps/chosen": -137.0069580078125, "logps/rejected": -892.8590087890625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9058354496955872, "rewards/margins": 7.618008613586426, "rewards/rejected": -8.523843765258789, "step": 42220 }, { "epoch": 0.51, "learning_rate": 2.8866747678307e-06, "logits/chosen": -2.8485138416290283, "logits/rejected": -2.48755145072937, "logps/chosen": -99.80088806152344, "logps/rejected": -889.6961059570312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6046862006187439, "rewards/margins": 7.903459072113037, "rewards/rejected": -8.508145332336426, "step": 42230 }, { "epoch": 0.51, "learning_rate": 2.885642684038786e-06, "logits/chosen": -2.8609347343444824, "logits/rejected": -2.2768094539642334, "logps/chosen": -119.13822174072266, "logps/rejected": -886.3855590820312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6857538819313049, "rewards/margins": 7.808599948883057, "rewards/rejected": -8.494354248046875, "step": 42240 }, { "epoch": 0.51, "learning_rate": 2.8846105329147295e-06, "logits/chosen": -2.8698668479919434, "logits/rejected": -2.6113057136535645, "logps/chosen": -81.93974304199219, "logps/rejected": -793.4849853515625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4317445158958435, "rewards/margins": 7.126431465148926, "rewards/rejected": -7.558175563812256, "step": 42250 }, { "epoch": 0.51, "learning_rate": 2.8835783146387426e-06, "logits/chosen": -2.8666820526123047, "logits/rejected": -2.488219976425171, "logps/chosen": -95.7990951538086, "logps/rejected": -817.0460205078125, "loss": 0.0344, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5668507814407349, "rewards/margins": 7.234869480133057, "rewards/rejected": -7.80172061920166, "step": 42260 }, { "epoch": 0.51, "learning_rate": 2.8825460293910475e-06, "logits/chosen": -2.899686336517334, "logits/rejected": -2.563626766204834, "logps/chosen": -116.66557312011719, "logps/rejected": -787.1256103515625, "loss": 0.2713, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7673969268798828, "rewards/margins": 6.738550662994385, "rewards/rejected": -7.505948066711426, "step": 42270 }, { "epoch": 0.51, "learning_rate": 2.881513677351878e-06, "logits/chosen": -2.8474948406219482, "logits/rejected": -2.448585271835327, "logps/chosen": -88.50750732421875, "logps/rejected": -902.7429809570312, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.45988407731056213, "rewards/margins": 8.196141242980957, "rewards/rejected": -8.656023979187012, "step": 42280 }, { "epoch": 0.51, "learning_rate": 2.88048125870148e-06, "logits/chosen": -2.8663573265075684, "logits/rejected": -2.2772889137268066, "logps/chosen": -134.41250610351562, "logps/rejected": -1054.1644287109375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8645963668823242, "rewards/margins": 9.268954277038574, "rewards/rejected": -10.133550643920898, "step": 42290 }, { "epoch": 0.51, "learning_rate": 2.8794487736201104e-06, "logits/chosen": -2.854255199432373, "logits/rejected": -2.0695924758911133, "logps/chosen": -148.79029846191406, "logps/rejected": -1138.6226806640625, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.921920657157898, "rewards/margins": 10.05801773071289, "rewards/rejected": -10.979938507080078, "step": 42300 }, { "epoch": 0.51, "learning_rate": 2.8784162222880395e-06, "logits/chosen": -2.818483829498291, "logits/rejected": -2.1865761280059814, "logps/chosen": -126.60511779785156, "logps/rejected": -942.6495971679688, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7849931716918945, "rewards/margins": 8.253789901733398, "rewards/rejected": -9.038783073425293, "step": 42310 }, { "epoch": 0.51, "learning_rate": 2.8773836048855475e-06, "logits/chosen": -2.8459763526916504, "logits/rejected": -2.2456796169281006, "logps/chosen": -112.783203125, "logps/rejected": -953.9805908203125, "loss": 0.0684, "rewards/accuracies": 1.0, "rewards/chosen": -0.6769278049468994, "rewards/margins": 8.482232093811035, "rewards/rejected": -9.159159660339355, "step": 42320 }, { "epoch": 0.51, "learning_rate": 2.8763509215929254e-06, "logits/chosen": -2.8602004051208496, "logits/rejected": -2.2767317295074463, "logps/chosen": -140.60643005371094, "logps/rejected": -953.8142700195312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0102336406707764, "rewards/margins": 8.129252433776855, "rewards/rejected": -9.139485359191895, "step": 42330 }, { "epoch": 0.51, "learning_rate": 2.8753181725904778e-06, "logits/chosen": -2.8378398418426514, "logits/rejected": -2.4233360290527344, "logps/chosen": -130.09738159179688, "logps/rejected": -899.5637817382812, "loss": 0.0901, "rewards/accuracies": 1.0, "rewards/chosen": -0.8416959047317505, "rewards/margins": 7.763832092285156, "rewards/rejected": -8.605527877807617, "step": 42340 }, { "epoch": 0.51, "learning_rate": 2.87428535805852e-06, "logits/chosen": -2.8012068271636963, "logits/rejected": -2.151939630508423, "logps/chosen": -132.55950927734375, "logps/rejected": -985.9019775390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8469120860099792, "rewards/margins": 8.626667976379395, "rewards/rejected": -9.473580360412598, "step": 42350 }, { "epoch": 0.51, "learning_rate": 2.873252478177378e-06, "logits/chosen": -2.831883192062378, "logits/rejected": -2.352389097213745, "logps/chosen": -109.7757797241211, "logps/rejected": -888.7273559570312, "loss": 0.157, "rewards/accuracies": 1.0, "rewards/chosen": -0.7177994251251221, "rewards/margins": 7.78766393661499, "rewards/rejected": -8.505463600158691, "step": 42360 }, { "epoch": 0.51, "learning_rate": 2.8722195331273906e-06, "logits/chosen": -2.8362629413604736, "logits/rejected": -2.2320151329040527, "logps/chosen": -126.55696868896484, "logps/rejected": -978.1153564453125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.778630256652832, "rewards/margins": 8.620489120483398, "rewards/rejected": -9.399118423461914, "step": 42370 }, { "epoch": 0.51, "learning_rate": 2.871186523088906e-06, "logits/chosen": -2.839226722717285, "logits/rejected": -2.189434289932251, "logps/chosen": -137.4472198486328, "logps/rejected": -1102.812744140625, "loss": 0.1348, "rewards/accuracies": 1.0, "rewards/chosen": -0.8879517316818237, "rewards/margins": 9.733583450317383, "rewards/rejected": -10.621535301208496, "step": 42380 }, { "epoch": 0.51, "learning_rate": 2.870153448242285e-06, "logits/chosen": -2.878538131713867, "logits/rejected": -2.544654130935669, "logps/chosen": -131.56564331054688, "logps/rejected": -841.7683715820312, "loss": 0.1688, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9017974138259888, "rewards/margins": 7.133139610290527, "rewards/rejected": -8.034937858581543, "step": 42390 }, { "epoch": 0.51, "learning_rate": 2.8691203087679008e-06, "logits/chosen": -2.8605828285217285, "logits/rejected": -1.969291090965271, "logps/chosen": -187.88815307617188, "logps/rejected": -1207.2440185546875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.2878522872924805, "rewards/margins": 10.364702224731445, "rewards/rejected": -11.652555465698242, "step": 42400 }, { "epoch": 0.51, "learning_rate": 2.868087104846135e-06, "logits/chosen": -2.7831153869628906, "logits/rejected": -2.11216402053833, "logps/chosen": -178.8863983154297, "logps/rejected": -916.36279296875, "loss": 0.1208, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2874865531921387, "rewards/margins": 7.491957187652588, "rewards/rejected": -8.779443740844727, "step": 42410 }, { "epoch": 0.51, "learning_rate": 2.8670538366573834e-06, "logits/chosen": -2.8371312618255615, "logits/rejected": -2.2862963676452637, "logps/chosen": -111.74772644042969, "logps/rejected": -976.056640625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6932138204574585, "rewards/margins": 8.686315536499023, "rewards/rejected": -9.37952995300293, "step": 42420 }, { "epoch": 0.51, "learning_rate": 2.866020504382052e-06, "logits/chosen": -2.840125322341919, "logits/rejected": -2.0902304649353027, "logps/chosen": -154.169921875, "logps/rejected": -1122.1575927734375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0235737562179565, "rewards/margins": 9.776843070983887, "rewards/rejected": -10.800416946411133, "step": 42430 }, { "epoch": 0.51, "learning_rate": 2.8649871082005566e-06, "logits/chosen": -2.8804826736450195, "logits/rejected": -2.611921787261963, "logps/chosen": -105.53678894042969, "logps/rejected": -791.2073974609375, "loss": 0.0868, "rewards/accuracies": 1.0, "rewards/chosen": -0.6700330376625061, "rewards/margins": 6.857178688049316, "rewards/rejected": -7.527211666107178, "step": 42440 }, { "epoch": 0.51, "learning_rate": 2.8639536482933254e-06, "logits/chosen": -2.8877692222595215, "logits/rejected": -2.375394344329834, "logps/chosen": -158.35433959960938, "logps/rejected": -967.87109375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.132211685180664, "rewards/margins": 8.155562400817871, "rewards/rejected": -9.287774085998535, "step": 42450 }, { "epoch": 0.51, "learning_rate": 2.8629201248407982e-06, "logits/chosen": -2.8063156604766846, "logits/rejected": -2.1659598350524902, "logps/chosen": -173.5998992919922, "logps/rejected": -1094.3870849609375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.2201403379440308, "rewards/margins": 9.336048126220703, "rewards/rejected": -10.556188583374023, "step": 42460 }, { "epoch": 0.51, "learning_rate": 2.861886538023426e-06, "logits/chosen": -2.826491594314575, "logits/rejected": -2.018245220184326, "logps/chosen": -208.94058227539062, "logps/rejected": -1048.179443359375, "loss": 0.0493, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.49363374710083, "rewards/margins": 8.56509017944336, "rewards/rejected": -10.058725357055664, "step": 42470 }, { "epoch": 0.51, "learning_rate": 2.8608528880216686e-06, "logits/chosen": -2.831721305847168, "logits/rejected": -2.0097625255584717, "logps/chosen": -213.0562286376953, "logps/rejected": -1183.916259765625, "loss": 0.1896, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.574532151222229, "rewards/margins": 9.85125732421875, "rewards/rejected": -11.425790786743164, "step": 42480 }, { "epoch": 0.51, "learning_rate": 2.859819175015999e-06, "logits/chosen": -2.8285441398620605, "logits/rejected": -2.1141839027404785, "logps/chosen": -169.28233337402344, "logps/rejected": -965.1472778320312, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.166102409362793, "rewards/margins": 8.082427978515625, "rewards/rejected": -9.248530387878418, "step": 42490 }, { "epoch": 0.51, "learning_rate": 2.8587853991869018e-06, "logits/chosen": -2.8151469230651855, "logits/rejected": -2.089036464691162, "logps/chosen": -149.3580780029297, "logps/rejected": -1125.771240234375, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -0.9616020321846008, "rewards/margins": 9.906123161315918, "rewards/rejected": -10.867724418640137, "step": 42500 }, { "epoch": 0.51, "learning_rate": 2.85775156071487e-06, "logits/chosen": -2.830650568008423, "logits/rejected": -2.2374508380889893, "logps/chosen": -134.82467651367188, "logps/rejected": -883.4588623046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8701631426811218, "rewards/margins": 7.586419582366943, "rewards/rejected": -8.456583023071289, "step": 42510 }, { "epoch": 0.51, "learning_rate": 2.8567176597804086e-06, "logits/chosen": -2.8434579372406006, "logits/rejected": -2.225597858428955, "logps/chosen": -126.3125228881836, "logps/rejected": -971.7645263671875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8227087259292603, "rewards/margins": 8.520174026489258, "rewards/rejected": -9.34288215637207, "step": 42520 }, { "epoch": 0.51, "learning_rate": 2.855683696564035e-06, "logits/chosen": -2.8286819458007812, "logits/rejected": -2.2034549713134766, "logps/chosen": -160.97122192382812, "logps/rejected": -1083.4180908203125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0764633417129517, "rewards/margins": 9.36819839477539, "rewards/rejected": -10.444662094116211, "step": 42530 }, { "epoch": 0.51, "learning_rate": 2.8546496712462753e-06, "logits/chosen": -2.838853359222412, "logits/rejected": -2.3247952461242676, "logps/chosen": -145.6475830078125, "logps/rejected": -1063.950927734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0459668636322021, "rewards/margins": 9.205255508422852, "rewards/rejected": -10.251222610473633, "step": 42540 }, { "epoch": 0.51, "learning_rate": 2.8536155840076685e-06, "logits/chosen": -2.8389410972595215, "logits/rejected": -2.312570571899414, "logps/chosen": -131.03131103515625, "logps/rejected": -982.9525146484375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7959569692611694, "rewards/margins": 8.643147468566895, "rewards/rejected": -9.439104080200195, "step": 42550 }, { "epoch": 0.51, "learning_rate": 2.8525814350287632e-06, "logits/chosen": -2.8571057319641113, "logits/rejected": -2.3070225715637207, "logps/chosen": -144.23941040039062, "logps/rejected": -1112.018310546875, "loss": 0.1062, "rewards/accuracies": 1.0, "rewards/chosen": -0.9588390588760376, "rewards/margins": 9.769951820373535, "rewards/rejected": -10.728790283203125, "step": 42560 }, { "epoch": 0.51, "learning_rate": 2.851547224490118e-06, "logits/chosen": -2.885187864303589, "logits/rejected": -2.1554818153381348, "logps/chosen": -176.6527099609375, "logps/rejected": -1098.4619140625, "loss": 0.0947, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2351421117782593, "rewards/margins": 9.35069751739502, "rewards/rejected": -10.585840225219727, "step": 42570 }, { "epoch": 0.51, "learning_rate": 2.850512952572304e-06, "logits/chosen": -2.878333330154419, "logits/rejected": -2.3530077934265137, "logps/chosen": -111.27335357666016, "logps/rejected": -920.7713012695312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6505948305130005, "rewards/margins": 8.181909561157227, "rewards/rejected": -8.832504272460938, "step": 42580 }, { "epoch": 0.51, "learning_rate": 2.849478619455903e-06, "logits/chosen": -2.815530300140381, "logits/rejected": -2.133833885192871, "logps/chosen": -154.33705139160156, "logps/rejected": -911.4942626953125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.990859866142273, "rewards/margins": 7.7240753173828125, "rewards/rejected": -8.714935302734375, "step": 42590 }, { "epoch": 0.51, "learning_rate": 2.848444225321504e-06, "logits/chosen": -2.8844761848449707, "logits/rejected": -2.3881285190582275, "logps/chosen": -96.2110595703125, "logps/rejected": -998.8302001953125, "loss": 0.1081, "rewards/accuracies": 1.0, "rewards/chosen": -0.5239501595497131, "rewards/margins": 9.073963165283203, "rewards/rejected": -9.597912788391113, "step": 42600 }, { "epoch": 0.51, "learning_rate": 2.8474097703497117e-06, "logits/chosen": -2.8500325679779053, "logits/rejected": -2.2283849716186523, "logps/chosen": -147.81906127929688, "logps/rejected": -1014.44091796875, "loss": 0.1133, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9991158246994019, "rewards/margins": 8.750452995300293, "rewards/rejected": -9.749567985534668, "step": 42610 }, { "epoch": 0.51, "learning_rate": 2.8463752547211382e-06, "logits/chosen": -2.8927106857299805, "logits/rejected": -2.4844841957092285, "logps/chosen": -86.979736328125, "logps/rejected": -845.6539306640625, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -0.4818732738494873, "rewards/margins": 7.604652404785156, "rewards/rejected": -8.086524963378906, "step": 42620 }, { "epoch": 0.51, "learning_rate": 2.845340678616408e-06, "logits/chosen": -2.914416551589966, "logits/rejected": -2.3883774280548096, "logps/chosen": -104.65013122558594, "logps/rejected": -906.3406372070312, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -0.5658771395683289, "rewards/margins": 8.123048782348633, "rewards/rejected": -8.688925743103027, "step": 42630 }, { "epoch": 0.51, "learning_rate": 2.844306042216154e-06, "logits/chosen": -2.8558993339538574, "logits/rejected": -2.5942177772521973, "logps/chosen": -92.46026611328125, "logps/rejected": -893.2171020507812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4913715720176697, "rewards/margins": 8.052395820617676, "rewards/rejected": -8.543766975402832, "step": 42640 }, { "epoch": 0.51, "learning_rate": 2.8432713457010206e-06, "logits/chosen": -2.860058307647705, "logits/rejected": -2.41960072517395, "logps/chosen": -116.97489929199219, "logps/rejected": -861.9363403320312, "loss": 0.1353, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.720683217048645, "rewards/margins": 7.512091159820557, "rewards/rejected": -8.23277473449707, "step": 42650 }, { "epoch": 0.51, "learning_rate": 2.8422365892516646e-06, "logits/chosen": -2.882404088973999, "logits/rejected": -2.288879632949829, "logps/chosen": -106.342041015625, "logps/rejected": -921.5676879882812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6136776208877563, "rewards/margins": 8.228175163269043, "rewards/rejected": -8.841853141784668, "step": 42660 }, { "epoch": 0.51, "learning_rate": 2.8412017730487505e-06, "logits/chosen": -2.907914638519287, "logits/rejected": -2.4982635974884033, "logps/chosen": -98.94776153564453, "logps/rejected": -924.6975708007812, "loss": 0.0223, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5261107683181763, "rewards/margins": 8.3503999710083, "rewards/rejected": -8.876509666442871, "step": 42670 }, { "epoch": 0.51, "learning_rate": 2.840166897272954e-06, "logits/chosen": -2.8715527057647705, "logits/rejected": -2.5215275287628174, "logps/chosen": -101.2922134399414, "logps/rejected": -948.8350830078125, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -0.5939702391624451, "rewards/margins": 8.516458511352539, "rewards/rejected": -9.110429763793945, "step": 42680 }, { "epoch": 0.51, "learning_rate": 2.839131962104963e-06, "logits/chosen": -2.895099639892578, "logits/rejected": -2.377953052520752, "logps/chosen": -99.14155578613281, "logps/rejected": -939.7401123046875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.5328940153121948, "rewards/margins": 8.488012313842773, "rewards/rejected": -9.020906448364258, "step": 42690 }, { "epoch": 0.51, "learning_rate": 2.838096967725472e-06, "logits/chosen": -2.8827128410339355, "logits/rejected": -2.220437526702881, "logps/chosen": -139.2304229736328, "logps/rejected": -1126.697265625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7988325357437134, "rewards/margins": 10.05171012878418, "rewards/rejected": -10.850542068481445, "step": 42700 }, { "epoch": 0.51, "learning_rate": 2.83706191431519e-06, "logits/chosen": -2.835698127746582, "logits/rejected": -2.399940013885498, "logps/chosen": -90.40641784667969, "logps/rejected": -898.7088623046875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.487219899892807, "rewards/margins": 8.113934516906738, "rewards/rejected": -8.601155281066895, "step": 42710 }, { "epoch": 0.51, "learning_rate": 2.8360268020548344e-06, "logits/chosen": -2.914968729019165, "logits/rejected": -2.427077054977417, "logps/chosen": -103.86859130859375, "logps/rejected": -960.1749267578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5873851180076599, "rewards/margins": 8.653806686401367, "rewards/rejected": -9.241191864013672, "step": 42720 }, { "epoch": 0.51, "learning_rate": 2.834991631125132e-06, "logits/chosen": -2.8532299995422363, "logits/rejected": -2.4512412548065186, "logps/chosen": -102.22120666503906, "logps/rejected": -964.4411010742188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5390175580978394, "rewards/margins": 8.711332321166992, "rewards/rejected": -9.250349998474121, "step": 42730 }, { "epoch": 0.51, "learning_rate": 2.833956401706821e-06, "logits/chosen": -2.7752292156219482, "logits/rejected": -2.058772087097168, "logps/chosen": -130.8736572265625, "logps/rejected": -1072.024658203125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.7766823768615723, "rewards/margins": 9.547018051147461, "rewards/rejected": -10.323701858520508, "step": 42740 }, { "epoch": 0.51, "learning_rate": 2.83292111398065e-06, "logits/chosen": -2.8046746253967285, "logits/rejected": -2.0845143795013428, "logps/chosen": -142.4071502685547, "logps/rejected": -1051.538330078125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8538210988044739, "rewards/margins": 9.253487586975098, "rewards/rejected": -10.107308387756348, "step": 42750 }, { "epoch": 0.51, "learning_rate": 2.8318857681273772e-06, "logits/chosen": -2.869877338409424, "logits/rejected": -2.4037697315216064, "logps/chosen": -84.17828369140625, "logps/rejected": -829.31689453125, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -0.44651055335998535, "rewards/margins": 7.472002983093262, "rewards/rejected": -7.918514251708984, "step": 42760 }, { "epoch": 0.51, "learning_rate": 2.8308503643277706e-06, "logits/chosen": -2.8392186164855957, "logits/rejected": -2.1222567558288574, "logps/chosen": -129.73089599609375, "logps/rejected": -1034.364990234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.795053243637085, "rewards/margins": 9.145769119262695, "rewards/rejected": -9.940821647644043, "step": 42770 }, { "epoch": 0.51, "learning_rate": 2.8298149027626092e-06, "logits/chosen": -2.884005308151245, "logits/rejected": -2.0605995655059814, "logps/chosen": -133.14920043945312, "logps/rejected": -1091.569091796875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7463271617889404, "rewards/margins": 9.757291793823242, "rewards/rejected": -10.503620147705078, "step": 42780 }, { "epoch": 0.51, "learning_rate": 2.8287793836126833e-06, "logits/chosen": -2.87199068069458, "logits/rejected": -2.306800365447998, "logps/chosen": -135.0514678955078, "logps/rejected": -919.5113525390625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8049267530441284, "rewards/margins": 8.006756782531738, "rewards/rejected": -8.811681747436523, "step": 42790 }, { "epoch": 0.51, "learning_rate": 2.8277438070587886e-06, "logits/chosen": -2.861717939376831, "logits/rejected": -2.387023687362671, "logps/chosen": -106.8354721069336, "logps/rejected": -915.7523193359375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6254273653030396, "rewards/margins": 8.146757125854492, "rewards/rejected": -8.772184371948242, "step": 42800 }, { "epoch": 0.51, "learning_rate": 2.8267081732817357e-06, "logits/chosen": -2.8794803619384766, "logits/rejected": -2.2473015785217285, "logps/chosen": -123.43516540527344, "logps/rejected": -1025.263671875, "loss": 0.1309, "rewards/accuracies": 1.0, "rewards/chosen": -0.7251849174499512, "rewards/margins": 9.130491256713867, "rewards/rejected": -9.855677604675293, "step": 42810 }, { "epoch": 0.51, "learning_rate": 2.8256724824623443e-06, "logits/chosen": -2.8703410625457764, "logits/rejected": -2.319101095199585, "logps/chosen": -111.8861083984375, "logps/rejected": -942.6539916992188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6718307137489319, "rewards/margins": 8.374466896057129, "rewards/rejected": -9.046298027038574, "step": 42820 }, { "epoch": 0.51, "learning_rate": 2.824636734781441e-06, "logits/chosen": -2.865589141845703, "logits/rejected": -2.3340582847595215, "logps/chosen": -134.65011596679688, "logps/rejected": -972.6070556640625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8209336996078491, "rewards/margins": 8.513605117797852, "rewards/rejected": -9.334539413452148, "step": 42830 }, { "epoch": 0.51, "learning_rate": 2.823600930419865e-06, "logits/chosen": -2.9050776958465576, "logits/rejected": -2.3229968547821045, "logps/chosen": -110.91690826416016, "logps/rejected": -941.2473754882812, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6392408013343811, "rewards/margins": 8.378487586975098, "rewards/rejected": -9.017727851867676, "step": 42840 }, { "epoch": 0.51, "learning_rate": 2.8225650695584667e-06, "logits/chosen": -2.831415891647339, "logits/rejected": -2.5011487007141113, "logps/chosen": -144.7917022705078, "logps/rejected": -878.2575073242188, "loss": 0.3077, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0396358966827393, "rewards/margins": 7.3608808517456055, "rewards/rejected": -8.400517463684082, "step": 42850 }, { "epoch": 0.51, "learning_rate": 2.821529152378102e-06, "logits/chosen": -2.860872745513916, "logits/rejected": -1.9714524745941162, "logps/chosen": -146.23114013671875, "logps/rejected": -1031.4510498046875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.9543123245239258, "rewards/margins": 8.957975387573242, "rewards/rejected": -9.912287712097168, "step": 42860 }, { "epoch": 0.51, "learning_rate": 2.8204931790596407e-06, "logits/chosen": -2.849187135696411, "logits/rejected": -2.105469226837158, "logps/chosen": -140.8454132080078, "logps/rejected": -1166.041259765625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8683851361274719, "rewards/margins": 10.38584041595459, "rewards/rejected": -11.25422477722168, "step": 42870 }, { "epoch": 0.51, "learning_rate": 2.81945714978396e-06, "logits/chosen": -2.805579423904419, "logits/rejected": -2.3369805812835693, "logps/chosen": -133.33670043945312, "logps/rejected": -941.8449096679688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8508697748184204, "rewards/margins": 8.182802200317383, "rewards/rejected": -9.033673286437988, "step": 42880 }, { "epoch": 0.51, "learning_rate": 2.8184210647319493e-06, "logits/chosen": -2.870952606201172, "logits/rejected": -2.317747116088867, "logps/chosen": -117.80625915527344, "logps/rejected": -942.8507080078125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7238026857376099, "rewards/margins": 8.308890342712402, "rewards/rejected": -9.032693862915039, "step": 42890 }, { "epoch": 0.51, "learning_rate": 2.817384924084504e-06, "logits/chosen": -2.870100259780884, "logits/rejected": -2.194924831390381, "logps/chosen": -128.6234588623047, "logps/rejected": -966.4212646484375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7822920083999634, "rewards/margins": 8.490720748901367, "rewards/rejected": -9.273012161254883, "step": 42900 }, { "epoch": 0.51, "learning_rate": 2.8163487280225325e-06, "logits/chosen": -2.8974790573120117, "logits/rejected": -2.2609424591064453, "logps/chosen": -101.78349304199219, "logps/rejected": -915.7222900390625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5586541295051575, "rewards/margins": 8.209665298461914, "rewards/rejected": -8.768320083618164, "step": 42910 }, { "epoch": 0.51, "learning_rate": 2.8153124767269524e-06, "logits/chosen": -2.8687949180603027, "logits/rejected": -2.2660634517669678, "logps/chosen": -123.03719329833984, "logps/rejected": -995.9810791015625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7769824862480164, "rewards/margins": 8.790105819702148, "rewards/rejected": -9.567089080810547, "step": 42920 }, { "epoch": 0.51, "learning_rate": 2.814276170378689e-06, "logits/chosen": -2.829052209854126, "logits/rejected": -2.090568780899048, "logps/chosen": -155.74362182617188, "logps/rejected": -1002.8753662109375, "loss": 0.1049, "rewards/accuracies": 1.0, "rewards/chosen": -0.9884471893310547, "rewards/margins": 8.633336067199707, "rewards/rejected": -9.621782302856445, "step": 42930 }, { "epoch": 0.51, "learning_rate": 2.81323980915868e-06, "logits/chosen": -2.877568006515503, "logits/rejected": -2.434232473373413, "logps/chosen": -95.75953674316406, "logps/rejected": -847.5732421875, "loss": 0.1371, "rewards/accuracies": 1.0, "rewards/chosen": -0.5367575883865356, "rewards/margins": 7.56165075302124, "rewards/rejected": -8.098409652709961, "step": 42940 }, { "epoch": 0.51, "learning_rate": 2.8122033932478697e-06, "logits/chosen": -2.8610708713531494, "logits/rejected": -2.1804513931274414, "logps/chosen": -153.74668884277344, "logps/rejected": -988.7931518554688, "loss": 0.1241, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0032670497894287, "rewards/margins": 8.495685577392578, "rewards/rejected": -9.498952865600586, "step": 42950 }, { "epoch": 0.51, "learning_rate": 2.811166922827213e-06, "logits/chosen": -2.8475656509399414, "logits/rejected": -2.1065750122070312, "logps/chosen": -127.82425689697266, "logps/rejected": -936.63427734375, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -0.8169379234313965, "rewards/margins": 8.164162635803223, "rewards/rejected": -8.981100082397461, "step": 42960 }, { "epoch": 0.51, "learning_rate": 2.8101303980776767e-06, "logits/chosen": -2.8606362342834473, "logits/rejected": -2.2946829795837402, "logps/chosen": -115.57939147949219, "logps/rejected": -876.4338989257812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.69597327709198, "rewards/margins": 7.678657531738281, "rewards/rejected": -8.37463092803955, "step": 42970 }, { "epoch": 0.51, "learning_rate": 2.809093819180235e-06, "logits/chosen": -2.89082670211792, "logits/rejected": -2.6812984943389893, "logps/chosen": -60.063995361328125, "logps/rejected": -743.08837890625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.255082905292511, "rewards/margins": 6.812527656555176, "rewards/rejected": -7.0676093101501465, "step": 42980 }, { "epoch": 0.51, "learning_rate": 2.80805718631587e-06, "logits/chosen": -2.851296901702881, "logits/rejected": -2.143200159072876, "logps/chosen": -120.0467758178711, "logps/rejected": -1088.19287109375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7084657549858093, "rewards/margins": 9.774949073791504, "rewards/rejected": -10.483413696289062, "step": 42990 }, { "epoch": 0.51, "learning_rate": 2.8070204996655764e-06, "logits/chosen": -2.8248419761657715, "logits/rejected": -2.0610976219177246, "logps/chosen": -145.78306579589844, "logps/rejected": -1166.20361328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8876042366027832, "rewards/margins": 10.363649368286133, "rewards/rejected": -11.251255989074707, "step": 43000 }, { "epoch": 0.51, "learning_rate": 2.805983759410356e-06, "logits/chosen": -2.87251353263855, "logits/rejected": -2.303802013397217, "logps/chosen": -125.82259368896484, "logps/rejected": -897.3487548828125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7838960886001587, "rewards/margins": 7.802889823913574, "rewards/rejected": -8.586787223815918, "step": 43010 }, { "epoch": 0.51, "learning_rate": 2.804946965731221e-06, "logits/chosen": -2.8766939640045166, "logits/rejected": -2.5495152473449707, "logps/chosen": -85.31636810302734, "logps/rejected": -908.3445434570312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4555482864379883, "rewards/margins": 8.24428653717041, "rewards/rejected": -8.699833869934082, "step": 43020 }, { "epoch": 0.52, "learning_rate": 2.8039101188091926e-06, "logits/chosen": -2.8741981983184814, "logits/rejected": -2.601745128631592, "logps/chosen": -86.30599975585938, "logps/rejected": -804.5408935546875, "loss": 0.0216, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5049434900283813, "rewards/margins": 7.181499481201172, "rewards/rejected": -7.686443328857422, "step": 43030 }, { "epoch": 0.52, "learning_rate": 2.802873218825302e-06, "logits/chosen": -2.832765579223633, "logits/rejected": -2.2217602729797363, "logps/chosen": -145.0297393798828, "logps/rejected": -1000.6052856445312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.9689280390739441, "rewards/margins": 8.645477294921875, "rewards/rejected": -9.614405632019043, "step": 43040 }, { "epoch": 0.52, "learning_rate": 2.8018362659605885e-06, "logits/chosen": -2.89338755607605, "logits/rejected": -2.5516417026519775, "logps/chosen": -90.60460662841797, "logps/rejected": -890.3195190429688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.48924487829208374, "rewards/margins": 8.028557777404785, "rewards/rejected": -8.517802238464355, "step": 43050 }, { "epoch": 0.52, "learning_rate": 2.8007992603961008e-06, "logits/chosen": -2.8421683311462402, "logits/rejected": -2.367699384689331, "logps/chosen": -121.38743591308594, "logps/rejected": -961.4810791015625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7539361715316772, "rewards/margins": 8.472310066223145, "rewards/rejected": -9.226244926452637, "step": 43060 }, { "epoch": 0.52, "learning_rate": 2.799762202312898e-06, "logits/chosen": -2.874642848968506, "logits/rejected": -2.1096444129943848, "logps/chosen": -179.6621551513672, "logps/rejected": -1142.4039306640625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.2612183094024658, "rewards/margins": 9.752309799194336, "rewards/rejected": -11.013526916503906, "step": 43070 }, { "epoch": 0.52, "learning_rate": 2.7987250918920477e-06, "logits/chosen": -2.829451084136963, "logits/rejected": -2.2165818214416504, "logps/chosen": -139.72300720214844, "logps/rejected": -1101.118896484375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.9497588276863098, "rewards/margins": 9.641603469848633, "rewards/rejected": -10.591361999511719, "step": 43080 }, { "epoch": 0.52, "learning_rate": 2.7976879293146254e-06, "logits/chosen": -2.854919910430908, "logits/rejected": -2.405695676803589, "logps/chosen": -103.42877197265625, "logps/rejected": -852.658203125, "loss": 0.2165, "rewards/accuracies": 1.0, "rewards/chosen": -0.5538961887359619, "rewards/margins": 7.583050727844238, "rewards/rejected": -8.136945724487305, "step": 43090 }, { "epoch": 0.52, "learning_rate": 2.7966507147617172e-06, "logits/chosen": -2.833980083465576, "logits/rejected": -2.33500337600708, "logps/chosen": -136.39938354492188, "logps/rejected": -1000.27294921875, "loss": 0.0708, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8796472549438477, "rewards/margins": 8.735878944396973, "rewards/rejected": -9.61552619934082, "step": 43100 }, { "epoch": 0.52, "learning_rate": 2.7956134484144194e-06, "logits/chosen": -2.828012704849243, "logits/rejected": -2.568368434906006, "logps/chosen": -80.25975036621094, "logps/rejected": -769.842041015625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4552925229072571, "rewards/margins": 6.883074760437012, "rewards/rejected": -7.338367462158203, "step": 43110 }, { "epoch": 0.52, "learning_rate": 2.7945761304538334e-06, "logits/chosen": -2.8516900539398193, "logits/rejected": -2.423287868499756, "logps/chosen": -106.01664733886719, "logps/rejected": -871.1902465820312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6494251489639282, "rewards/margins": 7.683937072753906, "rewards/rejected": -8.333361625671387, "step": 43120 }, { "epoch": 0.52, "learning_rate": 2.793538761061073e-06, "logits/chosen": -2.839369297027588, "logits/rejected": -2.262631893157959, "logps/chosen": -133.836669921875, "logps/rejected": -1052.6669921875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8845065236091614, "rewards/margins": 9.254682540893555, "rewards/rejected": -10.139189720153809, "step": 43130 }, { "epoch": 0.52, "learning_rate": 2.792501340417261e-06, "logits/chosen": -2.864583969116211, "logits/rejected": -2.3231799602508545, "logps/chosen": -167.3424530029297, "logps/rejected": -996.0849609375, "loss": 0.1687, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.202567219734192, "rewards/margins": 8.372464179992676, "rewards/rejected": -9.575030326843262, "step": 43140 }, { "epoch": 0.52, "learning_rate": 2.7914638687035266e-06, "logits/chosen": -2.854048728942871, "logits/rejected": -2.333178758621216, "logps/chosen": -129.42222595214844, "logps/rejected": -903.5540161132812, "loss": 0.0939, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8444503545761108, "rewards/margins": 7.80664587020874, "rewards/rejected": -8.651097297668457, "step": 43150 }, { "epoch": 0.52, "learning_rate": 2.7904263461010104e-06, "logits/chosen": -2.902836322784424, "logits/rejected": -2.467738628387451, "logps/chosen": -112.41535949707031, "logps/rejected": -893.15576171875, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.6124029755592346, "rewards/margins": 7.92984676361084, "rewards/rejected": -8.54224967956543, "step": 43160 }, { "epoch": 0.52, "learning_rate": 2.7893887727908614e-06, "logits/chosen": -2.8480026721954346, "logits/rejected": -2.218327045440674, "logps/chosen": -128.56472778320312, "logps/rejected": -1070.2828369140625, "loss": 0.1288, "rewards/accuracies": 1.0, "rewards/chosen": -0.7921518087387085, "rewards/margins": 9.52497673034668, "rewards/rejected": -10.31712818145752, "step": 43170 }, { "epoch": 0.52, "learning_rate": 2.788351148954236e-06, "logits/chosen": -2.8711698055267334, "logits/rejected": -2.448396682739258, "logps/chosen": -111.17362976074219, "logps/rejected": -883.1265869140625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5811859369277954, "rewards/margins": 7.872342586517334, "rewards/rejected": -8.453527450561523, "step": 43180 }, { "epoch": 0.52, "learning_rate": 2.7873134747723e-06, "logits/chosen": -2.860827684402466, "logits/rejected": -2.463566303253174, "logps/chosen": -110.65203857421875, "logps/rejected": -841.48779296875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6682447195053101, "rewards/margins": 7.376306056976318, "rewards/rejected": -8.044549942016602, "step": 43190 }, { "epoch": 0.52, "learning_rate": 2.7862757504262306e-06, "logits/chosen": -2.954274892807007, "logits/rejected": -2.4087119102478027, "logps/chosen": -111.33558654785156, "logps/rejected": -980.2267456054688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6349678635597229, "rewards/margins": 8.782073974609375, "rewards/rejected": -9.41704273223877, "step": 43200 }, { "epoch": 0.52, "learning_rate": 2.78523797609721e-06, "logits/chosen": -2.851654052734375, "logits/rejected": -2.3739423751831055, "logps/chosen": -143.0849151611328, "logps/rejected": -963.6583251953125, "loss": 0.1079, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.961199164390564, "rewards/margins": 8.280816078186035, "rewards/rejected": -9.24201488494873, "step": 43210 }, { "epoch": 0.52, "learning_rate": 2.784200151966431e-06, "logits/chosen": -2.822859764099121, "logits/rejected": -2.284423351287842, "logps/chosen": -109.3176498413086, "logps/rejected": -883.8992309570312, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.6019741296768188, "rewards/margins": 7.839883327484131, "rewards/rejected": -8.441858291625977, "step": 43220 }, { "epoch": 0.52, "learning_rate": 2.783162278215094e-06, "logits/chosen": -2.860171318054199, "logits/rejected": -2.0733225345611572, "logps/chosen": -159.82501220703125, "logps/rejected": -1077.9912109375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.0625877380371094, "rewards/margins": 9.316750526428223, "rewards/rejected": -10.379338264465332, "step": 43230 }, { "epoch": 0.52, "learning_rate": 2.78212435502441e-06, "logits/chosen": -2.8936502933502197, "logits/rejected": -2.55159592628479, "logps/chosen": -90.50096130371094, "logps/rejected": -814.4366455078125, "loss": 0.0187, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.46980881690979004, "rewards/margins": 7.29611349105835, "rewards/rejected": -7.765922546386719, "step": 43240 }, { "epoch": 0.52, "learning_rate": 2.7810863825755977e-06, "logits/chosen": -2.840559244155884, "logits/rejected": -2.2868754863739014, "logps/chosen": -123.17586517333984, "logps/rejected": -1038.6982421875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7383179664611816, "rewards/margins": 9.26405143737793, "rewards/rejected": -10.002370834350586, "step": 43250 }, { "epoch": 0.52, "learning_rate": 2.7800483610498835e-06, "logits/chosen": -2.8729233741760254, "logits/rejected": -2.1725549697875977, "logps/chosen": -135.11874389648438, "logps/rejected": -1137.1995849609375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8883885145187378, "rewards/margins": 10.082180976867676, "rewards/rejected": -10.970568656921387, "step": 43260 }, { "epoch": 0.52, "learning_rate": 2.7790102906285038e-06, "logits/chosen": -2.8238067626953125, "logits/rejected": -2.019282102584839, "logps/chosen": -138.32443237304688, "logps/rejected": -1118.2374267578125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8596706390380859, "rewards/margins": 9.923009872436523, "rewards/rejected": -10.782679557800293, "step": 43270 }, { "epoch": 0.52, "learning_rate": 2.777972171492701e-06, "logits/chosen": -2.8714394569396973, "logits/rejected": -2.4382293224334717, "logps/chosen": -91.03209686279297, "logps/rejected": -816.4514770507812, "loss": 0.0369, "rewards/accuracies": 1.0, "rewards/chosen": -0.482151597738266, "rewards/margins": 7.307075500488281, "rewards/rejected": -7.789227485656738, "step": 43280 }, { "epoch": 0.52, "learning_rate": 2.7769340038237296e-06, "logits/chosen": -2.826463222503662, "logits/rejected": -2.4327666759490967, "logps/chosen": -92.06537628173828, "logps/rejected": -847.84619140625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5282447338104248, "rewards/margins": 7.572489261627197, "rewards/rejected": -8.100732803344727, "step": 43290 }, { "epoch": 0.52, "learning_rate": 2.7758957878028505e-06, "logits/chosen": -2.8542580604553223, "logits/rejected": -2.1321990489959717, "logps/chosen": -122.7144775390625, "logps/rejected": -1050.471923828125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6697887182235718, "rewards/margins": 9.446037292480469, "rewards/rejected": -10.115826606750488, "step": 43300 }, { "epoch": 0.52, "learning_rate": 2.7748575236113334e-06, "logits/chosen": -2.917217969894409, "logits/rejected": -2.52702260017395, "logps/chosen": -82.72422790527344, "logps/rejected": -917.1624755859375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.44382911920547485, "rewards/margins": 8.359197616577148, "rewards/rejected": -8.803028106689453, "step": 43310 }, { "epoch": 0.52, "learning_rate": 2.7738192114304557e-06, "logits/chosen": -2.811126232147217, "logits/rejected": -2.3415257930755615, "logps/chosen": -133.54147338867188, "logps/rejected": -888.9192504882812, "loss": 0.1309, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8925898671150208, "rewards/margins": 7.607839107513428, "rewards/rejected": -8.500429153442383, "step": 43320 }, { "epoch": 0.52, "learning_rate": 2.7727808514415037e-06, "logits/chosen": -2.865708589553833, "logits/rejected": -2.2341079711914062, "logps/chosen": -107.65411376953125, "logps/rejected": -972.1148681640625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.6083835959434509, "rewards/margins": 8.737838745117188, "rewards/rejected": -9.346222877502441, "step": 43330 }, { "epoch": 0.52, "learning_rate": 2.771742443825774e-06, "logits/chosen": -2.869858980178833, "logits/rejected": -2.2328312397003174, "logps/chosen": -137.05337524414062, "logps/rejected": -1111.2589111328125, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.8466981649398804, "rewards/margins": 9.866925239562988, "rewards/rejected": -10.713623046875, "step": 43340 }, { "epoch": 0.52, "learning_rate": 2.770703988764567e-06, "logits/chosen": -2.9109530448913574, "logits/rejected": -2.3814210891723633, "logps/chosen": -117.68949127197266, "logps/rejected": -877.2637939453125, "loss": 0.1084, "rewards/accuracies": 1.0, "rewards/chosen": -0.7266902327537537, "rewards/margins": 7.646851539611816, "rewards/rejected": -8.373540878295898, "step": 43350 }, { "epoch": 0.52, "learning_rate": 2.7696654864391958e-06, "logits/chosen": -2.867716073989868, "logits/rejected": -2.445366621017456, "logps/chosen": -89.41490173339844, "logps/rejected": -892.8674926757812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4319966435432434, "rewards/margins": 8.121682167053223, "rewards/rejected": -8.553678512573242, "step": 43360 }, { "epoch": 0.52, "learning_rate": 2.7686269370309805e-06, "logits/chosen": -2.843872308731079, "logits/rejected": -2.3282628059387207, "logps/chosen": -157.0325164794922, "logps/rejected": -943.3203125, "loss": 0.1796, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.0687828063964844, "rewards/margins": 7.968295097351074, "rewards/rejected": -9.037077903747559, "step": 43370 }, { "epoch": 0.52, "learning_rate": 2.767588340721246e-06, "logits/chosen": -2.8337395191192627, "logits/rejected": -2.2307403087615967, "logps/chosen": -133.5491485595703, "logps/rejected": -901.6270751953125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8144624829292297, "rewards/margins": 7.810303688049316, "rewards/rejected": -8.62476634979248, "step": 43380 }, { "epoch": 0.52, "learning_rate": 2.766549697691332e-06, "logits/chosen": -2.87990665435791, "logits/rejected": -2.2206974029541016, "logps/chosen": -115.09034729003906, "logps/rejected": -1007.0418701171875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.661756694316864, "rewards/margins": 9.024998664855957, "rewards/rejected": -9.68675422668457, "step": 43390 }, { "epoch": 0.52, "learning_rate": 2.765511008122581e-06, "logits/chosen": -2.8072617053985596, "logits/rejected": -2.1768479347229004, "logps/chosen": -157.82461547851562, "logps/rejected": -1017.45458984375, "loss": 0.0764, "rewards/accuracies": 1.0, "rewards/chosen": -1.1042548418045044, "rewards/margins": 8.663686752319336, "rewards/rejected": -9.76794147491455, "step": 43400 }, { "epoch": 0.52, "learning_rate": 2.7644722721963454e-06, "logits/chosen": -2.8577094078063965, "logits/rejected": -2.1130731105804443, "logps/chosen": -146.9774932861328, "logps/rejected": -1031.667236328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9270893335342407, "rewards/margins": 8.990638732910156, "rewards/rejected": -9.917728424072266, "step": 43410 }, { "epoch": 0.52, "learning_rate": 2.7634334900939853e-06, "logits/chosen": -2.859837293624878, "logits/rejected": -2.4510345458984375, "logps/chosen": -108.68245697021484, "logps/rejected": -887.447265625, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -0.6284744739532471, "rewards/margins": 7.848489284515381, "rewards/rejected": -8.476963996887207, "step": 43420 }, { "epoch": 0.52, "learning_rate": 2.76239466199687e-06, "logits/chosen": -2.8866984844207764, "logits/rejected": -2.4329276084899902, "logps/chosen": -137.20578002929688, "logps/rejected": -899.3565673828125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.8664559125900269, "rewards/margins": 7.737078666687012, "rewards/rejected": -8.603534698486328, "step": 43430 }, { "epoch": 0.52, "learning_rate": 2.761355788086375e-06, "logits/chosen": -2.8688178062438965, "logits/rejected": -2.4294424057006836, "logps/chosen": -123.84230041503906, "logps/rejected": -913.8717651367188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8036416172981262, "rewards/margins": 7.943279266357422, "rewards/rejected": -8.746920585632324, "step": 43440 }, { "epoch": 0.52, "learning_rate": 2.760316868543885e-06, "logits/chosen": -2.828950881958008, "logits/rejected": -1.979514718055725, "logps/chosen": -165.38577270507812, "logps/rejected": -1220.16552734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.1019307374954224, "rewards/margins": 10.693662643432617, "rewards/rejected": -11.795594215393066, "step": 43450 }, { "epoch": 0.52, "learning_rate": 2.7592779035507933e-06, "logits/chosen": -2.826387405395508, "logits/rejected": -2.3722023963928223, "logps/chosen": -127.38938903808594, "logps/rejected": -1052.01025390625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8558019399642944, "rewards/margins": 9.280192375183105, "rewards/rejected": -10.135995864868164, "step": 43460 }, { "epoch": 0.52, "learning_rate": 2.7582388932884995e-06, "logits/chosen": -2.8748788833618164, "logits/rejected": -2.1913435459136963, "logps/chosen": -162.37110900878906, "logps/rejected": -1062.500244140625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1021630764007568, "rewards/margins": 9.109282493591309, "rewards/rejected": -10.211444854736328, "step": 43470 }, { "epoch": 0.52, "learning_rate": 2.7571998379384123e-06, "logits/chosen": -2.8872039318084717, "logits/rejected": -2.4261178970336914, "logps/chosen": -139.9659881591797, "logps/rejected": -861.0078125, "loss": 0.1024, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9750110507011414, "rewards/margins": 7.2590484619140625, "rewards/rejected": -8.23405933380127, "step": 43480 }, { "epoch": 0.52, "learning_rate": 2.7561607376819477e-06, "logits/chosen": -2.84829044342041, "logits/rejected": -2.3280694484710693, "logps/chosen": -150.84938049316406, "logps/rejected": -942.6741943359375, "loss": 0.0244, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0488550662994385, "rewards/margins": 7.969402313232422, "rewards/rejected": -9.018257141113281, "step": 43490 }, { "epoch": 0.52, "learning_rate": 2.7551215927005298e-06, "logits/chosen": -2.8474972248077393, "logits/rejected": -2.226111888885498, "logps/chosen": -150.66307067871094, "logps/rejected": -1042.41015625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9953950047492981, "rewards/margins": 9.035600662231445, "rewards/rejected": -10.03099536895752, "step": 43500 }, { "epoch": 0.52, "learning_rate": 2.75408240317559e-06, "logits/chosen": -2.885970115661621, "logits/rejected": -2.207878828048706, "logps/chosen": -151.3515167236328, "logps/rejected": -1041.4188232421875, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -0.9643243551254272, "rewards/margins": 9.015971183776855, "rewards/rejected": -9.98029613494873, "step": 43510 }, { "epoch": 0.52, "learning_rate": 2.753043169288569e-06, "logits/chosen": -2.811720371246338, "logits/rejected": -2.152129650115967, "logps/chosen": -143.948486328125, "logps/rejected": -987.6290283203125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.9934830665588379, "rewards/margins": 8.494089126586914, "rewards/rejected": -9.48757266998291, "step": 43520 }, { "epoch": 0.52, "learning_rate": 2.7520038912209133e-06, "logits/chosen": -2.871626853942871, "logits/rejected": -2.411196231842041, "logps/chosen": -139.16073608398438, "logps/rejected": -1089.5, "loss": 0.1078, "rewards/accuracies": 1.0, "rewards/chosen": -0.951612651348114, "rewards/margins": 9.550722122192383, "rewards/rejected": -10.502335548400879, "step": 43530 }, { "epoch": 0.52, "learning_rate": 2.7509645691540775e-06, "logits/chosen": -2.8069446086883545, "logits/rejected": -2.3220601081848145, "logps/chosen": -158.89772033691406, "logps/rejected": -914.46484375, "loss": 0.0965, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1612025499343872, "rewards/margins": 7.597581386566162, "rewards/rejected": -8.758783340454102, "step": 43540 }, { "epoch": 0.52, "learning_rate": 2.7499252032695256e-06, "logits/chosen": -2.8505635261535645, "logits/rejected": -2.3530831336975098, "logps/chosen": -133.6879119873047, "logps/rejected": -981.4951171875, "loss": 0.1422, "rewards/accuracies": 1.0, "rewards/chosen": -0.8213812112808228, "rewards/margins": 8.604806900024414, "rewards/rejected": -9.426187515258789, "step": 43550 }, { "epoch": 0.52, "learning_rate": 2.748885793748727e-06, "logits/chosen": -2.8679232597351074, "logits/rejected": -2.597935676574707, "logps/chosen": -84.28907775878906, "logps/rejected": -839.8331909179688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4809589385986328, "rewards/margins": 7.564812660217285, "rewards/rejected": -8.045770645141602, "step": 43560 }, { "epoch": 0.52, "learning_rate": 2.7478463407731604e-06, "logits/chosen": -2.8129143714904785, "logits/rejected": -2.16391658782959, "logps/chosen": -166.5081787109375, "logps/rejected": -990.8258666992188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1695623397827148, "rewards/margins": 8.336104393005371, "rewards/rejected": -9.505667686462402, "step": 43570 }, { "epoch": 0.52, "learning_rate": 2.74680684452431e-06, "logits/chosen": -2.893203020095825, "logits/rejected": -2.430711269378662, "logps/chosen": -134.35345458984375, "logps/rejected": -921.3746337890625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8855621218681335, "rewards/margins": 7.957690238952637, "rewards/rejected": -8.843252182006836, "step": 43580 }, { "epoch": 0.52, "learning_rate": 2.745767305183671e-06, "logits/chosen": -2.8816187381744385, "logits/rejected": -2.5801756381988525, "logps/chosen": -91.43013000488281, "logps/rejected": -913.75634765625, "loss": 0.1223, "rewards/accuracies": 1.0, "rewards/chosen": -0.5255996584892273, "rewards/margins": 8.241800308227539, "rewards/rejected": -8.767401695251465, "step": 43590 }, { "epoch": 0.52, "learning_rate": 2.744727722932743e-06, "logits/chosen": -2.8223633766174316, "logits/rejected": -2.0981123447418213, "logps/chosen": -239.63839721679688, "logps/rejected": -1203.5084228515625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.8535295724868774, "rewards/margins": 9.783609390258789, "rewards/rejected": -11.637139320373535, "step": 43600 }, { "epoch": 0.52, "learning_rate": 2.743688097953034e-06, "logits/chosen": -2.854893445968628, "logits/rejected": -2.388284206390381, "logps/chosen": -99.26676940917969, "logps/rejected": -903.3317260742188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5939812064170837, "rewards/margins": 8.066534042358398, "rewards/rejected": -8.660514831542969, "step": 43610 }, { "epoch": 0.52, "learning_rate": 2.7426484304260605e-06, "logits/chosen": -2.8240294456481934, "logits/rejected": -2.1560473442077637, "logps/chosen": -203.86351013183594, "logps/rejected": -1125.045166015625, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -1.5290950536727905, "rewards/margins": 9.317983627319336, "rewards/rejected": -10.847078323364258, "step": 43620 }, { "epoch": 0.52, "learning_rate": 2.741608720533344e-06, "logits/chosen": -2.8578295707702637, "logits/rejected": -2.3397533893585205, "logps/chosen": -169.1082305908203, "logps/rejected": -967.4718017578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.2183163166046143, "rewards/margins": 8.080120086669922, "rewards/rejected": -9.298436164855957, "step": 43630 }, { "epoch": 0.52, "learning_rate": 2.7405689684564167e-06, "logits/chosen": -2.781585216522217, "logits/rejected": -2.289442777633667, "logps/chosen": -195.3646697998047, "logps/rejected": -1068.912109375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.4684504270553589, "rewards/margins": 8.807897567749023, "rewards/rejected": -10.276349067687988, "step": 43640 }, { "epoch": 0.52, "learning_rate": 2.7395291743768152e-06, "logits/chosen": -2.8532395362854004, "logits/rejected": -2.223153591156006, "logps/chosen": -168.88536071777344, "logps/rejected": -1001.400390625, "loss": 0.0939, "rewards/accuracies": 1.0, "rewards/chosen": -1.19123113155365, "rewards/margins": 8.43479061126709, "rewards/rejected": -9.626020431518555, "step": 43650 }, { "epoch": 0.52, "learning_rate": 2.7384893384760857e-06, "logits/chosen": -2.781700611114502, "logits/rejected": -2.149008274078369, "logps/chosen": -172.07188415527344, "logps/rejected": -1021.6018676757812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.246605634689331, "rewards/margins": 8.576790809631348, "rewards/rejected": -9.823395729064941, "step": 43660 }, { "epoch": 0.52, "learning_rate": 2.73744946093578e-06, "logits/chosen": -2.8837509155273438, "logits/rejected": -2.3660085201263428, "logps/chosen": -146.67239379882812, "logps/rejected": -1034.563232421875, "loss": 0.1701, "rewards/accuracies": 1.0, "rewards/chosen": -0.9855842590332031, "rewards/margins": 8.974872589111328, "rewards/rejected": -9.960456848144531, "step": 43670 }, { "epoch": 0.52, "learning_rate": 2.736409541937457e-06, "logits/chosen": -2.845130443572998, "logits/rejected": -2.2605812549591064, "logps/chosen": -195.37777709960938, "logps/rejected": -1068.097900390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.4119317531585693, "rewards/margins": 8.87670612335205, "rewards/rejected": -10.288637161254883, "step": 43680 }, { "epoch": 0.52, "learning_rate": 2.7353695816626857e-06, "logits/chosen": -2.840985059738159, "logits/rejected": -2.2779178619384766, "logps/chosen": -148.91297912597656, "logps/rejected": -967.6865234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0405609607696533, "rewards/margins": 8.250642776489258, "rewards/rejected": -9.291204452514648, "step": 43690 }, { "epoch": 0.52, "learning_rate": 2.7343295802930386e-06, "logits/chosen": -2.846879482269287, "logits/rejected": -2.2636988162994385, "logps/chosen": -187.97540283203125, "logps/rejected": -1019.1935424804688, "loss": 0.104, "rewards/accuracies": 1.0, "rewards/chosen": -1.418755292892456, "rewards/margins": 8.379164695739746, "rewards/rejected": -9.797920227050781, "step": 43700 }, { "epoch": 0.52, "learning_rate": 2.7332895380100976e-06, "logits/chosen": -2.8747000694274902, "logits/rejected": -2.4078640937805176, "logps/chosen": -129.9452362060547, "logps/rejected": -919.8515625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8470815420150757, "rewards/margins": 7.977530002593994, "rewards/rejected": -8.82461166381836, "step": 43710 }, { "epoch": 0.52, "learning_rate": 2.732249454995452e-06, "logits/chosen": -2.8828577995300293, "logits/rejected": -2.243358612060547, "logps/chosen": -165.9210662841797, "logps/rejected": -1036.4036865234375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.155150055885315, "rewards/margins": 8.809640884399414, "rewards/rejected": -9.964791297912598, "step": 43720 }, { "epoch": 0.52, "learning_rate": 2.7312093314306954e-06, "logits/chosen": -2.8121533393859863, "logits/rejected": -1.9509022235870361, "logps/chosen": -227.8342742919922, "logps/rejected": -1252.4827880859375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.6540749073028564, "rewards/margins": 10.454143524169922, "rewards/rejected": -12.108217239379883, "step": 43730 }, { "epoch": 0.52, "learning_rate": 2.730169167497432e-06, "logits/chosen": -2.851451873779297, "logits/rejected": -2.4232306480407715, "logps/chosen": -149.0309600830078, "logps/rejected": -934.0362548828125, "loss": 0.2014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0405813455581665, "rewards/margins": 7.9221296310424805, "rewards/rejected": -8.9627103805542, "step": 43740 }, { "epoch": 0.52, "learning_rate": 2.7291289633772714e-06, "logits/chosen": -2.902535915374756, "logits/rejected": -2.517235517501831, "logps/chosen": -164.88259887695312, "logps/rejected": -916.4157104492188, "loss": 0.1374, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2127416133880615, "rewards/margins": 7.573822021484375, "rewards/rejected": -8.786564826965332, "step": 43750 }, { "epoch": 0.52, "learning_rate": 2.7280887192518305e-06, "logits/chosen": -2.834285020828247, "logits/rejected": -2.1664230823516846, "logps/chosen": -209.91043090820312, "logps/rejected": -1103.7857666015625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.6133029460906982, "rewards/margins": 9.022878646850586, "rewards/rejected": -10.636181831359863, "step": 43760 }, { "epoch": 0.52, "learning_rate": 2.7270484353027325e-06, "logits/chosen": -2.8531532287597656, "logits/rejected": -2.0996339321136475, "logps/chosen": -201.713623046875, "logps/rejected": -1167.0498046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.518835425376892, "rewards/margins": 9.754175186157227, "rewards/rejected": -11.273012161254883, "step": 43770 }, { "epoch": 0.52, "learning_rate": 2.7260081117116078e-06, "logits/chosen": -2.9091684818267822, "logits/rejected": -2.4669148921966553, "logps/chosen": -149.4280242919922, "logps/rejected": -985.5242919921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.048742651939392, "rewards/margins": 8.416122436523438, "rewards/rejected": -9.464864730834961, "step": 43780 }, { "epoch": 0.52, "learning_rate": 2.724967748660096e-06, "logits/chosen": -2.8391125202178955, "logits/rejected": -2.079230785369873, "logps/chosen": -204.90658569335938, "logps/rejected": -1180.813232421875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.5303490161895752, "rewards/margins": 9.87030029296875, "rewards/rejected": -11.40064811706543, "step": 43790 }, { "epoch": 0.52, "learning_rate": 2.7239273463298378e-06, "logits/chosen": -2.8826472759246826, "logits/rejected": -2.296450138092041, "logps/chosen": -187.53121948242188, "logps/rejected": -1134.9866943359375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.3259217739105225, "rewards/margins": 9.646108627319336, "rewards/rejected": -10.972030639648438, "step": 43800 }, { "epoch": 0.52, "learning_rate": 2.722886904902488e-06, "logits/chosen": -2.840014934539795, "logits/rejected": -2.4869918823242188, "logps/chosen": -112.54679107666016, "logps/rejected": -907.0452880859375, "loss": 0.1043, "rewards/accuracies": 1.0, "rewards/chosen": -0.7326689958572388, "rewards/margins": 7.942263603210449, "rewards/rejected": -8.674932479858398, "step": 43810 }, { "epoch": 0.52, "learning_rate": 2.7218464245597043e-06, "logits/chosen": -2.7747318744659424, "logits/rejected": -2.250136137008667, "logps/chosen": -159.45962524414062, "logps/rejected": -1030.3345947265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1015571355819702, "rewards/margins": 8.824549674987793, "rewards/rejected": -9.926107406616211, "step": 43820 }, { "epoch": 0.52, "learning_rate": 2.7208059054831497e-06, "logits/chosen": -2.9253573417663574, "logits/rejected": -2.5385189056396484, "logps/chosen": -114.90473937988281, "logps/rejected": -895.5462646484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7321608662605286, "rewards/margins": 7.86715030670166, "rewards/rejected": -8.599311828613281, "step": 43830 }, { "epoch": 0.52, "learning_rate": 2.7197653478544982e-06, "logits/chosen": -2.8952624797821045, "logits/rejected": -2.30366849899292, "logps/chosen": -154.527587890625, "logps/rejected": -1009.3193359375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.1247169971466064, "rewards/margins": 8.592262268066406, "rewards/rejected": -9.71697998046875, "step": 43840 }, { "epoch": 0.52, "learning_rate": 2.718724751855428e-06, "logits/chosen": -2.863659143447876, "logits/rejected": -2.285012722015381, "logps/chosen": -208.3686065673828, "logps/rejected": -1123.143310546875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.5389039516448975, "rewards/margins": 9.287132263183594, "rewards/rejected": -10.82603645324707, "step": 43850 }, { "epoch": 0.53, "learning_rate": 2.717684117667622e-06, "logits/chosen": -2.8577425479888916, "logits/rejected": -2.3074660301208496, "logps/chosen": -155.51724243164062, "logps/rejected": -966.8668823242188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0246959924697876, "rewards/margins": 8.257749557495117, "rewards/rejected": -9.282444953918457, "step": 43860 }, { "epoch": 0.53, "learning_rate": 2.7166434454727743e-06, "logits/chosen": -2.8524601459503174, "logits/rejected": -2.3314735889434814, "logps/chosen": -170.84823608398438, "logps/rejected": -1032.062744140625, "loss": 0.2649, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2256357669830322, "rewards/margins": 8.719853401184082, "rewards/rejected": -9.945488929748535, "step": 43870 }, { "epoch": 0.53, "learning_rate": 2.7156027354525834e-06, "logits/chosen": -2.864598035812378, "logits/rejected": -2.414316177368164, "logps/chosen": -121.5591812133789, "logps/rejected": -916.33544921875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8204405903816223, "rewards/margins": 7.96865177154541, "rewards/rejected": -8.789091110229492, "step": 43880 }, { "epoch": 0.53, "learning_rate": 2.714561987788753e-06, "logits/chosen": -2.8509602546691895, "logits/rejected": -2.116452693939209, "logps/chosen": -243.0188446044922, "logps/rejected": -1223.2213134765625, "loss": 0.0971, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.8221099376678467, "rewards/margins": 9.992454528808594, "rewards/rejected": -11.81456470489502, "step": 43890 }, { "epoch": 0.53, "learning_rate": 2.7135212026629955e-06, "logits/chosen": -2.890657424926758, "logits/rejected": -2.353010654449463, "logps/chosen": -171.54685974121094, "logps/rejected": -1086.093017578125, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -1.243658423423767, "rewards/margins": 9.208023071289062, "rewards/rejected": -10.451681137084961, "step": 43900 }, { "epoch": 0.53, "learning_rate": 2.7124803802570286e-06, "logits/chosen": -2.824803590774536, "logits/rejected": -2.3778600692749023, "logps/chosen": -133.1650848388672, "logps/rejected": -966.2965698242188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9035542607307434, "rewards/margins": 8.372757911682129, "rewards/rejected": -9.276311874389648, "step": 43910 }, { "epoch": 0.53, "learning_rate": 2.7114395207525784e-06, "logits/chosen": -2.8733811378479004, "logits/rejected": -2.2488598823547363, "logps/chosen": -228.42587280273438, "logps/rejected": -1207.6959228515625, "loss": 0.2703, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.7423025369644165, "rewards/margins": 9.92830753326416, "rewards/rejected": -11.670609474182129, "step": 43920 }, { "epoch": 0.53, "learning_rate": 2.7103986243313745e-06, "logits/chosen": -2.864323616027832, "logits/rejected": -2.4604084491729736, "logps/chosen": -142.3118896484375, "logps/rejected": -806.3140869140625, "loss": 0.1922, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.0321455001831055, "rewards/margins": 6.664502143859863, "rewards/rejected": -7.696646690368652, "step": 43930 }, { "epoch": 0.53, "learning_rate": 2.709357691175155e-06, "logits/chosen": -2.8594768047332764, "logits/rejected": -1.9978971481323242, "logps/chosen": -215.7018585205078, "logps/rejected": -1243.6402587890625, "loss": 0.0192, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6004879474639893, "rewards/margins": 10.423294067382812, "rewards/rejected": -12.023781776428223, "step": 43940 }, { "epoch": 0.53, "learning_rate": 2.7083167214656652e-06, "logits/chosen": -2.894371509552002, "logits/rejected": -2.604020595550537, "logps/chosen": -76.33403015136719, "logps/rejected": -840.0028076171875, "loss": 0.1241, "rewards/accuracies": 1.0, "rewards/chosen": -0.4070788323879242, "rewards/margins": 7.61721658706665, "rewards/rejected": -8.024293899536133, "step": 43950 }, { "epoch": 0.53, "learning_rate": 2.7072757153846533e-06, "logits/chosen": -2.8765792846679688, "logits/rejected": -2.4336915016174316, "logps/chosen": -151.42660522460938, "logps/rejected": -965.9398193359375, "loss": 0.1643, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1089622974395752, "rewards/margins": 8.156354904174805, "rewards/rejected": -9.2653169631958, "step": 43960 }, { "epoch": 0.53, "learning_rate": 2.706234673113877e-06, "logits/chosen": -2.8820905685424805, "logits/rejected": -2.5345301628112793, "logps/chosen": -121.04353332519531, "logps/rejected": -884.5905151367188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7895889282226562, "rewards/margins": 7.671667575836182, "rewards/rejected": -8.461256980895996, "step": 43970 }, { "epoch": 0.53, "learning_rate": 2.705193594835101e-06, "logits/chosen": -2.887843370437622, "logits/rejected": -2.7375845909118652, "logps/chosen": -41.622074127197266, "logps/rejected": -671.3709716796875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.08709894865751266, "rewards/margins": 6.266558647155762, "rewards/rejected": -6.3536577224731445, "step": 43980 }, { "epoch": 0.53, "learning_rate": 2.7041524807300923e-06, "logits/chosen": -2.888308048248291, "logits/rejected": -2.4196524620056152, "logps/chosen": -155.9433135986328, "logps/rejected": -1102.7608642578125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.079883098602295, "rewards/margins": 9.553423881530762, "rewards/rejected": -10.633307456970215, "step": 43990 }, { "epoch": 0.53, "learning_rate": 2.7031113309806272e-06, "logits/chosen": -2.8679745197296143, "logits/rejected": -2.4098293781280518, "logps/chosen": -114.61311340332031, "logps/rejected": -890.7191162109375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7347984910011292, "rewards/margins": 7.8018317222595215, "rewards/rejected": -8.536630630493164, "step": 44000 }, { "epoch": 0.53, "learning_rate": 2.702070145768488e-06, "logits/chosen": -2.90218186378479, "logits/rejected": -2.4665026664733887, "logps/chosen": -132.82240295410156, "logps/rejected": -902.5842895507812, "loss": 0.1648, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.901702880859375, "rewards/margins": 7.753470420837402, "rewards/rejected": -8.655173301696777, "step": 44010 }, { "epoch": 0.53, "learning_rate": 2.701028925275462e-06, "logits/chosen": -2.825873851776123, "logits/rejected": -2.2204396724700928, "logps/chosen": -183.71437072753906, "logps/rejected": -1206.2672119140625, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -1.311368465423584, "rewards/margins": 10.36081314086914, "rewards/rejected": -11.672182083129883, "step": 44020 }, { "epoch": 0.53, "learning_rate": 2.6999876696833447e-06, "logits/chosen": -2.8748891353607178, "logits/rejected": -2.065915584564209, "logps/chosen": -181.06588745117188, "logps/rejected": -1106.1737060546875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.2860956192016602, "rewards/margins": 9.365877151489258, "rewards/rejected": -10.651973724365234, "step": 44030 }, { "epoch": 0.53, "learning_rate": 2.698946379173936e-06, "logits/chosen": -2.883711576461792, "logits/rejected": -2.6909286975860596, "logps/chosen": -69.2506332397461, "logps/rejected": -886.0335693359375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.3439524173736572, "rewards/margins": 8.145645141601562, "rewards/rejected": -8.48959732055664, "step": 44040 }, { "epoch": 0.53, "learning_rate": 2.697905053929042e-06, "logits/chosen": -2.8586833477020264, "logits/rejected": -2.4511525630950928, "logps/chosen": -133.25550842285156, "logps/rejected": -1010.2442626953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8870679140090942, "rewards/margins": 8.814445495605469, "rewards/rejected": -9.701513290405273, "step": 44050 }, { "epoch": 0.53, "learning_rate": 2.696863694130475e-06, "logits/chosen": -2.832836627960205, "logits/rejected": -1.9260098934173584, "logps/chosen": -210.94393920898438, "logps/rejected": -1162.5008544921875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.4874153137207031, "rewards/margins": 9.72215461730957, "rewards/rejected": -11.209569931030273, "step": 44060 }, { "epoch": 0.53, "learning_rate": 2.695822299960053e-06, "logits/chosen": -2.8542861938476562, "logits/rejected": -2.200974464416504, "logps/chosen": -201.28128051757812, "logps/rejected": -1141.0283203125, "loss": 0.1026, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.45582115650177, "rewards/margins": 9.563715934753418, "rewards/rejected": -11.019536972045898, "step": 44070 }, { "epoch": 0.53, "learning_rate": 2.6947808715996025e-06, "logits/chosen": -2.856585741043091, "logits/rejected": -2.556206226348877, "logps/chosen": -100.982421875, "logps/rejected": -803.6852416992188, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.580525815486908, "rewards/margins": 7.092887878417969, "rewards/rejected": -7.6734137535095215, "step": 44080 }, { "epoch": 0.53, "learning_rate": 2.6937394092309527e-06, "logits/chosen": -2.8380684852600098, "logits/rejected": -2.375835418701172, "logps/chosen": -164.249755859375, "logps/rejected": -888.7703247070312, "loss": 0.1126, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2006018161773682, "rewards/margins": 7.295336723327637, "rewards/rejected": -8.495939254760742, "step": 44090 }, { "epoch": 0.53, "learning_rate": 2.692697913035939e-06, "logits/chosen": -2.8529791831970215, "logits/rejected": -2.1667325496673584, "logps/chosen": -161.05125427246094, "logps/rejected": -1007.7252807617188, "loss": 0.026, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.070603370666504, "rewards/margins": 8.629959106445312, "rewards/rejected": -9.700563430786133, "step": 44100 }, { "epoch": 0.53, "learning_rate": 2.6916563831964065e-06, "logits/chosen": -2.840341091156006, "logits/rejected": -2.2337629795074463, "logps/chosen": -142.87197875976562, "logps/rejected": -1055.2799072265625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9306266903877258, "rewards/margins": 9.234942436218262, "rewards/rejected": -10.165568351745605, "step": 44110 }, { "epoch": 0.53, "learning_rate": 2.6906148198942006e-06, "logits/chosen": -2.856201171875, "logits/rejected": -2.2149155139923096, "logps/chosen": -152.1475372314453, "logps/rejected": -1001.5675659179688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0271365642547607, "rewards/margins": 8.60300064086914, "rewards/rejected": -9.63013744354248, "step": 44120 }, { "epoch": 0.53, "learning_rate": 2.689573223311177e-06, "logits/chosen": -2.8878912925720215, "logits/rejected": -2.1936111450195312, "logps/chosen": -181.57785034179688, "logps/rejected": -1124.951171875, "loss": 0.1197, "rewards/accuracies": 1.0, "rewards/chosen": -1.2552134990692139, "rewards/margins": 9.603615760803223, "rewards/rejected": -10.858827590942383, "step": 44130 }, { "epoch": 0.53, "learning_rate": 2.6885315936291955e-06, "logits/chosen": -2.8444409370422363, "logits/rejected": -2.3963284492492676, "logps/chosen": -136.3206024169922, "logps/rejected": -861.5379638671875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9190188646316528, "rewards/margins": 7.314084053039551, "rewards/rejected": -8.233101844787598, "step": 44140 }, { "epoch": 0.53, "learning_rate": 2.6874899310301206e-06, "logits/chosen": -2.881417751312256, "logits/rejected": -2.3497214317321777, "logps/chosen": -122.00163269042969, "logps/rejected": -987.9533081054688, "loss": 0.088, "rewards/accuracies": 1.0, "rewards/chosen": -0.7274706363677979, "rewards/margins": 8.765827178955078, "rewards/rejected": -9.49329662322998, "step": 44150 }, { "epoch": 0.53, "learning_rate": 2.686448235695825e-06, "logits/chosen": -2.874967098236084, "logits/rejected": -2.23600435256958, "logps/chosen": -182.1478271484375, "logps/rejected": -1115.4468994140625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.3023993968963623, "rewards/margins": 9.450798034667969, "rewards/rejected": -10.753198623657227, "step": 44160 }, { "epoch": 0.53, "learning_rate": 2.6854065078081848e-06, "logits/chosen": -2.8866941928863525, "logits/rejected": -2.485090494155884, "logps/chosen": -132.6581573486328, "logps/rejected": -1021.2042236328125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.889040470123291, "rewards/margins": 8.934377670288086, "rewards/rejected": -9.823417663574219, "step": 44170 }, { "epoch": 0.53, "learning_rate": 2.684364747549084e-06, "logits/chosen": -2.8823792934417725, "logits/rejected": -2.474622964859009, "logps/chosen": -124.29087829589844, "logps/rejected": -981.8610229492188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7724416851997375, "rewards/margins": 8.665977478027344, "rewards/rejected": -9.438419342041016, "step": 44180 }, { "epoch": 0.53, "learning_rate": 2.6833229551004096e-06, "logits/chosen": -2.8712971210479736, "logits/rejected": -2.398555278778076, "logps/chosen": -140.45156860351562, "logps/rejected": -946.8170776367188, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.9360651969909668, "rewards/margins": 8.155901908874512, "rewards/rejected": -9.09196662902832, "step": 44190 }, { "epoch": 0.53, "learning_rate": 2.6822811306440564e-06, "logits/chosen": -2.8522696495056152, "logits/rejected": -2.445223569869995, "logps/chosen": -131.1540069580078, "logps/rejected": -932.9441528320312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8317005038261414, "rewards/margins": 8.069250106811523, "rewards/rejected": -8.90095043182373, "step": 44200 }, { "epoch": 0.53, "learning_rate": 2.6812392743619247e-06, "logits/chosen": -2.8865716457366943, "logits/rejected": -2.592101812362671, "logps/chosen": -110.9435043334961, "logps/rejected": -817.224365234375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.7005801796913147, "rewards/margins": 7.096959114074707, "rewards/rejected": -7.797540187835693, "step": 44210 }, { "epoch": 0.53, "learning_rate": 2.680197386435918e-06, "logits/chosen": -2.8689639568328857, "logits/rejected": -2.081446409225464, "logps/chosen": -150.8936004638672, "logps/rejected": -1205.8408203125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.9823028445243835, "rewards/margins": 10.670684814453125, "rewards/rejected": -11.652987480163574, "step": 44220 }, { "epoch": 0.53, "learning_rate": 2.6791554670479485e-06, "logits/chosen": -2.8917737007141113, "logits/rejected": -2.4497382640838623, "logps/chosen": -117.0890884399414, "logps/rejected": -1052.130615234375, "loss": 0.0783, "rewards/accuracies": 1.0, "rewards/chosen": -0.6532190442085266, "rewards/margins": 9.463789939880371, "rewards/rejected": -10.117009162902832, "step": 44230 }, { "epoch": 0.53, "learning_rate": 2.6781135163799334e-06, "logits/chosen": -2.80430269241333, "logits/rejected": -2.2395176887512207, "logps/chosen": -131.64413452148438, "logps/rejected": -1110.417236328125, "loss": 0.0839, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8539617657661438, "rewards/margins": 9.856948852539062, "rewards/rejected": -10.710908889770508, "step": 44240 }, { "epoch": 0.53, "learning_rate": 2.6770715346137914e-06, "logits/chosen": -2.857057571411133, "logits/rejected": -2.3446202278137207, "logps/chosen": -169.501953125, "logps/rejected": -1093.24951171875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.2391164302825928, "rewards/margins": 9.290098190307617, "rewards/rejected": -10.529214859008789, "step": 44250 }, { "epoch": 0.53, "learning_rate": 2.6760295219314518e-06, "logits/chosen": -2.881894826889038, "logits/rejected": -2.2156951427459717, "logps/chosen": -158.8802490234375, "logps/rejected": -1057.708251953125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.103449821472168, "rewards/margins": 9.083476066589355, "rewards/rejected": -10.186925888061523, "step": 44260 }, { "epoch": 0.53, "learning_rate": 2.6749874785148464e-06, "logits/chosen": -2.7975804805755615, "logits/rejected": -1.9476168155670166, "logps/chosen": -224.8721160888672, "logps/rejected": -1143.9412841796875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.664210557937622, "rewards/margins": 9.368114471435547, "rewards/rejected": -11.032323837280273, "step": 44270 }, { "epoch": 0.53, "learning_rate": 2.673945404545914e-06, "logits/chosen": -2.8535971641540527, "logits/rejected": -2.418607234954834, "logps/chosen": -107.03388977050781, "logps/rejected": -926.3975830078125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5839273929595947, "rewards/margins": 8.302438735961914, "rewards/rejected": -8.88636589050293, "step": 44280 }, { "epoch": 0.53, "learning_rate": 2.6729033002065964e-06, "logits/chosen": -2.9152209758758545, "logits/rejected": -2.4737441539764404, "logps/chosen": -126.29302978515625, "logps/rejected": -934.4640502929688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7972185611724854, "rewards/margins": 8.165781021118164, "rewards/rejected": -8.962998390197754, "step": 44290 }, { "epoch": 0.53, "learning_rate": 2.671861165678843e-06, "logits/chosen": -2.870661497116089, "logits/rejected": -2.3011605739593506, "logps/chosen": -174.51771545410156, "logps/rejected": -939.3294677734375, "loss": 0.0343, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2466905117034912, "rewards/margins": 7.7476348876953125, "rewards/rejected": -8.994325637817383, "step": 44300 }, { "epoch": 0.53, "learning_rate": 2.6708190011446073e-06, "logits/chosen": -2.817065477371216, "logits/rejected": -2.1762468814849854, "logps/chosen": -186.48886108398438, "logps/rejected": -1092.0684814453125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3581260442733765, "rewards/margins": 9.175902366638184, "rewards/rejected": -10.534028053283691, "step": 44310 }, { "epoch": 0.53, "learning_rate": 2.669776806785848e-06, "logits/chosen": -2.831801176071167, "logits/rejected": -2.2929248809814453, "logps/chosen": -177.57498168945312, "logps/rejected": -948.6318359375, "loss": 0.1051, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2899529933929443, "rewards/margins": 7.818727016448975, "rewards/rejected": -9.108680725097656, "step": 44320 }, { "epoch": 0.53, "learning_rate": 2.6687345827845302e-06, "logits/chosen": -2.803986072540283, "logits/rejected": -2.286531925201416, "logps/chosen": -177.52261352539062, "logps/rejected": -935.1412963867188, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.3333735466003418, "rewards/margins": 7.6340227127075195, "rewards/rejected": -8.967395782470703, "step": 44330 }, { "epoch": 0.53, "learning_rate": 2.6676923293226237e-06, "logits/chosen": -2.8535876274108887, "logits/rejected": -2.2496232986450195, "logps/chosen": -209.31515502929688, "logps/rejected": -1218.2376708984375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.563413143157959, "rewards/margins": 10.228240013122559, "rewards/rejected": -11.79165267944336, "step": 44340 }, { "epoch": 0.53, "learning_rate": 2.666650046582101e-06, "logits/chosen": -2.846979856491089, "logits/rejected": -2.308964729309082, "logps/chosen": -178.3280029296875, "logps/rejected": -1009.7142333984375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.3486400842666626, "rewards/margins": 8.348498344421387, "rewards/rejected": -9.697138786315918, "step": 44350 }, { "epoch": 0.53, "learning_rate": 2.6656077347449433e-06, "logits/chosen": -2.87367582321167, "logits/rejected": -2.346773862838745, "logps/chosen": -199.27963256835938, "logps/rejected": -1040.34765625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.5300102233886719, "rewards/margins": 8.472169876098633, "rewards/rejected": -10.002180099487305, "step": 44360 }, { "epoch": 0.53, "learning_rate": 2.664565393993136e-06, "logits/chosen": -2.89573335647583, "logits/rejected": -2.5400004386901855, "logps/chosen": -183.21981811523438, "logps/rejected": -995.9595947265625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.384295105934143, "rewards/margins": 8.190411567687988, "rewards/rejected": -9.57470703125, "step": 44370 }, { "epoch": 0.53, "learning_rate": 2.6635230245086667e-06, "logits/chosen": -2.8874411582946777, "logits/rejected": -2.4340577125549316, "logps/chosen": -156.7076873779297, "logps/rejected": -922.5344848632812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.0830410718917847, "rewards/margins": 7.764678001403809, "rewards/rejected": -8.847719192504883, "step": 44380 }, { "epoch": 0.53, "learning_rate": 2.662480626473532e-06, "logits/chosen": -2.8769826889038086, "logits/rejected": -2.463425874710083, "logps/chosen": -108.4212417602539, "logps/rejected": -848.4324951171875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7007686495780945, "rewards/margins": 7.417706489562988, "rewards/rejected": -8.118474960327148, "step": 44390 }, { "epoch": 0.53, "learning_rate": 2.6614382000697313e-06, "logits/chosen": -2.8967063426971436, "logits/rejected": -2.663797616958618, "logps/chosen": -93.12494659423828, "logps/rejected": -849.7520751953125, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.578779935836792, "rewards/margins": 7.553204536437988, "rewards/rejected": -8.13198471069336, "step": 44400 }, { "epoch": 0.53, "learning_rate": 2.6603957454792696e-06, "logits/chosen": -2.8701326847076416, "logits/rejected": -2.4215216636657715, "logps/chosen": -156.340087890625, "logps/rejected": -988.8894653320312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.1060450077056885, "rewards/margins": 8.401055335998535, "rewards/rejected": -9.507100105285645, "step": 44410 }, { "epoch": 0.53, "learning_rate": 2.659353262884156e-06, "logits/chosen": -2.848781108856201, "logits/rejected": -2.388950824737549, "logps/chosen": -142.90036010742188, "logps/rejected": -945.2823486328125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9903454780578613, "rewards/margins": 8.07837963104248, "rewards/rejected": -9.068724632263184, "step": 44420 }, { "epoch": 0.53, "learning_rate": 2.6583107524664044e-06, "logits/chosen": -2.881044387817383, "logits/rejected": -2.392216920852661, "logps/chosen": -176.76315307617188, "logps/rejected": -1039.251708984375, "loss": 0.0847, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3042843341827393, "rewards/margins": 8.693750381469727, "rewards/rejected": -9.99803638458252, "step": 44430 }, { "epoch": 0.53, "learning_rate": 2.6572682144080353e-06, "logits/chosen": -2.853543519973755, "logits/rejected": -2.3377583026885986, "logps/chosen": -193.6387176513672, "logps/rejected": -984.5758056640625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.4505321979522705, "rewards/margins": 8.006287574768066, "rewards/rejected": -9.456819534301758, "step": 44440 }, { "epoch": 0.53, "learning_rate": 2.656225648891073e-06, "logits/chosen": -2.838893175125122, "logits/rejected": -2.2692155838012695, "logps/chosen": -209.7074432373047, "logps/rejected": -1042.834716796875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6170982122421265, "rewards/margins": 8.400897026062012, "rewards/rejected": -10.017995834350586, "step": 44450 }, { "epoch": 0.53, "learning_rate": 2.6551830560975472e-06, "logits/chosen": -2.8591926097869873, "logits/rejected": -2.314333438873291, "logps/chosen": -156.24615478515625, "logps/rejected": -955.4674072265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.130789875984192, "rewards/margins": 8.058873176574707, "rewards/rejected": -9.18966293334961, "step": 44460 }, { "epoch": 0.53, "learning_rate": 2.6541404362094897e-06, "logits/chosen": -2.8767497539520264, "logits/rejected": -2.373582601547241, "logps/chosen": -175.66061401367188, "logps/rejected": -973.5791015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.258657455444336, "rewards/margins": 8.095312118530273, "rewards/rejected": -9.35396957397461, "step": 44470 }, { "epoch": 0.53, "learning_rate": 2.6530977894089403e-06, "logits/chosen": -2.8594534397125244, "logits/rejected": -2.351935625076294, "logps/chosen": -189.98875427246094, "logps/rejected": -1032.480712890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.413386344909668, "rewards/margins": 8.52353572845459, "rewards/rejected": -9.936922073364258, "step": 44480 }, { "epoch": 0.53, "learning_rate": 2.6520551158779424e-06, "logits/chosen": -2.9089386463165283, "logits/rejected": -2.4378678798675537, "logps/chosen": -169.73789978027344, "logps/rejected": -942.4509887695312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.2362453937530518, "rewards/margins": 7.794785499572754, "rewards/rejected": -9.031030654907227, "step": 44490 }, { "epoch": 0.53, "learning_rate": 2.651012415798544e-06, "logits/chosen": -2.8752989768981934, "logits/rejected": -2.1811909675598145, "logps/chosen": -198.65379333496094, "logps/rejected": -1076.1578369140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4364384412765503, "rewards/margins": 8.934536933898926, "rewards/rejected": -10.370975494384766, "step": 44500 }, { "epoch": 0.53, "learning_rate": 2.6499696893527965e-06, "logits/chosen": -2.8364901542663574, "logits/rejected": -2.200770139694214, "logps/chosen": -220.44265747070312, "logps/rejected": -975.0521240234375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.6766624450683594, "rewards/margins": 7.667799472808838, "rewards/rejected": -9.344460487365723, "step": 44510 }, { "epoch": 0.53, "learning_rate": 2.6489269367227576e-06, "logits/chosen": -2.8188254833221436, "logits/rejected": -2.01277494430542, "logps/chosen": -270.17974853515625, "logps/rejected": -1152.000244140625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.187955856323242, "rewards/margins": 8.912674903869629, "rewards/rejected": -11.100629806518555, "step": 44520 }, { "epoch": 0.53, "learning_rate": 2.6478841580904902e-06, "logits/chosen": -2.8461124897003174, "logits/rejected": -2.4482216835021973, "logps/chosen": -174.72299194335938, "logps/rejected": -976.1871948242188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.293895959854126, "rewards/margins": 8.083196640014648, "rewards/rejected": -9.377092361450195, "step": 44530 }, { "epoch": 0.53, "learning_rate": 2.6468413536380587e-06, "logits/chosen": -2.862344264984131, "logits/rejected": -2.4199821949005127, "logps/chosen": -172.91244506835938, "logps/rejected": -907.7752685546875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.2873847484588623, "rewards/margins": 7.41796350479126, "rewards/rejected": -8.70534896850586, "step": 44540 }, { "epoch": 0.53, "learning_rate": 2.6457985235475348e-06, "logits/chosen": -2.879150390625, "logits/rejected": -2.4375693798065186, "logps/chosen": -144.0272216796875, "logps/rejected": -942.4503784179688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0222795009613037, "rewards/margins": 8.01092529296875, "rewards/rejected": -9.033205032348633, "step": 44550 }, { "epoch": 0.53, "learning_rate": 2.6447556680009946e-06, "logits/chosen": -2.8538544178009033, "logits/rejected": -2.327655553817749, "logps/chosen": -172.2465362548828, "logps/rejected": -927.0079345703125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.3110277652740479, "rewards/margins": 7.56934118270874, "rewards/rejected": -8.88036823272705, "step": 44560 }, { "epoch": 0.53, "learning_rate": 2.6437127871805155e-06, "logits/chosen": -2.9221596717834473, "logits/rejected": -2.434607982635498, "logps/chosen": -164.265380859375, "logps/rejected": -902.7603759765625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.19511878490448, "rewards/margins": 7.452597141265869, "rewards/rejected": -8.647716522216797, "step": 44570 }, { "epoch": 0.53, "learning_rate": 2.6426698812681834e-06, "logits/chosen": -2.8153247833251953, "logits/rejected": -2.3762052059173584, "logps/chosen": -137.31881713867188, "logps/rejected": -868.73779296875, "loss": 0.15, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9233734011650085, "rewards/margins": 7.393913269042969, "rewards/rejected": -8.317285537719727, "step": 44580 }, { "epoch": 0.53, "learning_rate": 2.6416269504460856e-06, "logits/chosen": -2.8776090145111084, "logits/rejected": -2.258185863494873, "logps/chosen": -203.6652374267578, "logps/rejected": -1099.3623046875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.5288463830947876, "rewards/margins": 9.04871940612793, "rewards/rejected": -10.57756519317627, "step": 44590 }, { "epoch": 0.53, "learning_rate": 2.640583994896317e-06, "logits/chosen": -2.840634822845459, "logits/rejected": -2.292874813079834, "logps/chosen": -190.2763671875, "logps/rejected": -967.1696166992188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.4358716011047363, "rewards/margins": 7.855086326599121, "rewards/rejected": -9.2909574508667, "step": 44600 }, { "epoch": 0.53, "learning_rate": 2.6395410148009728e-06, "logits/chosen": -2.8929944038391113, "logits/rejected": -2.0859386920928955, "logps/chosen": -206.9546661376953, "logps/rejected": -1079.450439453125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.5701448917388916, "rewards/margins": 8.81313419342041, "rewards/rejected": -10.383279800415039, "step": 44610 }, { "epoch": 0.53, "learning_rate": 2.6384980103421543e-06, "logits/chosen": -2.8829450607299805, "logits/rejected": -2.3487255573272705, "logps/chosen": -170.125244140625, "logps/rejected": -1008.7097778320312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1312764883041382, "rewards/margins": 8.565641403198242, "rewards/rejected": -9.696916580200195, "step": 44620 }, { "epoch": 0.53, "learning_rate": 2.6374549817019686e-06, "logits/chosen": -2.901796340942383, "logits/rejected": -2.5310165882110596, "logps/chosen": -131.7493133544922, "logps/rejected": -902.5048828125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9237983822822571, "rewards/margins": 7.7413201332092285, "rewards/rejected": -8.665118217468262, "step": 44630 }, { "epoch": 0.53, "learning_rate": 2.6364119290625244e-06, "logits/chosen": -2.877281665802002, "logits/rejected": -2.27596116065979, "logps/chosen": -206.95590209960938, "logps/rejected": -1079.6876220703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.5371050834655762, "rewards/margins": 8.848611831665039, "rewards/rejected": -10.385717391967773, "step": 44640 }, { "epoch": 0.53, "learning_rate": 2.6353688526059355e-06, "logits/chosen": -2.8559751510620117, "logits/rejected": -2.025773048400879, "logps/chosen": -229.1786651611328, "logps/rejected": -1143.346923828125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.7430074214935303, "rewards/margins": 9.288167953491211, "rewards/rejected": -11.031173706054688, "step": 44650 }, { "epoch": 0.53, "learning_rate": 2.6343257525143224e-06, "logits/chosen": -2.8328559398651123, "logits/rejected": -2.2410106658935547, "logps/chosen": -201.30174255371094, "logps/rejected": -1044.315185546875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.5195083618164062, "rewards/margins": 8.512359619140625, "rewards/rejected": -10.031867980957031, "step": 44660 }, { "epoch": 0.53, "learning_rate": 2.6332826289698056e-06, "logits/chosen": -2.857072353363037, "logits/rejected": -2.4484333992004395, "logps/chosen": -221.23062133789062, "logps/rejected": -936.9822387695312, "loss": 0.0738, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.7823574542999268, "rewards/margins": 7.221014499664307, "rewards/rejected": -9.003373146057129, "step": 44670 }, { "epoch": 0.53, "learning_rate": 2.632239482154511e-06, "logits/chosen": -2.8267548084259033, "logits/rejected": -2.10754132270813, "logps/chosen": -297.5674133300781, "logps/rejected": -1158.184326171875, "loss": 0.1049, "rewards/accuracies": 1.0, "rewards/chosen": -2.463040351867676, "rewards/margins": 8.704649925231934, "rewards/rejected": -11.16769027709961, "step": 44680 }, { "epoch": 0.53, "learning_rate": 2.631196312250571e-06, "logits/chosen": -2.8544774055480957, "logits/rejected": -2.198253870010376, "logps/chosen": -284.8468017578125, "logps/rejected": -1101.137451171875, "loss": 0.0209, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.3694262504577637, "rewards/margins": 8.247503280639648, "rewards/rejected": -10.61693000793457, "step": 44690 }, { "epoch": 0.54, "learning_rate": 2.6301531194401193e-06, "logits/chosen": -2.8963680267333984, "logits/rejected": -2.4184446334838867, "logps/chosen": -189.98788452148438, "logps/rejected": -1011.6790771484375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4788180589675903, "rewards/margins": 8.2556791305542, "rewards/rejected": -9.7344970703125, "step": 44700 }, { "epoch": 0.54, "learning_rate": 2.6291099039052944e-06, "logits/chosen": -2.87064790725708, "logits/rejected": -2.356642007827759, "logps/chosen": -221.35366821289062, "logps/rejected": -1025.2418212890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.7102253437042236, "rewards/margins": 8.149022102355957, "rewards/rejected": -9.859248161315918, "step": 44710 }, { "epoch": 0.54, "learning_rate": 2.628066665828239e-06, "logits/chosen": -2.802243709564209, "logits/rejected": -2.043994426727295, "logps/chosen": -322.3624267578125, "logps/rejected": -1193.0482177734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.713789939880371, "rewards/margins": 8.795329093933105, "rewards/rejected": -11.509119987487793, "step": 44720 }, { "epoch": 0.54, "learning_rate": 2.6270234053910993e-06, "logits/chosen": -2.891141414642334, "logits/rejected": -2.4424760341644287, "logps/chosen": -242.9439239501953, "logps/rejected": -999.9078369140625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.9687387943267822, "rewards/margins": 7.645442962646484, "rewards/rejected": -9.614182472229004, "step": 44730 }, { "epoch": 0.54, "learning_rate": 2.625980122776025e-06, "logits/chosen": -2.871115207672119, "logits/rejected": -2.216710329055786, "logps/chosen": -226.1534881591797, "logps/rejected": -1034.1126708984375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.796312689781189, "rewards/margins": 8.154555320739746, "rewards/rejected": -9.950868606567383, "step": 44740 }, { "epoch": 0.54, "learning_rate": 2.624936818165172e-06, "logits/chosen": -2.8965816497802734, "logits/rejected": -2.526142120361328, "logps/chosen": -163.45358276367188, "logps/rejected": -931.1572265625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.196030855178833, "rewards/margins": 7.740862846374512, "rewards/rejected": -8.936893463134766, "step": 44750 }, { "epoch": 0.54, "learning_rate": 2.623893491740698e-06, "logits/chosen": -2.8671414852142334, "logits/rejected": -2.4790680408477783, "logps/chosen": -177.41656494140625, "logps/rejected": -939.7818603515625, "loss": 0.0974, "rewards/accuracies": 1.0, "rewards/chosen": -1.3287651538848877, "rewards/margins": 7.68509578704834, "rewards/rejected": -9.013860702514648, "step": 44760 }, { "epoch": 0.54, "learning_rate": 2.622850143684763e-06, "logits/chosen": -2.900447368621826, "logits/rejected": -2.4924569129943848, "logps/chosen": -205.52816772460938, "logps/rejected": -1045.5242919921875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.5681966543197632, "rewards/margins": 8.49901294708252, "rewards/rejected": -10.067209243774414, "step": 44770 }, { "epoch": 0.54, "learning_rate": 2.621806774179535e-06, "logits/chosen": -2.8484182357788086, "logits/rejected": -2.387678623199463, "logps/chosen": -248.5258026123047, "logps/rejected": -1030.9852294921875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.005005359649658, "rewards/margins": 7.916071891784668, "rewards/rejected": -9.9210786819458, "step": 44780 }, { "epoch": 0.54, "learning_rate": 2.6207633834071825e-06, "logits/chosen": -2.8403847217559814, "logits/rejected": -2.284938335418701, "logps/chosen": -222.5978546142578, "logps/rejected": -1097.2813720703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.7409569025039673, "rewards/margins": 8.831613540649414, "rewards/rejected": -10.572568893432617, "step": 44790 }, { "epoch": 0.54, "learning_rate": 2.6197199715498788e-06, "logits/chosen": -2.81880259513855, "logits/rejected": -2.1474575996398926, "logps/chosen": -285.03326416015625, "logps/rejected": -1110.7127685546875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.2750656604766846, "rewards/margins": 8.430206298828125, "rewards/rejected": -10.70527172088623, "step": 44800 }, { "epoch": 0.54, "learning_rate": 2.6186765387898004e-06, "logits/chosen": -2.827200412750244, "logits/rejected": -2.3704166412353516, "logps/chosen": -141.72052001953125, "logps/rejected": -864.1290283203125, "loss": 0.1381, "rewards/accuracies": 1.0, "rewards/chosen": -1.0289068222045898, "rewards/margins": 7.236316680908203, "rewards/rejected": -8.265223503112793, "step": 44810 }, { "epoch": 0.54, "learning_rate": 2.6176330853091283e-06, "logits/chosen": -2.847180128097534, "logits/rejected": -2.345787525177002, "logps/chosen": -193.302001953125, "logps/rejected": -917.2320556640625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.4570504426956177, "rewards/margins": 7.339529514312744, "rewards/rejected": -8.79658031463623, "step": 44820 }, { "epoch": 0.54, "learning_rate": 2.616589611290046e-06, "logits/chosen": -2.871922016143799, "logits/rejected": -2.2538769245147705, "logps/chosen": -174.0427703857422, "logps/rejected": -1077.401611328125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.2634918689727783, "rewards/margins": 9.116304397583008, "rewards/rejected": -10.379796028137207, "step": 44830 }, { "epoch": 0.54, "learning_rate": 2.6155461169147407e-06, "logits/chosen": -2.871004343032837, "logits/rejected": -2.276866912841797, "logps/chosen": -187.39158630371094, "logps/rejected": -1134.205078125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.3797789812088013, "rewards/margins": 9.574002265930176, "rewards/rejected": -10.953782081604004, "step": 44840 }, { "epoch": 0.54, "learning_rate": 2.6145026023654045e-06, "logits/chosen": -2.89349627494812, "logits/rejected": -2.4884190559387207, "logps/chosen": -129.40423583984375, "logps/rejected": -887.8568115234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8440580368041992, "rewards/margins": 7.660482883453369, "rewards/rejected": -8.50454044342041, "step": 44850 }, { "epoch": 0.54, "learning_rate": 2.613459067824232e-06, "logits/chosen": -2.8804047107696533, "logits/rejected": -2.439535617828369, "logps/chosen": -172.3536834716797, "logps/rejected": -1032.1077880859375, "loss": 0.0913, "rewards/accuracies": 1.0, "rewards/chosen": -1.2639358043670654, "rewards/margins": 8.677818298339844, "rewards/rejected": -9.941755294799805, "step": 44860 }, { "epoch": 0.54, "learning_rate": 2.612415513473422e-06, "logits/chosen": -2.818786144256592, "logits/rejected": -2.147033214569092, "logps/chosen": -182.5278778076172, "logps/rejected": -924.8211059570312, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": -1.3298394680023193, "rewards/margins": 7.534276485443115, "rewards/rejected": -8.864115715026855, "step": 44870 }, { "epoch": 0.54, "learning_rate": 2.6113719394951755e-06, "logits/chosen": -2.8720946311950684, "logits/rejected": -2.0997138023376465, "logps/chosen": -238.17709350585938, "logps/rejected": -1190.0916748046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.7922389507293701, "rewards/margins": 9.693822860717773, "rewards/rejected": -11.48606014251709, "step": 44880 }, { "epoch": 0.54, "learning_rate": 2.610328346071697e-06, "logits/chosen": -2.8893349170684814, "logits/rejected": -2.6987249851226807, "logps/chosen": -116.1677474975586, "logps/rejected": -779.4542846679688, "loss": 0.1703, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7674587368965149, "rewards/margins": 6.654261589050293, "rewards/rejected": -7.4217209815979, "step": 44890 }, { "epoch": 0.54, "learning_rate": 2.609284733385196e-06, "logits/chosen": -2.859222412109375, "logits/rejected": -1.9828637838363647, "logps/chosen": -264.3453063964844, "logps/rejected": -1177.5379638671875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.101818561553955, "rewards/margins": 9.27877426147461, "rewards/rejected": -11.380594253540039, "step": 44900 }, { "epoch": 0.54, "learning_rate": 2.6082411016178845e-06, "logits/chosen": -2.8485267162323, "logits/rejected": -2.1207714080810547, "logps/chosen": -196.90109252929688, "logps/rejected": -1169.5843505859375, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -1.4421511888504028, "rewards/margins": 9.859256744384766, "rewards/rejected": -11.301408767700195, "step": 44910 }, { "epoch": 0.54, "learning_rate": 2.607197450951977e-06, "logits/chosen": -2.897822380065918, "logits/rejected": -2.2226781845092773, "logps/chosen": -182.7111358642578, "logps/rejected": -974.5442504882812, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.2974250316619873, "rewards/margins": 8.046142578125, "rewards/rejected": -9.343567848205566, "step": 44920 }, { "epoch": 0.54, "learning_rate": 2.6061537815696924e-06, "logits/chosen": -2.86183500289917, "logits/rejected": -2.2311604022979736, "logps/chosen": -179.73236083984375, "logps/rejected": -1042.171875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.2311137914657593, "rewards/margins": 8.771915435791016, "rewards/rejected": -10.003029823303223, "step": 44930 }, { "epoch": 0.54, "learning_rate": 2.605110093653252e-06, "logits/chosen": -2.8774921894073486, "logits/rejected": -2.4966671466827393, "logps/chosen": -152.27020263671875, "logps/rejected": -911.6066284179688, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.0860779285430908, "rewards/margins": 7.648196220397949, "rewards/rejected": -8.734275817871094, "step": 44940 }, { "epoch": 0.54, "learning_rate": 2.6040663873848815e-06, "logits/chosen": -2.8743629455566406, "logits/rejected": -2.2827508449554443, "logps/chosen": -155.50210571289062, "logps/rejected": -998.3912963867188, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9654983282089233, "rewards/margins": 8.610315322875977, "rewards/rejected": -9.575814247131348, "step": 44950 }, { "epoch": 0.54, "learning_rate": 2.6030226629468085e-06, "logits/chosen": -2.834817409515381, "logits/rejected": -2.3848419189453125, "logps/chosen": -167.3943634033203, "logps/rejected": -1041.2735595703125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.2423434257507324, "rewards/margins": 8.786500930786133, "rewards/rejected": -10.028843879699707, "step": 44960 }, { "epoch": 0.54, "learning_rate": 2.6019789205212654e-06, "logits/chosen": -2.8067803382873535, "logits/rejected": -2.27933931350708, "logps/chosen": -165.40469360351562, "logps/rejected": -1000.0203857421875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.18128502368927, "rewards/margins": 8.432817459106445, "rewards/rejected": -9.614103317260742, "step": 44970 }, { "epoch": 0.54, "learning_rate": 2.6009351602904853e-06, "logits/chosen": -2.8459432125091553, "logits/rejected": -2.2033355236053467, "logps/chosen": -176.11300659179688, "logps/rejected": -972.0534057617188, "loss": 0.1636, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2867279052734375, "rewards/margins": 8.048284530639648, "rewards/rejected": -9.335013389587402, "step": 44980 }, { "epoch": 0.54, "learning_rate": 2.5998913824367066e-06, "logits/chosen": -2.860435962677002, "logits/rejected": -2.3370492458343506, "logps/chosen": -159.33160400390625, "logps/rejected": -894.8846435546875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.150787353515625, "rewards/margins": 7.413566589355469, "rewards/rejected": -8.564353942871094, "step": 44990 }, { "epoch": 0.54, "learning_rate": 2.59884758714217e-06, "logits/chosen": -2.856797695159912, "logits/rejected": -2.566941261291504, "logps/chosen": -102.52577209472656, "logps/rejected": -845.8112182617188, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.6528486609458923, "rewards/margins": 7.424308776855469, "rewards/rejected": -8.077157974243164, "step": 45000 }, { "epoch": 0.54, "eval_logits/chosen": -2.840272903442383, "eval_logits/rejected": -1.6936837434768677, "eval_logps/chosen": -398.8731384277344, "eval_logps/rejected": -1370.0538330078125, "eval_loss": 0.0009748496231622994, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -3.3769283294677734, "eval_rewards/margins": 9.856356620788574, "eval_rewards/rejected": -13.233284950256348, "eval_runtime": 1.2159, "eval_samples_per_second": 4.112, "eval_steps_per_second": 2.467, "step": 45000 }, { "epoch": 0.54, "learning_rate": 2.5978037745891193e-06, "logits/chosen": -2.838888645172119, "logits/rejected": -2.2360546588897705, "logps/chosen": -149.9476318359375, "logps/rejected": -956.4102783203125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0238087177276611, "rewards/margins": 8.157305717468262, "rewards/rejected": -9.18111515045166, "step": 45010 }, { "epoch": 0.54, "learning_rate": 2.596759944959802e-06, "logits/chosen": -2.822025775909424, "logits/rejected": -2.226104259490967, "logps/chosen": -179.6795196533203, "logps/rejected": -1013.4376831054688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.3235961198806763, "rewards/margins": 8.415946960449219, "rewards/rejected": -9.739543914794922, "step": 45020 }, { "epoch": 0.54, "learning_rate": 2.595716098436466e-06, "logits/chosen": -2.9268343448638916, "logits/rejected": -2.586254596710205, "logps/chosen": -109.09537506103516, "logps/rejected": -857.2847900390625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6774271130561829, "rewards/margins": 7.519900321960449, "rewards/rejected": -8.197327613830566, "step": 45030 }, { "epoch": 0.54, "learning_rate": 2.594672235201365e-06, "logits/chosen": -2.8549771308898926, "logits/rejected": -2.438711166381836, "logps/chosen": -114.26409912109375, "logps/rejected": -895.5094604492188, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.7253403067588806, "rewards/margins": 7.854869842529297, "rewards/rejected": -8.580209732055664, "step": 45040 }, { "epoch": 0.54, "learning_rate": 2.5936283554367553e-06, "logits/chosen": -2.87968373298645, "logits/rejected": -1.9957221746444702, "logps/chosen": -213.74111938476562, "logps/rejected": -1229.3463134765625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.5354748964309692, "rewards/margins": 10.346915245056152, "rewards/rejected": -11.882390022277832, "step": 45050 }, { "epoch": 0.54, "learning_rate": 2.5925844593248943e-06, "logits/chosen": -2.8798460960388184, "logits/rejected": -2.3923017978668213, "logps/chosen": -139.71495056152344, "logps/rejected": -955.8215942382812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9739642143249512, "rewards/margins": 8.200754165649414, "rewards/rejected": -9.174718856811523, "step": 45060 }, { "epoch": 0.54, "learning_rate": 2.5915405470480436e-06, "logits/chosen": -2.8388614654541016, "logits/rejected": -2.1204919815063477, "logps/chosen": -197.16453552246094, "logps/rejected": -1083.7021484375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.494072675704956, "rewards/margins": 8.93970775604248, "rewards/rejected": -10.433780670166016, "step": 45070 }, { "epoch": 0.54, "learning_rate": 2.5904966187884685e-06, "logits/chosen": -2.8515210151672363, "logits/rejected": -2.2332804203033447, "logps/chosen": -136.92794799804688, "logps/rejected": -954.0579833984375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8760446310043335, "rewards/margins": 8.278219223022461, "rewards/rejected": -9.154263496398926, "step": 45080 }, { "epoch": 0.54, "learning_rate": 2.5894526747284344e-06, "logits/chosen": -2.8704049587249756, "logits/rejected": -2.290417194366455, "logps/chosen": -166.34686279296875, "logps/rejected": -941.0607299804688, "loss": 0.0942, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1777781248092651, "rewards/margins": 7.846234321594238, "rewards/rejected": -9.024011611938477, "step": 45090 }, { "epoch": 0.54, "learning_rate": 2.5884087150502117e-06, "logits/chosen": -2.8491218090057373, "logits/rejected": -1.923008918762207, "logps/chosen": -219.07968139648438, "logps/rejected": -1219.431640625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.6475321054458618, "rewards/margins": 10.136302947998047, "rewards/rejected": -11.783836364746094, "step": 45100 }, { "epoch": 0.54, "learning_rate": 2.5873647399360736e-06, "logits/chosen": -2.8348405361175537, "logits/rejected": -2.3021557331085205, "logps/chosen": -123.36183166503906, "logps/rejected": -1005.3499755859375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7868838906288147, "rewards/margins": 8.877362251281738, "rewards/rejected": -9.664246559143066, "step": 45110 }, { "epoch": 0.54, "learning_rate": 2.586320749568294e-06, "logits/chosen": -2.8976047039031982, "logits/rejected": -2.367783784866333, "logps/chosen": -155.70956420898438, "logps/rejected": -1035.8653564453125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.1133524179458618, "rewards/margins": 8.86201286315918, "rewards/rejected": -9.975364685058594, "step": 45120 }, { "epoch": 0.54, "learning_rate": 2.5852767441291513e-06, "logits/chosen": -2.831735134124756, "logits/rejected": -1.9295005798339844, "logps/chosen": -218.3793182373047, "logps/rejected": -1193.2783203125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.521597146987915, "rewards/margins": 9.98341178894043, "rewards/rejected": -11.505010604858398, "step": 45130 }, { "epoch": 0.54, "learning_rate": 2.584232723800927e-06, "logits/chosen": -2.863983154296875, "logits/rejected": -2.0965778827667236, "logps/chosen": -198.55758666992188, "logps/rejected": -1207.3040771484375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.4768450260162354, "rewards/margins": 10.18017578125, "rewards/rejected": -11.657020568847656, "step": 45140 }, { "epoch": 0.54, "learning_rate": 2.5831886887659013e-06, "logits/chosen": -2.8926913738250732, "logits/rejected": -2.3186612129211426, "logps/chosen": -128.46548461914062, "logps/rejected": -955.5111083984375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7850972414016724, "rewards/margins": 8.384246826171875, "rewards/rejected": -9.169343948364258, "step": 45150 }, { "epoch": 0.54, "learning_rate": 2.5821446392063627e-06, "logits/chosen": -2.9097094535827637, "logits/rejected": -2.6209843158721924, "logps/chosen": -95.8115005493164, "logps/rejected": -854.4865112304688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5622164011001587, "rewards/margins": 7.603957176208496, "rewards/rejected": -8.166172981262207, "step": 45160 }, { "epoch": 0.54, "learning_rate": 2.5811005753045975e-06, "logits/chosen": -2.8362138271331787, "logits/rejected": -2.31233286857605, "logps/chosen": -142.8259735107422, "logps/rejected": -948.5838012695312, "loss": 0.1619, "rewards/accuracies": 1.0, "rewards/chosen": -0.9619766473770142, "rewards/margins": 8.133100509643555, "rewards/rejected": -9.095077514648438, "step": 45170 }, { "epoch": 0.54, "learning_rate": 2.5800564972428983e-06, "logits/chosen": -2.8769888877868652, "logits/rejected": -2.2686619758605957, "logps/chosen": -158.98512268066406, "logps/rejected": -1007.1689453125, "loss": 0.1346, "rewards/accuracies": 1.0, "rewards/chosen": -1.0853749513626099, "rewards/margins": 8.597216606140137, "rewards/rejected": -9.682592391967773, "step": 45180 }, { "epoch": 0.54, "learning_rate": 2.5790124052035558e-06, "logits/chosen": -2.870640516281128, "logits/rejected": -2.4562668800354004, "logps/chosen": -134.30001831054688, "logps/rejected": -903.31689453125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.9022383689880371, "rewards/margins": 7.757399559020996, "rewards/rejected": -8.659638404846191, "step": 45190 }, { "epoch": 0.54, "learning_rate": 2.577968299368867e-06, "logits/chosen": -2.8798091411590576, "logits/rejected": -2.071962833404541, "logps/chosen": -193.88198852539062, "logps/rejected": -1077.092041015625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.3946850299835205, "rewards/margins": 8.966506004333496, "rewards/rejected": -10.361190795898438, "step": 45200 }, { "epoch": 0.54, "learning_rate": 2.57692417992113e-06, "logits/chosen": -2.87052583694458, "logits/rejected": -2.2487964630126953, "logps/chosen": -153.58367919921875, "logps/rejected": -1054.67822265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0086934566497803, "rewards/margins": 9.154939651489258, "rewards/rejected": -10.163633346557617, "step": 45210 }, { "epoch": 0.54, "learning_rate": 2.5758800470426455e-06, "logits/chosen": -2.8757236003875732, "logits/rejected": -2.015568971633911, "logps/chosen": -210.1687469482422, "logps/rejected": -1077.2060546875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.501828908920288, "rewards/margins": 8.86239242553711, "rewards/rejected": -10.364221572875977, "step": 45220 }, { "epoch": 0.54, "learning_rate": 2.5748359009157143e-06, "logits/chosen": -2.8555092811584473, "logits/rejected": -2.2359423637390137, "logps/chosen": -176.4265899658203, "logps/rejected": -1050.9320068359375, "loss": 0.0908, "rewards/accuracies": 1.0, "rewards/chosen": -1.2252819538116455, "rewards/margins": 8.887083053588867, "rewards/rejected": -10.112364768981934, "step": 45230 }, { "epoch": 0.54, "learning_rate": 2.573791741722644e-06, "logits/chosen": -2.9064712524414062, "logits/rejected": -2.376011371612549, "logps/chosen": -138.72865295410156, "logps/rejected": -935.0691528320312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9256361722946167, "rewards/margins": 8.053987503051758, "rewards/rejected": -8.979623794555664, "step": 45240 }, { "epoch": 0.54, "learning_rate": 2.5727475696457394e-06, "logits/chosen": -2.878528356552124, "logits/rejected": -2.055145025253296, "logps/chosen": -223.4213104248047, "logps/rejected": -1124.883056640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.729047417640686, "rewards/margins": 9.110981941223145, "rewards/rejected": -10.8400297164917, "step": 45250 }, { "epoch": 0.54, "learning_rate": 2.571703384867311e-06, "logits/chosen": -2.8862662315368652, "logits/rejected": -2.237041473388672, "logps/chosen": -159.3874969482422, "logps/rejected": -965.2614135742188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1094214916229248, "rewards/margins": 8.14494514465332, "rewards/rejected": -9.254365921020508, "step": 45260 }, { "epoch": 0.54, "learning_rate": 2.5706591875696712e-06, "logits/chosen": -2.8720850944519043, "logits/rejected": -2.454054355621338, "logps/chosen": -105.3782958984375, "logps/rejected": -883.0716552734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6119092106819153, "rewards/margins": 7.8329758644104, "rewards/rejected": -8.444884300231934, "step": 45270 }, { "epoch": 0.54, "learning_rate": 2.569614977935133e-06, "logits/chosen": -2.868821144104004, "logits/rejected": -2.476494073867798, "logps/chosen": -181.02963256835938, "logps/rejected": -956.9281005859375, "loss": 0.143, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3578903675079346, "rewards/margins": 7.818587303161621, "rewards/rejected": -9.176478385925293, "step": 45280 }, { "epoch": 0.54, "learning_rate": 2.5685707561460137e-06, "logits/chosen": -2.8443360328674316, "logits/rejected": -2.1959142684936523, "logps/chosen": -141.05767822265625, "logps/rejected": -919.4898681640625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9759224057197571, "rewards/margins": 7.84576416015625, "rewards/rejected": -8.821686744689941, "step": 45290 }, { "epoch": 0.54, "learning_rate": 2.567526522384631e-06, "logits/chosen": -2.8537087440490723, "logits/rejected": -2.273188591003418, "logps/chosen": -164.846435546875, "logps/rejected": -1033.4832763671875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1789360046386719, "rewards/margins": 8.762178421020508, "rewards/rejected": -9.94111442565918, "step": 45300 }, { "epoch": 0.54, "learning_rate": 2.5664822768333046e-06, "logits/chosen": -2.9079196453094482, "logits/rejected": -2.3232338428497314, "logps/chosen": -222.10403442382812, "logps/rejected": -1065.897216796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.687345266342163, "rewards/margins": 8.567960739135742, "rewards/rejected": -10.255305290222168, "step": 45310 }, { "epoch": 0.54, "learning_rate": 2.5654380196743572e-06, "logits/chosen": -2.8532395362854004, "logits/rejected": -2.4132025241851807, "logps/chosen": -186.79969787597656, "logps/rejected": -781.589111328125, "loss": 0.243, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.42183518409729, "rewards/margins": 6.026188850402832, "rewards/rejected": -7.448023319244385, "step": 45320 }, { "epoch": 0.54, "learning_rate": 2.5643937510901136e-06, "logits/chosen": -2.928462266921997, "logits/rejected": -2.609117269515991, "logps/chosen": -93.24092102050781, "logps/rejected": -814.9935302734375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.525855302810669, "rewards/margins": 7.259098052978516, "rewards/rejected": -7.784952640533447, "step": 45330 }, { "epoch": 0.54, "learning_rate": 2.5633494712629005e-06, "logits/chosen": -2.8749992847442627, "logits/rejected": -2.286353588104248, "logps/chosen": -164.84628295898438, "logps/rejected": -994.9910888671875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1813031435012817, "rewards/margins": 8.387991905212402, "rewards/rejected": -9.569294929504395, "step": 45340 }, { "epoch": 0.54, "learning_rate": 2.5623051803750443e-06, "logits/chosen": -2.901527166366577, "logits/rejected": -2.5209436416625977, "logps/chosen": -142.68032836914062, "logps/rejected": -875.4476318359375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9557322263717651, "rewards/margins": 7.421148777008057, "rewards/rejected": -8.37688159942627, "step": 45350 }, { "epoch": 0.54, "learning_rate": 2.561260878608877e-06, "logits/chosen": -2.855982542037964, "logits/rejected": -2.2840263843536377, "logps/chosen": -146.45321655273438, "logps/rejected": -895.4652099609375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9924567937850952, "rewards/margins": 7.590274810791016, "rewards/rejected": -8.582731246948242, "step": 45360 }, { "epoch": 0.54, "learning_rate": 2.5602165661467313e-06, "logits/chosen": -2.868891477584839, "logits/rejected": -2.272691488265991, "logps/chosen": -168.22128295898438, "logps/rejected": -967.1295166015625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.2226498126983643, "rewards/margins": 8.062105178833008, "rewards/rejected": -9.284753799438477, "step": 45370 }, { "epoch": 0.54, "learning_rate": 2.55917224317094e-06, "logits/chosen": -2.8921146392822266, "logits/rejected": -2.262965679168701, "logps/chosen": -179.81661987304688, "logps/rejected": -1126.9954833984375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.2704732418060303, "rewards/margins": 9.594616889953613, "rewards/rejected": -10.865089416503906, "step": 45380 }, { "epoch": 0.54, "learning_rate": 2.558127909863839e-06, "logits/chosen": -2.8404431343078613, "logits/rejected": -2.0263829231262207, "logps/chosen": -192.16668701171875, "logps/rejected": -1053.911865234375, "loss": 0.08, "rewards/accuracies": 1.0, "rewards/chosen": -1.4087387323379517, "rewards/margins": 8.727411270141602, "rewards/rejected": -10.136148452758789, "step": 45390 }, { "epoch": 0.54, "learning_rate": 2.557083566407767e-06, "logits/chosen": -2.8449394702911377, "logits/rejected": -2.0831265449523926, "logps/chosen": -184.9988250732422, "logps/rejected": -1012.1485595703125, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.3249431848526, "rewards/margins": 8.37662410736084, "rewards/rejected": -9.701566696166992, "step": 45400 }, { "epoch": 0.54, "learning_rate": 2.5560392129850626e-06, "logits/chosen": -2.802732467651367, "logits/rejected": -2.16861629486084, "logps/chosen": -144.90750122070312, "logps/rejected": -993.4241943359375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9693740010261536, "rewards/margins": 8.576960563659668, "rewards/rejected": -9.546335220336914, "step": 45410 }, { "epoch": 0.54, "learning_rate": 2.554994849778067e-06, "logits/chosen": -2.8332159519195557, "logits/rejected": -2.3334577083587646, "logps/chosen": -142.88607788085938, "logps/rejected": -956.3807373046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9780492782592773, "rewards/margins": 8.203768730163574, "rewards/rejected": -9.181818962097168, "step": 45420 }, { "epoch": 0.54, "learning_rate": 2.553950476969123e-06, "logits/chosen": -2.8495824337005615, "logits/rejected": -2.300114154815674, "logps/chosen": -144.8253173828125, "logps/rejected": -1008.2008056640625, "loss": 0.1421, "rewards/accuracies": 1.0, "rewards/chosen": -1.0284162759780884, "rewards/margins": 8.667989730834961, "rewards/rejected": -9.696405410766602, "step": 45430 }, { "epoch": 0.54, "learning_rate": 2.5529060947405766e-06, "logits/chosen": -2.8801231384277344, "logits/rejected": -2.392068386077881, "logps/chosen": -138.9815216064453, "logps/rejected": -977.2838134765625, "loss": 0.0937, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9487594366073608, "rewards/margins": 8.43146800994873, "rewards/rejected": -9.380226135253906, "step": 45440 }, { "epoch": 0.54, "learning_rate": 2.551861703274772e-06, "logits/chosen": -2.9026637077331543, "logits/rejected": -2.514997959136963, "logps/chosen": -113.88011169433594, "logps/rejected": -862.8712158203125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7063604593276978, "rewards/margins": 7.547556400299072, "rewards/rejected": -8.25391674041748, "step": 45450 }, { "epoch": 0.54, "learning_rate": 2.550817302754059e-06, "logits/chosen": -2.862612247467041, "logits/rejected": -2.3775646686553955, "logps/chosen": -129.89158630371094, "logps/rejected": -945.0205078125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8242095708847046, "rewards/margins": 8.228622436523438, "rewards/rejected": -9.052831649780273, "step": 45460 }, { "epoch": 0.54, "learning_rate": 2.5497728933607856e-06, "logits/chosen": -2.906088352203369, "logits/rejected": -2.42256498336792, "logps/chosen": -155.0782470703125, "logps/rejected": -961.5978393554688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0835869312286377, "rewards/margins": 8.132194519042969, "rewards/rejected": -9.215782165527344, "step": 45470 }, { "epoch": 0.54, "learning_rate": 2.5487284752773032e-06, "logits/chosen": -2.8461525440216064, "logits/rejected": -2.3801448345184326, "logps/chosen": -106.73686218261719, "logps/rejected": -834.1139526367188, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -0.6042370796203613, "rewards/margins": 7.3564581871032715, "rewards/rejected": -7.960694789886475, "step": 45480 }, { "epoch": 0.54, "learning_rate": 2.547684048685964e-06, "logits/chosen": -2.90141224861145, "logits/rejected": -2.289010524749756, "logps/chosen": -167.82130432128906, "logps/rejected": -976.9226684570312, "loss": 0.1397, "rewards/accuracies": 1.0, "rewards/chosen": -1.1186025142669678, "rewards/margins": 8.255880355834961, "rewards/rejected": -9.374483108520508, "step": 45490 }, { "epoch": 0.54, "learning_rate": 2.5466396137691228e-06, "logits/chosen": -2.812969923019409, "logits/rejected": -2.2644994258880615, "logps/chosen": -125.4996109008789, "logps/rejected": -938.6281127929688, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -0.7974655628204346, "rewards/margins": 8.217643737792969, "rewards/rejected": -9.015108108520508, "step": 45500 }, { "epoch": 0.54, "learning_rate": 2.5455951707091346e-06, "logits/chosen": -2.902012586593628, "logits/rejected": -2.4664437770843506, "logps/chosen": -93.42616271972656, "logps/rejected": -859.4176025390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5258254408836365, "rewards/margins": 7.678785800933838, "rewards/rejected": -8.204610824584961, "step": 45510 }, { "epoch": 0.54, "learning_rate": 2.5445507196883564e-06, "logits/chosen": -2.826810598373413, "logits/rejected": -2.1419694423675537, "logps/chosen": -169.68800354003906, "logps/rejected": -1086.776611328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.1694071292877197, "rewards/margins": 9.28416633605957, "rewards/rejected": -10.453572273254395, "step": 45520 }, { "epoch": 0.55, "learning_rate": 2.543506260889146e-06, "logits/chosen": -2.8853416442871094, "logits/rejected": -2.3309836387634277, "logps/chosen": -127.8624038696289, "logps/rejected": -1040.93359375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.78252774477005, "rewards/margins": 9.242337226867676, "rewards/rejected": -10.02486515045166, "step": 45530 }, { "epoch": 0.55, "learning_rate": 2.5424617944938633e-06, "logits/chosen": -2.8198418617248535, "logits/rejected": -2.433292865753174, "logps/chosen": -117.30149841308594, "logps/rejected": -857.0665893554688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7775211334228516, "rewards/margins": 7.412957668304443, "rewards/rejected": -8.190479278564453, "step": 45540 }, { "epoch": 0.55, "learning_rate": 2.541417320684869e-06, "logits/chosen": -2.8885302543640137, "logits/rejected": -2.1061792373657227, "logps/chosen": -168.1336669921875, "logps/rejected": -1004.2786865234375, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.1482923030853271, "rewards/margins": 8.50533390045166, "rewards/rejected": -9.653626441955566, "step": 45550 }, { "epoch": 0.55, "learning_rate": 2.540372839644526e-06, "logits/chosen": -2.8576767444610596, "logits/rejected": -2.3757050037384033, "logps/chosen": -144.72195434570312, "logps/rejected": -991.9041137695312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9653337597846985, "rewards/margins": 8.56309700012207, "rewards/rejected": -9.52843189239502, "step": 45560 }, { "epoch": 0.55, "learning_rate": 2.539328351555197e-06, "logits/chosen": -2.9056239128112793, "logits/rejected": -2.2923948764801025, "logps/chosen": -144.93881225585938, "logps/rejected": -1035.3450927734375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9786339998245239, "rewards/margins": 8.974950790405273, "rewards/rejected": -9.953585624694824, "step": 45570 }, { "epoch": 0.55, "learning_rate": 2.5382838565992472e-06, "logits/chosen": -2.8694164752960205, "logits/rejected": -2.336512804031372, "logps/chosen": -119.78324890136719, "logps/rejected": -922.1370849609375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.748840868473053, "rewards/margins": 8.078813552856445, "rewards/rejected": -8.827654838562012, "step": 45580 }, { "epoch": 0.55, "learning_rate": 2.537239354959042e-06, "logits/chosen": -2.8758742809295654, "logits/rejected": -2.431243419647217, "logps/chosen": -128.3231964111328, "logps/rejected": -954.8419799804688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8555337190628052, "rewards/margins": 8.310992240905762, "rewards/rejected": -9.166526794433594, "step": 45590 }, { "epoch": 0.55, "learning_rate": 2.5361948468169494e-06, "logits/chosen": -2.858095169067383, "logits/rejected": -2.3263561725616455, "logps/chosen": -113.41214752197266, "logps/rejected": -922.84521484375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6931657791137695, "rewards/margins": 8.148423194885254, "rewards/rejected": -8.841588020324707, "step": 45600 }, { "epoch": 0.55, "learning_rate": 2.535150332355337e-06, "logits/chosen": -2.87626576423645, "logits/rejected": -2.1485559940338135, "logps/chosen": -205.8433837890625, "logps/rejected": -1109.05419921875, "loss": 0.1591, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.496788740158081, "rewards/margins": 9.181926727294922, "rewards/rejected": -10.678714752197266, "step": 45610 }, { "epoch": 0.55, "learning_rate": 2.5341058117565737e-06, "logits/chosen": -2.8610575199127197, "logits/rejected": -2.296682357788086, "logps/chosen": -143.99171447753906, "logps/rejected": -1116.8056640625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9597893953323364, "rewards/margins": 9.811386108398438, "rewards/rejected": -10.771173477172852, "step": 45620 }, { "epoch": 0.55, "learning_rate": 2.5330612852030313e-06, "logits/chosen": -2.9166393280029297, "logits/rejected": -2.4032528400421143, "logps/chosen": -116.03887939453125, "logps/rejected": -924.1217651367188, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7154170870780945, "rewards/margins": 8.14086627960205, "rewards/rejected": -8.856283187866211, "step": 45630 }, { "epoch": 0.55, "learning_rate": 2.5320167528770796e-06, "logits/chosen": -2.8527655601501465, "logits/rejected": -2.1693310737609863, "logps/chosen": -162.00694274902344, "logps/rejected": -930.8468017578125, "loss": 0.1593, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0870387554168701, "rewards/margins": 7.834079742431641, "rewards/rejected": -8.921117782592773, "step": 45640 }, { "epoch": 0.55, "learning_rate": 2.5309722149610916e-06, "logits/chosen": -2.850327491760254, "logits/rejected": -2.4470138549804688, "logps/chosen": -117.064697265625, "logps/rejected": -847.7275390625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.729623019695282, "rewards/margins": 7.366507530212402, "rewards/rejected": -8.096131324768066, "step": 45650 }, { "epoch": 0.55, "learning_rate": 2.5299276716374417e-06, "logits/chosen": -2.823500394821167, "logits/rejected": -2.4323768615722656, "logps/chosen": -120.4688491821289, "logps/rejected": -838.2136840820312, "loss": 0.1003, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8155710101127625, "rewards/margins": 7.188899040222168, "rewards/rejected": -8.004469871520996, "step": 45660 }, { "epoch": 0.55, "learning_rate": 2.528883123088503e-06, "logits/chosen": -2.8369359970092773, "logits/rejected": -2.0223846435546875, "logps/chosen": -164.82386779785156, "logps/rejected": -975.6095581054688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.126307725906372, "rewards/margins": 8.246053695678711, "rewards/rejected": -9.37236213684082, "step": 45670 }, { "epoch": 0.55, "learning_rate": 2.527838569496651e-06, "logits/chosen": -2.8566176891326904, "logits/rejected": -2.3896806240081787, "logps/chosen": -120.10502624511719, "logps/rejected": -863.1729736328125, "loss": 0.137, "rewards/accuracies": 1.0, "rewards/chosen": -0.6774517297744751, "rewards/margins": 7.574436187744141, "rewards/rejected": -8.251888275146484, "step": 45680 }, { "epoch": 0.55, "learning_rate": 2.5267940110442618e-06, "logits/chosen": -2.8625080585479736, "logits/rejected": -2.175649404525757, "logps/chosen": -128.18600463867188, "logps/rejected": -1009.6964721679688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8158213496208191, "rewards/margins": 8.87907600402832, "rewards/rejected": -9.69489860534668, "step": 45690 }, { "epoch": 0.55, "learning_rate": 2.5257494479137136e-06, "logits/chosen": -2.8795113563537598, "logits/rejected": -2.5181660652160645, "logps/chosen": -96.35345458984375, "logps/rejected": -802.66650390625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.589743971824646, "rewards/margins": 7.062493801116943, "rewards/rejected": -7.652238368988037, "step": 45700 }, { "epoch": 0.55, "learning_rate": 2.5247048802873827e-06, "logits/chosen": -2.834934711456299, "logits/rejected": -2.273397922515869, "logps/chosen": -128.63668823242188, "logps/rejected": -982.7108154296875, "loss": 0.1319, "rewards/accuracies": 1.0, "rewards/chosen": -0.8515251278877258, "rewards/margins": 8.590113639831543, "rewards/rejected": -9.441638946533203, "step": 45710 }, { "epoch": 0.55, "learning_rate": 2.5236603083476487e-06, "logits/chosen": -2.9127275943756104, "logits/rejected": -2.6800198554992676, "logps/chosen": -81.0260009765625, "logps/rejected": -731.5390014648438, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.4615083634853363, "rewards/margins": 6.4958367347717285, "rewards/rejected": -6.957345485687256, "step": 45720 }, { "epoch": 0.55, "learning_rate": 2.52261573227689e-06, "logits/chosen": -2.841660261154175, "logits/rejected": -2.1087114810943604, "logps/chosen": -165.197265625, "logps/rejected": -873.0633544921875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.102867603302002, "rewards/margins": 7.236186981201172, "rewards/rejected": -8.339055061340332, "step": 45730 }, { "epoch": 0.55, "learning_rate": 2.5215711522574875e-06, "logits/chosen": -2.86069655418396, "logits/rejected": -2.28239107131958, "logps/chosen": -178.78414916992188, "logps/rejected": -977.6435546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.2871886491775513, "rewards/margins": 8.091768264770508, "rewards/rejected": -9.378955841064453, "step": 45740 }, { "epoch": 0.55, "learning_rate": 2.5205265684718216e-06, "logits/chosen": -2.8768858909606934, "logits/rejected": -2.461047410964966, "logps/chosen": -128.36978149414062, "logps/rejected": -939.1346435546875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8582340478897095, "rewards/margins": 8.14843463897705, "rewards/rejected": -9.006669044494629, "step": 45750 }, { "epoch": 0.55, "learning_rate": 2.5194819811022745e-06, "logits/chosen": -2.9026238918304443, "logits/rejected": -2.452099323272705, "logps/chosen": -128.3651885986328, "logps/rejected": -766.6312255859375, "loss": 0.1105, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8563357591629028, "rewards/margins": 6.443877220153809, "rewards/rejected": -7.300212860107422, "step": 45760 }, { "epoch": 0.55, "learning_rate": 2.518437390331227e-06, "logits/chosen": -2.8767106533050537, "logits/rejected": -2.0998826026916504, "logps/chosen": -180.4153289794922, "logps/rejected": -1104.4239501953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.2479561567306519, "rewards/margins": 9.39857292175293, "rewards/rejected": -10.646530151367188, "step": 45770 }, { "epoch": 0.55, "learning_rate": 2.517392796341063e-06, "logits/chosen": -2.896308183670044, "logits/rejected": -2.451500654220581, "logps/chosen": -109.7955093383789, "logps/rejected": -912.6324462890625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6781058311462402, "rewards/margins": 8.06098747253418, "rewards/rejected": -8.739092826843262, "step": 45780 }, { "epoch": 0.55, "learning_rate": 2.516348199314165e-06, "logits/chosen": -2.8156094551086426, "logits/rejected": -2.358035087585449, "logps/chosen": -132.6451416015625, "logps/rejected": -932.9190673828125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8753618001937866, "rewards/margins": 8.071476936340332, "rewards/rejected": -8.946839332580566, "step": 45790 }, { "epoch": 0.55, "learning_rate": 2.5153035994329163e-06, "logits/chosen": -2.8415544033050537, "logits/rejected": -2.163480043411255, "logps/chosen": -160.2410125732422, "logps/rejected": -994.0042724609375, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -1.150892972946167, "rewards/margins": 8.404410362243652, "rewards/rejected": -9.555303573608398, "step": 45800 }, { "epoch": 0.55, "learning_rate": 2.5142589968797014e-06, "logits/chosen": -2.853602409362793, "logits/rejected": -2.445584535598755, "logps/chosen": -129.73118591308594, "logps/rejected": -907.0118408203125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.8614916801452637, "rewards/margins": 7.839244842529297, "rewards/rejected": -8.700736999511719, "step": 45810 }, { "epoch": 0.55, "learning_rate": 2.5132143918369064e-06, "logits/chosen": -2.841175079345703, "logits/rejected": -2.2764172554016113, "logps/chosen": -155.5475616455078, "logps/rejected": -926.2635498046875, "loss": 0.2156, "rewards/accuracies": 1.0, "rewards/chosen": -1.0276339054107666, "rewards/margins": 7.840333461761475, "rewards/rejected": -8.86796760559082, "step": 45820 }, { "epoch": 0.55, "learning_rate": 2.5121697844869143e-06, "logits/chosen": -2.8500123023986816, "logits/rejected": -2.3663086891174316, "logps/chosen": -154.50946044921875, "logps/rejected": -914.2130737304688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0606788396835327, "rewards/margins": 7.694798469543457, "rewards/rejected": -8.755477905273438, "step": 45830 }, { "epoch": 0.55, "learning_rate": 2.5111251750121112e-06, "logits/chosen": -2.876659870147705, "logits/rejected": -2.3634445667266846, "logps/chosen": -147.08688354492188, "logps/rejected": -1060.939208984375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.972928524017334, "rewards/margins": 9.244039535522461, "rewards/rejected": -10.216967582702637, "step": 45840 }, { "epoch": 0.55, "learning_rate": 2.5100805635948846e-06, "logits/chosen": -2.8241257667541504, "logits/rejected": -2.1264023780822754, "logps/chosen": -205.6738739013672, "logps/rejected": -1115.235107421875, "loss": 0.1242, "rewards/accuracies": 1.0, "rewards/chosen": -1.5329725742340088, "rewards/margins": 9.214357376098633, "rewards/rejected": -10.747329711914062, "step": 45850 }, { "epoch": 0.55, "learning_rate": 2.5090359504176192e-06, "logits/chosen": -2.8679842948913574, "logits/rejected": -2.2890610694885254, "logps/chosen": -143.79991149902344, "logps/rejected": -921.2872924804688, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.9855955243110657, "rewards/margins": 7.837426662445068, "rewards/rejected": -8.823022842407227, "step": 45860 }, { "epoch": 0.55, "learning_rate": 2.5079913356627016e-06, "logits/chosen": -2.8931028842926025, "logits/rejected": -2.192138195037842, "logps/chosen": -167.68341064453125, "logps/rejected": -1037.832275390625, "loss": 0.1304, "rewards/accuracies": 1.0, "rewards/chosen": -1.0950751304626465, "rewards/margins": 8.885887145996094, "rewards/rejected": -9.980962753295898, "step": 45870 }, { "epoch": 0.55, "learning_rate": 2.506946719512519e-06, "logits/chosen": -2.8835175037384033, "logits/rejected": -2.567347288131714, "logps/chosen": -99.82172393798828, "logps/rejected": -864.7072143554688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6020970940589905, "rewards/margins": 7.675851345062256, "rewards/rejected": -8.277948379516602, "step": 45880 }, { "epoch": 0.55, "learning_rate": 2.5059021021494588e-06, "logits/chosen": -2.863943576812744, "logits/rejected": -2.234405517578125, "logps/chosen": -182.83078002929688, "logps/rejected": -962.64013671875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.3358253240585327, "rewards/margins": 7.909885406494141, "rewards/rejected": -9.245710372924805, "step": 45890 }, { "epoch": 0.55, "learning_rate": 2.5048574837559083e-06, "logits/chosen": -2.8878087997436523, "logits/rejected": -2.308415412902832, "logps/chosen": -181.91567993164062, "logps/rejected": -939.0567626953125, "loss": 0.0879, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3301188945770264, "rewards/margins": 7.65915584564209, "rewards/rejected": -8.989274978637695, "step": 45900 }, { "epoch": 0.55, "learning_rate": 2.5038128645142533e-06, "logits/chosen": -2.847628116607666, "logits/rejected": -2.3238441944122314, "logps/chosen": -153.29116821289062, "logps/rejected": -1012.2775268554688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0382189750671387, "rewards/margins": 8.68990421295166, "rewards/rejected": -9.72812271118164, "step": 45910 }, { "epoch": 0.55, "learning_rate": 2.502768244606884e-06, "logits/chosen": -2.8247904777526855, "logits/rejected": -2.1471590995788574, "logps/chosen": -157.7896728515625, "logps/rejected": -1017.1937255859375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.0898470878601074, "rewards/margins": 8.69444465637207, "rewards/rejected": -9.784292221069336, "step": 45920 }, { "epoch": 0.55, "learning_rate": 2.5017236242161864e-06, "logits/chosen": -2.889575958251953, "logits/rejected": -2.253826856613159, "logps/chosen": -166.05313110351562, "logps/rejected": -985.0108642578125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.130794882774353, "rewards/margins": 8.32123851776123, "rewards/rejected": -9.452033042907715, "step": 45930 }, { "epoch": 0.55, "learning_rate": 2.5006790035245487e-06, "logits/chosen": -2.821634531021118, "logits/rejected": -1.9857457876205444, "logps/chosen": -197.506103515625, "logps/rejected": -1161.8853759765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.4547866582870483, "rewards/margins": 9.732954025268555, "rewards/rejected": -11.18774127960205, "step": 45940 }, { "epoch": 0.55, "learning_rate": 2.4996343827143594e-06, "logits/chosen": -2.9145150184631348, "logits/rejected": -2.4708449840545654, "logps/chosen": -120.1761474609375, "logps/rejected": -912.5745849609375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7539549469947815, "rewards/margins": 7.998292446136475, "rewards/rejected": -8.752248764038086, "step": 45950 }, { "epoch": 0.55, "learning_rate": 2.498589761968005e-06, "logits/chosen": -2.8406624794006348, "logits/rejected": -2.3444764614105225, "logps/chosen": -157.55648803710938, "logps/rejected": -904.0194091796875, "loss": 0.0242, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1498949527740479, "rewards/margins": 7.5033111572265625, "rewards/rejected": -8.653206825256348, "step": 45960 }, { "epoch": 0.55, "learning_rate": 2.497545141467875e-06, "logits/chosen": -2.920206308364868, "logits/rejected": -2.5480504035949707, "logps/chosen": -100.87814331054688, "logps/rejected": -823.4241943359375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6038151979446411, "rewards/margins": 7.236712455749512, "rewards/rejected": -7.8405280113220215, "step": 45970 }, { "epoch": 0.55, "learning_rate": 2.4965005213963566e-06, "logits/chosen": -2.867457628250122, "logits/rejected": -2.281186580657959, "logps/chosen": -147.0483856201172, "logps/rejected": -995.3331298828125, "loss": 0.1449, "rewards/accuracies": 1.0, "rewards/chosen": -0.9936113357543945, "rewards/margins": 8.571556091308594, "rewards/rejected": -9.565167427062988, "step": 45980 }, { "epoch": 0.55, "learning_rate": 2.4954559019358372e-06, "logits/chosen": -2.861790895462036, "logits/rejected": -2.177931070327759, "logps/chosen": -160.1118621826172, "logps/rejected": -911.7027587890625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0911558866500854, "rewards/margins": 7.647027492523193, "rewards/rejected": -8.738183975219727, "step": 45990 }, { "epoch": 0.55, "learning_rate": 2.4944112832687044e-06, "logits/chosen": -2.8700995445251465, "logits/rejected": -2.357386350631714, "logps/chosen": -171.0983123779297, "logps/rejected": -966.4797973632812, "loss": 0.1149, "rewards/accuracies": 1.0, "rewards/chosen": -1.2611587047576904, "rewards/margins": 8.022125244140625, "rewards/rejected": -9.283284187316895, "step": 46000 }, { "epoch": 0.55, "learning_rate": 2.493366665577346e-06, "logits/chosen": -2.856038808822632, "logits/rejected": -2.27986478805542, "logps/chosen": -163.16238403320312, "logps/rejected": -988.4505615234375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1172735691070557, "rewards/margins": 8.375321388244629, "rewards/rejected": -9.492594718933105, "step": 46010 }, { "epoch": 0.55, "learning_rate": 2.49232204904415e-06, "logits/chosen": -2.847520351409912, "logits/rejected": -2.2790422439575195, "logps/chosen": -176.48255920410156, "logps/rejected": -1036.618896484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.232284426689148, "rewards/margins": 8.730427742004395, "rewards/rejected": -9.962712287902832, "step": 46020 }, { "epoch": 0.55, "learning_rate": 2.4912774338515023e-06, "logits/chosen": -2.883777618408203, "logits/rejected": -2.351898193359375, "logps/chosen": -124.15071105957031, "logps/rejected": -853.7271728515625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.7198764681816101, "rewards/margins": 7.440650939941406, "rewards/rejected": -8.160528182983398, "step": 46030 }, { "epoch": 0.55, "learning_rate": 2.4902328201817906e-06, "logits/chosen": -2.886521100997925, "logits/rejected": -2.193432331085205, "logps/chosen": -178.56344604492188, "logps/rejected": -1019.76708984375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.2352023124694824, "rewards/margins": 8.574685096740723, "rewards/rejected": -9.80988597869873, "step": 46040 }, { "epoch": 0.55, "learning_rate": 2.489188208217402e-06, "logits/chosen": -2.8554224967956543, "logits/rejected": -2.3569560050964355, "logps/chosen": -126.5951156616211, "logps/rejected": -872.00244140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8566457629203796, "rewards/margins": 7.479212760925293, "rewards/rejected": -8.335859298706055, "step": 46050 }, { "epoch": 0.55, "learning_rate": 2.4881435981407213e-06, "logits/chosen": -2.786634922027588, "logits/rejected": -2.1448588371276855, "logps/chosen": -181.64083862304688, "logps/rejected": -994.6395263671875, "loss": 0.1131, "rewards/accuracies": 1.0, "rewards/chosen": -1.2940943241119385, "rewards/margins": 8.259851455688477, "rewards/rejected": -9.553945541381836, "step": 46060 }, { "epoch": 0.55, "learning_rate": 2.4870989901341355e-06, "logits/chosen": -2.8728740215301514, "logits/rejected": -2.214343309402466, "logps/chosen": -204.35745239257812, "logps/rejected": -1067.330322265625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.4481205940246582, "rewards/margins": 8.830268859863281, "rewards/rejected": -10.278388023376465, "step": 46070 }, { "epoch": 0.55, "learning_rate": 2.486054384380031e-06, "logits/chosen": -2.857733726501465, "logits/rejected": -2.2406888008117676, "logps/chosen": -188.34719848632812, "logps/rejected": -1076.009521484375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.360524296760559, "rewards/margins": 8.995756149291992, "rewards/rejected": -10.356279373168945, "step": 46080 }, { "epoch": 0.55, "learning_rate": 2.4850097810607917e-06, "logits/chosen": -2.861908435821533, "logits/rejected": -2.3765463829040527, "logps/chosen": -156.46353149414062, "logps/rejected": -1028.2274169921875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.1412080526351929, "rewards/margins": 8.745770454406738, "rewards/rejected": -9.886978149414062, "step": 46090 }, { "epoch": 0.55, "learning_rate": 2.4839651803588025e-06, "logits/chosen": -2.8719584941864014, "logits/rejected": -2.330864429473877, "logps/chosen": -136.55548095703125, "logps/rejected": -943.7214965820312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8779364824295044, "rewards/margins": 8.168180465698242, "rewards/rejected": -9.046116828918457, "step": 46100 }, { "epoch": 0.55, "learning_rate": 2.4829205824564486e-06, "logits/chosen": -2.855896472930908, "logits/rejected": -2.16143536567688, "logps/chosen": -194.84764099121094, "logps/rejected": -1056.4473876953125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.431625247001648, "rewards/margins": 8.741833686828613, "rewards/rejected": -10.17345905303955, "step": 46110 }, { "epoch": 0.55, "learning_rate": 2.4818759875361144e-06, "logits/chosen": -2.8512778282165527, "logits/rejected": -2.166043758392334, "logps/chosen": -188.7274169921875, "logps/rejected": -1103.96826171875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3641493320465088, "rewards/margins": 9.271090507507324, "rewards/rejected": -10.63524055480957, "step": 46120 }, { "epoch": 0.55, "learning_rate": 2.4808313957801814e-06, "logits/chosen": -2.8579630851745605, "logits/rejected": -2.51631498336792, "logps/chosen": -120.63154602050781, "logps/rejected": -866.4808349609375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8254861831665039, "rewards/margins": 7.461795806884766, "rewards/rejected": -8.287282943725586, "step": 46130 }, { "epoch": 0.55, "learning_rate": 2.479786807371034e-06, "logits/chosen": -2.875138282775879, "logits/rejected": -2.340609073638916, "logps/chosen": -145.37185668945312, "logps/rejected": -916.1895751953125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.984175980091095, "rewards/margins": 7.797669410705566, "rewards/rejected": -8.781845092773438, "step": 46140 }, { "epoch": 0.55, "learning_rate": 2.4787422224910535e-06, "logits/chosen": -2.891875743865967, "logits/rejected": -2.543250322341919, "logps/chosen": -114.39151763916016, "logps/rejected": -843.9468994140625, "loss": 0.1008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7438596487045288, "rewards/margins": 7.314389228820801, "rewards/rejected": -8.058248519897461, "step": 46150 }, { "epoch": 0.55, "learning_rate": 2.4776976413226224e-06, "logits/chosen": -2.9025092124938965, "logits/rejected": -2.390284776687622, "logps/chosen": -153.08572387695312, "logps/rejected": -1048.78369140625, "loss": 0.1097, "rewards/accuracies": 1.0, "rewards/chosen": -1.041933298110962, "rewards/margins": 9.055822372436523, "rewards/rejected": -10.097755432128906, "step": 46160 }, { "epoch": 0.55, "learning_rate": 2.4766530640481207e-06, "logits/chosen": -2.873479127883911, "logits/rejected": -2.4930741786956787, "logps/chosen": -124.5190658569336, "logps/rejected": -838.5067138671875, "loss": 0.172, "rewards/accuracies": 1.0, "rewards/chosen": -0.8234656453132629, "rewards/margins": 7.196677207946777, "rewards/rejected": -8.020143508911133, "step": 46170 }, { "epoch": 0.55, "learning_rate": 2.47560849084993e-06, "logits/chosen": -2.865170955657959, "logits/rejected": -2.565608501434326, "logps/chosen": -119.36495208740234, "logps/rejected": -902.0740966796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7911905646324158, "rewards/margins": 7.835345268249512, "rewards/rejected": -8.626535415649414, "step": 46180 }, { "epoch": 0.55, "learning_rate": 2.474563921910428e-06, "logits/chosen": -2.8921334743499756, "logits/rejected": -2.4162094593048096, "logps/chosen": -134.90859985351562, "logps/rejected": -848.3840942382812, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8412901759147644, "rewards/margins": 7.247090816497803, "rewards/rejected": -8.088380813598633, "step": 46190 }, { "epoch": 0.55, "learning_rate": 2.4735193574119946e-06, "logits/chosen": -2.8476176261901855, "logits/rejected": -2.1907176971435547, "logps/chosen": -181.6918182373047, "logps/rejected": -1045.0694580078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.2759437561035156, "rewards/margins": 8.788668632507324, "rewards/rejected": -10.064611434936523, "step": 46200 }, { "epoch": 0.55, "learning_rate": 2.4724747975370083e-06, "logits/chosen": -2.836662530899048, "logits/rejected": -2.2301979064941406, "logps/chosen": -158.13034057617188, "logps/rejected": -963.0096435546875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0822441577911377, "rewards/margins": 8.156516075134277, "rewards/rejected": -9.238759994506836, "step": 46210 }, { "epoch": 0.55, "learning_rate": 2.4714302424678463e-06, "logits/chosen": -2.8773627281188965, "logits/rejected": -2.5863289833068848, "logps/chosen": -75.38139343261719, "logps/rejected": -747.2921752929688, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.3584599792957306, "rewards/margins": 6.753652095794678, "rewards/rejected": -7.112112522125244, "step": 46220 }, { "epoch": 0.55, "learning_rate": 2.470385692386883e-06, "logits/chosen": -2.901632308959961, "logits/rejected": -2.5151095390319824, "logps/chosen": -102.35810852050781, "logps/rejected": -841.8359375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6357425451278687, "rewards/margins": 7.420779228210449, "rewards/rejected": -8.05652141571045, "step": 46230 }, { "epoch": 0.55, "learning_rate": 2.469341147476497e-06, "logits/chosen": -2.838496446609497, "logits/rejected": -2.262678384780884, "logps/chosen": -155.4379425048828, "logps/rejected": -1010.9606323242188, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.0909266471862793, "rewards/margins": 8.631070137023926, "rewards/rejected": -9.721996307373047, "step": 46240 }, { "epoch": 0.55, "learning_rate": 2.468296607919061e-06, "logits/chosen": -2.868046283721924, "logits/rejected": -2.292792797088623, "logps/chosen": -161.7744140625, "logps/rejected": -939.8737182617188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.182590365409851, "rewards/margins": 7.822288513183594, "rewards/rejected": -9.004878997802734, "step": 46250 }, { "epoch": 0.55, "learning_rate": 2.4672520738969496e-06, "logits/chosen": -2.847208023071289, "logits/rejected": -2.515763521194458, "logps/chosen": -108.3206787109375, "logps/rejected": -854.0123291015625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6784048080444336, "rewards/margins": 7.4773736000061035, "rewards/rejected": -8.155778884887695, "step": 46260 }, { "epoch": 0.55, "learning_rate": 2.4662075455925355e-06, "logits/chosen": -2.8931450843811035, "logits/rejected": -2.256408452987671, "logps/chosen": -164.55368041992188, "logps/rejected": -959.9967041015625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1328916549682617, "rewards/margins": 8.063962936401367, "rewards/rejected": -9.196855545043945, "step": 46270 }, { "epoch": 0.55, "learning_rate": 2.4651630231881895e-06, "logits/chosen": -2.8574399948120117, "logits/rejected": -2.43942928314209, "logps/chosen": -132.80679321289062, "logps/rejected": -959.8077392578125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8990956544876099, "rewards/margins": 8.312829971313477, "rewards/rejected": -9.211926460266113, "step": 46280 }, { "epoch": 0.55, "learning_rate": 2.4641185068662825e-06, "logits/chosen": -2.843428134918213, "logits/rejected": -2.438255786895752, "logps/chosen": -148.0373992919922, "logps/rejected": -849.2561645507812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0161924362182617, "rewards/margins": 7.095883369445801, "rewards/rejected": -8.112076759338379, "step": 46290 }, { "epoch": 0.55, "learning_rate": 2.4630739968091857e-06, "logits/chosen": -2.896458625793457, "logits/rejected": -2.276886463165283, "logps/chosen": -155.77200317382812, "logps/rejected": -963.0818481445312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.031789779663086, "rewards/margins": 8.191242218017578, "rewards/rejected": -9.223031997680664, "step": 46300 }, { "epoch": 0.55, "learning_rate": 2.4620294931992654e-06, "logits/chosen": -2.874265432357788, "logits/rejected": -2.341618061065674, "logps/chosen": -134.42938232421875, "logps/rejected": -1034.874267578125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8985745310783386, "rewards/margins": 9.060064315795898, "rewards/rejected": -9.958639144897461, "step": 46310 }, { "epoch": 0.55, "learning_rate": 2.4609849962188907e-06, "logits/chosen": -2.8144047260284424, "logits/rejected": -2.245624303817749, "logps/chosen": -184.93768310546875, "logps/rejected": -936.5234375, "loss": 0.1447, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.345391035079956, "rewards/margins": 7.6291351318359375, "rewards/rejected": -8.974526405334473, "step": 46320 }, { "epoch": 0.55, "learning_rate": 2.459940506050427e-06, "logits/chosen": -2.860060930252075, "logits/rejected": -2.4553630352020264, "logps/chosen": -133.76170349121094, "logps/rejected": -834.7747192382812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8929176330566406, "rewards/margins": 7.061634063720703, "rewards/rejected": -7.954551696777344, "step": 46330 }, { "epoch": 0.55, "learning_rate": 2.45889602287624e-06, "logits/chosen": -2.8288724422454834, "logits/rejected": -2.2515041828155518, "logps/chosen": -169.10092163085938, "logps/rejected": -1001.4202270507812, "loss": 0.1902, "rewards/accuracies": 1.0, "rewards/chosen": -1.2238491773605347, "rewards/margins": 8.386062622070312, "rewards/rejected": -9.609911918640137, "step": 46340 }, { "epoch": 0.55, "learning_rate": 2.4578515468786933e-06, "logits/chosen": -2.858271837234497, "logits/rejected": -2.2298028469085693, "logps/chosen": -167.5038299560547, "logps/rejected": -985.4244384765625, "loss": 0.0889, "rewards/accuracies": 1.0, "rewards/chosen": -1.2017349004745483, "rewards/margins": 8.263866424560547, "rewards/rejected": -9.46560287475586, "step": 46350 }, { "epoch": 0.55, "learning_rate": 2.456807078240149e-06, "logits/chosen": -2.887606382369995, "logits/rejected": -2.4375107288360596, "logps/chosen": -131.52618408203125, "logps/rejected": -858.8762817382812, "loss": 0.0191, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8221689462661743, "rewards/margins": 7.393351078033447, "rewards/rejected": -8.215520858764648, "step": 46360 }, { "epoch": 0.56, "learning_rate": 2.4557626171429695e-06, "logits/chosen": -2.8652007579803467, "logits/rejected": -2.351438522338867, "logps/chosen": -114.42204284667969, "logps/rejected": -928.8214721679688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7189441919326782, "rewards/margins": 8.180404663085938, "rewards/rejected": -8.899349212646484, "step": 46370 }, { "epoch": 0.56, "learning_rate": 2.4547181637695137e-06, "logits/chosen": -2.891355514526367, "logits/rejected": -2.1551976203918457, "logps/chosen": -184.6591796875, "logps/rejected": -1048.6729736328125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.2588274478912354, "rewards/margins": 8.850445747375488, "rewards/rejected": -10.109271049499512, "step": 46380 }, { "epoch": 0.56, "learning_rate": 2.4536737183021405e-06, "logits/chosen": -2.885772466659546, "logits/rejected": -2.1444954872131348, "logps/chosen": -196.38583374023438, "logps/rejected": -1023.7874145507812, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -1.3744163513183594, "rewards/margins": 8.464385986328125, "rewards/rejected": -9.838801383972168, "step": 46390 }, { "epoch": 0.56, "learning_rate": 2.4526292809232083e-06, "logits/chosen": -2.8818676471710205, "logits/rejected": -2.3265488147735596, "logps/chosen": -130.63980102539062, "logps/rejected": -942.1214599609375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8538699150085449, "rewards/margins": 8.177207946777344, "rewards/rejected": -9.031076431274414, "step": 46400 }, { "epoch": 0.56, "learning_rate": 2.4515848518150716e-06, "logits/chosen": -2.8447322845458984, "logits/rejected": -2.428434133529663, "logps/chosen": -118.66117095947266, "logps/rejected": -889.6829833984375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7016862630844116, "rewards/margins": 7.81378173828125, "rewards/rejected": -8.515467643737793, "step": 46410 }, { "epoch": 0.56, "learning_rate": 2.450540431160085e-06, "logits/chosen": -2.799783706665039, "logits/rejected": -2.360952615737915, "logps/chosen": -128.34347534179688, "logps/rejected": -875.2449340820312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8582651019096375, "rewards/margins": 7.506381988525391, "rewards/rejected": -8.364646911621094, "step": 46420 }, { "epoch": 0.56, "learning_rate": 2.4494960191406016e-06, "logits/chosen": -2.8972525596618652, "logits/rejected": -2.184903144836426, "logps/chosen": -158.83352661132812, "logps/rejected": -1003.8504638671875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.0856988430023193, "rewards/margins": 8.545150756835938, "rewards/rejected": -9.630849838256836, "step": 46430 }, { "epoch": 0.56, "learning_rate": 2.448451615938974e-06, "logits/chosen": -2.8830785751342773, "logits/rejected": -2.2931694984436035, "logps/chosen": -180.50558471679688, "logps/rejected": -980.1721801757812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.274828314781189, "rewards/margins": 8.105175018310547, "rewards/rejected": -9.380002975463867, "step": 46440 }, { "epoch": 0.56, "learning_rate": 2.4474072217375504e-06, "logits/chosen": -2.877340793609619, "logits/rejected": -2.2037978172302246, "logps/chosen": -132.7599639892578, "logps/rejected": -997.8341674804688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8350804448127747, "rewards/margins": 8.749634742736816, "rewards/rejected": -9.584714889526367, "step": 46450 }, { "epoch": 0.56, "learning_rate": 2.4463628367186794e-06, "logits/chosen": -2.835563898086548, "logits/rejected": -2.338229179382324, "logps/chosen": -152.94009399414062, "logps/rejected": -905.2142333984375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0643775463104248, "rewards/margins": 7.5957231521606445, "rewards/rejected": -8.660099983215332, "step": 46460 }, { "epoch": 0.56, "learning_rate": 2.445318461064709e-06, "logits/chosen": -2.8798108100891113, "logits/rejected": -2.091444730758667, "logps/chosen": -198.28025817871094, "logps/rejected": -1063.524658203125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.4205023050308228, "rewards/margins": 8.824617385864258, "rewards/rejected": -10.245119094848633, "step": 46470 }, { "epoch": 0.56, "learning_rate": 2.444274094957983e-06, "logits/chosen": -2.8206324577331543, "logits/rejected": -2.1761231422424316, "logps/chosen": -182.04258728027344, "logps/rejected": -1031.958984375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.319434404373169, "rewards/margins": 8.606608390808105, "rewards/rejected": -9.926042556762695, "step": 46480 }, { "epoch": 0.56, "learning_rate": 2.4432297385808455e-06, "logits/chosen": -2.8936116695404053, "logits/rejected": -2.351365327835083, "logps/chosen": -152.4815673828125, "logps/rejected": -1026.393798828125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.0139240026474, "rewards/margins": 8.846375465393066, "rewards/rejected": -9.860300064086914, "step": 46490 }, { "epoch": 0.56, "learning_rate": 2.442185392115638e-06, "logits/chosen": -2.8352081775665283, "logits/rejected": -2.2261455059051514, "logps/chosen": -135.4546661376953, "logps/rejected": -946.1238403320312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8820163607597351, "rewards/margins": 8.19011402130127, "rewards/rejected": -9.07213020324707, "step": 46500 }, { "epoch": 0.56, "learning_rate": 2.4411410557447e-06, "logits/chosen": -2.8570501804351807, "logits/rejected": -2.3715553283691406, "logps/chosen": -125.7328109741211, "logps/rejected": -825.7552490234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8240758776664734, "rewards/margins": 7.055089473724365, "rewards/rejected": -7.8791656494140625, "step": 46510 }, { "epoch": 0.56, "learning_rate": 2.44009672965037e-06, "logits/chosen": -2.842604875564575, "logits/rejected": -2.356327533721924, "logps/chosen": -163.10440063476562, "logps/rejected": -976.4407348632812, "loss": 0.1231, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1141788959503174, "rewards/margins": 8.269094467163086, "rewards/rejected": -9.383272171020508, "step": 46520 }, { "epoch": 0.56, "learning_rate": 2.4390524140149853e-06, "logits/chosen": -2.785717248916626, "logits/rejected": -2.1068406105041504, "logps/chosen": -173.72561645507812, "logps/rejected": -994.4534301757812, "loss": 0.137, "rewards/accuracies": 1.0, "rewards/chosen": -1.2324364185333252, "rewards/margins": 8.331258773803711, "rewards/rejected": -9.563694953918457, "step": 46530 }, { "epoch": 0.56, "learning_rate": 2.43800810902088e-06, "logits/chosen": -2.855738639831543, "logits/rejected": -2.2269530296325684, "logps/chosen": -149.39682006835938, "logps/rejected": -992.1145629882812, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0053980350494385, "rewards/margins": 8.515344619750977, "rewards/rejected": -9.520743370056152, "step": 46540 }, { "epoch": 0.56, "learning_rate": 2.436963814850386e-06, "logits/chosen": -2.858888626098633, "logits/rejected": -2.1156206130981445, "logps/chosen": -184.11654663085938, "logps/rejected": -1060.115966796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.2466732263565063, "rewards/margins": 8.946948051452637, "rewards/rejected": -10.193621635437012, "step": 46550 }, { "epoch": 0.56, "learning_rate": 2.435919531685835e-06, "logits/chosen": -2.860914468765259, "logits/rejected": -2.2135539054870605, "logps/chosen": -195.6732940673828, "logps/rejected": -1177.041748046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.4629065990447998, "rewards/margins": 9.89925479888916, "rewards/rejected": -11.362160682678223, "step": 46560 }, { "epoch": 0.56, "learning_rate": 2.4348752597095564e-06, "logits/chosen": -2.898764133453369, "logits/rejected": -2.3753244876861572, "logps/chosen": -131.65158081054688, "logps/rejected": -990.6082153320312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8980901837348938, "rewards/margins": 8.614477157592773, "rewards/rejected": -9.512567520141602, "step": 46570 }, { "epoch": 0.56, "learning_rate": 2.433830999103876e-06, "logits/chosen": -2.823193311691284, "logits/rejected": -2.2937426567077637, "logps/chosen": -134.5216827392578, "logps/rejected": -838.6036376953125, "loss": 0.0811, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9303884506225586, "rewards/margins": 7.0691328048706055, "rewards/rejected": -7.9995222091674805, "step": 46580 }, { "epoch": 0.56, "learning_rate": 2.432786750051119e-06, "logits/chosen": -2.883941173553467, "logits/rejected": -2.3203682899475098, "logps/chosen": -162.33023071289062, "logps/rejected": -1001.5672607421875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0987651348114014, "rewards/margins": 8.527474403381348, "rewards/rejected": -9.626238822937012, "step": 46590 }, { "epoch": 0.56, "learning_rate": 2.43174251273361e-06, "logits/chosen": -2.8967511653900146, "logits/rejected": -2.4768803119659424, "logps/chosen": -162.6666717529297, "logps/rejected": -953.0291137695312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1620210409164429, "rewards/margins": 7.984400272369385, "rewards/rejected": -9.146421432495117, "step": 46600 }, { "epoch": 0.56, "learning_rate": 2.430698287333668e-06, "logits/chosen": -2.9008891582489014, "logits/rejected": -2.4441938400268555, "logps/chosen": -95.99779510498047, "logps/rejected": -879.8089599609375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5477827191352844, "rewards/margins": 7.878737449645996, "rewards/rejected": -8.426520347595215, "step": 46610 }, { "epoch": 0.56, "learning_rate": 2.4296540740336122e-06, "logits/chosen": -2.893949031829834, "logits/rejected": -2.5687146186828613, "logps/chosen": -100.35931396484375, "logps/rejected": -823.2867431640625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6074745059013367, "rewards/margins": 7.254090309143066, "rewards/rejected": -7.861564636230469, "step": 46620 }, { "epoch": 0.56, "learning_rate": 2.42860987301576e-06, "logits/chosen": -2.862464666366577, "logits/rejected": -2.3527112007141113, "logps/chosen": -141.9612579345703, "logps/rejected": -987.810546875, "loss": 0.0944, "rewards/accuracies": 1.0, "rewards/chosen": -0.9847068786621094, "rewards/margins": 8.511404037475586, "rewards/rejected": -9.496109962463379, "step": 46630 }, { "epoch": 0.56, "learning_rate": 2.427565684462426e-06, "logits/chosen": -2.897757053375244, "logits/rejected": -2.360234022140503, "logps/chosen": -145.6857147216797, "logps/rejected": -943.0167236328125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0182123184204102, "rewards/margins": 8.022379875183105, "rewards/rejected": -9.0405912399292, "step": 46640 }, { "epoch": 0.56, "learning_rate": 2.426521508555921e-06, "logits/chosen": -2.8802103996276855, "logits/rejected": -2.350329875946045, "logps/chosen": -118.29573822021484, "logps/rejected": -932.9586181640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7544065713882446, "rewards/margins": 8.195670127868652, "rewards/rejected": -8.950077056884766, "step": 46650 }, { "epoch": 0.56, "learning_rate": 2.4254773454785577e-06, "logits/chosen": -2.8313307762145996, "logits/rejected": -2.169024705886841, "logps/chosen": -140.88076782226562, "logps/rejected": -1057.7030029296875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.9241684079170227, "rewards/margins": 9.258946418762207, "rewards/rejected": -10.183115005493164, "step": 46660 }, { "epoch": 0.56, "learning_rate": 2.4244331954126425e-06, "logits/chosen": -2.8675003051757812, "logits/rejected": -2.290870428085327, "logps/chosen": -148.6851043701172, "logps/rejected": -1024.16357421875, "loss": 0.1442, "rewards/accuracies": 1.0, "rewards/chosen": -1.002121925354004, "rewards/margins": 8.854012489318848, "rewards/rejected": -9.856134414672852, "step": 46670 }, { "epoch": 0.56, "learning_rate": 2.423389058540482e-06, "logits/chosen": -2.885822296142578, "logits/rejected": -2.35856556892395, "logps/chosen": -133.62005615234375, "logps/rejected": -918.9410400390625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.8517698049545288, "rewards/margins": 7.951199531555176, "rewards/rejected": -8.802968978881836, "step": 46680 }, { "epoch": 0.56, "learning_rate": 2.422344935044379e-06, "logits/chosen": -2.9081599712371826, "logits/rejected": -2.4688825607299805, "logps/chosen": -112.21958923339844, "logps/rejected": -919.6505737304688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6631054282188416, "rewards/margins": 8.155889511108398, "rewards/rejected": -8.818994522094727, "step": 46690 }, { "epoch": 0.56, "learning_rate": 2.4213008251066346e-06, "logits/chosen": -2.8313400745391846, "logits/rejected": -2.0778043270111084, "logps/chosen": -195.29530334472656, "logps/rejected": -1156.4647216796875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.3974924087524414, "rewards/margins": 9.750593185424805, "rewards/rejected": -11.14808464050293, "step": 46700 }, { "epoch": 0.56, "learning_rate": 2.420256728909547e-06, "logits/chosen": -2.881495237350464, "logits/rejected": -2.412909746170044, "logps/chosen": -114.08577728271484, "logps/rejected": -990.8522338867188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6791396141052246, "rewards/margins": 8.842376708984375, "rewards/rejected": -9.521514892578125, "step": 46710 }, { "epoch": 0.56, "learning_rate": 2.419212646635414e-06, "logits/chosen": -2.8594698905944824, "logits/rejected": -2.355034351348877, "logps/chosen": -130.91233825683594, "logps/rejected": -987.0966796875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.8490067720413208, "rewards/margins": 8.634798049926758, "rewards/rejected": -9.483804702758789, "step": 46720 }, { "epoch": 0.56, "learning_rate": 2.418168578466528e-06, "logits/chosen": -2.8603601455688477, "logits/rejected": -2.1276695728302, "logps/chosen": -166.9567108154297, "logps/rejected": -1079.63818359375, "loss": 0.0612, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1441473960876465, "rewards/margins": 9.259919166564941, "rewards/rejected": -10.404067993164062, "step": 46730 }, { "epoch": 0.56, "learning_rate": 2.417124524585181e-06, "logits/chosen": -2.869572162628174, "logits/rejected": -2.315462112426758, "logps/chosen": -144.04837036132812, "logps/rejected": -1027.629638671875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9894108772277832, "rewards/margins": 8.891916275024414, "rewards/rejected": -9.881328582763672, "step": 46740 }, { "epoch": 0.56, "learning_rate": 2.4160804851736615e-06, "logits/chosen": -2.8975071907043457, "logits/rejected": -2.6022849082946777, "logps/chosen": -125.74739074707031, "logps/rejected": -854.6585083007812, "loss": 0.1035, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8483255505561829, "rewards/margins": 7.329835414886475, "rewards/rejected": -8.178160667419434, "step": 46750 }, { "epoch": 0.56, "learning_rate": 2.4150364604142566e-06, "logits/chosen": -2.8996119499206543, "logits/rejected": -2.3175721168518066, "logps/chosen": -144.25119018554688, "logps/rejected": -968.3831787109375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9736418724060059, "rewards/margins": 8.31301498413086, "rewards/rejected": -9.28665828704834, "step": 46760 }, { "epoch": 0.56, "learning_rate": 2.4139924504892496e-06, "logits/chosen": -2.8657240867614746, "logits/rejected": -2.2248053550720215, "logps/chosen": -170.16168212890625, "logps/rejected": -1076.355712890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.2022006511688232, "rewards/margins": 9.137813568115234, "rewards/rejected": -10.34001636505127, "step": 46770 }, { "epoch": 0.56, "learning_rate": 2.412948455580922e-06, "logits/chosen": -2.87463116645813, "logits/rejected": -2.137876033782959, "logps/chosen": -167.95504760742188, "logps/rejected": -1052.02783203125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1774156093597412, "rewards/margins": 8.944860458374023, "rewards/rejected": -10.122275352478027, "step": 46780 }, { "epoch": 0.56, "learning_rate": 2.4119044758715524e-06, "logits/chosen": -2.888059139251709, "logits/rejected": -2.310945987701416, "logps/chosen": -163.38671875, "logps/rejected": -1016.5997924804688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.2052875757217407, "rewards/margins": 8.568902969360352, "rewards/rejected": -9.774190902709961, "step": 46790 }, { "epoch": 0.56, "learning_rate": 2.410860511543416e-06, "logits/chosen": -2.8852357864379883, "logits/rejected": -2.5310580730438232, "logps/chosen": -100.70732879638672, "logps/rejected": -796.165283203125, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.5827950835227966, "rewards/margins": 6.994398593902588, "rewards/rejected": -7.5771942138671875, "step": 46800 }, { "epoch": 0.56, "learning_rate": 2.4098165627787874e-06, "logits/chosen": -2.8789854049682617, "logits/rejected": -2.585812568664551, "logps/chosen": -92.75691223144531, "logps/rejected": -854.5184326171875, "loss": 0.0542, "rewards/accuracies": 1.0, "rewards/chosen": -0.5597283840179443, "rewards/margins": 7.618112087249756, "rewards/rejected": -8.177839279174805, "step": 46810 }, { "epoch": 0.56, "learning_rate": 2.4087726297599366e-06, "logits/chosen": -2.8628149032592773, "logits/rejected": -2.296936511993408, "logps/chosen": -153.43919372558594, "logps/rejected": -1010.6574096679688, "loss": 0.1022, "rewards/accuracies": 1.0, "rewards/chosen": -1.0583388805389404, "rewards/margins": 8.672247886657715, "rewards/rejected": -9.730586051940918, "step": 46820 }, { "epoch": 0.56, "learning_rate": 2.407728712669131e-06, "logits/chosen": -2.8541100025177, "logits/rejected": -2.3327560424804688, "logps/chosen": -181.62896728515625, "logps/rejected": -977.8884887695312, "loss": 0.0943, "rewards/accuracies": 1.0, "rewards/chosen": -1.3334428071975708, "rewards/margins": 8.032232284545898, "rewards/rejected": -9.365675926208496, "step": 46830 }, { "epoch": 0.56, "learning_rate": 2.406684811688635e-06, "logits/chosen": -2.8855080604553223, "logits/rejected": -2.4140913486480713, "logps/chosen": -142.70761108398438, "logps/rejected": -1030.8458251953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9730991125106812, "rewards/margins": 8.926420211791992, "rewards/rejected": -9.899519920349121, "step": 46840 }, { "epoch": 0.56, "learning_rate": 2.4056409270007126e-06, "logits/chosen": -2.8648295402526855, "logits/rejected": -2.2114808559417725, "logps/chosen": -161.50601196289062, "logps/rejected": -1024.484375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.1152489185333252, "rewards/margins": 8.756814956665039, "rewards/rejected": -9.872062683105469, "step": 46850 }, { "epoch": 0.56, "learning_rate": 2.4045970587876224e-06, "logits/chosen": -2.9100303649902344, "logits/rejected": -2.38525390625, "logps/chosen": -123.5513916015625, "logps/rejected": -948.0559692382812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7516996264457703, "rewards/margins": 8.341026306152344, "rewards/rejected": -9.09272575378418, "step": 46860 }, { "epoch": 0.56, "learning_rate": 2.4035532072316207e-06, "logits/chosen": -2.897066831588745, "logits/rejected": -1.984483003616333, "logps/chosen": -205.8389129638672, "logps/rejected": -1097.4610595703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.488857626914978, "rewards/margins": 9.062907218933105, "rewards/rejected": -10.551765441894531, "step": 46870 }, { "epoch": 0.56, "learning_rate": 2.4025093725149604e-06, "logits/chosen": -2.8723371028900146, "logits/rejected": -2.3396716117858887, "logps/chosen": -209.8708038330078, "logps/rejected": -1108.275146484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.5333502292633057, "rewards/margins": 9.145952224731445, "rewards/rejected": -10.679300308227539, "step": 46880 }, { "epoch": 0.56, "learning_rate": 2.4014655548198944e-06, "logits/chosen": -2.8568942546844482, "logits/rejected": -2.0738797187805176, "logps/chosen": -175.39071655273438, "logps/rejected": -1100.4703369140625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.2324907779693604, "rewards/margins": 9.354398727416992, "rewards/rejected": -10.586889266967773, "step": 46890 }, { "epoch": 0.56, "learning_rate": 2.4004217543286677e-06, "logits/chosen": -2.8777594566345215, "logits/rejected": -2.418874740600586, "logps/chosen": -136.1282501220703, "logps/rejected": -941.7452392578125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9015962481498718, "rewards/margins": 8.12704849243164, "rewards/rejected": -9.028645515441895, "step": 46900 }, { "epoch": 0.56, "learning_rate": 2.399377971223526e-06, "logits/chosen": -2.8922767639160156, "logits/rejected": -2.363004684448242, "logps/chosen": -150.8256378173828, "logps/rejected": -1029.7635498046875, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -1.0679367780685425, "rewards/margins": 8.830147743225098, "rewards/rejected": -9.89808464050293, "step": 46910 }, { "epoch": 0.56, "learning_rate": 2.398334205686712e-06, "logits/chosen": -2.864034652709961, "logits/rejected": -2.284808397293091, "logps/chosen": -144.39454650878906, "logps/rejected": -975.5023193359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0003399848937988, "rewards/margins": 8.356417655944824, "rewards/rejected": -9.356757164001465, "step": 46920 }, { "epoch": 0.56, "learning_rate": 2.3972904579004624e-06, "logits/chosen": -2.881734848022461, "logits/rejected": -2.383171319961548, "logps/chosen": -177.25119018554688, "logps/rejected": -913.8848876953125, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -1.297560214996338, "rewards/margins": 7.438237190246582, "rewards/rejected": -8.735796928405762, "step": 46930 }, { "epoch": 0.56, "learning_rate": 2.396246728047014e-06, "logits/chosen": -2.8659231662750244, "logits/rejected": -2.3221986293792725, "logps/chosen": -185.77203369140625, "logps/rejected": -932.2257080078125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.3841204643249512, "rewards/margins": 7.559033393859863, "rewards/rejected": -8.94315242767334, "step": 46940 }, { "epoch": 0.56, "learning_rate": 2.395203016308598e-06, "logits/chosen": -2.85471773147583, "logits/rejected": -2.34368896484375, "logps/chosen": -125.6642837524414, "logps/rejected": -1008.9140625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7839708924293518, "rewards/margins": 8.924211502075195, "rewards/rejected": -9.708183288574219, "step": 46950 }, { "epoch": 0.56, "learning_rate": 2.3941593228674455e-06, "logits/chosen": -2.871509313583374, "logits/rejected": -2.5180256366729736, "logps/chosen": -124.03279113769531, "logps/rejected": -891.7272338867188, "loss": 0.0898, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8525902628898621, "rewards/margins": 7.685695648193359, "rewards/rejected": -8.538286209106445, "step": 46960 }, { "epoch": 0.56, "learning_rate": 2.3931156479057807e-06, "logits/chosen": -2.8390049934387207, "logits/rejected": -2.3350532054901123, "logps/chosen": -191.13475036621094, "logps/rejected": -1006.9490966796875, "loss": 0.1332, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.461349368095398, "rewards/margins": 8.220206260681152, "rewards/rejected": -9.68155574798584, "step": 46970 }, { "epoch": 0.56, "learning_rate": 2.3920719916058264e-06, "logits/chosen": -2.865508556365967, "logits/rejected": -2.5189552307128906, "logps/chosen": -97.13987731933594, "logps/rejected": -784.4576416015625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.554425835609436, "rewards/margins": 6.923777103424072, "rewards/rejected": -7.478202819824219, "step": 46980 }, { "epoch": 0.56, "learning_rate": 2.391028354149804e-06, "logits/chosen": -2.8213095664978027, "logits/rejected": -2.30208420753479, "logps/chosen": -141.04615783691406, "logps/rejected": -1003.1590576171875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9605588912963867, "rewards/margins": 8.685175895690918, "rewards/rejected": -9.645734786987305, "step": 46990 }, { "epoch": 0.56, "learning_rate": 2.389984735719927e-06, "logits/chosen": -2.879364490509033, "logits/rejected": -2.372960090637207, "logps/chosen": -109.60005187988281, "logps/rejected": -922.16064453125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6895136833190918, "rewards/margins": 8.160059928894043, "rewards/rejected": -8.849573135375977, "step": 47000 }, { "epoch": 0.56, "learning_rate": 2.38894113649841e-06, "logits/chosen": -2.8522019386291504, "logits/rejected": -2.3861489295959473, "logps/chosen": -115.78639221191406, "logps/rejected": -874.2445068359375, "loss": 0.1502, "rewards/accuracies": 1.0, "rewards/chosen": -0.6942881345748901, "rewards/margins": 7.668063163757324, "rewards/rejected": -8.36235237121582, "step": 47010 }, { "epoch": 0.56, "learning_rate": 2.3878975566674625e-06, "logits/chosen": -2.8727662563323975, "logits/rejected": -2.208287000656128, "logps/chosen": -159.94168090820312, "logps/rejected": -1058.5560302734375, "loss": 0.1357, "rewards/accuracies": 1.0, "rewards/chosen": -1.1267879009246826, "rewards/margins": 9.069494247436523, "rewards/rejected": -10.196281433105469, "step": 47020 }, { "epoch": 0.56, "learning_rate": 2.38685399640929e-06, "logits/chosen": -2.888786792755127, "logits/rejected": -2.2541656494140625, "logps/chosen": -140.61862182617188, "logps/rejected": -1027.8177490234375, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -0.9341703653335571, "rewards/margins": 8.95237922668457, "rewards/rejected": -9.886549949645996, "step": 47030 }, { "epoch": 0.56, "learning_rate": 2.385810455906095e-06, "logits/chosen": -2.8647217750549316, "logits/rejected": -2.2832627296447754, "logps/chosen": -150.27957153320312, "logps/rejected": -955.9837036132812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.044631004333496, "rewards/margins": 8.125158309936523, "rewards/rejected": -9.16978931427002, "step": 47040 }, { "epoch": 0.56, "learning_rate": 2.3847669353400774e-06, "logits/chosen": -2.867227792739868, "logits/rejected": -2.3223979473114014, "logps/chosen": -179.6161346435547, "logps/rejected": -1063.756103515625, "loss": 0.2799, "rewards/accuracies": 1.0, "rewards/chosen": -1.3179962635040283, "rewards/margins": 8.922907829284668, "rewards/rejected": -10.240903854370117, "step": 47050 }, { "epoch": 0.56, "learning_rate": 2.3837234348934337e-06, "logits/chosen": -2.8487112522125244, "logits/rejected": -2.1898136138916016, "logps/chosen": -158.14096069335938, "logps/rejected": -1020.4281005859375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.034778118133545, "rewards/margins": 8.772500991821289, "rewards/rejected": -9.807279586791992, "step": 47060 }, { "epoch": 0.56, "learning_rate": 2.382679954748354e-06, "logits/chosen": -2.816587448120117, "logits/rejected": -2.3846919536590576, "logps/chosen": -127.73130798339844, "logps/rejected": -913.62744140625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8626962900161743, "rewards/margins": 7.887368679046631, "rewards/rejected": -8.750065803527832, "step": 47070 }, { "epoch": 0.56, "learning_rate": 2.381636495087029e-06, "logits/chosen": -2.8420863151550293, "logits/rejected": -2.217371702194214, "logps/chosen": -137.66281127929688, "logps/rejected": -1049.086181640625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.9098079800605774, "rewards/margins": 9.187954902648926, "rewards/rejected": -10.097763061523438, "step": 47080 }, { "epoch": 0.56, "learning_rate": 2.380593056091643e-06, "logits/chosen": -2.789422035217285, "logits/rejected": -2.2027790546417236, "logps/chosen": -160.87673950195312, "logps/rejected": -1019.7526245117188, "loss": 0.1427, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1345689296722412, "rewards/margins": 8.666584968566895, "rewards/rejected": -9.801153182983398, "step": 47090 }, { "epoch": 0.56, "learning_rate": 2.3795496379443776e-06, "logits/chosen": -2.8750193119049072, "logits/rejected": -2.1415154933929443, "logps/chosen": -186.867431640625, "logps/rejected": -1077.263427734375, "loss": 0.1198, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.324059247970581, "rewards/margins": 9.03678035736084, "rewards/rejected": -10.36083984375, "step": 47100 }, { "epoch": 0.56, "learning_rate": 2.378506240827412e-06, "logits/chosen": -2.8653225898742676, "logits/rejected": -2.1103482246398926, "logps/chosen": -162.28298950195312, "logps/rejected": -968.13134765625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.1076343059539795, "rewards/margins": 8.189062118530273, "rewards/rejected": -9.296696662902832, "step": 47110 }, { "epoch": 0.56, "learning_rate": 2.3774628649229183e-06, "logits/chosen": -2.8599166870117188, "logits/rejected": -2.186903715133667, "logps/chosen": -156.77005004882812, "logps/rejected": -1015.3713989257812, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.0744879245758057, "rewards/margins": 8.679605484008789, "rewards/rejected": -9.754094123840332, "step": 47120 }, { "epoch": 0.56, "learning_rate": 2.3764195104130684e-06, "logits/chosen": -2.8704137802124023, "logits/rejected": -2.3134288787841797, "logps/chosen": -120.84922790527344, "logps/rejected": -951.4375, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -0.7383137941360474, "rewards/margins": 8.395439147949219, "rewards/rejected": -9.133752822875977, "step": 47130 }, { "epoch": 0.56, "learning_rate": 2.3753761774800293e-06, "logits/chosen": -2.8625717163085938, "logits/rejected": -2.246121883392334, "logps/chosen": -141.3809814453125, "logps/rejected": -1020.4703979492188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9655567407608032, "rewards/margins": 8.851380348205566, "rewards/rejected": -9.816936492919922, "step": 47140 }, { "epoch": 0.56, "learning_rate": 2.3743328663059626e-06, "logits/chosen": -2.8488705158233643, "logits/rejected": -2.314258098602295, "logps/chosen": -103.81062316894531, "logps/rejected": -885.00244140625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5814292430877686, "rewards/margins": 7.8891282081604, "rewards/rejected": -8.47055721282959, "step": 47150 }, { "epoch": 0.56, "learning_rate": 2.3732895770730286e-06, "logits/chosen": -2.854142904281616, "logits/rejected": -2.393475294113159, "logps/chosen": -126.1207275390625, "logps/rejected": -1001.1937255859375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7870917916297913, "rewards/margins": 8.846304893493652, "rewards/rejected": -9.633397102355957, "step": 47160 }, { "epoch": 0.56, "learning_rate": 2.372246309963383e-06, "logits/chosen": -2.8431403636932373, "logits/rejected": -2.3062548637390137, "logps/chosen": -144.19371032714844, "logps/rejected": -985.2418823242188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9868983030319214, "rewards/margins": 8.460248947143555, "rewards/rejected": -9.447147369384766, "step": 47170 }, { "epoch": 0.56, "learning_rate": 2.3712030651591764e-06, "logits/chosen": -2.875699043273926, "logits/rejected": -2.407174825668335, "logps/chosen": -119.88211822509766, "logps/rejected": -951.6512451171875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7710672616958618, "rewards/margins": 8.355512619018555, "rewards/rejected": -9.126581192016602, "step": 47180 }, { "epoch": 0.56, "learning_rate": 2.370159842842557e-06, "logits/chosen": -2.8397114276885986, "logits/rejected": -2.3001508712768555, "logps/chosen": -136.93394470214844, "logps/rejected": -971.5099487304688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8930238485336304, "rewards/margins": 8.440778732299805, "rewards/rejected": -9.333803176879883, "step": 47190 }, { "epoch": 0.57, "learning_rate": 2.3691166431956682e-06, "logits/chosen": -2.859941005706787, "logits/rejected": -2.4555745124816895, "logps/chosen": -120.31327056884766, "logps/rejected": -803.8837280273438, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7735517024993896, "rewards/margins": 6.892617225646973, "rewards/rejected": -7.6661696434021, "step": 47200 }, { "epoch": 0.57, "learning_rate": 2.3680734664006503e-06, "logits/chosen": -2.8927161693573, "logits/rejected": -2.3029890060424805, "logps/chosen": -152.33486938476562, "logps/rejected": -1021.8097534179688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9985200762748718, "rewards/margins": 8.816205978393555, "rewards/rejected": -9.814725875854492, "step": 47210 }, { "epoch": 0.57, "learning_rate": 2.367030312639639e-06, "logits/chosen": -2.909773349761963, "logits/rejected": -2.543858528137207, "logps/chosen": -96.03509521484375, "logps/rejected": -872.4025268554688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5219998955726624, "rewards/margins": 7.818869590759277, "rewards/rejected": -8.340869903564453, "step": 47220 }, { "epoch": 0.57, "learning_rate": 2.365987182094765e-06, "logits/chosen": -2.892923355102539, "logits/rejected": -2.1243557929992676, "logps/chosen": -189.32870483398438, "logps/rejected": -1057.2197265625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.3596594333648682, "rewards/margins": 8.801323890686035, "rewards/rejected": -10.160983085632324, "step": 47230 }, { "epoch": 0.57, "learning_rate": 2.3649440749481577e-06, "logits/chosen": -2.867347002029419, "logits/rejected": -2.2023072242736816, "logps/chosen": -160.70660400390625, "logps/rejected": -989.4323120117188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0895497798919678, "rewards/margins": 8.393852233886719, "rewards/rejected": -9.48340129852295, "step": 47240 }, { "epoch": 0.57, "learning_rate": 2.3639009913819393e-06, "logits/chosen": -2.8845458030700684, "logits/rejected": -2.4173130989074707, "logps/chosen": -107.8294677734375, "logps/rejected": -865.2017822265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6338124871253967, "rewards/margins": 7.644351005554199, "rewards/rejected": -8.27816390991211, "step": 47250 }, { "epoch": 0.57, "learning_rate": 2.36285793157823e-06, "logits/chosen": -2.8743841648101807, "logits/rejected": -2.1996102333068848, "logps/chosen": -172.87570190429688, "logps/rejected": -1081.504638671875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1746407747268677, "rewards/margins": 9.23387622833252, "rewards/rejected": -10.408515930175781, "step": 47260 }, { "epoch": 0.57, "learning_rate": 2.361814895719145e-06, "logits/chosen": -2.83223819732666, "logits/rejected": -2.0069222450256348, "logps/chosen": -201.8201904296875, "logps/rejected": -1025.749267578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.4917296171188354, "rewards/margins": 8.374028205871582, "rewards/rejected": -9.865757942199707, "step": 47270 }, { "epoch": 0.57, "learning_rate": 2.360771883986795e-06, "logits/chosen": -2.9117889404296875, "logits/rejected": -2.3543925285339355, "logps/chosen": -136.54454040527344, "logps/rejected": -1010.9948120117188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9111925959587097, "rewards/margins": 8.799607276916504, "rewards/rejected": -9.710801124572754, "step": 47280 }, { "epoch": 0.57, "learning_rate": 2.3597288965632873e-06, "logits/chosen": -2.8344156742095947, "logits/rejected": -2.3487095832824707, "logps/chosen": -140.9736785888672, "logps/rejected": -875.7445068359375, "loss": 0.1216, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9202250242233276, "rewards/margins": 7.462406158447266, "rewards/rejected": -8.382631301879883, "step": 47290 }, { "epoch": 0.57, "learning_rate": 2.3586859336307254e-06, "logits/chosen": -2.88096284866333, "logits/rejected": -2.4785327911376953, "logps/chosen": -106.7115478515625, "logps/rejected": -886.7286376953125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6452901363372803, "rewards/margins": 7.840325832366943, "rewards/rejected": -8.485615730285645, "step": 47300 }, { "epoch": 0.57, "learning_rate": 2.357642995371207e-06, "logits/chosen": -2.9030117988586426, "logits/rejected": -2.4577536582946777, "logps/chosen": -126.0548095703125, "logps/rejected": -940.9518432617188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8084405064582825, "rewards/margins": 8.216276168823242, "rewards/rejected": -9.024717330932617, "step": 47310 }, { "epoch": 0.57, "learning_rate": 2.3566000819668256e-06, "logits/chosen": -2.862238645553589, "logits/rejected": -2.4297568798065186, "logps/chosen": -162.9351043701172, "logps/rejected": -963.4927978515625, "loss": 0.163, "rewards/accuracies": 1.0, "rewards/chosen": -1.158783197402954, "rewards/margins": 8.069671630859375, "rewards/rejected": -9.22845458984375, "step": 47320 }, { "epoch": 0.57, "learning_rate": 2.3555571935996714e-06, "logits/chosen": -2.8544163703918457, "logits/rejected": -2.481959581375122, "logps/chosen": -136.0655059814453, "logps/rejected": -876.1788330078125, "loss": 0.158, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.947601318359375, "rewards/margins": 7.440415859222412, "rewards/rejected": -8.388017654418945, "step": 47330 }, { "epoch": 0.57, "learning_rate": 2.3545143304518313e-06, "logits/chosen": -2.8776731491088867, "logits/rejected": -2.462580680847168, "logps/chosen": -120.01517486572266, "logps/rejected": -854.642578125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.762113094329834, "rewards/margins": 7.405056953430176, "rewards/rejected": -8.167169570922852, "step": 47340 }, { "epoch": 0.57, "learning_rate": 2.353471492705384e-06, "logits/chosen": -2.857787609100342, "logits/rejected": -2.1473498344421387, "logps/chosen": -175.68861389160156, "logps/rejected": -1093.71630859375, "loss": 0.1314, "rewards/accuracies": 1.0, "rewards/chosen": -1.191122055053711, "rewards/margins": 9.33509349822998, "rewards/rejected": -10.526216506958008, "step": 47350 }, { "epoch": 0.57, "learning_rate": 2.3524286805424073e-06, "logits/chosen": -2.8724279403686523, "logits/rejected": -2.462923288345337, "logps/chosen": -131.36294555664062, "logps/rejected": -939.1266479492188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8641308546066284, "rewards/margins": 8.144346237182617, "rewards/rejected": -9.008478164672852, "step": 47360 }, { "epoch": 0.57, "learning_rate": 2.3513858941449726e-06, "logits/chosen": -2.886086940765381, "logits/rejected": -2.2498817443847656, "logps/chosen": -177.40374755859375, "logps/rejected": -988.6123046875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.2796512842178345, "rewards/margins": 8.219379425048828, "rewards/rejected": -9.499031066894531, "step": 47370 }, { "epoch": 0.57, "learning_rate": 2.350343133695149e-06, "logits/chosen": -2.8575382232666016, "logits/rejected": -2.405616521835327, "logps/chosen": -107.9614028930664, "logps/rejected": -935.6599731445312, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6910413503646851, "rewards/margins": 8.292132377624512, "rewards/rejected": -8.983174324035645, "step": 47380 }, { "epoch": 0.57, "learning_rate": 2.349300399374998e-06, "logits/chosen": -2.903264045715332, "logits/rejected": -2.5521233081817627, "logps/chosen": -104.59416198730469, "logps/rejected": -839.533203125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.6763367056846619, "rewards/margins": 7.348761081695557, "rewards/rejected": -8.025097846984863, "step": 47390 }, { "epoch": 0.57, "learning_rate": 2.348257691366579e-06, "logits/chosen": -2.830271005630493, "logits/rejected": -2.2198641300201416, "logps/chosen": -183.25082397460938, "logps/rejected": -917.8522338867188, "loss": 0.1206, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2622745037078857, "rewards/margins": 7.511575222015381, "rewards/rejected": -8.773849487304688, "step": 47400 }, { "epoch": 0.57, "learning_rate": 2.347215009851946e-06, "logits/chosen": -2.881852626800537, "logits/rejected": -2.5660805702209473, "logps/chosen": -115.56585693359375, "logps/rejected": -790.548095703125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7829490303993225, "rewards/margins": 6.757901668548584, "rewards/rejected": -7.540851593017578, "step": 47410 }, { "epoch": 0.57, "learning_rate": 2.346172355013147e-06, "logits/chosen": -2.8786110877990723, "logits/rejected": -2.2332332134246826, "logps/chosen": -148.1680450439453, "logps/rejected": -1086.65283203125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9836599230766296, "rewards/margins": 9.502386093139648, "rewards/rejected": -10.48604679107666, "step": 47420 }, { "epoch": 0.57, "learning_rate": 2.345129727032228e-06, "logits/chosen": -2.903563976287842, "logits/rejected": -2.4655981063842773, "logps/chosen": -112.42860412597656, "logps/rejected": -884.1051025390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6741148829460144, "rewards/margins": 7.78563928604126, "rewards/rejected": -8.459754943847656, "step": 47430 }, { "epoch": 0.57, "learning_rate": 2.344087126091229e-06, "logits/chosen": -2.855861186981201, "logits/rejected": -2.2698497772216797, "logps/chosen": -150.23526000976562, "logps/rejected": -957.63037109375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0252591371536255, "rewards/margins": 8.16207218170166, "rewards/rejected": -9.187331199645996, "step": 47440 }, { "epoch": 0.57, "learning_rate": 2.343044552372184e-06, "logits/chosen": -2.8638827800750732, "logits/rejected": -2.2879586219787598, "logps/chosen": -121.9675064086914, "logps/rejected": -934.5628051757812, "loss": 0.1251, "rewards/accuracies": 1.0, "rewards/chosen": -0.7837015390396118, "rewards/margins": 8.17796516418457, "rewards/rejected": -8.961666107177734, "step": 47450 }, { "epoch": 0.57, "learning_rate": 2.3420020060571244e-06, "logits/chosen": -2.878390073776245, "logits/rejected": -2.4181227684020996, "logps/chosen": -115.8152847290039, "logps/rejected": -893.6787109375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7473147511482239, "rewards/margins": 7.814486503601074, "rewards/rejected": -8.56180191040039, "step": 47460 }, { "epoch": 0.57, "learning_rate": 2.340959487328076e-06, "logits/chosen": -2.8275771141052246, "logits/rejected": -2.1757302284240723, "logps/chosen": -165.31480407714844, "logps/rejected": -1018.4533081054688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1635429859161377, "rewards/margins": 8.63392448425293, "rewards/rejected": -9.797468185424805, "step": 47470 }, { "epoch": 0.57, "learning_rate": 2.3399169963670597e-06, "logits/chosen": -2.882612705230713, "logits/rejected": -2.363124370574951, "logps/chosen": -130.39712524414062, "logps/rejected": -838.2376098632812, "loss": 0.0626, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8181326985359192, "rewards/margins": 7.193230628967285, "rewards/rejected": -8.01136302947998, "step": 47480 }, { "epoch": 0.57, "learning_rate": 2.33887453335609e-06, "logits/chosen": -2.9165496826171875, "logits/rejected": -2.495999574661255, "logps/chosen": -103.37166595458984, "logps/rejected": -900.8287353515625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5735355019569397, "rewards/margins": 8.0506010055542, "rewards/rejected": -8.624135971069336, "step": 47490 }, { "epoch": 0.57, "learning_rate": 2.33783209847718e-06, "logits/chosen": -2.8722264766693115, "logits/rejected": -2.3186326026916504, "logps/chosen": -127.45906829833984, "logps/rejected": -992.5594482421875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8323985934257507, "rewards/margins": 8.710315704345703, "rewards/rejected": -9.54271411895752, "step": 47500 }, { "epoch": 0.57, "learning_rate": 2.336789691912335e-06, "logits/chosen": -2.8742706775665283, "logits/rejected": -2.062065839767456, "logps/chosen": -171.11703491210938, "logps/rejected": -1081.72509765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1802799701690674, "rewards/margins": 9.227561950683594, "rewards/rejected": -10.407840728759766, "step": 47510 }, { "epoch": 0.57, "learning_rate": 2.3357473138435567e-06, "logits/chosen": -2.8909449577331543, "logits/rejected": -2.50154447555542, "logps/chosen": -139.07569885253906, "logps/rejected": -876.5543212890625, "loss": 0.0371, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9476878046989441, "rewards/margins": 7.432810306549072, "rewards/rejected": -8.380498886108398, "step": 47520 }, { "epoch": 0.57, "learning_rate": 2.3347049644528417e-06, "logits/chosen": -2.905287742614746, "logits/rejected": -2.242713451385498, "logps/chosen": -152.1229248046875, "logps/rejected": -1108.21142578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9859669804573059, "rewards/margins": 9.708486557006836, "rewards/rejected": -10.694454193115234, "step": 47530 }, { "epoch": 0.57, "learning_rate": 2.3336626439221803e-06, "logits/chosen": -2.882284164428711, "logits/rejected": -2.273649215698242, "logps/chosen": -137.25643920898438, "logps/rejected": -1036.25927734375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.864622950553894, "rewards/margins": 9.10531234741211, "rewards/rejected": -9.969935417175293, "step": 47540 }, { "epoch": 0.57, "learning_rate": 2.332620352433559e-06, "logits/chosen": -2.825951099395752, "logits/rejected": -2.3084139823913574, "logps/chosen": -146.3650360107422, "logps/rejected": -994.7312622070312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0318715572357178, "rewards/margins": 8.539212226867676, "rewards/rejected": -9.571083068847656, "step": 47550 }, { "epoch": 0.57, "learning_rate": 2.33157809016896e-06, "logits/chosen": -2.8858110904693604, "logits/rejected": -2.3811261653900146, "logps/chosen": -122.21357727050781, "logps/rejected": -950.6384887695312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7823598384857178, "rewards/margins": 8.333799362182617, "rewards/rejected": -9.116159439086914, "step": 47560 }, { "epoch": 0.57, "learning_rate": 2.3305358573103583e-06, "logits/chosen": -2.8906538486480713, "logits/rejected": -2.40641450881958, "logps/chosen": -145.41851806640625, "logps/rejected": -1032.021240234375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0009269714355469, "rewards/margins": 8.920525550842285, "rewards/rejected": -9.921452522277832, "step": 47570 }, { "epoch": 0.57, "learning_rate": 2.329493654039725e-06, "logits/chosen": -2.839879035949707, "logits/rejected": -2.1563310623168945, "logps/chosen": -192.9593505859375, "logps/rejected": -1078.2430419921875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.3704373836517334, "rewards/margins": 9.012114524841309, "rewards/rejected": -10.382552146911621, "step": 47580 }, { "epoch": 0.57, "learning_rate": 2.328451480539027e-06, "logits/chosen": -2.879713296890259, "logits/rejected": -2.5760960578918457, "logps/chosen": -115.79803466796875, "logps/rejected": -924.3067626953125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.7639447450637817, "rewards/margins": 8.087845802307129, "rewards/rejected": -8.851789474487305, "step": 47590 }, { "epoch": 0.57, "learning_rate": 2.3274093369902234e-06, "logits/chosen": -2.8409135341644287, "logits/rejected": -2.1645290851593018, "logps/chosen": -159.30699157714844, "logps/rejected": -1026.2939453125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0983250141143799, "rewards/margins": 8.769235610961914, "rewards/rejected": -9.867559432983398, "step": 47600 }, { "epoch": 0.57, "learning_rate": 2.32636722357527e-06, "logits/chosen": -2.8880324363708496, "logits/rejected": -2.416076183319092, "logps/chosen": -124.5313491821289, "logps/rejected": -947.0935668945312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.807551383972168, "rewards/margins": 8.276936531066895, "rewards/rejected": -9.084487915039062, "step": 47610 }, { "epoch": 0.57, "learning_rate": 2.3253251404761174e-06, "logits/chosen": -2.889357328414917, "logits/rejected": -2.3845696449279785, "logps/chosen": -121.81419372558594, "logps/rejected": -962.8616333007812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7539076805114746, "rewards/margins": 8.467350006103516, "rewards/rejected": -9.221258163452148, "step": 47620 }, { "epoch": 0.57, "learning_rate": 2.3242830878747107e-06, "logits/chosen": -2.8169963359832764, "logits/rejected": -2.321983814239502, "logps/chosen": -142.67823791503906, "logps/rejected": -1086.034423828125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9763503074645996, "rewards/margins": 9.488631248474121, "rewards/rejected": -10.464980125427246, "step": 47630 }, { "epoch": 0.57, "learning_rate": 2.323241065952988e-06, "logits/chosen": -2.8288590908050537, "logits/rejected": -2.216945171356201, "logps/chosen": -142.26913452148438, "logps/rejected": -959.3898315429688, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.9825031161308289, "rewards/margins": 8.240546226501465, "rewards/rejected": -9.223048210144043, "step": 47640 }, { "epoch": 0.57, "learning_rate": 2.3221990748928843e-06, "logits/chosen": -2.8708560466766357, "logits/rejected": -2.453125476837158, "logps/chosen": -160.0777587890625, "logps/rejected": -976.7918090820312, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.176727533340454, "rewards/margins": 8.207447052001953, "rewards/rejected": -9.384173393249512, "step": 47650 }, { "epoch": 0.57, "learning_rate": 2.321157114876329e-06, "logits/chosen": -2.8442773818969727, "logits/rejected": -2.2070555686950684, "logps/chosen": -195.57080078125, "logps/rejected": -1043.3681640625, "loss": 0.1655, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.469482660293579, "rewards/margins": 8.56098747253418, "rewards/rejected": -10.03046989440918, "step": 47660 }, { "epoch": 0.57, "learning_rate": 2.320115186085244e-06, "logits/chosen": -2.8544087409973145, "logits/rejected": -2.2285513877868652, "logps/chosen": -143.47314453125, "logps/rejected": -923.0955200195312, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.9018173217773438, "rewards/margins": 7.937272071838379, "rewards/rejected": -8.839090347290039, "step": 47670 }, { "epoch": 0.57, "learning_rate": 2.319073288701548e-06, "logits/chosen": -2.8747830390930176, "logits/rejected": -2.3131563663482666, "logps/chosen": -126.21964263916016, "logps/rejected": -912.0974731445312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7701658606529236, "rewards/margins": 7.964540004730225, "rewards/rejected": -8.734704971313477, "step": 47680 }, { "epoch": 0.57, "learning_rate": 2.318031422907154e-06, "logits/chosen": -2.849952220916748, "logits/rejected": -2.4090375900268555, "logps/chosen": -100.61700439453125, "logps/rejected": -842.6135864257812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6245344281196594, "rewards/margins": 7.427197456359863, "rewards/rejected": -8.051733016967773, "step": 47690 }, { "epoch": 0.57, "learning_rate": 2.3169895888839673e-06, "logits/chosen": -2.895664691925049, "logits/rejected": -2.332627058029175, "logps/chosen": -163.75772094726562, "logps/rejected": -1043.15087890625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.0966408252716064, "rewards/margins": 8.928110122680664, "rewards/rejected": -10.024751663208008, "step": 47700 }, { "epoch": 0.57, "learning_rate": 2.3159477868138903e-06, "logits/chosen": -2.8113887310028076, "logits/rejected": -1.971255898475647, "logps/chosen": -168.49676513671875, "logps/rejected": -1060.3443603515625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1555094718933105, "rewards/margins": 9.052922248840332, "rewards/rejected": -10.208431243896484, "step": 47710 }, { "epoch": 0.57, "learning_rate": 2.3149060168788186e-06, "logits/chosen": -2.845369577407837, "logits/rejected": -1.9963823556900024, "logps/chosen": -227.256103515625, "logps/rejected": -1058.015869140625, "loss": 0.0213, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.7175686359405518, "rewards/margins": 8.46460247039795, "rewards/rejected": -10.182170867919922, "step": 47720 }, { "epoch": 0.57, "learning_rate": 2.3138642792606427e-06, "logits/chosen": -2.89581561088562, "logits/rejected": -2.1749022006988525, "logps/chosen": -155.07167053222656, "logps/rejected": -1061.8431396484375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0418577194213867, "rewards/margins": 9.180073738098145, "rewards/rejected": -10.221931457519531, "step": 47730 }, { "epoch": 0.57, "learning_rate": 2.312822574141246e-06, "logits/chosen": -2.9079928398132324, "logits/rejected": -2.596430778503418, "logps/chosen": -97.73109436035156, "logps/rejected": -799.63720703125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.576561689376831, "rewards/margins": 7.04906702041626, "rewards/rejected": -7.6256279945373535, "step": 47740 }, { "epoch": 0.57, "learning_rate": 2.3117809017025085e-06, "logits/chosen": -2.9471588134765625, "logits/rejected": -2.6521155834198, "logps/chosen": -97.79483032226562, "logps/rejected": -868.6190185546875, "loss": 0.1155, "rewards/accuracies": 1.0, "rewards/chosen": -0.5841442346572876, "rewards/margins": 7.718052864074707, "rewards/rejected": -8.302197456359863, "step": 47750 }, { "epoch": 0.57, "learning_rate": 2.310739262126303e-06, "logits/chosen": -2.853834390640259, "logits/rejected": -2.087859630584717, "logps/chosen": -180.119384765625, "logps/rejected": -1164.6531982421875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.2539862394332886, "rewards/margins": 9.977630615234375, "rewards/rejected": -11.23161792755127, "step": 47760 }, { "epoch": 0.57, "learning_rate": 2.3096976555944957e-06, "logits/chosen": -2.837404251098633, "logits/rejected": -2.142531633377075, "logps/chosen": -153.79840087890625, "logps/rejected": -1017.4693603515625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0834063291549683, "rewards/margins": 8.69795036315918, "rewards/rejected": -9.781357765197754, "step": 47770 }, { "epoch": 0.57, "learning_rate": 2.3086560822889498e-06, "logits/chosen": -2.864727735519409, "logits/rejected": -2.3772644996643066, "logps/chosen": -118.95719146728516, "logps/rejected": -914.8673706054688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7356677651405334, "rewards/margins": 8.031662940979004, "rewards/rejected": -8.767332077026367, "step": 47780 }, { "epoch": 0.57, "learning_rate": 2.307614542391521e-06, "logits/chosen": -2.8609607219696045, "logits/rejected": -2.230012893676758, "logps/chosen": -201.573486328125, "logps/rejected": -1037.497802734375, "loss": 0.3073, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.515634536743164, "rewards/margins": 8.461047172546387, "rewards/rejected": -9.976682662963867, "step": 47790 }, { "epoch": 0.57, "learning_rate": 2.306573036084058e-06, "logits/chosen": -2.9306187629699707, "logits/rejected": -2.4717211723327637, "logps/chosen": -95.85978698730469, "logps/rejected": -841.1736450195312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5560547113418579, "rewards/margins": 7.485910892486572, "rewards/rejected": -8.041966438293457, "step": 47800 }, { "epoch": 0.57, "learning_rate": 2.3055315635484056e-06, "logits/chosen": -2.8482666015625, "logits/rejected": -2.3359618186950684, "logps/chosen": -145.83096313476562, "logps/rejected": -943.91748046875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.0026967525482178, "rewards/margins": 8.050907135009766, "rewards/rejected": -9.053604125976562, "step": 47810 }, { "epoch": 0.57, "learning_rate": 2.3044901249664026e-06, "logits/chosen": -2.896864414215088, "logits/rejected": -2.5660250186920166, "logps/chosen": -132.5770263671875, "logps/rejected": -889.6873779296875, "loss": 0.1068, "rewards/accuracies": 1.0, "rewards/chosen": -0.9073955416679382, "rewards/margins": 7.5994062423706055, "rewards/rejected": -8.506802558898926, "step": 47820 }, { "epoch": 0.57, "learning_rate": 2.303448720519881e-06, "logits/chosen": -2.8607163429260254, "logits/rejected": -2.3607449531555176, "logps/chosen": -164.60763549804688, "logps/rejected": -1127.0567626953125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.20682692527771, "rewards/margins": 9.660469055175781, "rewards/rejected": -10.86729621887207, "step": 47830 }, { "epoch": 0.57, "learning_rate": 2.3024073503906664e-06, "logits/chosen": -2.831763982772827, "logits/rejected": -2.235091209411621, "logps/chosen": -164.31072998046875, "logps/rejected": -831.04736328125, "loss": 0.1288, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1320732831954956, "rewards/margins": 6.79306173324585, "rewards/rejected": -7.925135135650635, "step": 47840 }, { "epoch": 0.57, "learning_rate": 2.301366014760579e-06, "logits/chosen": -2.8477590084075928, "logits/rejected": -2.3564391136169434, "logps/chosen": -131.26669311523438, "logps/rejected": -913.5660400390625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.88066166639328, "rewards/margins": 7.8683953285217285, "rewards/rejected": -8.749055862426758, "step": 47850 }, { "epoch": 0.57, "learning_rate": 2.300324713811435e-06, "logits/chosen": -2.8512773513793945, "logits/rejected": -2.2826783657073975, "logps/chosen": -217.4535675048828, "logps/rejected": -915.8629150390625, "loss": 0.2416, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.6783781051635742, "rewards/margins": 7.082840919494629, "rewards/rejected": -8.761218070983887, "step": 47860 }, { "epoch": 0.57, "learning_rate": 2.2992834477250407e-06, "logits/chosen": -2.861548662185669, "logits/rejected": -2.2920939922332764, "logps/chosen": -145.18202209472656, "logps/rejected": -968.0478515625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9819680452346802, "rewards/margins": 8.310951232910156, "rewards/rejected": -9.292919158935547, "step": 47870 }, { "epoch": 0.57, "learning_rate": 2.298242216683199e-06, "logits/chosen": -2.8526129722595215, "logits/rejected": -2.23600435256958, "logps/chosen": -149.19415283203125, "logps/rejected": -1039.970947265625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9918484687805176, "rewards/margins": 9.019319534301758, "rewards/rejected": -10.011167526245117, "step": 47880 }, { "epoch": 0.57, "learning_rate": 2.2972010208677066e-06, "logits/chosen": -2.847865581512451, "logits/rejected": -2.2281293869018555, "logps/chosen": -193.74435424804688, "logps/rejected": -943.0921630859375, "loss": 0.083, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4286484718322754, "rewards/margins": 7.622169494628906, "rewards/rejected": -9.050817489624023, "step": 47890 }, { "epoch": 0.57, "learning_rate": 2.296159860460352e-06, "logits/chosen": -2.832550525665283, "logits/rejected": -2.0553858280181885, "logps/chosen": -182.84556579589844, "logps/rejected": -1070.942626953125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.288561224937439, "rewards/margins": 9.004865646362305, "rewards/rejected": -10.293426513671875, "step": 47900 }, { "epoch": 0.57, "learning_rate": 2.295118735642919e-06, "logits/chosen": -2.870378017425537, "logits/rejected": -2.263573408126831, "logps/chosen": -163.54136657714844, "logps/rejected": -1035.5015869140625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1384950876235962, "rewards/margins": 8.80965518951416, "rewards/rejected": -9.948150634765625, "step": 47910 }, { "epoch": 0.57, "learning_rate": 2.2940776465971864e-06, "logits/chosen": -2.835277557373047, "logits/rejected": -2.1327691078186035, "logps/chosen": -186.36441040039062, "logps/rejected": -1106.718017578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.3102695941925049, "rewards/margins": 9.352479934692383, "rewards/rejected": -10.662749290466309, "step": 47920 }, { "epoch": 0.57, "learning_rate": 2.2930365935049244e-06, "logits/chosen": -2.890734910964966, "logits/rejected": -2.485563039779663, "logps/chosen": -108.26826477050781, "logps/rejected": -871.9088134765625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6133589148521423, "rewards/margins": 7.737212181091309, "rewards/rejected": -8.350571632385254, "step": 47930 }, { "epoch": 0.57, "learning_rate": 2.291995576547899e-06, "logits/chosen": -2.8852438926696777, "logits/rejected": -2.357268810272217, "logps/chosen": -142.24765014648438, "logps/rejected": -960.8056640625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9663524627685547, "rewards/margins": 8.265176773071289, "rewards/rejected": -9.231527328491211, "step": 47940 }, { "epoch": 0.57, "learning_rate": 2.2909545959078684e-06, "logits/chosen": -2.8776278495788574, "logits/rejected": -2.2599735260009766, "logps/chosen": -152.34518432617188, "logps/rejected": -956.66259765625, "loss": 0.0848, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0323375463485718, "rewards/margins": 8.136054992675781, "rewards/rejected": -9.1683931350708, "step": 47950 }, { "epoch": 0.57, "learning_rate": 2.2899136517665843e-06, "logits/chosen": -2.8029990196228027, "logits/rejected": -2.0028347969055176, "logps/chosen": -210.16574096679688, "logps/rejected": -1209.314208984375, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/chosen": -1.4735757112503052, "rewards/margins": 10.199328422546387, "rewards/rejected": -11.672903060913086, "step": 47960 }, { "epoch": 0.57, "learning_rate": 2.288872744305793e-06, "logits/chosen": -2.8343613147735596, "logits/rejected": -2.332794427871704, "logps/chosen": -119.97623443603516, "logps/rejected": -1009.529296875, "loss": 0.1146, "rewards/accuracies": 1.0, "rewards/chosen": -0.7419668436050415, "rewards/margins": 8.965734481811523, "rewards/rejected": -9.707703590393066, "step": 47970 }, { "epoch": 0.57, "learning_rate": 2.2878318737072346e-06, "logits/chosen": -2.8317513465881348, "logits/rejected": -2.2208433151245117, "logps/chosen": -191.36441040039062, "logps/rejected": -1043.173583984375, "loss": 0.1257, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.414591670036316, "rewards/margins": 8.625648498535156, "rewards/rejected": -10.040239334106445, "step": 47980 }, { "epoch": 0.57, "learning_rate": 2.2867910401526417e-06, "logits/chosen": -2.8932950496673584, "logits/rejected": -2.543972969055176, "logps/chosen": -103.4525146484375, "logps/rejected": -813.3218994140625, "loss": 0.1179, "rewards/accuracies": 1.0, "rewards/chosen": -0.6470782160758972, "rewards/margins": 7.108960151672363, "rewards/rejected": -7.756038665771484, "step": 47990 }, { "epoch": 0.57, "learning_rate": 2.2857502438237406e-06, "logits/chosen": -2.8686485290527344, "logits/rejected": -2.454782485961914, "logps/chosen": -94.18623352050781, "logps/rejected": -890.3104248046875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5285338759422302, "rewards/margins": 8.003533363342285, "rewards/rejected": -8.532068252563477, "step": 48000 }, { "epoch": 0.57, "eval_logits/chosen": -2.850338935852051, "eval_logits/rejected": -1.7047399282455444, "eval_logps/chosen": -342.6891784667969, "eval_logps/rejected": -1290.9998779296875, "eval_loss": 0.001300624804571271, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -2.815088987350464, "eval_rewards/margins": 9.627655029296875, "eval_rewards/rejected": -12.442744255065918, "eval_runtime": 1.2135, "eval_samples_per_second": 4.12, "eval_steps_per_second": 2.472, "step": 48000 }, { "epoch": 0.57, "learning_rate": 2.2847094849022523e-06, "logits/chosen": -2.8532090187072754, "logits/rejected": -2.124751567840576, "logps/chosen": -177.64259338378906, "logps/rejected": -1012.5339965820312, "loss": 0.2459, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2773139476776123, "rewards/margins": 8.460010528564453, "rewards/rejected": -9.737324714660645, "step": 48010 }, { "epoch": 0.57, "learning_rate": 2.28366876356989e-06, "logits/chosen": -2.8642687797546387, "logits/rejected": -2.19844388961792, "logps/chosen": -200.3677215576172, "logps/rejected": -1101.5081787109375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.4803807735443115, "rewards/margins": 9.132121086120605, "rewards/rejected": -10.612502098083496, "step": 48020 }, { "epoch": 0.57, "learning_rate": 2.2826280800083604e-06, "logits/chosen": -2.865643262863159, "logits/rejected": -2.2346482276916504, "logps/chosen": -122.47415924072266, "logps/rejected": -926.6134033203125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.7728044390678406, "rewards/margins": 8.111552238464355, "rewards/rejected": -8.884357452392578, "step": 48030 }, { "epoch": 0.58, "learning_rate": 2.2815874343993644e-06, "logits/chosen": -2.880037307739258, "logits/rejected": -2.6047961711883545, "logps/chosen": -87.71761322021484, "logps/rejected": -773.3226318359375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.4954504072666168, "rewards/margins": 6.871247291564941, "rewards/rejected": -7.366697788238525, "step": 48040 }, { "epoch": 0.58, "learning_rate": 2.280546826924596e-06, "logits/chosen": -2.8389835357666016, "logits/rejected": -2.368994951248169, "logps/chosen": -115.9497299194336, "logps/rejected": -902.8585205078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7014889717102051, "rewards/margins": 7.935390472412109, "rewards/rejected": -8.636878967285156, "step": 48050 }, { "epoch": 0.58, "learning_rate": 2.2795062577657416e-06, "logits/chosen": -2.8655600547790527, "logits/rejected": -2.331202507019043, "logps/chosen": -150.84072875976562, "logps/rejected": -929.5711059570312, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.0299313068389893, "rewards/margins": 7.870225429534912, "rewards/rejected": -8.900156021118164, "step": 48060 }, { "epoch": 0.58, "learning_rate": 2.2784657271044825e-06, "logits/chosen": -2.831378936767578, "logits/rejected": -1.889439582824707, "logps/chosen": -229.2450408935547, "logps/rejected": -1118.331787109375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.7035205364227295, "rewards/margins": 9.074853897094727, "rewards/rejected": -10.778376579284668, "step": 48070 }, { "epoch": 0.58, "learning_rate": 2.277425235122493e-06, "logits/chosen": -2.8706233501434326, "logits/rejected": -2.195312976837158, "logps/chosen": -145.61038208007812, "logps/rejected": -910.1340942382812, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.912322998046875, "rewards/margins": 7.7989959716796875, "rewards/rejected": -8.711318016052246, "step": 48080 }, { "epoch": 0.58, "learning_rate": 2.2763847820014383e-06, "logits/chosen": -2.841064929962158, "logits/rejected": -2.144904613494873, "logps/chosen": -153.78338623046875, "logps/rejected": -1100.6964111328125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9479132890701294, "rewards/margins": 9.650747299194336, "rewards/rejected": -10.598661422729492, "step": 48090 }, { "epoch": 0.58, "learning_rate": 2.2753443679229794e-06, "logits/chosen": -2.8846771717071533, "logits/rejected": -2.307488441467285, "logps/chosen": -136.7812957763672, "logps/rejected": -930.373046875, "loss": 0.059, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8802103996276855, "rewards/margins": 8.036508560180664, "rewards/rejected": -8.916719436645508, "step": 48100 }, { "epoch": 0.58, "learning_rate": 2.2743039930687706e-06, "logits/chosen": -2.895514965057373, "logits/rejected": -2.3876194953918457, "logps/chosen": -168.18630981445312, "logps/rejected": -959.6416015625, "loss": 0.1386, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2133433818817139, "rewards/margins": 7.989419460296631, "rewards/rejected": -9.202763557434082, "step": 48110 }, { "epoch": 0.58, "learning_rate": 2.2732636576204577e-06, "logits/chosen": -2.8568553924560547, "logits/rejected": -2.3580260276794434, "logps/chosen": -138.36058044433594, "logps/rejected": -937.1110229492188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.9061479568481445, "rewards/margins": 8.066507339477539, "rewards/rejected": -8.97265338897705, "step": 48120 }, { "epoch": 0.58, "learning_rate": 2.2722233617596797e-06, "logits/chosen": -2.851578950881958, "logits/rejected": -2.249349594116211, "logps/chosen": -143.3874969482422, "logps/rejected": -1111.34912109375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.918721079826355, "rewards/margins": 9.79722785949707, "rewards/rejected": -10.715947151184082, "step": 48130 }, { "epoch": 0.58, "learning_rate": 2.2711831056680705e-06, "logits/chosen": -2.8409922122955322, "logits/rejected": -2.4303195476531982, "logps/chosen": -101.17413330078125, "logps/rejected": -905.5402221679688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.604366660118103, "rewards/margins": 8.080231666564941, "rewards/rejected": -8.684598922729492, "step": 48140 }, { "epoch": 0.58, "learning_rate": 2.270142889527256e-06, "logits/chosen": -2.9085803031921387, "logits/rejected": -2.4478697776794434, "logps/chosen": -116.16914367675781, "logps/rejected": -950.2321166992188, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7636412382125854, "rewards/margins": 8.350794792175293, "rewards/rejected": -9.114436149597168, "step": 48150 }, { "epoch": 0.58, "learning_rate": 2.2691027135188543e-06, "logits/chosen": -2.860041379928589, "logits/rejected": -2.1378092765808105, "logps/chosen": -167.87423706054688, "logps/rejected": -1158.6790771484375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.1316996812820435, "rewards/margins": 10.052831649780273, "rewards/rejected": -11.184532165527344, "step": 48160 }, { "epoch": 0.58, "learning_rate": 2.2680625778244773e-06, "logits/chosen": -2.850390911102295, "logits/rejected": -2.231971263885498, "logps/chosen": -173.08560180664062, "logps/rejected": -998.2433471679688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.206112027168274, "rewards/margins": 8.387829780578613, "rewards/rejected": -9.593941688537598, "step": 48170 }, { "epoch": 0.58, "learning_rate": 2.2670224826257304e-06, "logits/chosen": -2.827754259109497, "logits/rejected": -2.155042886734009, "logps/chosen": -189.2725067138672, "logps/rejected": -1073.294189453125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.3247909545898438, "rewards/margins": 9.006467819213867, "rewards/rejected": -10.331257820129395, "step": 48180 }, { "epoch": 0.58, "learning_rate": 2.265982428104211e-06, "logits/chosen": -2.863800287246704, "logits/rejected": -2.4771556854248047, "logps/chosen": -98.42903900146484, "logps/rejected": -808.883544921875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.577283501625061, "rewards/margins": 7.140810966491699, "rewards/rejected": -7.7180938720703125, "step": 48190 }, { "epoch": 0.58, "learning_rate": 2.264942414441509e-06, "logits/chosen": -2.8613433837890625, "logits/rejected": -2.4597887992858887, "logps/chosen": -100.34419250488281, "logps/rejected": -795.5242919921875, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -0.6217579245567322, "rewards/margins": 6.963059425354004, "rewards/rejected": -7.584817409515381, "step": 48200 }, { "epoch": 0.58, "learning_rate": 2.2639024418192095e-06, "logits/chosen": -2.868051528930664, "logits/rejected": -2.2019553184509277, "logps/chosen": -149.44085693359375, "logps/rejected": -1129.10986328125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.9756857752799988, "rewards/margins": 9.921354293823242, "rewards/rejected": -10.897042274475098, "step": 48210 }, { "epoch": 0.58, "learning_rate": 2.262862510418887e-06, "logits/chosen": -2.8796744346618652, "logits/rejected": -2.2517688274383545, "logps/chosen": -132.8201904296875, "logps/rejected": -980.8683471679688, "loss": 0.1594, "rewards/accuracies": 1.0, "rewards/chosen": -0.8583583831787109, "rewards/margins": 8.562360763549805, "rewards/rejected": -9.420720100402832, "step": 48220 }, { "epoch": 0.58, "learning_rate": 2.261822620422112e-06, "logits/chosen": -2.899204969406128, "logits/rejected": -2.5854287147521973, "logps/chosen": -112.13875579833984, "logps/rejected": -900.3828125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6846767067909241, "rewards/margins": 7.930286407470703, "rewards/rejected": -8.61496353149414, "step": 48230 }, { "epoch": 0.58, "learning_rate": 2.2607827720104462e-06, "logits/chosen": -2.8618369102478027, "logits/rejected": -2.5323843955993652, "logps/chosen": -129.90530395507812, "logps/rejected": -954.9221801757812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.89129638671875, "rewards/margins": 8.282934188842773, "rewards/rejected": -9.174230575561523, "step": 48240 }, { "epoch": 0.58, "learning_rate": 2.259742965365444e-06, "logits/chosen": -2.8617234230041504, "logits/rejected": -2.0838358402252197, "logps/chosen": -152.90884399414062, "logps/rejected": -1092.80810546875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.0594544410705566, "rewards/margins": 9.458359718322754, "rewards/rejected": -10.517812728881836, "step": 48250 }, { "epoch": 0.58, "learning_rate": 2.2587032006686527e-06, "logits/chosen": -2.9002387523651123, "logits/rejected": -2.6050539016723633, "logps/chosen": -88.9876937866211, "logps/rejected": -827.8860473632812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4835425913333893, "rewards/margins": 7.435345649719238, "rewards/rejected": -7.918889045715332, "step": 48260 }, { "epoch": 0.58, "learning_rate": 2.257663478101612e-06, "logits/chosen": -2.871593952178955, "logits/rejected": -2.271939754486084, "logps/chosen": -156.88070678710938, "logps/rejected": -952.0968627929688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.107089638710022, "rewards/margins": 8.022053718566895, "rewards/rejected": -9.129142761230469, "step": 48270 }, { "epoch": 0.58, "learning_rate": 2.2566237978458555e-06, "logits/chosen": -2.8615517616271973, "logits/rejected": -2.6184096336364746, "logps/chosen": -91.35501098632812, "logps/rejected": -842.6890869140625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5533432364463806, "rewards/margins": 7.508923530578613, "rewards/rejected": -8.062265396118164, "step": 48280 }, { "epoch": 0.58, "learning_rate": 2.2555841600829078e-06, "logits/chosen": -2.859841823577881, "logits/rejected": -2.27661395072937, "logps/chosen": -158.48695373535156, "logps/rejected": -1037.2196044921875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.10438871383667, "rewards/margins": 8.874578475952148, "rewards/rejected": -9.978967666625977, "step": 48290 }, { "epoch": 0.58, "learning_rate": 2.2545445649942867e-06, "logits/chosen": -2.888179302215576, "logits/rejected": -2.511072874069214, "logps/chosen": -115.81978607177734, "logps/rejected": -1012.8875732421875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7047854661941528, "rewards/margins": 9.03840160369873, "rewards/rejected": -9.743185997009277, "step": 48300 }, { "epoch": 0.58, "learning_rate": 2.2535050127615034e-06, "logits/chosen": -2.807760238647461, "logits/rejected": -2.2495875358581543, "logps/chosen": -123.65582275390625, "logps/rejected": -980.5987548828125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.777798593044281, "rewards/margins": 8.644296646118164, "rewards/rejected": -9.422094345092773, "step": 48310 }, { "epoch": 0.58, "learning_rate": 2.2524655035660594e-06, "logits/chosen": -2.9069628715515137, "logits/rejected": -2.386915683746338, "logps/chosen": -142.33108520507812, "logps/rejected": -953.60693359375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9594791531562805, "rewards/margins": 8.192126274108887, "rewards/rejected": -9.151605606079102, "step": 48320 }, { "epoch": 0.58, "learning_rate": 2.2514260375894507e-06, "logits/chosen": -2.910954475402832, "logits/rejected": -2.392638921737671, "logps/chosen": -154.70314025878906, "logps/rejected": -936.8475341796875, "loss": 0.0929, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0457422733306885, "rewards/margins": 7.940084934234619, "rewards/rejected": -8.98582649230957, "step": 48330 }, { "epoch": 0.58, "learning_rate": 2.2503866150131658e-06, "logits/chosen": -2.848060131072998, "logits/rejected": -2.4970483779907227, "logps/chosen": -115.09156799316406, "logps/rejected": -949.8416137695312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7182936668395996, "rewards/margins": 8.390946388244629, "rewards/rejected": -9.109240531921387, "step": 48340 }, { "epoch": 0.58, "learning_rate": 2.2493472360186847e-06, "logits/chosen": -2.9261531829833984, "logits/rejected": -2.601088047027588, "logps/chosen": -103.94246673583984, "logps/rejected": -881.1286010742188, "loss": 0.1001, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6436344981193542, "rewards/margins": 7.797508239746094, "rewards/rejected": -8.441142082214355, "step": 48350 }, { "epoch": 0.58, "learning_rate": 2.24830790078748e-06, "logits/chosen": -2.881786346435547, "logits/rejected": -2.3103277683258057, "logps/chosen": -130.47946166992188, "logps/rejected": -895.017578125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8184849619865417, "rewards/margins": 7.7481560707092285, "rewards/rejected": -8.566640853881836, "step": 48360 }, { "epoch": 0.58, "learning_rate": 2.247268609501018e-06, "logits/chosen": -2.890800714492798, "logits/rejected": -2.4147486686706543, "logps/chosen": -128.49757385253906, "logps/rejected": -876.64501953125, "loss": 0.1203, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8261263966560364, "rewards/margins": 7.568490028381348, "rewards/rejected": -8.39461612701416, "step": 48370 }, { "epoch": 0.58, "learning_rate": 2.2462293623407535e-06, "logits/chosen": -2.904458522796631, "logits/rejected": -2.2766928672790527, "logps/chosen": -165.24639892578125, "logps/rejected": -1149.759033203125, "loss": 0.0308, "rewards/accuracies": 1.0, "rewards/chosen": -1.1426700353622437, "rewards/margins": 9.929049491882324, "rewards/rejected": -11.071718215942383, "step": 48380 }, { "epoch": 0.58, "learning_rate": 2.245190159488138e-06, "logits/chosen": -2.859138011932373, "logits/rejected": -2.215179443359375, "logps/chosen": -163.87867736816406, "logps/rejected": -1035.912841796875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1797997951507568, "rewards/margins": 8.783339500427246, "rewards/rejected": -9.963138580322266, "step": 48390 }, { "epoch": 0.58, "learning_rate": 2.244151001124613e-06, "logits/chosen": -2.903610944747925, "logits/rejected": -2.297060489654541, "logps/chosen": -112.1201171875, "logps/rejected": -895.0418701171875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6354498863220215, "rewards/margins": 7.932908535003662, "rewards/rejected": -8.568359375, "step": 48400 }, { "epoch": 0.58, "learning_rate": 2.2431118874316126e-06, "logits/chosen": -2.851083755493164, "logits/rejected": -2.4974703788757324, "logps/chosen": -104.22346496582031, "logps/rejected": -950.7105712890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6287869215011597, "rewards/margins": 8.489522933959961, "rewards/rejected": -9.11830997467041, "step": 48410 }, { "epoch": 0.58, "learning_rate": 2.2420728185905628e-06, "logits/chosen": -2.869075298309326, "logits/rejected": -2.3135793209075928, "logps/chosen": -151.56495666503906, "logps/rejected": -947.6187744140625, "loss": 0.072, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9899507761001587, "rewards/margins": 8.097987174987793, "rewards/rejected": -9.087939262390137, "step": 48420 }, { "epoch": 0.58, "learning_rate": 2.2410337947828835e-06, "logits/chosen": -2.8477981090545654, "logits/rejected": -2.1380295753479004, "logps/chosen": -222.9864044189453, "logps/rejected": -1131.7686767578125, "loss": 0.0733, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.7146217823028564, "rewards/margins": 9.196816444396973, "rewards/rejected": -10.91143798828125, "step": 48430 }, { "epoch": 0.58, "learning_rate": 2.239994816189983e-06, "logits/chosen": -2.8437600135803223, "logits/rejected": -2.429945945739746, "logps/chosen": -106.93050384521484, "logps/rejected": -841.1005859375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.6620581150054932, "rewards/margins": 7.375507354736328, "rewards/rejected": -8.037565231323242, "step": 48440 }, { "epoch": 0.58, "learning_rate": 2.2389558829932663e-06, "logits/chosen": -2.8547043800354004, "logits/rejected": -2.3415744304656982, "logps/chosen": -134.29647827148438, "logps/rejected": -957.1603393554688, "loss": 0.0902, "rewards/accuracies": 1.0, "rewards/chosen": -0.8340988159179688, "rewards/margins": 8.349625587463379, "rewards/rejected": -9.183723449707031, "step": 48450 }, { "epoch": 0.58, "learning_rate": 2.237916995374127e-06, "logits/chosen": -2.8957841396331787, "logits/rejected": -2.3132479190826416, "logps/chosen": -187.2462158203125, "logps/rejected": -1180.0101318359375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.3215835094451904, "rewards/margins": 10.06346607208252, "rewards/rejected": -11.385049819946289, "step": 48460 }, { "epoch": 0.58, "learning_rate": 2.236878153513953e-06, "logits/chosen": -2.8974666595458984, "logits/rejected": -2.5766677856445312, "logps/chosen": -95.45450592041016, "logps/rejected": -846.9705200195312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5204690098762512, "rewards/margins": 7.573556423187256, "rewards/rejected": -8.094026565551758, "step": 48470 }, { "epoch": 0.58, "learning_rate": 2.2358393575941222e-06, "logits/chosen": -2.8646552562713623, "logits/rejected": -2.2919089794158936, "logps/chosen": -129.62515258789062, "logps/rejected": -953.9761962890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8600834012031555, "rewards/margins": 8.291318893432617, "rewards/rejected": -9.151402473449707, "step": 48480 }, { "epoch": 0.58, "learning_rate": 2.234800607796006e-06, "logits/chosen": -2.91560959815979, "logits/rejected": -2.521960735321045, "logps/chosen": -114.8433609008789, "logps/rejected": -901.58544921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7288996577262878, "rewards/margins": 7.909304618835449, "rewards/rejected": -8.638204574584961, "step": 48490 }, { "epoch": 0.58, "learning_rate": 2.2337619043009673e-06, "logits/chosen": -2.8312573432922363, "logits/rejected": -2.3671770095825195, "logps/chosen": -103.471435546875, "logps/rejected": -873.8541870117188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6202812194824219, "rewards/margins": 7.742776393890381, "rewards/rejected": -8.363057136535645, "step": 48500 }, { "epoch": 0.58, "learning_rate": 2.2327232472903603e-06, "logits/chosen": -2.8794710636138916, "logits/rejected": -2.2023417949676514, "logps/chosen": -152.91197204589844, "logps/rejected": -1054.5234375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0078338384628296, "rewards/margins": 9.137405395507812, "rewards/rejected": -10.145238876342773, "step": 48510 }, { "epoch": 0.58, "learning_rate": 2.2316846369455318e-06, "logits/chosen": -2.8881027698516846, "logits/rejected": -2.545557737350464, "logps/chosen": -112.76983642578125, "logps/rejected": -791.1470947265625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7429682612419128, "rewards/margins": 6.8091301918029785, "rewards/rejected": -7.552098751068115, "step": 48520 }, { "epoch": 0.58, "learning_rate": 2.2306460734478213e-06, "logits/chosen": -2.8443307876586914, "logits/rejected": -2.2796103954315186, "logps/chosen": -132.56826782226562, "logps/rejected": -993.1380004882812, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8694409132003784, "rewards/margins": 8.678352355957031, "rewards/rejected": -9.5477933883667, "step": 48530 }, { "epoch": 0.58, "learning_rate": 2.2296075569785578e-06, "logits/chosen": -2.887340784072876, "logits/rejected": -2.392042636871338, "logps/chosen": -137.595947265625, "logps/rejected": -1019.0582275390625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8994287252426147, "rewards/margins": 8.912760734558105, "rewards/rejected": -9.812190055847168, "step": 48540 }, { "epoch": 0.58, "learning_rate": 2.2285690877190636e-06, "logits/chosen": -2.8900887966156006, "logits/rejected": -2.374298572540283, "logps/chosen": -136.3363494873047, "logps/rejected": -1008.3779296875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9041778445243835, "rewards/margins": 8.797721862792969, "rewards/rejected": -9.701899528503418, "step": 48550 }, { "epoch": 0.58, "learning_rate": 2.2275306658506528e-06, "logits/chosen": -2.932910680770874, "logits/rejected": -2.5025291442871094, "logps/chosen": -101.30968475341797, "logps/rejected": -917.76904296875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6017257571220398, "rewards/margins": 8.19880485534668, "rewards/rejected": -8.800530433654785, "step": 48560 }, { "epoch": 0.58, "learning_rate": 2.2264922915546315e-06, "logits/chosen": -2.8543903827667236, "logits/rejected": -2.113553524017334, "logps/chosen": -192.25270080566406, "logps/rejected": -1093.20361328125, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.3632991313934326, "rewards/margins": 9.183116912841797, "rewards/rejected": -10.546417236328125, "step": 48570 }, { "epoch": 0.58, "learning_rate": 2.2254539650122963e-06, "logits/chosen": -2.883185625076294, "logits/rejected": -2.3047115802764893, "logps/chosen": -120.60665130615234, "logps/rejected": -933.9259643554688, "loss": 0.1585, "rewards/accuracies": 1.0, "rewards/chosen": -0.7623590230941772, "rewards/margins": 8.184133529663086, "rewards/rejected": -8.946493148803711, "step": 48580 }, { "epoch": 0.58, "learning_rate": 2.224415686404936e-06, "logits/chosen": -2.8437933921813965, "logits/rejected": -2.0484142303466797, "logps/chosen": -164.4411163330078, "logps/rejected": -1085.846435546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0637718439102173, "rewards/margins": 9.404447555541992, "rewards/rejected": -10.468219757080078, "step": 48590 }, { "epoch": 0.58, "learning_rate": 2.2233774559138323e-06, "logits/chosen": -2.895838975906372, "logits/rejected": -2.3606443405151367, "logps/chosen": -117.12850189208984, "logps/rejected": -934.306640625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.717219352722168, "rewards/margins": 8.241527557373047, "rewards/rejected": -8.958746910095215, "step": 48600 }, { "epoch": 0.58, "learning_rate": 2.2223392737202558e-06, "logits/chosen": -2.8371291160583496, "logits/rejected": -2.291914463043213, "logps/chosen": -129.03094482421875, "logps/rejected": -1011.1678466796875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8333641290664673, "rewards/margins": 8.882325172424316, "rewards/rejected": -9.715689659118652, "step": 48610 }, { "epoch": 0.58, "learning_rate": 2.2213011400054707e-06, "logits/chosen": -2.834172487258911, "logits/rejected": -2.3851869106292725, "logps/chosen": -154.2550506591797, "logps/rejected": -802.0532836914062, "loss": 0.2865, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.081514596939087, "rewards/margins": 6.567142486572266, "rewards/rejected": -7.648657321929932, "step": 48620 }, { "epoch": 0.58, "learning_rate": 2.2202630549507332e-06, "logits/chosen": -2.8513903617858887, "logits/rejected": -2.187041997909546, "logps/chosen": -139.17965698242188, "logps/rejected": -1050.981689453125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9596914052963257, "rewards/margins": 9.167597770690918, "rewards/rejected": -10.127288818359375, "step": 48630 }, { "epoch": 0.58, "learning_rate": 2.2192250187372886e-06, "logits/chosen": -2.8801069259643555, "logits/rejected": -2.346463680267334, "logps/chosen": -141.77926635742188, "logps/rejected": -950.1424560546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9572965502738953, "rewards/margins": 8.167145729064941, "rewards/rejected": -9.124442100524902, "step": 48640 }, { "epoch": 0.58, "learning_rate": 2.2181870315463763e-06, "logits/chosen": -2.829972505569458, "logits/rejected": -2.349285840988159, "logps/chosen": -106.45878601074219, "logps/rejected": -894.6139526367188, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.5943824052810669, "rewards/margins": 7.971163749694824, "rewards/rejected": -8.565546035766602, "step": 48650 }, { "epoch": 0.58, "learning_rate": 2.2171490935592256e-06, "logits/chosen": -2.884530544281006, "logits/rejected": -2.17035174369812, "logps/chosen": -170.10340881347656, "logps/rejected": -1019.8405151367188, "loss": 0.1054, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2262777090072632, "rewards/margins": 8.575711250305176, "rewards/rejected": -9.80198860168457, "step": 48660 }, { "epoch": 0.58, "learning_rate": 2.2161112049570577e-06, "logits/chosen": -2.9191081523895264, "logits/rejected": -2.3666062355041504, "logps/chosen": -140.96304321289062, "logps/rejected": -951.4014892578125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.9493119120597839, "rewards/margins": 8.16956901550293, "rewards/rejected": -9.118881225585938, "step": 48670 }, { "epoch": 0.58, "learning_rate": 2.215073365921085e-06, "logits/chosen": -2.807892322540283, "logits/rejected": -2.365657329559326, "logps/chosen": -122.2367935180664, "logps/rejected": -862.9573364257812, "loss": 0.0787, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7600629329681396, "rewards/margins": 7.496883392333984, "rewards/rejected": -8.256946563720703, "step": 48680 }, { "epoch": 0.58, "learning_rate": 2.2140355766325106e-06, "logits/chosen": -2.784257411956787, "logits/rejected": -1.9706405401229858, "logps/chosen": -203.05392456054688, "logps/rejected": -1156.6953125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.4756807088851929, "rewards/margins": 9.671619415283203, "rewards/rejected": -11.147298812866211, "step": 48690 }, { "epoch": 0.58, "learning_rate": 2.212997837272531e-06, "logits/chosen": -2.882969379425049, "logits/rejected": -2.4028191566467285, "logps/chosen": -140.78335571289062, "logps/rejected": -926.5947265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9666814804077148, "rewards/margins": 7.897188663482666, "rewards/rejected": -8.863869667053223, "step": 48700 }, { "epoch": 0.58, "learning_rate": 2.2119601480223316e-06, "logits/chosen": -2.8617303371429443, "logits/rejected": -2.2952332496643066, "logps/chosen": -153.7236785888672, "logps/rejected": -1056.1436767578125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.99730384349823, "rewards/margins": 9.144185066223145, "rewards/rejected": -10.141489028930664, "step": 48710 }, { "epoch": 0.58, "learning_rate": 2.2109225090630905e-06, "logits/chosen": -2.873631000518799, "logits/rejected": -2.2100305557250977, "logps/chosen": -161.5048828125, "logps/rejected": -1040.497314453125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1290874481201172, "rewards/margins": 8.85653018951416, "rewards/rejected": -9.985616683959961, "step": 48720 }, { "epoch": 0.58, "learning_rate": 2.2098849205759766e-06, "logits/chosen": -2.8477981090545654, "logits/rejected": -2.2578866481781006, "logps/chosen": -154.12005615234375, "logps/rejected": -1017.6812744140625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.0715430974960327, "rewards/margins": 8.706899642944336, "rewards/rejected": -9.7784423828125, "step": 48730 }, { "epoch": 0.58, "learning_rate": 2.2088473827421496e-06, "logits/chosen": -2.8039486408233643, "logits/rejected": -2.237717866897583, "logps/chosen": -164.35025024414062, "logps/rejected": -1023.7725830078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1555708646774292, "rewards/margins": 8.69867992401123, "rewards/rejected": -9.854251861572266, "step": 48740 }, { "epoch": 0.58, "learning_rate": 2.20780989574276e-06, "logits/chosen": -2.8527207374572754, "logits/rejected": -2.3936100006103516, "logps/chosen": -129.7135467529297, "logps/rejected": -1081.2569580078125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8608890771865845, "rewards/margins": 9.5269775390625, "rewards/rejected": -10.387866020202637, "step": 48750 }, { "epoch": 0.58, "learning_rate": 2.2067724597589516e-06, "logits/chosen": -2.888381242752075, "logits/rejected": -2.475482940673828, "logps/chosen": -112.20594787597656, "logps/rejected": -883.794921875, "loss": 0.1073, "rewards/accuracies": 1.0, "rewards/chosen": -0.665725827217102, "rewards/margins": 7.801704406738281, "rewards/rejected": -8.46743106842041, "step": 48760 }, { "epoch": 0.58, "learning_rate": 2.2057350749718575e-06, "logits/chosen": -2.891047239303589, "logits/rejected": -2.5727503299713135, "logps/chosen": -117.6965103149414, "logps/rejected": -852.1962890625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7108755707740784, "rewards/margins": 7.442809104919434, "rewards/rejected": -8.153684616088867, "step": 48770 }, { "epoch": 0.58, "learning_rate": 2.2046977415626022e-06, "logits/chosen": -2.900575637817383, "logits/rejected": -2.227945566177368, "logps/chosen": -155.4843292236328, "logps/rejected": -1000.4323120117188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0739883184432983, "rewards/margins": 8.537638664245605, "rewards/rejected": -9.611626625061035, "step": 48780 }, { "epoch": 0.58, "learning_rate": 2.2036604597123013e-06, "logits/chosen": -2.9341468811035156, "logits/rejected": -2.4715397357940674, "logps/chosen": -110.96870422363281, "logps/rejected": -937.4514770507812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6907535791397095, "rewards/margins": 8.300212860107422, "rewards/rejected": -8.990964889526367, "step": 48790 }, { "epoch": 0.58, "learning_rate": 2.202623229602061e-06, "logits/chosen": -2.9505655765533447, "logits/rejected": -2.2282276153564453, "logps/chosen": -145.19107055664062, "logps/rejected": -1130.6783447265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8827295303344727, "rewards/margins": 10.008936882019043, "rewards/rejected": -10.891666412353516, "step": 48800 }, { "epoch": 0.58, "learning_rate": 2.2015860514129787e-06, "logits/chosen": -2.859786033630371, "logits/rejected": -2.40911865234375, "logps/chosen": -115.46907806396484, "logps/rejected": -968.2952880859375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7148758769035339, "rewards/margins": 8.582237243652344, "rewards/rejected": -9.297113418579102, "step": 48810 }, { "epoch": 0.58, "learning_rate": 2.2005489253261435e-06, "logits/chosen": -2.8611879348754883, "logits/rejected": -2.2031497955322266, "logps/chosen": -159.27392578125, "logps/rejected": -1002.978515625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.1185276508331299, "rewards/margins": 8.4965238571167, "rewards/rejected": -9.61505126953125, "step": 48820 }, { "epoch": 0.58, "learning_rate": 2.1995118515226335e-06, "logits/chosen": -2.9278440475463867, "logits/rejected": -2.3485474586486816, "logps/chosen": -136.38040161132812, "logps/rejected": -937.0094604492188, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.8673421144485474, "rewards/margins": 8.112929344177246, "rewards/rejected": -8.980271339416504, "step": 48830 }, { "epoch": 0.58, "learning_rate": 2.1984748301835196e-06, "logits/chosen": -2.924671173095703, "logits/rejected": -2.410789728164673, "logps/chosen": -160.13970947265625, "logps/rejected": -986.5791015625, "loss": 0.1097, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1324046850204468, "rewards/margins": 8.336433410644531, "rewards/rejected": -9.468836784362793, "step": 48840 }, { "epoch": 0.58, "learning_rate": 2.197437861489864e-06, "logits/chosen": -2.863645076751709, "logits/rejected": -2.214181423187256, "logps/chosen": -150.23651123046875, "logps/rejected": -993.6842041015625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.9991806149482727, "rewards/margins": 8.54224681854248, "rewards/rejected": -9.541426658630371, "step": 48850 }, { "epoch": 0.58, "learning_rate": 2.1964009456227164e-06, "logits/chosen": -2.8875603675842285, "logits/rejected": -2.388355255126953, "logps/chosen": -135.98597717285156, "logps/rejected": -1016.7608642578125, "loss": 0.1161, "rewards/accuracies": 1.0, "rewards/chosen": -0.8761542439460754, "rewards/margins": 8.897385597229004, "rewards/rejected": -9.773539543151855, "step": 48860 }, { "epoch": 0.58, "learning_rate": 2.1953640827631205e-06, "logits/chosen": -2.829216480255127, "logits/rejected": -2.19736647605896, "logps/chosen": -176.8363037109375, "logps/rejected": -1039.484130859375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.2760705947875977, "rewards/margins": 8.711946487426758, "rewards/rejected": -9.988016128540039, "step": 48870 }, { "epoch": 0.59, "learning_rate": 2.1943272730921102e-06, "logits/chosen": -2.8049726486206055, "logits/rejected": -2.353846788406372, "logps/chosen": -100.33867645263672, "logps/rejected": -863.9990234375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5813813209533691, "rewards/margins": 7.682493686676025, "rewards/rejected": -8.263875007629395, "step": 48880 }, { "epoch": 0.59, "learning_rate": 2.193290516790709e-06, "logits/chosen": -2.859203815460205, "logits/rejected": -2.231884479522705, "logps/chosen": -155.17385864257812, "logps/rejected": -1018.9352416992188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0509954690933228, "rewards/margins": 8.745809555053711, "rewards/rejected": -9.796804428100586, "step": 48890 }, { "epoch": 0.59, "learning_rate": 2.1922538140399314e-06, "logits/chosen": -2.8720574378967285, "logits/rejected": -2.4001402854919434, "logps/chosen": -131.8561248779297, "logps/rejected": -908.1237182617188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8563157320022583, "rewards/margins": 7.849632263183594, "rewards/rejected": -8.705948829650879, "step": 48900 }, { "epoch": 0.59, "learning_rate": 2.191217165020783e-06, "logits/chosen": -2.869292974472046, "logits/rejected": -2.367795705795288, "logps/chosen": -171.87939453125, "logps/rejected": -961.462890625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.2724356651306152, "rewards/margins": 7.948397636413574, "rewards/rejected": -9.220832824707031, "step": 48910 }, { "epoch": 0.59, "learning_rate": 2.1901805699142606e-06, "logits/chosen": -2.8165736198425293, "logits/rejected": -1.8818210363388062, "logps/chosen": -186.63409423828125, "logps/rejected": -1203.134033203125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.2960832118988037, "rewards/margins": 10.324226379394531, "rewards/rejected": -11.620309829711914, "step": 48920 }, { "epoch": 0.59, "learning_rate": 2.1891440289013498e-06, "logits/chosen": -2.8738083839416504, "logits/rejected": -2.294477701187134, "logps/chosen": -144.75729370117188, "logps/rejected": -976.798828125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.912833571434021, "rewards/margins": 8.463068962097168, "rewards/rejected": -9.37590217590332, "step": 48930 }, { "epoch": 0.59, "learning_rate": 2.188107542163028e-06, "logits/chosen": -2.823390483856201, "logits/rejected": -2.256556749343872, "logps/chosen": -198.14743041992188, "logps/rejected": -1075.6151123046875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.4444780349731445, "rewards/margins": 8.895163536071777, "rewards/rejected": -10.339642524719238, "step": 48940 }, { "epoch": 0.59, "learning_rate": 2.1870711098802636e-06, "logits/chosen": -2.8923768997192383, "logits/rejected": -2.3680994510650635, "logps/chosen": -106.45648193359375, "logps/rejected": -910.2799072265625, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.6112565398216248, "rewards/margins": 8.101019859313965, "rewards/rejected": -8.71227741241455, "step": 48950 }, { "epoch": 0.59, "learning_rate": 2.1860347322340137e-06, "logits/chosen": -2.864651918411255, "logits/rejected": -2.3004472255706787, "logps/chosen": -97.67491149902344, "logps/rejected": -900.6519775390625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.4959360957145691, "rewards/margins": 8.133394241333008, "rewards/rejected": -8.629331588745117, "step": 48960 }, { "epoch": 0.59, "learning_rate": 2.184998409405228e-06, "logits/chosen": -2.893953323364258, "logits/rejected": -2.288710594177246, "logps/chosen": -116.82598876953125, "logps/rejected": -946.9442138671875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6827249526977539, "rewards/margins": 8.388471603393555, "rewards/rejected": -9.071195602416992, "step": 48970 }, { "epoch": 0.59, "learning_rate": 2.183962141574845e-06, "logits/chosen": -2.88913631439209, "logits/rejected": -2.4256811141967773, "logps/chosen": -84.03042602539062, "logps/rejected": -842.8941650390625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4074113965034485, "rewards/margins": 7.631725311279297, "rewards/rejected": -8.039137840270996, "step": 48980 }, { "epoch": 0.59, "learning_rate": 2.182925928923795e-06, "logits/chosen": -2.8547866344451904, "logits/rejected": -2.3215909004211426, "logps/chosen": -123.68565368652344, "logps/rejected": -994.05224609375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7551911473274231, "rewards/margins": 8.778454780578613, "rewards/rejected": -9.533645629882812, "step": 48990 }, { "epoch": 0.59, "learning_rate": 2.181889771632996e-06, "logits/chosen": -2.86622953414917, "logits/rejected": -2.3053078651428223, "logps/chosen": -115.14111328125, "logps/rejected": -973.0279541015625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6720594167709351, "rewards/margins": 8.660724639892578, "rewards/rejected": -9.332784652709961, "step": 49000 }, { "epoch": 0.59, "learning_rate": 2.1808536698833598e-06, "logits/chosen": -2.8883724212646484, "logits/rejected": -2.196951389312744, "logps/chosen": -102.80198669433594, "logps/rejected": -943.2591552734375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5360271334648132, "rewards/margins": 8.508443832397461, "rewards/rejected": -9.044469833374023, "step": 49010 }, { "epoch": 0.59, "learning_rate": 2.1798176238557866e-06, "logits/chosen": -2.901423931121826, "logits/rejected": -2.3029322624206543, "logps/chosen": -114.71446228027344, "logps/rejected": -982.6535034179688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6524972915649414, "rewards/margins": 8.788407325744629, "rewards/rejected": -9.44090461730957, "step": 49020 }, { "epoch": 0.59, "learning_rate": 2.178781633731167e-06, "logits/chosen": -2.835383176803589, "logits/rejected": -2.5115180015563965, "logps/chosen": -110.69319152832031, "logps/rejected": -805.0352172851562, "loss": 0.0803, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6954272389411926, "rewards/margins": 6.992800235748291, "rewards/rejected": -7.688227653503418, "step": 49030 }, { "epoch": 0.59, "learning_rate": 2.1777456996903816e-06, "logits/chosen": -2.9044694900512695, "logits/rejected": -2.491746187210083, "logps/chosen": -103.57035064697266, "logps/rejected": -948.96240234375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5682643055915833, "rewards/margins": 8.530065536499023, "rewards/rejected": -9.098329544067383, "step": 49040 }, { "epoch": 0.59, "learning_rate": 2.176709821914303e-06, "logits/chosen": -2.8555362224578857, "logits/rejected": -1.9814529418945312, "logps/chosen": -160.9570770263672, "logps/rejected": -1032.200927734375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.078397512435913, "rewards/margins": 8.841669082641602, "rewards/rejected": -9.920066833496094, "step": 49050 }, { "epoch": 0.59, "learning_rate": 2.1756740005837907e-06, "logits/chosen": -2.873265266418457, "logits/rejected": -2.268765449523926, "logps/chosen": -123.4256362915039, "logps/rejected": -992.0296020507812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7877556681632996, "rewards/margins": 8.733610153198242, "rewards/rejected": -9.521367073059082, "step": 49060 }, { "epoch": 0.59, "learning_rate": 2.1746382358796973e-06, "logits/chosen": -2.872427463531494, "logits/rejected": -2.4197475910186768, "logps/chosen": -87.08174133300781, "logps/rejected": -853.0778198242188, "loss": 0.1198, "rewards/accuracies": 1.0, "rewards/chosen": -0.49161118268966675, "rewards/margins": 7.6797051429748535, "rewards/rejected": -8.171316146850586, "step": 49070 }, { "epoch": 0.59, "learning_rate": 2.173602527982864e-06, "logits/chosen": -2.916006326675415, "logits/rejected": -2.4434151649475098, "logps/chosen": -131.60629272460938, "logps/rejected": -952.2830810546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8368502855300903, "rewards/margins": 8.294196128845215, "rewards/rejected": -9.131047248840332, "step": 49080 }, { "epoch": 0.59, "learning_rate": 2.1725668770741236e-06, "logits/chosen": -2.8546502590179443, "logits/rejected": -2.0219008922576904, "logps/chosen": -144.85292053222656, "logps/rejected": -1141.1817626953125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8960496187210083, "rewards/margins": 10.099827766418457, "rewards/rejected": -10.995877265930176, "step": 49090 }, { "epoch": 0.59, "learning_rate": 2.1715312833342963e-06, "logits/chosen": -2.876267671585083, "logits/rejected": -2.174370527267456, "logps/chosen": -127.65738677978516, "logps/rejected": -951.6868896484375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.763421893119812, "rewards/margins": 8.366412162780762, "rewards/rejected": -9.129834175109863, "step": 49100 }, { "epoch": 0.59, "learning_rate": 2.1704957469441947e-06, "logits/chosen": -2.887155532836914, "logits/rejected": -2.4282021522521973, "logps/chosen": -104.21781921386719, "logps/rejected": -974.0393676757812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5790559649467468, "rewards/margins": 8.776388168334961, "rewards/rejected": -9.355443954467773, "step": 49110 }, { "epoch": 0.59, "learning_rate": 2.169460268084621e-06, "logits/chosen": -2.888598918914795, "logits/rejected": -2.360138416290283, "logps/chosen": -126.7294921875, "logps/rejected": -915.8629760742188, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7370100021362305, "rewards/margins": 8.018546104431152, "rewards/rejected": -8.755556106567383, "step": 49120 }, { "epoch": 0.59, "learning_rate": 2.168424846936366e-06, "logits/chosen": -2.9027912616729736, "logits/rejected": -2.5840249061584473, "logps/chosen": -81.71928405761719, "logps/rejected": -767.0364379882812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4412154257297516, "rewards/margins": 6.8739776611328125, "rewards/rejected": -7.315193176269531, "step": 49130 }, { "epoch": 0.59, "learning_rate": 2.167389483680212e-06, "logits/chosen": -2.9000821113586426, "logits/rejected": -2.232621431350708, "logps/chosen": -102.39093017578125, "logps/rejected": -961.1099853515625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.51740962266922, "rewards/margins": 8.709073066711426, "rewards/rejected": -9.226481437683105, "step": 49140 }, { "epoch": 0.59, "learning_rate": 2.1663541784969307e-06, "logits/chosen": -2.8503665924072266, "logits/rejected": -2.157731771469116, "logps/chosen": -130.50363159179688, "logps/rejected": -996.6354370117188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8556678891181946, "rewards/margins": 8.730088233947754, "rewards/rejected": -9.585755348205566, "step": 49150 }, { "epoch": 0.59, "learning_rate": 2.165318931567283e-06, "logits/chosen": -2.889892339706421, "logits/rejected": -2.241924524307251, "logps/chosen": -112.72882080078125, "logps/rejected": -965.3721923828125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7085764408111572, "rewards/margins": 8.559263229370117, "rewards/rejected": -9.267839431762695, "step": 49160 }, { "epoch": 0.59, "learning_rate": 2.164283743072019e-06, "logits/chosen": -2.9191064834594727, "logits/rejected": -2.4913101196289062, "logps/chosen": -109.50309753417969, "logps/rejected": -889.9475708007812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6723865270614624, "rewards/margins": 7.852574825286865, "rewards/rejected": -8.524961471557617, "step": 49170 }, { "epoch": 0.59, "learning_rate": 2.163248613191882e-06, "logits/chosen": -2.8906502723693848, "logits/rejected": -2.4462897777557373, "logps/chosen": -98.04383850097656, "logps/rejected": -918.2803955078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5173490643501282, "rewards/margins": 8.272838592529297, "rewards/rejected": -8.79018783569336, "step": 49180 }, { "epoch": 0.59, "learning_rate": 2.162213542107602e-06, "logits/chosen": -2.8954317569732666, "logits/rejected": -2.3082356452941895, "logps/chosen": -139.97561645507812, "logps/rejected": -1043.7525634765625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8618757128715515, "rewards/margins": 9.16861629486084, "rewards/rejected": -10.030492782592773, "step": 49190 }, { "epoch": 0.59, "learning_rate": 2.1611785299998996e-06, "logits/chosen": -2.859466314315796, "logits/rejected": -2.4340147972106934, "logps/chosen": -134.18136596679688, "logps/rejected": -923.7994995117188, "loss": 0.1219, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9174591302871704, "rewards/margins": 7.923856258392334, "rewards/rejected": -8.841314315795898, "step": 49200 }, { "epoch": 0.59, "learning_rate": 2.160143577049485e-06, "logits/chosen": -2.852884292602539, "logits/rejected": -2.3892366886138916, "logps/chosen": -111.8271484375, "logps/rejected": -900.1036376953125, "loss": 0.1428, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6948012113571167, "rewards/margins": 7.933170318603516, "rewards/rejected": -8.627972602844238, "step": 49210 }, { "epoch": 0.59, "learning_rate": 2.159108683437057e-06, "logits/chosen": -2.874467611312866, "logits/rejected": -2.498870372772217, "logps/chosen": -93.4696044921875, "logps/rejected": -868.33984375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5256065726280212, "rewards/margins": 7.772912502288818, "rewards/rejected": -8.298519134521484, "step": 49220 }, { "epoch": 0.59, "learning_rate": 2.1580738493433067e-06, "logits/chosen": -2.877034902572632, "logits/rejected": -2.3188507556915283, "logps/chosen": -115.1799087524414, "logps/rejected": -927.8779296875, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -0.7138200998306274, "rewards/margins": 8.179649353027344, "rewards/rejected": -8.89346981048584, "step": 49230 }, { "epoch": 0.59, "learning_rate": 2.157039074948913e-06, "logits/chosen": -2.861304998397827, "logits/rejected": -2.5953590869903564, "logps/chosen": -81.75178527832031, "logps/rejected": -793.8902587890625, "loss": 0.0391, "rewards/accuracies": 1.0, "rewards/chosen": -0.45333534479141235, "rewards/margins": 7.117526054382324, "rewards/rejected": -7.570860862731934, "step": 49240 }, { "epoch": 0.59, "learning_rate": 2.156004360434544e-06, "logits/chosen": -2.8810489177703857, "logits/rejected": -2.277285099029541, "logps/chosen": -103.13478088378906, "logps/rejected": -934.8615112304688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6134594678878784, "rewards/margins": 8.333468437194824, "rewards/rejected": -8.946928024291992, "step": 49250 }, { "epoch": 0.59, "learning_rate": 2.1549697059808576e-06, "logits/chosen": -2.871802806854248, "logits/rejected": -2.282139778137207, "logps/chosen": -118.82051849365234, "logps/rejected": -990.9271240234375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6829482316970825, "rewards/margins": 8.829153060913086, "rewards/rejected": -9.512102127075195, "step": 49260 }, { "epoch": 0.59, "learning_rate": 2.1539351117685035e-06, "logits/chosen": -2.8814280033111572, "logits/rejected": -2.4124794006347656, "logps/chosen": -111.95353698730469, "logps/rejected": -966.3541259765625, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -0.6639925241470337, "rewards/margins": 8.611576080322266, "rewards/rejected": -9.275568962097168, "step": 49270 }, { "epoch": 0.59, "learning_rate": 2.152900577978117e-06, "logits/chosen": -2.879255771636963, "logits/rejected": -2.0130839347839355, "logps/chosen": -138.34500122070312, "logps/rejected": -1111.005126953125, "loss": 0.1477, "rewards/accuracies": 1.0, "rewards/chosen": -0.837283730506897, "rewards/margins": 9.852895736694336, "rewards/rejected": -10.690179824829102, "step": 49280 }, { "epoch": 0.59, "learning_rate": 2.1518661047903257e-06, "logits/chosen": -2.832977056503296, "logits/rejected": -2.254676342010498, "logps/chosen": -123.72930908203125, "logps/rejected": -956.5543823242188, "loss": 0.0407, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7812234163284302, "rewards/margins": 8.397438049316406, "rewards/rejected": -9.17866039276123, "step": 49290 }, { "epoch": 0.59, "learning_rate": 2.150831692385745e-06, "logits/chosen": -2.860020399093628, "logits/rejected": -2.3967630863189697, "logps/chosen": -126.63813781738281, "logps/rejected": -902.517578125, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.8217867016792297, "rewards/margins": 7.818877220153809, "rewards/rejected": -8.640664100646973, "step": 49300 }, { "epoch": 0.59, "learning_rate": 2.149797340944982e-06, "logits/chosen": -2.9026846885681152, "logits/rejected": -2.4726319313049316, "logps/chosen": -91.88662719726562, "logps/rejected": -888.9695434570312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.48816928267478943, "rewards/margins": 8.030311584472656, "rewards/rejected": -8.51848030090332, "step": 49310 }, { "epoch": 0.59, "learning_rate": 2.14876305064863e-06, "logits/chosen": -2.8612945079803467, "logits/rejected": -2.3935515880584717, "logps/chosen": -96.1763687133789, "logps/rejected": -907.9056396484375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5216132402420044, "rewards/margins": 8.17747688293457, "rewards/rejected": -8.699090003967285, "step": 49320 }, { "epoch": 0.59, "learning_rate": 2.1477288216772736e-06, "logits/chosen": -2.8995673656463623, "logits/rejected": -2.48317289352417, "logps/chosen": -99.9197006225586, "logps/rejected": -896.3055419921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5654662847518921, "rewards/margins": 8.019224166870117, "rewards/rejected": -8.584691047668457, "step": 49330 }, { "epoch": 0.59, "learning_rate": 2.1466946542114876e-06, "logits/chosen": -2.7995378971099854, "logits/rejected": -2.2555434703826904, "logps/chosen": -110.5373764038086, "logps/rejected": -978.1261596679688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6982563734054565, "rewards/margins": 8.703587532043457, "rewards/rejected": -9.40184497833252, "step": 49340 }, { "epoch": 0.59, "learning_rate": 2.145660548431833e-06, "logits/chosen": -2.867957592010498, "logits/rejected": -2.485269546508789, "logps/chosen": -93.79238891601562, "logps/rejected": -950.5706787109375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5374650359153748, "rewards/margins": 8.584890365600586, "rewards/rejected": -9.122355461120605, "step": 49350 }, { "epoch": 0.59, "learning_rate": 2.1446265045188625e-06, "logits/chosen": -2.879901885986328, "logits/rejected": -2.372253656387329, "logps/chosen": -127.12200927734375, "logps/rejected": -949.5554809570312, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.8123958706855774, "rewards/margins": 8.278139114379883, "rewards/rejected": -9.090535163879395, "step": 49360 }, { "epoch": 0.59, "learning_rate": 2.1435925226531178e-06, "logits/chosen": -2.8604979515075684, "logits/rejected": -2.4700608253479004, "logps/chosen": -104.24736022949219, "logps/rejected": -924.81103515625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6266239285469055, "rewards/margins": 8.233332633972168, "rewards/rejected": -8.859955787658691, "step": 49370 }, { "epoch": 0.59, "learning_rate": 2.142558603015128e-06, "logits/chosen": -2.8711655139923096, "logits/rejected": -2.3427014350891113, "logps/chosen": -107.91108703613281, "logps/rejected": -965.5953369140625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.617380678653717, "rewards/margins": 8.639689445495605, "rewards/rejected": -9.257070541381836, "step": 49380 }, { "epoch": 0.59, "learning_rate": 2.1415247457854135e-06, "logits/chosen": -2.8752353191375732, "logits/rejected": -2.4931349754333496, "logps/chosen": -93.78807067871094, "logps/rejected": -904.6258544921875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5215832591056824, "rewards/margins": 8.150078773498535, "rewards/rejected": -8.671660423278809, "step": 49390 }, { "epoch": 0.59, "learning_rate": 2.140490951144483e-06, "logits/chosen": -2.902669906616211, "logits/rejected": -2.5105433464050293, "logps/chosen": -107.37992858886719, "logps/rejected": -785.5408325195312, "loss": 0.1319, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7007968425750732, "rewards/margins": 6.785957336425781, "rewards/rejected": -7.486753940582275, "step": 49400 }, { "epoch": 0.59, "learning_rate": 2.139457219272834e-06, "logits/chosen": -2.9097657203674316, "logits/rejected": -2.465423583984375, "logps/chosen": -85.35920715332031, "logps/rejected": -891.64892578125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.4436314105987549, "rewards/margins": 8.087660789489746, "rewards/rejected": -8.531291961669922, "step": 49410 }, { "epoch": 0.59, "learning_rate": 2.138423550350953e-06, "logits/chosen": -2.847792863845825, "logits/rejected": -2.403202533721924, "logps/chosen": -102.1010971069336, "logps/rejected": -899.669921875, "loss": 0.1371, "rewards/accuracies": 1.0, "rewards/chosen": -0.5626486539840698, "rewards/margins": 8.062131881713867, "rewards/rejected": -8.62477970123291, "step": 49420 }, { "epoch": 0.59, "learning_rate": 2.1373899445593158e-06, "logits/chosen": -2.861523151397705, "logits/rejected": -2.256871461868286, "logps/chosen": -111.8280258178711, "logps/rejected": -965.3156127929688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6197085380554199, "rewards/margins": 8.652134895324707, "rewards/rejected": -9.271843910217285, "step": 49430 }, { "epoch": 0.59, "learning_rate": 2.136356402078388e-06, "logits/chosen": -2.8959248065948486, "logits/rejected": -2.1228578090667725, "logps/chosen": -119.13818359375, "logps/rejected": -1024.05859375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6668485403060913, "rewards/margins": 9.183298110961914, "rewards/rejected": -9.850147247314453, "step": 49440 }, { "epoch": 0.59, "learning_rate": 2.135322923088621e-06, "logits/chosen": -2.8950581550598145, "logits/rejected": -2.4715709686279297, "logps/chosen": -95.79901885986328, "logps/rejected": -842.43505859375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5498682260513306, "rewards/margins": 7.492884635925293, "rewards/rejected": -8.042754173278809, "step": 49450 }, { "epoch": 0.59, "learning_rate": 2.1342895077704597e-06, "logits/chosen": -2.8726110458374023, "logits/rejected": -2.213409185409546, "logps/chosen": -133.71005249023438, "logps/rejected": -934.4000244140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7846359014511108, "rewards/margins": 8.171817779541016, "rewards/rejected": -8.956454277038574, "step": 49460 }, { "epoch": 0.59, "learning_rate": 2.133256156304335e-06, "logits/chosen": -2.8812642097473145, "logits/rejected": -2.1775782108306885, "logps/chosen": -125.6377944946289, "logps/rejected": -954.3341064453125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7501264810562134, "rewards/margins": 8.409234046936035, "rewards/rejected": -9.159360885620117, "step": 49470 }, { "epoch": 0.59, "learning_rate": 2.132222868870666e-06, "logits/chosen": -2.841614246368408, "logits/rejected": -2.159005880355835, "logps/chosen": -141.87680053710938, "logps/rejected": -960.1837158203125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9059772491455078, "rewards/margins": 8.290670394897461, "rewards/rejected": -9.196647644042969, "step": 49480 }, { "epoch": 0.59, "learning_rate": 2.1311896456498628e-06, "logits/chosen": -2.910372018814087, "logits/rejected": -2.6268527507781982, "logps/chosen": -71.98281860351562, "logps/rejected": -824.47314453125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.3580422103404999, "rewards/margins": 7.5090155601501465, "rewards/rejected": -7.867057800292969, "step": 49490 }, { "epoch": 0.59, "learning_rate": 2.1301564868223227e-06, "logits/chosen": -2.9023423194885254, "logits/rejected": -2.1856369972229004, "logps/chosen": -121.180419921875, "logps/rejected": -1105.2095947265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.702181875705719, "rewards/margins": 9.93876838684082, "rewards/rejected": -10.640950202941895, "step": 49500 }, { "epoch": 0.59, "learning_rate": 2.129123392568434e-06, "logits/chosen": -2.846222162246704, "logits/rejected": -2.254981517791748, "logps/chosen": -112.8687744140625, "logps/rejected": -943.5392456054688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.673585057258606, "rewards/margins": 8.353292465209961, "rewards/rejected": -9.026876449584961, "step": 49510 }, { "epoch": 0.59, "learning_rate": 2.12809036306857e-06, "logits/chosen": -2.903409957885742, "logits/rejected": -2.440303087234497, "logps/chosen": -82.50220489501953, "logps/rejected": -844.5021362304688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.43176349997520447, "rewards/margins": 7.643318176269531, "rewards/rejected": -8.075081825256348, "step": 49520 }, { "epoch": 0.59, "learning_rate": 2.127057398503096e-06, "logits/chosen": -2.88378643989563, "logits/rejected": -2.5909905433654785, "logps/chosen": -73.7965087890625, "logps/rejected": -771.5236206054688, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -0.3518003523349762, "rewards/margins": 6.999722957611084, "rewards/rejected": -7.351523399353027, "step": 49530 }, { "epoch": 0.59, "learning_rate": 2.1260244990523645e-06, "logits/chosen": -2.839432716369629, "logits/rejected": -2.1519927978515625, "logps/chosen": -152.45191955566406, "logps/rejected": -1093.5198974609375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9944764375686646, "rewards/margins": 9.521116256713867, "rewards/rejected": -10.515592575073242, "step": 49540 }, { "epoch": 0.59, "learning_rate": 2.1249916648967163e-06, "logits/chosen": -2.832616090774536, "logits/rejected": -2.332897663116455, "logps/chosen": -89.3699951171875, "logps/rejected": -863.1724853515625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4643036723136902, "rewards/margins": 7.778522491455078, "rewards/rejected": -8.242826461791992, "step": 49550 }, { "epoch": 0.59, "learning_rate": 2.1239588962164824e-06, "logits/chosen": -2.8938634395599365, "logits/rejected": -2.5379528999328613, "logps/chosen": -84.59856414794922, "logps/rejected": -887.3504638671875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.3973667025566101, "rewards/margins": 8.075037956237793, "rewards/rejected": -8.472404479980469, "step": 49560 }, { "epoch": 0.59, "learning_rate": 2.122926193191981e-06, "logits/chosen": -2.8898322582244873, "logits/rejected": -2.2761783599853516, "logps/chosen": -121.75065612792969, "logps/rejected": -941.9075927734375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6907869577407837, "rewards/margins": 8.330964088439941, "rewards/rejected": -9.021750450134277, "step": 49570 }, { "epoch": 0.59, "learning_rate": 2.121893556003518e-06, "logits/chosen": -2.8271124362945557, "logits/rejected": -2.3443307876586914, "logps/chosen": -100.10253143310547, "logps/rejected": -903.57861328125, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -0.5766716003417969, "rewards/margins": 8.081460952758789, "rewards/rejected": -8.658132553100586, "step": 49580 }, { "epoch": 0.59, "learning_rate": 2.1208609848313898e-06, "logits/chosen": -2.872814178466797, "logits/rejected": -2.19020676612854, "logps/chosen": -136.75650024414062, "logps/rejected": -1040.4400634765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8495917320251465, "rewards/margins": 9.161507606506348, "rewards/rejected": -10.011098861694336, "step": 49590 }, { "epoch": 0.59, "learning_rate": 2.1198284798558816e-06, "logits/chosen": -2.8674092292785645, "logits/rejected": -2.2874088287353516, "logps/chosen": -111.35417175292969, "logps/rejected": -926.1923828125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.58592689037323, "rewards/margins": 8.27558708190918, "rewards/rejected": -8.8615140914917, "step": 49600 }, { "epoch": 0.59, "learning_rate": 2.1187960412572645e-06, "logits/chosen": -2.8453290462493896, "logits/rejected": -2.426682710647583, "logps/chosen": -113.3116226196289, "logps/rejected": -811.6556396484375, "loss": 0.0799, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7225396633148193, "rewards/margins": 7.030125617980957, "rewards/rejected": -7.752665042877197, "step": 49610 }, { "epoch": 0.59, "learning_rate": 2.1177636692158e-06, "logits/chosen": -2.8300905227661133, "logits/rejected": -1.8215818405151367, "logps/chosen": -177.59713745117188, "logps/rejected": -1165.883544921875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.2467719316482544, "rewards/margins": 10.000757217407227, "rewards/rejected": -11.247527122497559, "step": 49620 }, { "epoch": 0.59, "learning_rate": 2.1167313639117377e-06, "logits/chosen": -2.890765428543091, "logits/rejected": -2.500826835632324, "logps/chosen": -81.09028625488281, "logps/rejected": -874.7806396484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4038534164428711, "rewards/margins": 7.968675136566162, "rewards/rejected": -8.372528076171875, "step": 49630 }, { "epoch": 0.59, "learning_rate": 2.1156991255253138e-06, "logits/chosen": -2.886732578277588, "logits/rejected": -2.3532886505126953, "logps/chosen": -102.87178039550781, "logps/rejected": -933.8113403320312, "loss": 0.1689, "rewards/accuracies": 1.0, "rewards/chosen": -0.5698840618133545, "rewards/margins": 8.389190673828125, "rewards/rejected": -8.959074020385742, "step": 49640 }, { "epoch": 0.59, "learning_rate": 2.114666954236756e-06, "logits/chosen": -2.832287311553955, "logits/rejected": -2.041635036468506, "logps/chosen": -178.8795623779297, "logps/rejected": -1126.659423828125, "loss": 0.1626, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2371571063995361, "rewards/margins": 9.616047859191895, "rewards/rejected": -10.853205680847168, "step": 49650 }, { "epoch": 0.59, "learning_rate": 2.113634850226278e-06, "logits/chosen": -2.8744659423828125, "logits/rejected": -2.4945759773254395, "logps/chosen": -72.02970886230469, "logps/rejected": -836.4484252929688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.3533354699611664, "rewards/margins": 7.626453399658203, "rewards/rejected": -7.979788303375244, "step": 49660 }, { "epoch": 0.59, "learning_rate": 2.1126028136740814e-06, "logits/chosen": -2.8700339794158936, "logits/rejected": -2.3621156215667725, "logps/chosen": -153.270751953125, "logps/rejected": -1014.4332885742188, "loss": 0.1049, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0923783779144287, "rewards/margins": 8.669485092163086, "rewards/rejected": -9.761862754821777, "step": 49670 }, { "epoch": 0.59, "learning_rate": 2.1115708447603577e-06, "logits/chosen": -2.8959341049194336, "logits/rejected": -2.6049704551696777, "logps/chosen": -120.24900817871094, "logps/rejected": -792.7872314453125, "loss": 0.153, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7920297384262085, "rewards/margins": 6.750143527984619, "rewards/rejected": -7.542173862457275, "step": 49680 }, { "epoch": 0.59, "learning_rate": 2.1105389436652862e-06, "logits/chosen": -2.820011854171753, "logits/rejected": -2.267324924468994, "logps/chosen": -157.85244750976562, "logps/rejected": -1045.5535888671875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0867736339569092, "rewards/margins": 8.961962699890137, "rewards/rejected": -10.048735618591309, "step": 49690 }, { "epoch": 0.59, "learning_rate": 2.109507110569033e-06, "logits/chosen": -2.8519654273986816, "logits/rejected": -2.1188321113586426, "logps/chosen": -152.0358123779297, "logps/rejected": -1063.802001953125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.954977810382843, "rewards/margins": 9.274659156799316, "rewards/rejected": -10.229636192321777, "step": 49700 }, { "epoch": 0.6, "learning_rate": 2.1084753456517538e-06, "logits/chosen": -2.8886799812316895, "logits/rejected": -2.5655360221862793, "logps/chosen": -84.85460662841797, "logps/rejected": -863.1232299804688, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.45196571946144104, "rewards/margins": 7.800843715667725, "rewards/rejected": -8.25281047821045, "step": 49710 }, { "epoch": 0.6, "learning_rate": 2.107443649093592e-06, "logits/chosen": -2.8936715126037598, "logits/rejected": -2.4539458751678467, "logps/chosen": -87.63624572753906, "logps/rejected": -918.7205200195312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.47729530930519104, "rewards/margins": 8.332728385925293, "rewards/rejected": -8.810023307800293, "step": 49720 }, { "epoch": 0.6, "learning_rate": 2.106412021074679e-06, "logits/chosen": -2.8636393547058105, "logits/rejected": -2.2828097343444824, "logps/chosen": -131.39312744140625, "logps/rejected": -909.0172119140625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7992933392524719, "rewards/margins": 7.905985355377197, "rewards/rejected": -8.705278396606445, "step": 49730 }, { "epoch": 0.6, "learning_rate": 2.1053804617751334e-06, "logits/chosen": -2.8348236083984375, "logits/rejected": -2.2536063194274902, "logps/chosen": -126.40777587890625, "logps/rejected": -1003.9402465820312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8243832588195801, "rewards/margins": 8.831671714782715, "rewards/rejected": -9.65605640411377, "step": 49740 }, { "epoch": 0.6, "learning_rate": 2.104348971375063e-06, "logits/chosen": -2.862208366394043, "logits/rejected": -2.2976651191711426, "logps/chosen": -123.01216125488281, "logps/rejected": -941.5625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7898241281509399, "rewards/margins": 8.23687744140625, "rewards/rejected": -9.026701927185059, "step": 49750 }, { "epoch": 0.6, "learning_rate": 2.1033175500545644e-06, "logits/chosen": -2.8931922912597656, "logits/rejected": -2.285863161087036, "logps/chosen": -129.12835693359375, "logps/rejected": -1003.3690185546875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7515554428100586, "rewards/margins": 8.863495826721191, "rewards/rejected": -9.61505126953125, "step": 49760 }, { "epoch": 0.6, "learning_rate": 2.102286197993719e-06, "logits/chosen": -2.889169931411743, "logits/rejected": -2.3615267276763916, "logps/chosen": -113.91606140136719, "logps/rejected": -994.6424560546875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6639219522476196, "rewards/margins": 8.900958061218262, "rewards/rejected": -9.56488037109375, "step": 49770 }, { "epoch": 0.6, "learning_rate": 2.1012549153725985e-06, "logits/chosen": -2.8402023315429688, "logits/rejected": -2.2196662425994873, "logps/chosen": -117.65869140625, "logps/rejected": -898.7803955078125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.7223725318908691, "rewards/margins": 7.880214691162109, "rewards/rejected": -8.60258674621582, "step": 49780 }, { "epoch": 0.6, "learning_rate": 2.1002237023712635e-06, "logits/chosen": -2.837576150894165, "logits/rejected": -2.2712855339050293, "logps/chosen": -115.08719635009766, "logps/rejected": -959.8050537109375, "loss": 0.0642, "rewards/accuracies": 1.0, "rewards/chosen": -0.7049189805984497, "rewards/margins": 8.509757995605469, "rewards/rejected": -9.214676856994629, "step": 49790 }, { "epoch": 0.6, "learning_rate": 2.099192559169759e-06, "logits/chosen": -2.8400349617004395, "logits/rejected": -2.243941307067871, "logps/chosen": -118.74613952636719, "logps/rejected": -934.3557739257812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6682634949684143, "rewards/margins": 8.274877548217773, "rewards/rejected": -8.943140029907227, "step": 49800 }, { "epoch": 0.6, "learning_rate": 2.09816148594812e-06, "logits/chosen": -2.9076461791992188, "logits/rejected": -2.1117568016052246, "logps/chosen": -159.64735412597656, "logps/rejected": -1143.083740234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9243261218070984, "rewards/margins": 10.102843284606934, "rewards/rejected": -11.027168273925781, "step": 49810 }, { "epoch": 0.6, "learning_rate": 2.09713048288637e-06, "logits/chosen": -2.858686923980713, "logits/rejected": -2.4252655506134033, "logps/chosen": -138.799560546875, "logps/rejected": -955.4025268554688, "loss": 0.1606, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9551347494125366, "rewards/margins": 8.21275520324707, "rewards/rejected": -9.167889595031738, "step": 49820 }, { "epoch": 0.6, "learning_rate": 2.096099550164519e-06, "logits/chosen": -2.8491287231445312, "logits/rejected": -2.3367929458618164, "logps/chosen": -112.87660217285156, "logps/rejected": -976.8936767578125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.6469458341598511, "rewards/margins": 8.736974716186523, "rewards/rejected": -9.383920669555664, "step": 49830 }, { "epoch": 0.6, "learning_rate": 2.0950686879625645e-06, "logits/chosen": -2.8578882217407227, "logits/rejected": -2.2428689002990723, "logps/chosen": -114.80838775634766, "logps/rejected": -987.0771484375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6967741847038269, "rewards/margins": 8.782646179199219, "rewards/rejected": -9.479421615600586, "step": 49840 }, { "epoch": 0.6, "learning_rate": 2.0940378964604924e-06, "logits/chosen": -2.8173413276672363, "logits/rejected": -2.4687864780426025, "logps/chosen": -92.30766296386719, "logps/rejected": -918.4187622070312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5454164743423462, "rewards/margins": 8.26107120513916, "rewards/rejected": -8.806487083435059, "step": 49850 }, { "epoch": 0.6, "learning_rate": 2.0930071758382763e-06, "logits/chosen": -2.8899433612823486, "logits/rejected": -2.3302669525146484, "logps/chosen": -112.40040588378906, "logps/rejected": -874.5316162109375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.684941828250885, "rewards/margins": 7.688589572906494, "rewards/rejected": -8.37353229522705, "step": 49860 }, { "epoch": 0.6, "learning_rate": 2.0919765262758767e-06, "logits/chosen": -2.863722324371338, "logits/rejected": -2.587845802307129, "logps/chosen": -76.61276245117188, "logps/rejected": -796.0565795898438, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.41222041845321655, "rewards/margins": 7.170963287353516, "rewards/rejected": -7.583184242248535, "step": 49870 }, { "epoch": 0.6, "learning_rate": 2.0909459479532425e-06, "logits/chosen": -2.856489896774292, "logits/rejected": -2.274144172668457, "logps/chosen": -141.3462371826172, "logps/rejected": -970.3421020507812, "loss": 0.1258, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9909381866455078, "rewards/margins": 8.321151733398438, "rewards/rejected": -9.312089920043945, "step": 49880 }, { "epoch": 0.6, "learning_rate": 2.0899154410503096e-06, "logits/chosen": -2.831613779067993, "logits/rejected": -2.2309207916259766, "logps/chosen": -129.55210876464844, "logps/rejected": -904.1101684570312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8276799321174622, "rewards/margins": 7.814260005950928, "rewards/rejected": -8.641939163208008, "step": 49890 }, { "epoch": 0.6, "learning_rate": 2.088885005747002e-06, "logits/chosen": -2.8404054641723633, "logits/rejected": -2.351398468017578, "logps/chosen": -107.999755859375, "logps/rejected": -913.0486450195312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6781479120254517, "rewards/margins": 8.086305618286133, "rewards/rejected": -8.764453887939453, "step": 49900 }, { "epoch": 0.6, "learning_rate": 2.08785464222323e-06, "logits/chosen": -2.8692617416381836, "logits/rejected": -2.270622968673706, "logps/chosen": -115.3949203491211, "logps/rejected": -926.5587158203125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7153729200363159, "rewards/margins": 8.173931121826172, "rewards/rejected": -8.889303207397461, "step": 49910 }, { "epoch": 0.6, "learning_rate": 2.0868243506588926e-06, "logits/chosen": -2.897036075592041, "logits/rejected": -2.496445655822754, "logps/chosen": -100.89656829833984, "logps/rejected": -845.0798950195312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6103211641311646, "rewards/margins": 7.471414089202881, "rewards/rejected": -8.081735610961914, "step": 49920 }, { "epoch": 0.6, "learning_rate": 2.085794131233877e-06, "logits/chosen": -2.8601889610290527, "logits/rejected": -2.044879674911499, "logps/chosen": -160.434326171875, "logps/rejected": -1011.8668823242188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0892698764801025, "rewards/margins": 8.637165069580078, "rewards/rejected": -9.726434707641602, "step": 49930 }, { "epoch": 0.6, "learning_rate": 2.084763984128055e-06, "logits/chosen": -2.8786513805389404, "logits/rejected": -2.250396728515625, "logps/chosen": -114.837158203125, "logps/rejected": -959.8543090820312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6885628700256348, "rewards/margins": 8.522331237792969, "rewards/rejected": -9.210893630981445, "step": 49940 }, { "epoch": 0.6, "learning_rate": 2.0837339095212884e-06, "logits/chosen": -2.8672313690185547, "logits/rejected": -2.28120756149292, "logps/chosen": -125.48577880859375, "logps/rejected": -1015.3485107421875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7589230537414551, "rewards/margins": 9.001437187194824, "rewards/rejected": -9.760360717773438, "step": 49950 }, { "epoch": 0.6, "learning_rate": 2.0827039075934252e-06, "logits/chosen": -2.907209873199463, "logits/rejected": -2.449974536895752, "logps/chosen": -120.5575942993164, "logps/rejected": -930.146484375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7078810930252075, "rewards/margins": 8.213069915771484, "rewards/rejected": -8.920949935913086, "step": 49960 }, { "epoch": 0.6, "learning_rate": 2.0816739785243004e-06, "logits/chosen": -2.9041595458984375, "logits/rejected": -2.6176202297210693, "logps/chosen": -76.89488220214844, "logps/rejected": -817.6107788085938, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.41539448499679565, "rewards/margins": 7.396108150482178, "rewards/rejected": -7.811502933502197, "step": 49970 }, { "epoch": 0.6, "learning_rate": 2.080644122493737e-06, "logits/chosen": -2.8862240314483643, "logits/rejected": -2.2683939933776855, "logps/chosen": -118.62274169921875, "logps/rejected": -875.1776123046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6999952793121338, "rewards/margins": 7.675304412841797, "rewards/rejected": -8.375300407409668, "step": 49980 }, { "epoch": 0.6, "learning_rate": 2.0796143396815456e-06, "logits/chosen": -2.8970046043395996, "logits/rejected": -1.9812628030776978, "logps/chosen": -160.38624572753906, "logps/rejected": -1220.5596923828125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.065582513809204, "rewards/margins": 10.718584060668945, "rewards/rejected": -11.784165382385254, "step": 49990 }, { "epoch": 0.6, "learning_rate": 2.078584630267523e-06, "logits/chosen": -2.870900869369507, "logits/rejected": -2.501465320587158, "logps/chosen": -93.6998519897461, "logps/rejected": -903.9478759765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5144747495651245, "rewards/margins": 8.136292457580566, "rewards/rejected": -8.65076732635498, "step": 50000 }, { "epoch": 0.6, "learning_rate": 2.0775549944314526e-06, "logits/chosen": -2.8578438758850098, "logits/rejected": -2.2602181434631348, "logps/chosen": -137.35092163085938, "logps/rejected": -906.4441528320312, "loss": 0.1111, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9218883514404297, "rewards/margins": 7.757876396179199, "rewards/rejected": -8.679764747619629, "step": 50010 }, { "epoch": 0.6, "learning_rate": 2.076525432353107e-06, "logits/chosen": -2.8588883876800537, "logits/rejected": -2.276306629180908, "logps/chosen": -127.4945068359375, "logps/rejected": -1049.7073974609375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7496849298477173, "rewards/margins": 9.349034309387207, "rewards/rejected": -10.098719596862793, "step": 50020 }, { "epoch": 0.6, "learning_rate": 2.075495944212245e-06, "logits/chosen": -2.8814873695373535, "logits/rejected": -2.2478668689727783, "logps/chosen": -139.9740753173828, "logps/rejected": -1053.1011962890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.881156325340271, "rewards/margins": 9.244280815124512, "rewards/rejected": -10.125436782836914, "step": 50030 }, { "epoch": 0.6, "learning_rate": 2.0744665301886124e-06, "logits/chosen": -2.8871243000030518, "logits/rejected": -2.1767466068267822, "logps/chosen": -153.28274536132812, "logps/rejected": -1078.57275390625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9841066598892212, "rewards/margins": 9.384757041931152, "rewards/rejected": -10.368864059448242, "step": 50040 }, { "epoch": 0.6, "learning_rate": 2.073437190461942e-06, "logits/chosen": -2.899606227874756, "logits/rejected": -2.3149476051330566, "logps/chosen": -120.21578216552734, "logps/rejected": -925.7244262695312, "loss": 0.0855, "rewards/accuracies": 1.0, "rewards/chosen": -0.7451857328414917, "rewards/margins": 8.113802909851074, "rewards/rejected": -8.858987808227539, "step": 50050 }, { "epoch": 0.6, "learning_rate": 2.0724079252119527e-06, "logits/chosen": -2.8523240089416504, "logits/rejected": -2.383143663406372, "logps/chosen": -110.89640808105469, "logps/rejected": -900.2552490234375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6804787516593933, "rewards/margins": 7.933667182922363, "rewards/rejected": -8.61414623260498, "step": 50060 }, { "epoch": 0.6, "learning_rate": 2.071378734618352e-06, "logits/chosen": -2.8458950519561768, "logits/rejected": -2.226001024246216, "logps/chosen": -144.65103149414062, "logps/rejected": -986.4910278320312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9194129705429077, "rewards/margins": 8.554585456848145, "rewards/rejected": -9.4739990234375, "step": 50070 }, { "epoch": 0.6, "learning_rate": 2.070349618860835e-06, "logits/chosen": -2.874264717102051, "logits/rejected": -2.4936416149139404, "logps/chosen": -128.04376220703125, "logps/rejected": -817.2354736328125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.833141028881073, "rewards/margins": 6.960446357727051, "rewards/rejected": -7.793586730957031, "step": 50080 }, { "epoch": 0.6, "learning_rate": 2.0693205781190803e-06, "logits/chosen": -2.87144136428833, "logits/rejected": -2.3509409427642822, "logps/chosen": -105.4869384765625, "logps/rejected": -966.0877685546875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5789284110069275, "rewards/margins": 8.70228385925293, "rewards/rejected": -9.281211853027344, "step": 50090 }, { "epoch": 0.6, "learning_rate": 2.0682916125727567e-06, "logits/chosen": -2.853429079055786, "logits/rejected": -2.136467456817627, "logps/chosen": -157.46575927734375, "logps/rejected": -1040.429931640625, "loss": 0.1084, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0315061807632446, "rewards/margins": 8.980621337890625, "rewards/rejected": -10.012128829956055, "step": 50100 }, { "epoch": 0.6, "learning_rate": 2.067262722401519e-06, "logits/chosen": -2.899390459060669, "logits/rejected": -2.257349729537964, "logps/chosen": -110.44893646240234, "logps/rejected": -924.9495239257812, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.6151901483535767, "rewards/margins": 8.226401329040527, "rewards/rejected": -8.841591835021973, "step": 50110 }, { "epoch": 0.6, "learning_rate": 2.0662339077850073e-06, "logits/chosen": -2.864344835281372, "logits/rejected": -2.3906826972961426, "logps/chosen": -96.18904876708984, "logps/rejected": -866.6280517578125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4742717742919922, "rewards/margins": 7.802706718444824, "rewards/rejected": -8.276979446411133, "step": 50120 }, { "epoch": 0.6, "learning_rate": 2.0652051689028506e-06, "logits/chosen": -2.896069049835205, "logits/rejected": -2.249966859817505, "logps/chosen": -93.05070495605469, "logps/rejected": -956.6893310546875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.46467381715774536, "rewards/margins": 8.721010208129883, "rewards/rejected": -9.185684204101562, "step": 50130 }, { "epoch": 0.6, "learning_rate": 2.0641765059346643e-06, "logits/chosen": -2.830960512161255, "logits/rejected": -2.311753511428833, "logps/chosen": -135.10340881347656, "logps/rejected": -891.7341918945312, "loss": 0.1609, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8830839991569519, "rewards/margins": 7.646542549133301, "rewards/rejected": -8.52962589263916, "step": 50140 }, { "epoch": 0.6, "learning_rate": 2.0631479190600496e-06, "logits/chosen": -2.8690638542175293, "logits/rejected": -2.5061087608337402, "logps/chosen": -90.7273178100586, "logps/rejected": -926.3226318359375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.46764713525772095, "rewards/margins": 8.40416431427002, "rewards/rejected": -8.871811866760254, "step": 50150 }, { "epoch": 0.6, "learning_rate": 2.062119408458595e-06, "logits/chosen": -2.885981798171997, "logits/rejected": -2.23828387260437, "logps/chosen": -121.46797180175781, "logps/rejected": -985.2061767578125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6697744131088257, "rewards/margins": 8.786861419677734, "rewards/rejected": -9.456636428833008, "step": 50160 }, { "epoch": 0.6, "learning_rate": 2.061090974309875e-06, "logits/chosen": -2.8684723377227783, "logits/rejected": -2.4326634407043457, "logps/chosen": -98.43281555175781, "logps/rejected": -877.1231689453125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.542241096496582, "rewards/margins": 7.844796657562256, "rewards/rejected": -8.38703727722168, "step": 50170 }, { "epoch": 0.6, "learning_rate": 2.0600626167934525e-06, "logits/chosen": -2.8885855674743652, "logits/rejected": -2.2981865406036377, "logps/chosen": -109.0622329711914, "logps/rejected": -948.7545166015625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6263965368270874, "rewards/margins": 8.45483684539795, "rewards/rejected": -9.081233024597168, "step": 50180 }, { "epoch": 0.6, "learning_rate": 2.0590343360888747e-06, "logits/chosen": -2.8834569454193115, "logits/rejected": -2.354560375213623, "logps/chosen": -102.52980041503906, "logps/rejected": -888.9775390625, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.5757171511650085, "rewards/margins": 7.92593240737915, "rewards/rejected": -8.501649856567383, "step": 50190 }, { "epoch": 0.6, "learning_rate": 2.058006132375677e-06, "logits/chosen": -2.9076600074768066, "logits/rejected": -2.335919141769409, "logps/chosen": -116.20780181884766, "logps/rejected": -854.0950927734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6742339134216309, "rewards/margins": 7.470940589904785, "rewards/rejected": -8.145174026489258, "step": 50200 }, { "epoch": 0.6, "learning_rate": 2.056978005833382e-06, "logits/chosen": -2.9169344902038574, "logits/rejected": -2.380953311920166, "logps/chosen": -101.46134185791016, "logps/rejected": -917.4461059570312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5719126462936401, "rewards/margins": 8.2219820022583, "rewards/rejected": -8.793895721435547, "step": 50210 }, { "epoch": 0.6, "learning_rate": 2.0559499566414958e-06, "logits/chosen": -2.8389124870300293, "logits/rejected": -2.264735460281372, "logps/chosen": -120.73857116699219, "logps/rejected": -950.4625854492188, "loss": 0.1092, "rewards/accuracies": 1.0, "rewards/chosen": -0.7274594306945801, "rewards/margins": 8.381449699401855, "rewards/rejected": -9.108909606933594, "step": 50220 }, { "epoch": 0.6, "learning_rate": 2.0549219849795137e-06, "logits/chosen": -2.895082712173462, "logits/rejected": -2.4606170654296875, "logps/chosen": -93.68914794921875, "logps/rejected": -901.2611083984375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.49221834540367126, "rewards/margins": 8.133739471435547, "rewards/rejected": -8.625957489013672, "step": 50230 }, { "epoch": 0.6, "learning_rate": 2.0538940910269172e-06, "logits/chosen": -2.8716893196105957, "logits/rejected": -2.5853307247161865, "logps/chosen": -84.88626861572266, "logps/rejected": -723.5825805664062, "loss": 0.0986, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.45338934659957886, "rewards/margins": 6.410855770111084, "rewards/rejected": -6.8642449378967285, "step": 50240 }, { "epoch": 0.6, "learning_rate": 2.052866274963174e-06, "logits/chosen": -2.8987460136413574, "logits/rejected": -2.259783983230591, "logps/chosen": -103.66780853271484, "logps/rejected": -946.9108276367188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5078778266906738, "rewards/margins": 8.56900405883789, "rewards/rejected": -9.076881408691406, "step": 50250 }, { "epoch": 0.6, "learning_rate": 2.0518385369677364e-06, "logits/chosen": -2.8629238605499268, "logits/rejected": -2.297945261001587, "logps/chosen": -119.73722839355469, "logps/rejected": -924.2449951171875, "loss": 0.1085, "rewards/accuracies": 1.0, "rewards/chosen": -0.6713045835494995, "rewards/margins": 8.174436569213867, "rewards/rejected": -8.845741271972656, "step": 50260 }, { "epoch": 0.6, "learning_rate": 2.050810877220046e-06, "logits/chosen": -2.8495688438415527, "logits/rejected": -2.2295823097229004, "logps/chosen": -120.97286224365234, "logps/rejected": -1005.0675048828125, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -0.6566025614738464, "rewards/margins": 8.972635269165039, "rewards/rejected": -9.629237174987793, "step": 50270 }, { "epoch": 0.6, "learning_rate": 2.049783295899529e-06, "logits/chosen": -2.8778419494628906, "logits/rejected": -2.2200863361358643, "logps/chosen": -106.8829116821289, "logps/rejected": -959.1858520507812, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5447089076042175, "rewards/margins": 8.657339096069336, "rewards/rejected": -9.202047348022461, "step": 50280 }, { "epoch": 0.6, "learning_rate": 2.048755793185598e-06, "logits/chosen": -2.856827735900879, "logits/rejected": -1.9476597309112549, "logps/chosen": -132.00779724121094, "logps/rejected": -1063.725830078125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7627770304679871, "rewards/margins": 9.461420059204102, "rewards/rejected": -10.22419548034668, "step": 50290 }, { "epoch": 0.6, "learning_rate": 2.047728369257652e-06, "logits/chosen": -2.9123637676239014, "logits/rejected": -2.5228209495544434, "logps/chosen": -79.57996368408203, "logps/rejected": -813.9921875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.3845826983451843, "rewards/margins": 7.372744560241699, "rewards/rejected": -7.757327079772949, "step": 50300 }, { "epoch": 0.6, "learning_rate": 2.0467010242950768e-06, "logits/chosen": -2.8510546684265137, "logits/rejected": -1.973860502243042, "logps/chosen": -139.84744262695312, "logps/rejected": -1078.5997314453125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9005715250968933, "rewards/margins": 9.47758960723877, "rewards/rejected": -10.378162384033203, "step": 50310 }, { "epoch": 0.6, "learning_rate": 2.045673758477244e-06, "logits/chosen": -2.8609213829040527, "logits/rejected": -2.2555859088897705, "logps/chosen": -132.49807739257812, "logps/rejected": -939.4420166015625, "loss": 0.1141, "rewards/accuracies": 1.0, "rewards/chosen": -0.8552656173706055, "rewards/margins": 8.132063865661621, "rewards/rejected": -8.987329483032227, "step": 50320 }, { "epoch": 0.6, "learning_rate": 2.0446465719835102e-06, "logits/chosen": -2.9385275840759277, "logits/rejected": -2.5045018196105957, "logps/chosen": -97.48069763183594, "logps/rejected": -948.2546997070312, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -0.4988032877445221, "rewards/margins": 8.593147277832031, "rewards/rejected": -9.091949462890625, "step": 50330 }, { "epoch": 0.6, "learning_rate": 2.0436194649932208e-06, "logits/chosen": -2.872246265411377, "logits/rejected": -2.267517566680908, "logps/chosen": -128.0910186767578, "logps/rejected": -973.0125122070312, "loss": 0.0883, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7540913224220276, "rewards/margins": 8.579221725463867, "rewards/rejected": -9.33331298828125, "step": 50340 }, { "epoch": 0.6, "learning_rate": 2.042592437685706e-06, "logits/chosen": -2.903812885284424, "logits/rejected": -2.651803970336914, "logps/chosen": -71.8205337524414, "logps/rejected": -861.2568359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.322885125875473, "rewards/margins": 7.922338962554932, "rewards/rejected": -8.245223045349121, "step": 50350 }, { "epoch": 0.6, "learning_rate": 2.0415654902402804e-06, "logits/chosen": -2.9171957969665527, "logits/rejected": -2.382978916168213, "logps/chosen": -103.82814025878906, "logps/rejected": -925.8728637695312, "loss": 0.113, "rewards/accuracies": 1.0, "rewards/chosen": -0.5187405347824097, "rewards/margins": 8.351099967956543, "rewards/rejected": -8.869840621948242, "step": 50360 }, { "epoch": 0.6, "learning_rate": 2.0405386228362467e-06, "logits/chosen": -2.906780958175659, "logits/rejected": -2.6245200634002686, "logps/chosen": -70.15863800048828, "logps/rejected": -811.3997802734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3012312054634094, "rewards/margins": 7.44326639175415, "rewards/rejected": -7.744497776031494, "step": 50370 }, { "epoch": 0.6, "learning_rate": 2.0395118356528942e-06, "logits/chosen": -2.8524763584136963, "logits/rejected": -2.3389458656311035, "logps/chosen": -107.30928802490234, "logps/rejected": -950.4244995117188, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5966057777404785, "rewards/margins": 8.53236198425293, "rewards/rejected": -9.128968238830566, "step": 50380 }, { "epoch": 0.6, "learning_rate": 2.0384851288694957e-06, "logits/chosen": -2.853217601776123, "logits/rejected": -2.3101813793182373, "logps/chosen": -99.57221984863281, "logps/rejected": -886.2722778320312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5625537037849426, "rewards/margins": 7.916535377502441, "rewards/rejected": -8.479089736938477, "step": 50390 }, { "epoch": 0.6, "learning_rate": 2.0374585026653118e-06, "logits/chosen": -2.895062208175659, "logits/rejected": -2.2826716899871826, "logps/chosen": -97.9295654296875, "logps/rejected": -882.9309692382812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.4901720881462097, "rewards/margins": 7.942175388336182, "rewards/rejected": -8.432347297668457, "step": 50400 }, { "epoch": 0.6, "learning_rate": 2.0364319572195894e-06, "logits/chosen": -2.9180760383605957, "logits/rejected": -2.5873372554779053, "logps/chosen": -90.37451171875, "logps/rejected": -796.2142333984375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5251516103744507, "rewards/margins": 7.069859981536865, "rewards/rejected": -7.5950117111206055, "step": 50410 }, { "epoch": 0.6, "learning_rate": 2.035405492711559e-06, "logits/chosen": -2.924654006958008, "logits/rejected": -2.2266898155212402, "logps/chosen": -114.3722152709961, "logps/rejected": -1020.7625122070312, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5910651087760925, "rewards/margins": 9.214274406433105, "rewards/rejected": -9.805339813232422, "step": 50420 }, { "epoch": 0.6, "learning_rate": 2.0343791093204392e-06, "logits/chosen": -2.8697428703308105, "logits/rejected": -2.1769423484802246, "logps/chosen": -115.24346923828125, "logps/rejected": -951.4993896484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6200100779533386, "rewards/margins": 8.502991676330566, "rewards/rejected": -9.123001098632812, "step": 50430 }, { "epoch": 0.6, "learning_rate": 2.0333528072254335e-06, "logits/chosen": -2.8252930641174316, "logits/rejected": -2.150974988937378, "logps/chosen": -117.29862213134766, "logps/rejected": -920.6268310546875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.637033998966217, "rewards/margins": 8.178937911987305, "rewards/rejected": -8.815972328186035, "step": 50440 }, { "epoch": 0.6, "learning_rate": 2.032326586605732e-06, "logits/chosen": -2.9256131649017334, "logits/rejected": -2.5657124519348145, "logps/chosen": -78.42304992675781, "logps/rejected": -830.86767578125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.35288819670677185, "rewards/margins": 7.563179969787598, "rewards/rejected": -7.916066646575928, "step": 50450 }, { "epoch": 0.6, "learning_rate": 2.0313004476405098e-06, "logits/chosen": -2.8596341609954834, "logits/rejected": -2.006237506866455, "logps/chosen": -168.95864868164062, "logps/rejected": -1099.66015625, "loss": 0.1336, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.015882134437561, "rewards/margins": 9.549473762512207, "rewards/rejected": -10.56535530090332, "step": 50460 }, { "epoch": 0.6, "learning_rate": 2.0302743905089274e-06, "logits/chosen": -2.9042820930480957, "logits/rejected": -2.3735878467559814, "logps/chosen": -92.7354507446289, "logps/rejected": -856.7423095703125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4863322675228119, "rewards/margins": 7.69149923324585, "rewards/rejected": -8.177831649780273, "step": 50470 }, { "epoch": 0.6, "learning_rate": 2.029248415390132e-06, "logits/chosen": -2.8972480297088623, "logits/rejected": -2.273681402206421, "logps/chosen": -154.63641357421875, "logps/rejected": -964.3927001953125, "loss": 0.1667, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.989449143409729, "rewards/margins": 8.238846778869629, "rewards/rejected": -9.228296279907227, "step": 50480 }, { "epoch": 0.6, "learning_rate": 2.0282225224632555e-06, "logits/chosen": -2.8773581981658936, "logits/rejected": -2.09063458442688, "logps/chosen": -124.32574462890625, "logps/rejected": -1058.463134765625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7550703287124634, "rewards/margins": 9.425910949707031, "rewards/rejected": -10.180980682373047, "step": 50490 }, { "epoch": 0.6, "learning_rate": 2.027196711907417e-06, "logits/chosen": -2.8539092540740967, "logits/rejected": -2.3571999073028564, "logps/chosen": -119.2994613647461, "logps/rejected": -856.5211181640625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7601064443588257, "rewards/margins": 7.425313472747803, "rewards/rejected": -8.185419082641602, "step": 50500 }, { "epoch": 0.6, "learning_rate": 2.0261709839017183e-06, "logits/chosen": -2.862675428390503, "logits/rejected": -2.2368035316467285, "logps/chosen": -102.44105529785156, "logps/rejected": -947.4456176757812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.578548014163971, "rewards/margins": 8.524806022644043, "rewards/rejected": -9.103354454040527, "step": 50510 }, { "epoch": 0.6, "learning_rate": 2.0251453386252507e-06, "logits/chosen": -2.8895697593688965, "logits/rejected": -2.20819354057312, "logps/chosen": -115.160888671875, "logps/rejected": -934.61767578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6465026140213013, "rewards/margins": 8.299081802368164, "rewards/rejected": -8.945585250854492, "step": 50520 }, { "epoch": 0.6, "learning_rate": 2.0241197762570885e-06, "logits/chosen": -2.845149517059326, "logits/rejected": -2.2481777667999268, "logps/chosen": -103.48942565917969, "logps/rejected": -895.4576416015625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5642345547676086, "rewards/margins": 8.003677368164062, "rewards/rejected": -8.567912101745605, "step": 50530 }, { "epoch": 0.6, "learning_rate": 2.023094296976291e-06, "logits/chosen": -2.84843373298645, "logits/rejected": -2.356870174407959, "logps/chosen": -102.88462829589844, "logps/rejected": -841.8526611328125, "loss": 0.0989, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5988581776618958, "rewards/margins": 7.452398777008057, "rewards/rejected": -8.051257133483887, "step": 50540 }, { "epoch": 0.61, "learning_rate": 2.0220689009619045e-06, "logits/chosen": -2.8801732063293457, "logits/rejected": -2.333868980407715, "logps/chosen": -110.2883071899414, "logps/rejected": -928.3516845703125, "loss": 0.0982, "rewards/accuracies": 1.0, "rewards/chosen": -0.6363421678543091, "rewards/margins": 8.247771263122559, "rewards/rejected": -8.884114265441895, "step": 50550 }, { "epoch": 0.61, "learning_rate": 2.02104358839296e-06, "logits/chosen": -2.9300498962402344, "logits/rejected": -2.4888436794281006, "logps/chosen": -76.32130432128906, "logps/rejected": -881.107421875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.34796619415283203, "rewards/margins": 8.092443466186523, "rewards/rejected": -8.440411567687988, "step": 50560 }, { "epoch": 0.61, "learning_rate": 2.0200183594484763e-06, "logits/chosen": -2.8681533336639404, "logits/rejected": -2.5924198627471924, "logps/chosen": -69.4319839477539, "logps/rejected": -775.9595947265625, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -0.2977755665779114, "rewards/margins": 7.088636875152588, "rewards/rejected": -7.38641357421875, "step": 50570 }, { "epoch": 0.61, "learning_rate": 2.018993214307453e-06, "logits/chosen": -2.829127311706543, "logits/rejected": -2.2598068714141846, "logps/chosen": -110.5269546508789, "logps/rejected": -963.4403076171875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6082953810691833, "rewards/margins": 8.635478019714355, "rewards/rejected": -9.2437744140625, "step": 50580 }, { "epoch": 0.61, "learning_rate": 2.0179681531488783e-06, "logits/chosen": -2.8478198051452637, "logits/rejected": -2.1183667182922363, "logps/chosen": -139.5872039794922, "logps/rejected": -1039.3204345703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8042076826095581, "rewards/margins": 9.164440155029297, "rewards/rejected": -9.968648910522461, "step": 50590 }, { "epoch": 0.61, "learning_rate": 2.016943176151726e-06, "logits/chosen": -2.9327967166900635, "logits/rejected": -2.5539679527282715, "logps/chosen": -76.1171875, "logps/rejected": -830.4893798828125, "loss": 0.1296, "rewards/accuracies": 1.0, "rewards/chosen": -0.3551015257835388, "rewards/margins": 7.580512046813965, "rewards/rejected": -7.935614109039307, "step": 50600 }, { "epoch": 0.61, "learning_rate": 2.015918283494953e-06, "logits/chosen": -2.8720479011535645, "logits/rejected": -2.289320707321167, "logps/chosen": -137.6636505126953, "logps/rejected": -969.85302734375, "loss": 0.2634, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8303556442260742, "rewards/margins": 8.475863456726074, "rewards/rejected": -9.306220054626465, "step": 50610 }, { "epoch": 0.61, "learning_rate": 2.0148934753575035e-06, "logits/chosen": -2.8704235553741455, "logits/rejected": -2.1690261363983154, "logps/chosen": -101.1826400756836, "logps/rejected": -961.3653564453125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5243777632713318, "rewards/margins": 8.698970794677734, "rewards/rejected": -9.223348617553711, "step": 50620 }, { "epoch": 0.61, "learning_rate": 2.013868751918306e-06, "logits/chosen": -2.815708875656128, "logits/rejected": -1.9627580642700195, "logps/chosen": -133.29112243652344, "logps/rejected": -994.8040161132812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7103732824325562, "rewards/margins": 8.845527648925781, "rewards/rejected": -9.555902481079102, "step": 50630 }, { "epoch": 0.61, "learning_rate": 2.0128441133562738e-06, "logits/chosen": -2.8819503784179688, "logits/rejected": -2.187971353530884, "logps/chosen": -127.20185852050781, "logps/rejected": -883.8112182617188, "loss": 0.0855, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7665770649909973, "rewards/margins": 7.685021877288818, "rewards/rejected": -8.451600074768066, "step": 50640 }, { "epoch": 0.61, "learning_rate": 2.0118195598503066e-06, "logits/chosen": -2.8525633811950684, "logits/rejected": -2.428128719329834, "logps/chosen": -102.4242935180664, "logps/rejected": -798.9014282226562, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5777639150619507, "rewards/margins": 7.02394962310791, "rewards/rejected": -7.601712703704834, "step": 50650 }, { "epoch": 0.61, "learning_rate": 2.010795091579288e-06, "logits/chosen": -2.9256625175476074, "logits/rejected": -2.3196499347686768, "logps/chosen": -145.5330352783203, "logps/rejected": -880.6101684570312, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -0.9709264039993286, "rewards/margins": 7.443331718444824, "rewards/rejected": -8.41425895690918, "step": 50660 }, { "epoch": 0.61, "learning_rate": 2.0097707087220887e-06, "logits/chosen": -2.8872437477111816, "logits/rejected": -2.4759490489959717, "logps/chosen": -108.89794921875, "logps/rejected": -936.52490234375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.6747750043869019, "rewards/margins": 8.285585403442383, "rewards/rejected": -8.960359573364258, "step": 50670 }, { "epoch": 0.61, "learning_rate": 2.008746411457561e-06, "logits/chosen": -2.878861427307129, "logits/rejected": -2.2887167930603027, "logps/chosen": -96.93983459472656, "logps/rejected": -937.1358642578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5036347508430481, "rewards/margins": 8.479945182800293, "rewards/rejected": -8.983580589294434, "step": 50680 }, { "epoch": 0.61, "learning_rate": 2.0077221999645457e-06, "logits/chosen": -2.901115894317627, "logits/rejected": -2.3763742446899414, "logps/chosen": -97.36279296875, "logps/rejected": -901.9925537109375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5243688821792603, "rewards/margins": 8.117879867553711, "rewards/rejected": -8.642248153686523, "step": 50690 }, { "epoch": 0.61, "learning_rate": 2.006698074421868e-06, "logits/chosen": -2.908539056777954, "logits/rejected": -2.284745216369629, "logps/chosen": -121.00593566894531, "logps/rejected": -986.3016357421875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.7034982442855835, "rewards/margins": 8.751843452453613, "rewards/rejected": -9.455342292785645, "step": 50700 }, { "epoch": 0.61, "learning_rate": 2.0056740350083353e-06, "logits/chosen": -2.8732526302337646, "logits/rejected": -2.516402006149292, "logps/chosen": -94.82818603515625, "logps/rejected": -842.0032958984375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5313889980316162, "rewards/margins": 7.498464107513428, "rewards/rejected": -8.029851913452148, "step": 50710 }, { "epoch": 0.61, "learning_rate": 2.0046500819027437e-06, "logits/chosen": -2.8664183616638184, "logits/rejected": -2.428446054458618, "logps/chosen": -106.27690124511719, "logps/rejected": -934.75390625, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.6243381500244141, "rewards/margins": 8.334521293640137, "rewards/rejected": -8.95885944366455, "step": 50720 }, { "epoch": 0.61, "learning_rate": 2.003626215283872e-06, "logits/chosen": -2.8818297386169434, "logits/rejected": -2.291607618331909, "logps/chosen": -135.02816772460938, "logps/rejected": -937.1944580078125, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -0.8618043065071106, "rewards/margins": 8.112937927246094, "rewards/rejected": -8.974742889404297, "step": 50730 }, { "epoch": 0.61, "learning_rate": 2.0026024353304845e-06, "logits/chosen": -2.90018892288208, "logits/rejected": -2.449061393737793, "logps/chosen": -90.77003479003906, "logps/rejected": -904.3519287109375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.44557490944862366, "rewards/margins": 8.203665733337402, "rewards/rejected": -8.64924144744873, "step": 50740 }, { "epoch": 0.61, "learning_rate": 2.00157874222133e-06, "logits/chosen": -2.8619143962860107, "logits/rejected": -2.3308730125427246, "logps/chosen": -114.0223388671875, "logps/rejected": -914.8125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.658307671546936, "rewards/margins": 8.094015121459961, "rewards/rejected": -8.75232219696045, "step": 50750 }, { "epoch": 0.61, "learning_rate": 2.0005551361351427e-06, "logits/chosen": -2.8749852180480957, "logits/rejected": -2.400688409805298, "logps/chosen": -109.22218322753906, "logps/rejected": -845.3366088867188, "loss": 0.1142, "rewards/accuracies": 1.0, "rewards/chosen": -0.6528666019439697, "rewards/margins": 7.419568061828613, "rewards/rejected": -8.072433471679688, "step": 50760 }, { "epoch": 0.61, "learning_rate": 1.999531617250642e-06, "logits/chosen": -2.8786699771881104, "logits/rejected": -2.2728114128112793, "logps/chosen": -100.41806030273438, "logps/rejected": -942.9220581054688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5278473496437073, "rewards/margins": 8.518209457397461, "rewards/rejected": -9.046056747436523, "step": 50770 }, { "epoch": 0.61, "learning_rate": 1.9985081857465307e-06, "logits/chosen": -2.892836570739746, "logits/rejected": -2.402571439743042, "logps/chosen": -100.37889099121094, "logps/rejected": -834.6520385742188, "loss": 0.1093, "rewards/accuracies": 1.0, "rewards/chosen": -0.5332473516464233, "rewards/margins": 7.442351341247559, "rewards/rejected": -7.975599765777588, "step": 50780 }, { "epoch": 0.61, "learning_rate": 1.997484841801497e-06, "logits/chosen": -2.898707389831543, "logits/rejected": -2.4191782474517822, "logps/chosen": -98.08137512207031, "logps/rejected": -883.115234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5804845094680786, "rewards/margins": 7.886185646057129, "rewards/rejected": -8.466670036315918, "step": 50790 }, { "epoch": 0.61, "learning_rate": 1.996461585594215e-06, "logits/chosen": -2.86018705368042, "logits/rejected": -2.4219040870666504, "logps/chosen": -106.46038818359375, "logps/rejected": -830.5592041015625, "loss": 0.0421, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.619839072227478, "rewards/margins": 7.304350852966309, "rewards/rejected": -7.924190521240234, "step": 50800 }, { "epoch": 0.61, "learning_rate": 1.9954384173033415e-06, "logits/chosen": -2.933321475982666, "logits/rejected": -2.4399094581604004, "logps/chosen": -100.60533142089844, "logps/rejected": -944.8349609375, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4871024191379547, "rewards/margins": 8.575638771057129, "rewards/rejected": -9.06274127960205, "step": 50810 }, { "epoch": 0.61, "learning_rate": 1.9944153371075185e-06, "logits/chosen": -2.862766981124878, "logits/rejected": -2.1347782611846924, "logps/chosen": -130.2900848388672, "logps/rejected": -962.6865234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.752697765827179, "rewards/margins": 8.4878511428833, "rewards/rejected": -9.240549087524414, "step": 50820 }, { "epoch": 0.61, "learning_rate": 1.993392345185374e-06, "logits/chosen": -2.9122636318206787, "logits/rejected": -2.277299165725708, "logps/chosen": -114.74241638183594, "logps/rejected": -941.1642456054688, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5840070843696594, "rewards/margins": 8.432655334472656, "rewards/rejected": -9.016661643981934, "step": 50830 }, { "epoch": 0.61, "learning_rate": 1.9923694417155186e-06, "logits/chosen": -2.905513286590576, "logits/rejected": -2.4376747608184814, "logps/chosen": -132.12989807128906, "logps/rejected": -830.3302001953125, "loss": 0.1789, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9008874893188477, "rewards/margins": 7.026568412780762, "rewards/rejected": -7.927455902099609, "step": 50840 }, { "epoch": 0.61, "learning_rate": 1.9913466268765485e-06, "logits/chosen": -2.8608641624450684, "logits/rejected": -2.3902153968811035, "logps/chosen": -89.47029113769531, "logps/rejected": -867.0958251953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.4798739552497864, "rewards/margins": 7.795149803161621, "rewards/rejected": -8.275023460388184, "step": 50850 }, { "epoch": 0.61, "learning_rate": 1.9903239008470445e-06, "logits/chosen": -2.8970069885253906, "logits/rejected": -2.3637874126434326, "logps/chosen": -99.39966583251953, "logps/rejected": -948.828125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5420578718185425, "rewards/margins": 8.562250137329102, "rewards/rejected": -9.10430908203125, "step": 50860 }, { "epoch": 0.61, "learning_rate": 1.989301263805572e-06, "logits/chosen": -2.8651809692382812, "logits/rejected": -2.128796339035034, "logps/chosen": -129.18112182617188, "logps/rejected": -1058.0386962890625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7710655927658081, "rewards/margins": 9.404297828674316, "rewards/rejected": -10.175363540649414, "step": 50870 }, { "epoch": 0.61, "learning_rate": 1.988278715930681e-06, "logits/chosen": -2.890350818634033, "logits/rejected": -2.4935102462768555, "logps/chosen": -100.39009094238281, "logps/rejected": -847.4094848632812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5277038812637329, "rewards/margins": 7.5605926513671875, "rewards/rejected": -8.088295936584473, "step": 50880 }, { "epoch": 0.61, "learning_rate": 1.9872562574009046e-06, "logits/chosen": -2.844132661819458, "logits/rejected": -2.1404573917388916, "logps/chosen": -131.89231872558594, "logps/rejected": -1037.450439453125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8132607340812683, "rewards/margins": 9.167193412780762, "rewards/rejected": -9.98045539855957, "step": 50890 }, { "epoch": 0.61, "learning_rate": 1.9862338883947617e-06, "logits/chosen": -2.83505916595459, "logits/rejected": -2.2185797691345215, "logps/chosen": -128.39051818847656, "logps/rejected": -1066.3695068359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7391246557235718, "rewards/margins": 9.53538990020752, "rewards/rejected": -10.274514198303223, "step": 50900 }, { "epoch": 0.61, "learning_rate": 1.9852116090907546e-06, "logits/chosen": -2.920152187347412, "logits/rejected": -1.965829610824585, "logps/chosen": -137.78585815429688, "logps/rejected": -1082.143310546875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8332370519638062, "rewards/margins": 9.565481185913086, "rewards/rejected": -10.398717880249023, "step": 50910 }, { "epoch": 0.61, "learning_rate": 1.9841894196673716e-06, "logits/chosen": -2.8426365852355957, "logits/rejected": -1.9372791051864624, "logps/chosen": -141.06643676757812, "logps/rejected": -1232.525146484375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8692536354064941, "rewards/margins": 11.033952713012695, "rewards/rejected": -11.903206825256348, "step": 50920 }, { "epoch": 0.61, "learning_rate": 1.9831673203030827e-06, "logits/chosen": -2.8499884605407715, "logits/rejected": -2.219583511352539, "logps/chosen": -147.0430908203125, "logps/rejected": -1118.56298828125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8842730522155762, "rewards/margins": 9.885384559631348, "rewards/rejected": -10.769657135009766, "step": 50930 }, { "epoch": 0.61, "learning_rate": 1.982145311176344e-06, "logits/chosen": -2.892375946044922, "logits/rejected": -2.2878990173339844, "logps/chosen": -116.39139556884766, "logps/rejected": -921.52783203125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6926146745681763, "rewards/margins": 8.127131462097168, "rewards/rejected": -8.819745063781738, "step": 50940 }, { "epoch": 0.61, "learning_rate": 1.9811233924655963e-06, "logits/chosen": -2.8854634761810303, "logits/rejected": -2.2164199352264404, "logps/chosen": -140.6182403564453, "logps/rejected": -1018.5289306640625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8694101572036743, "rewards/margins": 8.910770416259766, "rewards/rejected": -9.780181884765625, "step": 50950 }, { "epoch": 0.61, "learning_rate": 1.980101564349263e-06, "logits/chosen": -2.906505584716797, "logits/rejected": -2.2561604976654053, "logps/chosen": -123.5053939819336, "logps/rejected": -1054.740478515625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.736217737197876, "rewards/margins": 9.400546073913574, "rewards/rejected": -10.136763572692871, "step": 50960 }, { "epoch": 0.61, "learning_rate": 1.9790798270057526e-06, "logits/chosen": -2.8877320289611816, "logits/rejected": -2.332078218460083, "logps/chosen": -105.6409683227539, "logps/rejected": -909.6326293945312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6086495518684387, "rewards/margins": 8.093160629272461, "rewards/rejected": -8.70180892944336, "step": 50970 }, { "epoch": 0.61, "learning_rate": 1.9780581806134578e-06, "logits/chosen": -2.861374616622925, "logits/rejected": -2.4132354259490967, "logps/chosen": -102.43321228027344, "logps/rejected": -917.8568115234375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6030707955360413, "rewards/margins": 8.191902160644531, "rewards/rejected": -8.794974327087402, "step": 50980 }, { "epoch": 0.61, "learning_rate": 1.9770366253507543e-06, "logits/chosen": -2.868851900100708, "logits/rejected": -2.2296807765960693, "logps/chosen": -122.43524169921875, "logps/rejected": -1022.0774536132812, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.7145793437957764, "rewards/margins": 9.10307788848877, "rewards/rejected": -9.817659378051758, "step": 50990 }, { "epoch": 0.61, "learning_rate": 1.9760151613960045e-06, "logits/chosen": -2.8941705226898193, "logits/rejected": -2.3679282665252686, "logps/chosen": -140.50485229492188, "logps/rejected": -849.9664306640625, "loss": 0.2266, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9752994775772095, "rewards/margins": 7.136685371398926, "rewards/rejected": -8.111984252929688, "step": 51000 }, { "epoch": 0.61, "eval_logits/chosen": -2.865036964416504, "eval_logits/rejected": -1.6744871139526367, "eval_logps/chosen": -256.4992370605469, "eval_logps/rejected": -1148.8468017578125, "eval_loss": 0.0013532256707549095, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -1.953189492225647, "eval_rewards/margins": 9.068024635314941, "eval_rewards/rejected": -11.021214485168457, "eval_runtime": 1.2168, "eval_samples_per_second": 4.109, "eval_steps_per_second": 2.466, "step": 51000 }, { "epoch": 0.61, "learning_rate": 1.974993788927551e-06, "logits/chosen": -2.8814120292663574, "logits/rejected": -2.2402749061584473, "logps/chosen": -120.06050109863281, "logps/rejected": -933.3014526367188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7028130292892456, "rewards/margins": 8.25236701965332, "rewards/rejected": -8.955178260803223, "step": 51010 }, { "epoch": 0.61, "learning_rate": 1.973972508123725e-06, "logits/chosen": -2.838388442993164, "logits/rejected": -2.4561150074005127, "logps/chosen": -103.21504974365234, "logps/rejected": -849.3604736328125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6217228174209595, "rewards/margins": 7.493846893310547, "rewards/rejected": -8.115569114685059, "step": 51020 }, { "epoch": 0.61, "learning_rate": 1.9729513191628375e-06, "logits/chosen": -2.8466544151306152, "logits/rejected": -2.3446145057678223, "logps/chosen": -114.2154769897461, "logps/rejected": -882.16259765625, "loss": 0.1105, "rewards/accuracies": 1.0, "rewards/chosen": -0.7004423141479492, "rewards/margins": 7.741217136383057, "rewards/rejected": -8.441658020019531, "step": 51030 }, { "epoch": 0.61, "learning_rate": 1.9719302222231863e-06, "logits/chosen": -2.8431670665740967, "logits/rejected": -2.3703110218048096, "logps/chosen": -120.13661193847656, "logps/rejected": -890.7874145507812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7019785642623901, "rewards/margins": 7.811505317687988, "rewards/rejected": -8.513484001159668, "step": 51040 }, { "epoch": 0.61, "learning_rate": 1.9709092174830514e-06, "logits/chosen": -2.8930163383483887, "logits/rejected": -2.3566250801086426, "logps/chosen": -110.9451675415039, "logps/rejected": -915.8106689453125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6901068091392517, "rewards/margins": 8.08339786529541, "rewards/rejected": -8.773504257202148, "step": 51050 }, { "epoch": 0.61, "learning_rate": 1.9698883051206978e-06, "logits/chosen": -2.853353261947632, "logits/rejected": -2.3065197467803955, "logps/chosen": -161.55642700195312, "logps/rejected": -989.5399169921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.08487868309021, "rewards/margins": 8.406707763671875, "rewards/rejected": -9.491584777832031, "step": 51060 }, { "epoch": 0.61, "learning_rate": 1.9688674853143733e-06, "logits/chosen": -2.9260196685791016, "logits/rejected": -2.312561511993408, "logps/chosen": -109.92787170410156, "logps/rejected": -848.8294067382812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6461072564125061, "rewards/margins": 7.445476531982422, "rewards/rejected": -8.091584205627441, "step": 51070 }, { "epoch": 0.61, "learning_rate": 1.967846758242311e-06, "logits/chosen": -2.8668532371520996, "logits/rejected": -2.092345714569092, "logps/chosen": -148.27883911132812, "logps/rejected": -1027.1474609375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9075284004211426, "rewards/margins": 8.962186813354492, "rewards/rejected": -9.869714736938477, "step": 51080 }, { "epoch": 0.61, "learning_rate": 1.966826124082727e-06, "logits/chosen": -2.8287739753723145, "logits/rejected": -2.2997944355010986, "logps/chosen": -110.79378509521484, "logps/rejected": -937.3455200195312, "loss": 0.1664, "rewards/accuracies": 1.0, "rewards/chosen": -0.6479784250259399, "rewards/margins": 8.338296890258789, "rewards/rejected": -8.986274719238281, "step": 51090 }, { "epoch": 0.61, "learning_rate": 1.9658055830138205e-06, "logits/chosen": -2.8477015495300293, "logits/rejected": -2.334623336791992, "logps/chosen": -115.02681732177734, "logps/rejected": -940.00732421875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7139445543289185, "rewards/margins": 8.315786361694336, "rewards/rejected": -9.029730796813965, "step": 51100 }, { "epoch": 0.61, "learning_rate": 1.9647851352137752e-06, "logits/chosen": -2.8784310817718506, "logits/rejected": -2.495021343231201, "logps/chosen": -92.31941223144531, "logps/rejected": -860.2694091796875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.48941460251808167, "rewards/margins": 7.742430686950684, "rewards/rejected": -8.23184585571289, "step": 51110 }, { "epoch": 0.61, "learning_rate": 1.96376478086076e-06, "logits/chosen": -2.897728443145752, "logits/rejected": -2.5122158527374268, "logps/chosen": -98.68479919433594, "logps/rejected": -860.7312622070312, "loss": 0.1261, "rewards/accuracies": 1.0, "rewards/chosen": -0.5612891316413879, "rewards/margins": 7.66131067276001, "rewards/rejected": -8.222599983215332, "step": 51120 }, { "epoch": 0.61, "learning_rate": 1.9627445201329236e-06, "logits/chosen": -2.89882493019104, "logits/rejected": -2.3756206035614014, "logps/chosen": -107.68072509765625, "logps/rejected": -899.51806640625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6587400436401367, "rewards/margins": 7.948716163635254, "rewards/rejected": -8.60745620727539, "step": 51130 }, { "epoch": 0.61, "learning_rate": 1.9617243532084022e-06, "logits/chosen": -2.8583407402038574, "logits/rejected": -2.4063923358917236, "logps/chosen": -98.3011474609375, "logps/rejected": -941.6414794921875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5395575761795044, "rewards/margins": 8.489046096801758, "rewards/rejected": -9.028604507446289, "step": 51140 }, { "epoch": 0.61, "learning_rate": 1.9607042802653144e-06, "logits/chosen": -2.88563871383667, "logits/rejected": -2.338369846343994, "logps/chosen": -110.6205062866211, "logps/rejected": -872.3670043945312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.643710732460022, "rewards/margins": 7.6887526512146, "rewards/rejected": -8.332463264465332, "step": 51150 }, { "epoch": 0.61, "learning_rate": 1.9596843014817603e-06, "logits/chosen": -2.825223445892334, "logits/rejected": -2.1791739463806152, "logps/chosen": -131.10379028320312, "logps/rejected": -971.1075439453125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.825465202331543, "rewards/margins": 8.491479873657227, "rewards/rejected": -9.31694507598877, "step": 51160 }, { "epoch": 0.61, "learning_rate": 1.958664417035827e-06, "logits/chosen": -2.871706008911133, "logits/rejected": -2.1772265434265137, "logps/chosen": -132.42767333984375, "logps/rejected": -940.3837890625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8223785161972046, "rewards/margins": 8.193275451660156, "rewards/rejected": -9.015653610229492, "step": 51170 }, { "epoch": 0.61, "learning_rate": 1.9576446271055826e-06, "logits/chosen": -2.885176181793213, "logits/rejected": -2.176661252975464, "logps/chosen": -134.61036682128906, "logps/rejected": -1034.123291015625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8458666801452637, "rewards/margins": 9.098763465881348, "rewards/rejected": -9.944629669189453, "step": 51180 }, { "epoch": 0.61, "learning_rate": 1.956624931869081e-06, "logits/chosen": -2.822495222091675, "logits/rejected": -2.074058771133423, "logps/chosen": -160.22463989257812, "logps/rejected": -963.6602783203125, "loss": 0.1603, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0941426753997803, "rewards/margins": 8.141450881958008, "rewards/rejected": -9.235594749450684, "step": 51190 }, { "epoch": 0.61, "learning_rate": 1.9556053315043564e-06, "logits/chosen": -2.848428726196289, "logits/rejected": -2.1632533073425293, "logps/chosen": -154.7510986328125, "logps/rejected": -1003.8629150390625, "loss": 0.0976, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0299961566925049, "rewards/margins": 8.603516578674316, "rewards/rejected": -9.633512496948242, "step": 51200 }, { "epoch": 0.61, "learning_rate": 1.9545858261894287e-06, "logits/chosen": -2.8552443981170654, "logits/rejected": -2.2414581775665283, "logps/chosen": -113.56075286865234, "logps/rejected": -895.3048706054688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6407123804092407, "rewards/margins": 7.923886299133301, "rewards/rejected": -8.564597129821777, "step": 51210 }, { "epoch": 0.61, "learning_rate": 1.9535664161023016e-06, "logits/chosen": -2.9013729095458984, "logits/rejected": -2.439940929412842, "logps/chosen": -100.97830200195312, "logps/rejected": -856.2906494140625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5623136758804321, "rewards/margins": 7.627206325531006, "rewards/rejected": -8.189520835876465, "step": 51220 }, { "epoch": 0.61, "learning_rate": 1.95254710142096e-06, "logits/chosen": -2.8041326999664307, "logits/rejected": -2.032684564590454, "logps/chosen": -130.0994415283203, "logps/rejected": -998.345703125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7246134281158447, "rewards/margins": 8.865998268127441, "rewards/rejected": -9.590611457824707, "step": 51230 }, { "epoch": 0.61, "learning_rate": 1.9515278823233743e-06, "logits/chosen": -2.8772854804992676, "logits/rejected": -2.215684413909912, "logps/chosen": -122.3088150024414, "logps/rejected": -1001.2462768554688, "loss": 0.019, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7202790975570679, "rewards/margins": 8.869834899902344, "rewards/rejected": -9.59011459350586, "step": 51240 }, { "epoch": 0.61, "learning_rate": 1.9505087589874973e-06, "logits/chosen": -2.8954851627349854, "logits/rejected": -2.230299949645996, "logps/chosen": -134.2687225341797, "logps/rejected": -985.2294921875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7918144464492798, "rewards/margins": 8.650131225585938, "rewards/rejected": -9.441946029663086, "step": 51250 }, { "epoch": 0.61, "learning_rate": 1.9494897315912644e-06, "logits/chosen": -2.8593201637268066, "logits/rejected": -2.1968319416046143, "logps/chosen": -116.96128845214844, "logps/rejected": -959.9949340820312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6747527122497559, "rewards/margins": 8.543025016784668, "rewards/rejected": -9.217778205871582, "step": 51260 }, { "epoch": 0.61, "learning_rate": 1.9484708003125954e-06, "logits/chosen": -2.9280452728271484, "logits/rejected": -2.387681007385254, "logps/chosen": -110.47804260253906, "logps/rejected": -866.9640502929688, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6510635018348694, "rewards/margins": 7.6327362060546875, "rewards/rejected": -8.28380012512207, "step": 51270 }, { "epoch": 0.61, "learning_rate": 1.947451965329392e-06, "logits/chosen": -2.8169732093811035, "logits/rejected": -2.2715768814086914, "logps/chosen": -106.70280456542969, "logps/rejected": -991.9739379882812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6204351782798767, "rewards/margins": 8.893434524536133, "rewards/rejected": -9.51386833190918, "step": 51280 }, { "epoch": 0.61, "learning_rate": 1.946433226819542e-06, "logits/chosen": -2.8492157459259033, "logits/rejected": -2.2061519622802734, "logps/chosen": -112.08390045166016, "logps/rejected": -926.3909912109375, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -0.6626687049865723, "rewards/margins": 8.214068412780762, "rewards/rejected": -8.876737594604492, "step": 51290 }, { "epoch": 0.61, "learning_rate": 1.945414584960913e-06, "logits/chosen": -2.881934404373169, "logits/rejected": -2.124484062194824, "logps/chosen": -142.4234619140625, "logps/rejected": -1058.998046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8805004954338074, "rewards/margins": 9.302366256713867, "rewards/rejected": -10.182866096496582, "step": 51300 }, { "epoch": 0.61, "learning_rate": 1.9443960399313573e-06, "logits/chosen": -2.8808071613311768, "logits/rejected": -2.270432949066162, "logps/chosen": -104.08033752441406, "logps/rejected": -964.927734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6120225191116333, "rewards/margins": 8.628682136535645, "rewards/rejected": -9.240704536437988, "step": 51310 }, { "epoch": 0.61, "learning_rate": 1.9433775919087093e-06, "logits/chosen": -2.9201865196228027, "logits/rejected": -2.530726909637451, "logps/chosen": -99.2814712524414, "logps/rejected": -841.3756713867188, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5549847483634949, "rewards/margins": 7.470211029052734, "rewards/rejected": -8.025196075439453, "step": 51320 }, { "epoch": 0.61, "learning_rate": 1.942359241070788e-06, "logits/chosen": -2.8410584926605225, "logits/rejected": -2.3911890983581543, "logps/chosen": -83.47514343261719, "logps/rejected": -891.8037109375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4397568702697754, "rewards/margins": 8.111627578735352, "rewards/rejected": -8.551383972167969, "step": 51330 }, { "epoch": 0.61, "learning_rate": 1.941340987595395e-06, "logits/chosen": -2.8413093090057373, "logits/rejected": -2.2949538230895996, "logps/chosen": -128.34014892578125, "logps/rejected": -901.7218627929688, "loss": 0.0998, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8290309906005859, "rewards/margins": 7.816580772399902, "rewards/rejected": -8.645611763000488, "step": 51340 }, { "epoch": 0.61, "learning_rate": 1.9403228316603136e-06, "logits/chosen": -2.8636765480041504, "logits/rejected": -1.9555644989013672, "logps/chosen": -151.50672912597656, "logps/rejected": -1064.249267578125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9580557942390442, "rewards/margins": 9.290426254272461, "rewards/rejected": -10.248483657836914, "step": 51350 }, { "epoch": 0.61, "learning_rate": 1.9393047734433113e-06, "logits/chosen": -2.8473477363586426, "logits/rejected": -1.8853302001953125, "logps/chosen": -153.9946746826172, "logps/rejected": -987.3995971679688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9599797129631042, "rewards/margins": 8.49832534790039, "rewards/rejected": -9.458306312561035, "step": 51360 }, { "epoch": 0.61, "learning_rate": 1.9382868131221384e-06, "logits/chosen": -2.9061782360076904, "logits/rejected": -2.333091974258423, "logps/chosen": -114.84859466552734, "logps/rejected": -1009.8436279296875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6561306118965149, "rewards/margins": 9.0560941696167, "rewards/rejected": -9.712224960327148, "step": 51370 }, { "epoch": 0.62, "learning_rate": 1.937268950874528e-06, "logits/chosen": -2.867079496383667, "logits/rejected": -2.0405421257019043, "logps/chosen": -123.21732330322266, "logps/rejected": -1007.3186645507812, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.7116074562072754, "rewards/margins": 8.968188285827637, "rewards/rejected": -9.67979621887207, "step": 51380 }, { "epoch": 0.62, "learning_rate": 1.9362511868781955e-06, "logits/chosen": -2.8597750663757324, "logits/rejected": -2.244455337524414, "logps/chosen": -121.39112854003906, "logps/rejected": -938.9730224609375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7459288835525513, "rewards/margins": 8.251227378845215, "rewards/rejected": -8.997157096862793, "step": 51390 }, { "epoch": 0.62, "learning_rate": 1.9352335213108404e-06, "logits/chosen": -2.9037129878997803, "logits/rejected": -2.4326863288879395, "logps/chosen": -91.02580261230469, "logps/rejected": -898.4993896484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4864567220211029, "rewards/margins": 8.117650985717773, "rewards/rejected": -8.604107856750488, "step": 51400 }, { "epoch": 0.62, "learning_rate": 1.9342159543501436e-06, "logits/chosen": -2.8987696170806885, "logits/rejected": -2.218137264251709, "logps/chosen": -148.7823486328125, "logps/rejected": -975.8919067382812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9991599321365356, "rewards/margins": 8.362967491149902, "rewards/rejected": -9.362127304077148, "step": 51410 }, { "epoch": 0.62, "learning_rate": 1.9331984861737694e-06, "logits/chosen": -2.884676933288574, "logits/rejected": -2.512819766998291, "logps/chosen": -114.03878021240234, "logps/rejected": -820.0535278320312, "loss": 0.0763, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7250999212265015, "rewards/margins": 7.092695713043213, "rewards/rejected": -7.817795753479004, "step": 51420 }, { "epoch": 0.62, "learning_rate": 1.9321811169593656e-06, "logits/chosen": -2.870069742202759, "logits/rejected": -2.3060097694396973, "logps/chosen": -122.4417495727539, "logps/rejected": -969.12158203125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.718610405921936, "rewards/margins": 8.596158981323242, "rewards/rejected": -9.31476879119873, "step": 51430 }, { "epoch": 0.62, "learning_rate": 1.931163846884562e-06, "logits/chosen": -2.885204792022705, "logits/rejected": -2.5829620361328125, "logps/chosen": -78.48597717285156, "logps/rejected": -830.3284301757812, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4141305387020111, "rewards/margins": 7.506129264831543, "rewards/rejected": -7.920259952545166, "step": 51440 }, { "epoch": 0.62, "learning_rate": 1.930146676126971e-06, "logits/chosen": -2.9094605445861816, "logits/rejected": -2.289952278137207, "logps/chosen": -109.97147369384766, "logps/rejected": -957.79736328125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6663063168525696, "rewards/margins": 8.530969619750977, "rewards/rejected": -9.197275161743164, "step": 51450 }, { "epoch": 0.62, "learning_rate": 1.929129604864187e-06, "logits/chosen": -2.861837863922119, "logits/rejected": -2.437532663345337, "logps/chosen": -100.18968200683594, "logps/rejected": -910.6737060546875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6142778992652893, "rewards/margins": 8.100793838500977, "rewards/rejected": -8.715072631835938, "step": 51460 }, { "epoch": 0.62, "learning_rate": 1.928112633273789e-06, "logits/chosen": -2.866579532623291, "logits/rejected": -2.149822235107422, "logps/chosen": -140.77340698242188, "logps/rejected": -1086.34228515625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9418237805366516, "rewards/margins": 9.517274856567383, "rewards/rejected": -10.459097862243652, "step": 51470 }, { "epoch": 0.62, "learning_rate": 1.9270957615333368e-06, "logits/chosen": -2.867372989654541, "logits/rejected": -2.488776683807373, "logps/chosen": -107.9395751953125, "logps/rejected": -825.4093017578125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6542056798934937, "rewards/margins": 7.220338344573975, "rewards/rejected": -7.874543190002441, "step": 51480 }, { "epoch": 0.62, "learning_rate": 1.9260789898203734e-06, "logits/chosen": -2.8604094982147217, "logits/rejected": -2.219691276550293, "logps/chosen": -115.84993743896484, "logps/rejected": -973.2701416015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6979128122329712, "rewards/margins": 8.655961990356445, "rewards/rejected": -9.353874206542969, "step": 51490 }, { "epoch": 0.62, "learning_rate": 1.9250623183124246e-06, "logits/chosen": -2.8673880100250244, "logits/rejected": -2.338620662689209, "logps/chosen": -125.58831787109375, "logps/rejected": -989.0118408203125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.771040141582489, "rewards/margins": 8.727742195129395, "rewards/rejected": -9.498781204223633, "step": 51500 }, { "epoch": 0.62, "learning_rate": 1.924045747186998e-06, "logits/chosen": -2.8604588508605957, "logits/rejected": -2.4425089359283447, "logps/chosen": -95.89436340332031, "logps/rejected": -858.611328125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.4997319281101227, "rewards/margins": 7.711820125579834, "rewards/rejected": -8.211551666259766, "step": 51510 }, { "epoch": 0.62, "learning_rate": 1.9230292766215836e-06, "logits/chosen": -2.8287320137023926, "logits/rejected": -2.4607462882995605, "logps/chosen": -95.03181457519531, "logps/rejected": -918.45068359375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5033690333366394, "rewards/margins": 8.30339241027832, "rewards/rejected": -8.8067626953125, "step": 51520 }, { "epoch": 0.62, "learning_rate": 1.922012906793656e-06, "logits/chosen": -2.8749940395355225, "logits/rejected": -2.3912107944488525, "logps/chosen": -100.30132293701172, "logps/rejected": -978.7784423828125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5868868231773376, "rewards/margins": 8.802553176879883, "rewards/rejected": -9.389440536499023, "step": 51530 }, { "epoch": 0.62, "learning_rate": 1.9209966378806694e-06, "logits/chosen": -2.883033275604248, "logits/rejected": -2.4737629890441895, "logps/chosen": -92.42390441894531, "logps/rejected": -869.9588012695312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4897168278694153, "rewards/margins": 7.837056636810303, "rewards/rejected": -8.326772689819336, "step": 51540 }, { "epoch": 0.62, "learning_rate": 1.9199804700600613e-06, "logits/chosen": -2.8726966381073, "logits/rejected": -2.1722495555877686, "logps/chosen": -111.73982238769531, "logps/rejected": -1036.0247802734375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6491338610649109, "rewards/margins": 9.312735557556152, "rewards/rejected": -9.961870193481445, "step": 51550 }, { "epoch": 0.62, "learning_rate": 1.9189644035092526e-06, "logits/chosen": -2.9057114124298096, "logits/rejected": -2.2403006553649902, "logps/chosen": -119.0486068725586, "logps/rejected": -1091.8023681640625, "loss": 0.1491, "rewards/accuracies": 1.0, "rewards/chosen": -0.6896257996559143, "rewards/margins": 9.83215618133545, "rewards/rejected": -10.521781921386719, "step": 51560 }, { "epoch": 0.62, "learning_rate": 1.9179484384056454e-06, "logits/chosen": -2.8624279499053955, "logits/rejected": -2.4762072563171387, "logps/chosen": -95.2166748046875, "logps/rejected": -841.3435668945312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5479297637939453, "rewards/margins": 7.4828033447265625, "rewards/rejected": -8.030733108520508, "step": 51570 }, { "epoch": 0.62, "learning_rate": 1.9169325749266236e-06, "logits/chosen": -2.917323350906372, "logits/rejected": -2.43658709526062, "logps/chosen": -104.00069427490234, "logps/rejected": -906.9918212890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6369850039482117, "rewards/margins": 8.046832084655762, "rewards/rejected": -8.683816909790039, "step": 51580 }, { "epoch": 0.62, "learning_rate": 1.9159168132495554e-06, "logits/chosen": -2.9034781455993652, "logits/rejected": -2.296485424041748, "logps/chosen": -105.09123229980469, "logps/rejected": -1006.3816528320312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5580759048461914, "rewards/margins": 9.114452362060547, "rewards/rejected": -9.672527313232422, "step": 51590 }, { "epoch": 0.62, "learning_rate": 1.9149011535517894e-06, "logits/chosen": -2.8281426429748535, "logits/rejected": -2.153815984725952, "logps/chosen": -122.39095306396484, "logps/rejected": -1035.6702880859375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7023419141769409, "rewards/margins": 9.244964599609375, "rewards/rejected": -9.947306632995605, "step": 51600 }, { "epoch": 0.62, "learning_rate": 1.913885596010657e-06, "logits/chosen": -2.888150691986084, "logits/rejected": -2.3937039375305176, "logps/chosen": -95.82355499267578, "logps/rejected": -808.5006103515625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5340455770492554, "rewards/margins": 7.171506404876709, "rewards/rejected": -7.705552577972412, "step": 51610 }, { "epoch": 0.62, "learning_rate": 1.912870140803471e-06, "logits/chosen": -2.934870719909668, "logits/rejected": -2.5827393531799316, "logps/chosen": -115.4486312866211, "logps/rejected": -760.04443359375, "loss": 0.1188, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7500475645065308, "rewards/margins": 6.474267482757568, "rewards/rejected": -7.224314212799072, "step": 51620 }, { "epoch": 0.62, "learning_rate": 1.911854788107528e-06, "logits/chosen": -2.793480634689331, "logits/rejected": -2.052642345428467, "logps/chosen": -123.6926498413086, "logps/rejected": -995.8858642578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7263172268867493, "rewards/margins": 8.822535514831543, "rewards/rejected": -9.548852920532227, "step": 51630 }, { "epoch": 0.62, "learning_rate": 1.9108395381001062e-06, "logits/chosen": -2.90608549118042, "logits/rejected": -2.2014174461364746, "logps/chosen": -107.34733581542969, "logps/rejected": -1018.6321411132812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5818723440170288, "rewards/margins": 9.21892261505127, "rewards/rejected": -9.80079460144043, "step": 51640 }, { "epoch": 0.62, "learning_rate": 1.909824390958464e-06, "logits/chosen": -2.8170218467712402, "logits/rejected": -2.0335967540740967, "logps/chosen": -117.3964614868164, "logps/rejected": -935.8147583007812, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.6813747882843018, "rewards/margins": 8.283047676086426, "rewards/rejected": -8.964423179626465, "step": 51650 }, { "epoch": 0.62, "learning_rate": 1.908809346859844e-06, "logits/chosen": -2.8478362560272217, "logits/rejected": -2.2024056911468506, "logps/chosen": -143.09951782226562, "logps/rejected": -1040.6800537109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8947569727897644, "rewards/margins": 9.10374927520752, "rewards/rejected": -9.998506546020508, "step": 51660 }, { "epoch": 0.62, "learning_rate": 1.9077944059814707e-06, "logits/chosen": -2.8519060611724854, "logits/rejected": -2.3386127948760986, "logps/chosen": -94.69999694824219, "logps/rejected": -889.43017578125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.510313868522644, "rewards/margins": 7.996304512023926, "rewards/rejected": -8.506617546081543, "step": 51670 }, { "epoch": 0.62, "learning_rate": 1.906779568500549e-06, "logits/chosen": -2.8650307655334473, "logits/rejected": -2.298151969909668, "logps/chosen": -109.7844009399414, "logps/rejected": -912.33251953125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.6422187089920044, "rewards/margins": 8.085753440856934, "rewards/rejected": -8.727970123291016, "step": 51680 }, { "epoch": 0.62, "learning_rate": 1.9057648345942666e-06, "logits/chosen": -2.9118399620056152, "logits/rejected": -2.518122911453247, "logps/chosen": -77.61094665527344, "logps/rejected": -808.0098266601562, "loss": 0.1266, "rewards/accuracies": 1.0, "rewards/chosen": -0.346000611782074, "rewards/margins": 7.341111183166504, "rewards/rejected": -7.687111854553223, "step": 51690 }, { "epoch": 0.62, "learning_rate": 1.9047502044397934e-06, "logits/chosen": -2.8997480869293213, "logits/rejected": -2.171058177947998, "logps/chosen": -105.32489013671875, "logps/rejected": -1005.0152587890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5704551935195923, "rewards/margins": 9.086477279663086, "rewards/rejected": -9.656932830810547, "step": 51700 }, { "epoch": 0.62, "learning_rate": 1.9037356782142817e-06, "logits/chosen": -2.9059677124023438, "logits/rejected": -2.2909226417541504, "logps/chosen": -104.95430755615234, "logps/rejected": -929.69482421875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6347345113754272, "rewards/margins": 8.283194541931152, "rewards/rejected": -8.917929649353027, "step": 51710 }, { "epoch": 0.62, "learning_rate": 1.902721256094865e-06, "logits/chosen": -2.9042017459869385, "logits/rejected": -2.281236171722412, "logps/chosen": -121.57645416259766, "logps/rejected": -924.4894409179688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7275041341781616, "rewards/margins": 8.12468433380127, "rewards/rejected": -8.852189064025879, "step": 51720 }, { "epoch": 0.62, "learning_rate": 1.9017069382586577e-06, "logits/chosen": -2.8584482669830322, "logits/rejected": -2.4658265113830566, "logps/chosen": -141.72158813476562, "logps/rejected": -843.28759765625, "loss": 0.2624, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9532001614570618, "rewards/margins": 7.10171365737915, "rewards/rejected": -8.054914474487305, "step": 51730 }, { "epoch": 0.62, "learning_rate": 1.9006927248827573e-06, "logits/chosen": -2.845571756362915, "logits/rejected": -2.317620038986206, "logps/chosen": -113.2367172241211, "logps/rejected": -944.3328857421875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6452423930168152, "rewards/margins": 8.414565086364746, "rewards/rejected": -9.059808731079102, "step": 51740 }, { "epoch": 0.62, "learning_rate": 1.8996786161442427e-06, "logits/chosen": -2.870173931121826, "logits/rejected": -2.402836799621582, "logps/chosen": -95.97093200683594, "logps/rejected": -868.3609619140625, "loss": 0.0394, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5150820016860962, "rewards/margins": 7.783763885498047, "rewards/rejected": -8.298845291137695, "step": 51750 }, { "epoch": 0.62, "learning_rate": 1.8986646122201747e-06, "logits/chosen": -2.8065130710601807, "logits/rejected": -2.2867276668548584, "logps/chosen": -132.1233367919922, "logps/rejected": -990.88916015625, "loss": 0.0205, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7906633019447327, "rewards/margins": 8.717157363891602, "rewards/rejected": -9.507820129394531, "step": 51760 }, { "epoch": 0.62, "learning_rate": 1.8976507132875949e-06, "logits/chosen": -2.872471332550049, "logits/rejected": -2.4337730407714844, "logps/chosen": -105.343994140625, "logps/rejected": -867.0989990234375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6188766956329346, "rewards/margins": 7.664307594299316, "rewards/rejected": -8.283184051513672, "step": 51770 }, { "epoch": 0.62, "learning_rate": 1.8966369195235277e-06, "logits/chosen": -2.8945810794830322, "logits/rejected": -2.415619373321533, "logps/chosen": -103.51170349121094, "logps/rejected": -869.4849853515625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.603073000907898, "rewards/margins": 7.715158939361572, "rewards/rejected": -8.318231582641602, "step": 51780 }, { "epoch": 0.62, "learning_rate": 1.8956232311049787e-06, "logits/chosen": -2.919041872024536, "logits/rejected": -2.434802532196045, "logps/chosen": -106.80516052246094, "logps/rejected": -928.6614990234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6338552236557007, "rewards/margins": 8.266011238098145, "rewards/rejected": -8.899866104125977, "step": 51790 }, { "epoch": 0.62, "learning_rate": 1.894609648208935e-06, "logits/chosen": -2.9105656147003174, "logits/rejected": -2.296313762664795, "logps/chosen": -122.0895767211914, "logps/rejected": -974.3209228515625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7166507244110107, "rewards/margins": 8.639589309692383, "rewards/rejected": -9.356240272521973, "step": 51800 }, { "epoch": 0.62, "learning_rate": 1.893596171012365e-06, "logits/chosen": -2.8861300945281982, "logits/rejected": -2.357618570327759, "logps/chosen": -97.37312316894531, "logps/rejected": -901.61328125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5108110308647156, "rewards/margins": 8.122007369995117, "rewards/rejected": -8.632818222045898, "step": 51810 }, { "epoch": 0.62, "learning_rate": 1.89258279969222e-06, "logits/chosen": -2.911510944366455, "logits/rejected": -2.366720199584961, "logps/chosen": -105.0907974243164, "logps/rejected": -997.2349853515625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5831516981124878, "rewards/margins": 8.995537757873535, "rewards/rejected": -9.578689575195312, "step": 51820 }, { "epoch": 0.62, "learning_rate": 1.8915695344254304e-06, "logits/chosen": -2.8205463886260986, "logits/rejected": -2.184337615966797, "logps/chosen": -118.8581771850586, "logps/rejected": -948.8167724609375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.7345014810562134, "rewards/margins": 8.358518600463867, "rewards/rejected": -9.09302043914795, "step": 51830 }, { "epoch": 0.62, "learning_rate": 1.8905563753889103e-06, "logits/chosen": -2.8344547748565674, "logits/rejected": -2.3926920890808105, "logps/chosen": -124.04359436035156, "logps/rejected": -812.9664916992188, "loss": 0.0998, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.750056803226471, "rewards/margins": 6.989931583404541, "rewards/rejected": -7.739988803863525, "step": 51840 }, { "epoch": 0.62, "learning_rate": 1.8895433227595545e-06, "logits/chosen": -2.8764846324920654, "logits/rejected": -2.184166669845581, "logps/chosen": -127.81205749511719, "logps/rejected": -985.5452270507812, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.79443359375, "rewards/margins": 8.664045333862305, "rewards/rejected": -9.458478927612305, "step": 51850 }, { "epoch": 0.62, "learning_rate": 1.8885303767142396e-06, "logits/chosen": -2.8456101417541504, "logits/rejected": -2.1926732063293457, "logps/chosen": -108.8210220336914, "logps/rejected": -900.3931884765625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6202012896537781, "rewards/margins": 8.006357192993164, "rewards/rejected": -8.626559257507324, "step": 51860 }, { "epoch": 0.62, "learning_rate": 1.8875175374298221e-06, "logits/chosen": -2.898660659790039, "logits/rejected": -2.4335412979125977, "logps/chosen": -125.81925964355469, "logps/rejected": -938.3143310546875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7945888042449951, "rewards/margins": 8.195422172546387, "rewards/rejected": -8.990011215209961, "step": 51870 }, { "epoch": 0.62, "learning_rate": 1.8865048050831414e-06, "logits/chosen": -2.8591508865356445, "logits/rejected": -2.3775100708007812, "logps/chosen": -101.97565460205078, "logps/rejected": -841.482421875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5756056904792786, "rewards/margins": 7.460656642913818, "rewards/rejected": -8.036261558532715, "step": 51880 }, { "epoch": 0.62, "learning_rate": 1.8854921798510183e-06, "logits/chosen": -2.8751697540283203, "logits/rejected": -2.262845039367676, "logps/chosen": -172.29928588867188, "logps/rejected": -1025.972900390625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.1510847806930542, "rewards/margins": 8.716899871826172, "rewards/rejected": -9.867985725402832, "step": 51890 }, { "epoch": 0.62, "learning_rate": 1.8844796619102535e-06, "logits/chosen": -2.8754680156707764, "logits/rejected": -2.2640559673309326, "logps/chosen": -109.15705871582031, "logps/rejected": -926.8935546875, "loss": 0.1929, "rewards/accuracies": 1.0, "rewards/chosen": -0.6537727117538452, "rewards/margins": 8.219858169555664, "rewards/rejected": -8.87363052368164, "step": 51900 }, { "epoch": 0.62, "learning_rate": 1.88346725143763e-06, "logits/chosen": -2.875115156173706, "logits/rejected": -2.6205637454986572, "logps/chosen": -113.11546325683594, "logps/rejected": -728.1041259765625, "loss": 0.1956, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7664123773574829, "rewards/margins": 6.145042419433594, "rewards/rejected": -6.911454200744629, "step": 51910 }, { "epoch": 0.62, "learning_rate": 1.882454948609913e-06, "logits/chosen": -2.8748297691345215, "logits/rejected": -2.3528244495391846, "logps/chosen": -100.4683609008789, "logps/rejected": -901.3214111328125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5665206909179688, "rewards/margins": 8.052925109863281, "rewards/rejected": -8.619446754455566, "step": 51920 }, { "epoch": 0.62, "learning_rate": 1.881442753603846e-06, "logits/chosen": -2.892573833465576, "logits/rejected": -2.4960315227508545, "logps/chosen": -90.7175064086914, "logps/rejected": -875.0177612304688, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.47640833258628845, "rewards/margins": 7.8892998695373535, "rewards/rejected": -8.36570930480957, "step": 51930 }, { "epoch": 0.62, "learning_rate": 1.8804306665961563e-06, "logits/chosen": -2.850388526916504, "logits/rejected": -2.1618294715881348, "logps/chosen": -136.68698120117188, "logps/rejected": -929.6140747070312, "loss": 0.1326, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8864900469779968, "rewards/margins": 8.014045715332031, "rewards/rejected": -8.900537490844727, "step": 51940 }, { "epoch": 0.62, "learning_rate": 1.8794186877635512e-06, "logits/chosen": -2.8678038120269775, "logits/rejected": -2.1268317699432373, "logps/chosen": -144.26063537597656, "logps/rejected": -949.98583984375, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": -0.8628509640693665, "rewards/margins": 8.234206199645996, "rewards/rejected": -9.097058296203613, "step": 51950 }, { "epoch": 0.62, "learning_rate": 1.8784068172827205e-06, "logits/chosen": -2.856180191040039, "logits/rejected": -2.3713607788085938, "logps/chosen": -119.95866394042969, "logps/rejected": -880.4406127929688, "loss": 0.1217, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7384225130081177, "rewards/margins": 7.679938316345215, "rewards/rejected": -8.418360710144043, "step": 51960 }, { "epoch": 0.62, "learning_rate": 1.8773950553303324e-06, "logits/chosen": -2.8800129890441895, "logits/rejected": -2.477323532104492, "logps/chosen": -110.69405364990234, "logps/rejected": -919.7777099609375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6254168748855591, "rewards/margins": 8.17492389678955, "rewards/rejected": -8.800339698791504, "step": 51970 }, { "epoch": 0.62, "learning_rate": 1.8763834020830385e-06, "logits/chosen": -2.832660436630249, "logits/rejected": -2.3076083660125732, "logps/chosen": -123.6649169921875, "logps/rejected": -880.7752075195312, "loss": 0.2503, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8041351437568665, "rewards/margins": 7.620057582855225, "rewards/rejected": -8.424192428588867, "step": 51980 }, { "epoch": 0.62, "learning_rate": 1.875371857717471e-06, "logits/chosen": -2.8740620613098145, "logits/rejected": -2.3849387168884277, "logps/chosen": -116.0461196899414, "logps/rejected": -943.98974609375, "loss": 0.2019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6761878728866577, "rewards/margins": 8.367026329040527, "rewards/rejected": -9.043214797973633, "step": 51990 }, { "epoch": 0.62, "learning_rate": 1.8743604224102418e-06, "logits/chosen": -2.879441499710083, "logits/rejected": -2.3993632793426514, "logps/chosen": -82.20326232910156, "logps/rejected": -943.6052856445312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3896879553794861, "rewards/margins": 8.670719146728516, "rewards/rejected": -9.060407638549805, "step": 52000 }, { "epoch": 0.62, "learning_rate": 1.8733490963379453e-06, "logits/chosen": -2.868168830871582, "logits/rejected": -2.3663432598114014, "logps/chosen": -103.27961730957031, "logps/rejected": -877.3880615234375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6130053997039795, "rewards/margins": 7.7815961837768555, "rewards/rejected": -8.39460277557373, "step": 52010 }, { "epoch": 0.62, "learning_rate": 1.8723378796771566e-06, "logits/chosen": -2.863741636276245, "logits/rejected": -2.4606871604919434, "logps/chosen": -95.22213745117188, "logps/rejected": -842.2599487304688, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.5531700849533081, "rewards/margins": 7.477663516998291, "rewards/rejected": -8.03083324432373, "step": 52020 }, { "epoch": 0.62, "learning_rate": 1.8713267726044303e-06, "logits/chosen": -2.903440475463867, "logits/rejected": -2.4721946716308594, "logps/chosen": -88.37642669677734, "logps/rejected": -838.8834228515625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.4699264466762543, "rewards/margins": 7.5414557456970215, "rewards/rejected": -8.011382102966309, "step": 52030 }, { "epoch": 0.62, "learning_rate": 1.8703157752963036e-06, "logits/chosen": -2.816227436065674, "logits/rejected": -2.173865795135498, "logps/chosen": -123.3927230834961, "logps/rejected": -1007.1676025390625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7084745168685913, "rewards/margins": 8.950963020324707, "rewards/rejected": -9.65943717956543, "step": 52040 }, { "epoch": 0.62, "learning_rate": 1.8693048879292935e-06, "logits/chosen": -2.8768575191497803, "logits/rejected": -2.187897205352783, "logps/chosen": -140.17308044433594, "logps/rejected": -1145.85595703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8501768112182617, "rewards/margins": 10.188276290893555, "rewards/rejected": -11.038453102111816, "step": 52050 }, { "epoch": 0.62, "learning_rate": 1.8682941106798985e-06, "logits/chosen": -2.861952304840088, "logits/rejected": -2.2790846824645996, "logps/chosen": -111.29353332519531, "logps/rejected": -845.5286254882812, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.6273174285888672, "rewards/margins": 7.45177698135376, "rewards/rejected": -8.079094886779785, "step": 52060 }, { "epoch": 0.62, "learning_rate": 1.8672834437245974e-06, "logits/chosen": -2.8699467182159424, "logits/rejected": -2.3679423332214355, "logps/chosen": -91.62361145019531, "logps/rejected": -956.0965576171875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5046218037605286, "rewards/margins": 8.666801452636719, "rewards/rejected": -9.171422958374023, "step": 52070 }, { "epoch": 0.62, "learning_rate": 1.8662728872398495e-06, "logits/chosen": -2.8760743141174316, "logits/rejected": -2.197971820831299, "logps/chosen": -123.26725769042969, "logps/rejected": -996.5010986328125, "loss": 0.1241, "rewards/accuracies": 1.0, "rewards/chosen": -0.7288790345191956, "rewards/margins": 8.84748649597168, "rewards/rejected": -9.576366424560547, "step": 52080 }, { "epoch": 0.62, "learning_rate": 1.8652624414020958e-06, "logits/chosen": -2.9101710319519043, "logits/rejected": -2.6318368911743164, "logps/chosen": -77.94457244873047, "logps/rejected": -818.3507690429688, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.41190648078918457, "rewards/margins": 7.3991851806640625, "rewards/rejected": -7.811091423034668, "step": 52090 }, { "epoch": 0.62, "learning_rate": 1.8642521063877564e-06, "logits/chosen": -2.8415539264678955, "logits/rejected": -2.1547679901123047, "logps/chosen": -115.0836410522461, "logps/rejected": -1022.8004150390625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6383657455444336, "rewards/margins": 9.183530807495117, "rewards/rejected": -9.821897506713867, "step": 52100 }, { "epoch": 0.62, "learning_rate": 1.8632418823732335e-06, "logits/chosen": -2.925081729888916, "logits/rejected": -2.2806308269500732, "logps/chosen": -106.5534439086914, "logps/rejected": -926.7062377929688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5904490947723389, "rewards/margins": 8.273518562316895, "rewards/rejected": -8.863967895507812, "step": 52110 }, { "epoch": 0.62, "learning_rate": 1.862231769534909e-06, "logits/chosen": -2.9058640003204346, "logits/rejected": -2.3351314067840576, "logps/chosen": -105.77790832519531, "logps/rejected": -910.9899291992188, "loss": 0.0967, "rewards/accuracies": 1.0, "rewards/chosen": -0.5843077301979065, "rewards/margins": 8.138541221618652, "rewards/rejected": -8.722848892211914, "step": 52120 }, { "epoch": 0.62, "learning_rate": 1.8612217680491468e-06, "logits/chosen": -2.8735945224761963, "logits/rejected": -2.484635829925537, "logps/chosen": -81.4566879272461, "logps/rejected": -808.57373046875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.4117569327354431, "rewards/margins": 7.296185493469238, "rewards/rejected": -7.707942008972168, "step": 52130 }, { "epoch": 0.62, "learning_rate": 1.86021187809229e-06, "logits/chosen": -2.857821226119995, "logits/rejected": -2.2008578777313232, "logps/chosen": -134.2426300048828, "logps/rejected": -858.6334228515625, "loss": 0.0853, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8362079858779907, "rewards/margins": 7.370445251464844, "rewards/rejected": -8.206653594970703, "step": 52140 }, { "epoch": 0.62, "learning_rate": 1.8592020998406618e-06, "logits/chosen": -2.8237144947052, "logits/rejected": -2.258084774017334, "logps/chosen": -110.83260345458984, "logps/rejected": -954.7262573242188, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6396360993385315, "rewards/margins": 8.518342971801758, "rewards/rejected": -9.157979965209961, "step": 52150 }, { "epoch": 0.62, "learning_rate": 1.8581924334705675e-06, "logits/chosen": -2.8690531253814697, "logits/rejected": -2.121518611907959, "logps/chosen": -122.1932144165039, "logps/rejected": -1011.7771606445312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6686230301856995, "rewards/margins": 9.048544883728027, "rewards/rejected": -9.717167854309082, "step": 52160 }, { "epoch": 0.62, "learning_rate": 1.8571828791582918e-06, "logits/chosen": -2.8778579235076904, "logits/rejected": -2.2657625675201416, "logps/chosen": -125.3525390625, "logps/rejected": -986.3663940429688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7211030125617981, "rewards/margins": 8.74134635925293, "rewards/rejected": -9.46245002746582, "step": 52170 }, { "epoch": 0.62, "learning_rate": 1.8561734370801005e-06, "logits/chosen": -2.8936972618103027, "logits/rejected": -2.5365586280822754, "logps/chosen": -81.1082992553711, "logps/rejected": -853.0169067382812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4238970875740051, "rewards/margins": 7.717207431793213, "rewards/rejected": -8.141104698181152, "step": 52180 }, { "epoch": 0.62, "learning_rate": 1.8551641074122384e-06, "logits/chosen": -2.8502092361450195, "logits/rejected": -2.363356590270996, "logps/chosen": -95.61833190917969, "logps/rejected": -949.0530395507812, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.5245140790939331, "rewards/margins": 8.587483406066895, "rewards/rejected": -9.1119966506958, "step": 52190 }, { "epoch": 0.62, "learning_rate": 1.854154890330932e-06, "logits/chosen": -2.8989267349243164, "logits/rejected": -2.1357169151306152, "logps/chosen": -130.42874145507812, "logps/rejected": -1051.175537109375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7896350622177124, "rewards/margins": 9.328601837158203, "rewards/rejected": -10.118237495422363, "step": 52200 }, { "epoch": 0.62, "learning_rate": 1.8531457860123889e-06, "logits/chosen": -2.808953046798706, "logits/rejected": -2.0296380519866943, "logps/chosen": -122.79206848144531, "logps/rejected": -1000.1292114257812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7179046869277954, "rewards/margins": 8.88471508026123, "rewards/rejected": -9.602620124816895, "step": 52210 }, { "epoch": 0.63, "learning_rate": 1.8521367946327943e-06, "logits/chosen": -2.904977321624756, "logits/rejected": -2.476057767868042, "logps/chosen": -128.92715454101562, "logps/rejected": -936.7679443359375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8022741079330444, "rewards/margins": 8.175127029418945, "rewards/rejected": -8.977400779724121, "step": 52220 }, { "epoch": 0.63, "learning_rate": 1.851127916368316e-06, "logits/chosen": -2.8780131340026855, "logits/rejected": -2.1883444786071777, "logps/chosen": -142.98800659179688, "logps/rejected": -1032.03173828125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9271309971809387, "rewards/margins": 8.983795166015625, "rewards/rejected": -9.910926818847656, "step": 52230 }, { "epoch": 0.63, "learning_rate": 1.8501191513951017e-06, "logits/chosen": -2.918726921081543, "logits/rejected": -2.156627655029297, "logps/chosen": -123.85987854003906, "logps/rejected": -1088.58740234375, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -0.7240317463874817, "rewards/margins": 9.756860733032227, "rewards/rejected": -10.480892181396484, "step": 52240 }, { "epoch": 0.63, "learning_rate": 1.8491104998892784e-06, "logits/chosen": -2.847102642059326, "logits/rejected": -2.259368419647217, "logps/chosen": -114.5171890258789, "logps/rejected": -937.6187744140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6450347900390625, "rewards/margins": 8.346746444702148, "rewards/rejected": -8.991782188415527, "step": 52250 }, { "epoch": 0.63, "learning_rate": 1.8481019620269535e-06, "logits/chosen": -2.913944721221924, "logits/rejected": -2.3948187828063965, "logps/chosen": -100.71257019042969, "logps/rejected": -939.4529418945312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5688455104827881, "rewards/margins": 8.440805435180664, "rewards/rejected": -9.009650230407715, "step": 52260 }, { "epoch": 0.63, "learning_rate": 1.8470935379842159e-06, "logits/chosen": -2.847578287124634, "logits/rejected": -1.9887113571166992, "logps/chosen": -133.75729370117188, "logps/rejected": -1126.9998779296875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8178809881210327, "rewards/margins": 10.049834251403809, "rewards/rejected": -10.867715835571289, "step": 52270 }, { "epoch": 0.63, "learning_rate": 1.8460852279371338e-06, "logits/chosen": -2.9045984745025635, "logits/rejected": -2.3403074741363525, "logps/chosen": -115.78189849853516, "logps/rejected": -1084.2296142578125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6823506355285645, "rewards/margins": 9.756269454956055, "rewards/rejected": -10.438620567321777, "step": 52280 }, { "epoch": 0.63, "learning_rate": 1.845077032061754e-06, "logits/chosen": -2.8593080043792725, "logits/rejected": -2.351788282394409, "logps/chosen": -112.93086242675781, "logps/rejected": -867.7357177734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6703908443450928, "rewards/margins": 7.602241516113281, "rewards/rejected": -8.27263069152832, "step": 52290 }, { "epoch": 0.63, "learning_rate": 1.8440689505341054e-06, "logits/chosen": -2.902268886566162, "logits/rejected": -2.435194492340088, "logps/chosen": -91.80354309082031, "logps/rejected": -920.3580322265625, "loss": 0.1166, "rewards/accuracies": 1.0, "rewards/chosen": -0.44533076882362366, "rewards/margins": 8.388465881347656, "rewards/rejected": -8.833795547485352, "step": 52300 }, { "epoch": 0.63, "learning_rate": 1.8430609835301969e-06, "logits/chosen": -2.8260629177093506, "logits/rejected": -2.4074535369873047, "logps/chosen": -100.51653289794922, "logps/rejected": -894.3580932617188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5742259621620178, "rewards/margins": 7.9966535568237305, "rewards/rejected": -8.570878982543945, "step": 52310 }, { "epoch": 0.63, "learning_rate": 1.8420531312260153e-06, "logits/chosen": -2.892080783843994, "logits/rejected": -2.2523062229156494, "logps/chosen": -119.60770416259766, "logps/rejected": -1008.2311401367188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6609864234924316, "rewards/margins": 9.033998489379883, "rewards/rejected": -9.694986343383789, "step": 52320 }, { "epoch": 0.63, "learning_rate": 1.84104539379753e-06, "logits/chosen": -2.893566370010376, "logits/rejected": -2.4306418895721436, "logps/chosen": -109.45552062988281, "logps/rejected": -855.39208984375, "loss": 0.2376, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6874032616615295, "rewards/margins": 7.493646144866943, "rewards/rejected": -8.181049346923828, "step": 52330 }, { "epoch": 0.63, "learning_rate": 1.8400377714206891e-06, "logits/chosen": -2.866683006286621, "logits/rejected": -2.3409111499786377, "logps/chosen": -103.9120864868164, "logps/rejected": -990.4459838867188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5489858388900757, "rewards/margins": 8.953909873962402, "rewards/rejected": -9.502896308898926, "step": 52340 }, { "epoch": 0.63, "learning_rate": 1.83903026427142e-06, "logits/chosen": -2.946739673614502, "logits/rejected": -2.5224952697753906, "logps/chosen": -99.76788330078125, "logps/rejected": -919.17529296875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5461416244506836, "rewards/margins": 8.250981330871582, "rewards/rejected": -8.797122955322266, "step": 52350 }, { "epoch": 0.63, "learning_rate": 1.8380228725256311e-06, "logits/chosen": -2.84883451461792, "logits/rejected": -2.4033217430114746, "logps/chosen": -109.63621520996094, "logps/rejected": -811.8718872070312, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6722627878189087, "rewards/margins": 7.068105220794678, "rewards/rejected": -7.740367889404297, "step": 52360 }, { "epoch": 0.63, "learning_rate": 1.8370155963592095e-06, "logits/chosen": -2.8484411239624023, "logits/rejected": -2.4373912811279297, "logps/chosen": -107.03243255615234, "logps/rejected": -863.9319458007812, "loss": 0.0957, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6078429818153381, "rewards/margins": 7.65505838394165, "rewards/rejected": -8.262901306152344, "step": 52370 }, { "epoch": 0.63, "learning_rate": 1.8360084359480246e-06, "logits/chosen": -2.86676287651062, "logits/rejected": -2.192617893218994, "logps/chosen": -106.8793716430664, "logps/rejected": -890.3030395507812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6185838580131531, "rewards/margins": 7.9069976806640625, "rewards/rejected": -8.525582313537598, "step": 52380 }, { "epoch": 0.63, "learning_rate": 1.8350013914679216e-06, "logits/chosen": -2.8572840690612793, "logits/rejected": -2.2646079063415527, "logps/chosen": -106.0830078125, "logps/rejected": -972.6251831054688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6082928776741028, "rewards/margins": 8.734134674072266, "rewards/rejected": -9.342428207397461, "step": 52390 }, { "epoch": 0.63, "learning_rate": 1.833994463094729e-06, "logits/chosen": -2.878378391265869, "logits/rejected": -2.35681414604187, "logps/chosen": -123.52734375, "logps/rejected": -969.7752075195312, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7404136657714844, "rewards/margins": 8.555937767028809, "rewards/rejected": -9.296351432800293, "step": 52400 }, { "epoch": 0.63, "learning_rate": 1.8329876510042539e-06, "logits/chosen": -2.8723785877227783, "logits/rejected": -2.3372254371643066, "logps/chosen": -105.80777740478516, "logps/rejected": -914.5671997070312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.548552393913269, "rewards/margins": 8.207941055297852, "rewards/rejected": -8.756494522094727, "step": 52410 }, { "epoch": 0.63, "learning_rate": 1.8319809553722816e-06, "logits/chosen": -2.9555811882019043, "logits/rejected": -2.337498426437378, "logps/chosen": -114.6030502319336, "logps/rejected": -871.1339721679688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5636960864067078, "rewards/margins": 7.760640621185303, "rewards/rejected": -8.32433795928955, "step": 52420 }, { "epoch": 0.63, "learning_rate": 1.8309743763745793e-06, "logits/chosen": -2.857494354248047, "logits/rejected": -2.368326187133789, "logps/chosen": -113.20245361328125, "logps/rejected": -908.2448120117188, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7032302618026733, "rewards/margins": 7.98514461517334, "rewards/rejected": -8.688375473022461, "step": 52430 }, { "epoch": 0.63, "learning_rate": 1.8299679141868935e-06, "logits/chosen": -2.878922462463379, "logits/rejected": -2.2934508323669434, "logps/chosen": -109.61021423339844, "logps/rejected": -977.5213623046875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.564913272857666, "rewards/margins": 8.811275482177734, "rewards/rejected": -9.376188278198242, "step": 52440 }, { "epoch": 0.63, "learning_rate": 1.8289615689849482e-06, "logits/chosen": -2.876765727996826, "logits/rejected": -2.567573308944702, "logps/chosen": -75.68441772460938, "logps/rejected": -803.741455078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.3144516944885254, "rewards/margins": 7.356974124908447, "rewards/rejected": -7.671425819396973, "step": 52450 }, { "epoch": 0.63, "learning_rate": 1.8279553409444493e-06, "logits/chosen": -2.8974449634552, "logits/rejected": -2.409975528717041, "logps/chosen": -106.47908020019531, "logps/rejected": -957.42626953125, "loss": 0.2575, "rewards/accuracies": 1.0, "rewards/chosen": -0.6124069690704346, "rewards/margins": 8.575590133666992, "rewards/rejected": -9.187997817993164, "step": 52460 }, { "epoch": 0.63, "learning_rate": 1.8269492302410813e-06, "logits/chosen": -2.8932180404663086, "logits/rejected": -2.2343316078186035, "logps/chosen": -121.2095718383789, "logps/rejected": -1054.7518310546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7199005484580994, "rewards/margins": 9.409116744995117, "rewards/rejected": -10.12901782989502, "step": 52470 }, { "epoch": 0.63, "learning_rate": 1.8259432370505088e-06, "logits/chosen": -2.902538299560547, "logits/rejected": -2.4139175415039062, "logps/chosen": -95.55720520019531, "logps/rejected": -923.1856689453125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5004580616950989, "rewards/margins": 8.344974517822266, "rewards/rejected": -8.84543228149414, "step": 52480 }, { "epoch": 0.63, "learning_rate": 1.8249373615483746e-06, "logits/chosen": -2.8897433280944824, "logits/rejected": -2.367002010345459, "logps/chosen": -98.52532196044922, "logps/rejected": -911.5302734375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.564552903175354, "rewards/margins": 8.158777236938477, "rewards/rejected": -8.723329544067383, "step": 52490 }, { "epoch": 0.63, "learning_rate": 1.8239316039103023e-06, "logits/chosen": -2.8709237575531006, "logits/rejected": -2.2508325576782227, "logps/chosen": -137.4861602783203, "logps/rejected": -948.4661865234375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.903549313545227, "rewards/margins": 8.174799919128418, "rewards/rejected": -9.078350067138672, "step": 52500 }, { "epoch": 0.63, "learning_rate": 1.8229259643118946e-06, "logits/chosen": -2.8849923610687256, "logits/rejected": -2.1919686794281006, "logps/chosen": -136.31961059570312, "logps/rejected": -921.9111328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8410822749137878, "rewards/margins": 7.967411994934082, "rewards/rejected": -8.808493614196777, "step": 52510 }, { "epoch": 0.63, "learning_rate": 1.8219204429287324e-06, "logits/chosen": -2.9077789783477783, "logits/rejected": -2.4641051292419434, "logps/chosen": -89.61870574951172, "logps/rejected": -890.3045654296875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.48368483781814575, "rewards/margins": 8.04826831817627, "rewards/rejected": -8.531952857971191, "step": 52520 }, { "epoch": 0.63, "learning_rate": 1.8209150399363778e-06, "logits/chosen": -2.9306132793426514, "logits/rejected": -2.228720188140869, "logps/chosen": -113.94520568847656, "logps/rejected": -977.6827392578125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.668334424495697, "rewards/margins": 8.718122482299805, "rewards/rejected": -9.386457443237305, "step": 52530 }, { "epoch": 0.63, "learning_rate": 1.8199097555103704e-06, "logits/chosen": -2.900068998336792, "logits/rejected": -2.4137518405914307, "logps/chosen": -106.2019271850586, "logps/rejected": -886.13671875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.588681697845459, "rewards/margins": 7.870291709899902, "rewards/rejected": -8.458972930908203, "step": 52540 }, { "epoch": 0.63, "learning_rate": 1.8189045898262314e-06, "logits/chosen": -2.849911689758301, "logits/rejected": -2.2516825199127197, "logps/chosen": -135.51077270507812, "logps/rejected": -1014.99658203125, "loss": 0.0792, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8786018490791321, "rewards/margins": 8.864316940307617, "rewards/rejected": -9.742918968200684, "step": 52550 }, { "epoch": 0.63, "learning_rate": 1.8178995430594602e-06, "logits/chosen": -2.8806135654449463, "logits/rejected": -2.3642630577087402, "logps/chosen": -101.14215850830078, "logps/rejected": -882.4122924804688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5364684462547302, "rewards/margins": 7.8990797996521, "rewards/rejected": -8.435547828674316, "step": 52560 }, { "epoch": 0.63, "learning_rate": 1.8168946153855332e-06, "logits/chosen": -2.852262020111084, "logits/rejected": -2.320122241973877, "logps/chosen": -102.36579895019531, "logps/rejected": -981.1990356445312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5265315771102905, "rewards/margins": 8.901219367980957, "rewards/rejected": -9.427751541137695, "step": 52570 }, { "epoch": 0.63, "learning_rate": 1.8158898069799097e-06, "logits/chosen": -2.8855843544006348, "logits/rejected": -2.1629257202148438, "logps/chosen": -143.82940673828125, "logps/rejected": -1034.9736328125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.9035543203353882, "rewards/margins": 9.037641525268555, "rewards/rejected": -9.941195487976074, "step": 52580 }, { "epoch": 0.63, "learning_rate": 1.8148851180180255e-06, "logits/chosen": -2.9200503826141357, "logits/rejected": -2.494091510772705, "logps/chosen": -104.9413070678711, "logps/rejected": -860.2032470703125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.587216317653656, "rewards/margins": 7.610996246337891, "rewards/rejected": -8.198213577270508, "step": 52590 }, { "epoch": 0.63, "learning_rate": 1.8138805486752977e-06, "logits/chosen": -2.921471118927002, "logits/rejected": -2.287816286087036, "logps/chosen": -107.55366516113281, "logps/rejected": -953.15380859375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5232069492340088, "rewards/margins": 8.61213493347168, "rewards/rejected": -9.13534164428711, "step": 52600 }, { "epoch": 0.63, "learning_rate": 1.81287609912712e-06, "logits/chosen": -2.9087109565734863, "logits/rejected": -2.4457175731658936, "logps/chosen": -81.70276641845703, "logps/rejected": -817.3617553710938, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4164503216743469, "rewards/margins": 7.380815029144287, "rewards/rejected": -7.797265529632568, "step": 52610 }, { "epoch": 0.63, "learning_rate": 1.8118717695488667e-06, "logits/chosen": -2.8368966579437256, "logits/rejected": -2.0836048126220703, "logps/chosen": -122.44268798828125, "logps/rejected": -1001.4705810546875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6895725131034851, "rewards/margins": 8.929052352905273, "rewards/rejected": -9.618624687194824, "step": 52620 }, { "epoch": 0.63, "learning_rate": 1.8108675601158918e-06, "logits/chosen": -2.913590908050537, "logits/rejected": -2.4728379249572754, "logps/chosen": -90.21871185302734, "logps/rejected": -890.53173828125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4731510579586029, "rewards/margins": 8.044337272644043, "rewards/rejected": -8.517488479614258, "step": 52630 }, { "epoch": 0.63, "learning_rate": 1.8098634710035262e-06, "logits/chosen": -2.863527536392212, "logits/rejected": -2.121701717376709, "logps/chosen": -138.98788452148438, "logps/rejected": -974.5814208984375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8185299634933472, "rewards/margins": 8.524968147277832, "rewards/rejected": -9.343496322631836, "step": 52640 }, { "epoch": 0.63, "learning_rate": 1.8088595023870822e-06, "logits/chosen": -2.8656914234161377, "logits/rejected": -2.3361942768096924, "logps/chosen": -95.7293701171875, "logps/rejected": -862.5489501953125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.49830666184425354, "rewards/margins": 7.745863914489746, "rewards/rejected": -8.244170188903809, "step": 52650 }, { "epoch": 0.63, "learning_rate": 1.8078556544418498e-06, "logits/chosen": -2.904345750808716, "logits/rejected": -2.243259906768799, "logps/chosen": -110.35044860839844, "logps/rejected": -991.6658325195312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6396313905715942, "rewards/margins": 8.87724494934082, "rewards/rejected": -9.516876220703125, "step": 52660 }, { "epoch": 0.63, "learning_rate": 1.8068519273430973e-06, "logits/chosen": -2.8516201972961426, "logits/rejected": -2.304673910140991, "logps/chosen": -100.96791076660156, "logps/rejected": -953.6887817382812, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5031042098999023, "rewards/margins": 8.64406967163086, "rewards/rejected": -9.147173881530762, "step": 52670 }, { "epoch": 0.63, "learning_rate": 1.8058483212660732e-06, "logits/chosen": -2.8570892810821533, "logits/rejected": -2.1523385047912598, "logps/chosen": -133.40402221679688, "logps/rejected": -1159.22216796875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8206878900527954, "rewards/margins": 10.373077392578125, "rewards/rejected": -11.193763732910156, "step": 52680 }, { "epoch": 0.63, "learning_rate": 1.8048448363860044e-06, "logits/chosen": -2.8950817584991455, "logits/rejected": -2.4730749130249023, "logps/chosen": -121.9195556640625, "logps/rejected": -780.793212890625, "loss": 0.1206, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.775944173336029, "rewards/margins": 6.660672664642334, "rewards/rejected": -7.436617374420166, "step": 52690 }, { "epoch": 0.63, "learning_rate": 1.8038414728780971e-06, "logits/chosen": -2.8711841106414795, "logits/rejected": -2.193570375442505, "logps/chosen": -119.0502700805664, "logps/rejected": -1042.6597900390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7215566635131836, "rewards/margins": 9.306689262390137, "rewards/rejected": -10.02824592590332, "step": 52700 }, { "epoch": 0.63, "learning_rate": 1.8028382309175341e-06, "logits/chosen": -2.913066864013672, "logits/rejected": -2.2591443061828613, "logps/chosen": -122.21085357666016, "logps/rejected": -893.6376953125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7200285792350769, "rewards/margins": 7.839930057525635, "rewards/rejected": -8.559957504272461, "step": 52710 }, { "epoch": 0.63, "learning_rate": 1.8018351106794802e-06, "logits/chosen": -2.8669986724853516, "logits/rejected": -2.603564500808716, "logps/chosen": -74.02435302734375, "logps/rejected": -825.7757568359375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3671949505805969, "rewards/margins": 7.526919364929199, "rewards/rejected": -7.894115447998047, "step": 52720 }, { "epoch": 0.63, "learning_rate": 1.800832112339077e-06, "logits/chosen": -2.8876826763153076, "logits/rejected": -2.414569854736328, "logps/chosen": -97.59563446044922, "logps/rejected": -986.248046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5494743585586548, "rewards/margins": 8.933652877807617, "rewards/rejected": -9.483126640319824, "step": 52730 }, { "epoch": 0.63, "learning_rate": 1.7998292360714448e-06, "logits/chosen": -2.87577223777771, "logits/rejected": -2.2857766151428223, "logps/chosen": -90.24336242675781, "logps/rejected": -885.498046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.48678070306777954, "rewards/margins": 7.983798980712891, "rewards/rejected": -8.470579147338867, "step": 52740 }, { "epoch": 0.63, "learning_rate": 1.7988264820516834e-06, "logits/chosen": -2.8402936458587646, "logits/rejected": -2.3514466285705566, "logps/chosen": -135.71604919433594, "logps/rejected": -847.8868408203125, "loss": 0.3081, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9200658798217773, "rewards/margins": 7.168667793273926, "rewards/rejected": -8.088732719421387, "step": 52750 }, { "epoch": 0.63, "learning_rate": 1.7978238504548714e-06, "logits/chosen": -2.8629610538482666, "logits/rejected": -1.9751636981964111, "logps/chosen": -126.6294937133789, "logps/rejected": -1047.796875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.696806788444519, "rewards/margins": 9.372027397155762, "rewards/rejected": -10.06883430480957, "step": 52760 }, { "epoch": 0.63, "learning_rate": 1.7968213414560643e-06, "logits/chosen": -2.859900951385498, "logits/rejected": -2.2112317085266113, "logps/chosen": -129.02197265625, "logps/rejected": -1057.879638671875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7205356359481812, "rewards/margins": 9.451228141784668, "rewards/rejected": -10.17176342010498, "step": 52770 }, { "epoch": 0.63, "learning_rate": 1.795818955230298e-06, "logits/chosen": -2.8617966175079346, "logits/rejected": -2.292959451675415, "logps/chosen": -134.34658813476562, "logps/rejected": -884.2286987304688, "loss": 0.1038, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8608152270317078, "rewards/margins": 7.5911545753479, "rewards/rejected": -8.451969146728516, "step": 52780 }, { "epoch": 0.63, "learning_rate": 1.7948166919525862e-06, "logits/chosen": -2.8980977535247803, "logits/rejected": -2.420036554336548, "logps/chosen": -99.42205047607422, "logps/rejected": -879.1921997070312, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5239482522010803, "rewards/margins": 7.895855903625488, "rewards/rejected": -8.419803619384766, "step": 52790 }, { "epoch": 0.63, "learning_rate": 1.7938145517979222e-06, "logits/chosen": -2.8811678886413574, "logits/rejected": -2.0495553016662598, "logps/chosen": -159.18423461914062, "logps/rejected": -1177.2044677734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0127354860305786, "rewards/margins": 10.332320213317871, "rewards/rejected": -11.34505558013916, "step": 52800 }, { "epoch": 0.63, "learning_rate": 1.792812534941275e-06, "logits/chosen": -2.912076950073242, "logits/rejected": -2.5636239051818848, "logps/chosen": -108.08424377441406, "logps/rejected": -812.498779296875, "loss": 0.1394, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6865739822387695, "rewards/margins": 7.058180332183838, "rewards/rejected": -7.744753837585449, "step": 52810 }, { "epoch": 0.63, "learning_rate": 1.791810641557596e-06, "logits/chosen": -2.8301379680633545, "logits/rejected": -2.229701042175293, "logps/chosen": -126.2835464477539, "logps/rejected": -984.7974853515625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7478304505348206, "rewards/margins": 8.698698043823242, "rewards/rejected": -9.446529388427734, "step": 52820 }, { "epoch": 0.63, "learning_rate": 1.7908088718218119e-06, "logits/chosen": -2.905557632446289, "logits/rejected": -2.282108783721924, "logps/chosen": -109.0704345703125, "logps/rejected": -1000.2135620117188, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5446601510047913, "rewards/margins": 9.055000305175781, "rewards/rejected": -9.59965991973877, "step": 52830 }, { "epoch": 0.63, "learning_rate": 1.7898072259088287e-06, "logits/chosen": -2.9429574012756348, "logits/rejected": -2.4141898155212402, "logps/chosen": -97.30220794677734, "logps/rejected": -873.6739501953125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5344303846359253, "rewards/margins": 7.817334175109863, "rewards/rejected": -8.351763725280762, "step": 52840 }, { "epoch": 0.63, "learning_rate": 1.7888057039935313e-06, "logits/chosen": -2.9020936489105225, "logits/rejected": -2.3649284839630127, "logps/chosen": -100.86813354492188, "logps/rejected": -880.5186767578125, "loss": 0.1333, "rewards/accuracies": 1.0, "rewards/chosen": -0.553920567035675, "rewards/margins": 7.876019477844238, "rewards/rejected": -8.429939270019531, "step": 52850 }, { "epoch": 0.63, "learning_rate": 1.7878043062507832e-06, "logits/chosen": -2.8938145637512207, "logits/rejected": -2.386944532394409, "logps/chosen": -93.61883544921875, "logps/rejected": -863.5270385742188, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5012179613113403, "rewards/margins": 7.755946159362793, "rewards/rejected": -8.25716495513916, "step": 52860 }, { "epoch": 0.63, "learning_rate": 1.7868030328554251e-06, "logits/chosen": -2.8782386779785156, "logits/rejected": -2.4875802993774414, "logps/chosen": -90.45587921142578, "logps/rejected": -865.7578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.48777732253074646, "rewards/margins": 7.786561489105225, "rewards/rejected": -8.274338722229004, "step": 52870 }, { "epoch": 0.63, "learning_rate": 1.7858018839822762e-06, "logits/chosen": -2.8860416412353516, "logits/rejected": -2.319693088531494, "logps/chosen": -102.28495788574219, "logps/rejected": -892.9244995117188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5563007593154907, "rewards/margins": 7.984854698181152, "rewards/rejected": -8.541154861450195, "step": 52880 }, { "epoch": 0.63, "learning_rate": 1.7848008598061351e-06, "logits/chosen": -2.899122714996338, "logits/rejected": -2.326915979385376, "logps/chosen": -97.38776397705078, "logps/rejected": -917.2755737304688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.49604907631874084, "rewards/margins": 8.291435241699219, "rewards/rejected": -8.787484169006348, "step": 52890 }, { "epoch": 0.63, "learning_rate": 1.7837999605017781e-06, "logits/chosen": -2.902230739593506, "logits/rejected": -2.385066509246826, "logps/chosen": -109.42596435546875, "logps/rejected": -938.4718627929688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5814872980117798, "rewards/margins": 8.414281845092773, "rewards/rejected": -8.995769500732422, "step": 52900 }, { "epoch": 0.63, "learning_rate": 1.7827991862439579e-06, "logits/chosen": -2.8620028495788574, "logits/rejected": -2.377488613128662, "logps/chosen": -108.50443267822266, "logps/rejected": -944.0773315429688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.626049816608429, "rewards/margins": 8.41557788848877, "rewards/rejected": -9.041627883911133, "step": 52910 }, { "epoch": 0.63, "learning_rate": 1.7817985372074082e-06, "logits/chosen": -2.850745677947998, "logits/rejected": -2.334892749786377, "logps/chosen": -100.39805603027344, "logps/rejected": -888.2333984375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5392056107521057, "rewards/margins": 7.953365325927734, "rewards/rejected": -8.492570877075195, "step": 52920 }, { "epoch": 0.63, "learning_rate": 1.7807980135668394e-06, "logits/chosen": -2.8756422996520996, "logits/rejected": -2.375426769256592, "logps/chosen": -101.44815063476562, "logps/rejected": -923.9548950195312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5311318635940552, "rewards/margins": 8.310104370117188, "rewards/rejected": -8.841235160827637, "step": 52930 }, { "epoch": 0.63, "learning_rate": 1.7797976154969392e-06, "logits/chosen": -2.8846263885498047, "logits/rejected": -2.156359910964966, "logps/chosen": -151.43931579589844, "logps/rejected": -968.3336791992188, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.947717010974884, "rewards/margins": 8.327173233032227, "rewards/rejected": -9.274890899658203, "step": 52940 }, { "epoch": 0.63, "learning_rate": 1.7787973431723757e-06, "logits/chosen": -2.8520450592041016, "logits/rejected": -2.206280469894409, "logps/chosen": -125.51898193359375, "logps/rejected": -993.0236206054688, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.7109721302986145, "rewards/margins": 8.821386337280273, "rewards/rejected": -9.53235912322998, "step": 52950 }, { "epoch": 0.63, "learning_rate": 1.777797196767792e-06, "logits/chosen": -2.909933090209961, "logits/rejected": -2.5170657634735107, "logps/chosen": -86.71111297607422, "logps/rejected": -905.5289306640625, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -0.4409189820289612, "rewards/margins": 8.236413955688477, "rewards/rejected": -8.677332878112793, "step": 52960 }, { "epoch": 0.63, "learning_rate": 1.7767971764578128e-06, "logits/chosen": -2.835331439971924, "logits/rejected": -2.428884744644165, "logps/chosen": -78.7264404296875, "logps/rejected": -880.509765625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.3528505861759186, "rewards/margins": 8.062589645385742, "rewards/rejected": -8.415438652038574, "step": 52970 }, { "epoch": 0.63, "learning_rate": 1.7757972824170383e-06, "logits/chosen": -2.882746458053589, "logits/rejected": -2.3162381649017334, "logps/chosen": -94.00340270996094, "logps/rejected": -925.232421875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4759202003479004, "rewards/margins": 8.392932891845703, "rewards/rejected": -8.868853569030762, "step": 52980 }, { "epoch": 0.63, "learning_rate": 1.7747975148200463e-06, "logits/chosen": -2.883805751800537, "logits/rejected": -2.341291904449463, "logps/chosen": -113.4918212890625, "logps/rejected": -970.2056884765625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6218082308769226, "rewards/margins": 8.678167343139648, "rewards/rejected": -9.299975395202637, "step": 52990 }, { "epoch": 0.63, "learning_rate": 1.7737978738413941e-06, "logits/chosen": -2.8866398334503174, "logits/rejected": -2.3104007244110107, "logps/chosen": -104.54389953613281, "logps/rejected": -928.2781372070312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5946694612503052, "rewards/margins": 8.296758651733398, "rewards/rejected": -8.89142894744873, "step": 53000 }, { "epoch": 0.63, "learning_rate": 1.7727983596556165e-06, "logits/chosen": -2.9177258014678955, "logits/rejected": -2.3561322689056396, "logps/chosen": -111.6801528930664, "logps/rejected": -911.11279296875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6109215021133423, "rewards/margins": 8.118690490722656, "rewards/rejected": -8.72961139678955, "step": 53010 }, { "epoch": 0.63, "learning_rate": 1.771798972437226e-06, "logits/chosen": -2.8806347846984863, "logits/rejected": -2.2962520122528076, "logps/chosen": -124.51351165771484, "logps/rejected": -849.2666015625, "loss": 0.0935, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7544602155685425, "rewards/margins": 7.346599578857422, "rewards/rejected": -8.101059913635254, "step": 53020 }, { "epoch": 0.63, "learning_rate": 1.7707997123607116e-06, "logits/chosen": -2.8471479415893555, "logits/rejected": -2.238835334777832, "logps/chosen": -144.5642547607422, "logps/rejected": -943.7197265625, "loss": 0.0614, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9145730137825012, "rewards/margins": 8.129744529724121, "rewards/rejected": -9.044317245483398, "step": 53030 }, { "epoch": 0.63, "learning_rate": 1.7698005796005424e-06, "logits/chosen": -2.899092435836792, "logits/rejected": -2.355588674545288, "logps/chosen": -100.22335052490234, "logps/rejected": -910.8492431640625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.5280001163482666, "rewards/margins": 8.183134078979492, "rewards/rejected": -8.71113395690918, "step": 53040 }, { "epoch": 0.64, "learning_rate": 1.7688015743311644e-06, "logits/chosen": -2.822507381439209, "logits/rejected": -2.0556905269622803, "logps/chosen": -120.31129455566406, "logps/rejected": -904.8179931640625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6721982359886169, "rewards/margins": 7.984557151794434, "rewards/rejected": -8.656756401062012, "step": 53050 }, { "epoch": 0.64, "learning_rate": 1.7678026967270002e-06, "logits/chosen": -2.9214394092559814, "logits/rejected": -2.392456531524658, "logps/chosen": -97.6500015258789, "logps/rejected": -948.0599365234375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.522747814655304, "rewards/margins": 8.567814826965332, "rewards/rejected": -9.09056282043457, "step": 53060 }, { "epoch": 0.64, "learning_rate": 1.7668039469624514e-06, "logits/chosen": -2.8553967475891113, "logits/rejected": -2.0719218254089355, "logps/chosen": -150.43402099609375, "logps/rejected": -1081.54833984375, "loss": 0.1409, "rewards/accuracies": 1.0, "rewards/chosen": -0.9103301167488098, "rewards/margins": 9.471985816955566, "rewards/rejected": -10.382316589355469, "step": 53070 }, { "epoch": 0.64, "learning_rate": 1.7658053252118978e-06, "logits/chosen": -2.813842296600342, "logits/rejected": -2.164616346359253, "logps/chosen": -134.87010192871094, "logps/rejected": -905.1619873046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7924120426177979, "rewards/margins": 7.859882354736328, "rewards/rejected": -8.652294158935547, "step": 53080 }, { "epoch": 0.64, "learning_rate": 1.7648068316496947e-06, "logits/chosen": -2.862152576446533, "logits/rejected": -2.4921743869781494, "logps/chosen": -87.62767791748047, "logps/rejected": -824.54345703125, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -0.46320241689682007, "rewards/margins": 7.379081726074219, "rewards/rejected": -7.842283725738525, "step": 53090 }, { "epoch": 0.64, "learning_rate": 1.763808466450177e-06, "logits/chosen": -2.88169527053833, "logits/rejected": -2.399893045425415, "logps/chosen": -112.8253402709961, "logps/rejected": -999.6004638671875, "loss": 0.0939, "rewards/accuracies": 1.0, "rewards/chosen": -0.6399491429328918, "rewards/margins": 8.963005065917969, "rewards/rejected": -9.602953910827637, "step": 53100 }, { "epoch": 0.64, "learning_rate": 1.762810229787656e-06, "logits/chosen": -2.885714054107666, "logits/rejected": -2.3507845401763916, "logps/chosen": -110.87882232666016, "logps/rejected": -938.0573120117188, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6208392977714539, "rewards/margins": 8.373299598693848, "rewards/rejected": -8.994138717651367, "step": 53110 }, { "epoch": 0.64, "learning_rate": 1.7618121218364222e-06, "logits/chosen": -2.902600049972534, "logits/rejected": -2.3355586528778076, "logps/chosen": -119.7194595336914, "logps/rejected": -964.8582153320312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6631780862808228, "rewards/margins": 8.580949783325195, "rewards/rejected": -9.24412727355957, "step": 53120 }, { "epoch": 0.64, "learning_rate": 1.7608141427707415e-06, "logits/chosen": -2.919952869415283, "logits/rejected": -2.386840581893921, "logps/chosen": -91.02767944335938, "logps/rejected": -844.5491333007812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.49805203080177307, "rewards/margins": 7.5596513748168945, "rewards/rejected": -8.057703018188477, "step": 53130 }, { "epoch": 0.64, "learning_rate": 1.759816292764858e-06, "logits/chosen": -2.882589340209961, "logits/rejected": -2.2402405738830566, "logps/chosen": -144.5514373779297, "logps/rejected": -979.91015625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9368540048599243, "rewards/margins": 8.450437545776367, "rewards/rejected": -9.387290954589844, "step": 53140 }, { "epoch": 0.64, "learning_rate": 1.7588185719929946e-06, "logits/chosen": -2.898958444595337, "logits/rejected": -2.5499730110168457, "logps/chosen": -104.22769927978516, "logps/rejected": -872.4058837890625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6272886991500854, "rewards/margins": 7.712170600891113, "rewards/rejected": -8.339459419250488, "step": 53150 }, { "epoch": 0.64, "learning_rate": 1.7578209806293495e-06, "logits/chosen": -2.8926501274108887, "logits/rejected": -2.1607000827789307, "logps/chosen": -126.217041015625, "logps/rejected": -1026.2872314453125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7196000218391418, "rewards/margins": 9.130463600158691, "rewards/rejected": -9.850062370300293, "step": 53160 }, { "epoch": 0.64, "learning_rate": 1.7568235188481e-06, "logits/chosen": -2.8872556686401367, "logits/rejected": -2.303056001663208, "logps/chosen": -118.95915222167969, "logps/rejected": -917.9703369140625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6481935381889343, "rewards/margins": 8.152984619140625, "rewards/rejected": -8.801178932189941, "step": 53170 }, { "epoch": 0.64, "learning_rate": 1.7558261868234006e-06, "logits/chosen": -2.892585039138794, "logits/rejected": -2.564296007156372, "logps/chosen": -80.81959533691406, "logps/rejected": -841.8532104492188, "loss": 0.114, "rewards/accuracies": 1.0, "rewards/chosen": -0.41866597533226013, "rewards/margins": 7.624608039855957, "rewards/rejected": -8.043272972106934, "step": 53180 }, { "epoch": 0.64, "learning_rate": 1.7548289847293814e-06, "logits/chosen": -2.8546693325042725, "logits/rejected": -2.3654353618621826, "logps/chosen": -106.52143859863281, "logps/rejected": -917.0178833007812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5857807993888855, "rewards/margins": 8.196626663208008, "rewards/rejected": -8.782407760620117, "step": 53190 }, { "epoch": 0.64, "learning_rate": 1.753831912740152e-06, "logits/chosen": -2.8907179832458496, "logits/rejected": -2.3947834968566895, "logps/chosen": -100.67658996582031, "logps/rejected": -892.4518432617188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5865145921707153, "rewards/margins": 7.954111576080322, "rewards/rejected": -8.540627479553223, "step": 53200 }, { "epoch": 0.64, "learning_rate": 1.752834971029798e-06, "logits/chosen": -2.8739519119262695, "logits/rejected": -2.4143152236938477, "logps/chosen": -91.71397399902344, "logps/rejected": -866.9925537109375, "loss": 0.0915, "rewards/accuracies": 1.0, "rewards/chosen": -0.4902319312095642, "rewards/margins": 7.7908935546875, "rewards/rejected": -8.28112506866455, "step": 53210 }, { "epoch": 0.64, "learning_rate": 1.7518381597723839e-06, "logits/chosen": -2.927994966506958, "logits/rejected": -2.453700542449951, "logps/chosen": -113.53929138183594, "logps/rejected": -921.3067626953125, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.6517091989517212, "rewards/margins": 8.16010570526123, "rewards/rejected": -8.81181526184082, "step": 53220 }, { "epoch": 0.64, "learning_rate": 1.7508414791419482e-06, "logits/chosen": -2.86395525932312, "logits/rejected": -2.260544776916504, "logps/chosen": -145.39707946777344, "logps/rejected": -1023.8103637695312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.879030704498291, "rewards/margins": 8.951162338256836, "rewards/rejected": -9.830191612243652, "step": 53230 }, { "epoch": 0.64, "learning_rate": 1.7498449293125096e-06, "logits/chosen": -2.870189666748047, "logits/rejected": -2.423438549041748, "logps/chosen": -91.07817077636719, "logps/rejected": -905.6013793945312, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.4800441861152649, "rewards/margins": 8.182276725769043, "rewards/rejected": -8.662322044372559, "step": 53240 }, { "epoch": 0.64, "learning_rate": 1.7488485104580634e-06, "logits/chosen": -2.8554654121398926, "logits/rejected": -2.2320079803466797, "logps/chosen": -121.61934661865234, "logps/rejected": -994.9443359375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.716133713722229, "rewards/margins": 8.841861724853516, "rewards/rejected": -9.557995796203613, "step": 53250 }, { "epoch": 0.64, "learning_rate": 1.7478522227525807e-06, "logits/chosen": -2.9376842975616455, "logits/rejected": -2.457523822784424, "logps/chosen": -105.66455078125, "logps/rejected": -903.2507934570312, "loss": 0.2683, "rewards/accuracies": 1.0, "rewards/chosen": -0.5824911594390869, "rewards/margins": 8.052915573120117, "rewards/rejected": -8.635406494140625, "step": 53260 }, { "epoch": 0.64, "learning_rate": 1.7468560663700107e-06, "logits/chosen": -2.815481185913086, "logits/rejected": -2.2318968772888184, "logps/chosen": -113.67808532714844, "logps/rejected": -989.9364013671875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6975177526473999, "rewards/margins": 8.804993629455566, "rewards/rejected": -9.502511978149414, "step": 53270 }, { "epoch": 0.64, "learning_rate": 1.7458600414842806e-06, "logits/chosen": -2.8451590538024902, "logits/rejected": -2.1748287677764893, "logps/chosen": -135.42617797851562, "logps/rejected": -1079.743408203125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8302484750747681, "rewards/margins": 9.558345794677734, "rewards/rejected": -10.388595581054688, "step": 53280 }, { "epoch": 0.64, "learning_rate": 1.7448641482692919e-06, "logits/chosen": -2.9012088775634766, "logits/rejected": -2.374629497528076, "logps/chosen": -109.12544250488281, "logps/rejected": -887.0045166015625, "loss": 0.0958, "rewards/accuracies": 1.0, "rewards/chosen": -0.6232384443283081, "rewards/margins": 7.865904331207275, "rewards/rejected": -8.489142417907715, "step": 53290 }, { "epoch": 0.64, "learning_rate": 1.7438683868989256e-06, "logits/chosen": -2.883617639541626, "logits/rejected": -2.357321262359619, "logps/chosen": -90.01753234863281, "logps/rejected": -861.1095581054688, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.45856037735939026, "rewards/margins": 7.7663254737854, "rewards/rejected": -8.224885940551758, "step": 53300 }, { "epoch": 0.64, "learning_rate": 1.742872757547039e-06, "logits/chosen": -2.925915241241455, "logits/rejected": -2.3411216735839844, "logps/chosen": -120.684326171875, "logps/rejected": -1074.0101318359375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.6849675178527832, "rewards/margins": 9.646048545837402, "rewards/rejected": -10.331016540527344, "step": 53310 }, { "epoch": 0.64, "learning_rate": 1.7418772603874666e-06, "logits/chosen": -2.8975372314453125, "logits/rejected": -2.3211870193481445, "logps/chosen": -113.89581298828125, "logps/rejected": -904.2264404296875, "loss": 0.0938, "rewards/accuracies": 1.0, "rewards/chosen": -0.6607559323310852, "rewards/margins": 8.002037048339844, "rewards/rejected": -8.662793159484863, "step": 53320 }, { "epoch": 0.64, "learning_rate": 1.7408818955940183e-06, "logits/chosen": -2.8772428035736084, "logits/rejected": -2.5595240592956543, "logps/chosen": -70.07313537597656, "logps/rejected": -817.93212890625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.337678462266922, "rewards/margins": 7.478285312652588, "rewards/rejected": -7.815962791442871, "step": 53330 }, { "epoch": 0.64, "learning_rate": 1.7398866633404825e-06, "logits/chosen": -2.8637423515319824, "logits/rejected": -2.1800284385681152, "logps/chosen": -146.77772521972656, "logps/rejected": -961.1419677734375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9543678164482117, "rewards/margins": 8.251933097839355, "rewards/rejected": -9.20630168914795, "step": 53340 }, { "epoch": 0.64, "learning_rate": 1.7388915638006255e-06, "logits/chosen": -2.90625, "logits/rejected": -2.416954755783081, "logps/chosen": -125.24580383300781, "logps/rejected": -964.4992065429688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7789262533187866, "rewards/margins": 8.45276927947998, "rewards/rejected": -9.231695175170898, "step": 53350 }, { "epoch": 0.64, "learning_rate": 1.7378965971481863e-06, "logits/chosen": -2.9231886863708496, "logits/rejected": -2.198489189147949, "logps/chosen": -141.8405303955078, "logps/rejected": -981.7169799804688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8407829999923706, "rewards/margins": 8.565507888793945, "rewards/rejected": -9.406291007995605, "step": 53360 }, { "epoch": 0.64, "learning_rate": 1.7369017635568848e-06, "logits/chosen": -2.9358866214752197, "logits/rejected": -2.218468189239502, "logps/chosen": -89.01310729980469, "logps/rejected": -956.7210083007812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.44108086824417114, "rewards/margins": 8.731127738952637, "rewards/rejected": -9.172209739685059, "step": 53370 }, { "epoch": 0.64, "learning_rate": 1.7359070632004165e-06, "logits/chosen": -2.861321210861206, "logits/rejected": -2.043586492538452, "logps/chosen": -142.05072021484375, "logps/rejected": -1108.9554443359375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8786411285400391, "rewards/margins": 9.807435989379883, "rewards/rejected": -10.686078071594238, "step": 53380 }, { "epoch": 0.64, "learning_rate": 1.734912496252451e-06, "logits/chosen": -2.9097037315368652, "logits/rejected": -2.5597286224365234, "logps/chosen": -97.2780990600586, "logps/rejected": -892.4382934570312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.47573432326316833, "rewards/margins": 8.051340103149414, "rewards/rejected": -8.527074813842773, "step": 53390 }, { "epoch": 0.64, "learning_rate": 1.7339180628866405e-06, "logits/chosen": -2.9141440391540527, "logits/rejected": -2.3749916553497314, "logps/chosen": -101.32853698730469, "logps/rejected": -827.3943481445312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5199825167655945, "rewards/margins": 7.365118503570557, "rewards/rejected": -7.8850998878479, "step": 53400 }, { "epoch": 0.64, "learning_rate": 1.7329237632766082e-06, "logits/chosen": -2.925536632537842, "logits/rejected": -2.399183511734009, "logps/chosen": -118.20650482177734, "logps/rejected": -924.3820190429688, "loss": 0.091, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6973124146461487, "rewards/margins": 8.148914337158203, "rewards/rejected": -8.84622573852539, "step": 53410 }, { "epoch": 0.64, "learning_rate": 1.731929597595956e-06, "logits/chosen": -2.8779778480529785, "logits/rejected": -2.4300484657287598, "logps/chosen": -89.42540740966797, "logps/rejected": -904.4429931640625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4604770541191101, "rewards/margins": 8.19670581817627, "rewards/rejected": -8.657182693481445, "step": 53420 }, { "epoch": 0.64, "learning_rate": 1.7309355660182632e-06, "logits/chosen": -2.8719482421875, "logits/rejected": -2.283806562423706, "logps/chosen": -111.44085693359375, "logps/rejected": -1023.6510620117188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6207446455955505, "rewards/margins": 9.229523658752441, "rewards/rejected": -9.85026741027832, "step": 53430 }, { "epoch": 0.64, "learning_rate": 1.7299416687170847e-06, "logits/chosen": -2.85625958442688, "logits/rejected": -2.3308463096618652, "logps/chosen": -103.1976547241211, "logps/rejected": -965.7536010742188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5276853442192078, "rewards/margins": 8.746832847595215, "rewards/rejected": -9.274518966674805, "step": 53440 }, { "epoch": 0.64, "learning_rate": 1.7289479058659517e-06, "logits/chosen": -2.9171345233917236, "logits/rejected": -2.350616931915283, "logps/chosen": -114.25399017333984, "logps/rejected": -939.1763916015625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.6505813002586365, "rewards/margins": 8.346903800964355, "rewards/rejected": -8.997485160827637, "step": 53450 }, { "epoch": 0.64, "learning_rate": 1.7279542776383728e-06, "logits/chosen": -2.8542933464050293, "logits/rejected": -2.288020610809326, "logps/chosen": -103.8831787109375, "logps/rejected": -824.8665161132812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5696415901184082, "rewards/margins": 7.29917049407959, "rewards/rejected": -7.868811130523682, "step": 53460 }, { "epoch": 0.64, "learning_rate": 1.7269607842078335e-06, "logits/chosen": -2.89300537109375, "logits/rejected": -2.431601047515869, "logps/chosen": -93.39535522460938, "logps/rejected": -828.7623291015625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5092582702636719, "rewards/margins": 7.389248847961426, "rewards/rejected": -7.898508548736572, "step": 53470 }, { "epoch": 0.64, "learning_rate": 1.7259674257477937e-06, "logits/chosen": -2.8694629669189453, "logits/rejected": -2.277505397796631, "logps/chosen": -143.60757446289062, "logps/rejected": -898.8333740234375, "loss": 0.104, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9105297923088074, "rewards/margins": 7.688779354095459, "rewards/rejected": -8.599308967590332, "step": 53480 }, { "epoch": 0.64, "learning_rate": 1.7249742024316912e-06, "logits/chosen": -2.8866021633148193, "logits/rejected": -2.1532700061798096, "logps/chosen": -116.167236328125, "logps/rejected": -985.7205200195312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6102768182754517, "rewards/margins": 8.856188774108887, "rewards/rejected": -9.466465950012207, "step": 53490 }, { "epoch": 0.64, "learning_rate": 1.7239811144329416e-06, "logits/chosen": -2.898515462875366, "logits/rejected": -2.4820942878723145, "logps/chosen": -79.29016876220703, "logps/rejected": -830.5515747070312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.36797896027565, "rewards/margins": 7.560781955718994, "rewards/rejected": -7.9287614822387695, "step": 53500 }, { "epoch": 0.64, "learning_rate": 1.722988161924933e-06, "logits/chosen": -2.918102741241455, "logits/rejected": -2.239203691482544, "logps/chosen": -135.23733520507812, "logps/rejected": -1100.31787109375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8266007304191589, "rewards/margins": 9.756258010864258, "rewards/rejected": -10.58285903930664, "step": 53510 }, { "epoch": 0.64, "learning_rate": 1.7219953450810335e-06, "logits/chosen": -2.8850159645080566, "logits/rejected": -2.325025796890259, "logps/chosen": -107.407470703125, "logps/rejected": -1026.488525390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5906229615211487, "rewards/margins": 9.270620346069336, "rewards/rejected": -9.86124324798584, "step": 53520 }, { "epoch": 0.64, "learning_rate": 1.7210026640745857e-06, "logits/chosen": -2.869330883026123, "logits/rejected": -2.5406899452209473, "logps/chosen": -83.23368835449219, "logps/rejected": -850.5836791992188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.41552743315696716, "rewards/margins": 7.715590476989746, "rewards/rejected": -8.13111686706543, "step": 53530 }, { "epoch": 0.64, "learning_rate": 1.7200101190789105e-06, "logits/chosen": -2.842909336090088, "logits/rejected": -2.1630477905273438, "logps/chosen": -129.71450805664062, "logps/rejected": -1159.3499755859375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7541902661323547, "rewards/margins": 10.423402786254883, "rewards/rejected": -11.177594184875488, "step": 53540 }, { "epoch": 0.64, "learning_rate": 1.7190177102673006e-06, "logits/chosen": -2.875977039337158, "logits/rejected": -2.1905677318573, "logps/chosen": -144.87545776367188, "logps/rejected": -1102.4210205078125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8892127275466919, "rewards/margins": 9.722936630249023, "rewards/rejected": -10.612150192260742, "step": 53550 }, { "epoch": 0.64, "learning_rate": 1.71802543781303e-06, "logits/chosen": -2.890993118286133, "logits/rejected": -2.451500415802002, "logps/chosen": -96.31678009033203, "logps/rejected": -900.4208984375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5175379514694214, "rewards/margins": 8.097166061401367, "rewards/rejected": -8.614704132080078, "step": 53560 }, { "epoch": 0.64, "learning_rate": 1.7170333018893465e-06, "logits/chosen": -2.835843801498413, "logits/rejected": -2.304163694381714, "logps/chosen": -128.10452270507812, "logps/rejected": -811.5745239257812, "loss": 0.11, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8098467588424683, "rewards/margins": 6.915935516357422, "rewards/rejected": -7.7257819175720215, "step": 53570 }, { "epoch": 0.64, "learning_rate": 1.7160413026694733e-06, "logits/chosen": -2.8779098987579346, "logits/rejected": -2.2598819732666016, "logps/chosen": -113.62089538574219, "logps/rejected": -965.0515747070312, "loss": 0.0938, "rewards/accuracies": 1.0, "rewards/chosen": -0.6038802862167358, "rewards/margins": 8.65124225616455, "rewards/rejected": -9.255122184753418, "step": 53580 }, { "epoch": 0.64, "learning_rate": 1.7150494403266115e-06, "logits/chosen": -2.8800113201141357, "logits/rejected": -2.278754949569702, "logps/chosen": -107.0156478881836, "logps/rejected": -981.1575927734375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6104398965835571, "rewards/margins": 8.81943416595459, "rewards/rejected": -9.429874420166016, "step": 53590 }, { "epoch": 0.64, "learning_rate": 1.7140577150339377e-06, "logits/chosen": -2.9386494159698486, "logits/rejected": -2.410533905029297, "logps/chosen": -119.55538177490234, "logps/rejected": -930.0499877929688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6391147375106812, "rewards/margins": 8.271879196166992, "rewards/rejected": -8.910995483398438, "step": 53600 }, { "epoch": 0.64, "learning_rate": 1.7130661269646038e-06, "logits/chosen": -2.891963243484497, "logits/rejected": -2.4097065925598145, "logps/chosen": -132.69589233398438, "logps/rejected": -875.0787353515625, "loss": 0.1387, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8582590222358704, "rewards/margins": 7.504673004150391, "rewards/rejected": -8.362932205200195, "step": 53610 }, { "epoch": 0.64, "learning_rate": 1.7120746762917378e-06, "logits/chosen": -2.8971588611602783, "logits/rejected": -2.422577381134033, "logps/chosen": -106.164794921875, "logps/rejected": -931.3297119140625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6097227931022644, "rewards/margins": 8.325788497924805, "rewards/rejected": -8.935511589050293, "step": 53620 }, { "epoch": 0.64, "learning_rate": 1.7110833631884455e-06, "logits/chosen": -2.8867926597595215, "logits/rejected": -2.2262825965881348, "logps/chosen": -137.31385803222656, "logps/rejected": -1107.506591796875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8288006782531738, "rewards/margins": 9.821776390075684, "rewards/rejected": -10.650578498840332, "step": 53630 }, { "epoch": 0.64, "learning_rate": 1.7100921878278073e-06, "logits/chosen": -2.887423038482666, "logits/rejected": -2.2577996253967285, "logps/chosen": -128.09310913085938, "logps/rejected": -1069.72412109375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7346900105476379, "rewards/margins": 9.562670707702637, "rewards/rejected": -10.297362327575684, "step": 53640 }, { "epoch": 0.64, "learning_rate": 1.7091011503828786e-06, "logits/chosen": -2.887141466140747, "logits/rejected": -2.358366012573242, "logps/chosen": -111.5020523071289, "logps/rejected": -994.8253173828125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6576336026191711, "rewards/margins": 8.89081859588623, "rewards/rejected": -9.548452377319336, "step": 53650 }, { "epoch": 0.64, "learning_rate": 1.7081102510266928e-06, "logits/chosen": -2.873504638671875, "logits/rejected": -2.4929096698760986, "logps/chosen": -120.4576187133789, "logps/rejected": -787.5452880859375, "loss": 0.1501, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.772392213344574, "rewards/margins": 6.730129241943359, "rewards/rejected": -7.5025224685668945, "step": 53660 }, { "epoch": 0.64, "learning_rate": 1.707119489932258e-06, "logits/chosen": -2.8899364471435547, "logits/rejected": -2.3299448490142822, "logps/chosen": -86.83772277832031, "logps/rejected": -881.5147705078125, "loss": 0.0879, "rewards/accuracies": 1.0, "rewards/chosen": -0.43746131658554077, "rewards/margins": 8.004999160766602, "rewards/rejected": -8.442461013793945, "step": 53670 }, { "epoch": 0.64, "learning_rate": 1.7061288672725573e-06, "logits/chosen": -2.829092502593994, "logits/rejected": -2.5028584003448486, "logps/chosen": -103.96370697021484, "logps/rejected": -829.5975341796875, "loss": 0.044, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6134641766548157, "rewards/margins": 7.304111480712891, "rewards/rejected": -7.917575836181641, "step": 53680 }, { "epoch": 0.64, "learning_rate": 1.7051383832205521e-06, "logits/chosen": -2.8980953693389893, "logits/rejected": -2.2938153743743896, "logps/chosen": -129.3963165283203, "logps/rejected": -946.3226318359375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.8013936281204224, "rewards/margins": 8.257095336914062, "rewards/rejected": -9.058488845825195, "step": 53690 }, { "epoch": 0.64, "learning_rate": 1.7041480379491777e-06, "logits/chosen": -2.863893985748291, "logits/rejected": -2.3891658782958984, "logps/chosen": -112.23933410644531, "logps/rejected": -911.68603515625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6704100370407104, "rewards/margins": 8.046526908874512, "rewards/rejected": -8.716937065124512, "step": 53700 }, { "epoch": 0.64, "learning_rate": 1.703157831631345e-06, "logits/chosen": -2.8859710693359375, "logits/rejected": -2.4807090759277344, "logps/chosen": -117.2824935913086, "logps/rejected": -750.309326171875, "loss": 0.1364, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7595804333686829, "rewards/margins": 6.369009494781494, "rewards/rejected": -7.1285905838012695, "step": 53710 }, { "epoch": 0.64, "learning_rate": 1.7021677644399417e-06, "logits/chosen": -2.8949637413024902, "logits/rejected": -2.1428027153015137, "logps/chosen": -136.4640655517578, "logps/rejected": -1013.9142456054688, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -0.8030039072036743, "rewards/margins": 8.930952072143555, "rewards/rejected": -9.733956336975098, "step": 53720 }, { "epoch": 0.64, "learning_rate": 1.701177836547831e-06, "logits/chosen": -2.8987185955047607, "logits/rejected": -2.383476495742798, "logps/chosen": -141.91819763183594, "logps/rejected": -929.5634765625, "loss": 0.1509, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9260246157646179, "rewards/margins": 7.981305122375488, "rewards/rejected": -8.907330513000488, "step": 53730 }, { "epoch": 0.64, "learning_rate": 1.7001880481278519e-06, "logits/chosen": -2.876274585723877, "logits/rejected": -2.3840746879577637, "logps/chosen": -113.70967864990234, "logps/rejected": -963.3961181640625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6460782885551453, "rewards/margins": 8.596292495727539, "rewards/rejected": -9.24237060546875, "step": 53740 }, { "epoch": 0.64, "learning_rate": 1.6991983993528178e-06, "logits/chosen": -2.8757805824279785, "logits/rejected": -2.293027400970459, "logps/chosen": -127.78743743896484, "logps/rejected": -1033.028564453125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.807390570640564, "rewards/margins": 9.122503280639648, "rewards/rejected": -9.929893493652344, "step": 53750 }, { "epoch": 0.64, "learning_rate": 1.6982088903955186e-06, "logits/chosen": -2.862215757369995, "logits/rejected": -2.217710494995117, "logps/chosen": -126.19700622558594, "logps/rejected": -923.5900268554688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7380377650260925, "rewards/margins": 8.096539497375488, "rewards/rejected": -8.834575653076172, "step": 53760 }, { "epoch": 0.64, "learning_rate": 1.6972195214287213e-06, "logits/chosen": -2.885772943496704, "logits/rejected": -2.3229360580444336, "logps/chosen": -104.24942779541016, "logps/rejected": -830.6472778320312, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.6049968004226685, "rewards/margins": 7.319639682769775, "rewards/rejected": -7.9246368408203125, "step": 53770 }, { "epoch": 0.64, "learning_rate": 1.696230292625165e-06, "logits/chosen": -2.861156940460205, "logits/rejected": -2.330064296722412, "logps/chosen": -97.14405822753906, "logps/rejected": -959.2078857421875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5155889391899109, "rewards/margins": 8.686746597290039, "rewards/rejected": -9.2023344039917, "step": 53780 }, { "epoch": 0.64, "learning_rate": 1.6952412041575675e-06, "logits/chosen": -2.8496830463409424, "logits/rejected": -2.3648133277893066, "logps/chosen": -110.57540130615234, "logps/rejected": -899.7392578125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6438679099082947, "rewards/margins": 7.957705020904541, "rewards/rejected": -8.60157299041748, "step": 53790 }, { "epoch": 0.64, "learning_rate": 1.694252256198621e-06, "logits/chosen": -2.9131457805633545, "logits/rejected": -2.308297872543335, "logps/chosen": -104.87581634521484, "logps/rejected": -971.1022338867188, "loss": 0.1456, "rewards/accuracies": 1.0, "rewards/chosen": -0.5584240555763245, "rewards/margins": 8.77021312713623, "rewards/rejected": -9.328638076782227, "step": 53800 }, { "epoch": 0.64, "learning_rate": 1.6932634489209915e-06, "logits/chosen": -2.840268611907959, "logits/rejected": -2.2281038761138916, "logps/chosen": -95.4429702758789, "logps/rejected": -965.8638916015625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.47265368700027466, "rewards/margins": 8.795931816101074, "rewards/rejected": -9.268586158752441, "step": 53810 }, { "epoch": 0.64, "learning_rate": 1.6922747824973243e-06, "logits/chosen": -2.9065916538238525, "logits/rejected": -2.4560694694519043, "logps/chosen": -83.42608642578125, "logps/rejected": -835.68896484375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4085124135017395, "rewards/margins": 7.572176933288574, "rewards/rejected": -7.980690002441406, "step": 53820 }, { "epoch": 0.64, "learning_rate": 1.6912862571002364e-06, "logits/chosen": -2.9562575817108154, "logits/rejected": -2.6031999588012695, "logps/chosen": -80.073486328125, "logps/rejected": -879.60546875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.38664665818214417, "rewards/margins": 8.025152206420898, "rewards/rejected": -8.411798477172852, "step": 53830 }, { "epoch": 0.64, "learning_rate": 1.6902978729023218e-06, "logits/chosen": -2.9195449352264404, "logits/rejected": -2.6912951469421387, "logps/chosen": -64.11915588378906, "logps/rejected": -745.9189453125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.27193981409072876, "rewards/margins": 6.831411838531494, "rewards/rejected": -7.103350639343262, "step": 53840 }, { "epoch": 0.64, "learning_rate": 1.6893096300761496e-06, "logits/chosen": -2.849152088165283, "logits/rejected": -2.2754690647125244, "logps/chosen": -141.03590393066406, "logps/rejected": -1019.6829223632812, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.9476479291915894, "rewards/margins": 8.851531028747559, "rewards/rejected": -9.799180030822754, "step": 53850 }, { "epoch": 0.64, "learning_rate": 1.6883215287942655e-06, "logits/chosen": -2.9286396503448486, "logits/rejected": -2.5234594345092773, "logps/chosen": -94.54572296142578, "logps/rejected": -886.75634765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5148703455924988, "rewards/margins": 7.966236114501953, "rewards/rejected": -8.481107711791992, "step": 53860 }, { "epoch": 0.64, "learning_rate": 1.687333569229187e-06, "logits/chosen": -2.9486048221588135, "logits/rejected": -2.698538303375244, "logps/chosen": -74.52883911132812, "logps/rejected": -792.3203125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3811787962913513, "rewards/margins": 7.166802406311035, "rewards/rejected": -7.547981262207031, "step": 53870 }, { "epoch": 0.64, "learning_rate": 1.6863457515534104e-06, "logits/chosen": -2.859266996383667, "logits/rejected": -2.2898309230804443, "logps/chosen": -118.89359283447266, "logps/rejected": -1001.49267578125, "loss": 0.0229, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6529709100723267, "rewards/margins": 8.972698211669922, "rewards/rejected": -9.625669479370117, "step": 53880 }, { "epoch": 0.65, "learning_rate": 1.6853580759394066e-06, "logits/chosen": -2.8792710304260254, "logits/rejected": -2.4250669479370117, "logps/chosen": -92.33003997802734, "logps/rejected": -873.4158935546875, "loss": 0.1274, "rewards/accuracies": 1.0, "rewards/chosen": -0.4748927056789398, "rewards/margins": 7.8785529136657715, "rewards/rejected": -8.353446006774902, "step": 53890 }, { "epoch": 0.65, "learning_rate": 1.6843705425596194e-06, "logits/chosen": -2.9066734313964844, "logits/rejected": -2.3686892986297607, "logps/chosen": -122.3363265991211, "logps/rejected": -891.9949951171875, "loss": 0.1103, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7446410059928894, "rewards/margins": 7.777314186096191, "rewards/rejected": -8.521955490112305, "step": 53900 }, { "epoch": 0.65, "learning_rate": 1.6833831515864702e-06, "logits/chosen": -2.8524911403656006, "logits/rejected": -2.3394992351531982, "logps/chosen": -142.32327270507812, "logps/rejected": -842.9774169921875, "loss": 0.1496, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9469278454780579, "rewards/margins": 7.106318473815918, "rewards/rejected": -8.05324649810791, "step": 53910 }, { "epoch": 0.65, "learning_rate": 1.682395903192355e-06, "logits/chosen": -2.8780503273010254, "logits/rejected": -2.328453779220581, "logps/chosen": -98.0611572265625, "logps/rejected": -951.8179931640625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.4940149188041687, "rewards/margins": 8.63885498046875, "rewards/rejected": -9.1328706741333, "step": 53920 }, { "epoch": 0.65, "learning_rate": 1.681408797549644e-06, "logits/chosen": -2.925544261932373, "logits/rejected": -2.678889274597168, "logps/chosen": -113.18485260009766, "logps/rejected": -821.0242919921875, "loss": 0.1221, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7309142351150513, "rewards/margins": 7.101840019226074, "rewards/rejected": -7.832754611968994, "step": 53930 }, { "epoch": 0.65, "learning_rate": 1.6804218348306833e-06, "logits/chosen": -2.919318437576294, "logits/rejected": -2.580491065979004, "logps/chosen": -82.50054931640625, "logps/rejected": -850.6441650390625, "loss": 0.1037, "rewards/accuracies": 1.0, "rewards/chosen": -0.4314201772212982, "rewards/margins": 7.696225166320801, "rewards/rejected": -8.127645492553711, "step": 53940 }, { "epoch": 0.65, "learning_rate": 1.679435015207794e-06, "logits/chosen": -2.9146080017089844, "logits/rejected": -2.3568851947784424, "logps/chosen": -98.0143051147461, "logps/rejected": -940.7951049804688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5491108894348145, "rewards/margins": 8.468599319458008, "rewards/rejected": -9.017709732055664, "step": 53950 }, { "epoch": 0.65, "learning_rate": 1.6784483388532723e-06, "logits/chosen": -2.9463140964508057, "logits/rejected": -2.5749926567077637, "logps/chosen": -105.47444915771484, "logps/rejected": -853.9605712890625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6301641464233398, "rewards/margins": 7.517856597900391, "rewards/rejected": -8.148019790649414, "step": 53960 }, { "epoch": 0.65, "learning_rate": 1.6774618059393888e-06, "logits/chosen": -2.899244785308838, "logits/rejected": -2.3114919662475586, "logps/chosen": -110.99169921875, "logps/rejected": -1037.4266357421875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6539931893348694, "rewards/margins": 9.317296981811523, "rewards/rejected": -9.971290588378906, "step": 53970 }, { "epoch": 0.65, "learning_rate": 1.6764754166383892e-06, "logits/chosen": -2.877277374267578, "logits/rejected": -2.4743456840515137, "logps/chosen": -100.84391784667969, "logps/rejected": -853.52294921875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6091575026512146, "rewards/margins": 7.550845146179199, "rewards/rejected": -8.160003662109375, "step": 53980 }, { "epoch": 0.65, "learning_rate": 1.6754891711224952e-06, "logits/chosen": -2.8734195232391357, "logits/rejected": -2.237879514694214, "logps/chosen": -101.8714370727539, "logps/rejected": -1016.8439331054688, "loss": 0.3056, "rewards/accuracies": 1.0, "rewards/chosen": -0.5310796499252319, "rewards/margins": 9.22875690460205, "rewards/rejected": -9.759836196899414, "step": 53990 }, { "epoch": 0.65, "learning_rate": 1.6745030695639014e-06, "logits/chosen": -2.9544646739959717, "logits/rejected": -2.516822338104248, "logps/chosen": -84.93732452392578, "logps/rejected": -863.91015625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.40461963415145874, "rewards/margins": 7.847738742828369, "rewards/rejected": -8.252359390258789, "step": 54000 }, { "epoch": 0.65, "eval_logits/chosen": -2.8945693969726562, "eval_logits/rejected": -1.8327827453613281, "eval_logps/chosen": -241.9466094970703, "eval_logps/rejected": -1121.84228515625, "eval_loss": 0.0014494138304144144, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -1.8076633214950562, "eval_rewards/margins": 8.94350528717041, "eval_rewards/rejected": -10.75117015838623, "eval_runtime": 1.2163, "eval_samples_per_second": 4.111, "eval_steps_per_second": 2.466, "step": 54000 }, { "epoch": 0.65, "learning_rate": 1.6735171121347792e-06, "logits/chosen": -2.911400318145752, "logits/rejected": -2.205448627471924, "logps/chosen": -128.72695922851562, "logps/rejected": -1088.244873046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.757562518119812, "rewards/margins": 9.713805198669434, "rewards/rejected": -10.471366882324219, "step": 54010 }, { "epoch": 0.65, "learning_rate": 1.672531299007274e-06, "logits/chosen": -2.909355640411377, "logits/rejected": -2.4884493350982666, "logps/chosen": -91.61376953125, "logps/rejected": -926.4781494140625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.45223531126976013, "rewards/margins": 8.420967102050781, "rewards/rejected": -8.873202323913574, "step": 54020 }, { "epoch": 0.65, "learning_rate": 1.6715456303535055e-06, "logits/chosen": -2.8975582122802734, "logits/rejected": -2.4159436225891113, "logps/chosen": -83.84115600585938, "logps/rejected": -780.6837158203125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.38711896538734436, "rewards/margins": 7.042217254638672, "rewards/rejected": -7.429335117340088, "step": 54030 }, { "epoch": 0.65, "learning_rate": 1.6705601063455683e-06, "logits/chosen": -2.8621063232421875, "logits/rejected": -2.1330859661102295, "logps/chosen": -127.52799224853516, "logps/rejected": -958.7799682617188, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6947711706161499, "rewards/margins": 8.497274398803711, "rewards/rejected": -9.192044258117676, "step": 54040 }, { "epoch": 0.65, "learning_rate": 1.6695747271555337e-06, "logits/chosen": -2.8674793243408203, "logits/rejected": -2.312936544418335, "logps/chosen": -117.19889068603516, "logps/rejected": -934.9772338867188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6844289302825928, "rewards/margins": 8.266607284545898, "rewards/rejected": -8.95103645324707, "step": 54050 }, { "epoch": 0.65, "learning_rate": 1.6685894929554455e-06, "logits/chosen": -2.8799233436584473, "logits/rejected": -2.3138110637664795, "logps/chosen": -102.28926086425781, "logps/rejected": -919.4597778320312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5237906575202942, "rewards/margins": 8.290874481201172, "rewards/rejected": -8.814664840698242, "step": 54060 }, { "epoch": 0.65, "learning_rate": 1.6676044039173222e-06, "logits/chosen": -2.9632954597473145, "logits/rejected": -2.7324271202087402, "logps/chosen": -58.649742126464844, "logps/rejected": -818.9517211914062, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.20273089408874512, "rewards/margins": 7.611371040344238, "rewards/rejected": -7.814101219177246, "step": 54070 }, { "epoch": 0.65, "learning_rate": 1.666619460213158e-06, "logits/chosen": -2.8662233352661133, "logits/rejected": -2.5786073207855225, "logps/chosen": -72.41898345947266, "logps/rejected": -722.984375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.3231988549232483, "rewards/margins": 6.54950475692749, "rewards/rejected": -6.872703552246094, "step": 54080 }, { "epoch": 0.65, "learning_rate": 1.6656346620149221e-06, "logits/chosen": -2.898503541946411, "logits/rejected": -2.072465419769287, "logps/chosen": -140.332763671875, "logps/rejected": -1104.2236328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8334685564041138, "rewards/margins": 9.780850410461426, "rewards/rejected": -10.61431884765625, "step": 54090 }, { "epoch": 0.65, "learning_rate": 1.6646500094945564e-06, "logits/chosen": -2.839022397994995, "logits/rejected": -2.3619332313537598, "logps/chosen": -111.40437316894531, "logps/rejected": -960.1384887695312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6340798139572144, "rewards/margins": 8.57124137878418, "rewards/rejected": -9.205322265625, "step": 54100 }, { "epoch": 0.65, "learning_rate": 1.6636655028239789e-06, "logits/chosen": -2.8689639568328857, "logits/rejected": -2.186619520187378, "logps/chosen": -123.18019104003906, "logps/rejected": -1058.99072265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6047551035881042, "rewards/margins": 9.579996109008789, "rewards/rejected": -10.1847505569458, "step": 54110 }, { "epoch": 0.65, "learning_rate": 1.662681142175082e-06, "logits/chosen": -2.895397901535034, "logits/rejected": -2.3131275177001953, "logps/chosen": -103.7718276977539, "logps/rejected": -942.7296752929688, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5263477563858032, "rewards/margins": 8.508100509643555, "rewards/rejected": -9.034448623657227, "step": 54120 }, { "epoch": 0.65, "learning_rate": 1.661696927719732e-06, "logits/chosen": -2.9360527992248535, "logits/rejected": -2.389456272125244, "logps/chosen": -96.14723205566406, "logps/rejected": -964.4739990234375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.47963494062423706, "rewards/margins": 8.759954452514648, "rewards/rejected": -9.239587783813477, "step": 54130 }, { "epoch": 0.65, "learning_rate": 1.66071285962977e-06, "logits/chosen": -2.9035937786102295, "logits/rejected": -2.3001046180725098, "logps/chosen": -101.08444213867188, "logps/rejected": -896.2529296875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5312325954437256, "rewards/margins": 8.04206371307373, "rewards/rejected": -8.573296546936035, "step": 54140 }, { "epoch": 0.65, "learning_rate": 1.6597289380770117e-06, "logits/chosen": -2.903247117996216, "logits/rejected": -2.438410520553589, "logps/chosen": -95.90809631347656, "logps/rejected": -904.78662109375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.5227338671684265, "rewards/margins": 8.135549545288086, "rewards/rejected": -8.658282279968262, "step": 54150 }, { "epoch": 0.65, "learning_rate": 1.6587451632332476e-06, "logits/chosen": -2.8435935974121094, "logits/rejected": -2.114680051803589, "logps/chosen": -127.26314544677734, "logps/rejected": -1019.0480346679688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7304781079292297, "rewards/margins": 9.055727005004883, "rewards/rejected": -9.786205291748047, "step": 54160 }, { "epoch": 0.65, "learning_rate": 1.657761535270241e-06, "logits/chosen": -2.8758010864257812, "logits/rejected": -2.490739345550537, "logps/chosen": -117.38714599609375, "logps/rejected": -803.9781494140625, "loss": 0.1499, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7570726871490479, "rewards/margins": 6.897891998291016, "rewards/rejected": -7.654965400695801, "step": 54170 }, { "epoch": 0.65, "learning_rate": 1.656778054359731e-06, "logits/chosen": -2.8551487922668457, "logits/rejected": -2.3470041751861572, "logps/chosen": -102.40213775634766, "logps/rejected": -868.9054565429688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5595313310623169, "rewards/margins": 7.733089447021484, "rewards/rejected": -8.292620658874512, "step": 54180 }, { "epoch": 0.65, "learning_rate": 1.6557947206734314e-06, "logits/chosen": -2.9271061420440674, "logits/rejected": -2.1996047496795654, "logps/chosen": -123.89814758300781, "logps/rejected": -1030.502685546875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7352510690689087, "rewards/margins": 9.171560287475586, "rewards/rejected": -9.90681266784668, "step": 54190 }, { "epoch": 0.65, "learning_rate": 1.6548115343830284e-06, "logits/chosen": -2.8467535972595215, "logits/rejected": -2.525362968444824, "logps/chosen": -69.21685028076172, "logps/rejected": -814.5776977539062, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.2988852858543396, "rewards/margins": 7.475083827972412, "rewards/rejected": -7.7739691734313965, "step": 54200 }, { "epoch": 0.65, "learning_rate": 1.6538284956601841e-06, "logits/chosen": -2.9057059288024902, "logits/rejected": -2.3866934776306152, "logps/chosen": -132.025146484375, "logps/rejected": -944.1282348632812, "loss": 0.1113, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8467461466789246, "rewards/margins": 8.217167854309082, "rewards/rejected": -9.063913345336914, "step": 54210 }, { "epoch": 0.65, "learning_rate": 1.6528456046765344e-06, "logits/chosen": -2.890746593475342, "logits/rejected": -2.121303081512451, "logps/chosen": -131.7447967529297, "logps/rejected": -1028.6734619140625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.797582745552063, "rewards/margins": 9.076190948486328, "rewards/rejected": -9.873773574829102, "step": 54220 }, { "epoch": 0.65, "learning_rate": 1.6518628616036883e-06, "logits/chosen": -2.865417242050171, "logits/rejected": -2.2556865215301514, "logps/chosen": -118.08097076416016, "logps/rejected": -936.8112182617188, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.6980189085006714, "rewards/margins": 8.264378547668457, "rewards/rejected": -8.962396621704102, "step": 54230 }, { "epoch": 0.65, "learning_rate": 1.6508802666132323e-06, "logits/chosen": -2.8639349937438965, "logits/rejected": -2.416452646255493, "logps/chosen": -110.00636291503906, "logps/rejected": -917.6727294921875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6557814478874207, "rewards/margins": 8.127969741821289, "rewards/rejected": -8.783750534057617, "step": 54240 }, { "epoch": 0.65, "learning_rate": 1.6498978198767228e-06, "logits/chosen": -2.8859095573425293, "logits/rejected": -2.2880091667175293, "logps/chosen": -131.16897583007812, "logps/rejected": -872.4022216796875, "loss": 0.0889, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.824397087097168, "rewards/margins": 7.5196533203125, "rewards/rejected": -8.344050407409668, "step": 54250 }, { "epoch": 0.65, "learning_rate": 1.6489155215656932e-06, "logits/chosen": -2.907623529434204, "logits/rejected": -2.348435401916504, "logps/chosen": -117.8276596069336, "logps/rejected": -973.96044921875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6598270535469055, "rewards/margins": 8.69044303894043, "rewards/rejected": -9.350271224975586, "step": 54260 }, { "epoch": 0.65, "learning_rate": 1.6479333718516499e-06, "logits/chosen": -2.927074432373047, "logits/rejected": -2.5605907440185547, "logps/chosen": -119.66835021972656, "logps/rejected": -848.2839965820312, "loss": 0.096, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7824357748031616, "rewards/margins": 7.314755916595459, "rewards/rejected": -8.09719181060791, "step": 54270 }, { "epoch": 0.65, "learning_rate": 1.6469513709060731e-06, "logits/chosen": -2.834519863128662, "logits/rejected": -2.3994078636169434, "logps/chosen": -85.3870620727539, "logps/rejected": -939.5255737304688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4269193112850189, "rewards/margins": 8.582670211791992, "rewards/rejected": -9.009591102600098, "step": 54280 }, { "epoch": 0.65, "learning_rate": 1.6459695189004174e-06, "logits/chosen": -2.9225118160247803, "logits/rejected": -2.5447752475738525, "logps/chosen": -101.30070495605469, "logps/rejected": -901.3888549804688, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5674676895141602, "rewards/margins": 8.067171096801758, "rewards/rejected": -8.634638786315918, "step": 54290 }, { "epoch": 0.65, "learning_rate": 1.644987816006112e-06, "logits/chosen": -2.914487600326538, "logits/rejected": -2.4420247077941895, "logps/chosen": -104.9737777709961, "logps/rejected": -931.3772583007812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5492773652076721, "rewards/margins": 8.370630264282227, "rewards/rejected": -8.919906616210938, "step": 54300 }, { "epoch": 0.65, "learning_rate": 1.6440062623945597e-06, "logits/chosen": -2.873843193054199, "logits/rejected": -2.2327377796173096, "logps/chosen": -119.19444274902344, "logps/rejected": -988.9006958007812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7079266309738159, "rewards/margins": 8.790277481079102, "rewards/rejected": -9.498205184936523, "step": 54310 }, { "epoch": 0.65, "learning_rate": 1.6430248582371361e-06, "logits/chosen": -2.905864715576172, "logits/rejected": -2.560940742492676, "logps/chosen": -85.37391662597656, "logps/rejected": -802.7758178710938, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -0.4351845681667328, "rewards/margins": 7.210136413574219, "rewards/rejected": -7.645320892333984, "step": 54320 }, { "epoch": 0.65, "learning_rate": 1.6420436037051918e-06, "logits/chosen": -2.90141224861145, "logits/rejected": -2.6232054233551025, "logps/chosen": -69.5321273803711, "logps/rejected": -772.5632934570312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.29424139857292175, "rewards/margins": 7.0618486404418945, "rewards/rejected": -7.356091499328613, "step": 54330 }, { "epoch": 0.65, "learning_rate": 1.6410624989700522e-06, "logits/chosen": -2.901085376739502, "logits/rejected": -2.445356845855713, "logps/chosen": -96.81021118164062, "logps/rejected": -864.3306884765625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5440059304237366, "rewards/margins": 7.700705528259277, "rewards/rejected": -8.244711875915527, "step": 54340 }, { "epoch": 0.65, "learning_rate": 1.640081544203014e-06, "logits/chosen": -2.903207302093506, "logits/rejected": -2.479152202606201, "logps/chosen": -86.84132385253906, "logps/rejected": -914.0125732421875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.40450015664100647, "rewards/margins": 8.364189147949219, "rewards/rejected": -8.768689155578613, "step": 54350 }, { "epoch": 0.65, "learning_rate": 1.63910073957535e-06, "logits/chosen": -2.8802247047424316, "logits/rejected": -2.3483312129974365, "logps/chosen": -115.56624603271484, "logps/rejected": -986.9054565429688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6417708396911621, "rewards/margins": 8.83180046081543, "rewards/rejected": -9.473572731018066, "step": 54360 }, { "epoch": 0.65, "learning_rate": 1.6381200852583056e-06, "logits/chosen": -2.860842227935791, "logits/rejected": -2.2943031787872314, "logps/chosen": -133.15847778320312, "logps/rejected": -848.0274658203125, "loss": 0.1046, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8179129362106323, "rewards/margins": 7.284708499908447, "rewards/rejected": -8.102622032165527, "step": 54370 }, { "epoch": 0.65, "learning_rate": 1.6371395814231017e-06, "logits/chosen": -2.918372631072998, "logits/rejected": -2.3786256313323975, "logps/chosen": -104.24813079833984, "logps/rejected": -917.1103515625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5774274468421936, "rewards/margins": 8.205747604370117, "rewards/rejected": -8.78317642211914, "step": 54380 }, { "epoch": 0.65, "learning_rate": 1.6361592282409294e-06, "logits/chosen": -2.87611722946167, "logits/rejected": -2.314058780670166, "logps/chosen": -124.9846420288086, "logps/rejected": -1019.4913330078125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6578115224838257, "rewards/margins": 9.14693832397461, "rewards/rejected": -9.804749488830566, "step": 54390 }, { "epoch": 0.65, "learning_rate": 1.6351790258829563e-06, "logits/chosen": -2.8445191383361816, "logits/rejected": -2.328209161758423, "logps/chosen": -98.96976470947266, "logps/rejected": -961.47021484375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.5604631900787354, "rewards/margins": 8.677804946899414, "rewards/rejected": -9.238266944885254, "step": 54400 }, { "epoch": 0.65, "learning_rate": 1.6341989745203246e-06, "logits/chosen": -2.8800995349884033, "logits/rejected": -2.27929425239563, "logps/chosen": -122.8438720703125, "logps/rejected": -1022.0530395507812, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7309716939926147, "rewards/margins": 9.097854614257812, "rewards/rejected": -9.828825950622559, "step": 54410 }, { "epoch": 0.65, "learning_rate": 1.6332190743241464e-06, "logits/chosen": -2.864269256591797, "logits/rejected": -2.5029826164245605, "logps/chosen": -78.84872436523438, "logps/rejected": -806.5872802734375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.3979227840900421, "rewards/margins": 7.285472869873047, "rewards/rejected": -7.6833953857421875, "step": 54420 }, { "epoch": 0.65, "learning_rate": 1.6322393254655103e-06, "logits/chosen": -2.9047739505767822, "logits/rejected": -2.465578079223633, "logps/chosen": -97.64163208007812, "logps/rejected": -867.2130737304688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5365892648696899, "rewards/margins": 7.7402167320251465, "rewards/rejected": -8.276806831359863, "step": 54430 }, { "epoch": 0.65, "learning_rate": 1.6312597281154785e-06, "logits/chosen": -2.900617837905884, "logits/rejected": -2.2826826572418213, "logps/chosen": -115.85182189941406, "logps/rejected": -1052.630615234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6479917168617249, "rewards/margins": 9.475259780883789, "rewards/rejected": -10.123252868652344, "step": 54440 }, { "epoch": 0.65, "learning_rate": 1.6302802824450848e-06, "logits/chosen": -2.9327142238616943, "logits/rejected": -2.434494733810425, "logps/chosen": -108.14241790771484, "logps/rejected": -954.9431762695312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5705382227897644, "rewards/margins": 8.57569694519043, "rewards/rejected": -9.146234512329102, "step": 54450 }, { "epoch": 0.65, "learning_rate": 1.6293009886253386e-06, "logits/chosen": -2.877506732940674, "logits/rejected": -2.458920955657959, "logps/chosen": -79.39820098876953, "logps/rejected": -782.3880004882812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3930301070213318, "rewards/margins": 7.053807735443115, "rewards/rejected": -7.446837425231934, "step": 54460 }, { "epoch": 0.65, "learning_rate": 1.628321846827221e-06, "logits/chosen": -2.90030837059021, "logits/rejected": -2.391829013824463, "logps/chosen": -108.51649475097656, "logps/rejected": -911.0, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.633763313293457, "rewards/margins": 8.08607292175293, "rewards/rejected": -8.719836235046387, "step": 54470 }, { "epoch": 0.65, "learning_rate": 1.6273428572216886e-06, "logits/chosen": -2.887054204940796, "logits/rejected": -2.314774513244629, "logps/chosen": -113.37980651855469, "logps/rejected": -961.7174682617188, "loss": 0.0854, "rewards/accuracies": 1.0, "rewards/chosen": -0.6248073577880859, "rewards/margins": 8.60501480102539, "rewards/rejected": -9.229822158813477, "step": 54480 }, { "epoch": 0.65, "learning_rate": 1.6263640199796692e-06, "logits/chosen": -2.83671498298645, "logits/rejected": -2.42148494720459, "logps/chosen": -89.6988525390625, "logps/rejected": -911.2584228515625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5007210969924927, "rewards/margins": 8.2450590133667, "rewards/rejected": -8.745779991149902, "step": 54490 }, { "epoch": 0.65, "learning_rate": 1.6253853352720655e-06, "logits/chosen": -2.9358267784118652, "logits/rejected": -2.6228549480438232, "logps/chosen": -85.63005065917969, "logps/rejected": -798.4273681640625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.4421214163303375, "rewards/margins": 7.165747165679932, "rewards/rejected": -7.6078691482543945, "step": 54500 }, { "epoch": 0.65, "learning_rate": 1.6244068032697538e-06, "logits/chosen": -2.9391367435455322, "logits/rejected": -2.492586851119995, "logps/chosen": -81.66812133789062, "logps/rejected": -802.489990234375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.42153677344322205, "rewards/margins": 7.219714164733887, "rewards/rejected": -7.641251564025879, "step": 54510 }, { "epoch": 0.65, "learning_rate": 1.6234284241435816e-06, "logits/chosen": -2.8708925247192383, "logits/rejected": -2.3656458854675293, "logps/chosen": -105.71331787109375, "logps/rejected": -905.68017578125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5905031561851501, "rewards/margins": 8.088122367858887, "rewards/rejected": -8.67862606048584, "step": 54520 }, { "epoch": 0.65, "learning_rate": 1.6224501980643723e-06, "logits/chosen": -2.919774293899536, "logits/rejected": -2.599438428878784, "logps/chosen": -72.7979507446289, "logps/rejected": -743.82421875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.32645002007484436, "rewards/margins": 6.744396209716797, "rewards/rejected": -7.070845603942871, "step": 54530 }, { "epoch": 0.65, "learning_rate": 1.6214721252029214e-06, "logits/chosen": -2.849047899246216, "logits/rejected": -2.259366512298584, "logps/chosen": -108.63460540771484, "logps/rejected": -950.828125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6149128675460815, "rewards/margins": 8.49819564819336, "rewards/rejected": -9.113107681274414, "step": 54540 }, { "epoch": 0.65, "learning_rate": 1.6204942057299972e-06, "logits/chosen": -2.8893094062805176, "logits/rejected": -2.3992247581481934, "logps/chosen": -103.1790542602539, "logps/rejected": -823.71044921875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5801056623458862, "rewards/margins": 7.276769161224365, "rewards/rejected": -7.856873989105225, "step": 54550 }, { "epoch": 0.65, "learning_rate": 1.6195164398163415e-06, "logits/chosen": -2.8705646991729736, "logits/rejected": -2.315114974975586, "logps/chosen": -145.4158172607422, "logps/rejected": -870.3279418945312, "loss": 0.0854, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0106531381607056, "rewards/margins": 7.301042079925537, "rewards/rejected": -8.311694145202637, "step": 54560 }, { "epoch": 0.65, "learning_rate": 1.6185388276326703e-06, "logits/chosen": -2.916771411895752, "logits/rejected": -2.31329607963562, "logps/chosen": -111.13261413574219, "logps/rejected": -941.1423950195312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6123875379562378, "rewards/margins": 8.408872604370117, "rewards/rejected": -9.021261215209961, "step": 54570 }, { "epoch": 0.65, "learning_rate": 1.6175613693496719e-06, "logits/chosen": -2.9084184169769287, "logits/rejected": -2.5437211990356445, "logps/chosen": -91.67097473144531, "logps/rejected": -795.0294799804688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4363357126712799, "rewards/margins": 7.129755973815918, "rewards/rejected": -7.566092014312744, "step": 54580 }, { "epoch": 0.65, "learning_rate": 1.616584065138007e-06, "logits/chosen": -2.8595447540283203, "logits/rejected": -2.4501843452453613, "logps/chosen": -85.18009948730469, "logps/rejected": -889.1888427734375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.45713862776756287, "rewards/margins": 8.049474716186523, "rewards/rejected": -8.506613731384277, "step": 54590 }, { "epoch": 0.65, "learning_rate": 1.6156069151683112e-06, "logits/chosen": -2.835805654525757, "logits/rejected": -2.440427303314209, "logps/chosen": -85.23829650878906, "logps/rejected": -842.369140625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4036424160003662, "rewards/margins": 7.656449794769287, "rewards/rejected": -8.060091972351074, "step": 54600 }, { "epoch": 0.65, "learning_rate": 1.614629919611192e-06, "logits/chosen": -2.9151532649993896, "logits/rejected": -2.6602931022644043, "logps/chosen": -73.11009979248047, "logps/rejected": -780.9735107421875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.36381271481513977, "rewards/margins": 7.081160068511963, "rewards/rejected": -7.444971561431885, "step": 54610 }, { "epoch": 0.65, "learning_rate": 1.613653078637229e-06, "logits/chosen": -2.8829784393310547, "logits/rejected": -2.303501844406128, "logps/chosen": -128.64894104003906, "logps/rejected": -847.3562622070312, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -0.7903726100921631, "rewards/margins": 7.293761253356934, "rewards/rejected": -8.084134101867676, "step": 54620 }, { "epoch": 0.65, "learning_rate": 1.612676392416977e-06, "logits/chosen": -2.889132022857666, "logits/rejected": -2.462435245513916, "logps/chosen": -85.19473266601562, "logps/rejected": -887.3527221679688, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.4667053818702698, "rewards/margins": 8.033437728881836, "rewards/rejected": -8.500144004821777, "step": 54630 }, { "epoch": 0.65, "learning_rate": 1.611699861120963e-06, "logits/chosen": -2.833299160003662, "logits/rejected": -2.3421924114227295, "logps/chosen": -113.9303207397461, "logps/rejected": -958.7427978515625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6780806183815002, "rewards/margins": 8.528343200683594, "rewards/rejected": -9.206424713134766, "step": 54640 }, { "epoch": 0.65, "learning_rate": 1.6107234849196846e-06, "logits/chosen": -2.8964972496032715, "logits/rejected": -2.411240816116333, "logps/chosen": -113.40531921386719, "logps/rejected": -926.4191284179688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6171292662620544, "rewards/margins": 8.242718696594238, "rewards/rejected": -8.859848022460938, "step": 54650 }, { "epoch": 0.65, "learning_rate": 1.6097472639836173e-06, "logits/chosen": -2.8437743186950684, "logits/rejected": -2.271618127822876, "logps/chosen": -171.5491943359375, "logps/rejected": -871.8834228515625, "loss": 0.138, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.22877037525177, "rewards/margins": 7.092512607574463, "rewards/rejected": -8.321283340454102, "step": 54660 }, { "epoch": 0.65, "learning_rate": 1.6087711984832046e-06, "logits/chosen": -2.904771327972412, "logits/rejected": -2.572239875793457, "logps/chosen": -77.75157928466797, "logps/rejected": -836.1051635742188, "loss": 0.1265, "rewards/accuracies": 1.0, "rewards/chosen": -0.3240335285663605, "rewards/margins": 7.659267425537109, "rewards/rejected": -7.983300685882568, "step": 54670 }, { "epoch": 0.65, "learning_rate": 1.6077952885888653e-06, "logits/chosen": -2.856074810028076, "logits/rejected": -2.236492872238159, "logps/chosen": -161.13487243652344, "logps/rejected": -1063.509033203125, "loss": 0.1162, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0961506366729736, "rewards/margins": 9.126641273498535, "rewards/rejected": -10.22279167175293, "step": 54680 }, { "epoch": 0.65, "learning_rate": 1.6068195344709913e-06, "logits/chosen": -2.8798460960388184, "logits/rejected": -2.3151164054870605, "logps/chosen": -102.47795104980469, "logps/rejected": -924.1998901367188, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5717663168907166, "rewards/margins": 8.294607162475586, "rewards/rejected": -8.866374969482422, "step": 54690 }, { "epoch": 0.65, "learning_rate": 1.6058439362999457e-06, "logits/chosen": -2.8645427227020264, "logits/rejected": -2.325611114501953, "logps/chosen": -107.35282897949219, "logps/rejected": -931.1853637695312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6243734359741211, "rewards/margins": 8.290226936340332, "rewards/rejected": -8.914600372314453, "step": 54700 }, { "epoch": 0.65, "learning_rate": 1.604868494246065e-06, "logits/chosen": -2.867410182952881, "logits/rejected": -2.4517805576324463, "logps/chosen": -87.9049301147461, "logps/rejected": -858.3458251953125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.45878952741622925, "rewards/margins": 7.741224765777588, "rewards/rejected": -8.200014114379883, "step": 54710 }, { "epoch": 0.66, "learning_rate": 1.6038932084796588e-06, "logits/chosen": -2.8550333976745605, "logits/rejected": -2.420952796936035, "logps/chosen": -122.7790298461914, "logps/rejected": -919.3157348632812, "loss": 0.1035, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7855780124664307, "rewards/margins": 8.00969123840332, "rewards/rejected": -8.795269012451172, "step": 54720 }, { "epoch": 0.66, "learning_rate": 1.6029180791710106e-06, "logits/chosen": -2.893277645111084, "logits/rejected": -2.5133349895477295, "logps/chosen": -84.36956787109375, "logps/rejected": -835.2335205078125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.44259676337242126, "rewards/margins": 7.5419487953186035, "rewards/rejected": -7.984545707702637, "step": 54730 }, { "epoch": 0.66, "learning_rate": 1.6019431064903734e-06, "logits/chosen": -2.8673901557922363, "logits/rejected": -2.1403651237487793, "logps/chosen": -136.02159118652344, "logps/rejected": -1074.2564697265625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8340544700622559, "rewards/margins": 9.503718376159668, "rewards/rejected": -10.337773323059082, "step": 54740 }, { "epoch": 0.66, "learning_rate": 1.6009682906079756e-06, "logits/chosen": -2.8818936347961426, "logits/rejected": -2.3082780838012695, "logps/chosen": -104.4512710571289, "logps/rejected": -953.6605224609375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5678514242172241, "rewards/margins": 8.566134452819824, "rewards/rejected": -9.133986473083496, "step": 54750 }, { "epoch": 0.66, "learning_rate": 1.599993631694018e-06, "logits/chosen": -2.861494541168213, "logits/rejected": -2.311067819595337, "logps/chosen": -136.4265899658203, "logps/rejected": -933.9929809570312, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.7935766577720642, "rewards/margins": 8.147850036621094, "rewards/rejected": -8.941426277160645, "step": 54760 }, { "epoch": 0.66, "learning_rate": 1.599019129918672e-06, "logits/chosen": -2.8600966930389404, "logits/rejected": -2.074195384979248, "logps/chosen": -128.96336364746094, "logps/rejected": -1031.4739990234375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7866573333740234, "rewards/margins": 9.133639335632324, "rewards/rejected": -9.920296669006348, "step": 54770 }, { "epoch": 0.66, "learning_rate": 1.5980447854520836e-06, "logits/chosen": -2.8646624088287354, "logits/rejected": -2.3515305519104004, "logps/chosen": -95.64344787597656, "logps/rejected": -880.2490234375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5113450288772583, "rewards/margins": 7.915543556213379, "rewards/rejected": -8.426888465881348, "step": 54780 }, { "epoch": 0.66, "learning_rate": 1.5970705984643712e-06, "logits/chosen": -2.866814374923706, "logits/rejected": -2.326568603515625, "logps/chosen": -122.93115234375, "logps/rejected": -1032.3189697265625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7226583361625671, "rewards/margins": 9.204835891723633, "rewards/rejected": -9.927492141723633, "step": 54790 }, { "epoch": 0.66, "learning_rate": 1.5960965691256239e-06, "logits/chosen": -2.8834493160247803, "logits/rejected": -2.2270493507385254, "logps/chosen": -105.90494537353516, "logps/rejected": -902.6390380859375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5658665895462036, "rewards/margins": 8.083196640014648, "rewards/rejected": -8.649065017700195, "step": 54800 }, { "epoch": 0.66, "learning_rate": 1.5951226976059054e-06, "logits/chosen": -2.930518388748169, "logits/rejected": -2.481369733810425, "logps/chosen": -100.04327392578125, "logps/rejected": -887.6760864257812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5977450609207153, "rewards/margins": 7.902078151702881, "rewards/rejected": -8.499824523925781, "step": 54810 }, { "epoch": 0.66, "learning_rate": 1.59414898407525e-06, "logits/chosen": -2.870774507522583, "logits/rejected": -2.449446201324463, "logps/chosen": -115.5598373413086, "logps/rejected": -915.2728271484375, "loss": 0.1293, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.698272168636322, "rewards/margins": 8.064493179321289, "rewards/rejected": -8.762764930725098, "step": 54820 }, { "epoch": 0.66, "learning_rate": 1.5931754287036672e-06, "logits/chosen": -2.8766417503356934, "logits/rejected": -2.1523995399475098, "logps/chosen": -138.58822631835938, "logps/rejected": -864.5344848632812, "loss": 0.1605, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8751184344291687, "rewards/margins": 7.388913631439209, "rewards/rejected": -8.264032363891602, "step": 54830 }, { "epoch": 0.66, "learning_rate": 1.5922020316611353e-06, "logits/chosen": -2.8732142448425293, "logits/rejected": -2.446676731109619, "logps/chosen": -104.62017822265625, "logps/rejected": -916.3355712890625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.6285894513130188, "rewards/margins": 8.140042304992676, "rewards/rejected": -8.768631935119629, "step": 54840 }, { "epoch": 0.66, "learning_rate": 1.5912287931176074e-06, "logits/chosen": -2.891772508621216, "logits/rejected": -2.509239673614502, "logps/chosen": -93.67634582519531, "logps/rejected": -926.3070068359375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.47998690605163574, "rewards/margins": 8.387621879577637, "rewards/rejected": -8.867609024047852, "step": 54850 }, { "epoch": 0.66, "learning_rate": 1.5902557132430085e-06, "logits/chosen": -2.9105846881866455, "logits/rejected": -2.3192636966705322, "logps/chosen": -97.29549407958984, "logps/rejected": -920.462890625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4957498610019684, "rewards/margins": 8.321915626525879, "rewards/rejected": -8.817665100097656, "step": 54860 }, { "epoch": 0.66, "learning_rate": 1.589282792207235e-06, "logits/chosen": -2.8759238719940186, "logits/rejected": -2.4822399616241455, "logps/chosen": -99.6484603881836, "logps/rejected": -910.3499755859375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5534154176712036, "rewards/margins": 8.164173126220703, "rewards/rejected": -8.717588424682617, "step": 54870 }, { "epoch": 0.66, "learning_rate": 1.5883100301801568e-06, "logits/chosen": -2.8922019004821777, "logits/rejected": -2.5641729831695557, "logps/chosen": -105.92710876464844, "logps/rejected": -816.8916625976562, "loss": 0.1585, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6701189279556274, "rewards/margins": 7.117074489593506, "rewards/rejected": -7.787193298339844, "step": 54880 }, { "epoch": 0.66, "learning_rate": 1.5873374273316155e-06, "logits/chosen": -2.9167985916137695, "logits/rejected": -2.238835334777832, "logps/chosen": -116.06644439697266, "logps/rejected": -989.8779296875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6354015469551086, "rewards/margins": 8.865835189819336, "rewards/rejected": -9.501236915588379, "step": 54890 }, { "epoch": 0.66, "learning_rate": 1.5863649838314243e-06, "logits/chosen": -2.909325361251831, "logits/rejected": -2.3953757286071777, "logps/chosen": -106.16569519042969, "logps/rejected": -886.7868041992188, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5901912450790405, "rewards/margins": 7.8820695877075195, "rewards/rejected": -8.472260475158691, "step": 54900 }, { "epoch": 0.66, "learning_rate": 1.585392699849369e-06, "logits/chosen": -2.9030752182006836, "logits/rejected": -2.4667410850524902, "logps/chosen": -76.35467529296875, "logps/rejected": -788.3733520507812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.3405453562736511, "rewards/margins": 7.1626458168029785, "rewards/rejected": -7.503190517425537, "step": 54910 }, { "epoch": 0.66, "learning_rate": 1.5844205755552083e-06, "logits/chosen": -2.8792128562927246, "logits/rejected": -2.218658924102783, "logps/chosen": -124.69145202636719, "logps/rejected": -914.9765625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7468487620353699, "rewards/margins": 7.982675075531006, "rewards/rejected": -8.729524612426758, "step": 54920 }, { "epoch": 0.66, "learning_rate": 1.583448611118673e-06, "logits/chosen": -2.8453822135925293, "logits/rejected": -2.3903372287750244, "logps/chosen": -104.10262298583984, "logps/rejected": -912.4200439453125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5984565019607544, "rewards/margins": 8.142603874206543, "rewards/rejected": -8.741060256958008, "step": 54930 }, { "epoch": 0.66, "learning_rate": 1.5824768067094636e-06, "logits/chosen": -2.916764259338379, "logits/rejected": -2.3715195655822754, "logps/chosen": -87.1254653930664, "logps/rejected": -909.1144409179688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.42623138427734375, "rewards/margins": 8.28552532196045, "rewards/rejected": -8.711756706237793, "step": 54940 }, { "epoch": 0.66, "learning_rate": 1.5815051624972557e-06, "logits/chosen": -2.8935706615448, "logits/rejected": -2.3727259635925293, "logps/chosen": -96.05223846435547, "logps/rejected": -904.7210083007812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4939926564693451, "rewards/margins": 8.163007736206055, "rewards/rejected": -8.657001495361328, "step": 54950 }, { "epoch": 0.66, "learning_rate": 1.5805336786516959e-06, "logits/chosen": -2.847062587738037, "logits/rejected": -2.4539473056793213, "logps/chosen": -91.78407287597656, "logps/rejected": -824.52197265625, "loss": 0.1026, "rewards/accuracies": 1.0, "rewards/chosen": -0.5175474882125854, "rewards/margins": 7.360146999359131, "rewards/rejected": -7.877694129943848, "step": 54960 }, { "epoch": 0.66, "learning_rate": 1.5795623553424016e-06, "logits/chosen": -2.843568801879883, "logits/rejected": -2.480464458465576, "logps/chosen": -81.89508819580078, "logps/rejected": -835.9754028320312, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -0.3861122727394104, "rewards/margins": 7.589502811431885, "rewards/rejected": -7.97561502456665, "step": 54970 }, { "epoch": 0.66, "learning_rate": 1.5785911927389637e-06, "logits/chosen": -2.8509294986724854, "logits/rejected": -2.3636021614074707, "logps/chosen": -99.8314208984375, "logps/rejected": -870.4976806640625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.45789170265197754, "rewards/margins": 7.8616838455200195, "rewards/rejected": -8.319576263427734, "step": 54980 }, { "epoch": 0.66, "learning_rate": 1.5776201910109445e-06, "logits/chosen": -2.891631603240967, "logits/rejected": -2.335425853729248, "logps/chosen": -101.66499328613281, "logps/rejected": -879.8995361328125, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.5626636147499084, "rewards/margins": 7.8549909591674805, "rewards/rejected": -8.417654037475586, "step": 54990 }, { "epoch": 0.66, "learning_rate": 1.5766493503278781e-06, "logits/chosen": -2.882627010345459, "logits/rejected": -2.5123791694641113, "logps/chosen": -97.79704284667969, "logps/rejected": -872.1510620117188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5051547884941101, "rewards/margins": 7.822049617767334, "rewards/rejected": -8.327204704284668, "step": 55000 }, { "epoch": 0.66, "learning_rate": 1.5756786708592706e-06, "logits/chosen": -2.8720736503601074, "logits/rejected": -2.4597349166870117, "logps/chosen": -114.42547607421875, "logps/rejected": -837.1669921875, "loss": 0.1034, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7304415702819824, "rewards/margins": 7.275799751281738, "rewards/rejected": -8.006239891052246, "step": 55010 }, { "epoch": 0.66, "learning_rate": 1.5747081527746e-06, "logits/chosen": -2.8767144680023193, "logits/rejected": -2.394202709197998, "logps/chosen": -93.90597534179688, "logps/rejected": -980.7427978515625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4938233494758606, "rewards/margins": 8.919412612915039, "rewards/rejected": -9.413235664367676, "step": 55020 }, { "epoch": 0.66, "learning_rate": 1.5737377962433165e-06, "logits/chosen": -2.900451183319092, "logits/rejected": -2.2892873287200928, "logps/chosen": -100.42501068115234, "logps/rejected": -917.2359619140625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5266456604003906, "rewards/margins": 8.261137008666992, "rewards/rejected": -8.787782669067383, "step": 55030 }, { "epoch": 0.66, "learning_rate": 1.5727676014348406e-06, "logits/chosen": -2.8635094165802, "logits/rejected": -2.3078930377960205, "logps/chosen": -93.47749328613281, "logps/rejected": -893.3587036132812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.4847958981990814, "rewards/margins": 8.066364288330078, "rewards/rejected": -8.55116081237793, "step": 55040 }, { "epoch": 0.66, "learning_rate": 1.571797568518566e-06, "logits/chosen": -2.87422513961792, "logits/rejected": -2.166898488998413, "logps/chosen": -136.79122924804688, "logps/rejected": -1020.0172119140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8746282458305359, "rewards/margins": 8.920755386352539, "rewards/rejected": -9.79538345336914, "step": 55050 }, { "epoch": 0.66, "learning_rate": 1.570827697663859e-06, "logits/chosen": -2.8483729362487793, "logits/rejected": -2.186042547225952, "logps/chosen": -105.57649230957031, "logps/rejected": -906.2559814453125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5699104070663452, "rewards/margins": 8.095142364501953, "rewards/rejected": -8.66505241394043, "step": 55060 }, { "epoch": 0.66, "learning_rate": 1.5698579890400534e-06, "logits/chosen": -2.900935173034668, "logits/rejected": -2.5497887134552, "logps/chosen": -106.857666015625, "logps/rejected": -869.1549072265625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.6388194561004639, "rewards/margins": 7.678004264831543, "rewards/rejected": -8.316823959350586, "step": 55070 }, { "epoch": 0.66, "learning_rate": 1.5688884428164613e-06, "logits/chosen": -2.877837896347046, "logits/rejected": -2.3808481693267822, "logps/chosen": -121.32359313964844, "logps/rejected": -941.7786865234375, "loss": 0.1023, "rewards/accuracies": 1.0, "rewards/chosen": -0.790494978427887, "rewards/margins": 8.246938705444336, "rewards/rejected": -9.037433624267578, "step": 55080 }, { "epoch": 0.66, "learning_rate": 1.5679190591623606e-06, "logits/chosen": -2.8674235343933105, "logits/rejected": -2.3413829803466797, "logps/chosen": -114.894287109375, "logps/rejected": -990.9681396484375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6991810202598572, "rewards/margins": 8.834813117980957, "rewards/rejected": -9.533994674682617, "step": 55090 }, { "epoch": 0.66, "learning_rate": 1.566949838247003e-06, "logits/chosen": -2.8245625495910645, "logits/rejected": -2.2451395988464355, "logps/chosen": -105.61468505859375, "logps/rejected": -938.5120239257812, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5975598096847534, "rewards/margins": 8.392982482910156, "rewards/rejected": -8.9905424118042, "step": 55100 }, { "epoch": 0.66, "learning_rate": 1.565980780239613e-06, "logits/chosen": -2.9005279541015625, "logits/rejected": -2.298610210418701, "logps/chosen": -126.00553894042969, "logps/rejected": -1005.7799072265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7711461782455444, "rewards/margins": 8.890074729919434, "rewards/rejected": -9.661221504211426, "step": 55110 }, { "epoch": 0.66, "learning_rate": 1.565011885309384e-06, "logits/chosen": -2.823452949523926, "logits/rejected": -2.079982280731201, "logps/chosen": -147.5081787109375, "logps/rejected": -988.1755981445312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8931090235710144, "rewards/margins": 8.589275360107422, "rewards/rejected": -9.48238468170166, "step": 55120 }, { "epoch": 0.66, "learning_rate": 1.564043153625483e-06, "logits/chosen": -2.8752083778381348, "logits/rejected": -2.4973983764648438, "logps/chosen": -95.2370376586914, "logps/rejected": -821.2103271484375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5390611886978149, "rewards/margins": 7.296185493469238, "rewards/rejected": -7.835247039794922, "step": 55130 }, { "epoch": 0.66, "learning_rate": 1.563074585357048e-06, "logits/chosen": -2.879747152328491, "logits/rejected": -2.359781265258789, "logps/chosen": -102.35865783691406, "logps/rejected": -891.7694091796875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5979652404785156, "rewards/margins": 7.930851936340332, "rewards/rejected": -8.528816223144531, "step": 55140 }, { "epoch": 0.66, "learning_rate": 1.5621061806731888e-06, "logits/chosen": -2.9132485389709473, "logits/rejected": -2.5441484451293945, "logps/chosen": -81.45036315917969, "logps/rejected": -892.4113159179688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.41756683588027954, "rewards/margins": 8.114788055419922, "rewards/rejected": -8.532355308532715, "step": 55150 }, { "epoch": 0.66, "learning_rate": 1.561137939742985e-06, "logits/chosen": -2.8898844718933105, "logits/rejected": -2.314284563064575, "logps/chosen": -136.11009216308594, "logps/rejected": -962.8826293945312, "loss": 0.1006, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8872086405754089, "rewards/margins": 8.357809066772461, "rewards/rejected": -9.245016098022461, "step": 55160 }, { "epoch": 0.66, "learning_rate": 1.5601698627354894e-06, "logits/chosen": -2.838792324066162, "logits/rejected": -2.389103412628174, "logps/chosen": -91.99322509765625, "logps/rejected": -922.2546997070312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5161737203598022, "rewards/margins": 8.307080268859863, "rewards/rejected": -8.82325267791748, "step": 55170 }, { "epoch": 0.66, "learning_rate": 1.5592019498197259e-06, "logits/chosen": -2.8613476753234863, "logits/rejected": -2.1166832447052, "logps/chosen": -123.60823059082031, "logps/rejected": -1011.8411254882812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7233647108078003, "rewards/margins": 8.991342544555664, "rewards/rejected": -9.714709281921387, "step": 55180 }, { "epoch": 0.66, "learning_rate": 1.5582342011646884e-06, "logits/chosen": -2.889324188232422, "logits/rejected": -2.5067877769470215, "logps/chosen": -84.52111053466797, "logps/rejected": -826.80322265625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.4396687150001526, "rewards/margins": 7.453108787536621, "rewards/rejected": -7.892777919769287, "step": 55190 }, { "epoch": 0.66, "learning_rate": 1.5572666169393436e-06, "logits/chosen": -2.9094176292419434, "logits/rejected": -2.501401424407959, "logps/chosen": -128.81185913085938, "logps/rejected": -868.7726440429688, "loss": 0.1018, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8017560839653015, "rewards/margins": 7.507790565490723, "rewards/rejected": -8.309547424316406, "step": 55200 }, { "epoch": 0.66, "learning_rate": 1.55629919731263e-06, "logits/chosen": -2.8531248569488525, "logits/rejected": -2.4155631065368652, "logps/chosen": -97.18463134765625, "logps/rejected": -826.1180419921875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5402776002883911, "rewards/margins": 7.330680847167969, "rewards/rejected": -7.8709588050842285, "step": 55210 }, { "epoch": 0.66, "learning_rate": 1.5553319424534548e-06, "logits/chosen": -2.870087146759033, "logits/rejected": -2.35180401802063, "logps/chosen": -92.86146545410156, "logps/rejected": -853.7069091796875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5178195238113403, "rewards/margins": 7.635964870452881, "rewards/rejected": -8.15378475189209, "step": 55220 }, { "epoch": 0.66, "learning_rate": 1.554364852530699e-06, "logits/chosen": -2.880967378616333, "logits/rejected": -2.231205463409424, "logps/chosen": -98.70198059082031, "logps/rejected": -955.7825927734375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5389009714126587, "rewards/margins": 8.623664855957031, "rewards/rejected": -9.162566184997559, "step": 55230 }, { "epoch": 0.66, "learning_rate": 1.5533979277132132e-06, "logits/chosen": -2.8910624980926514, "logits/rejected": -2.242861747741699, "logps/chosen": -108.69527435302734, "logps/rejected": -964.98779296875, "loss": 0.1029, "rewards/accuracies": 1.0, "rewards/chosen": -0.560460090637207, "rewards/margins": 8.692842483520508, "rewards/rejected": -9.253303527832031, "step": 55240 }, { "epoch": 0.66, "learning_rate": 1.5524311681698211e-06, "logits/chosen": -2.904283285140991, "logits/rejected": -2.173919200897217, "logps/chosen": -139.4501190185547, "logps/rejected": -1015.0074462890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8534703254699707, "rewards/margins": 8.890495300292969, "rewards/rejected": -9.743965148925781, "step": 55250 }, { "epoch": 0.66, "learning_rate": 1.5514645740693145e-06, "logits/chosen": -2.9066338539123535, "logits/rejected": -2.265749454498291, "logps/chosen": -129.0753936767578, "logps/rejected": -1048.319091796875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7939222455024719, "rewards/margins": 9.265138626098633, "rewards/rejected": -10.059061050415039, "step": 55260 }, { "epoch": 0.66, "learning_rate": 1.5504981455804587e-06, "logits/chosen": -2.920224189758301, "logits/rejected": -2.485010862350464, "logps/chosen": -118.99507904052734, "logps/rejected": -849.8699340820312, "loss": 0.1435, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7655487060546875, "rewards/margins": 7.3596673011779785, "rewards/rejected": -8.125216484069824, "step": 55270 }, { "epoch": 0.66, "learning_rate": 1.5495318828719901e-06, "logits/chosen": -2.8354504108428955, "logits/rejected": -2.0841596126556396, "logps/chosen": -144.6005401611328, "logps/rejected": -1030.1746826171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9124693870544434, "rewards/margins": 8.983860969543457, "rewards/rejected": -9.896331787109375, "step": 55280 }, { "epoch": 0.66, "learning_rate": 1.5485657861126146e-06, "logits/chosen": -2.8690242767333984, "logits/rejected": -2.2340636253356934, "logps/chosen": -118.26301574707031, "logps/rejected": -907.2516479492188, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.7257205247879028, "rewards/margins": 7.945714473724365, "rewards/rejected": -8.671435356140137, "step": 55290 }, { "epoch": 0.66, "learning_rate": 1.5475998554710103e-06, "logits/chosen": -2.8653616905212402, "logits/rejected": -2.1797595024108887, "logps/chosen": -153.80723571777344, "logps/rejected": -1041.042724609375, "loss": 0.1329, "rewards/accuracies": 1.0, "rewards/chosen": -1.0396219491958618, "rewards/margins": 8.943571090698242, "rewards/rejected": -9.983193397521973, "step": 55300 }, { "epoch": 0.66, "learning_rate": 1.5466340911158267e-06, "logits/chosen": -2.867415189743042, "logits/rejected": -2.3749783039093018, "logps/chosen": -126.0755844116211, "logps/rejected": -913.8126220703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8421759605407715, "rewards/margins": 7.907747745513916, "rewards/rejected": -8.749923706054688, "step": 55310 }, { "epoch": 0.66, "learning_rate": 1.5456684932156824e-06, "logits/chosen": -2.866964101791382, "logits/rejected": -2.1679301261901855, "logps/chosen": -137.48748779296875, "logps/rejected": -1017.7720947265625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8627344369888306, "rewards/margins": 8.905402183532715, "rewards/rejected": -9.768136978149414, "step": 55320 }, { "epoch": 0.66, "learning_rate": 1.5447030619391683e-06, "logits/chosen": -2.862705945968628, "logits/rejected": -2.3499367237091064, "logps/chosen": -114.13548278808594, "logps/rejected": -958.794921875, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -0.6874266862869263, "rewards/margins": 8.520013809204102, "rewards/rejected": -9.207441329956055, "step": 55330 }, { "epoch": 0.66, "learning_rate": 1.5437377974548467e-06, "logits/chosen": -2.858123779296875, "logits/rejected": -2.299161672592163, "logps/chosen": -126.28950500488281, "logps/rejected": -978.3298950195312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7791510820388794, "rewards/margins": 8.617142677307129, "rewards/rejected": -9.396293640136719, "step": 55340 }, { "epoch": 0.66, "learning_rate": 1.5427726999312503e-06, "logits/chosen": -2.8881185054779053, "logits/rejected": -2.529493808746338, "logps/chosen": -102.76979064941406, "logps/rejected": -795.7566528320312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6150408387184143, "rewards/margins": 6.963278293609619, "rewards/rejected": -7.578318119049072, "step": 55350 }, { "epoch": 0.66, "learning_rate": 1.541807769536881e-06, "logits/chosen": -2.8954083919525146, "logits/rejected": -2.427840232849121, "logps/chosen": -129.50338745117188, "logps/rejected": -877.6585083007812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8323137164115906, "rewards/margins": 7.572154998779297, "rewards/rejected": -8.404468536376953, "step": 55360 }, { "epoch": 0.66, "learning_rate": 1.5408430064402141e-06, "logits/chosen": -2.9138293266296387, "logits/rejected": -2.259174346923828, "logps/chosen": -141.7537384033203, "logps/rejected": -990.0759887695312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8801128268241882, "rewards/margins": 8.620315551757812, "rewards/rejected": -9.500428199768066, "step": 55370 }, { "epoch": 0.66, "learning_rate": 1.539878410809695e-06, "logits/chosen": -2.8451309204101562, "logits/rejected": -2.2142231464385986, "logps/chosen": -120.70155334472656, "logps/rejected": -985.51806640625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7201468348503113, "rewards/margins": 8.739346504211426, "rewards/rejected": -9.459493637084961, "step": 55380 }, { "epoch": 0.66, "learning_rate": 1.5389139828137383e-06, "logits/chosen": -2.8800365924835205, "logits/rejected": -2.111612558364868, "logps/chosen": -190.29660034179688, "logps/rejected": -1080.1240234375, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -1.33744215965271, "rewards/margins": 9.047562599182129, "rewards/rejected": -10.385004997253418, "step": 55390 }, { "epoch": 0.66, "learning_rate": 1.5379497226207308e-06, "logits/chosen": -2.8723702430725098, "logits/rejected": -2.416750192642212, "logps/chosen": -89.40214538574219, "logps/rejected": -874.2362060546875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4068982005119324, "rewards/margins": 7.967770576477051, "rewards/rejected": -8.37466812133789, "step": 55400 }, { "epoch": 0.66, "learning_rate": 1.5369856303990298e-06, "logits/chosen": -2.844273805618286, "logits/rejected": -2.3467330932617188, "logps/chosen": -138.9937744140625, "logps/rejected": -919.4274291992188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.930753231048584, "rewards/margins": 7.8737592697143555, "rewards/rejected": -8.804513931274414, "step": 55410 }, { "epoch": 0.66, "learning_rate": 1.536021706316963e-06, "logits/chosen": -2.901149272918701, "logits/rejected": -2.2716622352600098, "logps/chosen": -150.75009155273438, "logps/rejected": -1113.690185546875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9324858784675598, "rewards/margins": 9.787163734436035, "rewards/rejected": -10.719650268554688, "step": 55420 }, { "epoch": 0.66, "learning_rate": 1.535057950542829e-06, "logits/chosen": -2.8942461013793945, "logits/rejected": -2.2205257415771484, "logps/chosen": -144.44204711914062, "logps/rejected": -1008.8934326171875, "loss": 0.1394, "rewards/accuracies": 1.0, "rewards/chosen": -0.9152765274047852, "rewards/margins": 8.778410911560059, "rewards/rejected": -9.69368839263916, "step": 55430 }, { "epoch": 0.66, "learning_rate": 1.5340943632448968e-06, "logits/chosen": -2.8494677543640137, "logits/rejected": -2.2005889415740967, "logps/chosen": -121.2345199584961, "logps/rejected": -968.1658325195312, "loss": 0.0869, "rewards/accuracies": 1.0, "rewards/chosen": -0.7191057205200195, "rewards/margins": 8.571722984313965, "rewards/rejected": -9.290828704833984, "step": 55440 }, { "epoch": 0.66, "learning_rate": 1.5331309445914065e-06, "logits/chosen": -2.9043402671813965, "logits/rejected": -2.262805461883545, "logps/chosen": -122.8232421875, "logps/rejected": -935.0089721679688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7668604254722595, "rewards/margins": 8.203191757202148, "rewards/rejected": -8.970051765441895, "step": 55450 }, { "epoch": 0.66, "learning_rate": 1.5321676947505674e-06, "logits/chosen": -2.8493151664733887, "logits/rejected": -2.362947940826416, "logps/chosen": -116.98692321777344, "logps/rejected": -849.2025146484375, "loss": 0.1235, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7107927799224854, "rewards/margins": 7.418303489685059, "rewards/rejected": -8.129096031188965, "step": 55460 }, { "epoch": 0.66, "learning_rate": 1.5312046138905607e-06, "logits/chosen": -2.851449489593506, "logits/rejected": -2.2589006423950195, "logps/chosen": -115.57249450683594, "logps/rejected": -1004.9407348632812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6710105538368225, "rewards/margins": 8.979238510131836, "rewards/rejected": -9.650250434875488, "step": 55470 }, { "epoch": 0.66, "learning_rate": 1.5302417021795382e-06, "logits/chosen": -2.884828567504883, "logits/rejected": -2.5811784267425537, "logps/chosen": -100.20335388183594, "logps/rejected": -908.5565185546875, "loss": 0.1758, "rewards/accuracies": 1.0, "rewards/chosen": -0.6053150296211243, "rewards/margins": 8.104742050170898, "rewards/rejected": -8.71005630493164, "step": 55480 }, { "epoch": 0.66, "learning_rate": 1.5292789597856194e-06, "logits/chosen": -2.877373456954956, "logits/rejected": -2.2594616413116455, "logps/chosen": -114.6588363647461, "logps/rejected": -1061.5328369140625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.673147976398468, "rewards/margins": 9.546990394592285, "rewards/rejected": -10.220137596130371, "step": 55490 }, { "epoch": 0.66, "learning_rate": 1.5283163868769002e-06, "logits/chosen": -2.854444980621338, "logits/rejected": -2.3839516639709473, "logps/chosen": -118.79017639160156, "logps/rejected": -923.1456909179688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7575901746749878, "rewards/margins": 8.087102890014648, "rewards/rejected": -8.844694137573242, "step": 55500 }, { "epoch": 0.66, "learning_rate": 1.5273539836214401e-06, "logits/chosen": -2.8536858558654785, "logits/rejected": -2.099522590637207, "logps/chosen": -159.78163146972656, "logps/rejected": -1079.209716796875, "loss": 0.1251, "rewards/accuracies": 1.0, "rewards/chosen": -1.047334909439087, "rewards/margins": 9.333044052124023, "rewards/rejected": -10.380380630493164, "step": 55510 }, { "epoch": 0.66, "learning_rate": 1.5263917501872735e-06, "logits/chosen": -2.8434276580810547, "logits/rejected": -2.3984837532043457, "logps/chosen": -75.35188293457031, "logps/rejected": -836.9564208984375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.35177403688430786, "rewards/margins": 7.635479927062988, "rewards/rejected": -7.9872541427612305, "step": 55520 }, { "epoch": 0.66, "learning_rate": 1.5254296867424034e-06, "logits/chosen": -2.8655056953430176, "logits/rejected": -2.1948132514953613, "logps/chosen": -123.78239440917969, "logps/rejected": -1000.8179931640625, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.7899985313415527, "rewards/margins": 8.816696166992188, "rewards/rejected": -9.606693267822266, "step": 55530 }, { "epoch": 0.66, "learning_rate": 1.524467793454803e-06, "logits/chosen": -2.8724374771118164, "logits/rejected": -2.2423415184020996, "logps/chosen": -125.15992736816406, "logps/rejected": -1019.3194580078125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7554885745048523, "rewards/margins": 9.043767929077148, "rewards/rejected": -9.799256324768066, "step": 55540 }, { "epoch": 0.66, "learning_rate": 1.5235060704924166e-06, "logits/chosen": -2.853597640991211, "logits/rejected": -2.0659985542297363, "logps/chosen": -163.52508544921875, "logps/rejected": -1070.4046630859375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.1208775043487549, "rewards/margins": 9.185676574707031, "rewards/rejected": -10.306554794311523, "step": 55550 }, { "epoch": 0.67, "learning_rate": 1.5225445180231576e-06, "logits/chosen": -2.897386074066162, "logits/rejected": -2.3493332862854004, "logps/chosen": -113.64311218261719, "logps/rejected": -952.56201171875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6980899572372437, "rewards/margins": 8.425419807434082, "rewards/rejected": -9.123510360717773, "step": 55560 }, { "epoch": 0.67, "learning_rate": 1.521583136214912e-06, "logits/chosen": -2.807800531387329, "logits/rejected": -2.1734073162078857, "logps/chosen": -127.4447021484375, "logps/rejected": -1055.264404296875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.7866408228874207, "rewards/margins": 9.359704971313477, "rewards/rejected": -10.146345138549805, "step": 55570 }, { "epoch": 0.67, "learning_rate": 1.5206219252355325e-06, "logits/chosen": -2.839268207550049, "logits/rejected": -2.063286542892456, "logps/chosen": -147.06057739257812, "logps/rejected": -1074.363525390625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9939852952957153, "rewards/margins": 9.361658096313477, "rewards/rejected": -10.355642318725586, "step": 55580 }, { "epoch": 0.67, "learning_rate": 1.519660885252845e-06, "logits/chosen": -2.87188982963562, "logits/rejected": -2.336513042449951, "logps/chosen": -113.52088928222656, "logps/rejected": -926.3209838867188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6498968601226807, "rewards/margins": 8.221887588500977, "rewards/rejected": -8.871784210205078, "step": 55590 }, { "epoch": 0.67, "learning_rate": 1.5187000164346444e-06, "logits/chosen": -2.8740599155426025, "logits/rejected": -2.4015257358551025, "logps/chosen": -111.43607330322266, "logps/rejected": -812.7545166015625, "loss": 0.1325, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6593354344367981, "rewards/margins": 7.097299098968506, "rewards/rejected": -7.756634712219238, "step": 55600 }, { "epoch": 0.67, "learning_rate": 1.5177393189486953e-06, "logits/chosen": -2.841351270675659, "logits/rejected": -2.34570574760437, "logps/chosen": -105.89958190917969, "logps/rejected": -951.1251220703125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6220318078994751, "rewards/margins": 8.498973846435547, "rewards/rejected": -9.12100601196289, "step": 55610 }, { "epoch": 0.67, "learning_rate": 1.5167787929627327e-06, "logits/chosen": -2.8747878074645996, "logits/rejected": -2.2889175415039062, "logps/chosen": -122.6474838256836, "logps/rejected": -1019.6892700195312, "loss": 0.1634, "rewards/accuracies": 1.0, "rewards/chosen": -0.7371793389320374, "rewards/margins": 9.052294731140137, "rewards/rejected": -9.789473533630371, "step": 55620 }, { "epoch": 0.67, "learning_rate": 1.515818438644463e-06, "logits/chosen": -2.889784097671509, "logits/rejected": -2.139814853668213, "logps/chosen": -128.24588012695312, "logps/rejected": -978.4591064453125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7545997500419617, "rewards/margins": 8.625137329101562, "rewards/rejected": -9.379737854003906, "step": 55630 }, { "epoch": 0.67, "learning_rate": 1.5148582561615595e-06, "logits/chosen": -2.8820910453796387, "logits/rejected": -2.6070988178253174, "logps/chosen": -72.69331359863281, "logps/rejected": -798.6187744140625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.3302444815635681, "rewards/margins": 7.292241096496582, "rewards/rejected": -7.622485160827637, "step": 55640 }, { "epoch": 0.67, "learning_rate": 1.5138982456816687e-06, "logits/chosen": -2.8613953590393066, "logits/rejected": -2.5210492610931396, "logps/chosen": -98.58145904541016, "logps/rejected": -808.3642578125, "loss": 0.0742, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5770378112792969, "rewards/margins": 7.11724328994751, "rewards/rejected": -7.69428014755249, "step": 55650 }, { "epoch": 0.67, "learning_rate": 1.512938407372405e-06, "logits/chosen": -2.926248550415039, "logits/rejected": -2.437713146209717, "logps/chosen": -102.00020599365234, "logps/rejected": -938.5982666015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5480425357818604, "rewards/margins": 8.458757400512695, "rewards/rejected": -9.006799697875977, "step": 55660 }, { "epoch": 0.67, "learning_rate": 1.5119787414013549e-06, "logits/chosen": -2.8919506072998047, "logits/rejected": -2.446129560470581, "logps/chosen": -107.2016372680664, "logps/rejected": -897.3548583984375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5683504343032837, "rewards/margins": 8.014336585998535, "rewards/rejected": -8.582687377929688, "step": 55670 }, { "epoch": 0.67, "learning_rate": 1.5110192479360718e-06, "logits/chosen": -2.8233206272125244, "logits/rejected": -2.3296289443969727, "logps/chosen": -101.17655944824219, "logps/rejected": -944.615234375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5515619516372681, "rewards/margins": 8.498711585998535, "rewards/rejected": -9.050274848937988, "step": 55680 }, { "epoch": 0.67, "learning_rate": 1.5100599271440813e-06, "logits/chosen": -2.923037052154541, "logits/rejected": -2.4883265495300293, "logps/chosen": -147.22789001464844, "logps/rejected": -868.3103637695312, "loss": 0.147, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9878886938095093, "rewards/margins": 7.320975303649902, "rewards/rejected": -8.30886459350586, "step": 55690 }, { "epoch": 0.67, "learning_rate": 1.5091007791928786e-06, "logits/chosen": -2.863208293914795, "logits/rejected": -2.4339759349823, "logps/chosen": -96.16436767578125, "logps/rejected": -865.6517333984375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5327448844909668, "rewards/margins": 7.743895530700684, "rewards/rejected": -8.276639938354492, "step": 55700 }, { "epoch": 0.67, "learning_rate": 1.5081418042499268e-06, "logits/chosen": -2.876610040664673, "logits/rejected": -2.328575611114502, "logps/chosen": -112.27767181396484, "logps/rejected": -893.7150268554688, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5927980542182922, "rewards/margins": 7.943186283111572, "rewards/rejected": -8.535982131958008, "step": 55710 }, { "epoch": 0.67, "learning_rate": 1.5071830024826617e-06, "logits/chosen": -2.9046616554260254, "logits/rejected": -2.4702305793762207, "logps/chosen": -108.59657287597656, "logps/rejected": -877.4095458984375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5804029703140259, "rewards/margins": 7.797629356384277, "rewards/rejected": -8.378032684326172, "step": 55720 }, { "epoch": 0.67, "learning_rate": 1.506224374058487e-06, "logits/chosen": -2.9001269340515137, "logits/rejected": -2.340944766998291, "logps/chosen": -137.75747680664062, "logps/rejected": -871.5111083984375, "loss": 0.1443, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9665497541427612, "rewards/margins": 7.365081787109375, "rewards/rejected": -8.331632614135742, "step": 55730 }, { "epoch": 0.67, "learning_rate": 1.5052659191447765e-06, "logits/chosen": -2.848339080810547, "logits/rejected": -2.333885908126831, "logps/chosen": -113.05473327636719, "logps/rejected": -901.2730712890625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6953439116477966, "rewards/margins": 7.938148498535156, "rewards/rejected": -8.633493423461914, "step": 55740 }, { "epoch": 0.67, "learning_rate": 1.5043076379088733e-06, "logits/chosen": -2.8419575691223145, "logits/rejected": -2.1971306800842285, "logps/chosen": -117.97129821777344, "logps/rejected": -979.9417724609375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6372777819633484, "rewards/margins": 8.769632339477539, "rewards/rejected": -9.40691089630127, "step": 55750 }, { "epoch": 0.67, "learning_rate": 1.5033495305180915e-06, "logits/chosen": -2.8852038383483887, "logits/rejected": -2.4225094318389893, "logps/chosen": -102.83293151855469, "logps/rejected": -950.44091796875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5863222479820251, "rewards/margins": 8.533773422241211, "rewards/rejected": -9.120096206665039, "step": 55760 }, { "epoch": 0.67, "learning_rate": 1.502391597139714e-06, "logits/chosen": -2.90258526802063, "logits/rejected": -2.296511173248291, "logps/chosen": -123.00187683105469, "logps/rejected": -975.8064575195312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6936649084091187, "rewards/margins": 8.664018630981445, "rewards/rejected": -9.357685089111328, "step": 55770 }, { "epoch": 0.67, "learning_rate": 1.5014338379409922e-06, "logits/chosen": -2.8831992149353027, "logits/rejected": -2.243330240249634, "logps/chosen": -106.27864837646484, "logps/rejected": -1000.2474365234375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5806798338890076, "rewards/margins": 9.03779411315918, "rewards/rejected": -9.618474960327148, "step": 55780 }, { "epoch": 0.67, "learning_rate": 1.500476253089149e-06, "logits/chosen": -2.9088473320007324, "logits/rejected": -2.6252214908599854, "logps/chosen": -83.79051208496094, "logps/rejected": -780.3099365234375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.4696626663208008, "rewards/margins": 6.974618434906006, "rewards/rejected": -7.444281578063965, "step": 55790 }, { "epoch": 0.67, "learning_rate": 1.4995188427513766e-06, "logits/chosen": -2.8456947803497314, "logits/rejected": -2.232295513153076, "logps/chosen": -120.34532165527344, "logps/rejected": -977.9483642578125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.754658579826355, "rewards/margins": 8.617280006408691, "rewards/rejected": -9.371938705444336, "step": 55800 }, { "epoch": 0.67, "learning_rate": 1.4985616070948348e-06, "logits/chosen": -2.8928563594818115, "logits/rejected": -2.3698060512542725, "logps/chosen": -95.02894592285156, "logps/rejected": -873.63671875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5344682931900024, "rewards/margins": 7.82235860824585, "rewards/rejected": -8.356825828552246, "step": 55810 }, { "epoch": 0.67, "learning_rate": 1.497604546286655e-06, "logits/chosen": -2.893791437149048, "logits/rejected": -1.9978317022323608, "logps/chosen": -159.20962524414062, "logps/rejected": -1120.9920654296875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8710336685180664, "rewards/margins": 9.908254623413086, "rewards/rejected": -10.779289245605469, "step": 55820 }, { "epoch": 0.67, "learning_rate": 1.496647660493938e-06, "logits/chosen": -2.885462999343872, "logits/rejected": -2.088317394256592, "logps/chosen": -161.96505737304688, "logps/rejected": -1011.4632568359375, "loss": 0.0433, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0368726253509521, "rewards/margins": 8.678665161132812, "rewards/rejected": -9.715538024902344, "step": 55830 }, { "epoch": 0.67, "learning_rate": 1.4956909498837525e-06, "logits/chosen": -2.8931779861450195, "logits/rejected": -1.9763834476470947, "logps/chosen": -131.5479278564453, "logps/rejected": -1156.690185546875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.801863968372345, "rewards/margins": 10.355459213256836, "rewards/rejected": -11.157322883605957, "step": 55840 }, { "epoch": 0.67, "learning_rate": 1.4947344146231373e-06, "logits/chosen": -2.8974666595458984, "logits/rejected": -2.2504563331604004, "logps/chosen": -120.85430908203125, "logps/rejected": -1019.4385986328125, "loss": 0.1587, "rewards/accuracies": 1.0, "rewards/chosen": -0.713421642780304, "rewards/margins": 9.077214241027832, "rewards/rejected": -9.79063606262207, "step": 55850 }, { "epoch": 0.67, "learning_rate": 1.493778054879102e-06, "logits/chosen": -2.8530008792877197, "logits/rejected": -2.132026195526123, "logps/chosen": -117.77192687988281, "logps/rejected": -1063.0867919921875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.6713243722915649, "rewards/margins": 9.565284729003906, "rewards/rejected": -10.236607551574707, "step": 55860 }, { "epoch": 0.67, "learning_rate": 1.4928218708186237e-06, "logits/chosen": -2.8847155570983887, "logits/rejected": -2.3795039653778076, "logps/chosen": -104.60359191894531, "logps/rejected": -927.5374145507812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5686891078948975, "rewards/margins": 8.310051918029785, "rewards/rejected": -8.878740310668945, "step": 55870 }, { "epoch": 0.67, "learning_rate": 1.491865862608649e-06, "logits/chosen": -2.8784594535827637, "logits/rejected": -2.306535243988037, "logps/chosen": -105.49063873291016, "logps/rejected": -970.2239379882812, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6152863502502441, "rewards/margins": 8.71375560760498, "rewards/rejected": -9.329041481018066, "step": 55880 }, { "epoch": 0.67, "learning_rate": 1.4909100304160945e-06, "logits/chosen": -2.895977735519409, "logits/rejected": -2.387917995452881, "logps/chosen": -88.19821166992188, "logps/rejected": -928.4528198242188, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.44686904549598694, "rewards/margins": 8.457133293151855, "rewards/rejected": -8.90400218963623, "step": 55890 }, { "epoch": 0.67, "learning_rate": 1.4899543744078468e-06, "logits/chosen": -2.834709405899048, "logits/rejected": -2.185354709625244, "logps/chosen": -122.69744873046875, "logps/rejected": -1071.7333984375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6916666030883789, "rewards/margins": 9.612017631530762, "rewards/rejected": -10.303683280944824, "step": 55900 }, { "epoch": 0.67, "learning_rate": 1.4889988947507586e-06, "logits/chosen": -2.9096457958221436, "logits/rejected": -2.3312530517578125, "logps/chosen": -117.72001647949219, "logps/rejected": -938.2265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6195266842842102, "rewards/margins": 8.367762565612793, "rewards/rejected": -8.987289428710938, "step": 55910 }, { "epoch": 0.67, "learning_rate": 1.488043591611657e-06, "logits/chosen": -2.9487459659576416, "logits/rejected": -2.3886308670043945, "logps/chosen": -114.32054138183594, "logps/rejected": -882.1315307617188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6601313948631287, "rewards/margins": 7.77612829208374, "rewards/rejected": -8.436260223388672, "step": 55920 }, { "epoch": 0.67, "learning_rate": 1.487088465157333e-06, "logits/chosen": -2.887378454208374, "logits/rejected": -2.2408530712127686, "logps/chosen": -129.54086303710938, "logps/rejected": -1032.740478515625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.797182559967041, "rewards/margins": 9.118074417114258, "rewards/rejected": -9.915257453918457, "step": 55930 }, { "epoch": 0.67, "learning_rate": 1.4861335155545497e-06, "logits/chosen": -2.891691207885742, "logits/rejected": -2.365713596343994, "logps/chosen": -99.67202758789062, "logps/rejected": -849.5945434570312, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5596489906311035, "rewards/margins": 7.5559515953063965, "rewards/rejected": -8.115601539611816, "step": 55940 }, { "epoch": 0.67, "learning_rate": 1.4851787429700392e-06, "logits/chosen": -2.899141788482666, "logits/rejected": -2.430227518081665, "logps/chosen": -84.02882385253906, "logps/rejected": -879.9737548828125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.39100903272628784, "rewards/margins": 8.029413223266602, "rewards/rejected": -8.420422554016113, "step": 55950 }, { "epoch": 0.67, "learning_rate": 1.4842241475705014e-06, "logits/chosen": -2.8754279613494873, "logits/rejected": -2.3727893829345703, "logps/chosen": -118.57951354980469, "logps/rejected": -978.02490234375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.7494832277297974, "rewards/margins": 8.629844665527344, "rewards/rejected": -9.379327774047852, "step": 55960 }, { "epoch": 0.67, "learning_rate": 1.4832697295226062e-06, "logits/chosen": -2.8689589500427246, "logits/rejected": -2.3869693279266357, "logps/chosen": -93.84019470214844, "logps/rejected": -898.0980224609375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4954666495323181, "rewards/margins": 8.10894775390625, "rewards/rejected": -8.604413986206055, "step": 55970 }, { "epoch": 0.67, "learning_rate": 1.482315488992992e-06, "logits/chosen": -2.814727306365967, "logits/rejected": -2.312831401824951, "logps/chosen": -109.4759292602539, "logps/rejected": -961.8932495117188, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -0.6322563290596008, "rewards/margins": 8.598100662231445, "rewards/rejected": -9.23035717010498, "step": 55980 }, { "epoch": 0.67, "learning_rate": 1.4813614261482678e-06, "logits/chosen": -2.8600029945373535, "logits/rejected": -2.2289633750915527, "logps/chosen": -110.7947006225586, "logps/rejected": -965.8349609375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6467239856719971, "rewards/margins": 8.620565414428711, "rewards/rejected": -9.267288208007812, "step": 55990 }, { "epoch": 0.67, "learning_rate": 1.480407541155009e-06, "logits/chosen": -2.912482738494873, "logits/rejected": -2.5700297355651855, "logps/chosen": -84.74601745605469, "logps/rejected": -794.0338745117188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.45073986053466797, "rewards/margins": 7.1255784034729, "rewards/rejected": -7.576319217681885, "step": 56000 }, { "epoch": 0.67, "learning_rate": 1.4794538341797616e-06, "logits/chosen": -2.93202543258667, "logits/rejected": -2.2700448036193848, "logps/chosen": -125.66744232177734, "logps/rejected": -1020.7146606445312, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7670220732688904, "rewards/margins": 9.049386978149414, "rewards/rejected": -9.816408157348633, "step": 56010 }, { "epoch": 0.67, "learning_rate": 1.4785003053890406e-06, "logits/chosen": -2.8750808238983154, "logits/rejected": -2.2194437980651855, "logps/chosen": -132.4734344482422, "logps/rejected": -929.7845458984375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.8282892107963562, "rewards/margins": 8.071883201599121, "rewards/rejected": -8.90017318725586, "step": 56020 }, { "epoch": 0.67, "learning_rate": 1.4775469549493288e-06, "logits/chosen": -2.838759183883667, "logits/rejected": -2.36698579788208, "logps/chosen": -115.48162841796875, "logps/rejected": -838.947265625, "loss": 0.0686, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6611248254776001, "rewards/margins": 7.3564887046813965, "rewards/rejected": -8.017614364624023, "step": 56030 }, { "epoch": 0.67, "learning_rate": 1.4765937830270788e-06, "logits/chosen": -2.8685481548309326, "logits/rejected": -2.355020046234131, "logps/chosen": -89.49952697753906, "logps/rejected": -859.6619262695312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.46666884422302246, "rewards/margins": 7.7524309158325195, "rewards/rejected": -8.219099044799805, "step": 56040 }, { "epoch": 0.67, "learning_rate": 1.4756407897887126e-06, "logits/chosen": -2.9005980491638184, "logits/rejected": -2.2208237648010254, "logps/chosen": -128.1241912841797, "logps/rejected": -1021.2271728515625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7659132480621338, "rewards/margins": 9.044740676879883, "rewards/rejected": -9.810654640197754, "step": 56050 }, { "epoch": 0.67, "learning_rate": 1.4746879754006188e-06, "logits/chosen": -2.855440616607666, "logits/rejected": -2.1951794624328613, "logps/chosen": -121.86311340332031, "logps/rejected": -1045.77099609375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7178618311882019, "rewards/margins": 9.319910049438477, "rewards/rejected": -10.037771224975586, "step": 56060 }, { "epoch": 0.67, "learning_rate": 1.473735340029157e-06, "logits/chosen": -2.908653974533081, "logits/rejected": -2.452033519744873, "logps/chosen": -76.43397521972656, "logps/rejected": -844.7560424804688, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": -0.3455030620098114, "rewards/margins": 7.7264251708984375, "rewards/rejected": -8.071928024291992, "step": 56070 }, { "epoch": 0.67, "learning_rate": 1.4727828838406543e-06, "logits/chosen": -2.8890678882598877, "logits/rejected": -2.262606143951416, "logps/chosen": -120.11446380615234, "logps/rejected": -1029.80517578125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7116217613220215, "rewards/margins": 9.189895629882812, "rewards/rejected": -9.901517868041992, "step": 56080 }, { "epoch": 0.67, "learning_rate": 1.471830607001408e-06, "logits/chosen": -2.8936989307403564, "logits/rejected": -2.1902096271514893, "logps/chosen": -116.29075622558594, "logps/rejected": -1044.4954833984375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6724963188171387, "rewards/margins": 9.372589111328125, "rewards/rejected": -10.045084953308105, "step": 56090 }, { "epoch": 0.67, "learning_rate": 1.4708785096776812e-06, "logits/chosen": -2.9570364952087402, "logits/rejected": -2.4846811294555664, "logps/chosen": -78.03532409667969, "logps/rejected": -901.7589111328125, "loss": 0.1631, "rewards/accuracies": 1.0, "rewards/chosen": -0.38712427020072937, "rewards/margins": 8.256512641906738, "rewards/rejected": -8.643636703491211, "step": 56100 }, { "epoch": 0.67, "learning_rate": 1.4699265920357086e-06, "logits/chosen": -2.8806312084198, "logits/rejected": -2.170330047607422, "logps/chosen": -125.31689453125, "logps/rejected": -1150.287353515625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6958380937576294, "rewards/margins": 10.392377853393555, "rewards/rejected": -11.088216781616211, "step": 56110 }, { "epoch": 0.67, "learning_rate": 1.4689748542416927e-06, "logits/chosen": -2.8816747665405273, "logits/rejected": -2.2163233757019043, "logps/chosen": -103.99986267089844, "logps/rejected": -893.2150268554688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5313806533813477, "rewards/margins": 8.015130996704102, "rewards/rejected": -8.546510696411133, "step": 56120 }, { "epoch": 0.67, "learning_rate": 1.468023296461803e-06, "logits/chosen": -2.926234722137451, "logits/rejected": -2.3228538036346436, "logps/chosen": -107.50477600097656, "logps/rejected": -918.806640625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6142910718917847, "rewards/margins": 8.189346313476562, "rewards/rejected": -8.80363655090332, "step": 56130 }, { "epoch": 0.67, "learning_rate": 1.4670719188621797e-06, "logits/chosen": -2.922654390335083, "logits/rejected": -2.492537498474121, "logps/chosen": -87.80064392089844, "logps/rejected": -864.4093017578125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.4702247977256775, "rewards/margins": 7.793726444244385, "rewards/rejected": -8.263951301574707, "step": 56140 }, { "epoch": 0.67, "learning_rate": 1.4661207216089306e-06, "logits/chosen": -2.825815200805664, "logits/rejected": -2.2949719429016113, "logps/chosen": -116.89852142333984, "logps/rejected": -954.0997924804688, "loss": 0.0775, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7126756906509399, "rewards/margins": 8.438371658325195, "rewards/rejected": -9.151047706604004, "step": 56150 }, { "epoch": 0.67, "learning_rate": 1.4651697048681319e-06, "logits/chosen": -2.847921371459961, "logits/rejected": -2.465855836868286, "logps/chosen": -84.82609558105469, "logps/rejected": -885.2022705078125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.40072375535964966, "rewards/margins": 8.079068183898926, "rewards/rejected": -8.479791641235352, "step": 56160 }, { "epoch": 0.67, "learning_rate": 1.4642188688058283e-06, "logits/chosen": -2.8721957206726074, "logits/rejected": -2.0958282947540283, "logps/chosen": -143.30459594726562, "logps/rejected": -952.54248046875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.8808519244194031, "rewards/margins": 8.248126029968262, "rewards/rejected": -9.12897777557373, "step": 56170 }, { "epoch": 0.67, "learning_rate": 1.4632682135880338e-06, "logits/chosen": -2.9339957237243652, "logits/rejected": -2.4000487327575684, "logps/chosen": -102.65938568115234, "logps/rejected": -985.5847778320312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5321707725524902, "rewards/margins": 8.936870574951172, "rewards/rejected": -9.46904182434082, "step": 56180 }, { "epoch": 0.67, "learning_rate": 1.4623177393807303e-06, "logits/chosen": -2.821371555328369, "logits/rejected": -2.1788363456726074, "logps/chosen": -118.3100814819336, "logps/rejected": -913.0205078125, "loss": 0.0882, "rewards/accuracies": 1.0, "rewards/chosen": -0.7027807235717773, "rewards/margins": 8.033137321472168, "rewards/rejected": -8.735918045043945, "step": 56190 }, { "epoch": 0.67, "learning_rate": 1.4613674463498667e-06, "logits/chosen": -2.9041125774383545, "logits/rejected": -2.2027747631073, "logps/chosen": -112.11735534667969, "logps/rejected": -1062.2412109375, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -0.624487042427063, "rewards/margins": 9.602823257446289, "rewards/rejected": -10.227309226989746, "step": 56200 }, { "epoch": 0.67, "learning_rate": 1.4604173346613625e-06, "logits/chosen": -2.877019166946411, "logits/rejected": -2.4163200855255127, "logps/chosen": -106.9375228881836, "logps/rejected": -969.1776123046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6218745112419128, "rewards/margins": 8.675360679626465, "rewards/rejected": -9.297235488891602, "step": 56210 }, { "epoch": 0.67, "learning_rate": 1.4594674044811042e-06, "logits/chosen": -2.8695030212402344, "logits/rejected": -2.0377438068389893, "logps/chosen": -132.52114868164062, "logps/rejected": -1022.4943237304688, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7454738616943359, "rewards/margins": 9.079523086547852, "rewards/rejected": -9.824996948242188, "step": 56220 }, { "epoch": 0.67, "learning_rate": 1.4585176559749475e-06, "logits/chosen": -2.8686869144439697, "logits/rejected": -2.5162229537963867, "logps/chosen": -88.38484191894531, "logps/rejected": -807.4069213867188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5025602579116821, "rewards/margins": 7.190603733062744, "rewards/rejected": -7.6931633949279785, "step": 56230 }, { "epoch": 0.67, "learning_rate": 1.457568089308716e-06, "logits/chosen": -2.8482604026794434, "logits/rejected": -2.022949695587158, "logps/chosen": -136.22598266601562, "logps/rejected": -1068.909912109375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7703455090522766, "rewards/margins": 9.513500213623047, "rewards/rejected": -10.283845901489258, "step": 56240 }, { "epoch": 0.67, "learning_rate": 1.4566187046482e-06, "logits/chosen": -2.8893702030181885, "logits/rejected": -2.3382275104522705, "logps/chosen": -99.99007415771484, "logps/rejected": -991.361328125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5185359716415405, "rewards/margins": 9.002240180969238, "rewards/rejected": -9.520776748657227, "step": 56250 }, { "epoch": 0.67, "learning_rate": 1.4556695021591605e-06, "logits/chosen": -2.9113144874572754, "logits/rejected": -2.300508499145508, "logps/chosen": -93.06932067871094, "logps/rejected": -901.3294677734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5000733137130737, "rewards/margins": 8.132499694824219, "rewards/rejected": -8.632574081420898, "step": 56260 }, { "epoch": 0.67, "learning_rate": 1.4547204820073251e-06, "logits/chosen": -2.9443092346191406, "logits/rejected": -2.463365316390991, "logps/chosen": -91.14384460449219, "logps/rejected": -873.7542724609375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4665684700012207, "rewards/margins": 7.893364906311035, "rewards/rejected": -8.359933853149414, "step": 56270 }, { "epoch": 0.67, "learning_rate": 1.4537716443583907e-06, "logits/chosen": -2.9169039726257324, "logits/rejected": -2.522473096847534, "logps/chosen": -89.53426361083984, "logps/rejected": -889.3782958984375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.4502994120121002, "rewards/margins": 8.061216354370117, "rewards/rejected": -8.511516571044922, "step": 56280 }, { "epoch": 0.67, "learning_rate": 1.4528229893780212e-06, "logits/chosen": -2.9116597175598145, "logits/rejected": -2.1412596702575684, "logps/chosen": -133.69949340820312, "logps/rejected": -969.8853759765625, "loss": 0.0906, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.849666953086853, "rewards/margins": 8.436243057250977, "rewards/rejected": -9.285910606384277, "step": 56290 }, { "epoch": 0.67, "learning_rate": 1.4518745172318505e-06, "logits/chosen": -2.873680591583252, "logits/rejected": -2.264024257659912, "logps/chosen": -111.6049575805664, "logps/rejected": -1023.6658325195312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6132036447525024, "rewards/margins": 9.211739540100098, "rewards/rejected": -9.824942588806152, "step": 56300 }, { "epoch": 0.67, "learning_rate": 1.450926228085477e-06, "logits/chosen": -2.8928380012512207, "logits/rejected": -2.317328691482544, "logps/chosen": -128.19424438476562, "logps/rejected": -1004.9156494140625, "loss": 0.158, "rewards/accuracies": 1.0, "rewards/chosen": -0.7410253286361694, "rewards/margins": 8.904634475708008, "rewards/rejected": -9.645659446716309, "step": 56310 }, { "epoch": 0.67, "learning_rate": 1.4499781221044706e-06, "logits/chosen": -2.878746509552002, "logits/rejected": -2.471759080886841, "logps/chosen": -99.38459777832031, "logps/rejected": -880.2601318359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5155799388885498, "rewards/margins": 7.895144462585449, "rewards/rejected": -8.410725593566895, "step": 56320 }, { "epoch": 0.67, "learning_rate": 1.4490301994543665e-06, "logits/chosen": -2.8718414306640625, "logits/rejected": -2.081470012664795, "logps/chosen": -126.1206283569336, "logps/rejected": -1077.003173828125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6958097219467163, "rewards/margins": 9.669398307800293, "rewards/rejected": -10.36520767211914, "step": 56330 }, { "epoch": 0.67, "learning_rate": 1.4480824603006716e-06, "logits/chosen": -2.903687000274658, "logits/rejected": -2.154707670211792, "logps/chosen": -112.89693450927734, "logps/rejected": -884.4924926757812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.591070830821991, "rewards/margins": 7.868237495422363, "rewards/rejected": -8.459307670593262, "step": 56340 }, { "epoch": 0.67, "learning_rate": 1.4471349048088579e-06, "logits/chosen": -2.913825035095215, "logits/rejected": -2.2982983589172363, "logps/chosen": -114.9588623046875, "logps/rejected": -933.5587158203125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6605945825576782, "rewards/margins": 8.292553901672363, "rewards/rejected": -8.95314884185791, "step": 56350 }, { "epoch": 0.67, "learning_rate": 1.4461875331443662e-06, "logits/chosen": -2.905026435852051, "logits/rejected": -2.43586802482605, "logps/chosen": -91.45182037353516, "logps/rejected": -907.2429809570312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.44938287138938904, "rewards/margins": 8.244588851928711, "rewards/rejected": -8.693971633911133, "step": 56360 }, { "epoch": 0.67, "learning_rate": 1.4452403454726036e-06, "logits/chosen": -2.8891406059265137, "logits/rejected": -2.4185895919799805, "logps/chosen": -90.12565612792969, "logps/rejected": -957.76025390625, "loss": 0.1513, "rewards/accuracies": 1.0, "rewards/chosen": -0.4581373333930969, "rewards/margins": 8.743993759155273, "rewards/rejected": -9.202131271362305, "step": 56370 }, { "epoch": 0.67, "learning_rate": 1.4442933419589471e-06, "logits/chosen": -2.8345797061920166, "logits/rejected": -2.3575234413146973, "logps/chosen": -97.97295379638672, "logps/rejected": -889.3250732421875, "loss": 0.0714, "rewards/accuracies": 1.0, "rewards/chosen": -0.5583270788192749, "rewards/margins": 7.949830532073975, "rewards/rejected": -8.508157730102539, "step": 56380 }, { "epoch": 0.68, "learning_rate": 1.443346522768741e-06, "logits/chosen": -2.844856023788452, "logits/rejected": -2.393610715866089, "logps/chosen": -101.98328399658203, "logps/rejected": -837.6917114257812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5856055021286011, "rewards/margins": 7.413748264312744, "rewards/rejected": -7.999353885650635, "step": 56390 }, { "epoch": 0.68, "learning_rate": 1.4423998880672974e-06, "logits/chosen": -2.9000887870788574, "logits/rejected": -2.574429750442505, "logps/chosen": -84.96570587158203, "logps/rejected": -814.8541259765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4178805351257324, "rewards/margins": 7.351762294769287, "rewards/rejected": -7.769643306732178, "step": 56400 }, { "epoch": 0.68, "learning_rate": 1.4414534380198957e-06, "logits/chosen": -2.8418564796447754, "logits/rejected": -2.20914888381958, "logps/chosen": -102.69869232177734, "logps/rejected": -920.6871337890625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.547926127910614, "rewards/margins": 8.282485008239746, "rewards/rejected": -8.830410957336426, "step": 56410 }, { "epoch": 0.68, "learning_rate": 1.4405071727917852e-06, "logits/chosen": -2.902477979660034, "logits/rejected": -2.2718698978424072, "logps/chosen": -110.2069320678711, "logps/rejected": -944.4080810546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6112962961196899, "rewards/margins": 8.428033828735352, "rewards/rejected": -9.03933048248291, "step": 56420 }, { "epoch": 0.68, "learning_rate": 1.4395610925481784e-06, "logits/chosen": -2.8824779987335205, "logits/rejected": -2.332334041595459, "logps/chosen": -111.14337158203125, "logps/rejected": -988.9351806640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5872782468795776, "rewards/margins": 8.905081748962402, "rewards/rejected": -9.492361068725586, "step": 56430 }, { "epoch": 0.68, "learning_rate": 1.4386151974542598e-06, "logits/chosen": -2.8781323432922363, "logits/rejected": -2.2703185081481934, "logps/chosen": -118.0139389038086, "logps/rejected": -1030.573974609375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.655113935470581, "rewards/margins": 9.240493774414062, "rewards/rejected": -9.895608901977539, "step": 56440 }, { "epoch": 0.68, "learning_rate": 1.4376694876751795e-06, "logits/chosen": -2.861820936203003, "logits/rejected": -2.4718456268310547, "logps/chosen": -85.26177978515625, "logps/rejected": -813.5462036132812, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -0.4325408935546875, "rewards/margins": 7.331025123596191, "rewards/rejected": -7.763566493988037, "step": 56450 }, { "epoch": 0.68, "learning_rate": 1.4367239633760565e-06, "logits/chosen": -2.9253087043762207, "logits/rejected": -2.469919204711914, "logps/chosen": -92.41100311279297, "logps/rejected": -869.58349609375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4945736527442932, "rewards/margins": 7.821835517883301, "rewards/rejected": -8.316408157348633, "step": 56460 }, { "epoch": 0.68, "learning_rate": 1.4357786247219762e-06, "logits/chosen": -2.9094643592834473, "logits/rejected": -2.4695518016815186, "logps/chosen": -157.72238159179688, "logps/rejected": -790.1832275390625, "loss": 0.3691, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.0965380668640137, "rewards/margins": 6.425469875335693, "rewards/rejected": -7.522006988525391, "step": 56470 }, { "epoch": 0.68, "learning_rate": 1.4348334718779924e-06, "logits/chosen": -2.8658289909362793, "logits/rejected": -2.504448413848877, "logps/chosen": -82.59980773925781, "logps/rejected": -877.2491455078125, "loss": 0.0952, "rewards/accuracies": 1.0, "rewards/chosen": -0.4181479513645172, "rewards/margins": 7.9722747802734375, "rewards/rejected": -8.390422821044922, "step": 56480 }, { "epoch": 0.68, "learning_rate": 1.433888505009127e-06, "logits/chosen": -2.8510007858276367, "logits/rejected": -2.3703510761260986, "logps/chosen": -94.22752380371094, "logps/rejected": -883.5408325195312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.48952585458755493, "rewards/margins": 7.967215061187744, "rewards/rejected": -8.456741333007812, "step": 56490 }, { "epoch": 0.68, "learning_rate": 1.4329437242803667e-06, "logits/chosen": -2.8815393447875977, "logits/rejected": -2.553727865219116, "logps/chosen": -93.75484466552734, "logps/rejected": -819.3553466796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.524754524230957, "rewards/margins": 7.289473533630371, "rewards/rejected": -7.814228057861328, "step": 56500 }, { "epoch": 0.68, "learning_rate": 1.4319991298566683e-06, "logits/chosen": -2.8715405464172363, "logits/rejected": -2.431556224822998, "logps/chosen": -123.02608489990234, "logps/rejected": -854.6585083007812, "loss": 0.1086, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7727736234664917, "rewards/margins": 7.396295070648193, "rewards/rejected": -8.169069290161133, "step": 56510 }, { "epoch": 0.68, "learning_rate": 1.4310547219029559e-06, "logits/chosen": -2.883235216140747, "logits/rejected": -2.431868076324463, "logps/chosen": -106.78131103515625, "logps/rejected": -829.3341674804688, "loss": 0.0936, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6540607213973999, "rewards/margins": 7.256675720214844, "rewards/rejected": -7.910736083984375, "step": 56520 }, { "epoch": 0.68, "learning_rate": 1.43011050058412e-06, "logits/chosen": -2.884974718093872, "logits/rejected": -2.440021514892578, "logps/chosen": -101.6846694946289, "logps/rejected": -857.82568359375, "loss": 0.1378, "rewards/accuracies": 1.0, "rewards/chosen": -0.6232556104660034, "rewards/margins": 7.577000617980957, "rewards/rejected": -8.20025634765625, "step": 56530 }, { "epoch": 0.68, "learning_rate": 1.429166466065019e-06, "logits/chosen": -2.9061453342437744, "logits/rejected": -2.4575488567352295, "logps/chosen": -98.19686126708984, "logps/rejected": -884.56640625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5427126288414001, "rewards/margins": 7.9224677085876465, "rewards/rejected": -8.465180397033691, "step": 56540 }, { "epoch": 0.68, "learning_rate": 1.4282226185104793e-06, "logits/chosen": -2.885467052459717, "logits/rejected": -2.424006223678589, "logps/chosen": -123.78825378417969, "logps/rejected": -898.1229248046875, "loss": 0.0873, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8144327402114868, "rewards/margins": 7.77855920791626, "rewards/rejected": -8.592992782592773, "step": 56550 }, { "epoch": 0.68, "learning_rate": 1.427278958085294e-06, "logits/chosen": -2.883800506591797, "logits/rejected": -2.4977259635925293, "logps/chosen": -111.55171203613281, "logps/rejected": -860.2838745117188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6449517011642456, "rewards/margins": 7.5655670166015625, "rewards/rejected": -8.210517883300781, "step": 56560 }, { "epoch": 0.68, "learning_rate": 1.426335484954223e-06, "logits/chosen": -2.8991403579711914, "logits/rejected": -2.1978020668029785, "logps/chosen": -111.8277359008789, "logps/rejected": -938.7109375, "loss": 0.0974, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6654718518257141, "rewards/margins": 8.334453582763672, "rewards/rejected": -8.99992561340332, "step": 56570 }, { "epoch": 0.68, "learning_rate": 1.4253921992819941e-06, "logits/chosen": -2.902042865753174, "logits/rejected": -2.516890525817871, "logps/chosen": -77.40570068359375, "logps/rejected": -815.7473754882812, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.3576430380344391, "rewards/margins": 7.4399871826171875, "rewards/rejected": -7.79763126373291, "step": 56580 }, { "epoch": 0.68, "learning_rate": 1.4244491012333027e-06, "logits/chosen": -2.8758137226104736, "logits/rejected": -2.400430202484131, "logps/chosen": -111.3381118774414, "logps/rejected": -941.1304931640625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6781826019287109, "rewards/margins": 8.353521347045898, "rewards/rejected": -9.03170394897461, "step": 56590 }, { "epoch": 0.68, "learning_rate": 1.423506190972811e-06, "logits/chosen": -2.8624520301818848, "logits/rejected": -2.4382729530334473, "logps/chosen": -94.15697479248047, "logps/rejected": -857.0045166015625, "loss": 0.1093, "rewards/accuracies": 1.0, "rewards/chosen": -0.4750436842441559, "rewards/margins": 7.6956634521484375, "rewards/rejected": -8.170707702636719, "step": 56600 }, { "epoch": 0.68, "learning_rate": 1.4225634686651483e-06, "logits/chosen": -2.8806278705596924, "logits/rejected": -2.2987096309661865, "logps/chosen": -122.8046875, "logps/rejected": -974.6458740234375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7867963910102844, "rewards/margins": 8.553544998168945, "rewards/rejected": -9.340339660644531, "step": 56610 }, { "epoch": 0.68, "learning_rate": 1.4216209344749127e-06, "logits/chosen": -2.902880907058716, "logits/rejected": -2.480443239212036, "logps/chosen": -90.13379669189453, "logps/rejected": -894.05712890625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4454960823059082, "rewards/margins": 8.110467910766602, "rewards/rejected": -8.555963516235352, "step": 56620 }, { "epoch": 0.68, "learning_rate": 1.4206785885666657e-06, "logits/chosen": -2.8483519554138184, "logits/rejected": -2.2593016624450684, "logps/chosen": -114.82972717285156, "logps/rejected": -1048.910400390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6460862159729004, "rewards/margins": 9.427667617797852, "rewards/rejected": -10.073755264282227, "step": 56630 }, { "epoch": 0.68, "learning_rate": 1.4197364311049396e-06, "logits/chosen": -2.8738250732421875, "logits/rejected": -2.2400062084198, "logps/chosen": -101.67745208740234, "logps/rejected": -911.3558349609375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5335900187492371, "rewards/margins": 8.196016311645508, "rewards/rejected": -8.729606628417969, "step": 56640 }, { "epoch": 0.68, "learning_rate": 1.4187944622542324e-06, "logits/chosen": -2.82292103767395, "logits/rejected": -2.028110980987549, "logps/chosen": -119.45904541015625, "logps/rejected": -1007.201171875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6310110688209534, "rewards/margins": 9.056591987609863, "rewards/rejected": -9.687602043151855, "step": 56650 }, { "epoch": 0.68, "learning_rate": 1.4178526821790089e-06, "logits/chosen": -2.8477203845977783, "logits/rejected": -2.2059454917907715, "logps/chosen": -116.51116943359375, "logps/rejected": -983.0601806640625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.673744797706604, "rewards/margins": 8.762345314025879, "rewards/rejected": -9.436090469360352, "step": 56660 }, { "epoch": 0.68, "learning_rate": 1.4169110910437016e-06, "logits/chosen": -2.9030582904815674, "logits/rejected": -2.462742805480957, "logps/chosen": -102.8082046508789, "logps/rejected": -856.1156005859375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5745965242385864, "rewards/margins": 7.600700378417969, "rewards/rejected": -8.175296783447266, "step": 56670 }, { "epoch": 0.68, "learning_rate": 1.41596968901271e-06, "logits/chosen": -2.9058501720428467, "logits/rejected": -2.6114814281463623, "logps/chosen": -79.95066833496094, "logps/rejected": -831.12158203125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.37689560651779175, "rewards/margins": 7.558777809143066, "rewards/rejected": -7.93567419052124, "step": 56680 }, { "epoch": 0.68, "learning_rate": 1.4150284762504007e-06, "logits/chosen": -2.8870997428894043, "logits/rejected": -2.3850674629211426, "logps/chosen": -84.92573547363281, "logps/rejected": -907.3388671875, "loss": 0.2221, "rewards/accuracies": 1.0, "rewards/chosen": -0.39984941482543945, "rewards/margins": 8.28752326965332, "rewards/rejected": -8.687372207641602, "step": 56690 }, { "epoch": 0.68, "learning_rate": 1.414087452921105e-06, "logits/chosen": -2.8794636726379395, "logits/rejected": -2.1866183280944824, "logps/chosen": -113.80564880371094, "logps/rejected": -1082.7059326171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.660305917263031, "rewards/margins": 9.766880989074707, "rewards/rejected": -10.427186965942383, "step": 56700 }, { "epoch": 0.68, "learning_rate": 1.4131466191891241e-06, "logits/chosen": -2.934741735458374, "logits/rejected": -2.3865113258361816, "logps/chosen": -99.62744140625, "logps/rejected": -903.0755004882812, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.47991424798965454, "rewards/margins": 8.158967971801758, "rewards/rejected": -8.63888168334961, "step": 56710 }, { "epoch": 0.68, "learning_rate": 1.412205975218725e-06, "logits/chosen": -2.8562469482421875, "logits/rejected": -2.177957773208618, "logps/chosen": -121.9245834350586, "logps/rejected": -977.4660034179688, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6702636480331421, "rewards/margins": 8.708990097045898, "rewards/rejected": -9.379252433776855, "step": 56720 }, { "epoch": 0.68, "learning_rate": 1.4112655211741414e-06, "logits/chosen": -2.886063575744629, "logits/rejected": -2.293278217315674, "logps/chosen": -103.10685729980469, "logps/rejected": -986.9158325195312, "loss": 0.0603, "rewards/accuracies": 1.0, "rewards/chosen": -0.5476503372192383, "rewards/margins": 8.91675853729248, "rewards/rejected": -9.464408874511719, "step": 56730 }, { "epoch": 0.68, "learning_rate": 1.4103252572195735e-06, "logits/chosen": -2.9012224674224854, "logits/rejected": -2.4368228912353516, "logps/chosen": -89.61343383789062, "logps/rejected": -987.1798706054688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.48388394713401794, "rewards/margins": 8.997049331665039, "rewards/rejected": -9.480934143066406, "step": 56740 }, { "epoch": 0.68, "learning_rate": 1.4093851835191898e-06, "logits/chosen": -2.845135450363159, "logits/rejected": -2.0876388549804688, "logps/chosen": -123.25910949707031, "logps/rejected": -969.7998046875, "loss": 0.0949, "rewards/accuracies": 1.0, "rewards/chosen": -0.72489994764328, "rewards/margins": 8.573148727416992, "rewards/rejected": -9.29804801940918, "step": 56750 }, { "epoch": 0.68, "learning_rate": 1.408445300237124e-06, "logits/chosen": -2.885651111602783, "logits/rejected": -2.454807758331299, "logps/chosen": -121.05696868896484, "logps/rejected": -891.0318603515625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7866860628128052, "rewards/margins": 7.739193916320801, "rewards/rejected": -8.525879859924316, "step": 56760 }, { "epoch": 0.68, "learning_rate": 1.407505607537477e-06, "logits/chosen": -2.8745174407958984, "logits/rejected": -2.234038829803467, "logps/chosen": -117.5823745727539, "logps/rejected": -925.6865234375, "loss": 0.1126, "rewards/accuracies": 1.0, "rewards/chosen": -0.6213525533676147, "rewards/margins": 8.23576831817627, "rewards/rejected": -8.8571195602417, "step": 56770 }, { "epoch": 0.68, "learning_rate": 1.4065661055843166e-06, "logits/chosen": -2.853618621826172, "logits/rejected": -2.4778692722320557, "logps/chosen": -85.60266876220703, "logps/rejected": -840.5095825195312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4427512586116791, "rewards/margins": 7.5849480628967285, "rewards/rejected": -8.027700424194336, "step": 56780 }, { "epoch": 0.68, "learning_rate": 1.405626794541677e-06, "logits/chosen": -2.9036777019500732, "logits/rejected": -2.3887670040130615, "logps/chosen": -104.67787170410156, "logps/rejected": -902.1480712890625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.5297298431396484, "rewards/margins": 8.081023216247559, "rewards/rejected": -8.610753059387207, "step": 56790 }, { "epoch": 0.68, "learning_rate": 1.40468767457356e-06, "logits/chosen": -2.9209821224212646, "logits/rejected": -2.3161683082580566, "logps/chosen": -109.2570571899414, "logps/rejected": -918.71630859375, "loss": 0.0881, "rewards/accuracies": 1.0, "rewards/chosen": -0.6148238778114319, "rewards/margins": 8.185381889343262, "rewards/rejected": -8.800206184387207, "step": 56800 }, { "epoch": 0.68, "learning_rate": 1.4037487458439336e-06, "logits/chosen": -2.8531649112701416, "logits/rejected": -2.4549450874328613, "logps/chosen": -73.21678161621094, "logps/rejected": -845.7598876953125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.35751503705978394, "rewards/margins": 7.7162981033325195, "rewards/rejected": -8.073812484741211, "step": 56810 }, { "epoch": 0.68, "learning_rate": 1.4028100085167306e-06, "logits/chosen": -2.9036548137664795, "logits/rejected": -2.4930503368377686, "logps/chosen": -92.89362335205078, "logps/rejected": -803.8753051757812, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.47329384088516235, "rewards/margins": 7.187473297119141, "rewards/rejected": -7.6607666015625, "step": 56820 }, { "epoch": 0.68, "learning_rate": 1.401871462755853e-06, "logits/chosen": -2.8800323009490967, "logits/rejected": -2.157179594039917, "logps/chosen": -123.79624938964844, "logps/rejected": -972.4090576171875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.7352386713027954, "rewards/margins": 8.593260765075684, "rewards/rejected": -9.328499794006348, "step": 56830 }, { "epoch": 0.68, "learning_rate": 1.4009331087251678e-06, "logits/chosen": -2.958493947982788, "logits/rejected": -2.1363253593444824, "logps/chosen": -139.5215606689453, "logps/rejected": -983.30615234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8328695297241211, "rewards/margins": 8.600488662719727, "rewards/rejected": -9.433358192443848, "step": 56840 }, { "epoch": 0.68, "learning_rate": 1.3999949465885096e-06, "logits/chosen": -2.897702693939209, "logits/rejected": -2.3537914752960205, "logps/chosen": -116.55084228515625, "logps/rejected": -957.5739135742188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6805003881454468, "rewards/margins": 8.496099472045898, "rewards/rejected": -9.176599502563477, "step": 56850 }, { "epoch": 0.68, "learning_rate": 1.3990569765096784e-06, "logits/chosen": -2.8819174766540527, "logits/rejected": -2.077846050262451, "logps/chosen": -124.66324615478516, "logps/rejected": -1056.421630859375, "loss": 0.0425, "rewards/accuracies": 1.0, "rewards/chosen": -0.7079875469207764, "rewards/margins": 9.435456275939941, "rewards/rejected": -10.14344310760498, "step": 56860 }, { "epoch": 0.68, "learning_rate": 1.398119198652441e-06, "logits/chosen": -2.871004581451416, "logits/rejected": -2.361624240875244, "logps/chosen": -86.55785369873047, "logps/rejected": -912.73388671875, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -0.432553768157959, "rewards/margins": 8.312162399291992, "rewards/rejected": -8.744714736938477, "step": 56870 }, { "epoch": 0.68, "learning_rate": 1.3971816131805326e-06, "logits/chosen": -2.863076686859131, "logits/rejected": -2.2500369548797607, "logps/chosen": -121.5035629272461, "logps/rejected": -1026.93994140625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7308071851730347, "rewards/margins": 9.132131576538086, "rewards/rejected": -9.86293888092041, "step": 56880 }, { "epoch": 0.68, "learning_rate": 1.3962442202576509e-06, "logits/chosen": -2.919832468032837, "logits/rejected": -2.414722204208374, "logps/chosen": -90.78181457519531, "logps/rejected": -863.2262573242188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.44655337929725647, "rewards/margins": 7.803491115570068, "rewards/rejected": -8.250044822692871, "step": 56890 }, { "epoch": 0.68, "learning_rate": 1.3953070200474623e-06, "logits/chosen": -2.871842384338379, "logits/rejected": -2.4228453636169434, "logps/chosen": -107.43656158447266, "logps/rejected": -932.9601440429688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6319051384925842, "rewards/margins": 8.310247421264648, "rewards/rejected": -8.942152976989746, "step": 56900 }, { "epoch": 0.68, "learning_rate": 1.3943700127136e-06, "logits/chosen": -2.8590569496154785, "logits/rejected": -2.1080384254455566, "logps/chosen": -118.12074279785156, "logps/rejected": -970.591796875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6703731417655945, "rewards/margins": 8.639646530151367, "rewards/rejected": -9.310020446777344, "step": 56910 }, { "epoch": 0.68, "learning_rate": 1.3934331984196625e-06, "logits/chosen": -2.8754730224609375, "logits/rejected": -2.091836452484131, "logps/chosen": -115.1241683959961, "logps/rejected": -989.2345581054688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.652843177318573, "rewards/margins": 8.838154792785645, "rewards/rejected": -9.490997314453125, "step": 56920 }, { "epoch": 0.68, "learning_rate": 1.3924965773292154e-06, "logits/chosen": -2.83897066116333, "logits/rejected": -2.269946575164795, "logps/chosen": -113.78348541259766, "logps/rejected": -849.1062622070312, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6428040862083435, "rewards/margins": 7.449443817138672, "rewards/rejected": -8.09224796295166, "step": 56930 }, { "epoch": 0.68, "learning_rate": 1.3915601496057913e-06, "logits/chosen": -2.874922275543213, "logits/rejected": -2.4953513145446777, "logps/chosen": -90.11457824707031, "logps/rejected": -868.5613403320312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4694967269897461, "rewards/margins": 7.837275505065918, "rewards/rejected": -8.30677318572998, "step": 56940 }, { "epoch": 0.68, "learning_rate": 1.3906239154128848e-06, "logits/chosen": -2.8054280281066895, "logits/rejected": -2.2253005504608154, "logps/chosen": -106.5706558227539, "logps/rejected": -931.1788330078125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5682313442230225, "rewards/margins": 8.358601570129395, "rewards/rejected": -8.926833152770996, "step": 56950 }, { "epoch": 0.68, "learning_rate": 1.389687874913962e-06, "logits/chosen": -2.8838346004486084, "logits/rejected": -2.0834672451019287, "logps/chosen": -138.74819946289062, "logps/rejected": -1126.33642578125, "loss": 0.0866, "rewards/accuracies": 1.0, "rewards/chosen": -0.8254504203796387, "rewards/margins": 9.996870994567871, "rewards/rejected": -10.822322845458984, "step": 56960 }, { "epoch": 0.68, "learning_rate": 1.3887520282724524e-06, "logits/chosen": -2.8461787700653076, "logits/rejected": -2.284085750579834, "logps/chosen": -97.53642272949219, "logps/rejected": -871.0471801757812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.47707876563072205, "rewards/margins": 7.847456932067871, "rewards/rejected": -8.324536323547363, "step": 56970 }, { "epoch": 0.68, "learning_rate": 1.3878163756517521e-06, "logits/chosen": -2.870492935180664, "logits/rejected": -2.3222546577453613, "logps/chosen": -106.36091613769531, "logps/rejected": -877.8565673828125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6122188568115234, "rewards/margins": 7.780867099761963, "rewards/rejected": -8.393086433410645, "step": 56980 }, { "epoch": 0.68, "learning_rate": 1.386880917215224e-06, "logits/chosen": -2.8826792240142822, "logits/rejected": -2.0575459003448486, "logps/chosen": -145.15463256835938, "logps/rejected": -1175.55908203125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8701990246772766, "rewards/margins": 10.4735746383667, "rewards/rejected": -11.343774795532227, "step": 56990 }, { "epoch": 0.68, "learning_rate": 1.3859456531261959e-06, "logits/chosen": -2.8439483642578125, "logits/rejected": -2.4176087379455566, "logps/chosen": -94.0952377319336, "logps/rejected": -914.7127685546875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5037261247634888, "rewards/margins": 8.256478309631348, "rewards/rejected": -8.760204315185547, "step": 57000 }, { "epoch": 0.68, "eval_logits/chosen": -2.8859901428222656, "eval_logits/rejected": -1.764438509941101, "eval_logps/chosen": -242.771484375, "eval_logps/rejected": -1134.8023681640625, "eval_loss": 0.0013447643723338842, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -1.815912127494812, "eval_rewards/margins": 9.064857482910156, "eval_rewards/rejected": -10.880770683288574, "eval_runtime": 1.2153, "eval_samples_per_second": 4.114, "eval_steps_per_second": 2.469, "step": 57000 }, { "epoch": 0.68, "learning_rate": 1.3850105835479639e-06, "logits/chosen": -2.8888022899627686, "logits/rejected": -2.358768939971924, "logps/chosen": -100.10191345214844, "logps/rejected": -1026.112548828125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5337835550308228, "rewards/margins": 9.32698917388916, "rewards/rejected": -9.860772132873535, "step": 57010 }, { "epoch": 0.68, "learning_rate": 1.384075708643786e-06, "logits/chosen": -2.9136173725128174, "logits/rejected": -2.3762924671173096, "logps/chosen": -119.33613586425781, "logps/rejected": -1006.1925659179688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7026389241218567, "rewards/margins": 8.957913398742676, "rewards/rejected": -9.660551071166992, "step": 57020 }, { "epoch": 0.68, "learning_rate": 1.38314102857689e-06, "logits/chosen": -2.80993914604187, "logits/rejected": -2.0846750736236572, "logps/chosen": -120.329345703125, "logps/rejected": -1068.5660400390625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6052811741828918, "rewards/margins": 9.684440612792969, "rewards/rejected": -10.28972053527832, "step": 57030 }, { "epoch": 0.68, "learning_rate": 1.3822065435104686e-06, "logits/chosen": -2.8639583587646484, "logits/rejected": -2.4727492332458496, "logps/chosen": -110.90946960449219, "logps/rejected": -858.537109375, "loss": 0.0885, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6824773550033569, "rewards/margins": 7.530951499938965, "rewards/rejected": -8.21342945098877, "step": 57040 }, { "epoch": 0.68, "learning_rate": 1.3812722536076801e-06, "logits/chosen": -2.9134411811828613, "logits/rejected": -2.4322824478149414, "logps/chosen": -106.59578704833984, "logps/rejected": -936.8591918945312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5537654161453247, "rewards/margins": 8.433769226074219, "rewards/rejected": -8.98753547668457, "step": 57050 }, { "epoch": 0.68, "learning_rate": 1.380338159031649e-06, "logits/chosen": -2.8891634941101074, "logits/rejected": -2.511056423187256, "logps/chosen": -86.48854064941406, "logps/rejected": -866.8153076171875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.46890607476234436, "rewards/margins": 7.8238372802734375, "rewards/rejected": -8.292742729187012, "step": 57060 }, { "epoch": 0.68, "learning_rate": 1.3794042599454656e-06, "logits/chosen": -2.8713905811309814, "logits/rejected": -2.2817189693450928, "logps/chosen": -108.8892593383789, "logps/rejected": -909.765625, "loss": 0.1116, "rewards/accuracies": 1.0, "rewards/chosen": -0.5904154181480408, "rewards/margins": 8.117568969726562, "rewards/rejected": -8.70798397064209, "step": 57070 }, { "epoch": 0.68, "learning_rate": 1.378470556512187e-06, "logits/chosen": -2.8819451332092285, "logits/rejected": -2.4646735191345215, "logps/chosen": -95.59908294677734, "logps/rejected": -887.3421020507812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5283188819885254, "rewards/margins": 7.9688720703125, "rewards/rejected": -8.497191429138184, "step": 57080 }, { "epoch": 0.68, "learning_rate": 1.3775370488948336e-06, "logits/chosen": -2.862455368041992, "logits/rejected": -2.465475559234619, "logps/chosen": -97.60913848876953, "logps/rejected": -829.4178466796875, "loss": 0.0776, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5816212892532349, "rewards/margins": 7.340371608734131, "rewards/rejected": -7.921993255615234, "step": 57090 }, { "epoch": 0.68, "learning_rate": 1.3766037372563937e-06, "logits/chosen": -2.8789992332458496, "logits/rejected": -2.2795965671539307, "logps/chosen": -120.68131256103516, "logps/rejected": -999.3795166015625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6930879950523376, "rewards/margins": 8.895502090454102, "rewards/rejected": -9.58858871459961, "step": 57100 }, { "epoch": 0.68, "learning_rate": 1.3756706217598212e-06, "logits/chosen": -2.8429722785949707, "logits/rejected": -2.210939407348633, "logps/chosen": -109.30255126953125, "logps/rejected": -979.6326293945312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6478895545005798, "rewards/margins": 8.756550788879395, "rewards/rejected": -9.404439926147461, "step": 57110 }, { "epoch": 0.68, "learning_rate": 1.3747377025680357e-06, "logits/chosen": -2.897833824157715, "logits/rejected": -2.423156261444092, "logps/chosen": -99.85801696777344, "logps/rejected": -896.6414794921875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.564818263053894, "rewards/margins": 8.011561393737793, "rewards/rejected": -8.576379776000977, "step": 57120 }, { "epoch": 0.68, "learning_rate": 1.3738049798439218e-06, "logits/chosen": -2.8686187267303467, "logits/rejected": -2.2706236839294434, "logps/chosen": -145.1128692626953, "logps/rejected": -988.1940307617188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9429329633712769, "rewards/margins": 8.542595863342285, "rewards/rejected": -9.485528945922852, "step": 57130 }, { "epoch": 0.68, "learning_rate": 1.3728724537503314e-06, "logits/chosen": -2.852621555328369, "logits/rejected": -2.2154576778411865, "logps/chosen": -136.2132568359375, "logps/rejected": -985.9913940429688, "loss": 0.0943, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8482005000114441, "rewards/margins": 8.607378005981445, "rewards/rejected": -9.455578804016113, "step": 57140 }, { "epoch": 0.68, "learning_rate": 1.3719401244500794e-06, "logits/chosen": -2.913644790649414, "logits/rejected": -2.361915111541748, "logps/chosen": -101.26312255859375, "logps/rejected": -908.3650512695312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5574918985366821, "rewards/margins": 8.135396003723145, "rewards/rejected": -8.692888259887695, "step": 57150 }, { "epoch": 0.68, "learning_rate": 1.3710079921059486e-06, "logits/chosen": -2.8721420764923096, "logits/rejected": -2.1863083839416504, "logps/chosen": -113.6526107788086, "logps/rejected": -1007.7178955078125, "loss": 0.2109, "rewards/accuracies": 1.0, "rewards/chosen": -0.6482630968093872, "rewards/margins": 9.029032707214355, "rewards/rejected": -9.67729663848877, "step": 57160 }, { "epoch": 0.68, "learning_rate": 1.3700760568806864e-06, "logits/chosen": -2.850712537765503, "logits/rejected": -2.3840465545654297, "logps/chosen": -91.84222412109375, "logps/rejected": -895.5051879882812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5030295848846436, "rewards/margins": 8.07415771484375, "rewards/rejected": -8.577186584472656, "step": 57170 }, { "epoch": 0.68, "learning_rate": 1.3691443189370056e-06, "logits/chosen": -2.9249253273010254, "logits/rejected": -2.4724104404449463, "logps/chosen": -102.40199279785156, "logps/rejected": -893.3350830078125, "loss": 0.1323, "rewards/accuracies": 1.0, "rewards/chosen": -0.598218560218811, "rewards/margins": 7.949880123138428, "rewards/rejected": -8.54809856414795, "step": 57180 }, { "epoch": 0.68, "learning_rate": 1.3682127784375872e-06, "logits/chosen": -2.9023549556732178, "logits/rejected": -2.4911391735076904, "logps/chosen": -92.16265869140625, "logps/rejected": -841.8924560546875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4627472758293152, "rewards/margins": 7.577894687652588, "rewards/rejected": -8.040640830993652, "step": 57190 }, { "epoch": 0.68, "learning_rate": 1.3672814355450754e-06, "logits/chosen": -2.913350820541382, "logits/rejected": -2.188835859298706, "logps/chosen": -147.71267700195312, "logps/rejected": -1019.5017700195312, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8747291564941406, "rewards/margins": 8.89769172668457, "rewards/rejected": -9.772420883178711, "step": 57200 }, { "epoch": 0.68, "learning_rate": 1.3663502904220774e-06, "logits/chosen": -2.914313793182373, "logits/rejected": -2.3202857971191406, "logps/chosen": -120.51717376708984, "logps/rejected": -945.6394653320312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6266871690750122, "rewards/margins": 8.433038711547852, "rewards/rejected": -9.05972671508789, "step": 57210 }, { "epoch": 0.68, "learning_rate": 1.3654193432311702e-06, "logits/chosen": -2.8887712955474854, "logits/rejected": -2.374788522720337, "logps/chosen": -88.95645141601562, "logps/rejected": -908.97509765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.46516960859298706, "rewards/margins": 8.238119125366211, "rewards/rejected": -8.703289031982422, "step": 57220 }, { "epoch": 0.69, "learning_rate": 1.3644885941348948e-06, "logits/chosen": -2.8790528774261475, "logits/rejected": -2.2763171195983887, "logps/chosen": -89.17459869384766, "logps/rejected": -911.7639770507812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.431100457906723, "rewards/margins": 8.305612564086914, "rewards/rejected": -8.736713409423828, "step": 57230 }, { "epoch": 0.69, "learning_rate": 1.3635580432957571e-06, "logits/chosen": -2.854888439178467, "logits/rejected": -2.2445197105407715, "logps/chosen": -123.60804748535156, "logps/rejected": -1096.2989501953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.755181074142456, "rewards/margins": 9.804780006408691, "rewards/rejected": -10.559961318969727, "step": 57240 }, { "epoch": 0.69, "learning_rate": 1.3626276908762286e-06, "logits/chosen": -2.910792827606201, "logits/rejected": -2.270498037338257, "logps/chosen": -121.7203598022461, "logps/rejected": -1053.7017822265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.659575879573822, "rewards/margins": 9.478901863098145, "rewards/rejected": -10.13847827911377, "step": 57250 }, { "epoch": 0.69, "learning_rate": 1.3616975370387472e-06, "logits/chosen": -2.8727030754089355, "logits/rejected": -2.385124921798706, "logps/chosen": -96.77247619628906, "logps/rejected": -896.4363403320312, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5395988821983337, "rewards/margins": 8.039560317993164, "rewards/rejected": -8.5791597366333, "step": 57260 }, { "epoch": 0.69, "learning_rate": 1.3607675819457135e-06, "logits/chosen": -2.887923002243042, "logits/rejected": -2.403681755065918, "logps/chosen": -102.61698150634766, "logps/rejected": -866.52685546875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5663303136825562, "rewards/margins": 7.714833736419678, "rewards/rejected": -8.28116512298584, "step": 57270 }, { "epoch": 0.69, "learning_rate": 1.359837825759496e-06, "logits/chosen": -2.931779384613037, "logits/rejected": -2.478245973587036, "logps/chosen": -91.0212631225586, "logps/rejected": -890.9149169921875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.48920345306396484, "rewards/margins": 8.029719352722168, "rewards/rejected": -8.51892375946045, "step": 57280 }, { "epoch": 0.69, "learning_rate": 1.3589082686424274e-06, "logits/chosen": -2.89426851272583, "logits/rejected": -2.409008741378784, "logps/chosen": -133.8333282470703, "logps/rejected": -917.3078002929688, "loss": 0.1357, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9123033285140991, "rewards/margins": 7.866474151611328, "rewards/rejected": -8.778776168823242, "step": 57290 }, { "epoch": 0.69, "learning_rate": 1.3579789107568058e-06, "logits/chosen": -2.8371405601501465, "logits/rejected": -2.4449594020843506, "logps/chosen": -110.57929992675781, "logps/rejected": -796.8800659179688, "loss": 0.0922, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6857683658599854, "rewards/margins": 6.903339385986328, "rewards/rejected": -7.589108467102051, "step": 57300 }, { "epoch": 0.69, "learning_rate": 1.3570497522648945e-06, "logits/chosen": -2.8883614540100098, "logits/rejected": -2.384645700454712, "logps/chosen": -111.85820007324219, "logps/rejected": -962.69287109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6160426139831543, "rewards/margins": 8.617592811584473, "rewards/rejected": -9.233635902404785, "step": 57310 }, { "epoch": 0.69, "learning_rate": 1.3561207933289221e-06, "logits/chosen": -2.8964405059814453, "logits/rejected": -2.147477626800537, "logps/chosen": -145.3527374267578, "logps/rejected": -992.8873901367188, "loss": 0.0823, "rewards/accuracies": 1.0, "rewards/chosen": -0.9527277946472168, "rewards/margins": 8.591333389282227, "rewards/rejected": -9.544059753417969, "step": 57320 }, { "epoch": 0.69, "learning_rate": 1.355192034111083e-06, "logits/chosen": -2.8742966651916504, "logits/rejected": -2.1626322269439697, "logps/chosen": -149.07980346679688, "logps/rejected": -1078.407470703125, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -0.9868121147155762, "rewards/margins": 9.391160011291504, "rewards/rejected": -10.377972602844238, "step": 57330 }, { "epoch": 0.69, "learning_rate": 1.3542634747735346e-06, "logits/chosen": -2.9149391651153564, "logits/rejected": -2.618195056915283, "logps/chosen": -92.96820068359375, "logps/rejected": -794.4800415039062, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5472491979598999, "rewards/margins": 7.016745090484619, "rewards/rejected": -7.56399393081665, "step": 57340 }, { "epoch": 0.69, "learning_rate": 1.353335115478401e-06, "logits/chosen": -2.874917507171631, "logits/rejected": -2.149010181427002, "logps/chosen": -113.4287109375, "logps/rejected": -1024.744873046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5934034585952759, "rewards/margins": 9.259347915649414, "rewards/rejected": -9.852750778198242, "step": 57350 }, { "epoch": 0.69, "learning_rate": 1.3524069563877717e-06, "logits/chosen": -2.8722753524780273, "logits/rejected": -2.545295000076294, "logps/chosen": -86.00010681152344, "logps/rejected": -827.4099731445312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4736466407775879, "rewards/margins": 7.434251308441162, "rewards/rejected": -7.907898902893066, "step": 57360 }, { "epoch": 0.69, "learning_rate": 1.3514789976637005e-06, "logits/chosen": -2.8327088356018066, "logits/rejected": -2.316905975341797, "logps/chosen": -108.81658935546875, "logps/rejected": -926.3967895507812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6276316046714783, "rewards/margins": 8.242709159851074, "rewards/rejected": -8.870340347290039, "step": 57370 }, { "epoch": 0.69, "learning_rate": 1.3505512394682062e-06, "logits/chosen": -2.9423720836639404, "logits/rejected": -2.536353826522827, "logps/chosen": -88.19468688964844, "logps/rejected": -806.1307983398438, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.44672656059265137, "rewards/margins": 7.234103202819824, "rewards/rejected": -7.6808295249938965, "step": 57380 }, { "epoch": 0.69, "learning_rate": 1.3496236819632736e-06, "logits/chosen": -2.8701751232147217, "logits/rejected": -2.4360222816467285, "logps/chosen": -95.71664428710938, "logps/rejected": -887.1492919921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4852069318294525, "rewards/margins": 7.996938228607178, "rewards/rejected": -8.482145309448242, "step": 57390 }, { "epoch": 0.69, "learning_rate": 1.3486963253108516e-06, "logits/chosen": -2.849910259246826, "logits/rejected": -2.2004895210266113, "logps/chosen": -117.99012756347656, "logps/rejected": -1034.494384765625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6550605893135071, "rewards/margins": 9.292551040649414, "rewards/rejected": -9.947611808776855, "step": 57400 }, { "epoch": 0.69, "learning_rate": 1.3477691696728526e-06, "logits/chosen": -2.8961710929870605, "logits/rejected": -2.4021410942077637, "logps/chosen": -91.43721008300781, "logps/rejected": -931.6968994140625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.46504729986190796, "rewards/margins": 8.470612525939941, "rewards/rejected": -8.935660362243652, "step": 57410 }, { "epoch": 0.69, "learning_rate": 1.3468422152111565e-06, "logits/chosen": -2.910618305206299, "logits/rejected": -2.311945676803589, "logps/chosen": -96.69580841064453, "logps/rejected": -795.7236328125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.49786806106567383, "rewards/margins": 7.087985992431641, "rewards/rejected": -7.585853576660156, "step": 57420 }, { "epoch": 0.69, "learning_rate": 1.345915462087607e-06, "logits/chosen": -2.901366710662842, "logits/rejected": -2.5775909423828125, "logps/chosen": -70.30450439453125, "logps/rejected": -840.8515625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.2766658663749695, "rewards/margins": 7.744101047515869, "rewards/rejected": -8.020767211914062, "step": 57430 }, { "epoch": 0.69, "learning_rate": 1.3449889104640123e-06, "logits/chosen": -2.87809419631958, "logits/rejected": -2.249805450439453, "logps/chosen": -98.13182067871094, "logps/rejected": -927.8482666015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5369671583175659, "rewards/margins": 8.347455978393555, "rewards/rejected": -8.884424209594727, "step": 57440 }, { "epoch": 0.69, "learning_rate": 1.344062560502146e-06, "logits/chosen": -2.913121223449707, "logits/rejected": -2.5182623863220215, "logps/chosen": -79.67218017578125, "logps/rejected": -789.3856201171875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4013260304927826, "rewards/margins": 7.1271772384643555, "rewards/rejected": -7.528504371643066, "step": 57450 }, { "epoch": 0.69, "learning_rate": 1.343136412363747e-06, "logits/chosen": -2.880488157272339, "logits/rejected": -2.304776191711426, "logps/chosen": -109.93013000488281, "logps/rejected": -941.3236083984375, "loss": 0.0714, "rewards/accuracies": 1.0, "rewards/chosen": -0.5752987861633301, "rewards/margins": 8.428525924682617, "rewards/rejected": -9.003824234008789, "step": 57460 }, { "epoch": 0.69, "learning_rate": 1.3422104662105166e-06, "logits/chosen": -2.849325656890869, "logits/rejected": -2.216339349746704, "logps/chosen": -138.27615356445312, "logps/rejected": -1114.715576171875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8377296328544617, "rewards/margins": 9.898411750793457, "rewards/rejected": -10.7361421585083, "step": 57470 }, { "epoch": 0.69, "learning_rate": 1.3412847222041231e-06, "logits/chosen": -2.8705432415008545, "logits/rejected": -2.36286997795105, "logps/chosen": -107.8373031616211, "logps/rejected": -863.7723388671875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6334002017974854, "rewards/margins": 7.617430210113525, "rewards/rejected": -8.250829696655273, "step": 57480 }, { "epoch": 0.69, "learning_rate": 1.3403591805061988e-06, "logits/chosen": -2.874619245529175, "logits/rejected": -2.3816187381744385, "logps/chosen": -107.11334228515625, "logps/rejected": -950.78564453125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.585411548614502, "rewards/margins": 8.54529094696045, "rewards/rejected": -9.13070297241211, "step": 57490 }, { "epoch": 0.69, "learning_rate": 1.3394338412783408e-06, "logits/chosen": -2.855297803878784, "logits/rejected": -2.3079612255096436, "logps/chosen": -112.04939270019531, "logps/rejected": -1015.89990234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6504219770431519, "rewards/margins": 9.10665225982666, "rewards/rejected": -9.757074356079102, "step": 57500 }, { "epoch": 0.69, "learning_rate": 1.338508704682111e-06, "logits/chosen": -2.869501829147339, "logits/rejected": -2.411303997039795, "logps/chosen": -103.56341552734375, "logps/rejected": -951.7586669921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6339367628097534, "rewards/margins": 8.499475479125977, "rewards/rejected": -9.13341236114502, "step": 57510 }, { "epoch": 0.69, "learning_rate": 1.337583770879035e-06, "logits/chosen": -2.9060587882995605, "logits/rejected": -2.2094452381134033, "logps/chosen": -124.8719482421875, "logps/rejected": -1066.5999755859375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.6941951513290405, "rewards/margins": 9.556782722473145, "rewards/rejected": -10.250978469848633, "step": 57520 }, { "epoch": 0.69, "learning_rate": 1.3366590400306058e-06, "logits/chosen": -2.8700625896453857, "logits/rejected": -2.0641462802886963, "logps/chosen": -127.16766357421875, "logps/rejected": -1021.5324096679688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7357672452926636, "rewards/margins": 9.087570190429688, "rewards/rejected": -9.82333755493164, "step": 57530 }, { "epoch": 0.69, "learning_rate": 1.3357345122982757e-06, "logits/chosen": -2.8414053916931152, "logits/rejected": -2.275925397872925, "logps/chosen": -96.30101013183594, "logps/rejected": -872.76611328125, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -0.5441790819168091, "rewards/margins": 7.801854133605957, "rewards/rejected": -8.346033096313477, "step": 57540 }, { "epoch": 0.69, "learning_rate": 1.334810187843466e-06, "logits/chosen": -2.897564649581909, "logits/rejected": -2.456277370452881, "logps/chosen": -98.0764389038086, "logps/rejected": -845.5069580078125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5172857642173767, "rewards/margins": 7.555180549621582, "rewards/rejected": -8.072465896606445, "step": 57550 }, { "epoch": 0.69, "learning_rate": 1.333886066827562e-06, "logits/chosen": -2.865347146987915, "logits/rejected": -2.270228147506714, "logps/chosen": -118.5700454711914, "logps/rejected": -1014.4615478515625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6849967241287231, "rewards/margins": 9.052698135375977, "rewards/rejected": -9.73769474029541, "step": 57560 }, { "epoch": 0.69, "learning_rate": 1.3329621494119115e-06, "logits/chosen": -2.9082303047180176, "logits/rejected": -2.2475273609161377, "logps/chosen": -127.6653060913086, "logps/rejected": -987.2351684570312, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.791244626045227, "rewards/margins": 8.671960830688477, "rewards/rejected": -9.463205337524414, "step": 57570 }, { "epoch": 0.69, "learning_rate": 1.3320384357578284e-06, "logits/chosen": -2.8920509815216064, "logits/rejected": -2.181767225265503, "logps/chosen": -108.03578186035156, "logps/rejected": -926.9479370117188, "loss": 0.0807, "rewards/accuracies": 1.0, "rewards/chosen": -0.5647358298301697, "rewards/margins": 8.309017181396484, "rewards/rejected": -8.873751640319824, "step": 57580 }, { "epoch": 0.69, "learning_rate": 1.3311149260265907e-06, "logits/chosen": -2.9105305671691895, "logits/rejected": -2.4140326976776123, "logps/chosen": -93.93799591064453, "logps/rejected": -968.5283203125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.47789883613586426, "rewards/margins": 8.810443878173828, "rewards/rejected": -9.28834342956543, "step": 57590 }, { "epoch": 0.69, "learning_rate": 1.3301916203794401e-06, "logits/chosen": -2.8332433700561523, "logits/rejected": -2.099571704864502, "logps/chosen": -130.21852111816406, "logps/rejected": -1078.958251953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7176216840744019, "rewards/margins": 9.66019058227539, "rewards/rejected": -10.377812385559082, "step": 57600 }, { "epoch": 0.69, "learning_rate": 1.329268518977584e-06, "logits/chosen": -2.862473249435425, "logits/rejected": -2.282789707183838, "logps/chosen": -110.57843017578125, "logps/rejected": -965.0944213867188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6078516244888306, "rewards/margins": 8.646655082702637, "rewards/rejected": -9.254507064819336, "step": 57610 }, { "epoch": 0.69, "learning_rate": 1.3283456219821928e-06, "logits/chosen": -2.8944411277770996, "logits/rejected": -2.4579551219940186, "logps/chosen": -84.13965606689453, "logps/rejected": -844.2770385742188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.4086706042289734, "rewards/margins": 7.658128261566162, "rewards/rejected": -8.06679916381836, "step": 57620 }, { "epoch": 0.69, "learning_rate": 1.3274229295544022e-06, "logits/chosen": -2.8599557876586914, "logits/rejected": -2.128408908843994, "logps/chosen": -118.7958984375, "logps/rejected": -967.93212890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6831554174423218, "rewards/margins": 8.593483924865723, "rewards/rejected": -9.276639938354492, "step": 57630 }, { "epoch": 0.69, "learning_rate": 1.326500441855311e-06, "logits/chosen": -2.8770217895507812, "logits/rejected": -2.6766114234924316, "logps/chosen": -91.31745910644531, "logps/rejected": -761.7418212890625, "loss": 0.1439, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5480372309684753, "rewards/margins": 6.706105709075928, "rewards/rejected": -7.254143714904785, "step": 57640 }, { "epoch": 0.69, "learning_rate": 1.3255781590459843e-06, "logits/chosen": -2.8259711265563965, "logits/rejected": -2.2972137928009033, "logps/chosen": -99.52157592773438, "logps/rejected": -807.4541625976562, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5118711590766907, "rewards/margins": 7.1685380935668945, "rewards/rejected": -7.680410861968994, "step": 57650 }, { "epoch": 0.69, "learning_rate": 1.3246560812874485e-06, "logits/chosen": -2.888678789138794, "logits/rejected": -2.3549914360046387, "logps/chosen": -96.1755142211914, "logps/rejected": -939.2843017578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.48962077498435974, "rewards/margins": 8.507532119750977, "rewards/rejected": -8.997152328491211, "step": 57660 }, { "epoch": 0.69, "learning_rate": 1.3237342087406964e-06, "logits/chosen": -2.9071218967437744, "logits/rejected": -2.2160773277282715, "logps/chosen": -127.6838607788086, "logps/rejected": -1112.2259521484375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7297487258911133, "rewards/margins": 9.989845275878906, "rewards/rejected": -10.71959400177002, "step": 57670 }, { "epoch": 0.69, "learning_rate": 1.3228125415666844e-06, "logits/chosen": -2.835623025894165, "logits/rejected": -2.2515149116516113, "logps/chosen": -142.8987579345703, "logps/rejected": -860.2151489257812, "loss": 0.1441, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9206022024154663, "rewards/margins": 7.289095878601074, "rewards/rejected": -8.209698677062988, "step": 57680 }, { "epoch": 0.69, "learning_rate": 1.3218910799263334e-06, "logits/chosen": -2.88834285736084, "logits/rejected": -2.173366069793701, "logps/chosen": -115.3897476196289, "logps/rejected": -1108.41162109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6580541729927063, "rewards/margins": 10.026552200317383, "rewards/rejected": -10.68460750579834, "step": 57690 }, { "epoch": 0.69, "learning_rate": 1.3209698239805277e-06, "logits/chosen": -2.8531341552734375, "logits/rejected": -2.3722710609436035, "logps/chosen": -108.7125015258789, "logps/rejected": -899.4180908203125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6292811632156372, "rewards/margins": 7.9794745445251465, "rewards/rejected": -8.608755111694336, "step": 57700 }, { "epoch": 0.69, "learning_rate": 1.320048773890116e-06, "logits/chosen": -2.8451273441314697, "logits/rejected": -2.248213768005371, "logps/chosen": -104.96681213378906, "logps/rejected": -976.0079956054688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5867128372192383, "rewards/margins": 8.783504486083984, "rewards/rejected": -9.370218276977539, "step": 57710 }, { "epoch": 0.69, "learning_rate": 1.3191279298159123e-06, "logits/chosen": -2.869246244430542, "logits/rejected": -2.3686747550964355, "logps/chosen": -116.78849792480469, "logps/rejected": -878.5240478515625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.7146053314208984, "rewards/margins": 7.6738691329956055, "rewards/rejected": -8.388474464416504, "step": 57720 }, { "epoch": 0.69, "learning_rate": 1.3182072919186913e-06, "logits/chosen": -2.913012981414795, "logits/rejected": -2.5798916816711426, "logps/chosen": -101.15421295166016, "logps/rejected": -812.2425537109375, "loss": 0.1516, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6482540965080261, "rewards/margins": 7.095548152923584, "rewards/rejected": -7.743802070617676, "step": 57730 }, { "epoch": 0.69, "learning_rate": 1.3172868603591948e-06, "logits/chosen": -2.9162797927856445, "logits/rejected": -2.657080888748169, "logps/chosen": -80.80375671386719, "logps/rejected": -882.2010498046875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.38524824380874634, "rewards/margins": 8.05823802947998, "rewards/rejected": -8.443486213684082, "step": 57740 }, { "epoch": 0.69, "learning_rate": 1.3163666352981279e-06, "logits/chosen": -2.8228538036346436, "logits/rejected": -2.2162530422210693, "logps/chosen": -127.05912780761719, "logps/rejected": -919.2111206054688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.840123176574707, "rewards/margins": 7.959212303161621, "rewards/rejected": -8.799335479736328, "step": 57750 }, { "epoch": 0.69, "learning_rate": 1.3154466168961588e-06, "logits/chosen": -2.8969173431396484, "logits/rejected": -2.5154497623443604, "logps/chosen": -82.16395568847656, "logps/rejected": -801.6832885742188, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.42391437292099, "rewards/margins": 7.212868690490723, "rewards/rejected": -7.636783599853516, "step": 57760 }, { "epoch": 0.69, "learning_rate": 1.3145268053139204e-06, "logits/chosen": -2.8788886070251465, "logits/rejected": -2.3495824337005615, "logps/chosen": -97.76564025878906, "logps/rejected": -953.9007568359375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5136346817016602, "rewards/margins": 8.646296501159668, "rewards/rejected": -9.159931182861328, "step": 57770 }, { "epoch": 0.69, "learning_rate": 1.3136072007120105e-06, "logits/chosen": -2.8899314403533936, "logits/rejected": -2.397125482559204, "logps/chosen": -104.35108947753906, "logps/rejected": -940.3824462890625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5907629132270813, "rewards/margins": 8.408636093139648, "rewards/rejected": -8.999399185180664, "step": 57780 }, { "epoch": 0.69, "learning_rate": 1.3126878032509871e-06, "logits/chosen": -2.9064626693725586, "logits/rejected": -2.5305137634277344, "logps/chosen": -80.04484558105469, "logps/rejected": -861.4193115234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.401663213968277, "rewards/margins": 7.842356204986572, "rewards/rejected": -8.2440185546875, "step": 57790 }, { "epoch": 0.69, "learning_rate": 1.3117686130913756e-06, "logits/chosen": -2.8089394569396973, "logits/rejected": -1.9950809478759766, "logps/chosen": -131.403564453125, "logps/rejected": -1057.8182373046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7775478959083557, "rewards/margins": 9.405828475952148, "rewards/rejected": -10.183374404907227, "step": 57800 }, { "epoch": 0.69, "learning_rate": 1.3108496303936641e-06, "logits/chosen": -2.8776984214782715, "logits/rejected": -2.2137436866760254, "logps/chosen": -130.4403839111328, "logps/rejected": -926.6435546875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7815989255905151, "rewards/margins": 8.091559410095215, "rewards/rejected": -8.87315845489502, "step": 57810 }, { "epoch": 0.69, "learning_rate": 1.309930855318305e-06, "logits/chosen": -2.9303975105285645, "logits/rejected": -2.366471529006958, "logps/chosen": -94.25001525878906, "logps/rejected": -872.2781372070312, "loss": 0.0951, "rewards/accuracies": 1.0, "rewards/chosen": -0.4361720681190491, "rewards/margins": 7.8949384689331055, "rewards/rejected": -8.331110000610352, "step": 57820 }, { "epoch": 0.69, "learning_rate": 1.3090122880257125e-06, "logits/chosen": -2.8745224475860596, "logits/rejected": -2.047616481781006, "logps/chosen": -148.46405029296875, "logps/rejected": -1047.307373046875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.882781982421875, "rewards/margins": 9.178289413452148, "rewards/rejected": -10.061071395874023, "step": 57830 }, { "epoch": 0.69, "learning_rate": 1.308093928676267e-06, "logits/chosen": -2.869703769683838, "logits/rejected": -2.4837491512298584, "logps/chosen": -86.72132873535156, "logps/rejected": -868.6265869140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4301300644874573, "rewards/margins": 7.876364707946777, "rewards/rejected": -8.306495666503906, "step": 57840 }, { "epoch": 0.69, "learning_rate": 1.307175777430312e-06, "logits/chosen": -2.8664278984069824, "logits/rejected": -2.487971544265747, "logps/chosen": -79.86489868164062, "logps/rejected": -787.111572265625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4326403737068176, "rewards/margins": 7.05254602432251, "rewards/rejected": -7.485185146331787, "step": 57850 }, { "epoch": 0.69, "learning_rate": 1.3062578344481525e-06, "logits/chosen": -2.8819711208343506, "logits/rejected": -2.267651081085205, "logps/chosen": -95.8463363647461, "logps/rejected": -909.88427734375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4824061393737793, "rewards/margins": 8.229570388793945, "rewards/rejected": -8.711977005004883, "step": 57860 }, { "epoch": 0.69, "learning_rate": 1.3053400998900596e-06, "logits/chosen": -2.847010850906372, "logits/rejected": -2.366773843765259, "logps/chosen": -100.79295349121094, "logps/rejected": -976.97998046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.508611261844635, "rewards/margins": 8.87502384185791, "rewards/rejected": -9.383634567260742, "step": 57870 }, { "epoch": 0.69, "learning_rate": 1.3044225739162675e-06, "logits/chosen": -2.8580033779144287, "logits/rejected": -2.0852701663970947, "logps/chosen": -137.99859619140625, "logps/rejected": -1144.5736083984375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8230335116386414, "rewards/margins": 10.205299377441406, "rewards/rejected": -11.02833366394043, "step": 57880 }, { "epoch": 0.69, "learning_rate": 1.303505256686973e-06, "logits/chosen": -2.9030206203460693, "logits/rejected": -2.5120694637298584, "logps/chosen": -100.47391510009766, "logps/rejected": -942.6236572265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5806490182876587, "rewards/margins": 8.467987060546875, "rewards/rejected": -9.048636436462402, "step": 57890 }, { "epoch": 0.69, "learning_rate": 1.3025881483623376e-06, "logits/chosen": -2.848512649536133, "logits/rejected": -2.133200168609619, "logps/chosen": -107.38398742675781, "logps/rejected": -994.9014892578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5838562846183777, "rewards/margins": 8.987153053283691, "rewards/rejected": -9.571009635925293, "step": 57900 }, { "epoch": 0.69, "learning_rate": 1.301671249102486e-06, "logits/chosen": -2.915915012359619, "logits/rejected": -2.3801822662353516, "logps/chosen": -122.5904541015625, "logps/rejected": -903.3439331054688, "loss": 0.0916, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7676082253456116, "rewards/margins": 7.876953125, "rewards/rejected": -8.644559860229492, "step": 57910 }, { "epoch": 0.69, "learning_rate": 1.300754559067507e-06, "logits/chosen": -2.8844075202941895, "logits/rejected": -2.3951354026794434, "logps/chosen": -95.72817993164062, "logps/rejected": -893.5958862304688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.48160022497177124, "rewards/margins": 8.079541206359863, "rewards/rejected": -8.561141967773438, "step": 57920 }, { "epoch": 0.69, "learning_rate": 1.2998380784174503e-06, "logits/chosen": -2.8865411281585693, "logits/rejected": -2.2539541721343994, "logps/chosen": -125.64302062988281, "logps/rejected": -989.6627197265625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7521958947181702, "rewards/margins": 8.758273124694824, "rewards/rejected": -9.510469436645508, "step": 57930 }, { "epoch": 0.69, "learning_rate": 1.2989218073123317e-06, "logits/chosen": -2.8840832710266113, "logits/rejected": -2.6054751873016357, "logps/chosen": -112.13543701171875, "logps/rejected": -733.9948120117188, "loss": 0.1926, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7111382484436035, "rewards/margins": 6.272732734680176, "rewards/rejected": -6.983870029449463, "step": 57940 }, { "epoch": 0.69, "learning_rate": 1.2980057459121298e-06, "logits/chosen": -2.8614888191223145, "logits/rejected": -2.4189937114715576, "logps/chosen": -96.07504272460938, "logps/rejected": -888.3671875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.48710542917251587, "rewards/margins": 8.013957023620605, "rewards/rejected": -8.501062393188477, "step": 57950 }, { "epoch": 0.69, "learning_rate": 1.2970898943767863e-06, "logits/chosen": -2.8797974586486816, "logits/rejected": -2.287212610244751, "logps/chosen": -106.03181457519531, "logps/rejected": -938.8944091796875, "loss": 0.1299, "rewards/accuracies": 1.0, "rewards/chosen": -0.5907630920410156, "rewards/margins": 8.407783508300781, "rewards/rejected": -8.99854564666748, "step": 57960 }, { "epoch": 0.69, "learning_rate": 1.2961742528662063e-06, "logits/chosen": -2.8965935707092285, "logits/rejected": -2.4291725158691406, "logps/chosen": -84.20454406738281, "logps/rejected": -855.7799072265625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3906731903553009, "rewards/margins": 7.787799835205078, "rewards/rejected": -8.178472518920898, "step": 57970 }, { "epoch": 0.69, "learning_rate": 1.295258821540259e-06, "logits/chosen": -2.901169538497925, "logits/rejected": -2.3958518505096436, "logps/chosen": -109.17469787597656, "logps/rejected": -946.1759033203125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6027652025222778, "rewards/margins": 8.467451095581055, "rewards/rejected": -9.070216178894043, "step": 57980 }, { "epoch": 0.69, "learning_rate": 1.2943436005587745e-06, "logits/chosen": -2.924083948135376, "logits/rejected": -2.369037389755249, "logps/chosen": -98.47418212890625, "logps/rejected": -799.2000732421875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.50485759973526, "rewards/margins": 7.106463432312012, "rewards/rejected": -7.611321449279785, "step": 57990 }, { "epoch": 0.69, "learning_rate": 1.293428590081549e-06, "logits/chosen": -2.900752544403076, "logits/rejected": -2.34181809425354, "logps/chosen": -98.48289489746094, "logps/rejected": -968.2088623046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5174503326416016, "rewards/margins": 8.78394603729248, "rewards/rejected": -9.301396369934082, "step": 58000 }, { "epoch": 0.69, "learning_rate": 1.2925137902683404e-06, "logits/chosen": -2.863818645477295, "logits/rejected": -2.228933811187744, "logps/chosen": -144.6077423095703, "logps/rejected": -998.0091552734375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9534696340560913, "rewards/margins": 8.618135452270508, "rewards/rejected": -9.571606636047363, "step": 58010 }, { "epoch": 0.69, "learning_rate": 1.2915992012788697e-06, "logits/chosen": -2.841421604156494, "logits/rejected": -2.1100411415100098, "logps/chosen": -155.8921661376953, "logps/rejected": -949.4158325195312, "loss": 0.1245, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0085713863372803, "rewards/margins": 8.103732109069824, "rewards/rejected": -9.112302780151367, "step": 58020 }, { "epoch": 0.69, "learning_rate": 1.290684823272824e-06, "logits/chosen": -2.8953495025634766, "logits/rejected": -2.4488625526428223, "logps/chosen": -90.58404541015625, "logps/rejected": -911.5094604492188, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4576447904109955, "rewards/margins": 8.287982940673828, "rewards/rejected": -8.745627403259277, "step": 58030 }, { "epoch": 0.69, "learning_rate": 1.2897706564098483e-06, "logits/chosen": -2.9160051345825195, "logits/rejected": -2.4839134216308594, "logps/chosen": -93.69895935058594, "logps/rejected": -869.0276489257812, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.49821510910987854, "rewards/margins": 7.795844078063965, "rewards/rejected": -8.294058799743652, "step": 58040 }, { "epoch": 0.69, "learning_rate": 1.288856700849555e-06, "logits/chosen": -2.8616456985473633, "logits/rejected": -2.235151767730713, "logps/chosen": -103.98921966552734, "logps/rejected": -982.3015747070312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5487254858016968, "rewards/margins": 8.878750801086426, "rewards/rejected": -9.42747688293457, "step": 58050 }, { "epoch": 0.7, "learning_rate": 1.2879429567515179e-06, "logits/chosen": -2.8613340854644775, "logits/rejected": -2.2442102432250977, "logps/chosen": -123.78912353515625, "logps/rejected": -1043.53564453125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7384639382362366, "rewards/margins": 9.289560317993164, "rewards/rejected": -10.028024673461914, "step": 58060 }, { "epoch": 0.7, "learning_rate": 1.287029424275274e-06, "logits/chosen": -2.8650546073913574, "logits/rejected": -2.3324458599090576, "logps/chosen": -94.16283416748047, "logps/rejected": -903.9022216796875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5286356806755066, "rewards/margins": 8.123360633850098, "rewards/rejected": -8.651997566223145, "step": 58070 }, { "epoch": 0.7, "learning_rate": 1.2861161035803238e-06, "logits/chosen": -2.8939852714538574, "logits/rejected": -2.1785800457000732, "logps/chosen": -129.680419921875, "logps/rejected": -1057.6943359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7642873525619507, "rewards/margins": 9.410711288452148, "rewards/rejected": -10.174997329711914, "step": 58080 }, { "epoch": 0.7, "learning_rate": 1.2852029948261301e-06, "logits/chosen": -2.9069440364837646, "logits/rejected": -2.1112475395202637, "logps/chosen": -157.787841796875, "logps/rejected": -1042.763427734375, "loss": 0.0793, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9609524011611938, "rewards/margins": 9.060507774353027, "rewards/rejected": -10.02146053314209, "step": 58090 }, { "epoch": 0.7, "learning_rate": 1.2842900981721213e-06, "logits/chosen": -2.8934216499328613, "logits/rejected": -2.3933098316192627, "logps/chosen": -104.87237548828125, "logps/rejected": -817.3570556640625, "loss": 0.1751, "rewards/accuracies": 1.0, "rewards/chosen": -0.5903623700141907, "rewards/margins": 7.203503608703613, "rewards/rejected": -7.7938666343688965, "step": 58100 }, { "epoch": 0.7, "learning_rate": 1.2833774137776834e-06, "logits/chosen": -2.8606350421905518, "logits/rejected": -2.261791229248047, "logps/chosen": -106.60331726074219, "logps/rejected": -978.4814453125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6250343918800354, "rewards/margins": 8.775947570800781, "rewards/rejected": -9.400982856750488, "step": 58110 }, { "epoch": 0.7, "learning_rate": 1.28246494180217e-06, "logits/chosen": -2.8322362899780273, "logits/rejected": -2.268223285675049, "logps/chosen": -101.596435546875, "logps/rejected": -860.6929931640625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5040433406829834, "rewards/margins": 7.7084503173828125, "rewards/rejected": -8.212493896484375, "step": 58120 }, { "epoch": 0.7, "learning_rate": 1.2815526824048966e-06, "logits/chosen": -2.85347318649292, "logits/rejected": -2.514589786529541, "logps/chosen": -93.22264862060547, "logps/rejected": -881.4158325195312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5208898782730103, "rewards/margins": 7.912561893463135, "rewards/rejected": -8.433451652526855, "step": 58130 }, { "epoch": 0.7, "learning_rate": 1.2806406357451406e-06, "logits/chosen": -2.8736939430236816, "logits/rejected": -2.266648769378662, "logps/chosen": -126.0126724243164, "logps/rejected": -969.3362426757812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7878928184509277, "rewards/margins": 8.495777130126953, "rewards/rejected": -9.283670425415039, "step": 58140 }, { "epoch": 0.7, "learning_rate": 1.279728801982143e-06, "logits/chosen": -2.893914222717285, "logits/rejected": -2.477341413497925, "logps/chosen": -99.69650268554688, "logps/rejected": -911.2774658203125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.49712473154067993, "rewards/margins": 8.235420227050781, "rewards/rejected": -8.732544898986816, "step": 58150 }, { "epoch": 0.7, "learning_rate": 1.278817181275107e-06, "logits/chosen": -2.8756308555603027, "logits/rejected": -2.3798019886016846, "logps/chosen": -95.6004409790039, "logps/rejected": -908.3585815429688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5489050149917603, "rewards/margins": 8.143994331359863, "rewards/rejected": -8.692898750305176, "step": 58160 }, { "epoch": 0.7, "learning_rate": 1.277905773783201e-06, "logits/chosen": -2.8827197551727295, "logits/rejected": -2.4296364784240723, "logps/chosen": -104.4985122680664, "logps/rejected": -880.8172607421875, "loss": 0.0837, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6115430593490601, "rewards/margins": 7.802859306335449, "rewards/rejected": -8.414402961730957, "step": 58170 }, { "epoch": 0.7, "learning_rate": 1.2769945796655514e-06, "logits/chosen": -2.927020311355591, "logits/rejected": -2.384652614593506, "logps/chosen": -103.78526306152344, "logps/rejected": -960.1799926757812, "loss": 0.1508, "rewards/accuracies": 1.0, "rewards/chosen": -0.5736523270606995, "rewards/margins": 8.629278182983398, "rewards/rejected": -9.202930450439453, "step": 58180 }, { "epoch": 0.7, "learning_rate": 1.2760835990812515e-06, "logits/chosen": -2.894176959991455, "logits/rejected": -2.262133836746216, "logps/chosen": -89.97798156738281, "logps/rejected": -864.8199462890625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.4555830955505371, "rewards/margins": 7.811121463775635, "rewards/rejected": -8.266703605651855, "step": 58190 }, { "epoch": 0.7, "learning_rate": 1.275172832189356e-06, "logits/chosen": -2.871920585632324, "logits/rejected": -2.3483986854553223, "logps/chosen": -93.07062530517578, "logps/rejected": -900.2647705078125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.42870211601257324, "rewards/margins": 8.177257537841797, "rewards/rejected": -8.605958938598633, "step": 58200 }, { "epoch": 0.7, "learning_rate": 1.2742622791488818e-06, "logits/chosen": -2.86177659034729, "logits/rejected": -2.111602783203125, "logps/chosen": -153.27915954589844, "logps/rejected": -1108.920654296875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9502173662185669, "rewards/margins": 9.721647262573242, "rewards/rejected": -10.671865463256836, "step": 58210 }, { "epoch": 0.7, "learning_rate": 1.2733519401188098e-06, "logits/chosen": -2.8873133659362793, "logits/rejected": -2.033989191055298, "logps/chosen": -129.48434448242188, "logps/rejected": -1055.834716796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7329238057136536, "rewards/margins": 9.417800903320312, "rewards/rejected": -10.150724411010742, "step": 58220 }, { "epoch": 0.7, "learning_rate": 1.2724418152580819e-06, "logits/chosen": -2.9065184593200684, "logits/rejected": -2.423736095428467, "logps/chosen": -95.81301879882812, "logps/rejected": -941.759765625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4340962767601013, "rewards/margins": 8.593456268310547, "rewards/rejected": -9.027551651000977, "step": 58230 }, { "epoch": 0.7, "learning_rate": 1.2715319047256048e-06, "logits/chosen": -2.8611948490142822, "logits/rejected": -2.3228697776794434, "logps/chosen": -132.92840576171875, "logps/rejected": -911.97802734375, "loss": 0.1177, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7949098348617554, "rewards/margins": 7.925175666809082, "rewards/rejected": -8.720085144042969, "step": 58240 }, { "epoch": 0.7, "learning_rate": 1.2706222086802442e-06, "logits/chosen": -2.870004892349243, "logits/rejected": -2.4282827377319336, "logps/chosen": -97.7491683959961, "logps/rejected": -784.6929931640625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.5687257647514343, "rewards/margins": 6.907505035400391, "rewards/rejected": -7.476231575012207, "step": 58250 }, { "epoch": 0.7, "learning_rate": 1.2697127272808317e-06, "logits/chosen": -2.8699686527252197, "logits/rejected": -2.0922234058380127, "logps/chosen": -133.73388671875, "logps/rejected": -1089.4354248046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7927530407905579, "rewards/margins": 9.692306518554688, "rewards/rejected": -10.48505973815918, "step": 58260 }, { "epoch": 0.7, "learning_rate": 1.2688034606861597e-06, "logits/chosen": -2.8724920749664307, "logits/rejected": -2.350620985031128, "logps/chosen": -115.44950103759766, "logps/rejected": -840.1267700195312, "loss": 0.0924, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7227073907852173, "rewards/margins": 7.3051018714904785, "rewards/rejected": -8.027810096740723, "step": 58270 }, { "epoch": 0.7, "learning_rate": 1.2678944090549838e-06, "logits/chosen": -2.9013402462005615, "logits/rejected": -2.428370475769043, "logps/chosen": -94.5494613647461, "logps/rejected": -898.4168701171875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.49860110878944397, "rewards/margins": 8.121828079223633, "rewards/rejected": -8.620429992675781, "step": 58280 }, { "epoch": 0.7, "learning_rate": 1.2669855725460225e-06, "logits/chosen": -2.892543315887451, "logits/rejected": -2.2900493144989014, "logps/chosen": -115.6088638305664, "logps/rejected": -971.77978515625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6732378005981445, "rewards/margins": 8.650099754333496, "rewards/rejected": -9.323338508605957, "step": 58290 }, { "epoch": 0.7, "learning_rate": 1.2660769513179567e-06, "logits/chosen": -2.859013080596924, "logits/rejected": -2.2998125553131104, "logps/chosen": -95.32843780517578, "logps/rejected": -818.9610595703125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.49258264899253845, "rewards/margins": 7.298590660095215, "rewards/rejected": -7.791174411773682, "step": 58300 }, { "epoch": 0.7, "learning_rate": 1.2651685455294268e-06, "logits/chosen": -2.9002363681793213, "logits/rejected": -2.22613263130188, "logps/chosen": -111.364013671875, "logps/rejected": -1025.2943115234375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5737305283546448, "rewards/margins": 9.285219192504883, "rewards/rejected": -9.8589506149292, "step": 58310 }, { "epoch": 0.7, "learning_rate": 1.2642603553390393e-06, "logits/chosen": -2.8556323051452637, "logits/rejected": -2.5010550022125244, "logps/chosen": -86.09443664550781, "logps/rejected": -842.5286254882812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.45731496810913086, "rewards/margins": 7.585640907287598, "rewards/rejected": -8.042956352233887, "step": 58320 }, { "epoch": 0.7, "learning_rate": 1.2633523809053619e-06, "logits/chosen": -2.959033489227295, "logits/rejected": -2.3882761001586914, "logps/chosen": -128.06497192382812, "logps/rejected": -993.607421875, "loss": 0.0901, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7790035009384155, "rewards/margins": 8.735660552978516, "rewards/rejected": -9.514663696289062, "step": 58330 }, { "epoch": 0.7, "learning_rate": 1.262444622386924e-06, "logits/chosen": -2.8792929649353027, "logits/rejected": -2.49601674079895, "logps/chosen": -130.29803466796875, "logps/rejected": -823.9290161132812, "loss": 0.156, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8688542246818542, "rewards/margins": 6.989912986755371, "rewards/rejected": -7.858767509460449, "step": 58340 }, { "epoch": 0.7, "learning_rate": 1.2615370799422177e-06, "logits/chosen": -2.878326892852783, "logits/rejected": -2.474543333053589, "logps/chosen": -81.3570327758789, "logps/rejected": -867.1463623046875, "loss": 0.0491, "rewards/accuracies": 1.0, "rewards/chosen": -0.39154472947120667, "rewards/margins": 7.912413120269775, "rewards/rejected": -8.30395793914795, "step": 58350 }, { "epoch": 0.7, "learning_rate": 1.260629753729698e-06, "logits/chosen": -2.863434314727783, "logits/rejected": -2.518022060394287, "logps/chosen": -91.88594055175781, "logps/rejected": -816.018798828125, "loss": 0.1095, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.494535356760025, "rewards/margins": 7.292924404144287, "rewards/rejected": -7.7874603271484375, "step": 58360 }, { "epoch": 0.7, "learning_rate": 1.2597226439077814e-06, "logits/chosen": -2.8815808296203613, "logits/rejected": -2.332728862762451, "logps/chosen": -99.14370727539062, "logps/rejected": -858.7164306640625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5160951018333435, "rewards/margins": 7.687840938568115, "rewards/rejected": -8.203936576843262, "step": 58370 }, { "epoch": 0.7, "learning_rate": 1.2588157506348462e-06, "logits/chosen": -2.894880771636963, "logits/rejected": -2.4313509464263916, "logps/chosen": -98.8905029296875, "logps/rejected": -861.03125, "loss": 0.0905, "rewards/accuracies": 1.0, "rewards/chosen": -0.48387956619262695, "rewards/margins": 7.736515045166016, "rewards/rejected": -8.2203950881958, "step": 58380 }, { "epoch": 0.7, "learning_rate": 1.2579090740692335e-06, "logits/chosen": -2.829847812652588, "logits/rejected": -2.3044631481170654, "logps/chosen": -121.68577575683594, "logps/rejected": -951.9720458984375, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.7529974579811096, "rewards/margins": 8.37175464630127, "rewards/rejected": -9.124752044677734, "step": 58390 }, { "epoch": 0.7, "learning_rate": 1.2570026143692466e-06, "logits/chosen": -2.8968191146850586, "logits/rejected": -2.419843912124634, "logps/chosen": -115.9864501953125, "logps/rejected": -913.8216552734375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7129417657852173, "rewards/margins": 8.031904220581055, "rewards/rejected": -8.744845390319824, "step": 58400 }, { "epoch": 0.7, "learning_rate": 1.2560963716931508e-06, "logits/chosen": -2.8742153644561768, "logits/rejected": -2.375295639038086, "logps/chosen": -105.06158447265625, "logps/rejected": -1016.4435424804688, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.5667147040367126, "rewards/margins": 9.202852249145508, "rewards/rejected": -9.769567489624023, "step": 58410 }, { "epoch": 0.7, "learning_rate": 1.2551903461991739e-06, "logits/chosen": -2.8666491508483887, "logits/rejected": -2.330066204071045, "logps/chosen": -109.80873107910156, "logps/rejected": -1056.4466552734375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5656144022941589, "rewards/margins": 9.612198829650879, "rewards/rejected": -10.177813529968262, "step": 58420 }, { "epoch": 0.7, "learning_rate": 1.254284538045505e-06, "logits/chosen": -2.9182376861572266, "logits/rejected": -2.438096523284912, "logps/chosen": -106.75361633300781, "logps/rejected": -875.3970947265625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5787208080291748, "rewards/margins": 7.769043922424316, "rewards/rejected": -8.34776496887207, "step": 58430 }, { "epoch": 0.7, "learning_rate": 1.253378947390296e-06, "logits/chosen": -2.8685905933380127, "logits/rejected": -2.2157864570617676, "logps/chosen": -108.79801940917969, "logps/rejected": -932.3014526367188, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.574606716632843, "rewards/margins": 8.360845565795898, "rewards/rejected": -8.935452461242676, "step": 58440 }, { "epoch": 0.7, "learning_rate": 1.2524735743916606e-06, "logits/chosen": -2.9284043312072754, "logits/rejected": -2.550920009613037, "logps/chosen": -71.14500427246094, "logps/rejected": -836.9713745117188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.3059350848197937, "rewards/margins": 7.694287300109863, "rewards/rejected": -8.000223159790039, "step": 58450 }, { "epoch": 0.7, "learning_rate": 1.2515684192076744e-06, "logits/chosen": -2.895477771759033, "logits/rejected": -2.3000826835632324, "logps/chosen": -120.2408447265625, "logps/rejected": -828.6568603515625, "loss": 0.1148, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7152289152145386, "rewards/margins": 7.184656620025635, "rewards/rejected": -7.899885654449463, "step": 58460 }, { "epoch": 0.7, "learning_rate": 1.2506634819963745e-06, "logits/chosen": -2.9004714488983154, "logits/rejected": -2.305793046951294, "logps/chosen": -119.23011779785156, "logps/rejected": -943.77734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6337884068489075, "rewards/margins": 8.40372371673584, "rewards/rejected": -9.037511825561523, "step": 58470 }, { "epoch": 0.7, "learning_rate": 1.249758762915761e-06, "logits/chosen": -2.884636640548706, "logits/rejected": -2.3794567584991455, "logps/chosen": -102.26435852050781, "logps/rejected": -938.61962890625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5528560876846313, "rewards/margins": 8.443700790405273, "rewards/rejected": -8.996557235717773, "step": 58480 }, { "epoch": 0.7, "learning_rate": 1.248854262123796e-06, "logits/chosen": -2.8882927894592285, "logits/rejected": -2.3423309326171875, "logps/chosen": -89.06914520263672, "logps/rejected": -848.2501220703125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4469950795173645, "rewards/margins": 7.66116189956665, "rewards/rejected": -8.10815715789795, "step": 58490 }, { "epoch": 0.7, "learning_rate": 1.247949979778401e-06, "logits/chosen": -2.8861117362976074, "logits/rejected": -1.9443029165267944, "logps/chosen": -145.01998901367188, "logps/rejected": -1113.049072265625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8692439794540405, "rewards/margins": 9.843506813049316, "rewards/rejected": -10.712750434875488, "step": 58500 }, { "epoch": 0.7, "learning_rate": 1.2470459160374617e-06, "logits/chosen": -2.85956072807312, "logits/rejected": -2.1755738258361816, "logps/chosen": -112.28131103515625, "logps/rejected": -951.8267822265625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6586863398551941, "rewards/margins": 8.471480369567871, "rewards/rejected": -9.130167007446289, "step": 58510 }, { "epoch": 0.7, "learning_rate": 1.2461420710588259e-06, "logits/chosen": -2.9233810901641846, "logits/rejected": -2.636963129043579, "logps/chosen": -71.1103744506836, "logps/rejected": -795.5177001953125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.3226304054260254, "rewards/margins": 7.266887664794922, "rewards/rejected": -7.589518070220947, "step": 58520 }, { "epoch": 0.7, "learning_rate": 1.2452384450003021e-06, "logits/chosen": -2.8499884605407715, "logits/rejected": -2.415799617767334, "logps/chosen": -82.07974243164062, "logps/rejected": -893.1436767578125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4147516191005707, "rewards/margins": 8.127676963806152, "rewards/rejected": -8.542428970336914, "step": 58530 }, { "epoch": 0.7, "learning_rate": 1.2443350380196608e-06, "logits/chosen": -2.9195308685302734, "logits/rejected": -2.4148199558258057, "logps/chosen": -98.668212890625, "logps/rejected": -875.427734375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.46960005164146423, "rewards/margins": 7.903384208679199, "rewards/rejected": -8.372983932495117, "step": 58540 }, { "epoch": 0.7, "learning_rate": 1.2434318502746353e-06, "logits/chosen": -2.8455119132995605, "logits/rejected": -2.2721896171569824, "logps/chosen": -110.6100845336914, "logps/rejected": -960.0484619140625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.598879337310791, "rewards/margins": 8.615673065185547, "rewards/rejected": -9.21455192565918, "step": 58550 }, { "epoch": 0.7, "learning_rate": 1.2425288819229178e-06, "logits/chosen": -2.868516445159912, "logits/rejected": -2.408494472503662, "logps/chosen": -142.54684448242188, "logps/rejected": -867.7491455078125, "loss": 0.1387, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9483591914176941, "rewards/margins": 7.351977348327637, "rewards/rejected": -8.300336837768555, "step": 58560 }, { "epoch": 0.7, "learning_rate": 1.2416261331221652e-06, "logits/chosen": -2.860250234603882, "logits/rejected": -2.3425536155700684, "logps/chosen": -134.78077697753906, "logps/rejected": -857.1993408203125, "loss": 0.1436, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9063876867294312, "rewards/margins": 7.28229284286499, "rewards/rejected": -8.188680648803711, "step": 58570 }, { "epoch": 0.7, "learning_rate": 1.2407236040299946e-06, "logits/chosen": -2.899898052215576, "logits/rejected": -2.5446667671203613, "logps/chosen": -106.16419982910156, "logps/rejected": -805.8895263671875, "loss": 0.1582, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6453549861907959, "rewards/margins": 7.046046257019043, "rewards/rejected": -7.691402435302734, "step": 58580 }, { "epoch": 0.7, "learning_rate": 1.2398212948039854e-06, "logits/chosen": -2.885359525680542, "logits/rejected": -2.3235702514648438, "logps/chosen": -96.74003601074219, "logps/rejected": -979.3157348632812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4552813172340393, "rewards/margins": 8.943319320678711, "rewards/rejected": -9.398599624633789, "step": 58590 }, { "epoch": 0.7, "learning_rate": 1.238919205601678e-06, "logits/chosen": -2.9361166954040527, "logits/rejected": -2.4442687034606934, "logps/chosen": -114.27359771728516, "logps/rejected": -867.3722534179688, "loss": 0.0899, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7129647135734558, "rewards/margins": 7.582510471343994, "rewards/rejected": -8.295475006103516, "step": 58600 }, { "epoch": 0.7, "learning_rate": 1.2380173365805748e-06, "logits/chosen": -2.889253616333008, "logits/rejected": -2.4826550483703613, "logps/chosen": -83.65121459960938, "logps/rejected": -843.74365234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.35070905089378357, "rewards/margins": 7.695242881774902, "rewards/rejected": -8.045951843261719, "step": 58610 }, { "epoch": 0.7, "learning_rate": 1.2371156878981406e-06, "logits/chosen": -2.8737435340881348, "logits/rejected": -2.144801378250122, "logps/chosen": -131.50967407226562, "logps/rejected": -1080.0289306640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7932932376861572, "rewards/margins": 9.593766212463379, "rewards/rejected": -10.387059211730957, "step": 58620 }, { "epoch": 0.7, "learning_rate": 1.2362142597117987e-06, "logits/chosen": -2.8719284534454346, "logits/rejected": -2.490814685821533, "logps/chosen": -90.36408996582031, "logps/rejected": -841.6666259765625, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.48114824295043945, "rewards/margins": 7.5407538414001465, "rewards/rejected": -8.021902084350586, "step": 58630 }, { "epoch": 0.7, "learning_rate": 1.2353130521789372e-06, "logits/chosen": -2.906489372253418, "logits/rejected": -2.2274866104125977, "logps/chosen": -131.927734375, "logps/rejected": -1013.1302490234375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.807742714881897, "rewards/margins": 8.925101280212402, "rewards/rejected": -9.732843399047852, "step": 58640 }, { "epoch": 0.7, "learning_rate": 1.2344120654569044e-06, "logits/chosen": -2.8467063903808594, "logits/rejected": -2.1931519508361816, "logps/chosen": -109.87428283691406, "logps/rejected": -1018.0701904296875, "loss": 0.1193, "rewards/accuracies": 1.0, "rewards/chosen": -0.572517991065979, "rewards/margins": 9.2215576171875, "rewards/rejected": -9.794075965881348, "step": 58650 }, { "epoch": 0.7, "learning_rate": 1.23351129970301e-06, "logits/chosen": -2.924499034881592, "logits/rejected": -2.4582455158233643, "logps/chosen": -98.97328186035156, "logps/rejected": -860.9591674804688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5356062054634094, "rewards/margins": 7.691354274749756, "rewards/rejected": -8.226959228515625, "step": 58660 }, { "epoch": 0.7, "learning_rate": 1.2326107550745247e-06, "logits/chosen": -2.860903739929199, "logits/rejected": -2.440609931945801, "logps/chosen": -94.92345428466797, "logps/rejected": -826.5328369140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5344827175140381, "rewards/margins": 7.356342315673828, "rewards/rejected": -7.8908257484436035, "step": 58670 }, { "epoch": 0.7, "learning_rate": 1.2317104317286817e-06, "logits/chosen": -2.890316963195801, "logits/rejected": -2.352181911468506, "logps/chosen": -98.8382339477539, "logps/rejected": -887.7752075195312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5054678320884705, "rewards/margins": 7.9995832443237305, "rewards/rejected": -8.505050659179688, "step": 58680 }, { "epoch": 0.7, "learning_rate": 1.230810329822676e-06, "logits/chosen": -2.924189329147339, "logits/rejected": -2.4975860118865967, "logps/chosen": -90.51187133789062, "logps/rejected": -872.3807373046875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.44436606764793396, "rewards/margins": 7.884954929351807, "rewards/rejected": -8.329320907592773, "step": 58690 }, { "epoch": 0.7, "learning_rate": 1.2299104495136604e-06, "logits/chosen": -2.9033989906311035, "logits/rejected": -2.3451597690582275, "logps/chosen": -113.5503158569336, "logps/rejected": -1043.03662109375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.663460910320282, "rewards/margins": 9.367855072021484, "rewards/rejected": -10.031316757202148, "step": 58700 }, { "epoch": 0.7, "learning_rate": 1.2290107909587532e-06, "logits/chosen": -2.863856792449951, "logits/rejected": -2.4573066234588623, "logps/chosen": -84.76293182373047, "logps/rejected": -858.9646606445312, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.4384238123893738, "rewards/margins": 7.776919364929199, "rewards/rejected": -8.215343475341797, "step": 58710 }, { "epoch": 0.7, "learning_rate": 1.2281113543150317e-06, "logits/chosen": -2.8797712326049805, "logits/rejected": -2.3605222702026367, "logps/chosen": -105.00460052490234, "logps/rejected": -895.9671020507812, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5850900411605835, "rewards/margins": 7.975823879241943, "rewards/rejected": -8.560914039611816, "step": 58720 }, { "epoch": 0.7, "learning_rate": 1.2272121397395351e-06, "logits/chosen": -2.901829957962036, "logits/rejected": -2.413623094558716, "logps/chosen": -100.38240051269531, "logps/rejected": -930.9865112304688, "loss": 0.1261, "rewards/accuracies": 1.0, "rewards/chosen": -0.5527201294898987, "rewards/margins": 8.377530097961426, "rewards/rejected": -8.93025016784668, "step": 58730 }, { "epoch": 0.7, "learning_rate": 1.226313147389264e-06, "logits/chosen": -2.8645222187042236, "logits/rejected": -2.239252805709839, "logps/chosen": -105.71061706542969, "logps/rejected": -905.5848388671875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5561341047286987, "rewards/margins": 8.100793838500977, "rewards/rejected": -8.656927108764648, "step": 58740 }, { "epoch": 0.7, "learning_rate": 1.2254143774211807e-06, "logits/chosen": -2.8764162063598633, "logits/rejected": -2.4686267375946045, "logps/chosen": -99.98106384277344, "logps/rejected": -854.5462036132812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.571010947227478, "rewards/margins": 7.578006744384766, "rewards/rejected": -8.149017333984375, "step": 58750 }, { "epoch": 0.7, "learning_rate": 1.2245158299922062e-06, "logits/chosen": -2.9076409339904785, "logits/rejected": -2.185997486114502, "logps/chosen": -127.7671127319336, "logps/rejected": -1106.25244140625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7087122797966003, "rewards/margins": 9.944512367248535, "rewards/rejected": -10.653223991394043, "step": 58760 }, { "epoch": 0.7, "learning_rate": 1.2236175052592252e-06, "logits/chosen": -2.8683276176452637, "logits/rejected": -2.344372510910034, "logps/chosen": -104.45588684082031, "logps/rejected": -960.7261962890625, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": -0.607568621635437, "rewards/margins": 8.606161117553711, "rewards/rejected": -9.213728904724121, "step": 58770 }, { "epoch": 0.7, "learning_rate": 1.222719403379083e-06, "logits/chosen": -2.944627046585083, "logits/rejected": -2.4293665885925293, "logps/chosen": -88.84310150146484, "logps/rejected": -962.3330078125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.4425268769264221, "rewards/margins": 8.793170928955078, "rewards/rejected": -9.235697746276855, "step": 58780 }, { "epoch": 0.7, "learning_rate": 1.221821524508585e-06, "logits/chosen": -2.8539505004882812, "logits/rejected": -2.280848979949951, "logps/chosen": -127.91618347167969, "logps/rejected": -859.1114501953125, "loss": 0.1685, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8061131238937378, "rewards/margins": 7.400147438049316, "rewards/rejected": -8.206259727478027, "step": 58790 }, { "epoch": 0.7, "learning_rate": 1.2209238688044994e-06, "logits/chosen": -2.868342638015747, "logits/rejected": -2.275052309036255, "logps/chosen": -126.59996032714844, "logps/rejected": -910.4781494140625, "loss": 0.1131, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7861193418502808, "rewards/margins": 7.92844295501709, "rewards/rejected": -8.714563369750977, "step": 58800 }, { "epoch": 0.7, "learning_rate": 1.2200264364235537e-06, "logits/chosen": -2.8804173469543457, "logits/rejected": -2.20058012008667, "logps/chosen": -102.80433654785156, "logps/rejected": -914.3001708984375, "loss": 0.073, "rewards/accuracies": 1.0, "rewards/chosen": -0.5267673134803772, "rewards/margins": 8.230941772460938, "rewards/rejected": -8.757708549499512, "step": 58810 }, { "epoch": 0.7, "learning_rate": 1.2191292275224382e-06, "logits/chosen": -2.877681255340576, "logits/rejected": -2.488539218902588, "logps/chosen": -84.25794982910156, "logps/rejected": -779.9981689453125, "loss": 0.0289, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.43147534132003784, "rewards/margins": 6.9957475662231445, "rewards/rejected": -7.427223205566406, "step": 58820 }, { "epoch": 0.7, "learning_rate": 1.2182322422578007e-06, "logits/chosen": -2.867581367492676, "logits/rejected": -2.5015323162078857, "logps/chosen": -101.1820068359375, "logps/rejected": -813.3846435546875, "loss": 0.1678, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6298428773880005, "rewards/margins": 7.133332252502441, "rewards/rejected": -7.763175010681152, "step": 58830 }, { "epoch": 0.7, "learning_rate": 1.2173354807862542e-06, "logits/chosen": -2.8824477195739746, "logits/rejected": -2.3259947299957275, "logps/chosen": -109.67918395996094, "logps/rejected": -1038.064697265625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5929190516471863, "rewards/margins": 9.387264251708984, "rewards/rejected": -9.980183601379395, "step": 58840 }, { "epoch": 0.7, "learning_rate": 1.2164389432643702e-06, "logits/chosen": -2.9193296432495117, "logits/rejected": -2.431847095489502, "logps/chosen": -83.92631530761719, "logps/rejected": -813.3570556640625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.39899900555610657, "rewards/margins": 7.35296106338501, "rewards/rejected": -7.751959800720215, "step": 58850 }, { "epoch": 0.7, "learning_rate": 1.2155426298486808e-06, "logits/chosen": -2.8415699005126953, "logits/rejected": -2.4479775428771973, "logps/chosen": -109.89598083496094, "logps/rejected": -849.0050048828125, "loss": 0.1362, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6974718570709229, "rewards/margins": 7.399636268615723, "rewards/rejected": -8.097108840942383, "step": 58860 }, { "epoch": 0.7, "learning_rate": 1.2146465406956826e-06, "logits/chosen": -2.9110701084136963, "logits/rejected": -2.5085439682006836, "logps/chosen": -94.68224334716797, "logps/rejected": -903.2990112304688, "loss": 0.0717, "rewards/accuracies": 1.0, "rewards/chosen": -0.5026080012321472, "rewards/margins": 8.148630142211914, "rewards/rejected": -8.651237487792969, "step": 58870 }, { "epoch": 0.7, "learning_rate": 1.2137506759618276e-06, "logits/chosen": -2.884706735610962, "logits/rejected": -2.585151195526123, "logps/chosen": -64.84089660644531, "logps/rejected": -783.9591674804688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.27919822931289673, "rewards/margins": 7.192706108093262, "rewards/rejected": -7.4719038009643555, "step": 58880 }, { "epoch": 0.7, "learning_rate": 1.2128550358035324e-06, "logits/chosen": -2.8308048248291016, "logits/rejected": -2.3880019187927246, "logps/chosen": -87.86541748046875, "logps/rejected": -877.7955932617188, "loss": 0.0981, "rewards/accuracies": 1.0, "rewards/chosen": -0.43049126863479614, "rewards/margins": 7.964640140533447, "rewards/rejected": -8.39513111114502, "step": 58890 }, { "epoch": 0.71, "learning_rate": 1.2119596203771728e-06, "logits/chosen": -2.8865177631378174, "logits/rejected": -2.459721088409424, "logps/chosen": -89.25492858886719, "logps/rejected": -907.4468994140625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4827001094818115, "rewards/margins": 8.213897705078125, "rewards/rejected": -8.696598052978516, "step": 58900 }, { "epoch": 0.71, "learning_rate": 1.211064429839086e-06, "logits/chosen": -2.882127523422241, "logits/rejected": -2.437675952911377, "logps/chosen": -109.87916564941406, "logps/rejected": -967.2275390625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6592572927474976, "rewards/margins": 8.618913650512695, "rewards/rejected": -9.278170585632324, "step": 58910 }, { "epoch": 0.71, "learning_rate": 1.2101694643455702e-06, "logits/chosen": -2.892764091491699, "logits/rejected": -2.506373882293701, "logps/chosen": -86.73897552490234, "logps/rejected": -864.8310546875, "loss": 0.1262, "rewards/accuracies": 1.0, "rewards/chosen": -0.4491722583770752, "rewards/margins": 7.825279235839844, "rewards/rejected": -8.274452209472656, "step": 58920 }, { "epoch": 0.71, "learning_rate": 1.2092747240528832e-06, "logits/chosen": -2.850555658340454, "logits/rejected": -2.168858528137207, "logps/chosen": -132.00648498535156, "logps/rejected": -1022.6730346679688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7816377282142639, "rewards/margins": 9.044356346130371, "rewards/rejected": -9.825993537902832, "step": 58930 }, { "epoch": 0.71, "learning_rate": 1.208380209117246e-06, "logits/chosen": -2.9124276638031006, "logits/rejected": -2.3538331985473633, "logps/chosen": -101.32262420654297, "logps/rejected": -958.138671875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5369870066642761, "rewards/margins": 8.639087677001953, "rewards/rejected": -9.176074981689453, "step": 58940 }, { "epoch": 0.71, "learning_rate": 1.2074859196948355e-06, "logits/chosen": -2.9032440185546875, "logits/rejected": -2.170219898223877, "logps/chosen": -117.95533752441406, "logps/rejected": -988.1681518554688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6786197423934937, "rewards/margins": 8.80816650390625, "rewards/rejected": -9.486785888671875, "step": 58950 }, { "epoch": 0.71, "learning_rate": 1.206591855941794e-06, "logits/chosen": -2.888798952102661, "logits/rejected": -2.4517390727996826, "logps/chosen": -94.98320007324219, "logps/rejected": -885.4191284179688, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.4883374571800232, "rewards/margins": 7.975266456604004, "rewards/rejected": -8.463602066040039, "step": 58960 }, { "epoch": 0.71, "learning_rate": 1.2056980180142219e-06, "logits/chosen": -2.9008054733276367, "logits/rejected": -2.081251859664917, "logps/chosen": -124.57066345214844, "logps/rejected": -1036.86328125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7152976393699646, "rewards/margins": 9.263824462890625, "rewards/rejected": -9.979121208190918, "step": 58970 }, { "epoch": 0.71, "learning_rate": 1.2048044060681813e-06, "logits/chosen": -2.86326003074646, "logits/rejected": -2.381317138671875, "logps/chosen": -95.04016876220703, "logps/rejected": -816.6509399414062, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.521705687046051, "rewards/margins": 7.26861572265625, "rewards/rejected": -7.790322303771973, "step": 58980 }, { "epoch": 0.71, "learning_rate": 1.203911020259694e-06, "logits/chosen": -2.8698248863220215, "logits/rejected": -2.4545254707336426, "logps/chosen": -88.2167739868164, "logps/rejected": -908.9322509765625, "loss": 0.0893, "rewards/accuracies": 1.0, "rewards/chosen": -0.4548402428627014, "rewards/margins": 8.24610424041748, "rewards/rejected": -8.700944900512695, "step": 58990 }, { "epoch": 0.71, "learning_rate": 1.2030178607447429e-06, "logits/chosen": -2.8348166942596436, "logits/rejected": -2.191056966781616, "logps/chosen": -104.26707458496094, "logps/rejected": -954.5330200195312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5235894918441772, "rewards/margins": 8.629595756530762, "rewards/rejected": -9.15318489074707, "step": 59000 }, { "epoch": 0.71, "learning_rate": 1.2021249276792721e-06, "logits/chosen": -2.878434896469116, "logits/rejected": -2.3506393432617188, "logps/chosen": -132.8891143798828, "logps/rejected": -876.84521484375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.8812629580497742, "rewards/margins": 7.49744176864624, "rewards/rejected": -8.378704071044922, "step": 59010 }, { "epoch": 0.71, "learning_rate": 1.2012322212191832e-06, "logits/chosen": -2.889047622680664, "logits/rejected": -2.489806890487671, "logps/chosen": -84.2138900756836, "logps/rejected": -752.0117797851562, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.40230292081832886, "rewards/margins": 6.736181735992432, "rewards/rejected": -7.138484954833984, "step": 59020 }, { "epoch": 0.71, "learning_rate": 1.200339741520341e-06, "logits/chosen": -2.8757569789886475, "logits/rejected": -2.4851131439208984, "logps/chosen": -92.28450012207031, "logps/rejected": -862.80078125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.46482428908348083, "rewards/margins": 7.777263641357422, "rewards/rejected": -8.242086410522461, "step": 59030 }, { "epoch": 0.71, "learning_rate": 1.1994474887385706e-06, "logits/chosen": -2.8546533584594727, "logits/rejected": -2.1512715816497803, "logps/chosen": -117.32756042480469, "logps/rejected": -1095.3594970703125, "loss": 0.1097, "rewards/accuracies": 1.0, "rewards/chosen": -0.6758050322532654, "rewards/margins": 9.88223934173584, "rewards/rejected": -10.558043479919434, "step": 59040 }, { "epoch": 0.71, "learning_rate": 1.1985554630296567e-06, "logits/chosen": -2.9109132289886475, "logits/rejected": -2.4108378887176514, "logps/chosen": -110.6309814453125, "logps/rejected": -959.5555419921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5884970426559448, "rewards/margins": 8.616092681884766, "rewards/rejected": -9.204587936401367, "step": 59050 }, { "epoch": 0.71, "learning_rate": 1.1976636645493442e-06, "logits/chosen": -2.8318679332733154, "logits/rejected": -2.288698434829712, "logps/chosen": -98.34033203125, "logps/rejected": -914.9483642578125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5228531956672668, "rewards/margins": 8.23669719696045, "rewards/rejected": -8.759550094604492, "step": 59060 }, { "epoch": 0.71, "learning_rate": 1.1967720934533397e-06, "logits/chosen": -2.875066041946411, "logits/rejected": -2.3291850090026855, "logps/chosen": -99.6823501586914, "logps/rejected": -901.9415283203125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5589267015457153, "rewards/margins": 8.077957153320312, "rewards/rejected": -8.636883735656738, "step": 59070 }, { "epoch": 0.71, "learning_rate": 1.1958807498973071e-06, "logits/chosen": -2.843482255935669, "logits/rejected": -2.365562915802002, "logps/chosen": -115.21760559082031, "logps/rejected": -905.1357421875, "loss": 0.0836, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.693793773651123, "rewards/margins": 7.9662346839904785, "rewards/rejected": -8.660027503967285, "step": 59080 }, { "epoch": 0.71, "learning_rate": 1.1949896340368735e-06, "logits/chosen": -2.903043746948242, "logits/rejected": -2.3785886764526367, "logps/chosen": -96.2235336303711, "logps/rejected": -867.6912841796875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4942890703678131, "rewards/margins": 7.797403812408447, "rewards/rejected": -8.291692733764648, "step": 59090 }, { "epoch": 0.71, "learning_rate": 1.1940987460276254e-06, "logits/chosen": -2.8839292526245117, "logits/rejected": -2.3374600410461426, "logps/chosen": -112.41011047363281, "logps/rejected": -894.1974487304688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6509350538253784, "rewards/margins": 7.91452169418335, "rewards/rejected": -8.565458297729492, "step": 59100 }, { "epoch": 0.71, "learning_rate": 1.1932080860251094e-06, "logits/chosen": -2.8941643238067627, "logits/rejected": -2.489042043685913, "logps/chosen": -104.8271255493164, "logps/rejected": -835.0289916992188, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5882381796836853, "rewards/margins": 7.371385097503662, "rewards/rejected": -7.959624290466309, "step": 59110 }, { "epoch": 0.71, "learning_rate": 1.192317654184832e-06, "logits/chosen": -2.8810014724731445, "logits/rejected": -2.551724433898926, "logps/chosen": -96.15133666992188, "logps/rejected": -830.7453002929688, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.5576698184013367, "rewards/margins": 7.376023292541504, "rewards/rejected": -7.933692932128906, "step": 59120 }, { "epoch": 0.71, "learning_rate": 1.1914274506622603e-06, "logits/chosen": -2.8749053478240967, "logits/rejected": -2.314392566680908, "logps/chosen": -141.90347290039062, "logps/rejected": -844.1817626953125, "loss": 0.2107, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9429196119308472, "rewards/margins": 7.12286901473999, "rewards/rejected": -8.065789222717285, "step": 59130 }, { "epoch": 0.71, "learning_rate": 1.1905374756128222e-06, "logits/chosen": -2.8867461681365967, "logits/rejected": -2.234492778778076, "logps/chosen": -107.2706527709961, "logps/rejected": -901.0477294921875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5402878522872925, "rewards/margins": 8.089341163635254, "rewards/rejected": -8.62962818145752, "step": 59140 }, { "epoch": 0.71, "learning_rate": 1.1896477291919028e-06, "logits/chosen": -2.9180169105529785, "logits/rejected": -2.4111812114715576, "logps/chosen": -111.53334045410156, "logps/rejected": -962.0260620117188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.667961597442627, "rewards/margins": 8.538559913635254, "rewards/rejected": -9.206521034240723, "step": 59150 }, { "epoch": 0.71, "learning_rate": 1.1887582115548507e-06, "logits/chosen": -2.8295419216156006, "logits/rejected": -2.369915008544922, "logps/chosen": -86.34344482421875, "logps/rejected": -915.7960205078125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.40939274430274963, "rewards/margins": 8.361546516418457, "rewards/rejected": -8.770939826965332, "step": 59160 }, { "epoch": 0.71, "learning_rate": 1.1878689228569731e-06, "logits/chosen": -2.920563220977783, "logits/rejected": -2.314185619354248, "logps/chosen": -103.32890319824219, "logps/rejected": -960.4232177734375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.566274106502533, "rewards/margins": 8.658071517944336, "rewards/rejected": -9.224344253540039, "step": 59170 }, { "epoch": 0.71, "learning_rate": 1.1869798632535372e-06, "logits/chosen": -2.891350269317627, "logits/rejected": -2.1916921138763428, "logps/chosen": -117.18556213378906, "logps/rejected": -1018.21728515625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6830753087997437, "rewards/margins": 9.101061820983887, "rewards/rejected": -9.784137725830078, "step": 59180 }, { "epoch": 0.71, "learning_rate": 1.1860910328997707e-06, "logits/chosen": -2.9011483192443848, "logits/rejected": -2.489542245864868, "logps/chosen": -71.75542449951172, "logps/rejected": -810.2608032226562, "loss": 0.1864, "rewards/accuracies": 1.0, "rewards/chosen": -0.3416082561016083, "rewards/margins": 7.386724948883057, "rewards/rejected": -7.728334903717041, "step": 59190 }, { "epoch": 0.71, "learning_rate": 1.1852024319508605e-06, "logits/chosen": -2.9239954948425293, "logits/rejected": -2.2151520252227783, "logps/chosen": -108.12565612792969, "logps/rejected": -1023.0960693359375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.5779438018798828, "rewards/margins": 9.260688781738281, "rewards/rejected": -9.83863353729248, "step": 59200 }, { "epoch": 0.71, "learning_rate": 1.184314060561955e-06, "logits/chosen": -2.9072890281677246, "logits/rejected": -2.5757572650909424, "logps/chosen": -71.15635681152344, "logps/rejected": -882.9871215820312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.3120238184928894, "rewards/margins": 8.146568298339844, "rewards/rejected": -8.458590507507324, "step": 59210 }, { "epoch": 0.71, "learning_rate": 1.1834259188881591e-06, "logits/chosen": -2.9329707622528076, "logits/rejected": -2.3054111003875732, "logps/chosen": -115.32283782958984, "logps/rejected": -1044.1444091796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6459553241729736, "rewards/margins": 9.379255294799805, "rewards/rejected": -10.025209426879883, "step": 59220 }, { "epoch": 0.71, "learning_rate": 1.1825380070845413e-06, "logits/chosen": -2.860607147216797, "logits/rejected": -2.1716110706329346, "logps/chosen": -149.2106475830078, "logps/rejected": -914.7000732421875, "loss": 0.0944, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.977557361125946, "rewards/margins": 7.775943756103516, "rewards/rejected": -8.753499984741211, "step": 59230 }, { "epoch": 0.71, "learning_rate": 1.1816503253061285e-06, "logits/chosen": -2.883284330368042, "logits/rejected": -2.2570929527282715, "logps/chosen": -104.73579406738281, "logps/rejected": -900.2692260742188, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5561922788619995, "rewards/margins": 8.071268081665039, "rewards/rejected": -8.627459526062012, "step": 59240 }, { "epoch": 0.71, "learning_rate": 1.1807628737079072e-06, "logits/chosen": -2.8787424564361572, "logits/rejected": -2.505706310272217, "logps/chosen": -89.44705200195312, "logps/rejected": -878.076171875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.466095507144928, "rewards/margins": 7.905072212219238, "rewards/rejected": -8.371167182922363, "step": 59250 }, { "epoch": 0.71, "learning_rate": 1.1798756524448241e-06, "logits/chosen": -2.87262225151062, "logits/rejected": -2.4846129417419434, "logps/chosen": -82.89930725097656, "logps/rejected": -767.380126953125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.41517090797424316, "rewards/margins": 6.896792411804199, "rewards/rejected": -7.311963081359863, "step": 59260 }, { "epoch": 0.71, "learning_rate": 1.1789886616717866e-06, "logits/chosen": -2.8799595832824707, "logits/rejected": -2.3609299659729004, "logps/chosen": -79.74051666259766, "logps/rejected": -831.0894775390625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3810303807258606, "rewards/margins": 7.5479936599731445, "rewards/rejected": -7.929023742675781, "step": 59270 }, { "epoch": 0.71, "learning_rate": 1.178101901543658e-06, "logits/chosen": -2.8990659713745117, "logits/rejected": -2.226008892059326, "logps/chosen": -139.0029296875, "logps/rejected": -1040.2784423828125, "loss": 0.1068, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8214312791824341, "rewards/margins": 9.182790756225586, "rewards/rejected": -10.004220962524414, "step": 59280 }, { "epoch": 0.71, "learning_rate": 1.1772153722152666e-06, "logits/chosen": -2.8940443992614746, "logits/rejected": -2.2643866539001465, "logps/chosen": -109.05330657958984, "logps/rejected": -994.1220703125, "loss": 0.0214, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5947697162628174, "rewards/margins": 8.956367492675781, "rewards/rejected": -9.55113697052002, "step": 59290 }, { "epoch": 0.71, "learning_rate": 1.1763290738413976e-06, "logits/chosen": -2.8989806175231934, "logits/rejected": -2.4977920055389404, "logps/chosen": -82.9163589477539, "logps/rejected": -861.5328979492188, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.37241610884666443, "rewards/margins": 7.853543281555176, "rewards/rejected": -8.225957870483398, "step": 59300 }, { "epoch": 0.71, "learning_rate": 1.1754430065767961e-06, "logits/chosen": -2.866403818130493, "logits/rejected": -2.144631862640381, "logps/chosen": -100.79315948486328, "logps/rejected": -1024.5899658203125, "loss": 0.1023, "rewards/accuracies": 1.0, "rewards/chosen": -0.48866891860961914, "rewards/margins": 9.35910415649414, "rewards/rejected": -9.847772598266602, "step": 59310 }, { "epoch": 0.71, "learning_rate": 1.1745571705761666e-06, "logits/chosen": -2.853688955307007, "logits/rejected": -2.3297550678253174, "logps/chosen": -133.4285888671875, "logps/rejected": -920.1380615234375, "loss": 0.0929, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8430854082107544, "rewards/margins": 7.959558963775635, "rewards/rejected": -8.802644729614258, "step": 59320 }, { "epoch": 0.71, "learning_rate": 1.173671565994175e-06, "logits/chosen": -2.89139986038208, "logits/rejected": -2.3810856342315674, "logps/chosen": -98.5269775390625, "logps/rejected": -925.12451171875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5222455859184265, "rewards/margins": 8.332858085632324, "rewards/rejected": -8.855104446411133, "step": 59330 }, { "epoch": 0.71, "learning_rate": 1.1727861929854431e-06, "logits/chosen": -2.8351166248321533, "logits/rejected": -2.4310250282287598, "logps/chosen": -126.19632720947266, "logps/rejected": -858.4417724609375, "loss": 0.1171, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7976464033126831, "rewards/margins": 7.395380973815918, "rewards/rejected": -8.193029403686523, "step": 59340 }, { "epoch": 0.71, "learning_rate": 1.171901051704556e-06, "logits/chosen": -2.9082417488098145, "logits/rejected": -2.6275219917297363, "logps/chosen": -73.25250244140625, "logps/rejected": -780.5830688476562, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.33782753348350525, "rewards/margins": 7.0970258712768555, "rewards/rejected": -7.434853553771973, "step": 59350 }, { "epoch": 0.71, "learning_rate": 1.1710161423060563e-06, "logits/chosen": -2.896605968475342, "logits/rejected": -2.4919564723968506, "logps/chosen": -81.6097412109375, "logps/rejected": -888.0146484375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.3987341821193695, "rewards/margins": 8.099985122680664, "rewards/rejected": -8.498719215393066, "step": 59360 }, { "epoch": 0.71, "learning_rate": 1.1701314649444473e-06, "logits/chosen": -2.885164737701416, "logits/rejected": -2.4652793407440186, "logps/chosen": -83.1481704711914, "logps/rejected": -826.5714111328125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.37665849924087524, "rewards/margins": 7.523253440856934, "rewards/rejected": -7.899912357330322, "step": 59370 }, { "epoch": 0.71, "learning_rate": 1.1692470197741908e-06, "logits/chosen": -2.8667705059051514, "logits/rejected": -2.215815782546997, "logps/chosen": -119.7613754272461, "logps/rejected": -941.6162109375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7312977910041809, "rewards/margins": 8.2837553024292, "rewards/rejected": -9.015052795410156, "step": 59380 }, { "epoch": 0.71, "learning_rate": 1.1683628069497097e-06, "logits/chosen": -2.8698196411132812, "logits/rejected": -2.431868314743042, "logps/chosen": -97.69474792480469, "logps/rejected": -864.322265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.537986159324646, "rewards/margins": 7.715001106262207, "rewards/rejected": -8.252986907958984, "step": 59390 }, { "epoch": 0.71, "learning_rate": 1.1674788266253832e-06, "logits/chosen": -2.8491721153259277, "logits/rejected": -2.3019070625305176, "logps/chosen": -121.62605285644531, "logps/rejected": -900.1044921875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7305004000663757, "rewards/margins": 7.871498107910156, "rewards/rejected": -8.601999282836914, "step": 59400 }, { "epoch": 0.71, "learning_rate": 1.1665950789555524e-06, "logits/chosen": -2.897012948989868, "logits/rejected": -2.288233518600464, "logps/chosen": -118.115234375, "logps/rejected": -985.5054931640625, "loss": 0.0966, "rewards/accuracies": 1.0, "rewards/chosen": -0.670391857624054, "rewards/margins": 8.782970428466797, "rewards/rejected": -9.453363418579102, "step": 59410 }, { "epoch": 0.71, "learning_rate": 1.1657115640945174e-06, "logits/chosen": -2.920799732208252, "logits/rejected": -2.3786046504974365, "logps/chosen": -103.20854187011719, "logps/rejected": -918.7384033203125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5451785326004028, "rewards/margins": 8.244988441467285, "rewards/rejected": -8.790167808532715, "step": 59420 }, { "epoch": 0.71, "learning_rate": 1.1648282821965378e-06, "logits/chosen": -2.9256603717803955, "logits/rejected": -2.2296957969665527, "logps/chosen": -127.68880462646484, "logps/rejected": -977.9844970703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7124214768409729, "rewards/margins": 8.667348861694336, "rewards/rejected": -9.379770278930664, "step": 59430 }, { "epoch": 0.71, "learning_rate": 1.1639452334158316e-06, "logits/chosen": -2.9203267097473145, "logits/rejected": -2.29603910446167, "logps/chosen": -124.3186264038086, "logps/rejected": -944.8048095703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7249739170074463, "rewards/margins": 8.310362815856934, "rewards/rejected": -9.035337448120117, "step": 59440 }, { "epoch": 0.71, "learning_rate": 1.1630624179065771e-06, "logits/chosen": -2.887500286102295, "logits/rejected": -2.4828968048095703, "logps/chosen": -97.59101867675781, "logps/rejected": -803.2769775390625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5792573094367981, "rewards/margins": 7.073503017425537, "rewards/rejected": -7.6527605056762695, "step": 59450 }, { "epoch": 0.71, "learning_rate": 1.1621798358229123e-06, "logits/chosen": -2.899907112121582, "logits/rejected": -2.592522144317627, "logps/chosen": -73.18441772460938, "logps/rejected": -873.302734375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.31030553579330444, "rewards/margins": 8.047353744506836, "rewards/rejected": -8.357660293579102, "step": 59460 }, { "epoch": 0.71, "learning_rate": 1.161297487318931e-06, "logits/chosen": -2.869611978530884, "logits/rejected": -2.3169734477996826, "logps/chosen": -107.8360824584961, "logps/rejected": -889.6778564453125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.6139401197433472, "rewards/margins": 7.892867088317871, "rewards/rejected": -8.506807327270508, "step": 59470 }, { "epoch": 0.71, "learning_rate": 1.1604153725486908e-06, "logits/chosen": -2.87916898727417, "logits/rejected": -2.4367308616638184, "logps/chosen": -77.98017883300781, "logps/rejected": -786.5484008789062, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -0.36700770258903503, "rewards/margins": 7.138243198394775, "rewards/rejected": -7.505250453948975, "step": 59480 }, { "epoch": 0.71, "learning_rate": 1.159533491666206e-06, "logits/chosen": -2.910367727279663, "logits/rejected": -1.8913183212280273, "logps/chosen": -116.76597595214844, "logps/rejected": -1009.6502075195312, "loss": 0.0953, "rewards/accuracies": 1.0, "rewards/chosen": -0.6544352769851685, "rewards/margins": 9.038822174072266, "rewards/rejected": -9.693257331848145, "step": 59490 }, { "epoch": 0.71, "learning_rate": 1.1586518448254506e-06, "logits/chosen": -2.9278602600097656, "logits/rejected": -2.457604169845581, "logps/chosen": -107.86553955078125, "logps/rejected": -937.5089721679688, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5974248051643372, "rewards/margins": 8.400850296020508, "rewards/rejected": -8.998273849487305, "step": 59500 }, { "epoch": 0.71, "learning_rate": 1.1577704321803577e-06, "logits/chosen": -2.8544528484344482, "logits/rejected": -2.0633392333984375, "logps/chosen": -116.40876770019531, "logps/rejected": -1057.4881591796875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6302293539047241, "rewards/margins": 9.53678035736084, "rewards/rejected": -10.167009353637695, "step": 59510 }, { "epoch": 0.71, "learning_rate": 1.1568892538848193e-06, "logits/chosen": -2.8987388610839844, "logits/rejected": -2.2401175498962402, "logps/chosen": -109.7168960571289, "logps/rejected": -1043.988037109375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.623418927192688, "rewards/margins": 9.41688346862793, "rewards/rejected": -10.040303230285645, "step": 59520 }, { "epoch": 0.71, "learning_rate": 1.1560083100926877e-06, "logits/chosen": -2.8378539085388184, "logits/rejected": -2.2242484092712402, "logps/chosen": -118.35981750488281, "logps/rejected": -944.9488525390625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7256925702095032, "rewards/margins": 8.325119018554688, "rewards/rejected": -9.050811767578125, "step": 59530 }, { "epoch": 0.71, "learning_rate": 1.1551276009577714e-06, "logits/chosen": -2.929805278778076, "logits/rejected": -2.4996485710144043, "logps/chosen": -103.09767150878906, "logps/rejected": -931.0300903320312, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.5902777910232544, "rewards/margins": 8.328630447387695, "rewards/rejected": -8.918907165527344, "step": 59540 }, { "epoch": 0.71, "learning_rate": 1.1542471266338408e-06, "logits/chosen": -2.889172315597534, "logits/rejected": -2.3061304092407227, "logps/chosen": -98.4299087524414, "logps/rejected": -915.0324096679688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4988807141780853, "rewards/margins": 8.259344100952148, "rewards/rejected": -8.758225440979004, "step": 59550 }, { "epoch": 0.71, "learning_rate": 1.1533668872746245e-06, "logits/chosen": -2.87833833694458, "logits/rejected": -2.4388554096221924, "logps/chosen": -89.94973754882812, "logps/rejected": -885.98974609375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.43912792205810547, "rewards/margins": 8.04040241241455, "rewards/rejected": -8.479531288146973, "step": 59560 }, { "epoch": 0.71, "learning_rate": 1.1524868830338093e-06, "logits/chosen": -2.830953598022461, "logits/rejected": -2.273991584777832, "logps/chosen": -113.4130630493164, "logps/rejected": -911.1834106445312, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6758978962898254, "rewards/margins": 8.04507064819336, "rewards/rejected": -8.720969200134277, "step": 59570 }, { "epoch": 0.71, "learning_rate": 1.151607114065042e-06, "logits/chosen": -2.845919370651245, "logits/rejected": -2.2564711570739746, "logps/chosen": -106.3437728881836, "logps/rejected": -960.2659301757812, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.5917068123817444, "rewards/margins": 8.61185359954834, "rewards/rejected": -9.203559875488281, "step": 59580 }, { "epoch": 0.71, "learning_rate": 1.1507275805219283e-06, "logits/chosen": -2.853400707244873, "logits/rejected": -2.398036479949951, "logps/chosen": -84.76481628417969, "logps/rejected": -890.18994140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4112131595611572, "rewards/margins": 8.0868501663208, "rewards/rejected": -8.498062133789062, "step": 59590 }, { "epoch": 0.71, "learning_rate": 1.1498482825580306e-06, "logits/chosen": -2.908259868621826, "logits/rejected": -2.1843628883361816, "logps/chosen": -113.80351257324219, "logps/rejected": -1014.3776245117188, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -0.6195289492607117, "rewards/margins": 9.131699562072754, "rewards/rejected": -9.751227378845215, "step": 59600 }, { "epoch": 0.71, "learning_rate": 1.1489692203268733e-06, "logits/chosen": -2.8790156841278076, "logits/rejected": -2.3833723068237305, "logps/chosen": -90.3826675415039, "logps/rejected": -922.1442260742188, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.48653292655944824, "rewards/margins": 8.356242179870605, "rewards/rejected": -8.842775344848633, "step": 59610 }, { "epoch": 0.71, "learning_rate": 1.1480903939819374e-06, "logits/chosen": -2.8803811073303223, "logits/rejected": -2.022373676300049, "logps/chosen": -138.53773498535156, "logps/rejected": -1096.4168701171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.751674234867096, "rewards/margins": 9.80933952331543, "rewards/rejected": -10.561014175415039, "step": 59620 }, { "epoch": 0.71, "learning_rate": 1.1472118036766639e-06, "logits/chosen": -2.8769822120666504, "logits/rejected": -2.440109968185425, "logps/chosen": -106.72779846191406, "logps/rejected": -873.5958251953125, "loss": 0.1075, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5931130647659302, "rewards/margins": 7.743058204650879, "rewards/rejected": -8.33617115020752, "step": 59630 }, { "epoch": 0.71, "learning_rate": 1.1463334495644524e-06, "logits/chosen": -2.8573861122131348, "logits/rejected": -2.379150867462158, "logps/chosen": -110.0248031616211, "logps/rejected": -832.7294921875, "loss": 0.1355, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.663083016872406, "rewards/margins": 7.2852349281311035, "rewards/rejected": -7.948317050933838, "step": 59640 }, { "epoch": 0.71, "learning_rate": 1.1454553317986612e-06, "logits/chosen": -2.9262256622314453, "logits/rejected": -2.279163360595703, "logps/chosen": -104.6465835571289, "logps/rejected": -1085.3770751953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.517306923866272, "rewards/margins": 9.927319526672363, "rewards/rejected": -10.44462776184082, "step": 59650 }, { "epoch": 0.71, "learning_rate": 1.1445774505326077e-06, "logits/chosen": -2.933964252471924, "logits/rejected": -2.4593331813812256, "logps/chosen": -94.15585327148438, "logps/rejected": -868.5667724609375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5183354020118713, "rewards/margins": 7.776200771331787, "rewards/rejected": -8.294536590576172, "step": 59660 }, { "epoch": 0.71, "learning_rate": 1.1436998059195658e-06, "logits/chosen": -2.881608247756958, "logits/rejected": -2.542189121246338, "logps/chosen": -71.56947326660156, "logps/rejected": -808.4909057617188, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.32450518012046814, "rewards/margins": 7.391772270202637, "rewards/rejected": -7.7162766456604, "step": 59670 }, { "epoch": 0.71, "learning_rate": 1.1428223981127708e-06, "logits/chosen": -2.9070920944213867, "logits/rejected": -2.356729745864868, "logps/chosen": -105.51790618896484, "logps/rejected": -914.1300659179688, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5957017540931702, "rewards/margins": 8.176817893981934, "rewards/rejected": -8.772520065307617, "step": 59680 }, { "epoch": 0.71, "learning_rate": 1.1419452272654157e-06, "logits/chosen": -2.8904757499694824, "logits/rejected": -2.4013490676879883, "logps/chosen": -76.9614028930664, "logps/rejected": -842.1976318359375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.32070890069007874, "rewards/margins": 7.726310729980469, "rewards/rejected": -8.047019958496094, "step": 59690 }, { "epoch": 0.71, "learning_rate": 1.1410682935306514e-06, "logits/chosen": -2.889644145965576, "logits/rejected": -2.5232348442077637, "logps/chosen": -81.51468658447266, "logps/rejected": -865.7860107421875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.41032272577285767, "rewards/margins": 7.8818559646606445, "rewards/rejected": -8.292179107666016, "step": 59700 }, { "epoch": 0.71, "learning_rate": 1.1401915970615903e-06, "logits/chosen": -2.8883063793182373, "logits/rejected": -2.6981170177459717, "logps/chosen": -86.21436309814453, "logps/rejected": -768.5217895507812, "loss": 0.0297, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5018383860588074, "rewards/margins": 6.815340518951416, "rewards/rejected": -7.317178249359131, "step": 59710 }, { "epoch": 0.71, "learning_rate": 1.1393151380112991e-06, "logits/chosen": -2.855159282684326, "logits/rejected": -2.2914042472839355, "logps/chosen": -107.6241455078125, "logps/rejected": -850.7205810546875, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.6458781957626343, "rewards/margins": 7.4643096923828125, "rewards/rejected": -8.110187530517578, "step": 59720 }, { "epoch": 0.71, "learning_rate": 1.1384389165328054e-06, "logits/chosen": -2.9152541160583496, "logits/rejected": -2.2771639823913574, "logps/chosen": -115.3309555053711, "logps/rejected": -963.7572021484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6459301114082336, "rewards/margins": 8.591840744018555, "rewards/rejected": -9.237771987915039, "step": 59730 }, { "epoch": 0.72, "learning_rate": 1.1375629327790955e-06, "logits/chosen": -2.8842713832855225, "logits/rejected": -2.535449743270874, "logps/chosen": -111.35975646972656, "logps/rejected": -792.1650390625, "loss": 0.0986, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6933773756027222, "rewards/margins": 6.8367180824279785, "rewards/rejected": -7.530095100402832, "step": 59740 }, { "epoch": 0.72, "learning_rate": 1.1366871869031133e-06, "logits/chosen": -2.902817487716675, "logits/rejected": -2.1434788703918457, "logps/chosen": -105.21952056884766, "logps/rejected": -959.0792846679688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5170617699623108, "rewards/margins": 8.653635025024414, "rewards/rejected": -9.170697212219238, "step": 59750 }, { "epoch": 0.72, "learning_rate": 1.135811679057762e-06, "logits/chosen": -2.805713176727295, "logits/rejected": -2.412978410720825, "logps/chosen": -87.30004119873047, "logps/rejected": -915.9249267578125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5057047009468079, "rewards/margins": 8.273664474487305, "rewards/rejected": -8.779369354248047, "step": 59760 }, { "epoch": 0.72, "learning_rate": 1.1349364093959025e-06, "logits/chosen": -2.8967742919921875, "logits/rejected": -2.4614109992980957, "logps/chosen": -84.93140411376953, "logps/rejected": -855.2286376953125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.4103013873100281, "rewards/margins": 7.771788597106934, "rewards/rejected": -8.182089805603027, "step": 59770 }, { "epoch": 0.72, "learning_rate": 1.1340613780703556e-06, "logits/chosen": -2.907580852508545, "logits/rejected": -2.2850916385650635, "logps/chosen": -126.07096099853516, "logps/rejected": -959.2199096679688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7351480722427368, "rewards/margins": 8.451902389526367, "rewards/rejected": -9.187050819396973, "step": 59780 }, { "epoch": 0.72, "learning_rate": 1.1331865852338975e-06, "logits/chosen": -2.907248020172119, "logits/rejected": -2.4042716026306152, "logps/chosen": -110.5058822631836, "logps/rejected": -1042.95751953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5738739967346191, "rewards/margins": 9.452516555786133, "rewards/rejected": -10.026390075683594, "step": 59790 }, { "epoch": 0.72, "learning_rate": 1.1323120310392656e-06, "logits/chosen": -2.9145569801330566, "logits/rejected": -2.27498197555542, "logps/chosen": -104.14605712890625, "logps/rejected": -937.8313598632812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5334634184837341, "rewards/margins": 8.456113815307617, "rewards/rejected": -8.989578247070312, "step": 59800 }, { "epoch": 0.72, "learning_rate": 1.131437715639154e-06, "logits/chosen": -2.8337483406066895, "logits/rejected": -2.385512113571167, "logps/chosen": -98.32860565185547, "logps/rejected": -860.2717895507812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5699359774589539, "rewards/margins": 7.660367488861084, "rewards/rejected": -8.230302810668945, "step": 59810 }, { "epoch": 0.72, "learning_rate": 1.1305636391862165e-06, "logits/chosen": -2.8961024284362793, "logits/rejected": -2.4917900562286377, "logps/chosen": -104.61190032958984, "logps/rejected": -844.6233520507812, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.5772628784179688, "rewards/margins": 7.4834394454956055, "rewards/rejected": -8.06070327758789, "step": 59820 }, { "epoch": 0.72, "learning_rate": 1.1296898018330642e-06, "logits/chosen": -2.9140689373016357, "logits/rejected": -2.448781728744507, "logps/chosen": -102.56404113769531, "logps/rejected": -884.7888793945312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5518817901611328, "rewards/margins": 7.903024196624756, "rewards/rejected": -8.45490550994873, "step": 59830 }, { "epoch": 0.72, "learning_rate": 1.1288162037322667e-06, "logits/chosen": -2.923372983932495, "logits/rejected": -2.495565414428711, "logps/chosen": -94.60295104980469, "logps/rejected": -916.1982421875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.48327916860580444, "rewards/margins": 8.299478530883789, "rewards/rejected": -8.782757759094238, "step": 59840 }, { "epoch": 0.72, "learning_rate": 1.1279428450363525e-06, "logits/chosen": -2.885810613632202, "logits/rejected": -2.2840428352355957, "logps/chosen": -111.74983215332031, "logps/rejected": -953.3936767578125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.5506643056869507, "rewards/margins": 8.582229614257812, "rewards/rejected": -9.132894515991211, "step": 59850 }, { "epoch": 0.72, "learning_rate": 1.1270697258978062e-06, "logits/chosen": -2.869354724884033, "logits/rejected": -2.5054452419281006, "logps/chosen": -107.469482421875, "logps/rejected": -787.044189453125, "loss": 0.1373, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6715033650398254, "rewards/margins": 6.830158233642578, "rewards/rejected": -7.501661777496338, "step": 59860 }, { "epoch": 0.72, "learning_rate": 1.1261968464690723e-06, "logits/chosen": -2.8639378547668457, "logits/rejected": -2.3209924697875977, "logps/chosen": -108.87918853759766, "logps/rejected": -1027.8704833984375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6072896718978882, "rewards/margins": 9.269994735717773, "rewards/rejected": -9.87728500366211, "step": 59870 }, { "epoch": 0.72, "learning_rate": 1.1253242069025532e-06, "logits/chosen": -2.854872226715088, "logits/rejected": -2.4135329723358154, "logps/chosen": -103.26676940917969, "logps/rejected": -957.1378173828125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5856783390045166, "rewards/margins": 8.586750030517578, "rewards/rejected": -9.172429084777832, "step": 59880 }, { "epoch": 0.72, "learning_rate": 1.12445180735061e-06, "logits/chosen": -2.857065200805664, "logits/rejected": -2.1756527423858643, "logps/chosen": -119.3354721069336, "logps/rejected": -1013.2047119140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.649738609790802, "rewards/margins": 9.073705673217773, "rewards/rejected": -9.723443031311035, "step": 59890 }, { "epoch": 0.72, "learning_rate": 1.123579647965561e-06, "logits/chosen": -2.8906056880950928, "logits/rejected": -2.4330737590789795, "logps/chosen": -88.5757064819336, "logps/rejected": -930.8004150390625, "loss": 0.1046, "rewards/accuracies": 1.0, "rewards/chosen": -0.44027847051620483, "rewards/margins": 8.48581600189209, "rewards/rejected": -8.926095962524414, "step": 59900 }, { "epoch": 0.72, "learning_rate": 1.122707728899683e-06, "logits/chosen": -2.8936309814453125, "logits/rejected": -2.5738446712493896, "logps/chosen": -81.13243103027344, "logps/rejected": -785.4033203125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.3986261487007141, "rewards/margins": 7.08327579498291, "rewards/rejected": -7.481902122497559, "step": 59910 }, { "epoch": 0.72, "learning_rate": 1.1218360503052098e-06, "logits/chosen": -2.9092531204223633, "logits/rejected": -2.383324384689331, "logps/chosen": -99.49122619628906, "logps/rejected": -1003.3263549804688, "loss": 0.1231, "rewards/accuracies": 1.0, "rewards/chosen": -0.5350720286369324, "rewards/margins": 9.095136642456055, "rewards/rejected": -9.630208015441895, "step": 59920 }, { "epoch": 0.72, "learning_rate": 1.1209646123343344e-06, "logits/chosen": -2.8618879318237305, "logits/rejected": -2.2498271465301514, "logps/chosen": -108.61528015136719, "logps/rejected": -900.0587768554688, "loss": 0.0847, "rewards/accuracies": 1.0, "rewards/chosen": -0.6128110289573669, "rewards/margins": 7.998175144195557, "rewards/rejected": -8.610986709594727, "step": 59930 }, { "epoch": 0.72, "learning_rate": 1.1200934151392073e-06, "logits/chosen": -2.8726677894592285, "logits/rejected": -2.1994335651397705, "logps/chosen": -103.01261138916016, "logps/rejected": -986.84619140625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5610901117324829, "rewards/margins": 8.916838645935059, "rewards/rejected": -9.47792911529541, "step": 59940 }, { "epoch": 0.72, "learning_rate": 1.1192224588719376e-06, "logits/chosen": -2.8525989055633545, "logits/rejected": -2.2025020122528076, "logps/chosen": -137.11776733398438, "logps/rejected": -980.8787841796875, "loss": 0.0987, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8258529901504517, "rewards/margins": 8.578843116760254, "rewards/rejected": -9.404695510864258, "step": 59950 }, { "epoch": 0.72, "learning_rate": 1.1183517436845916e-06, "logits/chosen": -2.9247210025787354, "logits/rejected": -2.4515349864959717, "logps/chosen": -124.38319396972656, "logps/rejected": -917.181640625, "loss": 0.0286, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7228839993476868, "rewards/margins": 8.062307357788086, "rewards/rejected": -8.785192489624023, "step": 59960 }, { "epoch": 0.72, "learning_rate": 1.1174812697291937e-06, "logits/chosen": -2.888279438018799, "logits/rejected": -2.731036424636841, "logps/chosen": -53.43989944458008, "logps/rejected": -720.7658081054688, "loss": 0.0886, "rewards/accuracies": 1.0, "rewards/chosen": -0.18000677227973938, "rewards/margins": 6.659605503082275, "rewards/rejected": -6.839612007141113, "step": 59970 }, { "epoch": 0.72, "learning_rate": 1.1166110371577274e-06, "logits/chosen": -2.8912672996520996, "logits/rejected": -2.3676822185516357, "logps/chosen": -82.6308822631836, "logps/rejected": -859.4064331054688, "loss": 0.105, "rewards/accuracies": 1.0, "rewards/chosen": -0.396199494600296, "rewards/margins": 7.819418430328369, "rewards/rejected": -8.215619087219238, "step": 59980 }, { "epoch": 0.72, "learning_rate": 1.115741046122131e-06, "logits/chosen": -2.8958418369293213, "logits/rejected": -2.3036646842956543, "logps/chosen": -113.6048355102539, "logps/rejected": -1007.1881103515625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6377967596054077, "rewards/margins": 9.036622047424316, "rewards/rejected": -9.674418449401855, "step": 59990 }, { "epoch": 0.72, "learning_rate": 1.114871296774303e-06, "logits/chosen": -2.868130683898926, "logits/rejected": -2.5219030380249023, "logps/chosen": -79.17100524902344, "logps/rejected": -856.22412109375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.33413535356521606, "rewards/margins": 7.842006683349609, "rewards/rejected": -8.176142692565918, "step": 60000 }, { "epoch": 0.72, "eval_logits/chosen": -2.887071371078491, "eval_logits/rejected": -1.7573661804199219, "eval_logps/chosen": -234.7418670654297, "eval_logps/rejected": -1126.8001708984375, "eval_loss": 0.0013307330664247274, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -1.735615849494934, "eval_rewards/margins": 9.065131187438965, "eval_rewards/rejected": -10.80074691772461, "eval_runtime": 1.2146, "eval_samples_per_second": 4.117, "eval_steps_per_second": 2.47, "step": 60000 }, { "epoch": 0.72, "learning_rate": 1.1140017892660997e-06, "logits/chosen": -2.8639042377471924, "logits/rejected": -2.4150357246398926, "logps/chosen": -87.17707061767578, "logps/rejected": -844.3810424804688, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.43881064653396606, "rewards/margins": 7.628795623779297, "rewards/rejected": -8.067605972290039, "step": 60010 }, { "epoch": 0.72, "learning_rate": 1.1131325237493348e-06, "logits/chosen": -2.8526859283447266, "logits/rejected": -2.2702271938323975, "logps/chosen": -113.42942810058594, "logps/rejected": -943.5319213867188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6478756070137024, "rewards/margins": 8.392049789428711, "rewards/rejected": -9.039926528930664, "step": 60020 }, { "epoch": 0.72, "learning_rate": 1.1122635003757793e-06, "logits/chosen": -2.823512554168701, "logits/rejected": -2.233755350112915, "logps/chosen": -110.0313491821289, "logps/rejected": -1045.008056640625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6039142608642578, "rewards/margins": 9.443778038024902, "rewards/rejected": -10.047693252563477, "step": 60030 }, { "epoch": 0.72, "learning_rate": 1.1113947192971619e-06, "logits/chosen": -2.886323928833008, "logits/rejected": -2.700319766998291, "logps/chosen": -58.341064453125, "logps/rejected": -761.7156372070312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.21526524424552917, "rewards/margins": 7.028888702392578, "rewards/rejected": -7.244154453277588, "step": 60040 }, { "epoch": 0.72, "learning_rate": 1.1105261806651713e-06, "logits/chosen": -2.920919895172119, "logits/rejected": -2.5783133506774902, "logps/chosen": -102.06085205078125, "logps/rejected": -823.0010986328125, "loss": 0.1007, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5735787153244019, "rewards/margins": 7.2950944900512695, "rewards/rejected": -7.8686723709106445, "step": 60050 }, { "epoch": 0.72, "learning_rate": 1.1096578846314493e-06, "logits/chosen": -2.902880907058716, "logits/rejected": -2.337441921234131, "logps/chosen": -90.39439392089844, "logps/rejected": -981.9884643554688, "loss": 0.1586, "rewards/accuracies": 1.0, "rewards/chosen": -0.4634666442871094, "rewards/margins": 8.965734481811523, "rewards/rejected": -9.429201126098633, "step": 60060 }, { "epoch": 0.72, "learning_rate": 1.1087898313475992e-06, "logits/chosen": -2.8838002681732178, "logits/rejected": -2.416983127593994, "logps/chosen": -86.96965026855469, "logps/rejected": -888.93115234375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.4057548940181732, "rewards/margins": 8.093283653259277, "rewards/rejected": -8.499038696289062, "step": 60070 }, { "epoch": 0.72, "learning_rate": 1.1079220209651805e-06, "logits/chosen": -2.879467487335205, "logits/rejected": -2.2293801307678223, "logps/chosen": -125.1184310913086, "logps/rejected": -1066.2626953125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.689541220664978, "rewards/margins": 9.542083740234375, "rewards/rejected": -10.231624603271484, "step": 60080 }, { "epoch": 0.72, "learning_rate": 1.1070544536357105e-06, "logits/chosen": -2.8779048919677734, "logits/rejected": -2.325747489929199, "logps/chosen": -100.98834228515625, "logps/rejected": -929.5037231445312, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.5376111268997192, "rewards/margins": 8.357948303222656, "rewards/rejected": -8.895559310913086, "step": 60090 }, { "epoch": 0.72, "learning_rate": 1.1061871295106646e-06, "logits/chosen": -2.84688138961792, "logits/rejected": -2.1487338542938232, "logps/chosen": -112.23982238769531, "logps/rejected": -1051.382080078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6592740416526794, "rewards/margins": 9.458369255065918, "rewards/rejected": -10.117643356323242, "step": 60100 }, { "epoch": 0.72, "learning_rate": 1.1053200487414754e-06, "logits/chosen": -2.888634204864502, "logits/rejected": -2.3778133392333984, "logps/chosen": -98.78785705566406, "logps/rejected": -950.1784057617188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5257681608200073, "rewards/margins": 8.589279174804688, "rewards/rejected": -9.115046501159668, "step": 60110 }, { "epoch": 0.72, "learning_rate": 1.1044532114795312e-06, "logits/chosen": -2.893740653991699, "logits/rejected": -2.253340721130371, "logps/chosen": -121.72562408447266, "logps/rejected": -969.66015625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.722756028175354, "rewards/margins": 8.563945770263672, "rewards/rejected": -9.286702156066895, "step": 60120 }, { "epoch": 0.72, "learning_rate": 1.1035866178761795e-06, "logits/chosen": -2.8907763957977295, "logits/rejected": -2.185619354248047, "logps/chosen": -119.40594482421875, "logps/rejected": -1019.9724731445312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6681553721427917, "rewards/margins": 9.13101863861084, "rewards/rejected": -9.799173355102539, "step": 60130 }, { "epoch": 0.72, "learning_rate": 1.1027202680827268e-06, "logits/chosen": -2.8601698875427246, "logits/rejected": -2.3091630935668945, "logps/chosen": -83.77214813232422, "logps/rejected": -909.7615356445312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.41825658082962036, "rewards/margins": 8.288122177124023, "rewards/rejected": -8.706378936767578, "step": 60140 }, { "epoch": 0.72, "learning_rate": 1.1018541622504345e-06, "logits/chosen": -2.8428986072540283, "logits/rejected": -2.022768259048462, "logps/chosen": -127.86152648925781, "logps/rejected": -991.5886840820312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7466676831245422, "rewards/margins": 8.783060073852539, "rewards/rejected": -9.5297269821167, "step": 60150 }, { "epoch": 0.72, "learning_rate": 1.1009883005305222e-06, "logits/chosen": -2.9002256393432617, "logits/rejected": -2.461381435394287, "logps/chosen": -98.79264831542969, "logps/rejected": -869.3605346679688, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.575408399105072, "rewards/margins": 7.7286529541015625, "rewards/rejected": -8.304060935974121, "step": 60160 }, { "epoch": 0.72, "learning_rate": 1.100122683074168e-06, "logits/chosen": -2.9024767875671387, "logits/rejected": -2.384899139404297, "logps/chosen": -92.35215759277344, "logps/rejected": -884.0538940429688, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.48610982298851013, "rewards/margins": 7.963005065917969, "rewards/rejected": -8.449115753173828, "step": 60170 }, { "epoch": 0.72, "learning_rate": 1.0992573100325044e-06, "logits/chosen": -2.908904552459717, "logits/rejected": -2.3008456230163574, "logps/chosen": -104.33082580566406, "logps/rejected": -983.2587890625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5381988286972046, "rewards/margins": 8.904180526733398, "rewards/rejected": -9.442380905151367, "step": 60180 }, { "epoch": 0.72, "learning_rate": 1.0983921815566242e-06, "logits/chosen": -2.9242358207702637, "logits/rejected": -2.593925952911377, "logps/chosen": -78.20161437988281, "logps/rejected": -856.2283935546875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4003539979457855, "rewards/margins": 7.7773895263671875, "rewards/rejected": -8.177742004394531, "step": 60190 }, { "epoch": 0.72, "learning_rate": 1.0975272977975763e-06, "logits/chosen": -2.837740421295166, "logits/rejected": -1.805625557899475, "logps/chosen": -149.1344757080078, "logps/rejected": -1160.802978515625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8514106869697571, "rewards/margins": 10.338224411010742, "rewards/rejected": -11.189634323120117, "step": 60200 }, { "epoch": 0.72, "learning_rate": 1.0966626589063666e-06, "logits/chosen": -2.8741376399993896, "logits/rejected": -2.5070292949676514, "logps/chosen": -132.80926513671875, "logps/rejected": -815.7352905273438, "loss": 0.1954, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8809007406234741, "rewards/margins": 6.884735107421875, "rewards/rejected": -7.7656354904174805, "step": 60210 }, { "epoch": 0.72, "learning_rate": 1.0957982650339596e-06, "logits/chosen": -2.8586506843566895, "logits/rejected": -2.1418333053588867, "logps/chosen": -135.7034912109375, "logps/rejected": -1038.9385986328125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.8034585118293762, "rewards/margins": 9.182046890258789, "rewards/rejected": -9.985506057739258, "step": 60220 }, { "epoch": 0.72, "learning_rate": 1.0949341163312763e-06, "logits/chosen": -2.889111042022705, "logits/rejected": -2.3334014415740967, "logps/chosen": -102.6164321899414, "logps/rejected": -978.0452270507812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5946402549743652, "rewards/margins": 8.78035831451416, "rewards/rejected": -9.374998092651367, "step": 60230 }, { "epoch": 0.72, "learning_rate": 1.094070212949193e-06, "logits/chosen": -2.895831346511841, "logits/rejected": -2.6008408069610596, "logps/chosen": -69.0053482055664, "logps/rejected": -803.1475830078125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.3271009624004364, "rewards/margins": 7.332398414611816, "rewards/rejected": -7.659499168395996, "step": 60240 }, { "epoch": 0.72, "learning_rate": 1.0932065550385454e-06, "logits/chosen": -2.8491604328155518, "logits/rejected": -2.3075995445251465, "logps/chosen": -129.64892578125, "logps/rejected": -931.2879638671875, "loss": 0.1351, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8392187356948853, "rewards/margins": 8.086692810058594, "rewards/rejected": -8.925911903381348, "step": 60250 }, { "epoch": 0.72, "learning_rate": 1.0923431427501264e-06, "logits/chosen": -2.8546059131622314, "logits/rejected": -2.2139482498168945, "logps/chosen": -118.029296875, "logps/rejected": -928.6062622070312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6653813123703003, "rewards/margins": 8.218618392944336, "rewards/rejected": -8.884000778198242, "step": 60260 }, { "epoch": 0.72, "learning_rate": 1.0914799762346847e-06, "logits/chosen": -2.895409107208252, "logits/rejected": -2.447438955307007, "logps/chosen": -94.71751403808594, "logps/rejected": -957.2100830078125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.46063417196273804, "rewards/margins": 8.718171119689941, "rewards/rejected": -9.178804397583008, "step": 60270 }, { "epoch": 0.72, "learning_rate": 1.0906170556429278e-06, "logits/chosen": -2.8898751735687256, "logits/rejected": -2.420841693878174, "logps/chosen": -90.6541519165039, "logps/rejected": -835.8338012695312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.45471900701522827, "rewards/margins": 7.531163692474365, "rewards/rejected": -7.985882759094238, "step": 60280 }, { "epoch": 0.72, "learning_rate": 1.0897543811255181e-06, "logits/chosen": -2.936727523803711, "logits/rejected": -2.359548330307007, "logps/chosen": -92.06471252441406, "logps/rejected": -896.2017822265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4667075276374817, "rewards/margins": 8.115640640258789, "rewards/rejected": -8.582348823547363, "step": 60290 }, { "epoch": 0.72, "learning_rate": 1.0888919528330778e-06, "logits/chosen": -2.907104969024658, "logits/rejected": -2.4203286170959473, "logps/chosen": -99.40340423583984, "logps/rejected": -836.0479736328125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5387364029884338, "rewards/margins": 7.432297706604004, "rewards/rejected": -7.971034049987793, "step": 60300 }, { "epoch": 0.72, "learning_rate": 1.0880297709161822e-06, "logits/chosen": -2.8495945930480957, "logits/rejected": -2.160644769668579, "logps/chosen": -114.22987365722656, "logps/rejected": -943.4970703125, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.6135274767875671, "rewards/margins": 8.44044017791748, "rewards/rejected": -9.05396842956543, "step": 60310 }, { "epoch": 0.72, "learning_rate": 1.087167835525367e-06, "logits/chosen": -2.82781982421875, "logits/rejected": -2.245387315750122, "logps/chosen": -109.51966857910156, "logps/rejected": -988.5654296875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5491896271705627, "rewards/margins": 8.946975708007812, "rewards/rejected": -9.49616527557373, "step": 60320 }, { "epoch": 0.72, "learning_rate": 1.0863061468111242e-06, "logits/chosen": -2.909130573272705, "logits/rejected": -2.162367343902588, "logps/chosen": -147.17405700683594, "logps/rejected": -1044.4356689453125, "loss": 0.0979, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9397891163825989, "rewards/margins": 9.097131729125977, "rewards/rejected": -10.036921501159668, "step": 60330 }, { "epoch": 0.72, "learning_rate": 1.0854447049239014e-06, "logits/chosen": -2.8698458671569824, "logits/rejected": -2.137073040008545, "logps/chosen": -109.66117095947266, "logps/rejected": -950.6985473632812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5614746809005737, "rewards/margins": 8.563294410705566, "rewards/rejected": -9.124768257141113, "step": 60340 }, { "epoch": 0.72, "learning_rate": 1.0845835100141044e-06, "logits/chosen": -2.8556747436523438, "logits/rejected": -2.2459261417388916, "logps/chosen": -125.35237121582031, "logps/rejected": -971.4124755859375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.7932592630386353, "rewards/margins": 8.51530647277832, "rewards/rejected": -9.308565139770508, "step": 60350 }, { "epoch": 0.72, "learning_rate": 1.0837225622320955e-06, "logits/chosen": -2.8829593658447266, "logits/rejected": -2.21039080619812, "logps/chosen": -144.7526092529297, "logps/rejected": -972.7413330078125, "loss": 0.1467, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9509595036506653, "rewards/margins": 8.383527755737305, "rewards/rejected": -9.334486961364746, "step": 60360 }, { "epoch": 0.72, "learning_rate": 1.0828618617281945e-06, "logits/chosen": -2.8902106285095215, "logits/rejected": -2.438852548599243, "logps/chosen": -82.55509185791016, "logps/rejected": -889.3466796875, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": -0.41227275133132935, "rewards/margins": 8.110099792480469, "rewards/rejected": -8.522371292114258, "step": 60370 }, { "epoch": 0.72, "learning_rate": 1.0820014086526755e-06, "logits/chosen": -2.8466784954071045, "logits/rejected": -2.3287594318389893, "logps/chosen": -94.21895599365234, "logps/rejected": -885.6725463867188, "loss": 0.0329, "rewards/accuracies": 1.0, "rewards/chosen": -0.49400219321250916, "rewards/margins": 7.981629371643066, "rewards/rejected": -8.475631713867188, "step": 60380 }, { "epoch": 0.72, "learning_rate": 1.0811412031557725e-06, "logits/chosen": -2.8852057456970215, "logits/rejected": -2.2228541374206543, "logps/chosen": -102.28113555908203, "logps/rejected": -1074.646728515625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5241507887840271, "rewards/margins": 9.820343017578125, "rewards/rejected": -10.34449291229248, "step": 60390 }, { "epoch": 0.72, "learning_rate": 1.0802812453876746e-06, "logits/chosen": -2.8756461143493652, "logits/rejected": -2.316256046295166, "logps/chosen": -95.83680725097656, "logps/rejected": -942.7708740234375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5142747759819031, "rewards/margins": 8.529550552368164, "rewards/rejected": -9.043825149536133, "step": 60400 }, { "epoch": 0.72, "learning_rate": 1.0794215354985283e-06, "logits/chosen": -2.8718013763427734, "logits/rejected": -2.1799564361572266, "logps/chosen": -113.51761627197266, "logps/rejected": -1010.5001220703125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6256052255630493, "rewards/margins": 9.088228225708008, "rewards/rejected": -9.713833808898926, "step": 60410 }, { "epoch": 0.72, "learning_rate": 1.0785620736384361e-06, "logits/chosen": -2.8725991249084473, "logits/rejected": -2.1930713653564453, "logps/chosen": -105.515380859375, "logps/rejected": -1003.9679565429688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5528584718704224, "rewards/margins": 9.098058700561523, "rewards/rejected": -9.650918006896973, "step": 60420 }, { "epoch": 0.72, "learning_rate": 1.0777028599574591e-06, "logits/chosen": -2.8649067878723145, "logits/rejected": -2.250624895095825, "logps/chosen": -130.55490112304688, "logps/rejected": -978.826171875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8175942301750183, "rewards/margins": 8.562051773071289, "rewards/rejected": -9.379647254943848, "step": 60430 }, { "epoch": 0.72, "learning_rate": 1.0768438946056118e-06, "logits/chosen": -2.9027531147003174, "logits/rejected": -2.6168360710144043, "logps/chosen": -70.7513656616211, "logps/rejected": -776.6336059570312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.32443028688430786, "rewards/margins": 7.073024749755859, "rewards/rejected": -7.397454738616943, "step": 60440 }, { "epoch": 0.72, "learning_rate": 1.0759851777328672e-06, "logits/chosen": -2.9049744606018066, "logits/rejected": -2.3980507850646973, "logps/chosen": -99.04481506347656, "logps/rejected": -985.5325317382812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.511203408241272, "rewards/margins": 8.953313827514648, "rewards/rejected": -9.464517593383789, "step": 60450 }, { "epoch": 0.72, "learning_rate": 1.075126709489156e-06, "logits/chosen": -2.8612940311431885, "logits/rejected": -2.429163932800293, "logps/chosen": -117.93809509277344, "logps/rejected": -831.5607299804688, "loss": 0.0982, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7352513074874878, "rewards/margins": 7.203160762786865, "rewards/rejected": -7.938411712646484, "step": 60460 }, { "epoch": 0.72, "learning_rate": 1.074268490024364e-06, "logits/chosen": -2.798314332962036, "logits/rejected": -2.310058116912842, "logps/chosen": -94.23210144042969, "logps/rejected": -876.0926513671875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4891154170036316, "rewards/margins": 7.895103454589844, "rewards/rejected": -8.384218215942383, "step": 60470 }, { "epoch": 0.72, "learning_rate": 1.0734105194883337e-06, "logits/chosen": -2.881767749786377, "logits/rejected": -2.396554470062256, "logps/chosen": -76.86358642578125, "logps/rejected": -878.6724853515625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.33017975091934204, "rewards/margins": 8.074573516845703, "rewards/rejected": -8.404752731323242, "step": 60480 }, { "epoch": 0.72, "learning_rate": 1.0725527980308644e-06, "logits/chosen": -2.874648332595825, "logits/rejected": -2.0400807857513428, "logps/chosen": -144.07852172851562, "logps/rejected": -1109.070068359375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8730395436286926, "rewards/margins": 9.802525520324707, "rewards/rejected": -10.675564765930176, "step": 60490 }, { "epoch": 0.72, "learning_rate": 1.071695325801713e-06, "logits/chosen": -2.9134762287139893, "logits/rejected": -2.2389492988586426, "logps/chosen": -95.5165023803711, "logps/rejected": -942.9328002929688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4662659168243408, "rewards/margins": 8.578673362731934, "rewards/rejected": -9.044939041137695, "step": 60500 }, { "epoch": 0.72, "learning_rate": 1.0708381029505898e-06, "logits/chosen": -2.8923580646514893, "logits/rejected": -2.222224473953247, "logps/chosen": -100.61441802978516, "logps/rejected": -973.0947265625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5195029377937317, "rewards/margins": 8.818078994750977, "rewards/rejected": -9.337580680847168, "step": 60510 }, { "epoch": 0.72, "learning_rate": 1.0699811296271646e-06, "logits/chosen": -2.8787479400634766, "logits/rejected": -2.452636241912842, "logps/chosen": -97.84807586669922, "logps/rejected": -820.9469604492188, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5442329049110413, "rewards/margins": 7.295906066894531, "rewards/rejected": -7.840138912200928, "step": 60520 }, { "epoch": 0.72, "learning_rate": 1.0691244059810623e-06, "logits/chosen": -2.909238338470459, "logits/rejected": -2.4237136840820312, "logps/chosen": -73.85225677490234, "logps/rejected": -916.4539184570312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.3096413016319275, "rewards/margins": 8.473464965820312, "rewards/rejected": -8.783105850219727, "step": 60530 }, { "epoch": 0.72, "learning_rate": 1.068267932161865e-06, "logits/chosen": -2.8701062202453613, "logits/rejected": -2.506263494491577, "logps/chosen": -72.7658920288086, "logps/rejected": -797.8506469726562, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.3578484058380127, "rewards/margins": 7.256482124328613, "rewards/rejected": -7.614331245422363, "step": 60540 }, { "epoch": 0.72, "learning_rate": 1.06741170831911e-06, "logits/chosen": -2.894580841064453, "logits/rejected": -2.6545472145080566, "logps/chosen": -60.31398391723633, "logps/rejected": -798.4721069335938, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.25410106778144836, "rewards/margins": 7.369519233703613, "rewards/rejected": -7.623620510101318, "step": 60550 }, { "epoch": 0.72, "learning_rate": 1.0665557346022917e-06, "logits/chosen": -2.823390007019043, "logits/rejected": -2.0042529106140137, "logps/chosen": -153.24342346191406, "logps/rejected": -1025.650634765625, "loss": 0.0587, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0079073905944824, "rewards/margins": 8.858434677124023, "rewards/rejected": -9.866342544555664, "step": 60560 }, { "epoch": 0.73, "learning_rate": 1.0657000111608613e-06, "logits/chosen": -2.8984265327453613, "logits/rejected": -2.501128911972046, "logps/chosen": -98.89906311035156, "logps/rejected": -944.3992919921875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.5352541208267212, "rewards/margins": 8.517754554748535, "rewards/rejected": -9.053009033203125, "step": 60570 }, { "epoch": 0.73, "learning_rate": 1.064844538144225e-06, "logits/chosen": -2.8492586612701416, "logits/rejected": -2.323498249053955, "logps/chosen": -97.84083557128906, "logps/rejected": -838.6531982421875, "loss": 0.1583, "rewards/accuracies": 1.0, "rewards/chosen": -0.5206311345100403, "rewards/margins": 7.460585117340088, "rewards/rejected": -7.9812164306640625, "step": 60580 }, { "epoch": 0.73, "learning_rate": 1.0639893157017467e-06, "logits/chosen": -2.9047207832336426, "logits/rejected": -2.1936376094818115, "logps/chosen": -146.63589477539062, "logps/rejected": -1142.784912109375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9170452356338501, "rewards/margins": 10.090510368347168, "rewards/rejected": -11.00755500793457, "step": 60590 }, { "epoch": 0.73, "learning_rate": 1.0631343439827452e-06, "logits/chosen": -2.8814597129821777, "logits/rejected": -1.9983059167861938, "logps/chosen": -142.9755401611328, "logps/rejected": -1097.026123046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8566837310791016, "rewards/margins": 9.678884506225586, "rewards/rejected": -10.535568237304688, "step": 60600 }, { "epoch": 0.73, "learning_rate": 1.0622796231364969e-06, "logits/chosen": -2.87477707862854, "logits/rejected": -2.3654556274414062, "logps/chosen": -119.40641784667969, "logps/rejected": -842.2713623046875, "loss": 0.1058, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7116214036941528, "rewards/margins": 7.326972961425781, "rewards/rejected": -8.038593292236328, "step": 60610 }, { "epoch": 0.73, "learning_rate": 1.061425153312234e-06, "logits/chosen": -2.9360532760620117, "logits/rejected": -2.2068910598754883, "logps/chosen": -112.63334655761719, "logps/rejected": -1009.4158325195312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5802567601203918, "rewards/margins": 9.107339859008789, "rewards/rejected": -9.687597274780273, "step": 60620 }, { "epoch": 0.73, "learning_rate": 1.0605709346591427e-06, "logits/chosen": -2.9394094944000244, "logits/rejected": -2.3980910778045654, "logps/chosen": -97.44590759277344, "logps/rejected": -885.3636474609375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5035573244094849, "rewards/margins": 7.964790344238281, "rewards/rejected": -8.468347549438477, "step": 60630 }, { "epoch": 0.73, "learning_rate": 1.0597169673263688e-06, "logits/chosen": -2.9030632972717285, "logits/rejected": -2.231196403503418, "logps/chosen": -122.83222961425781, "logps/rejected": -1011.4112548828125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7046924829483032, "rewards/margins": 9.001775741577148, "rewards/rejected": -9.70646858215332, "step": 60640 }, { "epoch": 0.73, "learning_rate": 1.058863251463012e-06, "logits/chosen": -2.867180585861206, "logits/rejected": -2.256349563598633, "logps/chosen": -99.56755065917969, "logps/rejected": -1027.818603515625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5049034953117371, "rewards/margins": 9.36631965637207, "rewards/rejected": -9.871223449707031, "step": 60650 }, { "epoch": 0.73, "learning_rate": 1.0580097872181289e-06, "logits/chosen": -2.9012465476989746, "logits/rejected": -2.1891074180603027, "logps/chosen": -120.43338775634766, "logps/rejected": -1062.7725830078125, "loss": 0.1128, "rewards/accuracies": 1.0, "rewards/chosen": -0.6737769842147827, "rewards/margins": 9.557680130004883, "rewards/rejected": -10.231456756591797, "step": 60660 }, { "epoch": 0.73, "learning_rate": 1.057156574740732e-06, "logits/chosen": -2.8977465629577637, "logits/rejected": -2.517145872116089, "logps/chosen": -95.75318908691406, "logps/rejected": -828.5147705078125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.5151809453964233, "rewards/margins": 7.393847465515137, "rewards/rejected": -7.909028053283691, "step": 60670 }, { "epoch": 0.73, "learning_rate": 1.0563036141797897e-06, "logits/chosen": -2.828329563140869, "logits/rejected": -2.044085741043091, "logps/chosen": -146.6122589111328, "logps/rejected": -1139.64111328125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8839000463485718, "rewards/margins": 10.091398239135742, "rewards/rejected": -10.975297927856445, "step": 60680 }, { "epoch": 0.73, "learning_rate": 1.055450905684228e-06, "logits/chosen": -2.8849451541900635, "logits/rejected": -2.190274715423584, "logps/chosen": -115.3370132446289, "logps/rejected": -1004.0107421875, "loss": 0.0513, "rewards/accuracies": 1.0, "rewards/chosen": -0.6908849477767944, "rewards/margins": 8.956486701965332, "rewards/rejected": -9.647371292114258, "step": 60690 }, { "epoch": 0.73, "learning_rate": 1.054598449402925e-06, "logits/chosen": -2.8699240684509277, "logits/rejected": -2.4386961460113525, "logps/chosen": -106.44051361083984, "logps/rejected": -879.5611572265625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.5558692216873169, "rewards/margins": 7.817713737487793, "rewards/rejected": -8.373583793640137, "step": 60700 }, { "epoch": 0.73, "learning_rate": 1.053746245484718e-06, "logits/chosen": -2.8268063068389893, "logits/rejected": -2.217776298522949, "logps/chosen": -126.23655700683594, "logps/rejected": -945.5340576171875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6979008913040161, "rewards/margins": 8.350797653198242, "rewards/rejected": -9.048699378967285, "step": 60710 }, { "epoch": 0.73, "learning_rate": 1.0528942940783998e-06, "logits/chosen": -2.862653970718384, "logits/rejected": -2.421316623687744, "logps/chosen": -120.52592468261719, "logps/rejected": -847.7835693359375, "loss": 0.2206, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7862645983695984, "rewards/margins": 7.301912784576416, "rewards/rejected": -8.088176727294922, "step": 60720 }, { "epoch": 0.73, "learning_rate": 1.052042595332719e-06, "logits/chosen": -2.9152021408081055, "logits/rejected": -2.5498147010803223, "logps/chosen": -96.23954010009766, "logps/rejected": -902.68115234375, "loss": 0.0888, "rewards/accuracies": 1.0, "rewards/chosen": -0.5308391451835632, "rewards/margins": 8.114328384399414, "rewards/rejected": -8.645167350769043, "step": 60730 }, { "epoch": 0.73, "learning_rate": 1.0511911493963794e-06, "logits/chosen": -2.876400947570801, "logits/rejected": -2.262861728668213, "logps/chosen": -116.82160949707031, "logps/rejected": -955.3978271484375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6429592967033386, "rewards/margins": 8.510371208190918, "rewards/rejected": -9.153329849243164, "step": 60740 }, { "epoch": 0.73, "learning_rate": 1.050339956418042e-06, "logits/chosen": -2.9072048664093018, "logits/rejected": -2.5106804370880127, "logps/chosen": -70.1481704711914, "logps/rejected": -812.20361328125, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -0.3082306981086731, "rewards/margins": 7.43871545791626, "rewards/rejected": -7.746945381164551, "step": 60750 }, { "epoch": 0.73, "learning_rate": 1.0494890165463213e-06, "logits/chosen": -2.884533405303955, "logits/rejected": -2.386174440383911, "logps/chosen": -108.70777893066406, "logps/rejected": -875.5380859375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6442474126815796, "rewards/margins": 7.731607913970947, "rewards/rejected": -8.375855445861816, "step": 60760 }, { "epoch": 0.73, "learning_rate": 1.0486383299297896e-06, "logits/chosen": -2.9202842712402344, "logits/rejected": -2.440103054046631, "logps/chosen": -98.09156799316406, "logps/rejected": -815.7059326171875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.5032684803009033, "rewards/margins": 7.277249336242676, "rewards/rejected": -7.780517578125, "step": 60770 }, { "epoch": 0.73, "learning_rate": 1.047787896716974e-06, "logits/chosen": -2.9135193824768066, "logits/rejected": -2.3132762908935547, "logps/chosen": -121.30241394042969, "logps/rejected": -868.0935668945312, "loss": 0.0866, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7349704504013062, "rewards/margins": 7.553544521331787, "rewards/rejected": -8.288515090942383, "step": 60780 }, { "epoch": 0.73, "learning_rate": 1.046937717056359e-06, "logits/chosen": -2.861372232437134, "logits/rejected": -2.253633499145508, "logps/chosen": -108.1939697265625, "logps/rejected": -999.9122314453125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5799354314804077, "rewards/margins": 9.033957481384277, "rewards/rejected": -9.613893508911133, "step": 60790 }, { "epoch": 0.73, "learning_rate": 1.0460877910963827e-06, "logits/chosen": -2.9055910110473633, "logits/rejected": -2.679291248321533, "logps/chosen": -70.61260223388672, "logps/rejected": -751.92578125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.3526146411895752, "rewards/margins": 6.795832633972168, "rewards/rejected": -7.1484479904174805, "step": 60800 }, { "epoch": 0.73, "learning_rate": 1.04523811898544e-06, "logits/chosen": -2.9070494174957275, "logits/rejected": -2.4350433349609375, "logps/chosen": -83.86559295654297, "logps/rejected": -860.9530029296875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.38579875230789185, "rewards/margins": 7.827568054199219, "rewards/rejected": -8.213366508483887, "step": 60810 }, { "epoch": 0.73, "learning_rate": 1.044388700871882e-06, "logits/chosen": -2.816270112991333, "logits/rejected": -2.2363171577453613, "logps/chosen": -94.3191909790039, "logps/rejected": -970.9177856445312, "loss": 0.1431, "rewards/accuracies": 1.0, "rewards/chosen": -0.4903610348701477, "rewards/margins": 8.82748031616211, "rewards/rejected": -9.317840576171875, "step": 60820 }, { "epoch": 0.73, "learning_rate": 1.0435395369040132e-06, "logits/chosen": -2.8550686836242676, "logits/rejected": -2.096033811569214, "logps/chosen": -117.75777435302734, "logps/rejected": -991.1770629882812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6464055180549622, "rewards/margins": 8.87195873260498, "rewards/rejected": -9.518363952636719, "step": 60830 }, { "epoch": 0.73, "learning_rate": 1.0426906272300963e-06, "logits/chosen": -2.928790807723999, "logits/rejected": -2.417564868927002, "logps/chosen": -90.93404388427734, "logps/rejected": -872.3327026367188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.43024325370788574, "rewards/margins": 7.90418004989624, "rewards/rejected": -8.334423065185547, "step": 60840 }, { "epoch": 0.73, "learning_rate": 1.041841971998348e-06, "logits/chosen": -2.8561453819274902, "logits/rejected": -2.418407917022705, "logps/chosen": -90.82130432128906, "logps/rejected": -876.2517700195312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4797891676425934, "rewards/margins": 7.892128944396973, "rewards/rejected": -8.371918678283691, "step": 60850 }, { "epoch": 0.73, "learning_rate": 1.040993571356942e-06, "logits/chosen": -2.8808324337005615, "logits/rejected": -2.3420355319976807, "logps/chosen": -91.15911865234375, "logps/rejected": -949.5830078125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.49273982644081116, "rewards/margins": 8.620006561279297, "rewards/rejected": -9.112746238708496, "step": 60860 }, { "epoch": 0.73, "learning_rate": 1.0401454254540058e-06, "logits/chosen": -2.871798038482666, "logits/rejected": -1.8767106533050537, "logps/chosen": -152.7036590576172, "logps/rejected": -1040.7308349609375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8925908803939819, "rewards/margins": 9.105291366577148, "rewards/rejected": -9.997883796691895, "step": 60870 }, { "epoch": 0.73, "learning_rate": 1.0392975344376241e-06, "logits/chosen": -2.9317336082458496, "logits/rejected": -2.4280917644500732, "logps/chosen": -96.74068450927734, "logps/rejected": -929.91943359375, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": -0.5239154100418091, "rewards/margins": 8.396827697753906, "rewards/rejected": -8.920743942260742, "step": 60880 }, { "epoch": 0.73, "learning_rate": 1.0384498984558367e-06, "logits/chosen": -2.8959755897521973, "logits/rejected": -2.22171688079834, "logps/chosen": -114.70481872558594, "logps/rejected": -1011.3616333007812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6201084852218628, "rewards/margins": 9.088055610656738, "rewards/rejected": -9.708163261413574, "step": 60890 }, { "epoch": 0.73, "learning_rate": 1.0376025176566368e-06, "logits/chosen": -2.8467178344726562, "logits/rejected": -2.5616908073425293, "logps/chosen": -98.57877349853516, "logps/rejected": -824.0490112304688, "loss": 0.0216, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.605049729347229, "rewards/margins": 7.2650275230407715, "rewards/rejected": -7.870077610015869, "step": 60900 }, { "epoch": 0.73, "learning_rate": 1.0367553921879758e-06, "logits/chosen": -2.85978364944458, "logits/rejected": -2.337557077407837, "logps/chosen": -111.02587890625, "logps/rejected": -1011.7236328125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6671051979064941, "rewards/margins": 9.060052871704102, "rewards/rejected": -9.727158546447754, "step": 60910 }, { "epoch": 0.73, "learning_rate": 1.0359085221977594e-06, "logits/chosen": -2.893181085586548, "logits/rejected": -2.1838696002960205, "logps/chosen": -108.73421478271484, "logps/rejected": -1004.2523193359375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5719051957130432, "rewards/margins": 9.079278945922852, "rewards/rejected": -9.651185035705566, "step": 60920 }, { "epoch": 0.73, "learning_rate": 1.0350619078338483e-06, "logits/chosen": -2.8707022666931152, "logits/rejected": -2.305676221847534, "logps/chosen": -87.62349700927734, "logps/rejected": -947.4971923828125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.46261510252952576, "rewards/margins": 8.626934051513672, "rewards/rejected": -9.08954906463623, "step": 60930 }, { "epoch": 0.73, "learning_rate": 1.0342155492440598e-06, "logits/chosen": -2.925095558166504, "logits/rejected": -2.612269639968872, "logps/chosen": -83.26210021972656, "logps/rejected": -863.5021362304688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.41920456290245056, "rewards/margins": 7.839892387390137, "rewards/rejected": -8.259098052978516, "step": 60940 }, { "epoch": 0.73, "learning_rate": 1.0333694465761663e-06, "logits/chosen": -2.8699283599853516, "logits/rejected": -2.390554904937744, "logps/chosen": -118.2139892578125, "logps/rejected": -960.8896484375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7159547209739685, "rewards/margins": 8.504132270812988, "rewards/rejected": -9.220086097717285, "step": 60950 }, { "epoch": 0.73, "learning_rate": 1.032523599977893e-06, "logits/chosen": -2.8712573051452637, "logits/rejected": -2.1232495307922363, "logps/chosen": -133.57334899902344, "logps/rejected": -992.6843872070312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8041221499443054, "rewards/margins": 8.699586868286133, "rewards/rejected": -9.50370979309082, "step": 60960 }, { "epoch": 0.73, "learning_rate": 1.0316780095969223e-06, "logits/chosen": -2.921276569366455, "logits/rejected": -2.428287982940674, "logps/chosen": -97.58757019042969, "logps/rejected": -944.0185546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5379582643508911, "rewards/margins": 8.511297225952148, "rewards/rejected": -9.04925537109375, "step": 60970 }, { "epoch": 0.73, "learning_rate": 1.030832675580894e-06, "logits/chosen": -2.8939075469970703, "logits/rejected": -2.430448055267334, "logps/chosen": -80.56351470947266, "logps/rejected": -855.2047119140625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.35219115018844604, "rewards/margins": 7.8293561935424805, "rewards/rejected": -8.181547164916992, "step": 60980 }, { "epoch": 0.73, "learning_rate": 1.0299875980774002e-06, "logits/chosen": -2.8420987129211426, "logits/rejected": -2.1998064517974854, "logps/chosen": -128.35955810546875, "logps/rejected": -979.3280029296875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7767314910888672, "rewards/margins": 8.611021041870117, "rewards/rejected": -9.3877534866333, "step": 60990 }, { "epoch": 0.73, "learning_rate": 1.0291427772339891e-06, "logits/chosen": -2.8661818504333496, "logits/rejected": -2.4681670665740967, "logps/chosen": -88.60255432128906, "logps/rejected": -864.6488037109375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4917585253715515, "rewards/margins": 7.774563789367676, "rewards/rejected": -8.26632308959961, "step": 61000 }, { "epoch": 0.73, "learning_rate": 1.0282982131981648e-06, "logits/chosen": -2.9147236347198486, "logits/rejected": -2.4210002422332764, "logps/chosen": -114.74476623535156, "logps/rejected": -884.3674926757812, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6857466101646423, "rewards/margins": 7.7597784996032715, "rewards/rejected": -8.445526123046875, "step": 61010 }, { "epoch": 0.73, "learning_rate": 1.0274539061173841e-06, "logits/chosen": -2.888092279434204, "logits/rejected": -2.262634754180908, "logps/chosen": -117.6941909790039, "logps/rejected": -1040.778564453125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6154953241348267, "rewards/margins": 9.393056869506836, "rewards/rejected": -10.008554458618164, "step": 61020 }, { "epoch": 0.73, "learning_rate": 1.0266098561390614e-06, "logits/chosen": -2.8978798389434814, "logits/rejected": -2.5473008155822754, "logps/chosen": -77.60186004638672, "logps/rejected": -795.856689453125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3793710470199585, "rewards/margins": 7.207376003265381, "rewards/rejected": -7.586747646331787, "step": 61030 }, { "epoch": 0.73, "learning_rate": 1.0257660634105656e-06, "logits/chosen": -2.917120933532715, "logits/rejected": -2.5932276248931885, "logps/chosen": -101.44876861572266, "logps/rejected": -899.4177856445312, "loss": 0.1208, "rewards/accuracies": 1.0, "rewards/chosen": -0.6399660110473633, "rewards/margins": 7.972440242767334, "rewards/rejected": -8.612405776977539, "step": 61040 }, { "epoch": 0.73, "learning_rate": 1.024922528079221e-06, "logits/chosen": -2.8858675956726074, "logits/rejected": -2.1222286224365234, "logps/chosen": -124.36653900146484, "logps/rejected": -929.0260009765625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6779057383537292, "rewards/margins": 8.220808982849121, "rewards/rejected": -8.898715019226074, "step": 61050 }, { "epoch": 0.73, "learning_rate": 1.0240792502923058e-06, "logits/chosen": -2.8470020294189453, "logits/rejected": -2.0805530548095703, "logps/chosen": -129.2334747314453, "logps/rejected": -1019.806640625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6722273826599121, "rewards/margins": 9.12790298461914, "rewards/rejected": -9.800129890441895, "step": 61060 }, { "epoch": 0.73, "learning_rate": 1.0232362301970556e-06, "logits/chosen": -2.876718521118164, "logits/rejected": -2.2892508506774902, "logps/chosen": -135.72689819335938, "logps/rejected": -950.7540283203125, "loss": 0.1055, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8872312307357788, "rewards/margins": 8.222440719604492, "rewards/rejected": -9.109670639038086, "step": 61070 }, { "epoch": 0.73, "learning_rate": 1.0223934679406568e-06, "logits/chosen": -2.8644330501556396, "logits/rejected": -2.0875065326690674, "logps/chosen": -136.19564819335938, "logps/rejected": -1092.8135986328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8269282579421997, "rewards/margins": 9.673761367797852, "rewards/rejected": -10.500689506530762, "step": 61080 }, { "epoch": 0.73, "learning_rate": 1.0215509636702548e-06, "logits/chosen": -2.9020862579345703, "logits/rejected": -2.5546905994415283, "logps/chosen": -80.0325698852539, "logps/rejected": -819.8681640625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.386622816324234, "rewards/margins": 7.447742462158203, "rewards/rejected": -7.8343658447265625, "step": 61090 }, { "epoch": 0.73, "learning_rate": 1.0207087175329483e-06, "logits/chosen": -2.857060194015503, "logits/rejected": -2.0254287719726562, "logps/chosen": -144.59918212890625, "logps/rejected": -1024.4588623046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8990647196769714, "rewards/margins": 8.941268920898438, "rewards/rejected": -9.840332984924316, "step": 61100 }, { "epoch": 0.73, "learning_rate": 1.0198667296757911e-06, "logits/chosen": -2.880242109298706, "logits/rejected": -2.5566773414611816, "logps/chosen": -72.62632751464844, "logps/rejected": -853.1931762695312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3457198441028595, "rewards/margins": 7.807431221008301, "rewards/rejected": -8.15315055847168, "step": 61110 }, { "epoch": 0.73, "learning_rate": 1.019025000245792e-06, "logits/chosen": -2.874140501022339, "logits/rejected": -2.1885337829589844, "logps/chosen": -115.56687927246094, "logps/rejected": -1110.5501708984375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6596855521202087, "rewards/margins": 10.048023223876953, "rewards/rejected": -10.707708358764648, "step": 61120 }, { "epoch": 0.73, "learning_rate": 1.0181835293899148e-06, "logits/chosen": -2.8780932426452637, "logits/rejected": -2.407590389251709, "logps/chosen": -95.63279724121094, "logps/rejected": -925.40771484375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5581844449043274, "rewards/margins": 8.29779052734375, "rewards/rejected": -8.855974197387695, "step": 61130 }, { "epoch": 0.73, "learning_rate": 1.0173423172550783e-06, "logits/chosen": -2.8363852500915527, "logits/rejected": -2.1982979774475098, "logps/chosen": -122.00970458984375, "logps/rejected": -1005.5572509765625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7495815753936768, "rewards/margins": 8.910713195800781, "rewards/rejected": -9.660295486450195, "step": 61140 }, { "epoch": 0.73, "learning_rate": 1.0165013639881549e-06, "logits/chosen": -2.9009485244750977, "logits/rejected": -2.2041237354278564, "logps/chosen": -128.85585021972656, "logps/rejected": -903.3284912109375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7743188142776489, "rewards/margins": 7.876849174499512, "rewards/rejected": -8.651168823242188, "step": 61150 }, { "epoch": 0.73, "learning_rate": 1.0156606697359728e-06, "logits/chosen": -2.8824710845947266, "logits/rejected": -2.4815850257873535, "logps/chosen": -88.31288146972656, "logps/rejected": -920.9845581054688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.45091715455055237, "rewards/margins": 8.364629745483398, "rewards/rejected": -8.815546035766602, "step": 61160 }, { "epoch": 0.73, "learning_rate": 1.0148202346453152e-06, "logits/chosen": -2.8749821186065674, "logits/rejected": -2.3040974140167236, "logps/chosen": -107.25279235839844, "logps/rejected": -935.6015625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5820160508155823, "rewards/margins": 8.386372566223145, "rewards/rejected": -8.968388557434082, "step": 61170 }, { "epoch": 0.73, "learning_rate": 1.0139800588629197e-06, "logits/chosen": -2.87731671333313, "logits/rejected": -2.335402488708496, "logps/chosen": -105.89442443847656, "logps/rejected": -942.6329956054688, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.5498811602592468, "rewards/margins": 8.486610412597656, "rewards/rejected": -9.036490440368652, "step": 61180 }, { "epoch": 0.73, "learning_rate": 1.013140142535479e-06, "logits/chosen": -2.8535549640655518, "logits/rejected": -2.028987407684326, "logps/chosen": -136.83859252929688, "logps/rejected": -1093.8541259765625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.7863109707832336, "rewards/margins": 9.747562408447266, "rewards/rejected": -10.533872604370117, "step": 61190 }, { "epoch": 0.73, "learning_rate": 1.0123004858096394e-06, "logits/chosen": -2.8570945262908936, "logits/rejected": -2.576961040496826, "logps/chosen": -141.64089965820312, "logps/rejected": -790.8155517578125, "loss": 0.275, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.0262755155563354, "rewards/margins": 6.510832786560059, "rewards/rejected": -7.537109375, "step": 61200 }, { "epoch": 0.73, "learning_rate": 1.0114610888320043e-06, "logits/chosen": -2.9197492599487305, "logits/rejected": -2.30083966255188, "logps/chosen": -119.5265121459961, "logps/rejected": -959.8585815429688, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -0.7261368632316589, "rewards/margins": 8.471647262573242, "rewards/rejected": -9.197782516479492, "step": 61210 }, { "epoch": 0.73, "learning_rate": 1.0106219517491278e-06, "logits/chosen": -2.9130239486694336, "logits/rejected": -2.415360689163208, "logps/chosen": -123.976318359375, "logps/rejected": -800.1653442382812, "loss": 0.1062, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7371007800102234, "rewards/margins": 6.876443386077881, "rewards/rejected": -7.613543510437012, "step": 61220 }, { "epoch": 0.73, "learning_rate": 1.0097830747075218e-06, "logits/chosen": -2.840121030807495, "logits/rejected": -2.5628581047058105, "logps/chosen": -84.74927520751953, "logps/rejected": -859.4293823242188, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": -0.48677486181259155, "rewards/margins": 7.726380825042725, "rewards/rejected": -8.213155746459961, "step": 61230 }, { "epoch": 0.73, "learning_rate": 1.0089444578536523e-06, "logits/chosen": -2.84822940826416, "logits/rejected": -2.238668918609619, "logps/chosen": -96.62767791748047, "logps/rejected": -844.0953369140625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5124310255050659, "rewards/margins": 7.559731960296631, "rewards/rejected": -8.072163581848145, "step": 61240 }, { "epoch": 0.73, "learning_rate": 1.008106101333939e-06, "logits/chosen": -2.885847568511963, "logits/rejected": -2.111314058303833, "logps/chosen": -129.03927612304688, "logps/rejected": -1075.3812255859375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.745901882648468, "rewards/margins": 9.597046852111816, "rewards/rejected": -10.342947959899902, "step": 61250 }, { "epoch": 0.73, "learning_rate": 1.0072680052947573e-06, "logits/chosen": -2.934784173965454, "logits/rejected": -2.347407817840576, "logps/chosen": -102.26253509521484, "logps/rejected": -915.5185546875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.5730565786361694, "rewards/margins": 8.210087776184082, "rewards/rejected": -8.783143997192383, "step": 61260 }, { "epoch": 0.73, "learning_rate": 1.0064301698824365e-06, "logits/chosen": -2.8257360458374023, "logits/rejected": -2.4472532272338867, "logps/chosen": -122.09320068359375, "logps/rejected": -874.8585815429688, "loss": 0.1412, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8091572523117065, "rewards/margins": 7.542845726013184, "rewards/rejected": -8.352005004882812, "step": 61270 }, { "epoch": 0.73, "learning_rate": 1.005592595243259e-06, "logits/chosen": -2.8633627891540527, "logits/rejected": -2.365833044052124, "logps/chosen": -93.60911560058594, "logps/rejected": -891.2214965820312, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.4621802270412445, "rewards/margins": 8.061932563781738, "rewards/rejected": -8.524113655090332, "step": 61280 }, { "epoch": 0.73, "learning_rate": 1.0047552815234638e-06, "logits/chosen": -2.883711099624634, "logits/rejected": -2.217115879058838, "logps/chosen": -149.36898803710938, "logps/rejected": -1036.06884765625, "loss": 0.1257, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9986379742622375, "rewards/margins": 8.96000862121582, "rewards/rejected": -9.958646774291992, "step": 61290 }, { "epoch": 0.73, "learning_rate": 1.0039182288692437e-06, "logits/chosen": -2.837385654449463, "logits/rejected": -2.2985947132110596, "logps/chosen": -93.18157958984375, "logps/rejected": -889.3484497070312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.47560063004493713, "rewards/margins": 8.052068710327148, "rewards/rejected": -8.527669906616211, "step": 61300 }, { "epoch": 0.73, "learning_rate": 1.0030814374267457e-06, "logits/chosen": -2.9242610931396484, "logits/rejected": -2.079542636871338, "logps/chosen": -124.62738037109375, "logps/rejected": -916.279296875, "loss": 0.0269, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7594456672668457, "rewards/margins": 8.002300262451172, "rewards/rejected": -8.761744499206543, "step": 61310 }, { "epoch": 0.73, "learning_rate": 1.002244907342071e-06, "logits/chosen": -2.910670757293701, "logits/rejected": -2.359513759613037, "logps/chosen": -99.03617858886719, "logps/rejected": -883.9410400390625, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": -0.51207435131073, "rewards/margins": 7.936471462249756, "rewards/rejected": -8.448546409606934, "step": 61320 }, { "epoch": 0.73, "learning_rate": 1.0014086387612756e-06, "logits/chosen": -2.887666940689087, "logits/rejected": -2.157010078430176, "logps/chosen": -122.69561767578125, "logps/rejected": -1037.4566650390625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6854352951049805, "rewards/margins": 9.293779373168945, "rewards/rejected": -9.979214668273926, "step": 61330 }, { "epoch": 0.73, "learning_rate": 1.0005726318303707e-06, "logits/chosen": -2.8612000942230225, "logits/rejected": -2.2756574153900146, "logps/chosen": -133.67648315429688, "logps/rejected": -993.2750854492188, "loss": 0.1411, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8632612228393555, "rewards/margins": 8.674748420715332, "rewards/rejected": -9.538009643554688, "step": 61340 }, { "epoch": 0.73, "learning_rate": 9.99736886695319e-07, "logits/chosen": -2.8633782863616943, "logits/rejected": -2.210056781768799, "logps/chosen": -153.1700439453125, "logps/rejected": -875.6005859375, "loss": 0.2638, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9986980557441711, "rewards/margins": 7.3656415939331055, "rewards/rejected": -8.364337921142578, "step": 61350 }, { "epoch": 0.73, "learning_rate": 9.9890140350204e-07, "logits/chosen": -2.8981637954711914, "logits/rejected": -2.4749648571014404, "logps/chosen": -88.90426635742188, "logps/rejected": -790.6354370117188, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.4137519299983978, "rewards/margins": 7.1208086013793945, "rewards/rejected": -7.534559726715088, "step": 61360 }, { "epoch": 0.73, "learning_rate": 9.98066182396407e-07, "logits/chosen": -2.843989849090576, "logits/rejected": -2.3019375801086426, "logps/chosen": -108.23990631103516, "logps/rejected": -898.7191162109375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.6339560747146606, "rewards/margins": 7.970863342285156, "rewards/rejected": -8.604820251464844, "step": 61370 }, { "epoch": 0.73, "learning_rate": 9.972312235242467e-07, "logits/chosen": -2.848083972930908, "logits/rejected": -2.3402342796325684, "logps/chosen": -84.89615631103516, "logps/rejected": -856.4279174804688, "loss": 0.0872, "rewards/accuracies": 1.0, "rewards/chosen": -0.4208998680114746, "rewards/margins": 7.7626633644104, "rewards/rejected": -8.183564186096191, "step": 61380 }, { "epoch": 0.73, "learning_rate": 9.963965270313414e-07, "logits/chosen": -2.9488461017608643, "logits/rejected": -2.3934383392333984, "logps/chosen": -110.13240051269531, "logps/rejected": -957.3575439453125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5606541037559509, "rewards/margins": 8.623563766479492, "rewards/rejected": -9.18421745300293, "step": 61390 }, { "epoch": 0.73, "learning_rate": 9.955620930634263e-07, "logits/chosen": -2.898536443710327, "logits/rejected": -2.5723202228546143, "logps/chosen": -81.5694808959961, "logps/rejected": -778.0420532226562, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4364245533943176, "rewards/margins": 6.973330497741699, "rewards/rejected": -7.409754753112793, "step": 61400 }, { "epoch": 0.74, "learning_rate": 9.947279217661914e-07, "logits/chosen": -2.866704225540161, "logits/rejected": -2.3027970790863037, "logps/chosen": -99.37442779541016, "logps/rejected": -972.37548828125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.5581472516059875, "rewards/margins": 8.76974868774414, "rewards/rejected": -9.327895164489746, "step": 61410 }, { "epoch": 0.74, "learning_rate": 9.938940132852806e-07, "logits/chosen": -2.882272243499756, "logits/rejected": -2.444843053817749, "logps/chosen": -107.7472152709961, "logps/rejected": -933.6184692382812, "loss": 0.1314, "rewards/accuracies": 1.0, "rewards/chosen": -0.6351019740104675, "rewards/margins": 8.318613052368164, "rewards/rejected": -8.953714370727539, "step": 61420 }, { "epoch": 0.74, "learning_rate": 9.93060367766292e-07, "logits/chosen": -2.8296380043029785, "logits/rejected": -2.360373020172119, "logps/chosen": -105.83243560791016, "logps/rejected": -936.3763427734375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6056958436965942, "rewards/margins": 8.369834899902344, "rewards/rejected": -8.975530624389648, "step": 61430 }, { "epoch": 0.74, "learning_rate": 9.922269853547778e-07, "logits/chosen": -2.936875820159912, "logits/rejected": -2.591303586959839, "logps/chosen": -79.15973663330078, "logps/rejected": -798.4380493164062, "loss": 0.1194, "rewards/accuracies": 1.0, "rewards/chosen": -0.3758448660373688, "rewards/margins": 7.2484564781188965, "rewards/rejected": -7.624300956726074, "step": 61440 }, { "epoch": 0.74, "learning_rate": 9.913938661962443e-07, "logits/chosen": -2.8934550285339355, "logits/rejected": -2.6257338523864746, "logps/chosen": -65.43443298339844, "logps/rejected": -744.3494262695312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.25502297282218933, "rewards/margins": 6.8342390060424805, "rewards/rejected": -7.089262962341309, "step": 61450 }, { "epoch": 0.74, "learning_rate": 9.905610104361528e-07, "logits/chosen": -2.8885514736175537, "logits/rejected": -2.4952445030212402, "logps/chosen": -79.05196380615234, "logps/rejected": -800.847412109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.35842546820640564, "rewards/margins": 7.275017738342285, "rewards/rejected": -7.6334428787231445, "step": 61460 }, { "epoch": 0.74, "learning_rate": 9.897284182199158e-07, "logits/chosen": -2.9219448566436768, "logits/rejected": -2.1663851737976074, "logps/chosen": -134.23667907714844, "logps/rejected": -1131.0440673828125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7605099678039551, "rewards/margins": 10.138853073120117, "rewards/rejected": -10.899362564086914, "step": 61470 }, { "epoch": 0.74, "learning_rate": 9.88896089692902e-07, "logits/chosen": -2.831444263458252, "logits/rejected": -2.2961666584014893, "logps/chosen": -144.69203186035156, "logps/rejected": -889.0872802734375, "loss": 0.191, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9812862277030945, "rewards/margins": 7.526776313781738, "rewards/rejected": -8.508062362670898, "step": 61480 }, { "epoch": 0.74, "learning_rate": 9.880640250004345e-07, "logits/chosen": -2.864794969558716, "logits/rejected": -2.341507911682129, "logps/chosen": -95.12196350097656, "logps/rejected": -908.8810424804688, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -0.4695375859737396, "rewards/margins": 8.245643615722656, "rewards/rejected": -8.715181350708008, "step": 61490 }, { "epoch": 0.74, "learning_rate": 9.872322242877886e-07, "logits/chosen": -2.850898265838623, "logits/rejected": -2.147016763687134, "logps/chosen": -121.0508041381836, "logps/rejected": -996.3820190429688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7401286363601685, "rewards/margins": 8.827607154846191, "rewards/rejected": -9.56773567199707, "step": 61500 }, { "epoch": 0.74, "learning_rate": 9.86400687700195e-07, "logits/chosen": -2.876986265182495, "logits/rejected": -2.304539918899536, "logps/chosen": -123.45616149902344, "logps/rejected": -1070.0125732421875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7550113797187805, "rewards/margins": 9.542261123657227, "rewards/rejected": -10.297272682189941, "step": 61510 }, { "epoch": 0.74, "learning_rate": 9.855694153828373e-07, "logits/chosen": -2.8576295375823975, "logits/rejected": -2.3690340518951416, "logps/chosen": -105.67451477050781, "logps/rejected": -936.4163818359375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5882381200790405, "rewards/margins": 8.385543823242188, "rewards/rejected": -8.973783493041992, "step": 61520 }, { "epoch": 0.74, "learning_rate": 9.847384074808546e-07, "logits/chosen": -2.885836124420166, "logits/rejected": -2.1033504009246826, "logps/chosen": -123.49223327636719, "logps/rejected": -945.3058471679688, "loss": 0.0793, "rewards/accuracies": 1.0, "rewards/chosen": -0.6526419520378113, "rewards/margins": 8.407705307006836, "rewards/rejected": -9.060346603393555, "step": 61530 }, { "epoch": 0.74, "learning_rate": 9.839076641393366e-07, "logits/chosen": -2.9105923175811768, "logits/rejected": -2.415818452835083, "logps/chosen": -98.22895812988281, "logps/rejected": -880.4315185546875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5792756080627441, "rewards/margins": 7.8489990234375, "rewards/rejected": -8.428274154663086, "step": 61540 }, { "epoch": 0.74, "learning_rate": 9.830771855033297e-07, "logits/chosen": -2.870880126953125, "logits/rejected": -2.2342183589935303, "logps/chosen": -121.35762023925781, "logps/rejected": -972.6700439453125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6995700001716614, "rewards/margins": 8.619380950927734, "rewards/rejected": -9.318951606750488, "step": 61550 }, { "epoch": 0.74, "learning_rate": 9.822469717178332e-07, "logits/chosen": -2.836540699005127, "logits/rejected": -2.2013659477233887, "logps/chosen": -103.93821716308594, "logps/rejected": -935.85205078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5405124425888062, "rewards/margins": 8.432254791259766, "rewards/rejected": -8.972765922546387, "step": 61560 }, { "epoch": 0.74, "learning_rate": 9.814170229278e-07, "logits/chosen": -2.8714778423309326, "logits/rejected": -2.337085247039795, "logps/chosen": -107.87178039550781, "logps/rejected": -882.4691162109375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5573240518569946, "rewards/margins": 7.871049404144287, "rewards/rejected": -8.428372383117676, "step": 61570 }, { "epoch": 0.74, "learning_rate": 9.805873392781367e-07, "logits/chosen": -2.882983922958374, "logits/rejected": -2.368612766265869, "logps/chosen": -90.4380111694336, "logps/rejected": -876.7593994140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.45947209000587463, "rewards/margins": 7.923449516296387, "rewards/rejected": -8.382922172546387, "step": 61580 }, { "epoch": 0.74, "learning_rate": 9.79757920913705e-07, "logits/chosen": -2.8760499954223633, "logits/rejected": -2.0562195777893066, "logps/chosen": -137.59194946289062, "logps/rejected": -978.3786010742188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.8273491859436035, "rewards/margins": 8.546852111816406, "rewards/rejected": -9.374200820922852, "step": 61590 }, { "epoch": 0.74, "learning_rate": 9.789287679793172e-07, "logits/chosen": -2.891045331954956, "logits/rejected": -2.264343023300171, "logps/chosen": -109.03597259521484, "logps/rejected": -985.6819458007812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5840095281600952, "rewards/margins": 8.881179809570312, "rewards/rejected": -9.465188980102539, "step": 61600 }, { "epoch": 0.74, "learning_rate": 9.78099880619742e-07, "logits/chosen": -2.909428119659424, "logits/rejected": -2.6777734756469727, "logps/chosen": -70.1348876953125, "logps/rejected": -810.2760620117188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.33582696318626404, "rewards/margins": 7.3973798751831055, "rewards/rejected": -7.733206272125244, "step": 61610 }, { "epoch": 0.74, "learning_rate": 9.772712589797006e-07, "logits/chosen": -2.8980419635772705, "logits/rejected": -2.5194077491760254, "logps/chosen": -92.05867004394531, "logps/rejected": -854.6985473632812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.49326831102371216, "rewards/margins": 7.671576499938965, "rewards/rejected": -8.164844512939453, "step": 61620 }, { "epoch": 0.74, "learning_rate": 9.76442903203868e-07, "logits/chosen": -2.8438944816589355, "logits/rejected": -2.2100112438201904, "logps/chosen": -101.03778076171875, "logps/rejected": -894.0953369140625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4937240481376648, "rewards/margins": 8.074694633483887, "rewards/rejected": -8.568419456481934, "step": 61630 }, { "epoch": 0.74, "learning_rate": 9.756148134368734e-07, "logits/chosen": -2.8549530506134033, "logits/rejected": -2.3291878700256348, "logps/chosen": -109.79673767089844, "logps/rejected": -982.2449340820312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5690490007400513, "rewards/margins": 8.845526695251465, "rewards/rejected": -9.414576530456543, "step": 61640 }, { "epoch": 0.74, "learning_rate": 9.747869898232983e-07, "logits/chosen": -2.8908891677856445, "logits/rejected": -2.405998706817627, "logps/chosen": -100.40386962890625, "logps/rejected": -855.6748046875, "loss": 0.1183, "rewards/accuracies": 1.0, "rewards/chosen": -0.5529254674911499, "rewards/margins": 7.613900661468506, "rewards/rejected": -8.166826248168945, "step": 61650 }, { "epoch": 0.74, "learning_rate": 9.739594325076795e-07, "logits/chosen": -2.86069917678833, "logits/rejected": -2.124833583831787, "logps/chosen": -142.63230895996094, "logps/rejected": -1119.7623291015625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8893495798110962, "rewards/margins": 9.891931533813477, "rewards/rejected": -10.781279563903809, "step": 61660 }, { "epoch": 0.74, "learning_rate": 9.731321416345046e-07, "logits/chosen": -2.8716490268707275, "logits/rejected": -2.06010103225708, "logps/chosen": -131.45599365234375, "logps/rejected": -984.0446166992188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7161151170730591, "rewards/margins": 8.724035263061523, "rewards/rejected": -9.440150260925293, "step": 61670 }, { "epoch": 0.74, "learning_rate": 9.723051173482174e-07, "logits/chosen": -2.946180820465088, "logits/rejected": -2.143916130065918, "logps/chosen": -124.65165710449219, "logps/rejected": -1063.902587890625, "loss": 0.164, "rewards/accuracies": 1.0, "rewards/chosen": -0.7220556139945984, "rewards/margins": 9.503340721130371, "rewards/rejected": -10.225397109985352, "step": 61680 }, { "epoch": 0.74, "learning_rate": 9.714783597932134e-07, "logits/chosen": -2.871925115585327, "logits/rejected": -2.3554680347442627, "logps/chosen": -93.43124389648438, "logps/rejected": -828.4573364257812, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -0.5117336511611938, "rewards/margins": 7.405812740325928, "rewards/rejected": -7.91754674911499, "step": 61690 }, { "epoch": 0.74, "learning_rate": 9.706518691138426e-07, "logits/chosen": -2.865037441253662, "logits/rejected": -2.3863847255706787, "logps/chosen": -135.66297912597656, "logps/rejected": -923.1970825195312, "loss": 0.1223, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.882038414478302, "rewards/margins": 7.967474937438965, "rewards/rejected": -8.849514961242676, "step": 61700 }, { "epoch": 0.74, "learning_rate": 9.698256454544077e-07, "logits/chosen": -2.897514820098877, "logits/rejected": -2.3192696571350098, "logps/chosen": -93.56414031982422, "logps/rejected": -915.9205322265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.44837960600852966, "rewards/margins": 8.330394744873047, "rewards/rejected": -8.778776168823242, "step": 61710 }, { "epoch": 0.74, "learning_rate": 9.689996889591652e-07, "logits/chosen": -2.900702953338623, "logits/rejected": -2.1729934215545654, "logps/chosen": -144.85227966308594, "logps/rejected": -984.513671875, "loss": 0.1467, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8911963701248169, "rewards/margins": 8.53971004486084, "rewards/rejected": -9.430907249450684, "step": 61720 }, { "epoch": 0.74, "learning_rate": 9.68173999772326e-07, "logits/chosen": -2.8581624031066895, "logits/rejected": -2.375269651412964, "logps/chosen": -105.88627624511719, "logps/rejected": -923.8087768554688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6063792705535889, "rewards/margins": 8.249456405639648, "rewards/rejected": -8.8558349609375, "step": 61730 }, { "epoch": 0.74, "learning_rate": 9.67348578038051e-07, "logits/chosen": -2.8341543674468994, "logits/rejected": -2.414071559906006, "logps/chosen": -95.76921081542969, "logps/rejected": -888.1687622070312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5216027498245239, "rewards/margins": 7.976907253265381, "rewards/rejected": -8.498510360717773, "step": 61740 }, { "epoch": 0.74, "learning_rate": 9.665234239004574e-07, "logits/chosen": -2.907717227935791, "logits/rejected": -2.368319034576416, "logps/chosen": -98.6282730102539, "logps/rejected": -986.9430541992188, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.507713258266449, "rewards/margins": 8.972210884094238, "rewards/rejected": -9.479923248291016, "step": 61750 }, { "epoch": 0.74, "learning_rate": 9.656985375036149e-07, "logits/chosen": -2.8997128009796143, "logits/rejected": -2.3773179054260254, "logps/chosen": -106.00328063964844, "logps/rejected": -878.6539306640625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.546829104423523, "rewards/margins": 7.831961154937744, "rewards/rejected": -8.378788948059082, "step": 61760 }, { "epoch": 0.74, "learning_rate": 9.648739189915466e-07, "logits/chosen": -2.887390613555908, "logits/rejected": -2.492705821990967, "logps/chosen": -84.56474304199219, "logps/rejected": -923.6272583007812, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4224996566772461, "rewards/margins": 8.441558837890625, "rewards/rejected": -8.864058494567871, "step": 61770 }, { "epoch": 0.74, "learning_rate": 9.640495685082283e-07, "logits/chosen": -2.8480589389801025, "logits/rejected": -2.4413199424743652, "logps/chosen": -109.02410888671875, "logps/rejected": -839.4510498046875, "loss": 0.0761, "rewards/accuracies": 1.0, "rewards/chosen": -0.6290919780731201, "rewards/margins": 7.384031772613525, "rewards/rejected": -8.013124465942383, "step": 61780 }, { "epoch": 0.74, "learning_rate": 9.632254861975904e-07, "logits/chosen": -2.9013864994049072, "logits/rejected": -2.496507167816162, "logps/chosen": -83.56086730957031, "logps/rejected": -935.2726440429688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4164115786552429, "rewards/margins": 8.547428131103516, "rewards/rejected": -8.96384048461914, "step": 61790 }, { "epoch": 0.74, "learning_rate": 9.624016722035134e-07, "logits/chosen": -2.8688416481018066, "logits/rejected": -2.307887315750122, "logps/chosen": -124.69222259521484, "logps/rejected": -945.9853515625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7564480304718018, "rewards/margins": 8.306890487670898, "rewards/rejected": -9.063339233398438, "step": 61800 }, { "epoch": 0.74, "learning_rate": 9.615781266698331e-07, "logits/chosen": -2.8706774711608887, "logits/rejected": -2.4688258171081543, "logps/chosen": -90.10533905029297, "logps/rejected": -938.76708984375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4715670645236969, "rewards/margins": 8.535440444946289, "rewards/rejected": -9.007007598876953, "step": 61810 }, { "epoch": 0.74, "learning_rate": 9.607548497403398e-07, "logits/chosen": -2.8985087871551514, "logits/rejected": -2.491753101348877, "logps/chosen": -84.7686767578125, "logps/rejected": -850.9671630859375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4147142767906189, "rewards/margins": 7.699191093444824, "rewards/rejected": -8.11390495300293, "step": 61820 }, { "epoch": 0.74, "learning_rate": 9.59931841558775e-07, "logits/chosen": -2.9390311241149902, "logits/rejected": -2.2359583377838135, "logps/chosen": -122.72770690917969, "logps/rejected": -961.3113403320312, "loss": 0.0849, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6531943082809448, "rewards/margins": 8.577842712402344, "rewards/rejected": -9.231037139892578, "step": 61830 }, { "epoch": 0.74, "learning_rate": 9.59109102268834e-07, "logits/chosen": -2.9251186847686768, "logits/rejected": -2.3879432678222656, "logps/chosen": -90.56776428222656, "logps/rejected": -890.0894775390625, "loss": 0.0897, "rewards/accuracies": 1.0, "rewards/chosen": -0.42476686835289, "rewards/margins": 8.093656539916992, "rewards/rejected": -8.518424034118652, "step": 61840 }, { "epoch": 0.74, "learning_rate": 9.582866320141632e-07, "logits/chosen": -2.8882155418395996, "logits/rejected": -2.5846965312957764, "logps/chosen": -111.42216491699219, "logps/rejected": -818.5526123046875, "loss": 0.0799, "rewards/accuracies": 1.0, "rewards/chosen": -0.7046043276786804, "rewards/margins": 7.111982822418213, "rewards/rejected": -7.816587924957275, "step": 61850 }, { "epoch": 0.74, "learning_rate": 9.574644309383646e-07, "logits/chosen": -2.8605074882507324, "logits/rejected": -2.4477486610412598, "logps/chosen": -88.75889587402344, "logps/rejected": -861.9459228515625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4826142191886902, "rewards/margins": 7.753439426422119, "rewards/rejected": -8.236053466796875, "step": 61860 }, { "epoch": 0.74, "learning_rate": 9.56642499184992e-07, "logits/chosen": -2.883028030395508, "logits/rejected": -2.1760904788970947, "logps/chosen": -113.48211669921875, "logps/rejected": -1101.7537841796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6410384774208069, "rewards/margins": 9.980437278747559, "rewards/rejected": -10.621476173400879, "step": 61870 }, { "epoch": 0.74, "learning_rate": 9.55820836897553e-07, "logits/chosen": -2.8724541664123535, "logits/rejected": -2.5633559226989746, "logps/chosen": -75.80094909667969, "logps/rejected": -875.9613037109375, "loss": 0.1307, "rewards/accuracies": 1.0, "rewards/chosen": -0.36526334285736084, "rewards/margins": 8.010714530944824, "rewards/rejected": -8.375978469848633, "step": 61880 }, { "epoch": 0.74, "learning_rate": 9.549994442195065e-07, "logits/chosen": -2.880887031555176, "logits/rejected": -2.115678310394287, "logps/chosen": -121.7364501953125, "logps/rejected": -1029.034912109375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.71428382396698, "rewards/margins": 9.16191291809082, "rewards/rejected": -9.876197814941406, "step": 61890 }, { "epoch": 0.74, "learning_rate": 9.541783212942666e-07, "logits/chosen": -2.833348512649536, "logits/rejected": -2.18971586227417, "logps/chosen": -124.57835388183594, "logps/rejected": -1034.74169921875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7042860388755798, "rewards/margins": 9.224782943725586, "rewards/rejected": -9.929069519042969, "step": 61900 }, { "epoch": 0.74, "learning_rate": 9.533574682651989e-07, "logits/chosen": -2.918020486831665, "logits/rejected": -2.530728578567505, "logps/chosen": -87.48689270019531, "logps/rejected": -845.12255859375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4734364151954651, "rewards/margins": 7.589366912841797, "rewards/rejected": -8.062803268432617, "step": 61910 }, { "epoch": 0.74, "learning_rate": 9.525368852756209e-07, "logits/chosen": -2.922373056411743, "logits/rejected": -2.4832541942596436, "logps/chosen": -95.57086944580078, "logps/rejected": -936.6580200195312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4920891225337982, "rewards/margins": 8.476551055908203, "rewards/rejected": -8.968639373779297, "step": 61920 }, { "epoch": 0.74, "learning_rate": 9.517165724688046e-07, "logits/chosen": -2.934589385986328, "logits/rejected": -2.4414780139923096, "logps/chosen": -90.13063049316406, "logps/rejected": -889.6663208007812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4501972198486328, "rewards/margins": 8.06212329864502, "rewards/rejected": -8.512320518493652, "step": 61930 }, { "epoch": 0.74, "learning_rate": 9.50896529987975e-07, "logits/chosen": -2.9060912132263184, "logits/rejected": -2.4390206336975098, "logps/chosen": -92.4344482421875, "logps/rejected": -861.2161865234375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.47391432523727417, "rewards/margins": 7.744898796081543, "rewards/rejected": -8.218812942504883, "step": 61940 }, { "epoch": 0.74, "learning_rate": 9.500767579763085e-07, "logits/chosen": -2.91294527053833, "logits/rejected": -2.303260564804077, "logps/chosen": -151.86837768554688, "logps/rejected": -958.201171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0318107604980469, "rewards/margins": 8.15036678314209, "rewards/rejected": -9.182178497314453, "step": 61950 }, { "epoch": 0.74, "learning_rate": 9.492572565769353e-07, "logits/chosen": -2.8906588554382324, "logits/rejected": -2.3306751251220703, "logps/chosen": -105.3844985961914, "logps/rejected": -976.3927612304688, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5608574151992798, "rewards/margins": 8.806676864624023, "rewards/rejected": -9.367534637451172, "step": 61960 }, { "epoch": 0.74, "learning_rate": 9.484380259329382e-07, "logits/chosen": -2.888230800628662, "logits/rejected": -2.242112159729004, "logps/chosen": -104.24012756347656, "logps/rejected": -914.4405517578125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.5491050481796265, "rewards/margins": 8.195006370544434, "rewards/rejected": -8.744112014770508, "step": 61970 }, { "epoch": 0.74, "learning_rate": 9.476190661873536e-07, "logits/chosen": -2.9212114810943604, "logits/rejected": -2.1694893836975098, "logps/chosen": -146.69775390625, "logps/rejected": -1047.2542724609375, "loss": 0.0758, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9191930890083313, "rewards/margins": 9.165270805358887, "rewards/rejected": -10.084464073181152, "step": 61980 }, { "epoch": 0.74, "learning_rate": 9.468003774831672e-07, "logits/chosen": -2.8326284885406494, "logits/rejected": -2.374849319458008, "logps/chosen": -84.77674102783203, "logps/rejected": -862.3640747070312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.43846315145492554, "rewards/margins": 7.815425872802734, "rewards/rejected": -8.253889083862305, "step": 61990 }, { "epoch": 0.74, "learning_rate": 9.459819599633213e-07, "logits/chosen": -2.8905515670776367, "logits/rejected": -2.4644153118133545, "logps/chosen": -101.15584564208984, "logps/rejected": -1011.8624267578125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5550647974014282, "rewards/margins": 9.168456077575684, "rewards/rejected": -9.72352123260498, "step": 62000 }, { "epoch": 0.74, "learning_rate": 9.451638137707089e-07, "logits/chosen": -2.9399607181549072, "logits/rejected": -2.5387673377990723, "logps/chosen": -87.09146881103516, "logps/rejected": -811.06591796875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4268273711204529, "rewards/margins": 7.303468227386475, "rewards/rejected": -7.730295658111572, "step": 62010 }, { "epoch": 0.74, "learning_rate": 9.443459390481766e-07, "logits/chosen": -2.899751901626587, "logits/rejected": -2.5459389686584473, "logps/chosen": -94.6673583984375, "logps/rejected": -801.4641723632812, "loss": 0.0743, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5091201663017273, "rewards/margins": 7.130802154541016, "rewards/rejected": -7.639923095703125, "step": 62020 }, { "epoch": 0.74, "learning_rate": 9.435283359385225e-07, "logits/chosen": -2.9022042751312256, "logits/rejected": -2.402211904525757, "logps/chosen": -104.3201904296875, "logps/rejected": -951.7767333984375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.6083995699882507, "rewards/margins": 8.51530647277832, "rewards/rejected": -9.123706817626953, "step": 62030 }, { "epoch": 0.74, "learning_rate": 9.427110045844989e-07, "logits/chosen": -2.87485933303833, "logits/rejected": -2.465056896209717, "logps/chosen": -96.5943832397461, "logps/rejected": -855.8489379882812, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.4919087886810303, "rewards/margins": 7.67929220199585, "rewards/rejected": -8.1712007522583, "step": 62040 }, { "epoch": 0.74, "learning_rate": 9.418939451288079e-07, "logits/chosen": -2.9043896198272705, "logits/rejected": -2.3296549320220947, "logps/chosen": -113.62384033203125, "logps/rejected": -903.91162109375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6817391514778137, "rewards/margins": 7.98117733001709, "rewards/rejected": -8.662917137145996, "step": 62050 }, { "epoch": 0.74, "learning_rate": 9.410771577141065e-07, "logits/chosen": -2.855330228805542, "logits/rejected": -2.328322172164917, "logps/chosen": -123.27874755859375, "logps/rejected": -945.6251831054688, "loss": 0.0233, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7045105695724487, "rewards/margins": 8.356170654296875, "rewards/rejected": -9.06068229675293, "step": 62060 }, { "epoch": 0.74, "learning_rate": 9.402606424830036e-07, "logits/chosen": -2.854220151901245, "logits/rejected": -1.9654819965362549, "logps/chosen": -142.32383728027344, "logps/rejected": -1144.0948486328125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8269750475883484, "rewards/margins": 10.209966659545898, "rewards/rejected": -11.03693962097168, "step": 62070 }, { "epoch": 0.74, "learning_rate": 9.394443995780606e-07, "logits/chosen": -2.859668254852295, "logits/rejected": -2.3690590858459473, "logps/chosen": -97.0003662109375, "logps/rejected": -901.462890625, "loss": 0.1097, "rewards/accuracies": 1.0, "rewards/chosen": -0.5268964171409607, "rewards/margins": 8.097310066223145, "rewards/rejected": -8.624205589294434, "step": 62080 }, { "epoch": 0.74, "learning_rate": 9.386284291417913e-07, "logits/chosen": -2.8758959770202637, "logits/rejected": -2.1535773277282715, "logps/chosen": -108.70997619628906, "logps/rejected": -1013.4986572265625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.605538010597229, "rewards/margins": 9.146162033081055, "rewards/rejected": -9.751700401306152, "step": 62090 }, { "epoch": 0.74, "learning_rate": 9.378127313166615e-07, "logits/chosen": -2.8796188831329346, "logits/rejected": -2.4902660846710205, "logps/chosen": -78.89290618896484, "logps/rejected": -824.0079956054688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.37104523181915283, "rewards/margins": 7.496336460113525, "rewards/rejected": -7.8673810958862305, "step": 62100 }, { "epoch": 0.74, "learning_rate": 9.369973062450913e-07, "logits/chosen": -2.877556324005127, "logits/rejected": -2.2692084312438965, "logps/chosen": -130.66494750976562, "logps/rejected": -935.9562377929688, "loss": 0.1285, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8304012417793274, "rewards/margins": 8.144678115844727, "rewards/rejected": -8.975080490112305, "step": 62110 }, { "epoch": 0.74, "learning_rate": 9.361821540694491e-07, "logits/chosen": -2.839275360107422, "logits/rejected": -2.1927988529205322, "logps/chosen": -117.78472900390625, "logps/rejected": -989.8283081054688, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6759745478630066, "rewards/margins": 8.809714317321777, "rewards/rejected": -9.485689163208008, "step": 62120 }, { "epoch": 0.74, "learning_rate": 9.353672749320597e-07, "logits/chosen": -2.882753610610962, "logits/rejected": -2.34493350982666, "logps/chosen": -115.25337982177734, "logps/rejected": -922.8736572265625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.7154171466827393, "rewards/margins": 8.133905410766602, "rewards/rejected": -8.849323272705078, "step": 62130 }, { "epoch": 0.74, "learning_rate": 9.345526689751983e-07, "logits/chosen": -2.912217617034912, "logits/rejected": -2.6061413288116455, "logps/chosen": -87.22789001464844, "logps/rejected": -804.1707153320312, "loss": 0.0296, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.4482742249965668, "rewards/margins": 7.226068019866943, "rewards/rejected": -7.674341678619385, "step": 62140 }, { "epoch": 0.74, "learning_rate": 9.337383363410932e-07, "logits/chosen": -2.9268317222595215, "logits/rejected": -2.0947563648223877, "logps/chosen": -117.99610900878906, "logps/rejected": -1054.5804443359375, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.5363798141479492, "rewards/margins": 9.60830020904541, "rewards/rejected": -10.144679069519043, "step": 62150 }, { "epoch": 0.74, "learning_rate": 9.329242771719247e-07, "logits/chosen": -2.905775308609009, "logits/rejected": -2.4135584831237793, "logps/chosen": -88.27375030517578, "logps/rejected": -847.2845458984375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.41575533151626587, "rewards/margins": 7.683078765869141, "rewards/rejected": -8.098834037780762, "step": 62160 }, { "epoch": 0.74, "learning_rate": 9.321104916098247e-07, "logits/chosen": -2.9149727821350098, "logits/rejected": -2.299739122390747, "logps/chosen": -120.09831237792969, "logps/rejected": -977.904296875, "loss": 0.115, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7497406005859375, "rewards/margins": 8.629693984985352, "rewards/rejected": -9.379434585571289, "step": 62170 }, { "epoch": 0.74, "learning_rate": 9.312969797968793e-07, "logits/chosen": -2.921660900115967, "logits/rejected": -2.5916695594787598, "logps/chosen": -84.48965454101562, "logps/rejected": -845.27490234375, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.4252072870731354, "rewards/margins": 7.644295692443848, "rewards/rejected": -8.069501876831055, "step": 62180 }, { "epoch": 0.74, "learning_rate": 9.304837418751233e-07, "logits/chosen": -2.8546359539031982, "logits/rejected": -2.3626327514648438, "logps/chosen": -94.88499450683594, "logps/rejected": -941.3306884765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5119541883468628, "rewards/margins": 8.50925064086914, "rewards/rejected": -9.021203994750977, "step": 62190 }, { "epoch": 0.74, "learning_rate": 9.296707779865468e-07, "logits/chosen": -2.9222347736358643, "logits/rejected": -2.375117301940918, "logps/chosen": -98.06871032714844, "logps/rejected": -967.4703369140625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.5006943941116333, "rewards/margins": 8.796262741088867, "rewards/rejected": -9.296957969665527, "step": 62200 }, { "epoch": 0.74, "learning_rate": 9.288580882730913e-07, "logits/chosen": -2.866384983062744, "logits/rejected": -2.2348525524139404, "logps/chosen": -128.72866821289062, "logps/rejected": -980.2532958984375, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7979512810707092, "rewards/margins": 8.606660842895508, "rewards/rejected": -9.40461254119873, "step": 62210 }, { "epoch": 0.74, "learning_rate": 9.280456728766499e-07, "logits/chosen": -2.87827205657959, "logits/rejected": -2.0171360969543457, "logps/chosen": -130.91575622558594, "logps/rejected": -1110.01611328125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7605144381523132, "rewards/margins": 9.933151245117188, "rewards/rejected": -10.693666458129883, "step": 62220 }, { "epoch": 0.74, "learning_rate": 9.272335319390679e-07, "logits/chosen": -2.8754749298095703, "logits/rejected": -2.563147783279419, "logps/chosen": -67.5699234008789, "logps/rejected": -786.4302978515625, "loss": 0.1073, "rewards/accuracies": 1.0, "rewards/chosen": -0.28610503673553467, "rewards/margins": 7.209689140319824, "rewards/rejected": -7.495794773101807, "step": 62230 }, { "epoch": 0.75, "learning_rate": 9.264216656021433e-07, "logits/chosen": -2.8725860118865967, "logits/rejected": -2.284466028213501, "logps/chosen": -92.41304016113281, "logps/rejected": -933.3084106445312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4127228856086731, "rewards/margins": 8.544496536254883, "rewards/rejected": -8.957220077514648, "step": 62240 }, { "epoch": 0.75, "learning_rate": 9.256100740076252e-07, "logits/chosen": -2.8817381858825684, "logits/rejected": -2.3592071533203125, "logps/chosen": -109.5387954711914, "logps/rejected": -883.0227661132812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6451610326766968, "rewards/margins": 7.812353610992432, "rewards/rejected": -8.457513809204102, "step": 62250 }, { "epoch": 0.75, "learning_rate": 9.247987572972156e-07, "logits/chosen": -2.8787150382995605, "logits/rejected": -2.4803738594055176, "logps/chosen": -84.34247589111328, "logps/rejected": -870.1671752929688, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.4031682014465332, "rewards/margins": 7.922502040863037, "rewards/rejected": -8.32567024230957, "step": 62260 }, { "epoch": 0.75, "learning_rate": 9.239877156125682e-07, "logits/chosen": -2.90834379196167, "logits/rejected": -2.3222711086273193, "logps/chosen": -105.61692810058594, "logps/rejected": -921.5244140625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5817619562149048, "rewards/margins": 8.235812187194824, "rewards/rejected": -8.817574501037598, "step": 62270 }, { "epoch": 0.75, "learning_rate": 9.231769490952883e-07, "logits/chosen": -2.915067195892334, "logits/rejected": -2.4967617988586426, "logps/chosen": -89.61375427246094, "logps/rejected": -948.5119018554688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.46726831793785095, "rewards/margins": 8.621072769165039, "rewards/rejected": -9.088340759277344, "step": 62280 }, { "epoch": 0.75, "learning_rate": 9.223664578869334e-07, "logits/chosen": -2.826906681060791, "logits/rejected": -2.1665868759155273, "logps/chosen": -124.171142578125, "logps/rejected": -1071.0933837890625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7812373042106628, "rewards/margins": 9.520349502563477, "rewards/rejected": -10.301586151123047, "step": 62290 }, { "epoch": 0.75, "learning_rate": 9.215562421290144e-07, "logits/chosen": -2.8633532524108887, "logits/rejected": -2.1654438972473145, "logps/chosen": -111.6424331665039, "logps/rejected": -987.5006713867188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6364889144897461, "rewards/margins": 8.860369682312012, "rewards/rejected": -9.496858596801758, "step": 62300 }, { "epoch": 0.75, "learning_rate": 9.207463019629909e-07, "logits/chosen": -2.916046380996704, "logits/rejected": -2.125379800796509, "logps/chosen": -141.8394775390625, "logps/rejected": -1083.294677734375, "loss": 0.111, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8800684809684753, "rewards/margins": 9.542243957519531, "rewards/rejected": -10.42231273651123, "step": 62310 }, { "epoch": 0.75, "learning_rate": 9.199366375302768e-07, "logits/chosen": -2.9132723808288574, "logits/rejected": -2.6080377101898193, "logps/chosen": -65.7103042602539, "logps/rejected": -802.7874145507812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.2669074833393097, "rewards/margins": 7.397233009338379, "rewards/rejected": -7.664140224456787, "step": 62320 }, { "epoch": 0.75, "learning_rate": 9.191272489722372e-07, "logits/chosen": -2.8573837280273438, "logits/rejected": -2.1898086071014404, "logps/chosen": -124.77461242675781, "logps/rejected": -877.3146362304688, "loss": 0.1964, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7703936100006104, "rewards/margins": 7.6082329750061035, "rewards/rejected": -8.378626823425293, "step": 62330 }, { "epoch": 0.75, "learning_rate": 9.183181364301891e-07, "logits/chosen": -2.8765625953674316, "logits/rejected": -2.431872844696045, "logps/chosen": -101.94834899902344, "logps/rejected": -918.5094604492188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5640528798103333, "rewards/margins": 8.222822189331055, "rewards/rejected": -8.786874771118164, "step": 62340 }, { "epoch": 0.75, "learning_rate": 9.175093000454017e-07, "logits/chosen": -2.8889236450195312, "logits/rejected": -2.3361291885375977, "logps/chosen": -105.81044006347656, "logps/rejected": -987.9105224609375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5800094604492188, "rewards/margins": 8.913671493530273, "rewards/rejected": -9.493680953979492, "step": 62350 }, { "epoch": 0.75, "learning_rate": 9.167007399590963e-07, "logits/chosen": -2.9307260513305664, "logits/rejected": -2.4953579902648926, "logps/chosen": -90.3916015625, "logps/rejected": -901.8016357421875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.43296200037002563, "rewards/margins": 8.197240829467773, "rewards/rejected": -8.630203247070312, "step": 62360 }, { "epoch": 0.75, "learning_rate": 9.158924563124433e-07, "logits/chosen": -2.8515732288360596, "logits/rejected": -2.3358254432678223, "logps/chosen": -92.28649139404297, "logps/rejected": -914.3479614257812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.480644166469574, "rewards/margins": 8.280096054077148, "rewards/rejected": -8.760740280151367, "step": 62370 }, { "epoch": 0.75, "learning_rate": 9.150844492465677e-07, "logits/chosen": -2.897346019744873, "logits/rejected": -2.4170584678649902, "logps/chosen": -91.15373229980469, "logps/rejected": -883.8624267578125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.4508029520511627, "rewards/margins": 8.01057243347168, "rewards/rejected": -8.461376190185547, "step": 62380 }, { "epoch": 0.75, "learning_rate": 9.142767189025453e-07, "logits/chosen": -2.9228622913360596, "logits/rejected": -2.3682820796966553, "logps/chosen": -99.35138702392578, "logps/rejected": -1006.3839111328125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5003545880317688, "rewards/margins": 9.158781051635742, "rewards/rejected": -9.659134864807129, "step": 62390 }, { "epoch": 0.75, "learning_rate": 9.134692654214037e-07, "logits/chosen": -2.8784542083740234, "logits/rejected": -2.1493325233459473, "logps/chosen": -128.53457641601562, "logps/rejected": -1019.8801879882812, "loss": 0.0937, "rewards/accuracies": 1.0, "rewards/chosen": -0.7227071523666382, "rewards/margins": 9.062875747680664, "rewards/rejected": -9.785582542419434, "step": 62400 }, { "epoch": 0.75, "learning_rate": 9.126620889441217e-07, "logits/chosen": -2.9211251735687256, "logits/rejected": -2.461350679397583, "logps/chosen": -106.09037780761719, "logps/rejected": -899.2190551757812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5847905278205872, "rewards/margins": 8.024125099182129, "rewards/rejected": -8.608915328979492, "step": 62410 }, { "epoch": 0.75, "learning_rate": 9.118551896116306e-07, "logits/chosen": -2.8562264442443848, "logits/rejected": -2.4020323753356934, "logps/chosen": -89.1085205078125, "logps/rejected": -910.5593872070312, "loss": 0.1322, "rewards/accuracies": 1.0, "rewards/chosen": -0.4650112986564636, "rewards/margins": 8.261588096618652, "rewards/rejected": -8.726598739624023, "step": 62420 }, { "epoch": 0.75, "learning_rate": 9.110485675648135e-07, "logits/chosen": -2.9016942977905273, "logits/rejected": -2.548318386077881, "logps/chosen": -85.47459411621094, "logps/rejected": -854.4465942382812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.4217754900455475, "rewards/margins": 7.7462568283081055, "rewards/rejected": -8.168031692504883, "step": 62430 }, { "epoch": 0.75, "learning_rate": 9.102422229445023e-07, "logits/chosen": -2.8426685333251953, "logits/rejected": -2.318225383758545, "logps/chosen": -123.39054870605469, "logps/rejected": -849.6478271484375, "loss": 0.1057, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7674232721328735, "rewards/margins": 7.348515510559082, "rewards/rejected": -8.115939140319824, "step": 62440 }, { "epoch": 0.75, "learning_rate": 9.094361558914835e-07, "logits/chosen": -2.897733211517334, "logits/rejected": -2.6186227798461914, "logps/chosen": -69.03903198242188, "logps/rejected": -833.0523681640625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.3126707971096039, "rewards/margins": 7.65134334564209, "rewards/rejected": -7.964013576507568, "step": 62450 }, { "epoch": 0.75, "learning_rate": 9.086303665464947e-07, "logits/chosen": -2.8798413276672363, "logits/rejected": -2.451698064804077, "logps/chosen": -83.91948699951172, "logps/rejected": -876.4909057617188, "loss": 0.1492, "rewards/accuracies": 1.0, "rewards/chosen": -0.41933155059814453, "rewards/margins": 7.967021942138672, "rewards/rejected": -8.386354446411133, "step": 62460 }, { "epoch": 0.75, "learning_rate": 9.078248550502236e-07, "logits/chosen": -2.8719937801361084, "logits/rejected": -2.0761637687683105, "logps/chosen": -102.79852294921875, "logps/rejected": -988.16064453125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5081645250320435, "rewards/margins": 8.976430892944336, "rewards/rejected": -9.484596252441406, "step": 62470 }, { "epoch": 0.75, "learning_rate": 9.07019621543311e-07, "logits/chosen": -2.774890422821045, "logits/rejected": -2.241037368774414, "logps/chosen": -101.18048858642578, "logps/rejected": -871.7197265625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5408450365066528, "rewards/margins": 7.809114933013916, "rewards/rejected": -8.349960327148438, "step": 62480 }, { "epoch": 0.75, "learning_rate": 9.062146661663482e-07, "logits/chosen": -2.86772084236145, "logits/rejected": -2.180107593536377, "logps/chosen": -127.53981018066406, "logps/rejected": -989.8177490234375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7456119656562805, "rewards/margins": 8.75798511505127, "rewards/rejected": -9.503595352172852, "step": 62490 }, { "epoch": 0.75, "learning_rate": 9.054099890598788e-07, "logits/chosen": -2.912984609603882, "logits/rejected": -2.4496665000915527, "logps/chosen": -111.79667663574219, "logps/rejected": -827.5452270507812, "loss": 0.0755, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6568554043769836, "rewards/margins": 7.236750602722168, "rewards/rejected": -7.893605709075928, "step": 62500 }, { "epoch": 0.75, "learning_rate": 9.046055903643957e-07, "logits/chosen": -2.938511610031128, "logits/rejected": -2.1427581310272217, "logps/chosen": -152.12786865234375, "logps/rejected": -1141.75634765625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9454180002212524, "rewards/margins": 10.059773445129395, "rewards/rejected": -11.005192756652832, "step": 62510 }, { "epoch": 0.75, "learning_rate": 9.038014702203457e-07, "logits/chosen": -2.88679838180542, "logits/rejected": -2.4363908767700195, "logps/chosen": -107.61112976074219, "logps/rejected": -968.7498168945312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6388459801673889, "rewards/margins": 8.656974792480469, "rewards/rejected": -9.295820236206055, "step": 62520 }, { "epoch": 0.75, "learning_rate": 9.029976287681255e-07, "logits/chosen": -2.8700473308563232, "logits/rejected": -2.2525134086608887, "logps/chosen": -111.20664978027344, "logps/rejected": -998.11083984375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6735355854034424, "rewards/margins": 8.927911758422852, "rewards/rejected": -9.601447105407715, "step": 62530 }, { "epoch": 0.75, "learning_rate": 9.021940661480841e-07, "logits/chosen": -2.8926913738250732, "logits/rejected": -2.293651580810547, "logps/chosen": -122.17707824707031, "logps/rejected": -1033.885986328125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6937575936317444, "rewards/margins": 9.24897575378418, "rewards/rejected": -9.942733764648438, "step": 62540 }, { "epoch": 0.75, "learning_rate": 9.013907825005209e-07, "logits/chosen": -2.895371913909912, "logits/rejected": -2.3064937591552734, "logps/chosen": -102.13523864746094, "logps/rejected": -910.005859375, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": -0.4862850308418274, "rewards/margins": 8.231927871704102, "rewards/rejected": -8.718213081359863, "step": 62550 }, { "epoch": 0.75, "learning_rate": 9.00587777965688e-07, "logits/chosen": -2.8828940391540527, "logits/rejected": -2.320908784866333, "logps/chosen": -116.01661682128906, "logps/rejected": -1043.7476806640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7003543972969055, "rewards/margins": 9.354659080505371, "rewards/rejected": -10.055012702941895, "step": 62560 }, { "epoch": 0.75, "learning_rate": 8.997850526837859e-07, "logits/chosen": -2.8817403316497803, "logits/rejected": -2.3565855026245117, "logps/chosen": -100.56476593017578, "logps/rejected": -889.9597778320312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5913842916488647, "rewards/margins": 7.920359134674072, "rewards/rejected": -8.511743545532227, "step": 62570 }, { "epoch": 0.75, "learning_rate": 8.989826067949694e-07, "logits/chosen": -2.9125542640686035, "logits/rejected": -2.4096477031707764, "logps/chosen": -94.86837005615234, "logps/rejected": -901.44482421875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5133337378501892, "rewards/margins": 8.122286796569824, "rewards/rejected": -8.6356201171875, "step": 62580 }, { "epoch": 0.75, "learning_rate": 8.98180440439343e-07, "logits/chosen": -2.833392858505249, "logits/rejected": -2.230146884918213, "logps/chosen": -107.95030212402344, "logps/rejected": -983.0626831054688, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -0.5761371850967407, "rewards/margins": 8.85731315612793, "rewards/rejected": -9.433450698852539, "step": 62590 }, { "epoch": 0.75, "learning_rate": 8.97378553756963e-07, "logits/chosen": -2.9024765491485596, "logits/rejected": -2.1729331016540527, "logps/chosen": -125.63813781738281, "logps/rejected": -1038.7041015625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.7626811265945435, "rewards/margins": 9.208725929260254, "rewards/rejected": -9.971406936645508, "step": 62600 }, { "epoch": 0.75, "learning_rate": 8.96576946887836e-07, "logits/chosen": -2.905141830444336, "logits/rejected": -2.5205459594726562, "logps/chosen": -84.50254821777344, "logps/rejected": -875.1204833984375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4464362561702728, "rewards/margins": 7.921484470367432, "rewards/rejected": -8.367920875549316, "step": 62610 }, { "epoch": 0.75, "learning_rate": 8.957756199719211e-07, "logits/chosen": -2.861872911453247, "logits/rejected": -2.300537109375, "logps/chosen": -91.39085388183594, "logps/rejected": -902.978515625, "loss": 0.0591, "rewards/accuracies": 1.0, "rewards/chosen": -0.43643718957901, "rewards/margins": 8.214179992675781, "rewards/rejected": -8.650617599487305, "step": 62620 }, { "epoch": 0.75, "learning_rate": 8.949745731491283e-07, "logits/chosen": -2.897951126098633, "logits/rejected": -2.474057674407959, "logps/chosen": -98.02503967285156, "logps/rejected": -873.8326416015625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.588894248008728, "rewards/margins": 7.760653018951416, "rewards/rejected": -8.349547386169434, "step": 62630 }, { "epoch": 0.75, "learning_rate": 8.941738065593161e-07, "logits/chosen": -2.859179735183716, "logits/rejected": -2.109562397003174, "logps/chosen": -112.48651123046875, "logps/rejected": -979.7970581054688, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.6667051315307617, "rewards/margins": 8.750102996826172, "rewards/rejected": -9.416808128356934, "step": 62640 }, { "epoch": 0.75, "learning_rate": 8.933733203422967e-07, "logits/chosen": -2.864185094833374, "logits/rejected": -2.2725021839141846, "logps/chosen": -124.2734375, "logps/rejected": -1071.763671875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7730945944786072, "rewards/margins": 9.552007675170898, "rewards/rejected": -10.325102806091309, "step": 62650 }, { "epoch": 0.75, "learning_rate": 8.925731146378342e-07, "logits/chosen": -2.925084352493286, "logits/rejected": -2.5037999153137207, "logps/chosen": -82.51712799072266, "logps/rejected": -874.3355712890625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.3980313837528229, "rewards/margins": 7.957812309265137, "rewards/rejected": -8.355844497680664, "step": 62660 }, { "epoch": 0.75, "learning_rate": 8.917731895856413e-07, "logits/chosen": -2.8593037128448486, "logits/rejected": -2.130342483520508, "logps/chosen": -129.04537963867188, "logps/rejected": -1125.942626953125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7656704187393188, "rewards/margins": 10.081148147583008, "rewards/rejected": -10.846819877624512, "step": 62670 }, { "epoch": 0.75, "learning_rate": 8.90973545325384e-07, "logits/chosen": -2.904181957244873, "logits/rejected": -2.463552474975586, "logps/chosen": -81.5603256225586, "logps/rejected": -858.8194580078125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.36377614736557007, "rewards/margins": 7.852790832519531, "rewards/rejected": -8.216567039489746, "step": 62680 }, { "epoch": 0.75, "learning_rate": 8.901741819966755e-07, "logits/chosen": -2.8731613159179688, "logits/rejected": -2.3135619163513184, "logps/chosen": -105.50352478027344, "logps/rejected": -984.7551879882812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6089626550674438, "rewards/margins": 8.842832565307617, "rewards/rejected": -9.451794624328613, "step": 62690 }, { "epoch": 0.75, "learning_rate": 8.89375099739084e-07, "logits/chosen": -2.8789989948272705, "logits/rejected": -2.447716236114502, "logps/chosen": -92.22456359863281, "logps/rejected": -918.6976318359375, "loss": 0.0579, "rewards/accuracies": 1.0, "rewards/chosen": -0.4967133402824402, "rewards/margins": 8.304956436157227, "rewards/rejected": -8.80167007446289, "step": 62700 }, { "epoch": 0.75, "learning_rate": 8.885762986921268e-07, "logits/chosen": -2.8828537464141846, "logits/rejected": -2.4432733058929443, "logps/chosen": -118.95865631103516, "logps/rejected": -870.1126098632812, "loss": 0.1553, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6867977380752563, "rewards/margins": 7.639070987701416, "rewards/rejected": -8.325868606567383, "step": 62710 }, { "epoch": 0.75, "learning_rate": 8.877777789952724e-07, "logits/chosen": -2.8674440383911133, "logits/rejected": -2.256460189819336, "logps/chosen": -100.52787017822266, "logps/rejected": -984.9166259765625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5251285433769226, "rewards/margins": 8.924684524536133, "rewards/rejected": -9.449812889099121, "step": 62720 }, { "epoch": 0.75, "learning_rate": 8.869795407879397e-07, "logits/chosen": -2.8809075355529785, "logits/rejected": -2.143660545349121, "logps/chosen": -130.21304321289062, "logps/rejected": -1072.1492919921875, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -0.7853623628616333, "rewards/margins": 9.512710571289062, "rewards/rejected": -10.298072814941406, "step": 62730 }, { "epoch": 0.75, "learning_rate": 8.861815842094995e-07, "logits/chosen": -2.899596691131592, "logits/rejected": -2.2988076210021973, "logps/chosen": -110.41767883300781, "logps/rejected": -949.0927734375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6011653542518616, "rewards/margins": 8.502117156982422, "rewards/rejected": -9.103283882141113, "step": 62740 }, { "epoch": 0.75, "learning_rate": 8.85383909399273e-07, "logits/chosen": -2.8691346645355225, "logits/rejected": -2.4699690341949463, "logps/chosen": -111.108154296875, "logps/rejected": -827.9269409179688, "loss": 0.0917, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.694137692451477, "rewards/margins": 7.211935520172119, "rewards/rejected": -7.906073093414307, "step": 62750 }, { "epoch": 0.75, "learning_rate": 8.845865164965311e-07, "logits/chosen": -2.8588480949401855, "logits/rejected": -2.1541686058044434, "logps/chosen": -126.24800872802734, "logps/rejected": -914.623046875, "loss": 0.0693, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7335305213928223, "rewards/margins": 8.014416694641113, "rewards/rejected": -8.747945785522461, "step": 62760 }, { "epoch": 0.75, "learning_rate": 8.837894056404967e-07, "logits/chosen": -2.8851516246795654, "logits/rejected": -2.321242094039917, "logps/chosen": -108.45082092285156, "logps/rejected": -972.3544921875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6106335520744324, "rewards/margins": 8.715341567993164, "rewards/rejected": -9.325974464416504, "step": 62770 }, { "epoch": 0.75, "learning_rate": 8.829925769703429e-07, "logits/chosen": -2.890322208404541, "logits/rejected": -2.2347381114959717, "logps/chosen": -101.8981704711914, "logps/rejected": -968.6926879882812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5223339200019836, "rewards/margins": 8.782201766967773, "rewards/rejected": -9.304537773132324, "step": 62780 }, { "epoch": 0.75, "learning_rate": 8.821960306251945e-07, "logits/chosen": -2.8492259979248047, "logits/rejected": -2.3283588886260986, "logps/chosen": -104.5235366821289, "logps/rejected": -914.1066284179688, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.5703684687614441, "rewards/margins": 8.177474021911621, "rewards/rejected": -8.747842788696289, "step": 62790 }, { "epoch": 0.75, "learning_rate": 8.813997667441257e-07, "logits/chosen": -2.8532259464263916, "logits/rejected": -1.9105310440063477, "logps/chosen": -132.1981201171875, "logps/rejected": -1127.268798828125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7650388479232788, "rewards/margins": 10.101366996765137, "rewards/rejected": -10.866406440734863, "step": 62800 }, { "epoch": 0.75, "learning_rate": 8.806037854661623e-07, "logits/chosen": -2.844966411590576, "logits/rejected": -2.155773639678955, "logps/chosen": -126.1563949584961, "logps/rejected": -1087.186279296875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7093108892440796, "rewards/margins": 9.751718521118164, "rewards/rejected": -10.461029052734375, "step": 62810 }, { "epoch": 0.75, "learning_rate": 8.798080869302811e-07, "logits/chosen": -2.8969502449035645, "logits/rejected": -2.5943479537963867, "logps/chosen": -64.30955505371094, "logps/rejected": -790.1831665039062, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.28277260065078735, "rewards/margins": 7.262208461761475, "rewards/rejected": -7.544981479644775, "step": 62820 }, { "epoch": 0.75, "learning_rate": 8.790126712754068e-07, "logits/chosen": -2.9170708656311035, "logits/rejected": -2.2002131938934326, "logps/chosen": -140.16734313964844, "logps/rejected": -937.4791259765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8528329730033875, "rewards/margins": 8.10678482055664, "rewards/rejected": -8.959617614746094, "step": 62830 }, { "epoch": 0.75, "learning_rate": 8.782175386404184e-07, "logits/chosen": -2.84879732131958, "logits/rejected": -2.2523016929626465, "logps/chosen": -95.9465103149414, "logps/rejected": -915.4816284179688, "loss": 0.0914, "rewards/accuracies": 1.0, "rewards/chosen": -0.5124026536941528, "rewards/margins": 8.258782386779785, "rewards/rejected": -8.771184921264648, "step": 62840 }, { "epoch": 0.75, "learning_rate": 8.774226891641435e-07, "logits/chosen": -2.8844029903411865, "logits/rejected": -2.1862027645111084, "logps/chosen": -122.00563049316406, "logps/rejected": -987.4498901367188, "loss": 0.0206, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7364374995231628, "rewards/margins": 8.733373641967773, "rewards/rejected": -9.469810485839844, "step": 62850 }, { "epoch": 0.75, "learning_rate": 8.766281229853605e-07, "logits/chosen": -2.852365016937256, "logits/rejected": -2.2623534202575684, "logps/chosen": -129.48648071289062, "logps/rejected": -1055.9107666015625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7278815507888794, "rewards/margins": 9.438529968261719, "rewards/rejected": -10.166410446166992, "step": 62860 }, { "epoch": 0.75, "learning_rate": 8.758338402427982e-07, "logits/chosen": -2.8605425357818604, "logits/rejected": -2.3612499237060547, "logps/chosen": -86.59962463378906, "logps/rejected": -947.5436401367188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.40373674035072327, "rewards/margins": 8.69017505645752, "rewards/rejected": -9.093912124633789, "step": 62870 }, { "epoch": 0.75, "learning_rate": 8.750398410751376e-07, "logits/chosen": -2.8706343173980713, "logits/rejected": -2.3451387882232666, "logps/chosen": -110.472412109375, "logps/rejected": -985.8649291992188, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6340836882591248, "rewards/margins": 8.826013565063477, "rewards/rejected": -9.460097312927246, "step": 62880 }, { "epoch": 0.75, "learning_rate": 8.742461256210066e-07, "logits/chosen": -2.91137433052063, "logits/rejected": -2.484919309616089, "logps/chosen": -95.41172790527344, "logps/rejected": -899.91552734375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.48015087842941284, "rewards/margins": 8.13671875, "rewards/rejected": -8.616869926452637, "step": 62890 }, { "epoch": 0.75, "learning_rate": 8.734526940189869e-07, "logits/chosen": -2.899507999420166, "logits/rejected": -2.3531229496002197, "logps/chosen": -124.15535736083984, "logps/rejected": -1017.53515625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.749557614326477, "rewards/margins": 9.021132469177246, "rewards/rejected": -9.770689964294434, "step": 62900 }, { "epoch": 0.75, "learning_rate": 8.726595464076093e-07, "logits/chosen": -2.898510694503784, "logits/rejected": -2.243032932281494, "logps/chosen": -105.64571380615234, "logps/rejected": -998.24267578125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5127823948860168, "rewards/margins": 9.078683853149414, "rewards/rejected": -9.591466903686523, "step": 62910 }, { "epoch": 0.75, "learning_rate": 8.718666829253549e-07, "logits/chosen": -2.8427162170410156, "logits/rejected": -2.2398056983947754, "logps/chosen": -113.41679382324219, "logps/rejected": -1012.23486328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6702153086662292, "rewards/margins": 9.05497932434082, "rewards/rejected": -9.725194931030273, "step": 62920 }, { "epoch": 0.75, "learning_rate": 8.71074103710656e-07, "logits/chosen": -2.8258614540100098, "logits/rejected": -2.179776668548584, "logps/chosen": -121.80143737792969, "logps/rejected": -1019.2268676757812, "loss": 0.0904, "rewards/accuracies": 1.0, "rewards/chosen": -0.7135864496231079, "rewards/margins": 9.078566551208496, "rewards/rejected": -9.792153358459473, "step": 62930 }, { "epoch": 0.75, "learning_rate": 8.702818089018943e-07, "logits/chosen": -2.8879446983337402, "logits/rejected": -2.47334361076355, "logps/chosen": -123.4280014038086, "logps/rejected": -877.76513671875, "loss": 0.1524, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.793764591217041, "rewards/margins": 7.5979156494140625, "rewards/rejected": -8.391680717468262, "step": 62940 }, { "epoch": 0.75, "learning_rate": 8.694897986374031e-07, "logits/chosen": -2.9227561950683594, "logits/rejected": -2.5992019176483154, "logps/chosen": -78.85130310058594, "logps/rejected": -786.3021850585938, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.3762030303478241, "rewards/margins": 7.12378454208374, "rewards/rejected": -7.499987602233887, "step": 62950 }, { "epoch": 0.75, "learning_rate": 8.68698073055464e-07, "logits/chosen": -2.8679556846618652, "logits/rejected": -2.330214738845825, "logps/chosen": -113.02579498291016, "logps/rejected": -998.58203125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6713641285896301, "rewards/margins": 8.918326377868652, "rewards/rejected": -9.589691162109375, "step": 62960 }, { "epoch": 0.75, "learning_rate": 8.679066322943105e-07, "logits/chosen": -2.838486909866333, "logits/rejected": -2.0075764656066895, "logps/chosen": -147.11952209472656, "logps/rejected": -1171.868896484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8975347280502319, "rewards/margins": 10.420872688293457, "rewards/rejected": -11.31840705871582, "step": 62970 }, { "epoch": 0.75, "learning_rate": 8.671154764921261e-07, "logits/chosen": -2.855430841445923, "logits/rejected": -2.3677616119384766, "logps/chosen": -112.14060974121094, "logps/rejected": -965.4573974609375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6156116127967834, "rewards/margins": 8.667524337768555, "rewards/rejected": -9.283136367797852, "step": 62980 }, { "epoch": 0.75, "learning_rate": 8.663246057870442e-07, "logits/chosen": -2.8872931003570557, "logits/rejected": -2.570500135421753, "logps/chosen": -81.53295135498047, "logps/rejected": -849.3748168945312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.3966643214225769, "rewards/margins": 7.7090044021606445, "rewards/rejected": -8.105668067932129, "step": 62990 }, { "epoch": 0.75, "learning_rate": 8.655340203171489e-07, "logits/chosen": -2.883476972579956, "logits/rejected": -2.0566391944885254, "logps/chosen": -130.41183471679688, "logps/rejected": -930.1710815429688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8213737607002258, "rewards/margins": 8.08193302154541, "rewards/rejected": -8.90330696105957, "step": 63000 }, { "epoch": 0.75, "eval_logits/chosen": -2.886659860610962, "eval_logits/rejected": -1.769876480102539, "eval_logps/chosen": -243.67430114746094, "eval_logps/rejected": -1144.4586181640625, "eval_loss": 0.0013439792674034834, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -1.8249403238296509, "eval_rewards/margins": 9.152392387390137, "eval_rewards/rejected": -10.977333068847656, "eval_runtime": 1.2182, "eval_samples_per_second": 4.104, "eval_steps_per_second": 2.463, "step": 63000 }, { "epoch": 0.75, "learning_rate": 8.647437202204739e-07, "logits/chosen": -2.8831686973571777, "logits/rejected": -2.555203676223755, "logps/chosen": -82.9642333984375, "logps/rejected": -780.7357177734375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4399278163909912, "rewards/margins": 6.986486911773682, "rewards/rejected": -7.426414489746094, "step": 63010 }, { "epoch": 0.75, "learning_rate": 8.639537056350047e-07, "logits/chosen": -2.905027389526367, "logits/rejected": -2.3988842964172363, "logps/chosen": -133.32720947265625, "logps/rejected": -921.4104614257812, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.8380048871040344, "rewards/margins": 7.969901084899902, "rewards/rejected": -8.807906150817871, "step": 63020 }, { "epoch": 0.75, "learning_rate": 8.631639766986738e-07, "logits/chosen": -2.9293055534362793, "logits/rejected": -2.317979574203491, "logps/chosen": -109.26107025146484, "logps/rejected": -1001.2283325195312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6085706353187561, "rewards/margins": 8.983654022216797, "rewards/rejected": -9.592225074768066, "step": 63030 }, { "epoch": 0.75, "learning_rate": 8.62374533549366e-07, "logits/chosen": -2.8631248474121094, "logits/rejected": -1.9933496713638306, "logps/chosen": -131.8883056640625, "logps/rejected": -1006.7509765625, "loss": 0.1101, "rewards/accuracies": 1.0, "rewards/chosen": -0.758771538734436, "rewards/margins": 8.919986724853516, "rewards/rejected": -9.67875862121582, "step": 63040 }, { "epoch": 0.75, "learning_rate": 8.615853763249168e-07, "logits/chosen": -2.8911986351013184, "logits/rejected": -2.209254264831543, "logps/chosen": -118.5865249633789, "logps/rejected": -1035.9774169921875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.6605644226074219, "rewards/margins": 9.296666145324707, "rewards/rejected": -9.957230567932129, "step": 63050 }, { "epoch": 0.75, "learning_rate": 8.607965051631104e-07, "logits/chosen": -2.871086597442627, "logits/rejected": -2.2443034648895264, "logps/chosen": -113.046875, "logps/rejected": -939.9300537109375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6287137269973755, "rewards/margins": 8.384967803955078, "rewards/rejected": -9.013679504394531, "step": 63060 }, { "epoch": 0.75, "learning_rate": 8.600079202016811e-07, "logits/chosen": -2.859584331512451, "logits/rejected": -2.463653802871704, "logps/chosen": -98.40120697021484, "logps/rejected": -893.7706909179688, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.5022397637367249, "rewards/margins": 8.038352012634277, "rewards/rejected": -8.540590286254883, "step": 63070 }, { "epoch": 0.76, "learning_rate": 8.592196215783144e-07, "logits/chosen": -2.873849630355835, "logits/rejected": -2.330552577972412, "logps/chosen": -105.60794830322266, "logps/rejected": -936.0426635742188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5715557336807251, "rewards/margins": 8.393545150756836, "rewards/rejected": -8.96510124206543, "step": 63080 }, { "epoch": 0.76, "learning_rate": 8.584316094306447e-07, "logits/chosen": -2.897891044616699, "logits/rejected": -2.2993621826171875, "logps/chosen": -110.90206146240234, "logps/rejected": -880.98974609375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5999763607978821, "rewards/margins": 7.812670707702637, "rewards/rejected": -8.412647247314453, "step": 63090 }, { "epoch": 0.76, "learning_rate": 8.576438838962564e-07, "logits/chosen": -2.8797237873077393, "logits/rejected": -2.474618911743164, "logps/chosen": -95.29431915283203, "logps/rejected": -962.71728515625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.49224036931991577, "rewards/margins": 8.749470710754395, "rewards/rejected": -9.24170970916748, "step": 63100 }, { "epoch": 0.76, "learning_rate": 8.568564451126848e-07, "logits/chosen": -2.8968043327331543, "logits/rejected": -2.513093948364258, "logps/chosen": -88.37904357910156, "logps/rejected": -850.9592895507812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.45790085196495056, "rewards/margins": 7.679616451263428, "rewards/rejected": -8.137516975402832, "step": 63110 }, { "epoch": 0.76, "learning_rate": 8.560692932174141e-07, "logits/chosen": -2.9077391624450684, "logits/rejected": -2.19101619720459, "logps/chosen": -114.50535583496094, "logps/rejected": -950.2149658203125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.665307343006134, "rewards/margins": 8.435347557067871, "rewards/rejected": -9.100655555725098, "step": 63120 }, { "epoch": 0.76, "learning_rate": 8.552824283478792e-07, "logits/chosen": -2.9000442028045654, "logits/rejected": -2.4927096366882324, "logps/chosen": -93.73419189453125, "logps/rejected": -878.2482299804688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.49293360114097595, "rewards/margins": 7.909055233001709, "rewards/rejected": -8.401988983154297, "step": 63130 }, { "epoch": 0.76, "learning_rate": 8.544958506414653e-07, "logits/chosen": -2.8915305137634277, "logits/rejected": -2.597139835357666, "logps/chosen": -83.50111389160156, "logps/rejected": -851.4178466796875, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -0.4427475929260254, "rewards/margins": 7.679948329925537, "rewards/rejected": -8.122696876525879, "step": 63140 }, { "epoch": 0.76, "learning_rate": 8.537095602355044e-07, "logits/chosen": -2.9030776023864746, "logits/rejected": -2.210012435913086, "logps/chosen": -128.69171142578125, "logps/rejected": -1013.3505859375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7449265122413635, "rewards/margins": 8.992273330688477, "rewards/rejected": -9.737199783325195, "step": 63150 }, { "epoch": 0.76, "learning_rate": 8.52923557267282e-07, "logits/chosen": -2.8926899433135986, "logits/rejected": -2.3341784477233887, "logps/chosen": -97.87289428710938, "logps/rejected": -974.5491943359375, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -0.533767580986023, "rewards/margins": 8.819684982299805, "rewards/rejected": -9.353452682495117, "step": 63160 }, { "epoch": 0.76, "learning_rate": 8.52137841874032e-07, "logits/chosen": -2.8871634006500244, "logits/rejected": -2.412041187286377, "logps/chosen": -106.46070861816406, "logps/rejected": -876.8883666992188, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5589151382446289, "rewards/margins": 7.801888465881348, "rewards/rejected": -8.360803604125977, "step": 63170 }, { "epoch": 0.76, "learning_rate": 8.51352414192938e-07, "logits/chosen": -2.9269492626190186, "logits/rejected": -2.3944568634033203, "logps/chosen": -115.020751953125, "logps/rejected": -948.5343017578125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.7232276201248169, "rewards/margins": 8.363348960876465, "rewards/rejected": -9.086576461791992, "step": 63180 }, { "epoch": 0.76, "learning_rate": 8.505672743611337e-07, "logits/chosen": -2.844857692718506, "logits/rejected": -1.990989089012146, "logps/chosen": -149.8647918701172, "logps/rejected": -1083.5128173828125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9763607978820801, "rewards/margins": 9.43655776977539, "rewards/rejected": -10.412919044494629, "step": 63190 }, { "epoch": 0.76, "learning_rate": 8.497824225157031e-07, "logits/chosen": -2.903384208679199, "logits/rejected": -2.3257031440734863, "logps/chosen": -99.87920379638672, "logps/rejected": -939.3515625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.49504223465919495, "rewards/margins": 8.511727333068848, "rewards/rejected": -9.006769180297852, "step": 63200 }, { "epoch": 0.76, "learning_rate": 8.489978587936773e-07, "logits/chosen": -2.8601083755493164, "logits/rejected": -2.077937126159668, "logps/chosen": -156.31063842773438, "logps/rejected": -1020.3933715820312, "loss": 0.1425, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9489511251449585, "rewards/margins": 8.849660873413086, "rewards/rejected": -9.798612594604492, "step": 63210 }, { "epoch": 0.76, "learning_rate": 8.482135833320398e-07, "logits/chosen": -2.901416301727295, "logits/rejected": -2.6230990886688232, "logps/chosen": -104.03546142578125, "logps/rejected": -904.7156372070312, "loss": 0.0987, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6589778065681458, "rewards/margins": 8.001686096191406, "rewards/rejected": -8.660663604736328, "step": 63220 }, { "epoch": 0.76, "learning_rate": 8.474295962677231e-07, "logits/chosen": -2.8602592945098877, "logits/rejected": -2.2681562900543213, "logps/chosen": -107.27201080322266, "logps/rejected": -937.2259521484375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5620689392089844, "rewards/margins": 8.425518989562988, "rewards/rejected": -8.987588882446289, "step": 63230 }, { "epoch": 0.76, "learning_rate": 8.466458977376091e-07, "logits/chosen": -2.874774694442749, "logits/rejected": -2.166321277618408, "logps/chosen": -117.26153564453125, "logps/rejected": -966.6437377929688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6496239900588989, "rewards/margins": 8.628782272338867, "rewards/rejected": -9.27840518951416, "step": 63240 }, { "epoch": 0.76, "learning_rate": 8.458624878785293e-07, "logits/chosen": -2.9106993675231934, "logits/rejected": -2.311574697494507, "logps/chosen": -106.20560455322266, "logps/rejected": -889.4732666015625, "loss": 0.0972, "rewards/accuracies": 1.0, "rewards/chosen": -0.564595103263855, "rewards/margins": 7.9528045654296875, "rewards/rejected": -8.517398834228516, "step": 63250 }, { "epoch": 0.76, "learning_rate": 8.45079366827265e-07, "logits/chosen": -2.917654514312744, "logits/rejected": -2.3731439113616943, "logps/chosen": -103.71492004394531, "logps/rejected": -938.5091552734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5863645076751709, "rewards/margins": 8.409147262573242, "rewards/rejected": -8.995511054992676, "step": 63260 }, { "epoch": 0.76, "learning_rate": 8.442965347205476e-07, "logits/chosen": -2.853011131286621, "logits/rejected": -2.0879969596862793, "logps/chosen": -126.1533432006836, "logps/rejected": -1083.855712890625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7591501474380493, "rewards/margins": 9.675694465637207, "rewards/rejected": -10.434843063354492, "step": 63270 }, { "epoch": 0.76, "learning_rate": 8.435139916950561e-07, "logits/chosen": -2.8962204456329346, "logits/rejected": -2.34633207321167, "logps/chosen": -114.49436950683594, "logps/rejected": -1000.0631103515625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6680270433425903, "rewards/margins": 8.936079025268555, "rewards/rejected": -9.604105949401855, "step": 63280 }, { "epoch": 0.76, "learning_rate": 8.427317378874209e-07, "logits/chosen": -2.8640780448913574, "logits/rejected": -1.8054378032684326, "logps/chosen": -162.6269989013672, "logps/rejected": -1164.4000244140625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9942114949226379, "rewards/margins": 10.215936660766602, "rewards/rejected": -11.210148811340332, "step": 63290 }, { "epoch": 0.76, "learning_rate": 8.419497734342213e-07, "logits/chosen": -2.890267848968506, "logits/rejected": -2.4186761379241943, "logps/chosen": -85.90743255615234, "logps/rejected": -840.1497192382812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.43971624970436096, "rewards/margins": 7.572062015533447, "rewards/rejected": -8.011777877807617, "step": 63300 }, { "epoch": 0.76, "learning_rate": 8.411680984719861e-07, "logits/chosen": -2.837977647781372, "logits/rejected": -2.3162083625793457, "logps/chosen": -117.21791076660156, "logps/rejected": -996.8779296875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6432185769081116, "rewards/margins": 8.931402206420898, "rewards/rejected": -9.574621200561523, "step": 63310 }, { "epoch": 0.76, "learning_rate": 8.40386713137194e-07, "logits/chosen": -2.899493455886841, "logits/rejected": -2.327080726623535, "logps/chosen": -101.44474792480469, "logps/rejected": -874.8264770507812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5324057340621948, "rewards/margins": 7.816463470458984, "rewards/rejected": -8.348869323730469, "step": 63320 }, { "epoch": 0.76, "learning_rate": 8.396056175662723e-07, "logits/chosen": -2.8702282905578613, "logits/rejected": -2.3805043697357178, "logps/chosen": -94.27623748779297, "logps/rejected": -940.6888427734375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4981527328491211, "rewards/margins": 8.519232749938965, "rewards/rejected": -9.01738452911377, "step": 63330 }, { "epoch": 0.76, "learning_rate": 8.388248118955991e-07, "logits/chosen": -2.895490884780884, "logits/rejected": -2.4660120010375977, "logps/chosen": -91.39222717285156, "logps/rejected": -902.7537841796875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4679771959781647, "rewards/margins": 8.188617706298828, "rewards/rejected": -8.656594276428223, "step": 63340 }, { "epoch": 0.76, "learning_rate": 8.38044296261499e-07, "logits/chosen": -2.880511999130249, "logits/rejected": -2.313298225402832, "logps/chosen": -104.51698303222656, "logps/rejected": -885.1897583007812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5890844464302063, "rewards/margins": 7.88291072845459, "rewards/rejected": -8.471994400024414, "step": 63350 }, { "epoch": 0.76, "learning_rate": 8.372640708002491e-07, "logits/chosen": -2.8982465267181396, "logits/rejected": -2.165829658508301, "logps/chosen": -144.0896759033203, "logps/rejected": -918.3787841796875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9816624522209167, "rewards/margins": 7.808079719543457, "rewards/rejected": -8.789741516113281, "step": 63360 }, { "epoch": 0.76, "learning_rate": 8.364841356480743e-07, "logits/chosen": -2.8687758445739746, "logits/rejected": -2.5485594272613525, "logps/chosen": -78.27177429199219, "logps/rejected": -843.37890625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.41497403383255005, "rewards/margins": 7.641152381896973, "rewards/rejected": -8.056127548217773, "step": 63370 }, { "epoch": 0.76, "learning_rate": 8.357044909411491e-07, "logits/chosen": -2.9179928302764893, "logits/rejected": -2.3762803077697754, "logps/chosen": -105.06103515625, "logps/rejected": -998.5382080078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5147758722305298, "rewards/margins": 9.071183204650879, "rewards/rejected": -9.585957527160645, "step": 63380 }, { "epoch": 0.76, "learning_rate": 8.349251368155975e-07, "logits/chosen": -2.8343162536621094, "logits/rejected": -2.1440491676330566, "logps/chosen": -116.28365325927734, "logps/rejected": -937.8397216796875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6414812803268433, "rewards/margins": 8.343792915344238, "rewards/rejected": -8.985274314880371, "step": 63390 }, { "epoch": 0.76, "learning_rate": 8.341460734074933e-07, "logits/chosen": -2.881946086883545, "logits/rejected": -2.1737513542175293, "logps/chosen": -121.5960922241211, "logps/rejected": -1165.606689453125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7001152634620667, "rewards/margins": 10.56342601776123, "rewards/rejected": -11.263540267944336, "step": 63400 }, { "epoch": 0.76, "learning_rate": 8.333673008528573e-07, "logits/chosen": -2.8889822959899902, "logits/rejected": -2.384333610534668, "logps/chosen": -80.68696594238281, "logps/rejected": -868.8751831054688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3607783615589142, "rewards/margins": 7.953089237213135, "rewards/rejected": -8.313867568969727, "step": 63410 }, { "epoch": 0.76, "learning_rate": 8.325888192876614e-07, "logits/chosen": -2.892204761505127, "logits/rejected": -2.2492330074310303, "logps/chosen": -121.3906021118164, "logps/rejected": -929.4593505859375, "loss": 0.1074, "rewards/accuracies": 1.0, "rewards/chosen": -0.7228930592536926, "rewards/margins": 8.173296928405762, "rewards/rejected": -8.896190643310547, "step": 63420 }, { "epoch": 0.76, "learning_rate": 8.318106288478267e-07, "logits/chosen": -2.9269473552703857, "logits/rejected": -2.272871494293213, "logps/chosen": -128.23435974121094, "logps/rejected": -956.455078125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7700355648994446, "rewards/margins": 8.397200584411621, "rewards/rejected": -9.167236328125, "step": 63430 }, { "epoch": 0.76, "learning_rate": 8.31032729669223e-07, "logits/chosen": -2.8895277976989746, "logits/rejected": -2.282698154449463, "logps/chosen": -104.41899108886719, "logps/rejected": -941.8077392578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5891507267951965, "rewards/margins": 8.433808326721191, "rewards/rejected": -9.022958755493164, "step": 63440 }, { "epoch": 0.76, "learning_rate": 8.302551218876695e-07, "logits/chosen": -2.8930184841156006, "logits/rejected": -2.3950061798095703, "logps/chosen": -92.31780242919922, "logps/rejected": -960.8082275390625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.4529293477535248, "rewards/margins": 8.762540817260742, "rewards/rejected": -9.215470314025879, "step": 63450 }, { "epoch": 0.76, "learning_rate": 8.294778056389341e-07, "logits/chosen": -2.9132769107818604, "logits/rejected": -2.1112194061279297, "logps/chosen": -140.0169219970703, "logps/rejected": -937.09228515625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8939335942268372, "rewards/margins": 8.08460807800293, "rewards/rejected": -8.978543281555176, "step": 63460 }, { "epoch": 0.76, "learning_rate": 8.287007810587349e-07, "logits/chosen": -2.886659622192383, "logits/rejected": -2.3629655838012695, "logps/chosen": -83.74935150146484, "logps/rejected": -847.5973510742188, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.4398830533027649, "rewards/margins": 7.660919189453125, "rewards/rejected": -8.100801467895508, "step": 63470 }, { "epoch": 0.76, "learning_rate": 8.279240482827369e-07, "logits/chosen": -2.883436441421509, "logits/rejected": -2.519136905670166, "logps/chosen": -96.79446411132812, "logps/rejected": -792.447509765625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.5627995729446411, "rewards/margins": 6.972334384918213, "rewards/rejected": -7.535134315490723, "step": 63480 }, { "epoch": 0.76, "learning_rate": 8.271476074465551e-07, "logits/chosen": -2.9040818214416504, "logits/rejected": -2.2919702529907227, "logps/chosen": -124.72640228271484, "logps/rejected": -995.1634521484375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6784171462059021, "rewards/margins": 8.865177154541016, "rewards/rejected": -9.543594360351562, "step": 63490 }, { "epoch": 0.76, "learning_rate": 8.263714586857557e-07, "logits/chosen": -2.8525049686431885, "logits/rejected": -2.2308242321014404, "logps/chosen": -113.73404693603516, "logps/rejected": -1064.4959716796875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.6314960718154907, "rewards/margins": 9.61483383178711, "rewards/rejected": -10.246330261230469, "step": 63500 }, { "epoch": 0.76, "learning_rate": 8.255956021358513e-07, "logits/chosen": -2.8625669479370117, "logits/rejected": -2.1588761806488037, "logps/chosen": -144.0150146484375, "logps/rejected": -1029.2547607421875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8586551547050476, "rewards/margins": 9.039074897766113, "rewards/rejected": -9.89772891998291, "step": 63510 }, { "epoch": 0.76, "learning_rate": 8.248200379323051e-07, "logits/chosen": -2.883025646209717, "logits/rejected": -2.3901071548461914, "logps/chosen": -109.6110610961914, "logps/rejected": -888.7340087890625, "loss": 0.0878, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6435670256614685, "rewards/margins": 7.858319282531738, "rewards/rejected": -8.501886367797852, "step": 63520 }, { "epoch": 0.76, "learning_rate": 8.240447662105269e-07, "logits/chosen": -2.8755080699920654, "logits/rejected": -2.171776533126831, "logps/chosen": -124.41573333740234, "logps/rejected": -959.4822387695312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7410039305686951, "rewards/margins": 8.444618225097656, "rewards/rejected": -9.185623168945312, "step": 63530 }, { "epoch": 0.76, "learning_rate": 8.232697871058779e-07, "logits/chosen": -2.8651061058044434, "logits/rejected": -2.231640338897705, "logps/chosen": -106.6843032836914, "logps/rejected": -1006.9615478515625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5495225787162781, "rewards/margins": 9.118563652038574, "rewards/rejected": -9.668085098266602, "step": 63540 }, { "epoch": 0.76, "learning_rate": 8.224951007536669e-07, "logits/chosen": -2.884148359298706, "logits/rejected": -2.3526906967163086, "logps/chosen": -125.87384033203125, "logps/rejected": -885.2706298828125, "loss": 0.0987, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7505306005477905, "rewards/margins": 7.707191467285156, "rewards/rejected": -8.457721710205078, "step": 63550 }, { "epoch": 0.76, "learning_rate": 8.217207072891522e-07, "logits/chosen": -2.917800188064575, "logits/rejected": -2.136168956756592, "logps/chosen": -129.60720825195312, "logps/rejected": -1106.3260498046875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.789212167263031, "rewards/margins": 9.860394477844238, "rewards/rejected": -10.649606704711914, "step": 63560 }, { "epoch": 0.76, "learning_rate": 8.209466068475411e-07, "logits/chosen": -2.8926734924316406, "logits/rejected": -2.366779088973999, "logps/chosen": -106.25909423828125, "logps/rejected": -979.41259765625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5806751251220703, "rewards/margins": 8.808635711669922, "rewards/rejected": -9.389310836791992, "step": 63570 }, { "epoch": 0.76, "learning_rate": 8.201727995639888e-07, "logits/chosen": -2.904655694961548, "logits/rejected": -2.4116275310516357, "logps/chosen": -108.3768081665039, "logps/rejected": -902.9241943359375, "loss": 0.0459, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6084831357002258, "rewards/margins": 8.027223587036133, "rewards/rejected": -8.635705947875977, "step": 63580 }, { "epoch": 0.76, "learning_rate": 8.193992855736011e-07, "logits/chosen": -2.88266658782959, "logits/rejected": -2.3904998302459717, "logps/chosen": -97.0669937133789, "logps/rejected": -908.6796875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5189794301986694, "rewards/margins": 8.173663139343262, "rewards/rejected": -8.692644119262695, "step": 63590 }, { "epoch": 0.76, "learning_rate": 8.186260650114297e-07, "logits/chosen": -2.8469038009643555, "logits/rejected": -2.1614291667938232, "logps/chosen": -126.573486328125, "logps/rejected": -927.47021484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7455940246582031, "rewards/margins": 8.13855266571045, "rewards/rejected": -8.884146690368652, "step": 63600 }, { "epoch": 0.76, "learning_rate": 8.178531380124774e-07, "logits/chosen": -2.8578100204467773, "logits/rejected": -2.486663818359375, "logps/chosen": -75.64910125732422, "logps/rejected": -842.0612182617188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.3557075262069702, "rewards/margins": 7.688066005706787, "rewards/rejected": -8.04377269744873, "step": 63610 }, { "epoch": 0.76, "learning_rate": 8.170805047116953e-07, "logits/chosen": -2.8586931228637695, "logits/rejected": -2.453784704208374, "logps/chosen": -81.87492370605469, "logps/rejected": -846.1139526367188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.3818624019622803, "rewards/margins": 7.704017639160156, "rewards/rejected": -8.085880279541016, "step": 63620 }, { "epoch": 0.76, "learning_rate": 8.163081652439831e-07, "logits/chosen": -2.927093029022217, "logits/rejected": -2.526876211166382, "logps/chosen": -75.48998260498047, "logps/rejected": -867.6813354492188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.3494751453399658, "rewards/margins": 7.957432746887207, "rewards/rejected": -8.306905746459961, "step": 63630 }, { "epoch": 0.76, "learning_rate": 8.155361197441889e-07, "logits/chosen": -2.9117729663848877, "logits/rejected": -2.451219081878662, "logps/chosen": -96.78941345214844, "logps/rejected": -880.4835205078125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.48557528853416443, "rewards/margins": 7.933101654052734, "rewards/rejected": -8.41867733001709, "step": 63640 }, { "epoch": 0.76, "learning_rate": 8.147643683471099e-07, "logits/chosen": -2.898838758468628, "logits/rejected": -2.352724075317383, "logps/chosen": -92.9031753540039, "logps/rejected": -955.9166259765625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.517828643321991, "rewards/margins": 8.658506393432617, "rewards/rejected": -9.176335334777832, "step": 63650 }, { "epoch": 0.76, "learning_rate": 8.139929111874922e-07, "logits/chosen": -2.911872625350952, "logits/rejected": -2.4033913612365723, "logps/chosen": -109.70368957519531, "logps/rejected": -896.9187622070312, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.5544570684432983, "rewards/margins": 8.01375675201416, "rewards/rejected": -8.568214416503906, "step": 63660 }, { "epoch": 0.76, "learning_rate": 8.132217484000288e-07, "logits/chosen": -2.8994600772857666, "logits/rejected": -2.5360782146453857, "logps/chosen": -80.2435073852539, "logps/rejected": -836.7311401367188, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4256533086299896, "rewards/margins": 7.5620927810668945, "rewards/rejected": -7.987745761871338, "step": 63670 }, { "epoch": 0.76, "learning_rate": 8.124508801193634e-07, "logits/chosen": -2.8911311626434326, "logits/rejected": -2.464578628540039, "logps/chosen": -99.54585266113281, "logps/rejected": -872.6408081054688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5734207630157471, "rewards/margins": 7.779489994049072, "rewards/rejected": -8.352910041809082, "step": 63680 }, { "epoch": 0.76, "learning_rate": 8.116803064800874e-07, "logits/chosen": -2.8932197093963623, "logits/rejected": -2.2683191299438477, "logps/chosen": -97.18000793457031, "logps/rejected": -980.7872924804688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4881739020347595, "rewards/margins": 8.922794342041016, "rewards/rejected": -9.410966873168945, "step": 63690 }, { "epoch": 0.76, "learning_rate": 8.109100276167406e-07, "logits/chosen": -2.8960695266723633, "logits/rejected": -2.5230872631073, "logps/chosen": -83.9857177734375, "logps/rejected": -906.9775390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4474005699157715, "rewards/margins": 8.246637344360352, "rewards/rejected": -8.694037437438965, "step": 63700 }, { "epoch": 0.76, "learning_rate": 8.101400436638119e-07, "logits/chosen": -2.903284788131714, "logits/rejected": -2.2621161937713623, "logps/chosen": -143.2559814453125, "logps/rejected": -1031.237548828125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9039134979248047, "rewards/margins": 8.992659568786621, "rewards/rejected": -9.896574020385742, "step": 63710 }, { "epoch": 0.76, "learning_rate": 8.093703547557389e-07, "logits/chosen": -2.8754382133483887, "logits/rejected": -2.5974693298339844, "logps/chosen": -73.04838562011719, "logps/rejected": -806.423095703125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.3361634910106659, "rewards/margins": 7.363029479980469, "rewards/rejected": -7.699193477630615, "step": 63720 }, { "epoch": 0.76, "learning_rate": 8.086009610269058e-07, "logits/chosen": -2.9169909954071045, "logits/rejected": -2.5814480781555176, "logps/chosen": -65.82063293457031, "logps/rejected": -819.1312255859375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.26584240794181824, "rewards/margins": 7.556153774261475, "rewards/rejected": -7.821995735168457, "step": 63730 }, { "epoch": 0.76, "learning_rate": 8.078318626116469e-07, "logits/chosen": -2.879298448562622, "logits/rejected": -2.2698073387145996, "logps/chosen": -101.37809753417969, "logps/rejected": -961.4642333984375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5146216154098511, "rewards/margins": 8.686812400817871, "rewards/rejected": -9.201435089111328, "step": 63740 }, { "epoch": 0.76, "learning_rate": 8.070630596442452e-07, "logits/chosen": -2.8779232501983643, "logits/rejected": -2.057922840118408, "logps/chosen": -119.70619201660156, "logps/rejected": -990.2103271484375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.6894051432609558, "rewards/margins": 8.824758529663086, "rewards/rejected": -9.514163970947266, "step": 63750 }, { "epoch": 0.76, "learning_rate": 8.062945522589314e-07, "logits/chosen": -2.868220567703247, "logits/rejected": -2.1049013137817383, "logps/chosen": -159.13877868652344, "logps/rejected": -961.5359497070312, "loss": 0.0719, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0751092433929443, "rewards/margins": 8.133365631103516, "rewards/rejected": -9.208475112915039, "step": 63760 }, { "epoch": 0.76, "learning_rate": 8.055263405898848e-07, "logits/chosen": -2.928677797317505, "logits/rejected": -2.35154390335083, "logps/chosen": -98.93528747558594, "logps/rejected": -829.2897338867188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5198394656181335, "rewards/margins": 7.396380424499512, "rewards/rejected": -7.916220664978027, "step": 63770 }, { "epoch": 0.76, "learning_rate": 8.047584247712326e-07, "logits/chosen": -2.893812894821167, "logits/rejected": -2.4232864379882812, "logps/chosen": -131.70953369140625, "logps/rejected": -919.54931640625, "loss": 0.1439, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8640034794807434, "rewards/margins": 7.94437313079834, "rewards/rejected": -8.80837631225586, "step": 63780 }, { "epoch": 0.76, "learning_rate": 8.039908049370521e-07, "logits/chosen": -2.889470338821411, "logits/rejected": -1.932509183883667, "logps/chosen": -153.42306518554688, "logps/rejected": -1016.4322509765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9140990972518921, "rewards/margins": 8.844843864440918, "rewards/rejected": -9.758943557739258, "step": 63790 }, { "epoch": 0.76, "learning_rate": 8.032234812213658e-07, "logits/chosen": -2.883702039718628, "logits/rejected": -2.335960865020752, "logps/chosen": -104.5804214477539, "logps/rejected": -1006.2970581054688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5110810995101929, "rewards/margins": 9.14111614227295, "rewards/rejected": -9.65219783782959, "step": 63800 }, { "epoch": 0.76, "learning_rate": 8.024564537581473e-07, "logits/chosen": -2.926994800567627, "logits/rejected": -2.3838958740234375, "logps/chosen": -133.12814331054688, "logps/rejected": -922.7199096679688, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.8754609823226929, "rewards/margins": 7.966689109802246, "rewards/rejected": -8.84214973449707, "step": 63810 }, { "epoch": 0.76, "learning_rate": 8.01689722681317e-07, "logits/chosen": -2.918046712875366, "logits/rejected": -2.3700737953186035, "logps/chosen": -104.1769027709961, "logps/rejected": -864.6954956054688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5546287894248962, "rewards/margins": 7.70168399810791, "rewards/rejected": -8.256312370300293, "step": 63820 }, { "epoch": 0.76, "learning_rate": 8.009232881247441e-07, "logits/chosen": -2.853705883026123, "logits/rejected": -2.1949281692504883, "logps/chosen": -127.44148254394531, "logps/rejected": -954.5469970703125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.6827548146247864, "rewards/margins": 8.477527618408203, "rewards/rejected": -9.160283088684082, "step": 63830 }, { "epoch": 0.76, "learning_rate": 8.001571502222463e-07, "logits/chosen": -2.818115472793579, "logits/rejected": -2.2757821083068848, "logps/chosen": -115.29502868652344, "logps/rejected": -975.1943359375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6850717663764954, "rewards/margins": 8.665677070617676, "rewards/rejected": -9.350748062133789, "step": 63840 }, { "epoch": 0.76, "learning_rate": 7.993913091075889e-07, "logits/chosen": -2.8956234455108643, "logits/rejected": -2.362072467803955, "logps/chosen": -134.62278747558594, "logps/rejected": -958.9375, "loss": 0.0949, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8379920721054077, "rewards/margins": 8.357314109802246, "rewards/rejected": -9.195306777954102, "step": 63850 }, { "epoch": 0.76, "learning_rate": 7.986257649144866e-07, "logits/chosen": -2.8508849143981934, "logits/rejected": -2.4016666412353516, "logps/chosen": -97.01445007324219, "logps/rejected": -883.6842041015625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4811824858188629, "rewards/margins": 7.976355075836182, "rewards/rejected": -8.457536697387695, "step": 63860 }, { "epoch": 0.76, "learning_rate": 7.978605177765997e-07, "logits/chosen": -2.866668701171875, "logits/rejected": -2.3719160556793213, "logps/chosen": -102.97843170166016, "logps/rejected": -913.9354248046875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.5939873456954956, "rewards/margins": 8.150935173034668, "rewards/rejected": -8.744922637939453, "step": 63870 }, { "epoch": 0.76, "learning_rate": 7.970955678275386e-07, "logits/chosen": -2.8705694675445557, "logits/rejected": -2.4058034420013428, "logps/chosen": -96.40480041503906, "logps/rejected": -926.7067260742188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4995444416999817, "rewards/margins": 8.384943008422852, "rewards/rejected": -8.884488105773926, "step": 63880 }, { "epoch": 0.76, "learning_rate": 7.963309152008619e-07, "logits/chosen": -2.930131673812866, "logits/rejected": -2.3974404335021973, "logps/chosen": -92.30670928955078, "logps/rejected": -984.4287109375, "loss": 0.0891, "rewards/accuracies": 1.0, "rewards/chosen": -0.442425400018692, "rewards/margins": 9.011341094970703, "rewards/rejected": -9.453766822814941, "step": 63890 }, { "epoch": 0.76, "learning_rate": 7.955665600300755e-07, "logits/chosen": -2.875018358230591, "logits/rejected": -2.5517032146453857, "logps/chosen": -79.80851745605469, "logps/rejected": -850.6287231445312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3762803375720978, "rewards/margins": 7.759622097015381, "rewards/rejected": -8.135902404785156, "step": 63900 }, { "epoch": 0.77, "learning_rate": 7.948025024486341e-07, "logits/chosen": -2.8587050437927246, "logits/rejected": -2.3391520977020264, "logps/chosen": -114.3650131225586, "logps/rejected": -911.3199462890625, "loss": 0.106, "rewards/accuracies": 1.0, "rewards/chosen": -0.6971551775932312, "rewards/margins": 8.023566246032715, "rewards/rejected": -8.720722198486328, "step": 63910 }, { "epoch": 0.77, "learning_rate": 7.940387425899396e-07, "logits/chosen": -2.8644051551818848, "logits/rejected": -2.3287055492401123, "logps/chosen": -112.0271224975586, "logps/rejected": -976.7269287109375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6290255784988403, "rewards/margins": 8.74825668334961, "rewards/rejected": -9.377281188964844, "step": 63920 }, { "epoch": 0.77, "learning_rate": 7.932752805873428e-07, "logits/chosen": -2.8777077198028564, "logits/rejected": -2.184128999710083, "logps/chosen": -116.6380844116211, "logps/rejected": -952.2678833007812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6833280324935913, "rewards/margins": 8.454803466796875, "rewards/rejected": -9.138132095336914, "step": 63930 }, { "epoch": 0.77, "learning_rate": 7.925121165741414e-07, "logits/chosen": -2.868069648742676, "logits/rejected": -2.369205951690674, "logps/chosen": -92.41344451904297, "logps/rejected": -959.2532958984375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.47010940313339233, "rewards/margins": 8.731858253479004, "rewards/rejected": -9.201967239379883, "step": 63940 }, { "epoch": 0.77, "learning_rate": 7.917492506835825e-07, "logits/chosen": -2.8885817527770996, "logits/rejected": -2.370954990386963, "logps/chosen": -102.0121078491211, "logps/rejected": -854.1492309570312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5199461579322815, "rewards/margins": 7.643750190734863, "rewards/rejected": -8.1636962890625, "step": 63950 }, { "epoch": 0.77, "learning_rate": 7.909866830488597e-07, "logits/chosen": -2.8819377422332764, "logits/rejected": -2.230591058731079, "logps/chosen": -154.78115844726562, "logps/rejected": -1002.8440551757812, "loss": 0.1279, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0030847787857056, "rewards/margins": 8.605730056762695, "rewards/rejected": -9.608816146850586, "step": 63960 }, { "epoch": 0.77, "learning_rate": 7.902244138031157e-07, "logits/chosen": -2.8529582023620605, "logits/rejected": -2.222620964050293, "logps/chosen": -140.99508666992188, "logps/rejected": -946.1427612304688, "loss": 0.1168, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8988285064697266, "rewards/margins": 8.178167343139648, "rewards/rejected": -9.076995849609375, "step": 63970 }, { "epoch": 0.77, "learning_rate": 7.894624430794409e-07, "logits/chosen": -2.9037203788757324, "logits/rejected": -2.3541975021362305, "logps/chosen": -104.040771484375, "logps/rejected": -997.7913208007812, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5372999906539917, "rewards/margins": 9.056720733642578, "rewards/rejected": -9.594019889831543, "step": 63980 }, { "epoch": 0.77, "learning_rate": 7.887007710108721e-07, "logits/chosen": -2.8797595500946045, "logits/rejected": -2.2375826835632324, "logps/chosen": -122.37480163574219, "logps/rejected": -992.8526611328125, "loss": 0.1582, "rewards/accuracies": 1.0, "rewards/chosen": -0.7435980439186096, "rewards/margins": 8.796223640441895, "rewards/rejected": -9.539820671081543, "step": 63990 }, { "epoch": 0.77, "learning_rate": 7.879393977303956e-07, "logits/chosen": -2.8506839275360107, "logits/rejected": -2.394369125366211, "logps/chosen": -95.0003662109375, "logps/rejected": -921.0072021484375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5049929618835449, "rewards/margins": 8.305845260620117, "rewards/rejected": -8.81083869934082, "step": 64000 }, { "epoch": 0.77, "learning_rate": 7.871783233709452e-07, "logits/chosen": -2.898568630218506, "logits/rejected": -2.1527457237243652, "logps/chosen": -132.7812957763672, "logps/rejected": -984.2545776367188, "loss": 0.139, "rewards/accuracies": 1.0, "rewards/chosen": -0.7900813221931458, "rewards/margins": 8.637434959411621, "rewards/rejected": -9.427515983581543, "step": 64010 }, { "epoch": 0.77, "learning_rate": 7.864175480654024e-07, "logits/chosen": -2.820496082305908, "logits/rejected": -1.9700568914413452, "logps/chosen": -135.2368621826172, "logps/rejected": -1161.692626953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.758099377155304, "rewards/margins": 10.43366527557373, "rewards/rejected": -11.191763877868652, "step": 64020 }, { "epoch": 0.77, "learning_rate": 7.856570719465967e-07, "logits/chosen": -2.850254774093628, "logits/rejected": -2.1861603260040283, "logps/chosen": -127.06758880615234, "logps/rejected": -945.5123901367188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7413758635520935, "rewards/margins": 8.312578201293945, "rewards/rejected": -9.053954124450684, "step": 64030 }, { "epoch": 0.77, "learning_rate": 7.848968951473054e-07, "logits/chosen": -2.891916275024414, "logits/rejected": -2.3183538913726807, "logps/chosen": -111.06536865234375, "logps/rejected": -922.2384643554688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6366073489189148, "rewards/margins": 8.200494766235352, "rewards/rejected": -8.837100982666016, "step": 64040 }, { "epoch": 0.77, "learning_rate": 7.841370178002519e-07, "logits/chosen": -2.867598295211792, "logits/rejected": -2.473968029022217, "logps/chosen": -87.8703384399414, "logps/rejected": -878.9879150390625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.3978356420993805, "rewards/margins": 8.008219718933105, "rewards/rejected": -8.40605640411377, "step": 64050 }, { "epoch": 0.77, "learning_rate": 7.833774400381095e-07, "logits/chosen": -2.882878065109253, "logits/rejected": -2.4469501972198486, "logps/chosen": -80.49642181396484, "logps/rejected": -873.9775390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.3830607533454895, "rewards/margins": 7.9756340980529785, "rewards/rejected": -8.358694076538086, "step": 64060 }, { "epoch": 0.77, "learning_rate": 7.826181619934986e-07, "logits/chosen": -2.841634750366211, "logits/rejected": -1.8774042129516602, "logps/chosen": -134.48475646972656, "logps/rejected": -1160.8131103515625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.7613822817802429, "rewards/margins": 10.430349349975586, "rewards/rejected": -11.191732406616211, "step": 64070 }, { "epoch": 0.77, "learning_rate": 7.818591837989864e-07, "logits/chosen": -2.8747799396514893, "logits/rejected": -2.2157464027404785, "logps/chosen": -116.77164459228516, "logps/rejected": -1023.9826049804688, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6716309189796448, "rewards/margins": 9.161062240600586, "rewards/rejected": -9.832693099975586, "step": 64080 }, { "epoch": 0.77, "learning_rate": 7.811005055870891e-07, "logits/chosen": -2.916651964187622, "logits/rejected": -2.2738518714904785, "logps/chosen": -104.33846282958984, "logps/rejected": -965.3873291015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5658776760101318, "rewards/margins": 8.695155143737793, "rewards/rejected": -9.26103401184082, "step": 64090 }, { "epoch": 0.77, "learning_rate": 7.803421274902692e-07, "logits/chosen": -2.886502742767334, "logits/rejected": -2.1244969367980957, "logps/chosen": -121.90254974365234, "logps/rejected": -1019.9051513671875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7170194387435913, "rewards/margins": 9.072890281677246, "rewards/rejected": -9.789911270141602, "step": 64100 }, { "epoch": 0.77, "learning_rate": 7.795840496409388e-07, "logits/chosen": -2.9261655807495117, "logits/rejected": -2.4502220153808594, "logps/chosen": -111.3231430053711, "logps/rejected": -814.6978759765625, "loss": 0.095, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6853601932525635, "rewards/margins": 7.080294609069824, "rewards/rejected": -7.765655517578125, "step": 64110 }, { "epoch": 0.77, "learning_rate": 7.78826272171454e-07, "logits/chosen": -2.8828539848327637, "logits/rejected": -2.293578624725342, "logps/chosen": -113.48416900634766, "logps/rejected": -972.45703125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6546975374221802, "rewards/margins": 8.66847038269043, "rewards/rejected": -9.32316780090332, "step": 64120 }, { "epoch": 0.77, "learning_rate": 7.780687952141219e-07, "logits/chosen": -2.8777503967285156, "logits/rejected": -2.406592845916748, "logps/chosen": -98.90187072753906, "logps/rejected": -938.8580932617188, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5494096875190735, "rewards/margins": 8.447860717773438, "rewards/rejected": -8.997270584106445, "step": 64130 }, { "epoch": 0.77, "learning_rate": 7.773116189011956e-07, "logits/chosen": -2.8012874126434326, "logits/rejected": -2.185354232788086, "logps/chosen": -102.58799743652344, "logps/rejected": -936.0892333984375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5352098941802979, "rewards/margins": 8.427510261535645, "rewards/rejected": -8.962719917297363, "step": 64140 }, { "epoch": 0.77, "learning_rate": 7.765547433648762e-07, "logits/chosen": -2.8521206378936768, "logits/rejected": -1.8439620733261108, "logps/chosen": -152.59393310546875, "logps/rejected": -1143.5194091796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.871616005897522, "rewards/margins": 10.149660110473633, "rewards/rejected": -11.021276473999023, "step": 64150 }, { "epoch": 0.77, "learning_rate": 7.75798168737312e-07, "logits/chosen": -2.9119961261749268, "logits/rejected": -2.3980398178100586, "logps/chosen": -103.302734375, "logps/rejected": -908.3858642578125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5801141262054443, "rewards/margins": 8.106461524963379, "rewards/rejected": -8.686574935913086, "step": 64160 }, { "epoch": 0.77, "learning_rate": 7.750418951505987e-07, "logits/chosen": -2.82727313041687, "logits/rejected": -2.2928342819213867, "logps/chosen": -98.07206726074219, "logps/rejected": -944.3805541992188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.516240119934082, "rewards/margins": 8.542743682861328, "rewards/rejected": -9.058984756469727, "step": 64170 }, { "epoch": 0.77, "learning_rate": 7.742859227367802e-07, "logits/chosen": -2.898005723953247, "logits/rejected": -2.500340700149536, "logps/chosen": -139.90867614746094, "logps/rejected": -952.0982666015625, "loss": 0.0923, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9219378232955933, "rewards/margins": 8.220706939697266, "rewards/rejected": -9.142644882202148, "step": 64180 }, { "epoch": 0.77, "learning_rate": 7.735302516278462e-07, "logits/chosen": -2.8241240978240967, "logits/rejected": -2.38472318649292, "logps/chosen": -94.2488784790039, "logps/rejected": -890.76953125, "loss": 0.0726, "rewards/accuracies": 1.0, "rewards/chosen": -0.4938432276248932, "rewards/margins": 8.03147029876709, "rewards/rejected": -8.525314331054688, "step": 64190 }, { "epoch": 0.77, "learning_rate": 7.727748819557349e-07, "logits/chosen": -2.8895537853240967, "logits/rejected": -2.4287972450256348, "logps/chosen": -87.72435760498047, "logps/rejected": -885.6134643554688, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.4448358416557312, "rewards/margins": 8.027826309204102, "rewards/rejected": -8.472661018371582, "step": 64200 }, { "epoch": 0.77, "learning_rate": 7.720198138523322e-07, "logits/chosen": -2.894646406173706, "logits/rejected": -2.3829901218414307, "logps/chosen": -118.35438537597656, "logps/rejected": -953.2454833984375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.6545004844665527, "rewards/margins": 8.481245040893555, "rewards/rejected": -9.135744094848633, "step": 64210 }, { "epoch": 0.77, "learning_rate": 7.712650474494707e-07, "logits/chosen": -2.926865816116333, "logits/rejected": -2.47247576713562, "logps/chosen": -91.27005767822266, "logps/rejected": -874.5230712890625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.4906507134437561, "rewards/margins": 7.882427215576172, "rewards/rejected": -8.373077392578125, "step": 64220 }, { "epoch": 0.77, "learning_rate": 7.705105828789305e-07, "logits/chosen": -2.9156100749969482, "logits/rejected": -2.3667590618133545, "logps/chosen": -90.80989074707031, "logps/rejected": -922.6613159179688, "loss": 0.1024, "rewards/accuracies": 1.0, "rewards/chosen": -0.4616682529449463, "rewards/margins": 8.38023853302002, "rewards/rejected": -8.84190559387207, "step": 64230 }, { "epoch": 0.77, "learning_rate": 7.6975642027244e-07, "logits/chosen": -2.896862506866455, "logits/rejected": -2.383358955383301, "logps/chosen": -106.2735824584961, "logps/rejected": -1019.0486450195312, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5679019689559937, "rewards/margins": 9.226776123046875, "rewards/rejected": -9.794678688049316, "step": 64240 }, { "epoch": 0.77, "learning_rate": 7.690025597616721e-07, "logits/chosen": -2.8481063842773438, "logits/rejected": -2.2661240100860596, "logps/chosen": -92.67642974853516, "logps/rejected": -951.4345703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.45801568031311035, "rewards/margins": 8.670148849487305, "rewards/rejected": -9.128164291381836, "step": 64250 }, { "epoch": 0.77, "learning_rate": 7.682490014782498e-07, "logits/chosen": -2.864658832550049, "logits/rejected": -2.323047637939453, "logps/chosen": -92.6794662475586, "logps/rejected": -931.68701171875, "loss": 0.1302, "rewards/accuracies": 1.0, "rewards/chosen": -0.4918311536312103, "rewards/margins": 8.434640884399414, "rewards/rejected": -8.926472663879395, "step": 64260 }, { "epoch": 0.77, "learning_rate": 7.674957455537418e-07, "logits/chosen": -2.8935890197753906, "logits/rejected": -2.2605557441711426, "logps/chosen": -104.5002670288086, "logps/rejected": -966.197265625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.5453861355781555, "rewards/margins": 8.729516983032227, "rewards/rejected": -9.27490234375, "step": 64270 }, { "epoch": 0.77, "learning_rate": 7.667427921196649e-07, "logits/chosen": -2.88792085647583, "logits/rejected": -2.433912754058838, "logps/chosen": -92.65565490722656, "logps/rejected": -1009.4635009765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.45951956510543823, "rewards/margins": 9.246996879577637, "rewards/rejected": -9.706515312194824, "step": 64280 }, { "epoch": 0.77, "learning_rate": 7.659901413074827e-07, "logits/chosen": -2.8695483207702637, "logits/rejected": -2.512422800064087, "logps/chosen": -79.6757583618164, "logps/rejected": -897.4528198242188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.36906975507736206, "rewards/margins": 8.233209609985352, "rewards/rejected": -8.602279663085938, "step": 64290 }, { "epoch": 0.77, "learning_rate": 7.652377932486057e-07, "logits/chosen": -2.8639602661132812, "logits/rejected": -2.2841084003448486, "logps/chosen": -109.99980163574219, "logps/rejected": -968.3519287109375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6572784185409546, "rewards/margins": 8.635865211486816, "rewards/rejected": -9.293142318725586, "step": 64300 }, { "epoch": 0.77, "learning_rate": 7.644857480743928e-07, "logits/chosen": -2.819457769393921, "logits/rejected": -2.1395392417907715, "logps/chosen": -116.53787994384766, "logps/rejected": -990.9122314453125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6274291276931763, "rewards/margins": 8.872289657592773, "rewards/rejected": -9.49971866607666, "step": 64310 }, { "epoch": 0.77, "learning_rate": 7.637340059161474e-07, "logits/chosen": -2.9014828205108643, "logits/rejected": -2.448742628097534, "logps/chosen": -98.93204498291016, "logps/rejected": -883.7777099609375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5089682340621948, "rewards/margins": 7.937430381774902, "rewards/rejected": -8.446398735046387, "step": 64320 }, { "epoch": 0.77, "learning_rate": 7.629825669051222e-07, "logits/chosen": -2.90354585647583, "logits/rejected": -2.2294578552246094, "logps/chosen": -111.97712707519531, "logps/rejected": -971.8294677734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6365314722061157, "rewards/margins": 8.69063663482666, "rewards/rejected": -9.327168464660645, "step": 64330 }, { "epoch": 0.77, "learning_rate": 7.622314311725154e-07, "logits/chosen": -2.875715970993042, "logits/rejected": -2.127267599105835, "logps/chosen": -128.34080505371094, "logps/rejected": -973.2023315429688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.772868812084198, "rewards/margins": 8.544743537902832, "rewards/rejected": -9.317611694335938, "step": 64340 }, { "epoch": 0.77, "learning_rate": 7.614805988494753e-07, "logits/chosen": -2.8029685020446777, "logits/rejected": -2.237326145172119, "logps/chosen": -122.26585388183594, "logps/rejected": -949.17919921875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7321223020553589, "rewards/margins": 8.35774040222168, "rewards/rejected": -9.089862823486328, "step": 64350 }, { "epoch": 0.77, "learning_rate": 7.60730070067095e-07, "logits/chosen": -2.8609893321990967, "logits/rejected": -2.1755940914154053, "logps/chosen": -125.8594741821289, "logps/rejected": -1013.4693603515625, "loss": 0.0965, "rewards/accuracies": 1.0, "rewards/chosen": -0.7297497987747192, "rewards/margins": 9.005735397338867, "rewards/rejected": -9.73548412322998, "step": 64360 }, { "epoch": 0.77, "learning_rate": 7.59979844956413e-07, "logits/chosen": -2.9092507362365723, "logits/rejected": -2.3677096366882324, "logps/chosen": -107.4112548828125, "logps/rejected": -958.8615112304688, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.5791958570480347, "rewards/margins": 8.623574256896973, "rewards/rejected": -9.20276927947998, "step": 64370 }, { "epoch": 0.77, "learning_rate": 7.592299236484174e-07, "logits/chosen": -2.8920114040374756, "logits/rejected": -2.4041450023651123, "logps/chosen": -104.4903335571289, "logps/rejected": -911.7637939453125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5659821629524231, "rewards/margins": 8.166671752929688, "rewards/rejected": -8.732653617858887, "step": 64380 }, { "epoch": 0.77, "learning_rate": 7.58480306274042e-07, "logits/chosen": -2.854336738586426, "logits/rejected": -2.099769353866577, "logps/chosen": -120.73344421386719, "logps/rejected": -998.9528198242188, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6005946397781372, "rewards/margins": 8.985758781433105, "rewards/rejected": -9.586353302001953, "step": 64390 }, { "epoch": 0.77, "learning_rate": 7.577309929641688e-07, "logits/chosen": -2.872098207473755, "logits/rejected": -2.2831084728240967, "logps/chosen": -95.85221862792969, "logps/rejected": -939.08154296875, "loss": 0.1029, "rewards/accuracies": 1.0, "rewards/chosen": -0.4794654846191406, "rewards/margins": 8.525250434875488, "rewards/rejected": -9.004715919494629, "step": 64400 }, { "epoch": 0.77, "learning_rate": 7.569819838496247e-07, "logits/chosen": -2.916983127593994, "logits/rejected": -2.4050092697143555, "logps/chosen": -90.15177917480469, "logps/rejected": -846.7981567382812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.453056275844574, "rewards/margins": 7.618109703063965, "rewards/rejected": -8.071166038513184, "step": 64410 }, { "epoch": 0.77, "learning_rate": 7.562332790611856e-07, "logits/chosen": -2.885606050491333, "logits/rejected": -2.1356377601623535, "logps/chosen": -110.34156799316406, "logps/rejected": -984.5988159179688, "loss": 0.0622, "rewards/accuracies": 1.0, "rewards/chosen": -0.6247719526290894, "rewards/margins": 8.825095176696777, "rewards/rejected": -9.449865341186523, "step": 64420 }, { "epoch": 0.77, "learning_rate": 7.554848787295737e-07, "logits/chosen": -2.909010410308838, "logits/rejected": -2.326787233352661, "logps/chosen": -139.89651489257812, "logps/rejected": -979.1322021484375, "loss": 0.1348, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8675370216369629, "rewards/margins": 8.522172927856445, "rewards/rejected": -9.38970947265625, "step": 64430 }, { "epoch": 0.77, "learning_rate": 7.547367829854557e-07, "logits/chosen": -2.9216501712799072, "logits/rejected": -2.6592729091644287, "logps/chosen": -112.33866119384766, "logps/rejected": -752.3470458984375, "loss": 0.1625, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7156018018722534, "rewards/margins": 6.430961608886719, "rewards/rejected": -7.146563529968262, "step": 64440 }, { "epoch": 0.77, "learning_rate": 7.539889919594484e-07, "logits/chosen": -2.865506649017334, "logits/rejected": -2.146360158920288, "logps/chosen": -123.64286804199219, "logps/rejected": -1055.6865234375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6582121849060059, "rewards/margins": 9.5032377243042, "rewards/rejected": -10.161450386047363, "step": 64450 }, { "epoch": 0.77, "learning_rate": 7.532415057821138e-07, "logits/chosen": -2.8446412086486816, "logits/rejected": -2.5327727794647217, "logps/chosen": -77.48793029785156, "logps/rejected": -836.8981323242188, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.3744068443775177, "rewards/margins": 7.60573673248291, "rewards/rejected": -7.9801435470581055, "step": 64460 }, { "epoch": 0.77, "learning_rate": 7.524943245839606e-07, "logits/chosen": -2.8638787269592285, "logits/rejected": -2.4440793991088867, "logps/chosen": -103.46099853515625, "logps/rejected": -947.3822021484375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.5716298818588257, "rewards/margins": 8.491998672485352, "rewards/rejected": -9.063629150390625, "step": 64470 }, { "epoch": 0.77, "learning_rate": 7.517474484954451e-07, "logits/chosen": -2.858508586883545, "logits/rejected": -2.495814561843872, "logps/chosen": -80.8317642211914, "logps/rejected": -904.1989135742188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4338700771331787, "rewards/margins": 8.242754936218262, "rewards/rejected": -8.676624298095703, "step": 64480 }, { "epoch": 0.77, "learning_rate": 7.510008776469694e-07, "logits/chosen": -2.9255659580230713, "logits/rejected": -2.403712511062622, "logps/chosen": -106.33778381347656, "logps/rejected": -960.9456176757812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6309507489204407, "rewards/margins": 8.589883804321289, "rewards/rejected": -9.220834732055664, "step": 64490 }, { "epoch": 0.77, "learning_rate": 7.502546121688836e-07, "logits/chosen": -2.91312313079834, "logits/rejected": -2.3510677814483643, "logps/chosen": -101.22648620605469, "logps/rejected": -883.38037109375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5619779229164124, "rewards/margins": 7.882796287536621, "rewards/rejected": -8.44477367401123, "step": 64500 }, { "epoch": 0.77, "learning_rate": 7.495086521914821e-07, "logits/chosen": -2.870389938354492, "logits/rejected": -2.492051601409912, "logps/chosen": -91.2424087524414, "logps/rejected": -840.7777099609375, "loss": 0.0961, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5262559652328491, "rewards/margins": 7.499185085296631, "rewards/rejected": -8.025442123413086, "step": 64510 }, { "epoch": 0.77, "learning_rate": 7.487629978450081e-07, "logits/chosen": -2.9217944145202637, "logits/rejected": -2.482984781265259, "logps/chosen": -96.5851821899414, "logps/rejected": -939.2586059570312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5180798172950745, "rewards/margins": 8.489545822143555, "rewards/rejected": -9.007625579833984, "step": 64520 }, { "epoch": 0.77, "learning_rate": 7.480176492596508e-07, "logits/chosen": -2.839714765548706, "logits/rejected": -2.306688070297241, "logps/chosen": -109.1355972290039, "logps/rejected": -1001.9923095703125, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.5992649793624878, "rewards/margins": 9.00604248046875, "rewards/rejected": -9.605306625366211, "step": 64530 }, { "epoch": 0.77, "learning_rate": 7.472726065655459e-07, "logits/chosen": -2.8447842597961426, "logits/rejected": -2.2658607959747314, "logps/chosen": -162.4919891357422, "logps/rejected": -816.372314453125, "loss": 0.2089, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.1121876239776611, "rewards/margins": 6.651291847229004, "rewards/rejected": -7.763479709625244, "step": 64540 }, { "epoch": 0.77, "learning_rate": 7.465278698927761e-07, "logits/chosen": -2.8962879180908203, "logits/rejected": -2.4403493404388428, "logps/chosen": -96.34648895263672, "logps/rejected": -880.23974609375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5356329679489136, "rewards/margins": 7.874686241149902, "rewards/rejected": -8.410319328308105, "step": 64550 }, { "epoch": 0.77, "learning_rate": 7.457834393713709e-07, "logits/chosen": -2.8277649879455566, "logits/rejected": -2.4550201892852783, "logps/chosen": -100.6128158569336, "logps/rejected": -803.5455322265625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.5959945321083069, "rewards/margins": 7.068556308746338, "rewards/rejected": -7.664549350738525, "step": 64560 }, { "epoch": 0.77, "learning_rate": 7.450393151313043e-07, "logits/chosen": -2.873626232147217, "logits/rejected": -2.3952624797821045, "logps/chosen": -106.83878326416016, "logps/rejected": -897.6657104492188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6008208990097046, "rewards/margins": 7.993418216705322, "rewards/rejected": -8.594239234924316, "step": 64570 }, { "epoch": 0.77, "learning_rate": 7.442954973024991e-07, "logits/chosen": -2.8714427947998047, "logits/rejected": -1.9961283206939697, "logps/chosen": -138.92481994628906, "logps/rejected": -1029.042724609375, "loss": 0.117, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7987674474716187, "rewards/margins": 9.085890769958496, "rewards/rejected": -9.884657859802246, "step": 64580 }, { "epoch": 0.77, "learning_rate": 7.435519860148236e-07, "logits/chosen": -2.8688747882843018, "logits/rejected": -2.113481044769287, "logps/chosen": -134.71380615234375, "logps/rejected": -1011.1295776367188, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8285614252090454, "rewards/margins": 8.865694999694824, "rewards/rejected": -9.694257736206055, "step": 64590 }, { "epoch": 0.77, "learning_rate": 7.428087813980931e-07, "logits/chosen": -2.8765523433685303, "logits/rejected": -2.3846442699432373, "logps/chosen": -98.66777038574219, "logps/rejected": -891.2098388671875, "loss": 0.1293, "rewards/accuracies": 1.0, "rewards/chosen": -0.5049866437911987, "rewards/margins": 8.026139259338379, "rewards/rejected": -8.531126022338867, "step": 64600 }, { "epoch": 0.77, "learning_rate": 7.42065883582069e-07, "logits/chosen": -2.8862690925598145, "logits/rejected": -2.3904294967651367, "logps/chosen": -90.9373779296875, "logps/rejected": -834.64208984375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.45044589042663574, "rewards/margins": 7.512997627258301, "rewards/rejected": -7.963443756103516, "step": 64610 }, { "epoch": 0.77, "learning_rate": 7.413232926964592e-07, "logits/chosen": -2.8594448566436768, "logits/rejected": -2.3472533226013184, "logps/chosen": -109.15730285644531, "logps/rejected": -893.58935546875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.622674286365509, "rewards/margins": 7.940126895904541, "rewards/rejected": -8.562800407409668, "step": 64620 }, { "epoch": 0.77, "learning_rate": 7.405810088709189e-07, "logits/chosen": -2.8858401775360107, "logits/rejected": -2.4429385662078857, "logps/chosen": -100.50333404541016, "logps/rejected": -804.3829956054688, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.5681105852127075, "rewards/margins": 7.093106269836426, "rewards/rejected": -7.66121768951416, "step": 64630 }, { "epoch": 0.77, "learning_rate": 7.398390322350468e-07, "logits/chosen": -2.885603189468384, "logits/rejected": -2.4887208938598633, "logps/chosen": -91.7851333618164, "logps/rejected": -894.8212890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4809163510799408, "rewards/margins": 8.079511642456055, "rewards/rejected": -8.560429573059082, "step": 64640 }, { "epoch": 0.77, "learning_rate": 7.390973629183915e-07, "logits/chosen": -2.8899688720703125, "logits/rejected": -2.4728264808654785, "logps/chosen": -82.72399139404297, "logps/rejected": -786.9876098632812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.4318099021911621, "rewards/margins": 7.0604376792907715, "rewards/rejected": -7.492247581481934, "step": 64650 }, { "epoch": 0.77, "learning_rate": 7.383560010504453e-07, "logits/chosen": -2.920725107192993, "logits/rejected": -2.6163206100463867, "logps/chosen": -120.47483825683594, "logps/rejected": -810.0625, "loss": 0.1958, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8016706705093384, "rewards/margins": 6.92968225479126, "rewards/rejected": -7.73135232925415, "step": 64660 }, { "epoch": 0.77, "learning_rate": 7.376149467606492e-07, "logits/chosen": -2.8503971099853516, "logits/rejected": -2.3410823345184326, "logps/chosen": -105.65525817871094, "logps/rejected": -900.3724365234375, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -0.5874770879745483, "rewards/margins": 8.035599708557129, "rewards/rejected": -8.623077392578125, "step": 64670 }, { "epoch": 0.77, "learning_rate": 7.368742001783882e-07, "logits/chosen": -2.9452433586120605, "logits/rejected": -2.663917303085327, "logps/chosen": -89.0570068359375, "logps/rejected": -745.1298217773438, "loss": 0.0436, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5054773092269897, "rewards/margins": 6.575619697570801, "rewards/rejected": -7.081097602844238, "step": 64680 }, { "epoch": 0.77, "learning_rate": 7.36133761432995e-07, "logits/chosen": -2.8722591400146484, "logits/rejected": -2.404783248901367, "logps/chosen": -109.29618835449219, "logps/rejected": -879.0789184570312, "loss": 0.0958, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6667930483818054, "rewards/margins": 7.75685977935791, "rewards/rejected": -8.423653602600098, "step": 64690 }, { "epoch": 0.77, "learning_rate": 7.35393630653749e-07, "logits/chosen": -2.8631958961486816, "logits/rejected": -2.293809413909912, "logps/chosen": -113.80949401855469, "logps/rejected": -995.0740966796875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6002198457717896, "rewards/margins": 8.955445289611816, "rewards/rejected": -9.555665969848633, "step": 64700 }, { "epoch": 0.77, "learning_rate": 7.346538079698731e-07, "logits/chosen": -2.869558811187744, "logits/rejected": -2.333604574203491, "logps/chosen": -102.5909423828125, "logps/rejected": -869.7550659179688, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5434123873710632, "rewards/margins": 7.780430793762207, "rewards/rejected": -8.323843002319336, "step": 64710 }, { "epoch": 0.77, "learning_rate": 7.339142935105395e-07, "logits/chosen": -2.9289169311523438, "logits/rejected": -2.431821346282959, "logps/chosen": -97.2223892211914, "logps/rejected": -962.6642456054688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5220270156860352, "rewards/margins": 8.715651512145996, "rewards/rejected": -9.237678527832031, "step": 64720 }, { "epoch": 0.77, "learning_rate": 7.331750874048651e-07, "logits/chosen": -2.8844590187072754, "logits/rejected": -2.329204559326172, "logps/chosen": -119.01170349121094, "logps/rejected": -991.9212036132812, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.6447717547416687, "rewards/margins": 8.88992977142334, "rewards/rejected": -9.53470230102539, "step": 64730 }, { "epoch": 0.77, "learning_rate": 7.324361897819135e-07, "logits/chosen": -2.903212785720825, "logits/rejected": -2.4314124584198, "logps/chosen": -97.26438903808594, "logps/rejected": -899.322265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5231078863143921, "rewards/margins": 8.098030090332031, "rewards/rejected": -8.621137619018555, "step": 64740 }, { "epoch": 0.78, "learning_rate": 7.316976007706936e-07, "logits/chosen": -2.8184115886688232, "logits/rejected": -2.18658709526062, "logps/chosen": -113.72853088378906, "logps/rejected": -999.6886596679688, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": -0.698742687702179, "rewards/margins": 8.901944160461426, "rewards/rejected": -9.600686073303223, "step": 64750 }, { "epoch": 0.78, "learning_rate": 7.309593205001616e-07, "logits/chosen": -2.852388620376587, "logits/rejected": -2.3281710147857666, "logps/chosen": -102.61153411865234, "logps/rejected": -942.2344970703125, "loss": 0.0856, "rewards/accuracies": 1.0, "rewards/chosen": -0.5619033575057983, "rewards/margins": 8.475497245788574, "rewards/rejected": -9.037398338317871, "step": 64760 }, { "epoch": 0.78, "learning_rate": 7.302213490992189e-07, "logits/chosen": -2.8647725582122803, "logits/rejected": -2.260978937149048, "logps/chosen": -91.61351776123047, "logps/rejected": -950.6555786132812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.4339486062526703, "rewards/margins": 8.696868896484375, "rewards/rejected": -9.130817413330078, "step": 64770 }, { "epoch": 0.78, "learning_rate": 7.294836866967134e-07, "logits/chosen": -2.875361919403076, "logits/rejected": -2.2588677406311035, "logps/chosen": -113.36041259765625, "logps/rejected": -965.4806518554688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6412220001220703, "rewards/margins": 8.615750312805176, "rewards/rejected": -9.25697135925293, "step": 64780 }, { "epoch": 0.78, "learning_rate": 7.287463334214386e-07, "logits/chosen": -2.8934831619262695, "logits/rejected": -2.486368417739868, "logps/chosen": -110.4867172241211, "logps/rejected": -848.1326904296875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6747812628746033, "rewards/margins": 7.4173712730407715, "rewards/rejected": -8.09215259552002, "step": 64790 }, { "epoch": 0.78, "learning_rate": 7.280092894021346e-07, "logits/chosen": -2.9182686805725098, "logits/rejected": -2.613921642303467, "logps/chosen": -75.13444519042969, "logps/rejected": -849.3961181640625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.3668549656867981, "rewards/margins": 7.748204231262207, "rewards/rejected": -8.115058898925781, "step": 64800 }, { "epoch": 0.78, "learning_rate": 7.272725547674872e-07, "logits/chosen": -2.846703290939331, "logits/rejected": -2.328949213027954, "logps/chosen": -114.65666198730469, "logps/rejected": -1011.1340942382812, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.677249550819397, "rewards/margins": 9.037025451660156, "rewards/rejected": -9.714275360107422, "step": 64810 }, { "epoch": 0.78, "learning_rate": 7.26536129646129e-07, "logits/chosen": -2.8713388442993164, "logits/rejected": -2.1973114013671875, "logps/chosen": -112.67057800292969, "logps/rejected": -958.6070556640625, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -0.6140896081924438, "rewards/margins": 8.587420463562012, "rewards/rejected": -9.201510429382324, "step": 64820 }, { "epoch": 0.78, "learning_rate": 7.258000141666363e-07, "logits/chosen": -2.8490378856658936, "logits/rejected": -2.297330856323242, "logps/chosen": -91.86647033691406, "logps/rejected": -908.3006591796875, "loss": 0.1513, "rewards/accuracies": 1.0, "rewards/chosen": -0.4524906277656555, "rewards/margins": 8.24215316772461, "rewards/rejected": -8.6946439743042, "step": 64830 }, { "epoch": 0.78, "learning_rate": 7.250642084575332e-07, "logits/chosen": -2.8548927307128906, "logits/rejected": -2.6053051948547363, "logps/chosen": -68.24398040771484, "logps/rejected": -772.2769775390625, "loss": 0.1073, "rewards/accuracies": 1.0, "rewards/chosen": -0.3284371495246887, "rewards/margins": 7.0276594161987305, "rewards/rejected": -7.3560967445373535, "step": 64840 }, { "epoch": 0.78, "learning_rate": 7.243287126472897e-07, "logits/chosen": -2.8730790615081787, "logits/rejected": -2.551873207092285, "logps/chosen": -85.37014770507812, "logps/rejected": -779.279296875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.47197872400283813, "rewards/margins": 6.937856197357178, "rewards/rejected": -7.4098358154296875, "step": 64850 }, { "epoch": 0.78, "learning_rate": 7.235935268643216e-07, "logits/chosen": -2.879152297973633, "logits/rejected": -2.154214382171631, "logps/chosen": -135.42051696777344, "logps/rejected": -998.7706909179688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.822726845741272, "rewards/margins": 8.744563102722168, "rewards/rejected": -9.567290306091309, "step": 64860 }, { "epoch": 0.78, "learning_rate": 7.228586512369895e-07, "logits/chosen": -2.836296319961548, "logits/rejected": -2.570344924926758, "logps/chosen": -86.32457733154297, "logps/rejected": -855.0152587890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4845278859138489, "rewards/margins": 7.690141201019287, "rewards/rejected": -8.17466926574707, "step": 64870 }, { "epoch": 0.78, "learning_rate": 7.221240858936021e-07, "logits/chosen": -2.875696897506714, "logits/rejected": -2.3593432903289795, "logps/chosen": -105.1672592163086, "logps/rejected": -930.2607421875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5561089515686035, "rewards/margins": 8.354202270507812, "rewards/rejected": -8.910311698913574, "step": 64880 }, { "epoch": 0.78, "learning_rate": 7.213898309624106e-07, "logits/chosen": -2.8496217727661133, "logits/rejected": -2.441429853439331, "logps/chosen": -96.78760528564453, "logps/rejected": -990.5809326171875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5559038519859314, "rewards/margins": 8.959806442260742, "rewards/rejected": -9.515710830688477, "step": 64890 }, { "epoch": 0.78, "learning_rate": 7.206558865716151e-07, "logits/chosen": -2.9441704750061035, "logits/rejected": -2.4640564918518066, "logps/chosen": -106.52876281738281, "logps/rejected": -865.0740356445312, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6056416034698486, "rewards/margins": 7.671388149261475, "rewards/rejected": -8.277029037475586, "step": 64900 }, { "epoch": 0.78, "learning_rate": 7.199222528493593e-07, "logits/chosen": -2.903104305267334, "logits/rejected": -2.2237255573272705, "logps/chosen": -115.92276763916016, "logps/rejected": -935.7076416015625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6283294558525085, "rewards/margins": 8.329007148742676, "rewards/rejected": -8.95733642578125, "step": 64910 }, { "epoch": 0.78, "learning_rate": 7.191889299237347e-07, "logits/chosen": -2.863633632659912, "logits/rejected": -2.1606409549713135, "logps/chosen": -113.28071594238281, "logps/rejected": -1003.2860107421875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.645580530166626, "rewards/margins": 8.996247291564941, "rewards/rejected": -9.641827583312988, "step": 64920 }, { "epoch": 0.78, "learning_rate": 7.184559179227768e-07, "logits/chosen": -2.841850519180298, "logits/rejected": -2.2025041580200195, "logps/chosen": -114.39119720458984, "logps/rejected": -932.4359130859375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6344336271286011, "rewards/margins": 8.294084548950195, "rewards/rejected": -8.928518295288086, "step": 64930 }, { "epoch": 0.78, "learning_rate": 7.177232169744674e-07, "logits/chosen": -2.908932685852051, "logits/rejected": -2.2411184310913086, "logps/chosen": -102.98304748535156, "logps/rejected": -940.76513671875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5866490602493286, "rewards/margins": 8.417858123779297, "rewards/rejected": -9.004507064819336, "step": 64940 }, { "epoch": 0.78, "learning_rate": 7.169908272067353e-07, "logits/chosen": -2.8738746643066406, "logits/rejected": -2.3632307052612305, "logps/chosen": -101.15769958496094, "logps/rejected": -929.2073974609375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5736294388771057, "rewards/margins": 8.340484619140625, "rewards/rejected": -8.914113998413086, "step": 64950 }, { "epoch": 0.78, "learning_rate": 7.162587487474517e-07, "logits/chosen": -2.8856003284454346, "logits/rejected": -2.658440351486206, "logps/chosen": -86.7236099243164, "logps/rejected": -745.7178344726562, "loss": 0.083, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.48624253273010254, "rewards/margins": 6.608237266540527, "rewards/rejected": -7.094480037689209, "step": 64960 }, { "epoch": 0.78, "learning_rate": 7.155269817244365e-07, "logits/chosen": -2.850872278213501, "logits/rejected": -2.4114696979522705, "logps/chosen": -132.54977416992188, "logps/rejected": -783.5252685546875, "loss": 0.1372, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8665465116500854, "rewards/margins": 6.597696781158447, "rewards/rejected": -7.464243412017822, "step": 64970 }, { "epoch": 0.78, "learning_rate": 7.147955262654541e-07, "logits/chosen": -2.8885676860809326, "logits/rejected": -2.6106467247009277, "logps/chosen": -77.16810607910156, "logps/rejected": -765.1096801757812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.31814688444137573, "rewards/margins": 6.959012031555176, "rewards/rejected": -7.277159214019775, "step": 64980 }, { "epoch": 0.78, "learning_rate": 7.140643824982147e-07, "logits/chosen": -2.9271082878112793, "logits/rejected": -2.3845582008361816, "logps/chosen": -94.79103088378906, "logps/rejected": -988.8037109375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.49844303727149963, "rewards/margins": 8.996648788452148, "rewards/rejected": -9.495092391967773, "step": 64990 }, { "epoch": 0.78, "learning_rate": 7.133335505503738e-07, "logits/chosen": -2.835564136505127, "logits/rejected": -2.1233420372009277, "logps/chosen": -126.23429870605469, "logps/rejected": -953.4622802734375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7116211652755737, "rewards/margins": 8.398597717285156, "rewards/rejected": -9.110219955444336, "step": 65000 }, { "epoch": 0.78, "learning_rate": 7.126030305495329e-07, "logits/chosen": -2.8851544857025146, "logits/rejected": -2.506800651550293, "logps/chosen": -80.95221710205078, "logps/rejected": -843.9998779296875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3852010667324066, "rewards/margins": 7.671900272369385, "rewards/rejected": -8.057101249694824, "step": 65010 }, { "epoch": 0.78, "learning_rate": 7.118728226232391e-07, "logits/chosen": -2.9279441833496094, "logits/rejected": -2.2361130714416504, "logps/chosen": -123.28755187988281, "logps/rejected": -951.1950073242188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7390182018280029, "rewards/margins": 8.366389274597168, "rewards/rejected": -9.10540771484375, "step": 65020 }, { "epoch": 0.78, "learning_rate": 7.111429268989836e-07, "logits/chosen": -2.8634719848632812, "logits/rejected": -2.1756439208984375, "logps/chosen": -145.6331787109375, "logps/rejected": -963.7506103515625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.8877711296081543, "rewards/margins": 8.348539352416992, "rewards/rejected": -9.236310958862305, "step": 65030 }, { "epoch": 0.78, "learning_rate": 7.104133435042046e-07, "logits/chosen": -2.8550713062286377, "logits/rejected": -2.498081684112549, "logps/chosen": -102.50514221191406, "logps/rejected": -778.7566528320312, "loss": 0.0563, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6041173338890076, "rewards/margins": 6.794328212738037, "rewards/rejected": -7.398445129394531, "step": 65040 }, { "epoch": 0.78, "learning_rate": 7.096840725662855e-07, "logits/chosen": -2.8914554119110107, "logits/rejected": -2.4735934734344482, "logps/chosen": -92.02729797363281, "logps/rejected": -884.8395385742188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4390714168548584, "rewards/margins": 8.022611618041992, "rewards/rejected": -8.46168327331543, "step": 65050 }, { "epoch": 0.78, "learning_rate": 7.08955114212555e-07, "logits/chosen": -2.890141725540161, "logits/rejected": -2.5824532508850098, "logps/chosen": -70.82962799072266, "logps/rejected": -832.2268676757812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.324876070022583, "rewards/margins": 7.619397163391113, "rewards/rejected": -7.944273948669434, "step": 65060 }, { "epoch": 0.78, "learning_rate": 7.082264685702869e-07, "logits/chosen": -2.8815484046936035, "logits/rejected": -2.355412006378174, "logps/chosen": -110.57039642333984, "logps/rejected": -991.8992919921875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6126164197921753, "rewards/margins": 8.919029235839844, "rewards/rejected": -9.531645774841309, "step": 65070 }, { "epoch": 0.78, "learning_rate": 7.074981357667022e-07, "logits/chosen": -2.8979244232177734, "logits/rejected": -2.251335859298706, "logps/chosen": -87.22972869873047, "logps/rejected": -930.6731567382812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.41198769211769104, "rewards/margins": 8.518882751464844, "rewards/rejected": -8.93087100982666, "step": 65080 }, { "epoch": 0.78, "learning_rate": 7.067701159289633e-07, "logits/chosen": -2.8621230125427246, "logits/rejected": -2.5690505504608154, "logps/chosen": -83.07595825195312, "logps/rejected": -835.6708984375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4250636696815491, "rewards/margins": 7.549114227294922, "rewards/rejected": -7.974177360534668, "step": 65090 }, { "epoch": 0.78, "learning_rate": 7.060424091841819e-07, "logits/chosen": -2.878467082977295, "logits/rejected": -2.3436388969421387, "logps/chosen": -129.42771911621094, "logps/rejected": -894.1546020507812, "loss": 0.1361, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8444199562072754, "rewards/margins": 7.721125602722168, "rewards/rejected": -8.565546989440918, "step": 65100 }, { "epoch": 0.78, "learning_rate": 7.053150156594132e-07, "logits/chosen": -2.8216590881347656, "logits/rejected": -2.2722887992858887, "logps/chosen": -117.13432312011719, "logps/rejected": -907.8714599609375, "loss": 0.076, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7383300065994263, "rewards/margins": 7.9543256759643555, "rewards/rejected": -8.692655563354492, "step": 65110 }, { "epoch": 0.78, "learning_rate": 7.045879354816585e-07, "logits/chosen": -2.9284045696258545, "logits/rejected": -2.553398609161377, "logps/chosen": -81.51268005371094, "logps/rejected": -852.7119140625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4240291118621826, "rewards/margins": 7.7081499099731445, "rewards/rejected": -8.13217830657959, "step": 65120 }, { "epoch": 0.78, "learning_rate": 7.038611687778635e-07, "logits/chosen": -2.9044406414031982, "logits/rejected": -2.2859416007995605, "logps/chosen": -96.32283782958984, "logps/rejected": -941.0198974609375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5213386416435242, "rewards/margins": 8.513313293457031, "rewards/rejected": -9.034650802612305, "step": 65130 }, { "epoch": 0.78, "learning_rate": 7.031347156749199e-07, "logits/chosen": -2.838519334793091, "logits/rejected": -2.254397392272949, "logps/chosen": -132.0199432373047, "logps/rejected": -987.126953125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8444045782089233, "rewards/margins": 8.627914428710938, "rewards/rejected": -9.472319602966309, "step": 65140 }, { "epoch": 0.78, "learning_rate": 7.024085762996651e-07, "logits/chosen": -2.8568034172058105, "logits/rejected": -2.3339574337005615, "logps/chosen": -76.4654541015625, "logps/rejected": -876.6936645507812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.35038286447525024, "rewards/margins": 8.034680366516113, "rewards/rejected": -8.385062217712402, "step": 65150 }, { "epoch": 0.78, "learning_rate": 7.016827507788795e-07, "logits/chosen": -2.896791934967041, "logits/rejected": -2.313105583190918, "logps/chosen": -111.679443359375, "logps/rejected": -971.2357177734375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6037396788597107, "rewards/margins": 8.713165283203125, "rewards/rejected": -9.316905975341797, "step": 65160 }, { "epoch": 0.78, "learning_rate": 7.009572392392911e-07, "logits/chosen": -2.8658573627471924, "logits/rejected": -2.201385259628296, "logps/chosen": -107.08595275878906, "logps/rejected": -1017.8939208984375, "loss": 0.0193, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5323404669761658, "rewards/margins": 9.269187927246094, "rewards/rejected": -9.80152702331543, "step": 65170 }, { "epoch": 0.78, "learning_rate": 7.002320418075714e-07, "logits/chosen": -2.8667993545532227, "logits/rejected": -2.255465507507324, "logps/chosen": -105.5604019165039, "logps/rejected": -990.0823974609375, "loss": 0.1135, "rewards/accuracies": 1.0, "rewards/chosen": -0.560697078704834, "rewards/margins": 8.945871353149414, "rewards/rejected": -9.506568908691406, "step": 65180 }, { "epoch": 0.78, "learning_rate": 6.995071586103392e-07, "logits/chosen": -2.87292742729187, "logits/rejected": -2.0587165355682373, "logps/chosen": -143.87759399414062, "logps/rejected": -1079.557861328125, "loss": 0.0756, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8382275700569153, "rewards/margins": 9.553020477294922, "rewards/rejected": -10.391246795654297, "step": 65190 }, { "epoch": 0.78, "learning_rate": 6.987825897741573e-07, "logits/chosen": -2.8856043815612793, "logits/rejected": -2.3165416717529297, "logps/chosen": -87.66122436523438, "logps/rejected": -942.4675903320312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4286159873008728, "rewards/margins": 8.6066312789917, "rewards/rejected": -9.035248756408691, "step": 65200 }, { "epoch": 0.78, "learning_rate": 6.980583354255316e-07, "logits/chosen": -2.879028081893921, "logits/rejected": -2.3339104652404785, "logps/chosen": -115.23530578613281, "logps/rejected": -956.0148315429688, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6592954397201538, "rewards/margins": 8.495783805847168, "rewards/rejected": -9.155078887939453, "step": 65210 }, { "epoch": 0.78, "learning_rate": 6.973343956909162e-07, "logits/chosen": -2.8757214546203613, "logits/rejected": -2.2270169258117676, "logps/chosen": -101.6025619506836, "logps/rejected": -914.1683349609375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.5401620268821716, "rewards/margins": 8.205972671508789, "rewards/rejected": -8.746134757995605, "step": 65220 }, { "epoch": 0.78, "learning_rate": 6.966107706967085e-07, "logits/chosen": -2.9054296016693115, "logits/rejected": -2.1996190547943115, "logps/chosen": -130.44271850585938, "logps/rejected": -1012.88427734375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7815822958946228, "rewards/margins": 8.941875457763672, "rewards/rejected": -9.723457336425781, "step": 65230 }, { "epoch": 0.78, "learning_rate": 6.958874605692515e-07, "logits/chosen": -2.8592264652252197, "logits/rejected": -2.4203081130981445, "logps/chosen": -100.31468200683594, "logps/rejected": -888.3513793945312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5592305064201355, "rewards/margins": 7.955859184265137, "rewards/rejected": -8.515089988708496, "step": 65240 }, { "epoch": 0.78, "learning_rate": 6.95164465434833e-07, "logits/chosen": -2.8824009895324707, "logits/rejected": -2.5796539783477783, "logps/chosen": -77.3603744506836, "logps/rejected": -856.9490966796875, "loss": 0.1006, "rewards/accuracies": 1.0, "rewards/chosen": -0.36357012391090393, "rewards/margins": 7.826318264007568, "rewards/rejected": -8.189888000488281, "step": 65250 }, { "epoch": 0.78, "learning_rate": 6.944417854196861e-07, "logits/chosen": -2.8999269008636475, "logits/rejected": -2.2961277961730957, "logps/chosen": -112.11869812011719, "logps/rejected": -1026.2008056640625, "loss": 0.0857, "rewards/accuracies": 1.0, "rewards/chosen": -0.6050878167152405, "rewards/margins": 9.267657279968262, "rewards/rejected": -9.872746467590332, "step": 65260 }, { "epoch": 0.78, "learning_rate": 6.937194206499897e-07, "logits/chosen": -2.866760492324829, "logits/rejected": -2.3801183700561523, "logps/chosen": -119.89314270019531, "logps/rejected": -904.10400390625, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -0.7635215520858765, "rewards/margins": 7.89291524887085, "rewards/rejected": -8.656437873840332, "step": 65270 }, { "epoch": 0.78, "learning_rate": 6.929973712518651e-07, "logits/chosen": -2.884472608566284, "logits/rejected": -2.42130446434021, "logps/chosen": -102.4568099975586, "logps/rejected": -895.34326171875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5149023532867432, "rewards/margins": 8.038993835449219, "rewards/rejected": -8.553895950317383, "step": 65280 }, { "epoch": 0.78, "learning_rate": 6.922756373513806e-07, "logits/chosen": -2.9163708686828613, "logits/rejected": -2.531567335128784, "logps/chosen": -77.24076080322266, "logps/rejected": -844.0816650390625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.3118073046207428, "rewards/margins": 7.754276275634766, "rewards/rejected": -8.066083908081055, "step": 65290 }, { "epoch": 0.78, "learning_rate": 6.915542190745489e-07, "logits/chosen": -2.8596882820129395, "logits/rejected": -2.282647132873535, "logps/chosen": -107.05278015136719, "logps/rejected": -1042.9505615234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6180570721626282, "rewards/margins": 9.4119234085083, "rewards/rejected": -10.029980659484863, "step": 65300 }, { "epoch": 0.78, "learning_rate": 6.908331165473281e-07, "logits/chosen": -2.882685899734497, "logits/rejected": -2.4885361194610596, "logps/chosen": -99.87678527832031, "logps/rejected": -837.4973754882812, "loss": 0.1112, "rewards/accuracies": 1.0, "rewards/chosen": -0.5641070604324341, "rewards/margins": 7.422435760498047, "rewards/rejected": -7.986544132232666, "step": 65310 }, { "epoch": 0.78, "learning_rate": 6.901123298956202e-07, "logits/chosen": -2.8602280616760254, "logits/rejected": -2.267434597015381, "logps/chosen": -117.06539154052734, "logps/rejected": -954.9915161132812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7077094316482544, "rewards/margins": 8.4468412399292, "rewards/rejected": -9.154550552368164, "step": 65320 }, { "epoch": 0.78, "learning_rate": 6.893918592452728e-07, "logits/chosen": -2.872316598892212, "logits/rejected": -2.51446533203125, "logps/chosen": -103.6342544555664, "logps/rejected": -765.8226318359375, "loss": 0.1108, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6286515593528748, "rewards/margins": 6.6523542404174805, "rewards/rejected": -7.281005859375, "step": 65330 }, { "epoch": 0.78, "learning_rate": 6.886717047220787e-07, "logits/chosen": -2.874976396560669, "logits/rejected": -2.297250270843506, "logps/chosen": -98.58958435058594, "logps/rejected": -949.5650634765625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.5196770429611206, "rewards/margins": 8.597832679748535, "rewards/rejected": -9.117509841918945, "step": 65340 }, { "epoch": 0.78, "learning_rate": 6.879518664517737e-07, "logits/chosen": -2.8777716159820557, "logits/rejected": -2.1816275119781494, "logps/chosen": -152.2706756591797, "logps/rejected": -1147.864501953125, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.8710131645202637, "rewards/margins": 10.196045875549316, "rewards/rejected": -11.067058563232422, "step": 65350 }, { "epoch": 0.78, "learning_rate": 6.872323445600398e-07, "logits/chosen": -2.8643126487731934, "logits/rejected": -2.297834873199463, "logps/chosen": -99.91563415527344, "logps/rejected": -946.0711059570312, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5365518927574158, "rewards/margins": 8.531698226928711, "rewards/rejected": -9.068249702453613, "step": 65360 }, { "epoch": 0.78, "learning_rate": 6.865131391725038e-07, "logits/chosen": -2.884385585784912, "logits/rejected": -2.2393109798431396, "logps/chosen": -121.8612060546875, "logps/rejected": -1008.6881713867188, "loss": 0.0699, "rewards/accuracies": 1.0, "rewards/chosen": -0.7262623310089111, "rewards/margins": 8.957630157470703, "rewards/rejected": -9.683891296386719, "step": 65370 }, { "epoch": 0.78, "learning_rate": 6.857942504147371e-07, "logits/chosen": -2.8776919841766357, "logits/rejected": -2.596433162689209, "logps/chosen": -62.35169219970703, "logps/rejected": -781.1375732421875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.269074022769928, "rewards/margins": 7.162205696105957, "rewards/rejected": -7.43127965927124, "step": 65380 }, { "epoch": 0.78, "learning_rate": 6.850756784122556e-07, "logits/chosen": -2.889594316482544, "logits/rejected": -2.401763439178467, "logps/chosen": -102.07450103759766, "logps/rejected": -964.70703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5071532130241394, "rewards/margins": 8.749740600585938, "rewards/rejected": -9.2568941116333, "step": 65390 }, { "epoch": 0.78, "learning_rate": 6.843574232905206e-07, "logits/chosen": -2.8856570720672607, "logits/rejected": -2.449723958969116, "logps/chosen": -73.8696517944336, "logps/rejected": -839.8396606445312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.32935458421707153, "rewards/margins": 7.699243068695068, "rewards/rejected": -8.02859878540039, "step": 65400 }, { "epoch": 0.78, "learning_rate": 6.836394851749359e-07, "logits/chosen": -2.9257562160491943, "logits/rejected": -2.487391948699951, "logps/chosen": -100.98521423339844, "logps/rejected": -845.69482421875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5132938623428345, "rewards/margins": 7.541806697845459, "rewards/rejected": -8.055100440979004, "step": 65410 }, { "epoch": 0.78, "learning_rate": 6.829218641908525e-07, "logits/chosen": -2.905273675918579, "logits/rejected": -2.515331506729126, "logps/chosen": -85.71226501464844, "logps/rejected": -901.38037109375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4481230676174164, "rewards/margins": 8.180027961730957, "rewards/rejected": -8.628150939941406, "step": 65420 }, { "epoch": 0.78, "learning_rate": 6.82204560463565e-07, "logits/chosen": -2.874546527862549, "logits/rejected": -2.202500820159912, "logps/chosen": -108.64451599121094, "logps/rejected": -1000.4977416992188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6067212224006653, "rewards/margins": 9.011846542358398, "rewards/rejected": -9.618566513061523, "step": 65430 }, { "epoch": 0.78, "learning_rate": 6.814875741183122e-07, "logits/chosen": -2.875511407852173, "logits/rejected": -2.3201682567596436, "logps/chosen": -108.81028747558594, "logps/rejected": -993.3973388671875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5601374506950378, "rewards/margins": 8.977075576782227, "rewards/rejected": -9.537213325500488, "step": 65440 }, { "epoch": 0.78, "learning_rate": 6.807709052802783e-07, "logits/chosen": -2.89462947845459, "logits/rejected": -2.2854340076446533, "logps/chosen": -111.81413269042969, "logps/rejected": -1020.4974365234375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5880356431007385, "rewards/margins": 9.215433120727539, "rewards/rejected": -9.803468704223633, "step": 65450 }, { "epoch": 0.78, "learning_rate": 6.800545540745918e-07, "logits/chosen": -2.840313196182251, "logits/rejected": -2.3811511993408203, "logps/chosen": -88.30458068847656, "logps/rejected": -874.27392578125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.4268272817134857, "rewards/margins": 7.945446968078613, "rewards/rejected": -8.372274398803711, "step": 65460 }, { "epoch": 0.78, "learning_rate": 6.793385206263262e-07, "logits/chosen": -2.8865184783935547, "logits/rejected": -2.447981357574463, "logps/chosen": -104.7294692993164, "logps/rejected": -859.353515625, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -0.6322005987167358, "rewards/margins": 7.573042392730713, "rewards/rejected": -8.205242156982422, "step": 65470 }, { "epoch": 0.78, "learning_rate": 6.786228050604976e-07, "logits/chosen": -2.852816343307495, "logits/rejected": -2.2899417877197266, "logps/chosen": -121.2578125, "logps/rejected": -932.0504760742188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6930665373802185, "rewards/margins": 8.23017692565918, "rewards/rejected": -8.92324447631836, "step": 65480 }, { "epoch": 0.78, "learning_rate": 6.779074075020684e-07, "logits/chosen": -2.9041523933410645, "logits/rejected": -2.681421995162964, "logps/chosen": -104.82426452636719, "logps/rejected": -755.6455688476562, "loss": 0.1579, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6778143048286438, "rewards/margins": 6.492307186126709, "rewards/rejected": -7.170121192932129, "step": 65490 }, { "epoch": 0.78, "learning_rate": 6.771923280759452e-07, "logits/chosen": -2.861142635345459, "logits/rejected": -2.3636927604675293, "logps/chosen": -116.75601959228516, "logps/rejected": -977.841796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6809262037277222, "rewards/margins": 8.704042434692383, "rewards/rejected": -9.384969711303711, "step": 65500 }, { "epoch": 0.78, "learning_rate": 6.76477566906979e-07, "logits/chosen": -2.862356185913086, "logits/rejected": -2.1945393085479736, "logps/chosen": -122.83598327636719, "logps/rejected": -1037.6107177734375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7268930077552795, "rewards/margins": 9.260269165039062, "rewards/rejected": -9.987162590026855, "step": 65510 }, { "epoch": 0.78, "learning_rate": 6.75763124119965e-07, "logits/chosen": -2.8895263671875, "logits/rejected": -2.371227741241455, "logps/chosen": -124.7941665649414, "logps/rejected": -941.1594848632812, "loss": 0.0949, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7572669386863708, "rewards/margins": 8.261945724487305, "rewards/rejected": -9.01921272277832, "step": 65520 }, { "epoch": 0.78, "learning_rate": 6.750489998396429e-07, "logits/chosen": -2.8651275634765625, "logits/rejected": -2.246668815612793, "logps/chosen": -112.05796813964844, "logps/rejected": -850.92919921875, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -0.5983139276504517, "rewards/margins": 7.519741058349609, "rewards/rejected": -8.118054389953613, "step": 65530 }, { "epoch": 0.78, "learning_rate": 6.743351941906975e-07, "logits/chosen": -2.8553977012634277, "logits/rejected": -2.382991075515747, "logps/chosen": -97.75243377685547, "logps/rejected": -937.0993041992188, "loss": 0.1236, "rewards/accuracies": 1.0, "rewards/chosen": -0.486005961894989, "rewards/margins": 8.492769241333008, "rewards/rejected": -8.978774070739746, "step": 65540 }, { "epoch": 0.78, "learning_rate": 6.736217072977561e-07, "logits/chosen": -2.8548450469970703, "logits/rejected": -2.2788240909576416, "logps/chosen": -126.28556823730469, "logps/rejected": -930.6256103515625, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.7704179883003235, "rewards/margins": 8.145947456359863, "rewards/rejected": -8.916364669799805, "step": 65550 }, { "epoch": 0.78, "learning_rate": 6.729085392853918e-07, "logits/chosen": -2.8955078125, "logits/rejected": -2.4585788249969482, "logps/chosen": -90.4070053100586, "logps/rejected": -843.8883056640625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.459155410528183, "rewards/margins": 7.607806205749512, "rewards/rejected": -8.066961288452148, "step": 65560 }, { "epoch": 0.78, "learning_rate": 6.721956902781221e-07, "logits/chosen": -2.90592622756958, "logits/rejected": -2.4291322231292725, "logps/chosen": -89.59437561035156, "logps/rejected": -905.3311767578125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.47024139761924744, "rewards/margins": 8.200705528259277, "rewards/rejected": -8.67094898223877, "step": 65570 }, { "epoch": 0.79, "learning_rate": 6.714831604004082e-07, "logits/chosen": -2.8863444328308105, "logits/rejected": -2.227719783782959, "logps/chosen": -124.72850036621094, "logps/rejected": -977.7120971679688, "loss": 0.089, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7593261003494263, "rewards/margins": 8.615440368652344, "rewards/rejected": -9.37476634979248, "step": 65580 }, { "epoch": 0.79, "learning_rate": 6.707709497766562e-07, "logits/chosen": -2.889927625656128, "logits/rejected": -2.2243564128875732, "logps/chosen": -113.80853271484375, "logps/rejected": -912.1802978515625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.641200840473175, "rewards/margins": 8.086675643920898, "rewards/rejected": -8.727876663208008, "step": 65590 }, { "epoch": 0.79, "learning_rate": 6.700590585312162e-07, "logits/chosen": -2.807330369949341, "logits/rejected": -2.2941629886627197, "logps/chosen": -91.8052749633789, "logps/rejected": -926.6593017578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4863499104976654, "rewards/margins": 8.396834373474121, "rewards/rejected": -8.883184432983398, "step": 65600 }, { "epoch": 0.79, "learning_rate": 6.693474867883817e-07, "logits/chosen": -2.9002952575683594, "logits/rejected": -2.19258975982666, "logps/chosen": -105.7103042602539, "logps/rejected": -1082.28515625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.574516773223877, "rewards/margins": 9.84322452545166, "rewards/rejected": -10.417740821838379, "step": 65610 }, { "epoch": 0.79, "learning_rate": 6.686362346723916e-07, "logits/chosen": -2.8573861122131348, "logits/rejected": -2.3494277000427246, "logps/chosen": -93.94508361816406, "logps/rejected": -958.5940551757812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.498887836933136, "rewards/margins": 8.692059516906738, "rewards/rejected": -9.190946578979492, "step": 65620 }, { "epoch": 0.79, "learning_rate": 6.679253023074287e-07, "logits/chosen": -2.8637821674346924, "logits/rejected": -2.4178731441497803, "logps/chosen": -121.78145599365234, "logps/rejected": -921.1085205078125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.763187825679779, "rewards/margins": 8.060262680053711, "rewards/rejected": -8.823451042175293, "step": 65630 }, { "epoch": 0.79, "learning_rate": 6.672146898176196e-07, "logits/chosen": -2.8976151943206787, "logits/rejected": -2.5370383262634277, "logps/chosen": -88.26041412353516, "logps/rejected": -845.5817260742188, "loss": 0.0907, "rewards/accuracies": 1.0, "rewards/chosen": -0.4702814519405365, "rewards/margins": 7.6041460037231445, "rewards/rejected": -8.074427604675293, "step": 65640 }, { "epoch": 0.79, "learning_rate": 6.66504397327036e-07, "logits/chosen": -2.884204626083374, "logits/rejected": -2.218662738800049, "logps/chosen": -112.61844635009766, "logps/rejected": -894.1441650390625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.614020824432373, "rewards/margins": 7.942253112792969, "rewards/rejected": -8.5562744140625, "step": 65650 }, { "epoch": 0.79, "learning_rate": 6.657944249596918e-07, "logits/chosen": -2.910270929336548, "logits/rejected": -2.4008731842041016, "logps/chosen": -137.65357971191406, "logps/rejected": -980.9430541992188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8890476226806641, "rewards/margins": 8.527063369750977, "rewards/rejected": -9.41611099243164, "step": 65660 }, { "epoch": 0.79, "learning_rate": 6.650847728395466e-07, "logits/chosen": -2.8167104721069336, "logits/rejected": -2.2070791721343994, "logps/chosen": -113.4126205444336, "logps/rejected": -1027.1168212890625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.656134307384491, "rewards/margins": 9.20158576965332, "rewards/rejected": -9.857719421386719, "step": 65670 }, { "epoch": 0.79, "learning_rate": 6.643754410905035e-07, "logits/chosen": -2.901987075805664, "logits/rejected": -2.4297780990600586, "logps/chosen": -97.45875549316406, "logps/rejected": -896.80859375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5172240734100342, "rewards/margins": 8.064391136169434, "rewards/rejected": -8.581615447998047, "step": 65680 }, { "epoch": 0.79, "learning_rate": 6.636664298364104e-07, "logits/chosen": -2.876053810119629, "logits/rejected": -2.3363022804260254, "logps/chosen": -113.64738464355469, "logps/rejected": -960.9244384765625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6693670153617859, "rewards/margins": 8.517974853515625, "rewards/rejected": -9.187341690063477, "step": 65690 }, { "epoch": 0.79, "learning_rate": 6.629577392010586e-07, "logits/chosen": -2.888157844543457, "logits/rejected": -2.436594009399414, "logps/chosen": -104.29437255859375, "logps/rejected": -904.9367065429688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6212491989135742, "rewards/margins": 8.039972305297852, "rewards/rejected": -8.661221504211426, "step": 65700 }, { "epoch": 0.79, "learning_rate": 6.62249369308183e-07, "logits/chosen": -2.8742003440856934, "logits/rejected": -2.280686855316162, "logps/chosen": -116.4837646484375, "logps/rejected": -916.2999267578125, "loss": 0.1091, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7199219465255737, "rewards/margins": 8.040600776672363, "rewards/rejected": -8.760522842407227, "step": 65710 }, { "epoch": 0.79, "learning_rate": 6.615413202814644e-07, "logits/chosen": -2.8904123306274414, "logits/rejected": -2.181211471557617, "logps/chosen": -114.72811126708984, "logps/rejected": -997.5236206054688, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6711021065711975, "rewards/margins": 8.901830673217773, "rewards/rejected": -9.572932243347168, "step": 65720 }, { "epoch": 0.79, "learning_rate": 6.608335922445244e-07, "logits/chosen": -2.90964937210083, "logits/rejected": -2.4377901554107666, "logps/chosen": -107.54649353027344, "logps/rejected": -839.2264404296875, "loss": 0.1284, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6590558290481567, "rewards/margins": 7.370554447174072, "rewards/rejected": -8.029611587524414, "step": 65730 }, { "epoch": 0.79, "learning_rate": 6.601261853209312e-07, "logits/chosen": -2.890368938446045, "logits/rejected": -2.4951822757720947, "logps/chosen": -85.1034164428711, "logps/rejected": -818.4232177734375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4038734436035156, "rewards/margins": 7.405605316162109, "rewards/rejected": -7.809478759765625, "step": 65740 }, { "epoch": 0.79, "learning_rate": 6.594190996341959e-07, "logits/chosen": -2.9141733646392822, "logits/rejected": -2.240722179412842, "logps/chosen": -115.70381927490234, "logps/rejected": -990.515625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6919193267822266, "rewards/margins": 8.82691764831543, "rewards/rejected": -9.51883602142334, "step": 65750 }, { "epoch": 0.79, "learning_rate": 6.587123353077738e-07, "logits/chosen": -2.9031662940979004, "logits/rejected": -2.3945274353027344, "logps/chosen": -97.74113464355469, "logps/rejected": -938.6707763671875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5587180256843567, "rewards/margins": 8.438238143920898, "rewards/rejected": -8.996954917907715, "step": 65760 }, { "epoch": 0.79, "learning_rate": 6.580058924650639e-07, "logits/chosen": -2.8715736865997314, "logits/rejected": -2.402470111846924, "logps/chosen": -116.7590103149414, "logps/rejected": -823.87744140625, "loss": 0.0992, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7357953786849976, "rewards/margins": 7.118325233459473, "rewards/rejected": -7.854119777679443, "step": 65770 }, { "epoch": 0.79, "learning_rate": 6.572997712294094e-07, "logits/chosen": -2.9066758155822754, "logits/rejected": -2.6819517612457275, "logps/chosen": -73.97377014160156, "logps/rejected": -783.2572631835938, "loss": 0.2117, "rewards/accuracies": 1.0, "rewards/chosen": -0.31947603821754456, "rewards/margins": 7.138295650482178, "rewards/rejected": -7.457772254943848, "step": 65780 }, { "epoch": 0.79, "learning_rate": 6.565939717240977e-07, "logits/chosen": -2.875875949859619, "logits/rejected": -2.3330111503601074, "logps/chosen": -91.23957824707031, "logps/rejected": -953.2073364257812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4878818094730377, "rewards/margins": 8.657934188842773, "rewards/rejected": -9.1458158493042, "step": 65790 }, { "epoch": 0.79, "learning_rate": 6.558884940723581e-07, "logits/chosen": -2.868433952331543, "logits/rejected": -2.478909969329834, "logps/chosen": -81.61437225341797, "logps/rejected": -904.7596435546875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4434274137020111, "rewards/margins": 8.234070777893066, "rewards/rejected": -8.677496910095215, "step": 65800 }, { "epoch": 0.79, "learning_rate": 6.551833383973655e-07, "logits/chosen": -2.871772289276123, "logits/rejected": -2.217189073562622, "logps/chosen": -115.23968505859375, "logps/rejected": -895.3377075195312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6941148638725281, "rewards/margins": 7.873960018157959, "rewards/rejected": -8.568074226379395, "step": 65810 }, { "epoch": 0.79, "learning_rate": 6.54478504822238e-07, "logits/chosen": -2.847011089324951, "logits/rejected": -2.32407808303833, "logps/chosen": -108.60368347167969, "logps/rejected": -898.0286865234375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.5953401327133179, "rewards/margins": 7.999835968017578, "rewards/rejected": -8.595175743103027, "step": 65820 }, { "epoch": 0.79, "learning_rate": 6.537739934700383e-07, "logits/chosen": -2.8632376194000244, "logits/rejected": -2.286283254623413, "logps/chosen": -109.86573791503906, "logps/rejected": -898.6325073242188, "loss": 0.1236, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6405268311500549, "rewards/margins": 7.9611663818359375, "rewards/rejected": -8.601694107055664, "step": 65830 }, { "epoch": 0.79, "learning_rate": 6.530698044637712e-07, "logits/chosen": -2.8923087120056152, "logits/rejected": -2.2138447761535645, "logps/chosen": -112.87522888183594, "logps/rejected": -1059.8453369140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6144336462020874, "rewards/margins": 9.580211639404297, "rewards/rejected": -10.194644927978516, "step": 65840 }, { "epoch": 0.79, "learning_rate": 6.523659379263877e-07, "logits/chosen": -2.909323215484619, "logits/rejected": -2.436647415161133, "logps/chosen": -106.07765197753906, "logps/rejected": -922.3748168945312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5883010625839233, "rewards/margins": 8.247637748718262, "rewards/rejected": -8.835939407348633, "step": 65850 }, { "epoch": 0.79, "learning_rate": 6.516623939807789e-07, "logits/chosen": -2.8748652935028076, "logits/rejected": -2.4174342155456543, "logps/chosen": -106.45780944824219, "logps/rejected": -993.1701049804688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5854510068893433, "rewards/margins": 8.952200889587402, "rewards/rejected": -9.537651062011719, "step": 65860 }, { "epoch": 0.79, "learning_rate": 6.509591727497827e-07, "logits/chosen": -2.86891508102417, "logits/rejected": -2.207977771759033, "logps/chosen": -106.84928894042969, "logps/rejected": -952.7315673828125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5754159688949585, "rewards/margins": 8.555460929870605, "rewards/rejected": -9.130876541137695, "step": 65870 }, { "epoch": 0.79, "learning_rate": 6.502562743561794e-07, "logits/chosen": -2.846666097640991, "logits/rejected": -2.294861078262329, "logps/chosen": -87.12132263183594, "logps/rejected": -931.9386596679688, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.4290122985839844, "rewards/margins": 8.511187553405762, "rewards/rejected": -8.94019889831543, "step": 65880 }, { "epoch": 0.79, "learning_rate": 6.495536989226933e-07, "logits/chosen": -2.887342691421509, "logits/rejected": -2.2643332481384277, "logps/chosen": -123.1388168334961, "logps/rejected": -1013.052734375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.690280556678772, "rewards/margins": 9.045393943786621, "rewards/rejected": -9.735673904418945, "step": 65890 }, { "epoch": 0.79, "learning_rate": 6.488514465719919e-07, "logits/chosen": -2.886465311050415, "logits/rejected": -2.428184986114502, "logps/chosen": -101.53064727783203, "logps/rejected": -960.1325073242188, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5516515374183655, "rewards/margins": 8.659778594970703, "rewards/rejected": -9.21142864227295, "step": 65900 }, { "epoch": 0.79, "learning_rate": 6.481495174266863e-07, "logits/chosen": -2.9119138717651367, "logits/rejected": -2.4966604709625244, "logps/chosen": -95.5378646850586, "logps/rejected": -875.9679565429688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.46302729845046997, "rewards/margins": 7.913923740386963, "rewards/rejected": -8.37695026397705, "step": 65910 }, { "epoch": 0.79, "learning_rate": 6.474479116093327e-07, "logits/chosen": -2.888036012649536, "logits/rejected": -2.296297550201416, "logps/chosen": -109.2246322631836, "logps/rejected": -924.5852661132812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6240153908729553, "rewards/margins": 8.213135719299316, "rewards/rejected": -8.837150573730469, "step": 65920 }, { "epoch": 0.79, "learning_rate": 6.467466292424277e-07, "logits/chosen": -2.8967456817626953, "logits/rejected": -2.199380397796631, "logps/chosen": -137.397216796875, "logps/rejected": -1029.430419921875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8057271242141724, "rewards/margins": 9.079415321350098, "rewards/rejected": -9.885143280029297, "step": 65930 }, { "epoch": 0.79, "learning_rate": 6.460456704484142e-07, "logits/chosen": -2.8683905601501465, "logits/rejected": -2.337172746658325, "logps/chosen": -96.13314056396484, "logps/rejected": -904.5930786132812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5098782777786255, "rewards/margins": 8.147359848022461, "rewards/rejected": -8.657238960266113, "step": 65940 }, { "epoch": 0.79, "learning_rate": 6.453450353496773e-07, "logits/chosen": -2.8715171813964844, "logits/rejected": -2.3854634761810303, "logps/chosen": -109.58308410644531, "logps/rejected": -992.8499145507812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.570947527885437, "rewards/margins": 8.960960388183594, "rewards/rejected": -9.53190803527832, "step": 65950 }, { "epoch": 0.79, "learning_rate": 6.446447240685461e-07, "logits/chosen": -2.8566231727600098, "logits/rejected": -2.5866336822509766, "logps/chosen": -85.37994384765625, "logps/rejected": -822.7431640625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4485344886779785, "rewards/margins": 7.3919677734375, "rewards/rejected": -7.840502738952637, "step": 65960 }, { "epoch": 0.79, "learning_rate": 6.43944736727293e-07, "logits/chosen": -2.887868881225586, "logits/rejected": -2.423024892807007, "logps/chosen": -81.3469467163086, "logps/rejected": -853.9010009765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.3541034758090973, "rewards/margins": 7.7936248779296875, "rewards/rejected": -8.147727966308594, "step": 65970 }, { "epoch": 0.79, "learning_rate": 6.432450734481341e-07, "logits/chosen": -2.854128122329712, "logits/rejected": -2.105870485305786, "logps/chosen": -133.2703857421875, "logps/rejected": -1130.375244140625, "loss": 0.0968, "rewards/accuracies": 1.0, "rewards/chosen": -0.8102210164070129, "rewards/margins": 10.085973739624023, "rewards/rejected": -10.896195411682129, "step": 65980 }, { "epoch": 0.79, "learning_rate": 6.425457343532293e-07, "logits/chosen": -2.902921199798584, "logits/rejected": -2.22509765625, "logps/chosen": -133.2804412841797, "logps/rejected": -907.595703125, "loss": 0.1495, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8312556147575378, "rewards/margins": 7.8620476722717285, "rewards/rejected": -8.693303108215332, "step": 65990 }, { "epoch": 0.79, "learning_rate": 6.418467195646794e-07, "logits/chosen": -2.8643860816955566, "logits/rejected": -2.4489541053771973, "logps/chosen": -87.41275024414062, "logps/rejected": -909.5927734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.40470418334007263, "rewards/margins": 8.312509536743164, "rewards/rejected": -8.717214584350586, "step": 66000 }, { "epoch": 0.79, "eval_logits/chosen": -2.884075880050659, "eval_logits/rejected": -1.7597004175186157, "eval_logps/chosen": -244.2650909423828, "eval_logps/rejected": -1143.70166015625, "eval_loss": 0.0012870641658082604, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -1.8308480978012085, "eval_rewards/margins": 9.13891315460205, "eval_rewards/rejected": -10.969761848449707, "eval_runtime": 1.2148, "eval_samples_per_second": 4.116, "eval_steps_per_second": 2.469, "step": 66000 }, { "epoch": 0.79, "learning_rate": 6.411480292045316e-07, "logits/chosen": -2.9278078079223633, "logits/rejected": -2.132169008255005, "logps/chosen": -106.28263854980469, "logps/rejected": -1086.3128662109375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5726374387741089, "rewards/margins": 9.897418975830078, "rewards/rejected": -10.470057487487793, "step": 66010 }, { "epoch": 0.79, "learning_rate": 6.404496633947743e-07, "logits/chosen": -2.8446033000946045, "logits/rejected": -2.156397819519043, "logps/chosen": -114.8230972290039, "logps/rejected": -1027.3997802734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6179661750793457, "rewards/margins": 9.240557670593262, "rewards/rejected": -9.858524322509766, "step": 66020 }, { "epoch": 0.79, "learning_rate": 6.397516222573419e-07, "logits/chosen": -2.8864896297454834, "logits/rejected": -2.30167818069458, "logps/chosen": -142.25033569335938, "logps/rejected": -1002.5656127929688, "loss": 0.1009, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9653929471969604, "rewards/margins": 8.646553039550781, "rewards/rejected": -9.611946105957031, "step": 66030 }, { "epoch": 0.79, "learning_rate": 6.390539059141105e-07, "logits/chosen": -2.845853328704834, "logits/rejected": -2.234511137008667, "logps/chosen": -101.39918518066406, "logps/rejected": -958.4510498046875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5454690456390381, "rewards/margins": 8.652058601379395, "rewards/rejected": -9.197526931762695, "step": 66040 }, { "epoch": 0.79, "learning_rate": 6.383565144868977e-07, "logits/chosen": -2.882606267929077, "logits/rejected": -2.252721071243286, "logps/chosen": -109.89411926269531, "logps/rejected": -941.2424926757812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5607706904411316, "rewards/margins": 8.470479965209961, "rewards/rejected": -9.03125, "step": 66050 }, { "epoch": 0.79, "learning_rate": 6.376594480974671e-07, "logits/chosen": -2.9011340141296387, "logits/rejected": -2.296053409576416, "logps/chosen": -129.8809356689453, "logps/rejected": -1016.8635864257812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7715033292770386, "rewards/margins": 8.9850435256958, "rewards/rejected": -9.756546020507812, "step": 66060 }, { "epoch": 0.79, "learning_rate": 6.369627068675244e-07, "logits/chosen": -2.835235118865967, "logits/rejected": -2.411909580230713, "logps/chosen": -95.03406524658203, "logps/rejected": -850.6270751953125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5075858235359192, "rewards/margins": 7.622200012207031, "rewards/rejected": -8.129785537719727, "step": 66070 }, { "epoch": 0.79, "learning_rate": 6.362662909187184e-07, "logits/chosen": -2.8357229232788086, "logits/rejected": -2.1214680671691895, "logps/chosen": -151.14663696289062, "logps/rejected": -970.7443237304688, "loss": 0.0924, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9464913606643677, "rewards/margins": 8.353516578674316, "rewards/rejected": -9.300006866455078, "step": 66080 }, { "epoch": 0.79, "learning_rate": 6.355702003726422e-07, "logits/chosen": -2.910733222961426, "logits/rejected": -2.408627510070801, "logps/chosen": -101.74454498291016, "logps/rejected": -907.3128051757812, "loss": 0.0635, "rewards/accuracies": 1.0, "rewards/chosen": -0.5683754086494446, "rewards/margins": 8.115595817565918, "rewards/rejected": -8.68397045135498, "step": 66090 }, { "epoch": 0.79, "learning_rate": 6.348744353508304e-07, "logits/chosen": -2.9447104930877686, "logits/rejected": -2.480968952178955, "logps/chosen": -87.18994140625, "logps/rejected": -867.9807739257812, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4085136353969574, "rewards/margins": 7.884419918060303, "rewards/rejected": -8.292933464050293, "step": 66100 }, { "epoch": 0.79, "learning_rate": 6.341789959747629e-07, "logits/chosen": -2.877502918243408, "logits/rejected": -2.5204648971557617, "logps/chosen": -94.53258514404297, "logps/rejected": -787.470703125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.509909451007843, "rewards/margins": 6.993182182312012, "rewards/rejected": -7.503091335296631, "step": 66110 }, { "epoch": 0.79, "learning_rate": 6.334838823658598e-07, "logits/chosen": -2.8714003562927246, "logits/rejected": -2.2922191619873047, "logps/chosen": -96.64127349853516, "logps/rejected": -914.8759765625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4617897868156433, "rewards/margins": 8.298925399780273, "rewards/rejected": -8.76071548461914, "step": 66120 }, { "epoch": 0.79, "learning_rate": 6.32789094645487e-07, "logits/chosen": -2.905587911605835, "logits/rejected": -2.0760128498077393, "logps/chosen": -134.41592407226562, "logps/rejected": -1027.2718505859375, "loss": 0.0227, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7946828603744507, "rewards/margins": 9.069334030151367, "rewards/rejected": -9.864018440246582, "step": 66130 }, { "epoch": 0.79, "learning_rate": 6.320946329349523e-07, "logits/chosen": -2.910158634185791, "logits/rejected": -2.352947235107422, "logps/chosen": -89.31932067871094, "logps/rejected": -943.3704833984375, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -0.4590652883052826, "rewards/margins": 8.586021423339844, "rewards/rejected": -9.045086860656738, "step": 66140 }, { "epoch": 0.79, "learning_rate": 6.314004973555066e-07, "logits/chosen": -2.8704233169555664, "logits/rejected": -2.434232234954834, "logps/chosen": -90.10906982421875, "logps/rejected": -882.0885009765625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.43887320160865784, "rewards/margins": 8.010741233825684, "rewards/rejected": -8.449613571166992, "step": 66150 }, { "epoch": 0.79, "learning_rate": 6.307066880283444e-07, "logits/chosen": -2.839616298675537, "logits/rejected": -2.1572375297546387, "logps/chosen": -117.24102783203125, "logps/rejected": -967.0775146484375, "loss": 0.083, "rewards/accuracies": 1.0, "rewards/chosen": -0.6850980520248413, "rewards/margins": 8.582635879516602, "rewards/rejected": -9.267733573913574, "step": 66160 }, { "epoch": 0.79, "learning_rate": 6.300132050746033e-07, "logits/chosen": -2.885099172592163, "logits/rejected": -2.342578172683716, "logps/chosen": -123.68342590332031, "logps/rejected": -941.2501831054688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7443472743034363, "rewards/margins": 8.27099609375, "rewards/rejected": -9.01534366607666, "step": 66170 }, { "epoch": 0.79, "learning_rate": 6.293200486153625e-07, "logits/chosen": -2.901092290878296, "logits/rejected": -2.285961627960205, "logps/chosen": -126.40980529785156, "logps/rejected": -911.98779296875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7665520310401917, "rewards/margins": 7.960543632507324, "rewards/rejected": -8.727096557617188, "step": 66180 }, { "epoch": 0.79, "learning_rate": 6.286272187716453e-07, "logits/chosen": -2.8550891876220703, "logits/rejected": -2.2575924396514893, "logps/chosen": -119.14115142822266, "logps/rejected": -919.7234497070312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6950940489768982, "rewards/margins": 8.128477096557617, "rewards/rejected": -8.823572158813477, "step": 66190 }, { "epoch": 0.79, "learning_rate": 6.279347156644184e-07, "logits/chosen": -2.9085655212402344, "logits/rejected": -2.326916217803955, "logps/chosen": -135.779052734375, "logps/rejected": -996.4410400390625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.829192042350769, "rewards/margins": 8.741485595703125, "rewards/rejected": -9.570677757263184, "step": 66200 }, { "epoch": 0.79, "learning_rate": 6.272425394145906e-07, "logits/chosen": -2.8373007774353027, "logits/rejected": -2.3692288398742676, "logps/chosen": -102.29845428466797, "logps/rejected": -953.7243041992188, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5512838959693909, "rewards/margins": 8.595224380493164, "rewards/rejected": -9.146509170532227, "step": 66210 }, { "epoch": 0.79, "learning_rate": 6.26550690143014e-07, "logits/chosen": -2.8895983695983887, "logits/rejected": -2.384063243865967, "logps/chosen": -110.73548889160156, "logps/rejected": -974.1875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6401494145393372, "rewards/margins": 8.711664199829102, "rewards/rejected": -9.351813316345215, "step": 66220 }, { "epoch": 0.79, "learning_rate": 6.258591679704837e-07, "logits/chosen": -2.88490629196167, "logits/rejected": -2.4060821533203125, "logps/chosen": -98.1119384765625, "logps/rejected": -816.5391235351562, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5018226504325867, "rewards/margins": 7.286781311035156, "rewards/rejected": -7.788603782653809, "step": 66230 }, { "epoch": 0.79, "learning_rate": 6.251679730177382e-07, "logits/chosen": -2.8774800300598145, "logits/rejected": -2.384695529937744, "logps/chosen": -98.70664978027344, "logps/rejected": -887.0618896484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5343570113182068, "rewards/margins": 7.950579643249512, "rewards/rejected": -8.484936714172363, "step": 66240 }, { "epoch": 0.79, "learning_rate": 6.244771054054569e-07, "logits/chosen": -2.8620879650115967, "logits/rejected": -2.4133756160736084, "logps/chosen": -96.57487487792969, "logps/rejected": -925.6681518554688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4856232702732086, "rewards/margins": 8.38178825378418, "rewards/rejected": -8.867411613464355, "step": 66250 }, { "epoch": 0.79, "learning_rate": 6.237865652542638e-07, "logits/chosen": -2.903127670288086, "logits/rejected": -2.4677176475524902, "logps/chosen": -115.01301574707031, "logps/rejected": -922.7804565429688, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7133742570877075, "rewards/margins": 8.112859725952148, "rewards/rejected": -8.826233863830566, "step": 66260 }, { "epoch": 0.79, "learning_rate": 6.230963526847253e-07, "logits/chosen": -2.8772683143615723, "logits/rejected": -2.1312661170959473, "logps/chosen": -130.50640869140625, "logps/rejected": -953.7720947265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7861031889915466, "rewards/margins": 8.365392684936523, "rewards/rejected": -9.151495933532715, "step": 66270 }, { "epoch": 0.79, "learning_rate": 6.224064678173511e-07, "logits/chosen": -2.8698318004608154, "logits/rejected": -2.3913919925689697, "logps/chosen": -93.3282470703125, "logps/rejected": -850.9886474609375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.4747626781463623, "rewards/margins": 7.665362358093262, "rewards/rejected": -8.140125274658203, "step": 66280 }, { "epoch": 0.79, "learning_rate": 6.217169107725926e-07, "logits/chosen": -2.8717315196990967, "logits/rejected": -2.2186782360076904, "logps/chosen": -124.796875, "logps/rejected": -1038.2391357421875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6894496083259583, "rewards/margins": 9.278996467590332, "rewards/rejected": -9.96844482421875, "step": 66290 }, { "epoch": 0.79, "learning_rate": 6.210276816708447e-07, "logits/chosen": -2.8762741088867188, "logits/rejected": -2.4505696296691895, "logps/chosen": -90.27214050292969, "logps/rejected": -892.7838134765625, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -0.49071064591407776, "rewards/margins": 8.05870246887207, "rewards/rejected": -8.54941177368164, "step": 66300 }, { "epoch": 0.79, "learning_rate": 6.203387806324459e-07, "logits/chosen": -2.8654022216796875, "logits/rejected": -2.48366117477417, "logps/chosen": -111.78763580322266, "logps/rejected": -822.3054809570312, "loss": 0.1988, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7314282655715942, "rewards/margins": 7.116189002990723, "rewards/rejected": -7.847618103027344, "step": 66310 }, { "epoch": 0.79, "learning_rate": 6.196502077776745e-07, "logits/chosen": -2.8856685161590576, "logits/rejected": -2.521435260772705, "logps/chosen": -77.30284118652344, "logps/rejected": -861.4490356445312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.3811258375644684, "rewards/margins": 7.866199493408203, "rewards/rejected": -8.247323989868164, "step": 66320 }, { "epoch": 0.79, "learning_rate": 6.189619632267546e-07, "logits/chosen": -2.837008476257324, "logits/rejected": -2.3418965339660645, "logps/chosen": -101.0641860961914, "logps/rejected": -878.0213623046875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5584203004837036, "rewards/margins": 7.8449296951293945, "rewards/rejected": -8.403348922729492, "step": 66330 }, { "epoch": 0.79, "learning_rate": 6.182740470998514e-07, "logits/chosen": -2.879936695098877, "logits/rejected": -2.3939552307128906, "logps/chosen": -101.72441101074219, "logps/rejected": -927.5784301757812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5572747588157654, "rewards/margins": 8.33082389831543, "rewards/rejected": -8.88809871673584, "step": 66340 }, { "epoch": 0.79, "learning_rate": 6.175864595170733e-07, "logits/chosen": -2.8640217781066895, "logits/rejected": -2.2246205806732178, "logps/chosen": -102.18931579589844, "logps/rejected": -1023.2540283203125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5116485953330994, "rewards/margins": 9.32812786102295, "rewards/rejected": -9.839776992797852, "step": 66350 }, { "epoch": 0.79, "learning_rate": 6.168992005984714e-07, "logits/chosen": -2.9126245975494385, "logits/rejected": -2.3952596187591553, "logps/chosen": -94.58138275146484, "logps/rejected": -944.6334838867188, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.47832900285720825, "rewards/margins": 8.586604118347168, "rewards/rejected": -9.064933776855469, "step": 66360 }, { "epoch": 0.79, "learning_rate": 6.162122704640394e-07, "logits/chosen": -2.8699700832366943, "logits/rejected": -2.412616014480591, "logps/chosen": -107.90342712402344, "logps/rejected": -866.0730590820312, "loss": 0.0928, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6067622900009155, "rewards/margins": 7.6783599853515625, "rewards/rejected": -8.285122871398926, "step": 66370 }, { "epoch": 0.79, "learning_rate": 6.155256692337125e-07, "logits/chosen": -2.9068284034729004, "logits/rejected": -2.3490142822265625, "logps/chosen": -101.52619934082031, "logps/rejected": -1007.2484130859375, "loss": 0.16, "rewards/accuracies": 1.0, "rewards/chosen": -0.5549715757369995, "rewards/margins": 9.128107070922852, "rewards/rejected": -9.683076858520508, "step": 66380 }, { "epoch": 0.79, "learning_rate": 6.148393970273698e-07, "logits/chosen": -2.8703503608703613, "logits/rejected": -2.447427272796631, "logps/chosen": -100.60556030273438, "logps/rejected": -878.9303588867188, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -0.563778281211853, "rewards/margins": 7.835694313049316, "rewards/rejected": -8.3994722366333, "step": 66390 }, { "epoch": 0.79, "learning_rate": 6.141534539648325e-07, "logits/chosen": -2.826540231704712, "logits/rejected": -2.2269580364227295, "logps/chosen": -144.0367431640625, "logps/rejected": -987.89892578125, "loss": 0.0896, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.96412193775177, "rewards/margins": 8.521896362304688, "rewards/rejected": -9.486018180847168, "step": 66400 }, { "epoch": 0.79, "learning_rate": 6.134678401658645e-07, "logits/chosen": -2.8822133541107178, "logits/rejected": -2.0610222816467285, "logps/chosen": -145.13397216796875, "logps/rejected": -990.4610595703125, "loss": 0.0937, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9260209202766418, "rewards/margins": 8.57487678527832, "rewards/rejected": -9.500897407531738, "step": 66410 }, { "epoch": 0.8, "learning_rate": 6.127825557501721e-07, "logits/chosen": -2.8431434631347656, "logits/rejected": -2.2301013469696045, "logps/chosen": -121.67021179199219, "logps/rejected": -986.2572021484375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7231038808822632, "rewards/margins": 8.74382209777832, "rewards/rejected": -9.466926574707031, "step": 66420 }, { "epoch": 0.8, "learning_rate": 6.120976008374038e-07, "logits/chosen": -2.9080920219421387, "logits/rejected": -2.4863853454589844, "logps/chosen": -96.61451721191406, "logps/rejected": -886.38525390625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.5382391214370728, "rewards/margins": 7.953224182128906, "rewards/rejected": -8.491463661193848, "step": 66430 }, { "epoch": 0.8, "learning_rate": 6.114129755471509e-07, "logits/chosen": -2.8801398277282715, "logits/rejected": -2.367035388946533, "logps/chosen": -128.4056854248047, "logps/rejected": -905.2191162109375, "loss": 0.1135, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8070682287216187, "rewards/margins": 7.869708061218262, "rewards/rejected": -8.676775932312012, "step": 66440 }, { "epoch": 0.8, "learning_rate": 6.107286799989473e-07, "logits/chosen": -2.8655295372009277, "logits/rejected": -2.1820154190063477, "logps/chosen": -122.7300796508789, "logps/rejected": -1010.18603515625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.68968266248703, "rewards/margins": 9.018951416015625, "rewards/rejected": -9.708633422851562, "step": 66450 }, { "epoch": 0.8, "learning_rate": 6.100447143122687e-07, "logits/chosen": -2.856707811355591, "logits/rejected": -2.2179980278015137, "logps/chosen": -126.70201110839844, "logps/rejected": -1004.8162231445312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7698415517807007, "rewards/margins": 8.86408805847168, "rewards/rejected": -9.633929252624512, "step": 66460 }, { "epoch": 0.8, "learning_rate": 6.09361078606534e-07, "logits/chosen": -2.916595220565796, "logits/rejected": -2.4938273429870605, "logps/chosen": -96.67752838134766, "logps/rejected": -889.9415893554688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5351608991622925, "rewards/margins": 7.97268533706665, "rewards/rejected": -8.507845878601074, "step": 66470 }, { "epoch": 0.8, "learning_rate": 6.086777730011039e-07, "logits/chosen": -2.843829870223999, "logits/rejected": -2.290809154510498, "logps/chosen": -104.9186782836914, "logps/rejected": -986.271484375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5774654150009155, "rewards/margins": 8.894749641418457, "rewards/rejected": -9.472216606140137, "step": 66480 }, { "epoch": 0.8, "learning_rate": 6.079947976152825e-07, "logits/chosen": -2.9053409099578857, "logits/rejected": -2.2528433799743652, "logps/chosen": -128.65110778808594, "logps/rejected": -987.72607421875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7447402477264404, "rewards/margins": 8.721458435058594, "rewards/rejected": -9.466200828552246, "step": 66490 }, { "epoch": 0.8, "learning_rate": 6.07312152568314e-07, "logits/chosen": -2.851806640625, "logits/rejected": -2.2481555938720703, "logps/chosen": -122.8148193359375, "logps/rejected": -1005.60400390625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.763151228427887, "rewards/margins": 8.899049758911133, "rewards/rejected": -9.662199974060059, "step": 66500 }, { "epoch": 0.8, "learning_rate": 6.06629837979387e-07, "logits/chosen": -2.8602051734924316, "logits/rejected": -2.214369535446167, "logps/chosen": -153.77255249023438, "logps/rejected": -998.25, "loss": 0.1327, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.043853521347046, "rewards/margins": 8.543974876403809, "rewards/rejected": -9.587828636169434, "step": 66510 }, { "epoch": 0.8, "learning_rate": 6.059478539676314e-07, "logits/chosen": -2.8597664833068848, "logits/rejected": -2.385821580886841, "logps/chosen": -73.7807388305664, "logps/rejected": -791.0101318359375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.3357773721218109, "rewards/margins": 7.20101261138916, "rewards/rejected": -7.536790370941162, "step": 66520 }, { "epoch": 0.8, "learning_rate": 6.052662006521201e-07, "logits/chosen": -2.863436222076416, "logits/rejected": -2.2661855220794678, "logps/chosen": -116.0121841430664, "logps/rejected": -982.1124267578125, "loss": 0.1199, "rewards/accuracies": 1.0, "rewards/chosen": -0.6744321584701538, "rewards/margins": 8.766534805297852, "rewards/rejected": -9.440966606140137, "step": 66530 }, { "epoch": 0.8, "learning_rate": 6.045848781518679e-07, "logits/chosen": -2.8854434490203857, "logits/rejected": -2.5339717864990234, "logps/chosen": -88.87721252441406, "logps/rejected": -797.600341796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4259251058101654, "rewards/margins": 7.169485569000244, "rewards/rejected": -7.595410346984863, "step": 66540 }, { "epoch": 0.8, "learning_rate": 6.039038865858316e-07, "logits/chosen": -2.880225658416748, "logits/rejected": -2.1203665733337402, "logps/chosen": -126.36148834228516, "logps/rejected": -1159.815673828125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6661839485168457, "rewards/margins": 10.52148151397705, "rewards/rejected": -11.187664031982422, "step": 66550 }, { "epoch": 0.8, "learning_rate": 6.03223226072911e-07, "logits/chosen": -2.8889570236206055, "logits/rejected": -2.390194892883301, "logps/chosen": -106.77911376953125, "logps/rejected": -866.6650390625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6096380949020386, "rewards/margins": 7.681465148925781, "rewards/rejected": -8.291104316711426, "step": 66560 }, { "epoch": 0.8, "learning_rate": 6.025428967319466e-07, "logits/chosen": -2.8468947410583496, "logits/rejected": -2.331693649291992, "logps/chosen": -81.93110656738281, "logps/rejected": -948.1339111328125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.3368215560913086, "rewards/margins": 8.7645902633667, "rewards/rejected": -9.101410865783691, "step": 66570 }, { "epoch": 0.8, "learning_rate": 6.018628986817224e-07, "logits/chosen": -2.891709566116333, "logits/rejected": -2.537871837615967, "logps/chosen": -79.86809539794922, "logps/rejected": -854.98193359375, "loss": 0.0802, "rewards/accuracies": 1.0, "rewards/chosen": -0.40275129675865173, "rewards/margins": 7.761438846588135, "rewards/rejected": -8.164190292358398, "step": 66580 }, { "epoch": 0.8, "learning_rate": 6.011832320409644e-07, "logits/chosen": -2.876030206680298, "logits/rejected": -2.486285448074341, "logps/chosen": -162.8705596923828, "logps/rejected": -858.6085205078125, "loss": 0.2908, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.1857726573944092, "rewards/margins": 7.024065971374512, "rewards/rejected": -8.2098388671875, "step": 66590 }, { "epoch": 0.8, "learning_rate": 6.0050389692834e-07, "logits/chosen": -2.869607448577881, "logits/rejected": -2.2993462085723877, "logps/chosen": -121.62080383300781, "logps/rejected": -928.4554443359375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.7266284823417664, "rewards/margins": 8.161230087280273, "rewards/rejected": -8.887857437133789, "step": 66600 }, { "epoch": 0.8, "learning_rate": 5.998248934624598e-07, "logits/chosen": -2.8919243812561035, "logits/rejected": -2.250663995742798, "logps/chosen": -104.6459732055664, "logps/rejected": -1021.9639892578125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5440765619277954, "rewards/margins": 9.29534912109375, "rewards/rejected": -9.839426040649414, "step": 66610 }, { "epoch": 0.8, "learning_rate": 5.991462217618757e-07, "logits/chosen": -2.877044677734375, "logits/rejected": -2.4385368824005127, "logps/chosen": -87.39958953857422, "logps/rejected": -941.0487060546875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.44805464148521423, "rewards/margins": 8.57939624786377, "rewards/rejected": -9.027449607849121, "step": 66620 }, { "epoch": 0.8, "learning_rate": 5.984678819450825e-07, "logits/chosen": -2.8485562801361084, "logits/rejected": -2.4170899391174316, "logps/chosen": -96.65333557128906, "logps/rejected": -863.8450927734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5224987268447876, "rewards/margins": 7.726925849914551, "rewards/rejected": -8.249425888061523, "step": 66630 }, { "epoch": 0.8, "learning_rate": 5.977898741305152e-07, "logits/chosen": -2.8860440254211426, "logits/rejected": -2.2818994522094727, "logps/chosen": -103.62223815917969, "logps/rejected": -1045.4959716796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5548507571220398, "rewards/margins": 9.491100311279297, "rewards/rejected": -10.045949935913086, "step": 66640 }, { "epoch": 0.8, "learning_rate": 5.971121984365527e-07, "logits/chosen": -2.8640778064727783, "logits/rejected": -2.348550319671631, "logps/chosen": -100.80184936523438, "logps/rejected": -955.3134765625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5458674430847168, "rewards/margins": 8.618124008178711, "rewards/rejected": -9.163991928100586, "step": 66650 }, { "epoch": 0.8, "learning_rate": 5.964348549815152e-07, "logits/chosen": -2.8556666374206543, "logits/rejected": -2.2625479698181152, "logps/chosen": -126.04740142822266, "logps/rejected": -1019.3160400390625, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -0.7784934639930725, "rewards/margins": 9.021817207336426, "rewards/rejected": -9.800310134887695, "step": 66660 }, { "epoch": 0.8, "learning_rate": 5.957578438836653e-07, "logits/chosen": -2.8739733695983887, "logits/rejected": -2.082965850830078, "logps/chosen": -125.66634368896484, "logps/rejected": -1010.3963012695312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6857832670211792, "rewards/margins": 9.018237113952637, "rewards/rejected": -9.704020500183105, "step": 66670 }, { "epoch": 0.8, "learning_rate": 5.950811652612071e-07, "logits/chosen": -2.866034984588623, "logits/rejected": -2.2393784523010254, "logps/chosen": -118.04351806640625, "logps/rejected": -953.7498779296875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.633625328540802, "rewards/margins": 8.519432067871094, "rewards/rejected": -9.153058052062988, "step": 66680 }, { "epoch": 0.8, "learning_rate": 5.944048192322871e-07, "logits/chosen": -2.8699898719787598, "logits/rejected": -2.24636173248291, "logps/chosen": -122.94989013671875, "logps/rejected": -944.0778198242188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7230145335197449, "rewards/margins": 8.323747634887695, "rewards/rejected": -9.046762466430664, "step": 66690 }, { "epoch": 0.8, "learning_rate": 5.937288059149926e-07, "logits/chosen": -2.886291980743408, "logits/rejected": -2.374927520751953, "logps/chosen": -93.54331970214844, "logps/rejected": -887.2379760742188, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4846280515193939, "rewards/margins": 8.00041389465332, "rewards/rejected": -8.4850435256958, "step": 66700 }, { "epoch": 0.8, "learning_rate": 5.930531254273542e-07, "logits/chosen": -2.8729119300842285, "logits/rejected": -2.3572494983673096, "logps/chosen": -101.52791595458984, "logps/rejected": -935.8117065429688, "loss": 0.1509, "rewards/accuracies": 1.0, "rewards/chosen": -0.5295303463935852, "rewards/margins": 8.429144859313965, "rewards/rejected": -8.958674430847168, "step": 66710 }, { "epoch": 0.8, "learning_rate": 5.923777778873438e-07, "logits/chosen": -2.838461399078369, "logits/rejected": -2.273730754852295, "logps/chosen": -108.62078857421875, "logps/rejected": -968.2388916015625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6286331415176392, "rewards/margins": 8.658265113830566, "rewards/rejected": -9.286898612976074, "step": 66720 }, { "epoch": 0.8, "learning_rate": 5.917027634128749e-07, "logits/chosen": -2.8810904026031494, "logits/rejected": -2.35295033454895, "logps/chosen": -111.6119384765625, "logps/rejected": -1011.4749145507812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.614834189414978, "rewards/margins": 9.121870040893555, "rewards/rejected": -9.73670482635498, "step": 66730 }, { "epoch": 0.8, "learning_rate": 5.910280821218039e-07, "logits/chosen": -2.910332441329956, "logits/rejected": -2.494723320007324, "logps/chosen": -77.86678314208984, "logps/rejected": -853.3377685546875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.360314279794693, "rewards/margins": 7.793498992919922, "rewards/rejected": -8.153813362121582, "step": 66740 }, { "epoch": 0.8, "learning_rate": 5.903537341319277e-07, "logits/chosen": -2.8981118202209473, "logits/rejected": -2.4512577056884766, "logps/chosen": -81.71910095214844, "logps/rejected": -889.0855712890625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3920661211013794, "rewards/margins": 8.12432861328125, "rewards/rejected": -8.51639461517334, "step": 66750 }, { "epoch": 0.8, "learning_rate": 5.896797195609861e-07, "logits/chosen": -2.913856029510498, "logits/rejected": -2.4003207683563232, "logps/chosen": -96.83518981933594, "logps/rejected": -835.34521484375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4418264329433441, "rewards/margins": 7.523492336273193, "rewards/rejected": -7.965318202972412, "step": 66760 }, { "epoch": 0.8, "learning_rate": 5.890060385266593e-07, "logits/chosen": -2.854872226715088, "logits/rejected": -2.134894847869873, "logps/chosen": -116.0692138671875, "logps/rejected": -1045.84130859375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6487024426460266, "rewards/margins": 9.409561157226562, "rewards/rejected": -10.058263778686523, "step": 66770 }, { "epoch": 0.8, "learning_rate": 5.883326911465706e-07, "logits/chosen": -2.8351798057556152, "logits/rejected": -2.3233678340911865, "logps/chosen": -110.28216552734375, "logps/rejected": -973.1744384765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6837989091873169, "rewards/margins": 8.652284622192383, "rewards/rejected": -9.33608341217041, "step": 66780 }, { "epoch": 0.8, "learning_rate": 5.876596775382845e-07, "logits/chosen": -2.8870787620544434, "logits/rejected": -2.443420171737671, "logps/chosen": -90.77066040039062, "logps/rejected": -859.7347412109375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.4465504288673401, "rewards/margins": 7.769228458404541, "rewards/rejected": -8.215778350830078, "step": 66790 }, { "epoch": 0.8, "learning_rate": 5.869869978193074e-07, "logits/chosen": -2.901071310043335, "logits/rejected": -2.4894821643829346, "logps/chosen": -97.33045959472656, "logps/rejected": -900.8448486328125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.5738588571548462, "rewards/margins": 8.048562049865723, "rewards/rejected": -8.622421264648438, "step": 66800 }, { "epoch": 0.8, "learning_rate": 5.863146521070875e-07, "logits/chosen": -2.835252285003662, "logits/rejected": -2.0720407962799072, "logps/chosen": -127.21368408203125, "logps/rejected": -964.50390625, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -0.7239938974380493, "rewards/margins": 8.515049934387207, "rewards/rejected": -9.239045143127441, "step": 66810 }, { "epoch": 0.8, "learning_rate": 5.856426405190141e-07, "logits/chosen": -2.8954110145568848, "logits/rejected": -2.4679253101348877, "logps/chosen": -115.64338684082031, "logps/rejected": -823.3172607421875, "loss": 0.2189, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7149648666381836, "rewards/margins": 7.1341657638549805, "rewards/rejected": -7.8491315841674805, "step": 66820 }, { "epoch": 0.8, "learning_rate": 5.849709631724196e-07, "logits/chosen": -2.882681369781494, "logits/rejected": -2.123089551925659, "logps/chosen": -114.7737808227539, "logps/rejected": -1070.5179443359375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.592983603477478, "rewards/margins": 9.69145679473877, "rewards/rejected": -10.284440040588379, "step": 66830 }, { "epoch": 0.8, "learning_rate": 5.842996201845755e-07, "logits/chosen": -2.893364429473877, "logits/rejected": -2.1469852924346924, "logps/chosen": -129.64096069335938, "logps/rejected": -995.6412353515625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7932215929031372, "rewards/margins": 8.766855239868164, "rewards/rejected": -9.560076713562012, "step": 66840 }, { "epoch": 0.8, "learning_rate": 5.836286116726969e-07, "logits/chosen": -2.8495891094207764, "logits/rejected": -2.291977643966675, "logps/chosen": -99.2191390991211, "logps/rejected": -923.7171630859375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5245846509933472, "rewards/margins": 8.320757865905762, "rewards/rejected": -8.845342636108398, "step": 66850 }, { "epoch": 0.8, "learning_rate": 5.829579377539399e-07, "logits/chosen": -2.90779447555542, "logits/rejected": -2.4323859214782715, "logps/chosen": -111.280517578125, "logps/rejected": -955.89501953125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6068519949913025, "rewards/margins": 8.544921875, "rewards/rejected": -9.151774406433105, "step": 66860 }, { "epoch": 0.8, "learning_rate": 5.82287598545403e-07, "logits/chosen": -2.8761789798736572, "logits/rejected": -2.3261916637420654, "logps/chosen": -115.7154769897461, "logps/rejected": -943.3935546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6730163097381592, "rewards/margins": 8.371234893798828, "rewards/rejected": -9.044252395629883, "step": 66870 }, { "epoch": 0.8, "learning_rate": 5.816175941641258e-07, "logits/chosen": -2.871389865875244, "logits/rejected": -2.425976276397705, "logps/chosen": -128.15213012695312, "logps/rejected": -864.81982421875, "loss": 0.1241, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8117812871932983, "rewards/margins": 7.44094705581665, "rewards/rejected": -8.252728462219238, "step": 66880 }, { "epoch": 0.8, "learning_rate": 5.80947924727088e-07, "logits/chosen": -2.9128851890563965, "logits/rejected": -2.5365405082702637, "logps/chosen": -87.24581146240234, "logps/rejected": -870.7930908203125, "loss": 0.1229, "rewards/accuracies": 1.0, "rewards/chosen": -0.4702575206756592, "rewards/margins": 7.8412957191467285, "rewards/rejected": -8.311553955078125, "step": 66890 }, { "epoch": 0.8, "learning_rate": 5.802785903512126e-07, "logits/chosen": -2.8632733821868896, "logits/rejected": -2.227954626083374, "logps/chosen": -122.46014404296875, "logps/rejected": -893.50537109375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.7127091288566589, "rewards/margins": 7.829987525939941, "rewards/rejected": -8.542696952819824, "step": 66900 }, { "epoch": 0.8, "learning_rate": 5.796095911533637e-07, "logits/chosen": -2.8826775550842285, "logits/rejected": -2.452556848526001, "logps/chosen": -84.70465087890625, "logps/rejected": -917.1798706054688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.41215720772743225, "rewards/margins": 8.372308731079102, "rewards/rejected": -8.784466743469238, "step": 66910 }, { "epoch": 0.8, "learning_rate": 5.789409272503463e-07, "logits/chosen": -2.8980050086975098, "logits/rejected": -2.3310115337371826, "logps/chosen": -115.93391418457031, "logps/rejected": -964.0979614257812, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.6878677010536194, "rewards/margins": 8.546623229980469, "rewards/rejected": -9.234492301940918, "step": 66920 }, { "epoch": 0.8, "learning_rate": 5.782725987589075e-07, "logits/chosen": -2.9250612258911133, "logits/rejected": -2.2135024070739746, "logps/chosen": -114.0791015625, "logps/rejected": -955.0972900390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5770910382270813, "rewards/margins": 8.573137283325195, "rewards/rejected": -9.150228500366211, "step": 66930 }, { "epoch": 0.8, "learning_rate": 5.776046057957354e-07, "logits/chosen": -2.8501462936401367, "logits/rejected": -2.2602367401123047, "logps/chosen": -108.85563659667969, "logps/rejected": -968.4597778320312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6288084387779236, "rewards/margins": 8.683473587036133, "rewards/rejected": -9.312280654907227, "step": 66940 }, { "epoch": 0.8, "learning_rate": 5.769369484774608e-07, "logits/chosen": -2.854341506958008, "logits/rejected": -2.3458175659179688, "logps/chosen": -121.33195495605469, "logps/rejected": -897.0634765625, "loss": 0.1407, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7654917240142822, "rewards/margins": 7.816043853759766, "rewards/rejected": -8.581535339355469, "step": 66950 }, { "epoch": 0.8, "learning_rate": 5.762696269206533e-07, "logits/chosen": -2.9083216190338135, "logits/rejected": -2.6323208808898926, "logps/chosen": -82.62344360351562, "logps/rejected": -810.4402465820312, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -0.4276583790779114, "rewards/margins": 7.303953647613525, "rewards/rejected": -7.731612205505371, "step": 66960 }, { "epoch": 0.8, "learning_rate": 5.756026412418261e-07, "logits/chosen": -2.9102203845977783, "logits/rejected": -2.1490089893341064, "logps/chosen": -116.13631439208984, "logps/rejected": -959.7658081054688, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6490787863731384, "rewards/margins": 8.550676345825195, "rewards/rejected": -9.199755668640137, "step": 66970 }, { "epoch": 0.8, "learning_rate": 5.749359915574329e-07, "logits/chosen": -2.870532512664795, "logits/rejected": -2.3122642040252686, "logps/chosen": -107.07395935058594, "logps/rejected": -943.1688232421875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.5887778997421265, "rewards/margins": 8.445016860961914, "rewards/rejected": -9.033793449401855, "step": 66980 }, { "epoch": 0.8, "learning_rate": 5.742696779838689e-07, "logits/chosen": -2.879546642303467, "logits/rejected": -2.197946310043335, "logps/chosen": -95.57221984863281, "logps/rejected": -936.7864379882812, "loss": 0.1171, "rewards/accuracies": 1.0, "rewards/chosen": -0.43764549493789673, "rewards/margins": 8.542062759399414, "rewards/rejected": -8.979708671569824, "step": 66990 }, { "epoch": 0.8, "learning_rate": 5.736037006374703e-07, "logits/chosen": -2.8580374717712402, "logits/rejected": -2.4220056533813477, "logps/chosen": -95.1664810180664, "logps/rejected": -778.2578125, "loss": 0.127, "rewards/accuracies": 1.0, "rewards/chosen": -0.5592349767684937, "rewards/margins": 6.853313446044922, "rewards/rejected": -7.4125494956970215, "step": 67000 }, { "epoch": 0.8, "learning_rate": 5.729380596345163e-07, "logits/chosen": -2.880894660949707, "logits/rejected": -2.2393290996551514, "logps/chosen": -113.1237564086914, "logps/rejected": -918.9376831054688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6400700211524963, "rewards/margins": 8.156103134155273, "rewards/rejected": -8.796173095703125, "step": 67010 }, { "epoch": 0.8, "learning_rate": 5.722727550912241e-07, "logits/chosen": -2.857360601425171, "logits/rejected": -2.542344570159912, "logps/chosen": -96.3417739868164, "logps/rejected": -825.5723876953125, "loss": 0.0704, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5751493573188782, "rewards/margins": 7.299233436584473, "rewards/rejected": -7.874382019042969, "step": 67020 }, { "epoch": 0.8, "learning_rate": 5.716077871237549e-07, "logits/chosen": -2.814706802368164, "logits/rejected": -2.488675594329834, "logps/chosen": -77.17037200927734, "logps/rejected": -809.8207397460938, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.38385289907455444, "rewards/margins": 7.342643737792969, "rewards/rejected": -7.726495265960693, "step": 67030 }, { "epoch": 0.8, "learning_rate": 5.709431558482101e-07, "logits/chosen": -2.907595634460449, "logits/rejected": -2.1795077323913574, "logps/chosen": -139.32485961914062, "logps/rejected": -1007.8933715820312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8166297674179077, "rewards/margins": 8.846394538879395, "rewards/rejected": -9.663023948669434, "step": 67040 }, { "epoch": 0.8, "learning_rate": 5.702788613806326e-07, "logits/chosen": -2.894292116165161, "logits/rejected": -2.4901139736175537, "logps/chosen": -111.8037109375, "logps/rejected": -920.3416748046875, "loss": 0.1029, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6647647619247437, "rewards/margins": 8.165349960327148, "rewards/rejected": -8.830113410949707, "step": 67050 }, { "epoch": 0.8, "learning_rate": 5.696149038370064e-07, "logits/chosen": -2.8586082458496094, "logits/rejected": -2.3238368034362793, "logps/chosen": -112.1747817993164, "logps/rejected": -927.2200317382812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6268326044082642, "rewards/margins": 8.245859146118164, "rewards/rejected": -8.87269115447998, "step": 67060 }, { "epoch": 0.8, "learning_rate": 5.689512833332564e-07, "logits/chosen": -2.8402950763702393, "logits/rejected": -2.322563648223877, "logps/chosen": -118.00251770019531, "logps/rejected": -928.1328125, "loss": 0.1125, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7253001928329468, "rewards/margins": 8.166342735290527, "rewards/rejected": -8.891642570495605, "step": 67070 }, { "epoch": 0.8, "learning_rate": 5.6828799998525e-07, "logits/chosen": -2.885354518890381, "logits/rejected": -2.2098071575164795, "logps/chosen": -117.37342834472656, "logps/rejected": -968.2540893554688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7114479541778564, "rewards/margins": 8.5638427734375, "rewards/rejected": -9.275290489196777, "step": 67080 }, { "epoch": 0.8, "learning_rate": 5.67625053908793e-07, "logits/chosen": -2.84549880027771, "logits/rejected": -2.1247289180755615, "logps/chosen": -121.17515563964844, "logps/rejected": -983.5283203125, "loss": 0.1346, "rewards/accuracies": 1.0, "rewards/chosen": -0.687839150428772, "rewards/margins": 8.741701126098633, "rewards/rejected": -9.429540634155273, "step": 67090 }, { "epoch": 0.8, "learning_rate": 5.669624452196342e-07, "logits/chosen": -2.8572802543640137, "logits/rejected": -2.3694424629211426, "logps/chosen": -106.25807189941406, "logps/rejected": -866.69921875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6276261210441589, "rewards/margins": 7.657282829284668, "rewards/rejected": -8.284910202026367, "step": 67100 }, { "epoch": 0.8, "learning_rate": 5.663001740334642e-07, "logits/chosen": -2.8660082817077637, "logits/rejected": -2.2903289794921875, "logps/chosen": -105.44963073730469, "logps/rejected": -999.1707153320312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6084153652191162, "rewards/margins": 8.987383842468262, "rewards/rejected": -9.595799446105957, "step": 67110 }, { "epoch": 0.8, "learning_rate": 5.656382404659131e-07, "logits/chosen": -2.86824631690979, "logits/rejected": -2.170368194580078, "logps/chosen": -119.91279602050781, "logps/rejected": -908.4195556640625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.690751314163208, "rewards/margins": 7.997805595397949, "rewards/rejected": -8.688557624816895, "step": 67120 }, { "epoch": 0.8, "learning_rate": 5.649766446325524e-07, "logits/chosen": -2.8592028617858887, "logits/rejected": -2.422104835510254, "logps/chosen": -101.80518341064453, "logps/rejected": -906.9972534179688, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5677472352981567, "rewards/margins": 8.115235328674316, "rewards/rejected": -8.682982444763184, "step": 67130 }, { "epoch": 0.8, "learning_rate": 5.643153866488957e-07, "logits/chosen": -2.867980480194092, "logits/rejected": -2.2955915927886963, "logps/chosen": -129.82875061035156, "logps/rejected": -951.2384643554688, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -0.7786901593208313, "rewards/margins": 8.346033096313477, "rewards/rejected": -9.124722480773926, "step": 67140 }, { "epoch": 0.8, "learning_rate": 5.636544666303966e-07, "logits/chosen": -2.8571133613586426, "logits/rejected": -2.2604222297668457, "logps/chosen": -107.2228775024414, "logps/rejected": -922.88427734375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6421386003494263, "rewards/margins": 8.214120864868164, "rewards/rejected": -8.856260299682617, "step": 67150 }, { "epoch": 0.8, "learning_rate": 5.629938846924493e-07, "logits/chosen": -2.8899173736572266, "logits/rejected": -2.282921314239502, "logps/chosen": -97.63536071777344, "logps/rejected": -926.3043212890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5275070667266846, "rewards/margins": 8.352411270141602, "rewards/rejected": -8.879918098449707, "step": 67160 }, { "epoch": 0.8, "learning_rate": 5.623336409503899e-07, "logits/chosen": -2.8875808715820312, "logits/rejected": -2.461297035217285, "logps/chosen": -96.1120834350586, "logps/rejected": -962.4495849609375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5046786069869995, "rewards/margins": 8.71683406829834, "rewards/rejected": -9.221513748168945, "step": 67170 }, { "epoch": 0.8, "learning_rate": 5.616737355194951e-07, "logits/chosen": -2.9249789714813232, "logits/rejected": -2.1921513080596924, "logps/chosen": -116.3792953491211, "logps/rejected": -930.7166137695312, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6612585783004761, "rewards/margins": 8.256766319274902, "rewards/rejected": -8.918025016784668, "step": 67180 }, { "epoch": 0.8, "learning_rate": 5.610141685149825e-07, "logits/chosen": -2.870311975479126, "logits/rejected": -2.4020307064056396, "logps/chosen": -81.90586853027344, "logps/rejected": -890.7393798828125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.397422730922699, "rewards/margins": 8.142694473266602, "rewards/rejected": -8.540117263793945, "step": 67190 }, { "epoch": 0.8, "learning_rate": 5.603549400520111e-07, "logits/chosen": -2.8702640533447266, "logits/rejected": -2.1320815086364746, "logps/chosen": -131.45416259765625, "logps/rejected": -1038.007080078125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7481192946434021, "rewards/margins": 9.23747444152832, "rewards/rejected": -9.985594749450684, "step": 67200 }, { "epoch": 0.8, "learning_rate": 5.596960502456805e-07, "logits/chosen": -2.9024338722229004, "logits/rejected": -2.2323741912841797, "logps/chosen": -117.56880187988281, "logps/rejected": -1060.833984375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6103415489196777, "rewards/margins": 9.58761215209961, "rewards/rejected": -10.197954177856445, "step": 67210 }, { "epoch": 0.8, "learning_rate": 5.590374992110298e-07, "logits/chosen": -2.896074056625366, "logits/rejected": -2.5196990966796875, "logps/chosen": -73.57688903808594, "logps/rejected": -808.80029296875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.33260685205459595, "rewards/margins": 7.3694586753845215, "rewards/rejected": -7.702064514160156, "step": 67220 }, { "epoch": 0.8, "learning_rate": 5.583792870630411e-07, "logits/chosen": -2.8958725929260254, "logits/rejected": -2.580223560333252, "logps/chosen": -77.43604278564453, "logps/rejected": -815.5755615234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.3874727487564087, "rewards/margins": 7.37488317489624, "rewards/rejected": -7.762355804443359, "step": 67230 }, { "epoch": 0.8, "learning_rate": 5.577214139166362e-07, "logits/chosen": -2.8536384105682373, "logits/rejected": -2.338136911392212, "logps/chosen": -116.6304702758789, "logps/rejected": -992.3846435546875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7096049785614014, "rewards/margins": 8.819997787475586, "rewards/rejected": -9.529603004455566, "step": 67240 }, { "epoch": 0.81, "learning_rate": 5.570638798866779e-07, "logits/chosen": -2.9060873985290527, "logits/rejected": -2.4694175720214844, "logps/chosen": -82.02608489990234, "logps/rejected": -933.9230346679688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.3895765244960785, "rewards/margins": 8.573099136352539, "rewards/rejected": -8.962675094604492, "step": 67250 }, { "epoch": 0.81, "learning_rate": 5.564066850879699e-07, "logits/chosen": -2.870750904083252, "logits/rejected": -2.3707547187805176, "logps/chosen": -92.96598815917969, "logps/rejected": -941.8790283203125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.47599178552627563, "rewards/margins": 8.56184196472168, "rewards/rejected": -9.037832260131836, "step": 67260 }, { "epoch": 0.81, "learning_rate": 5.557498296352568e-07, "logits/chosen": -2.8682315349578857, "logits/rejected": -2.3346457481384277, "logps/chosen": -103.12723541259766, "logps/rejected": -1017.328125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5249050855636597, "rewards/margins": 9.260579109191895, "rewards/rejected": -9.78548526763916, "step": 67270 }, { "epoch": 0.81, "learning_rate": 5.550933136432241e-07, "logits/chosen": -2.87463116645813, "logits/rejected": -2.1886565685272217, "logps/chosen": -115.5447769165039, "logps/rejected": -979.2337036132812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6489911675453186, "rewards/margins": 8.746495246887207, "rewards/rejected": -9.395484924316406, "step": 67280 }, { "epoch": 0.81, "learning_rate": 5.544371372264956e-07, "logits/chosen": -2.8699541091918945, "logits/rejected": -2.409425735473633, "logps/chosen": -101.89070892333984, "logps/rejected": -914.4288940429688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5706474184989929, "rewards/margins": 8.193944931030273, "rewards/rejected": -8.764593124389648, "step": 67290 }, { "epoch": 0.81, "learning_rate": 5.537813004996401e-07, "logits/chosen": -2.901184558868408, "logits/rejected": -2.046083450317383, "logps/chosen": -121.8898696899414, "logps/rejected": -1045.5135498046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6906290054321289, "rewards/margins": 9.346597671508789, "rewards/rejected": -10.037227630615234, "step": 67300 }, { "epoch": 0.81, "learning_rate": 5.531258035771642e-07, "logits/chosen": -2.906248092651367, "logits/rejected": -2.4038851261138916, "logps/chosen": -108.613525390625, "logps/rejected": -931.3821411132812, "loss": 0.0873, "rewards/accuracies": 1.0, "rewards/chosen": -0.6035182476043701, "rewards/margins": 8.321127891540527, "rewards/rejected": -8.924646377563477, "step": 67310 }, { "epoch": 0.81, "learning_rate": 5.524706465735158e-07, "logits/chosen": -2.8910670280456543, "logits/rejected": -2.2798876762390137, "logps/chosen": -101.34639739990234, "logps/rejected": -1011.5281372070312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5396363139152527, "rewards/margins": 9.182849884033203, "rewards/rejected": -9.722485542297363, "step": 67320 }, { "epoch": 0.81, "learning_rate": 5.51815829603084e-07, "logits/chosen": -2.907801866531372, "logits/rejected": -2.3509726524353027, "logps/chosen": -123.37971496582031, "logps/rejected": -1020.9285278320312, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.7651467323303223, "rewards/margins": 9.04126262664795, "rewards/rejected": -9.80640983581543, "step": 67330 }, { "epoch": 0.81, "learning_rate": 5.511613527801971e-07, "logits/chosen": -2.8872122764587402, "logits/rejected": -2.1984546184539795, "logps/chosen": -110.41526794433594, "logps/rejected": -952.20068359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6218627691268921, "rewards/margins": 8.509119987487793, "rewards/rejected": -9.130982398986816, "step": 67340 }, { "epoch": 0.81, "learning_rate": 5.50507216219125e-07, "logits/chosen": -2.860212802886963, "logits/rejected": -2.1743085384368896, "logps/chosen": -107.44276428222656, "logps/rejected": -919.2897338867188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6016192436218262, "rewards/margins": 8.20767593383789, "rewards/rejected": -8.809293746948242, "step": 67350 }, { "epoch": 0.81, "learning_rate": 5.498534200340785e-07, "logits/chosen": -2.8826441764831543, "logits/rejected": -2.3583240509033203, "logps/chosen": -104.6651382446289, "logps/rejected": -935.7427978515625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.570999801158905, "rewards/margins": 8.376213073730469, "rewards/rejected": -8.947212219238281, "step": 67360 }, { "epoch": 0.81, "learning_rate": 5.491999643392088e-07, "logits/chosen": -2.8695600032806396, "logits/rejected": -2.3280444145202637, "logps/chosen": -116.37496185302734, "logps/rejected": -892.0007934570312, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.6870511770248413, "rewards/margins": 7.8445892333984375, "rewards/rejected": -8.53164005279541, "step": 67370 }, { "epoch": 0.81, "learning_rate": 5.485468492486071e-07, "logits/chosen": -2.829918622970581, "logits/rejected": -2.0933098793029785, "logps/chosen": -127.18692779541016, "logps/rejected": -927.4415283203125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7263044714927673, "rewards/margins": 8.155389785766602, "rewards/rejected": -8.881692886352539, "step": 67380 }, { "epoch": 0.81, "learning_rate": 5.478940748763056e-07, "logits/chosen": -2.835273027420044, "logits/rejected": -2.0994060039520264, "logps/chosen": -125.93153381347656, "logps/rejected": -1062.1009521484375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7285454869270325, "rewards/margins": 9.485844612121582, "rewards/rejected": -10.21438980102539, "step": 67390 }, { "epoch": 0.81, "learning_rate": 5.472416413362777e-07, "logits/chosen": -2.870640516281128, "logits/rejected": -2.078477382659912, "logps/chosen": -136.44290161132812, "logps/rejected": -1000.2556762695312, "loss": 0.167, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8006890416145325, "rewards/margins": 8.797174453735352, "rewards/rejected": -9.597864151000977, "step": 67400 }, { "epoch": 0.81, "learning_rate": 5.465895487424347e-07, "logits/chosen": -2.859079599380493, "logits/rejected": -2.293668270111084, "logps/chosen": -105.6571044921875, "logps/rejected": -986.5687255859375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5276293754577637, "rewards/margins": 8.942330360412598, "rewards/rejected": -9.469958305358887, "step": 67410 }, { "epoch": 0.81, "learning_rate": 5.459377972086313e-07, "logits/chosen": -2.8671488761901855, "logits/rejected": -2.4468705654144287, "logps/chosen": -89.41020202636719, "logps/rejected": -911.5716552734375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.47079864144325256, "rewards/margins": 8.263975143432617, "rewards/rejected": -8.734773635864258, "step": 67420 }, { "epoch": 0.81, "learning_rate": 5.452863868486613e-07, "logits/chosen": -2.866222858428955, "logits/rejected": -2.4627668857574463, "logps/chosen": -109.4791030883789, "logps/rejected": -940.4447021484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6832980513572693, "rewards/margins": 8.330738067626953, "rewards/rejected": -9.014036178588867, "step": 67430 }, { "epoch": 0.81, "learning_rate": 5.446353177762593e-07, "logits/chosen": -2.886967420578003, "logits/rejected": -2.601870059967041, "logps/chosen": -69.86087799072266, "logps/rejected": -806.2804565429688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.2982671856880188, "rewards/margins": 7.389091491699219, "rewards/rejected": -7.687358856201172, "step": 67440 }, { "epoch": 0.81, "learning_rate": 5.439845901051e-07, "logits/chosen": -2.831301689147949, "logits/rejected": -2.226196765899658, "logps/chosen": -109.81318664550781, "logps/rejected": -900.0955200195312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5795167088508606, "rewards/margins": 8.024419784545898, "rewards/rejected": -8.603937149047852, "step": 67450 }, { "epoch": 0.81, "learning_rate": 5.433342039487985e-07, "logits/chosen": -2.881361246109009, "logits/rejected": -2.509986162185669, "logps/chosen": -120.3048095703125, "logps/rejected": -780.8145141601562, "loss": 0.1486, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7922285795211792, "rewards/margins": 6.638343811035156, "rewards/rejected": -7.430572509765625, "step": 67460 }, { "epoch": 0.81, "learning_rate": 5.426841594209111e-07, "logits/chosen": -2.873224973678589, "logits/rejected": -2.4308173656463623, "logps/chosen": -108.16429138183594, "logps/rejected": -973.4923095703125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6599847078323364, "rewards/margins": 8.687040328979492, "rewards/rejected": -9.347024917602539, "step": 67470 }, { "epoch": 0.81, "learning_rate": 5.42034456634933e-07, "logits/chosen": -2.823143243789673, "logits/rejected": -2.4873502254486084, "logps/chosen": -77.22941589355469, "logps/rejected": -735.0914306640625, "loss": 0.1051, "rewards/accuracies": 1.0, "rewards/chosen": -0.3738560080528259, "rewards/margins": 6.601229190826416, "rewards/rejected": -6.975086212158203, "step": 67480 }, { "epoch": 0.81, "learning_rate": 5.413850957043007e-07, "logits/chosen": -2.876404285430908, "logits/rejected": -2.3733363151550293, "logps/chosen": -123.39229583740234, "logps/rejected": -854.7960815429688, "loss": 0.0571, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7374570369720459, "rewards/margins": 7.422929286956787, "rewards/rejected": -8.160386085510254, "step": 67490 }, { "epoch": 0.81, "learning_rate": 5.407360767423906e-07, "logits/chosen": -2.864482879638672, "logits/rejected": -2.1764259338378906, "logps/chosen": -135.27830505371094, "logps/rejected": -917.6593017578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7698327302932739, "rewards/margins": 8.012130737304688, "rewards/rejected": -8.781963348388672, "step": 67500 }, { "epoch": 0.81, "learning_rate": 5.4008739986252e-07, "logits/chosen": -2.855999708175659, "logits/rejected": -2.4044857025146484, "logps/chosen": -116.13160705566406, "logps/rejected": -948.8416137695312, "loss": 0.1028, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.734418511390686, "rewards/margins": 8.359456062316895, "rewards/rejected": -9.093873977661133, "step": 67510 }, { "epoch": 0.81, "learning_rate": 5.394390651779461e-07, "logits/chosen": -2.9273509979248047, "logits/rejected": -2.5227012634277344, "logps/chosen": -88.71356201171875, "logps/rejected": -863.2945556640625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4701513648033142, "rewards/margins": 7.780773162841797, "rewards/rejected": -8.250925064086914, "step": 67520 }, { "epoch": 0.81, "learning_rate": 5.387910728018667e-07, "logits/chosen": -2.9449448585510254, "logits/rejected": -2.4679296016693115, "logps/chosen": -87.71619415283203, "logps/rejected": -942.7233276367188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4322190284729004, "rewards/margins": 8.607202529907227, "rewards/rejected": -9.039420127868652, "step": 67530 }, { "epoch": 0.81, "learning_rate": 5.381434228474183e-07, "logits/chosen": -2.903170108795166, "logits/rejected": -2.259068012237549, "logps/chosen": -123.11387634277344, "logps/rejected": -954.9378662109375, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -0.7440562844276428, "rewards/margins": 8.406152725219727, "rewards/rejected": -9.150208473205566, "step": 67540 }, { "epoch": 0.81, "learning_rate": 5.374961154276795e-07, "logits/chosen": -2.9120161533355713, "logits/rejected": -2.3066189289093018, "logps/chosen": -117.10208892822266, "logps/rejected": -865.8941650390625, "loss": 0.1044, "rewards/accuracies": 1.0, "rewards/chosen": -0.7016667723655701, "rewards/margins": 7.564755439758301, "rewards/rejected": -8.2664213180542, "step": 67550 }, { "epoch": 0.81, "learning_rate": 5.368491506556684e-07, "logits/chosen": -2.909226655960083, "logits/rejected": -2.294257640838623, "logps/chosen": -98.36761474609375, "logps/rejected": -969.9698486328125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5274203419685364, "rewards/margins": 8.7819242477417, "rewards/rejected": -9.309345245361328, "step": 67560 }, { "epoch": 0.81, "learning_rate": 5.362025286443428e-07, "logits/chosen": -2.8868632316589355, "logits/rejected": -2.139352560043335, "logps/chosen": -132.87387084960938, "logps/rejected": -1077.52294921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7541518211364746, "rewards/margins": 9.603750228881836, "rewards/rejected": -10.357900619506836, "step": 67570 }, { "epoch": 0.81, "learning_rate": 5.355562495066019e-07, "logits/chosen": -2.876364231109619, "logits/rejected": -2.509981155395508, "logps/chosen": -92.18048858642578, "logps/rejected": -907.0157470703125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.48831820487976074, "rewards/margins": 8.195234298706055, "rewards/rejected": -8.683552742004395, "step": 67580 }, { "epoch": 0.81, "learning_rate": 5.349103133552838e-07, "logits/chosen": -2.896348476409912, "logits/rejected": -2.142059803009033, "logps/chosen": -128.57412719726562, "logps/rejected": -1107.9090576171875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7255648970603943, "rewards/margins": 9.941122055053711, "rewards/rejected": -10.666686058044434, "step": 67590 }, { "epoch": 0.81, "learning_rate": 5.342647203031675e-07, "logits/chosen": -2.8886563777923584, "logits/rejected": -2.3611900806427, "logps/chosen": -100.40699768066406, "logps/rejected": -944.0654296875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.4923189580440521, "rewards/margins": 8.559106826782227, "rewards/rejected": -9.05142593383789, "step": 67600 }, { "epoch": 0.81, "learning_rate": 5.336194704629713e-07, "logits/chosen": -2.875924825668335, "logits/rejected": -2.2254929542541504, "logps/chosen": -117.18997955322266, "logps/rejected": -916.2984619140625, "loss": 0.1024, "rewards/accuracies": 1.0, "rewards/chosen": -0.6551147699356079, "rewards/margins": 8.112703323364258, "rewards/rejected": -8.767817497253418, "step": 67610 }, { "epoch": 0.81, "learning_rate": 5.329745639473538e-07, "logits/chosen": -2.8985238075256348, "logits/rejected": -2.34879732131958, "logps/chosen": -106.8274917602539, "logps/rejected": -1032.21533203125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5852750539779663, "rewards/margins": 9.3480806350708, "rewards/rejected": -9.933355331420898, "step": 67620 }, { "epoch": 0.81, "learning_rate": 5.323300008689142e-07, "logits/chosen": -2.8572566509246826, "logits/rejected": -2.316570520401001, "logps/chosen": -128.34054565429688, "logps/rejected": -929.7146606445312, "loss": 0.1586, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8259538412094116, "rewards/margins": 8.073564529418945, "rewards/rejected": -8.899518013000488, "step": 67630 }, { "epoch": 0.81, "learning_rate": 5.316857813401918e-07, "logits/chosen": -2.886375904083252, "logits/rejected": -2.6186344623565674, "logps/chosen": -99.11570739746094, "logps/rejected": -842.21630859375, "loss": 0.3042, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.612218976020813, "rewards/margins": 7.430454254150391, "rewards/rejected": -8.042673110961914, "step": 67640 }, { "epoch": 0.81, "learning_rate": 5.310419054736649e-07, "logits/chosen": -2.8689236640930176, "logits/rejected": -2.3024234771728516, "logps/chosen": -132.91726684570312, "logps/rejected": -944.9844970703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8122333288192749, "rewards/margins": 8.254185676574707, "rewards/rejected": -9.066417694091797, "step": 67650 }, { "epoch": 0.81, "learning_rate": 5.30398373381753e-07, "logits/chosen": -2.8764758110046387, "logits/rejected": -2.28175687789917, "logps/chosen": -112.94451904296875, "logps/rejected": -992.0150146484375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6769723892211914, "rewards/margins": 8.853289604187012, "rewards/rejected": -9.530261993408203, "step": 67660 }, { "epoch": 0.81, "learning_rate": 5.297551851768154e-07, "logits/chosen": -2.907219648361206, "logits/rejected": -2.296104669570923, "logps/chosen": -99.27943420410156, "logps/rejected": -928.3341674804688, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.5614396333694458, "rewards/margins": 8.337800025939941, "rewards/rejected": -8.899238586425781, "step": 67670 }, { "epoch": 0.81, "learning_rate": 5.291123409711496e-07, "logits/chosen": -2.8702683448791504, "logits/rejected": -2.1770434379577637, "logps/chosen": -131.7244415283203, "logps/rejected": -1055.0924072265625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.78889000415802, "rewards/margins": 9.348596572875977, "rewards/rejected": -10.137487411499023, "step": 67680 }, { "epoch": 0.81, "learning_rate": 5.28469840876995e-07, "logits/chosen": -2.8774356842041016, "logits/rejected": -2.385822296142578, "logps/chosen": -95.11610412597656, "logps/rejected": -987.9989013671875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5316857099533081, "rewards/margins": 8.96782112121582, "rewards/rejected": -9.499506950378418, "step": 67690 }, { "epoch": 0.81, "learning_rate": 5.278276850065308e-07, "logits/chosen": -2.8955750465393066, "logits/rejected": -2.1755576133728027, "logps/chosen": -125.6741714477539, "logps/rejected": -989.6519775390625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7072097659111023, "rewards/margins": 8.803521156311035, "rewards/rejected": -9.51073169708252, "step": 67700 }, { "epoch": 0.81, "learning_rate": 5.271858734718747e-07, "logits/chosen": -2.887211799621582, "logits/rejected": -2.590496301651001, "logps/chosen": -71.82997131347656, "logps/rejected": -812.7470703125, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -0.34684884548187256, "rewards/margins": 7.425042629241943, "rewards/rejected": -7.771890163421631, "step": 67710 }, { "epoch": 0.81, "learning_rate": 5.265444063850869e-07, "logits/chosen": -2.8429603576660156, "logits/rejected": -2.295569896697998, "logps/chosen": -105.1794204711914, "logps/rejected": -871.5399169921875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.6044819355010986, "rewards/margins": 7.735347747802734, "rewards/rejected": -8.33983039855957, "step": 67720 }, { "epoch": 0.81, "learning_rate": 5.25903283858164e-07, "logits/chosen": -2.898108959197998, "logits/rejected": -2.512338161468506, "logps/chosen": -104.12862396240234, "logps/rejected": -946.4090576171875, "loss": 0.1557, "rewards/accuracies": 1.0, "rewards/chosen": -0.5985453724861145, "rewards/margins": 8.473013877868652, "rewards/rejected": -9.071558952331543, "step": 67730 }, { "epoch": 0.81, "learning_rate": 5.25262506003045e-07, "logits/chosen": -2.86120867729187, "logits/rejected": -2.4072842597961426, "logps/chosen": -86.17317962646484, "logps/rejected": -970.5949096679688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.43619996309280396, "rewards/margins": 8.878141403198242, "rewards/rejected": -9.31434154510498, "step": 67740 }, { "epoch": 0.81, "learning_rate": 5.246220729316079e-07, "logits/chosen": -2.881316661834717, "logits/rejected": -2.411003589630127, "logps/chosen": -93.98117065429688, "logps/rejected": -829.6876220703125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.458992063999176, "rewards/margins": 7.466885566711426, "rewards/rejected": -7.925878047943115, "step": 67750 }, { "epoch": 0.81, "learning_rate": 5.239819847556707e-07, "logits/chosen": -2.8842146396636963, "logits/rejected": -2.184662342071533, "logps/chosen": -125.98719787597656, "logps/rejected": -947.0647583007812, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -0.7788295745849609, "rewards/margins": 8.275285720825195, "rewards/rejected": -9.05411434173584, "step": 67760 }, { "epoch": 0.81, "learning_rate": 5.233422415869904e-07, "logits/chosen": -2.856337785720825, "logits/rejected": -2.505568742752075, "logps/chosen": -89.57438659667969, "logps/rejected": -794.66796875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.47346821427345276, "rewards/margins": 7.090601444244385, "rewards/rejected": -7.564068794250488, "step": 67770 }, { "epoch": 0.81, "learning_rate": 5.227028435372652e-07, "logits/chosen": -2.9096457958221436, "logits/rejected": -2.247274398803711, "logps/chosen": -113.7041015625, "logps/rejected": -895.3485107421875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5897298455238342, "rewards/margins": 7.973071098327637, "rewards/rejected": -8.562801361083984, "step": 67780 }, { "epoch": 0.81, "learning_rate": 5.220637907181322e-07, "logits/chosen": -2.8946633338928223, "logits/rejected": -2.1339287757873535, "logps/chosen": -140.86862182617188, "logps/rejected": -994.7551879882812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8655516505241394, "rewards/margins": 8.674688339233398, "rewards/rejected": -9.540240287780762, "step": 67790 }, { "epoch": 0.81, "learning_rate": 5.214250832411672e-07, "logits/chosen": -2.8434653282165527, "logits/rejected": -2.177072525024414, "logps/chosen": -134.0455322265625, "logps/rejected": -973.23828125, "loss": 0.0913, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8329867124557495, "rewards/margins": 8.504190444946289, "rewards/rejected": -9.337177276611328, "step": 67800 }, { "epoch": 0.81, "learning_rate": 5.207867212178874e-07, "logits/chosen": -2.9475693702697754, "logits/rejected": -2.6130311489105225, "logps/chosen": -80.32762145996094, "logps/rejected": -886.6356201171875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.39125871658325195, "rewards/margins": 8.10383415222168, "rewards/rejected": -8.495092391967773, "step": 67810 }, { "epoch": 0.81, "learning_rate": 5.201487047597492e-07, "logits/chosen": -2.889573097229004, "logits/rejected": -2.274946928024292, "logps/chosen": -137.51806640625, "logps/rejected": -969.4306640625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7562812566757202, "rewards/margins": 8.518392562866211, "rewards/rejected": -9.274673461914062, "step": 67820 }, { "epoch": 0.81, "learning_rate": 5.195110339781484e-07, "logits/chosen": -2.859854221343994, "logits/rejected": -2.366177558898926, "logps/chosen": -103.44215393066406, "logps/rejected": -997.5984497070312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5942192077636719, "rewards/margins": 8.976906776428223, "rewards/rejected": -9.571125984191895, "step": 67830 }, { "epoch": 0.81, "learning_rate": 5.188737089844204e-07, "logits/chosen": -2.8708484172821045, "logits/rejected": -2.4637112617492676, "logps/chosen": -93.4544448852539, "logps/rejected": -860.9202880859375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.4676036834716797, "rewards/margins": 7.747183322906494, "rewards/rejected": -8.214787483215332, "step": 67840 }, { "epoch": 0.81, "learning_rate": 5.18236729889841e-07, "logits/chosen": -2.8817555904388428, "logits/rejected": -2.2878308296203613, "logps/chosen": -155.999267578125, "logps/rejected": -886.8065185546875, "loss": 0.2483, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.0902358293533325, "rewards/margins": 7.394165992736816, "rewards/rejected": -8.484402656555176, "step": 67850 }, { "epoch": 0.81, "learning_rate": 5.176000968056238e-07, "logits/chosen": -2.882615327835083, "logits/rejected": -2.335540294647217, "logps/chosen": -132.70909118652344, "logps/rejected": -940.7359619140625, "loss": 0.149, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8701288104057312, "rewards/margins": 8.153310775756836, "rewards/rejected": -9.02344036102295, "step": 67860 }, { "epoch": 0.81, "learning_rate": 5.169638098429239e-07, "logits/chosen": -2.919912099838257, "logits/rejected": -2.3711771965026855, "logps/chosen": -93.40184020996094, "logps/rejected": -912.98046875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.47108983993530273, "rewards/margins": 8.269978523254395, "rewards/rejected": -8.741067886352539, "step": 67870 }, { "epoch": 0.81, "learning_rate": 5.163278691128351e-07, "logits/chosen": -2.892451763153076, "logits/rejected": -2.3112075328826904, "logps/chosen": -112.72933197021484, "logps/rejected": -838.0237426757812, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.6415342092514038, "rewards/margins": 7.351862907409668, "rewards/rejected": -7.993395805358887, "step": 67880 }, { "epoch": 0.81, "learning_rate": 5.156922747263912e-07, "logits/chosen": -2.8740391731262207, "logits/rejected": -2.2382612228393555, "logps/chosen": -117.41446685791016, "logps/rejected": -1049.093017578125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7172660827636719, "rewards/margins": 9.36679458618164, "rewards/rejected": -10.084059715270996, "step": 67890 }, { "epoch": 0.81, "learning_rate": 5.150570267945646e-07, "logits/chosen": -2.8773751258850098, "logits/rejected": -2.2699801921844482, "logps/chosen": -116.9825210571289, "logps/rejected": -1048.201416015625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6445876955986023, "rewards/margins": 9.424585342407227, "rewards/rejected": -10.069170951843262, "step": 67900 }, { "epoch": 0.81, "learning_rate": 5.144221254282683e-07, "logits/chosen": -2.882521629333496, "logits/rejected": -2.286998748779297, "logps/chosen": -128.12905883789062, "logps/rejected": -916.5498046875, "loss": 0.1194, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8058339953422546, "rewards/margins": 7.9665045738220215, "rewards/rejected": -8.7723388671875, "step": 67910 }, { "epoch": 0.81, "learning_rate": 5.13787570738355e-07, "logits/chosen": -2.855569362640381, "logits/rejected": -2.343127727508545, "logps/chosen": -110.8873519897461, "logps/rejected": -885.11474609375, "loss": 0.0996, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6487646102905273, "rewards/margins": 7.81887674331665, "rewards/rejected": -8.46764087677002, "step": 67920 }, { "epoch": 0.81, "learning_rate": 5.131533628356144e-07, "logits/chosen": -2.8698651790618896, "logits/rejected": -2.169013500213623, "logps/chosen": -115.88162994384766, "logps/rejected": -899.7605590820312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6760368347167969, "rewards/margins": 7.923300743103027, "rewards/rejected": -8.59933853149414, "step": 67930 }, { "epoch": 0.81, "learning_rate": 5.125195018307788e-07, "logits/chosen": -2.9090349674224854, "logits/rejected": -2.1849772930145264, "logps/chosen": -125.93208312988281, "logps/rejected": -1027.411865234375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7267807722091675, "rewards/margins": 9.141258239746094, "rewards/rejected": -9.868038177490234, "step": 67940 }, { "epoch": 0.81, "learning_rate": 5.118859878345178e-07, "logits/chosen": -2.9157536029815674, "logits/rejected": -2.446471691131592, "logps/chosen": -113.08015441894531, "logps/rejected": -875.4798583984375, "loss": 0.0966, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.696993350982666, "rewards/margins": 7.673858642578125, "rewards/rejected": -8.370851516723633, "step": 67950 }, { "epoch": 0.81, "learning_rate": 5.112528209574419e-07, "logits/chosen": -2.864990472793579, "logits/rejected": -2.4247257709503174, "logps/chosen": -90.06507873535156, "logps/rejected": -847.07568359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.450974702835083, "rewards/margins": 7.638466835021973, "rewards/rejected": -8.089441299438477, "step": 67960 }, { "epoch": 0.81, "learning_rate": 5.106200013100998e-07, "logits/chosen": -2.864392042160034, "logits/rejected": -2.304548740386963, "logps/chosen": -108.18183898925781, "logps/rejected": -814.9916381835938, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5887112617492676, "rewards/margins": 7.180580139160156, "rewards/rejected": -7.769291877746582, "step": 67970 }, { "epoch": 0.81, "learning_rate": 5.099875290029801e-07, "logits/chosen": -2.860614061355591, "logits/rejected": -2.2151412963867188, "logps/chosen": -129.06094360351562, "logps/rejected": -1019.9609375, "loss": 0.1982, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.846732497215271, "rewards/margins": 8.952085494995117, "rewards/rejected": -9.79881763458252, "step": 67980 }, { "epoch": 0.81, "learning_rate": 5.093554041465118e-07, "logits/chosen": -2.8482720851898193, "logits/rejected": -2.409727096557617, "logps/chosen": -96.639892578125, "logps/rejected": -943.7742309570312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5552388429641724, "rewards/margins": 8.488380432128906, "rewards/rejected": -9.043620109558105, "step": 67990 }, { "epoch": 0.81, "learning_rate": 5.087236268510603e-07, "logits/chosen": -2.861534595489502, "logits/rejected": -2.131094455718994, "logps/chosen": -109.39393615722656, "logps/rejected": -1051.6959228515625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6034995913505554, "rewards/margins": 9.50354290008545, "rewards/rejected": -10.10704517364502, "step": 68000 }, { "epoch": 0.81, "learning_rate": 5.080921972269332e-07, "logits/chosen": -2.8589284420013428, "logits/rejected": -2.1773173809051514, "logps/chosen": -140.60079956054688, "logps/rejected": -1040.5975341796875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8755501508712769, "rewards/margins": 9.11299991607666, "rewards/rejected": -9.988550186157227, "step": 68010 }, { "epoch": 0.81, "learning_rate": 5.074611153843759e-07, "logits/chosen": -2.8328747749328613, "logits/rejected": -2.2674214839935303, "logps/chosen": -117.70484924316406, "logps/rejected": -938.3971557617188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.685742199420929, "rewards/margins": 8.30937671661377, "rewards/rejected": -8.995119094848633, "step": 68020 }, { "epoch": 0.81, "learning_rate": 5.068303814335737e-07, "logits/chosen": -2.8848280906677246, "logits/rejected": -2.5696799755096436, "logps/chosen": -66.92674255371094, "logps/rejected": -780.6922607421875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.3027743399143219, "rewards/margins": 7.137750148773193, "rewards/rejected": -7.440524101257324, "step": 68030 }, { "epoch": 0.81, "learning_rate": 5.061999954846514e-07, "logits/chosen": -2.8750548362731934, "logits/rejected": -2.1923556327819824, "logps/chosen": -106.88737487792969, "logps/rejected": -941.8824462890625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5553435683250427, "rewards/margins": 8.482498168945312, "rewards/rejected": -9.037841796875, "step": 68040 }, { "epoch": 0.81, "learning_rate": 5.05569957647673e-07, "logits/chosen": -2.8661699295043945, "logits/rejected": -2.3593735694885254, "logps/chosen": -155.65481567382812, "logps/rejected": -930.4002685546875, "loss": 0.1405, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0439131259918213, "rewards/margins": 7.846099853515625, "rewards/rejected": -8.890012741088867, "step": 68050 }, { "epoch": 0.81, "learning_rate": 5.049402680326399e-07, "logits/chosen": -2.8403327465057373, "logits/rejected": -2.3355331420898438, "logps/chosen": -104.93815612792969, "logps/rejected": -812.0413818359375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6018290519714355, "rewards/margins": 7.137704372406006, "rewards/rejected": -7.739533424377441, "step": 68060 }, { "epoch": 0.81, "learning_rate": 5.04310926749495e-07, "logits/chosen": -2.897580862045288, "logits/rejected": -2.073272228240967, "logps/chosen": -107.34153747558594, "logps/rejected": -938.1194458007812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5658511519432068, "rewards/margins": 8.414907455444336, "rewards/rejected": -8.980757713317871, "step": 68070 }, { "epoch": 0.81, "learning_rate": 5.036819339081197e-07, "logits/chosen": -2.857822895050049, "logits/rejected": -2.3466079235076904, "logps/chosen": -103.12385559082031, "logps/rejected": -920.0770263671875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.60752934217453, "rewards/margins": 8.20354175567627, "rewards/rejected": -8.811070442199707, "step": 68080 }, { "epoch": 0.82, "learning_rate": 5.03053289618334e-07, "logits/chosen": -2.910560131072998, "logits/rejected": -2.4267916679382324, "logps/chosen": -81.87290954589844, "logps/rejected": -960.2626953125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.3849678039550781, "rewards/margins": 8.836338996887207, "rewards/rejected": -9.221305847167969, "step": 68090 }, { "epoch": 0.82, "learning_rate": 5.024249939898976e-07, "logits/chosen": -2.864114284515381, "logits/rejected": -2.360334634780884, "logps/chosen": -89.34971618652344, "logps/rejected": -886.5706787109375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.45572566986083984, "rewards/margins": 8.043893814086914, "rewards/rejected": -8.49962043762207, "step": 68100 }, { "epoch": 0.82, "learning_rate": 5.017970471325093e-07, "logits/chosen": -2.9040791988372803, "logits/rejected": -2.4895782470703125, "logps/chosen": -88.33848571777344, "logps/rejected": -871.9613037109375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4655906558036804, "rewards/margins": 7.86789083480835, "rewards/rejected": -8.33348274230957, "step": 68110 }, { "epoch": 0.82, "learning_rate": 5.011694491558075e-07, "logits/chosen": -2.9055330753326416, "logits/rejected": -2.4646544456481934, "logps/chosen": -92.56236267089844, "logps/rejected": -866.7552490234375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.44596657156944275, "rewards/margins": 7.83481502532959, "rewards/rejected": -8.280781745910645, "step": 68120 }, { "epoch": 0.82, "learning_rate": 5.005422001693672e-07, "logits/chosen": -2.883676052093506, "logits/rejected": -2.4356281757354736, "logps/chosen": -83.88630676269531, "logps/rejected": -851.0142822265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.39762741327285767, "rewards/margins": 7.733445167541504, "rewards/rejected": -8.131072044372559, "step": 68130 }, { "epoch": 0.82, "learning_rate": 4.999153002827059e-07, "logits/chosen": -2.882690668106079, "logits/rejected": -2.3943591117858887, "logps/chosen": -95.22953796386719, "logps/rejected": -867.3898315429688, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.5087282657623291, "rewards/margins": 7.773809909820557, "rewards/rejected": -8.282538414001465, "step": 68140 }, { "epoch": 0.82, "learning_rate": 4.992887496052781e-07, "logits/chosen": -2.9166276454925537, "logits/rejected": -2.6075761318206787, "logps/chosen": -91.28363037109375, "logps/rejected": -713.7115478515625, "loss": 0.1176, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5361191630363464, "rewards/margins": 6.232504844665527, "rewards/rejected": -6.768623352050781, "step": 68150 }, { "epoch": 0.82, "learning_rate": 4.98662548246478e-07, "logits/chosen": -2.8548922538757324, "logits/rejected": -2.336759090423584, "logps/chosen": -99.06156921386719, "logps/rejected": -953.2215576171875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5386639833450317, "rewards/margins": 8.6101655960083, "rewards/rejected": -9.14883041381836, "step": 68160 }, { "epoch": 0.82, "learning_rate": 4.980366963156394e-07, "logits/chosen": -2.885765552520752, "logits/rejected": -2.392282485961914, "logps/chosen": -91.84809875488281, "logps/rejected": -881.2813720703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4858582019805908, "rewards/margins": 7.948629856109619, "rewards/rejected": -8.434488296508789, "step": 68170 }, { "epoch": 0.82, "learning_rate": 4.974111939220325e-07, "logits/chosen": -2.9161198139190674, "logits/rejected": -2.3675427436828613, "logps/chosen": -122.6084213256836, "logps/rejected": -953.2159423828125, "loss": 0.1756, "rewards/accuracies": 1.0, "rewards/chosen": -0.6718671917915344, "rewards/margins": 8.457319259643555, "rewards/rejected": -9.12918758392334, "step": 68180 }, { "epoch": 0.82, "learning_rate": 4.96786041174869e-07, "logits/chosen": -2.87923002243042, "logits/rejected": -2.32559871673584, "logps/chosen": -96.42852020263672, "logps/rejected": -951.0685424804688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5126833319664001, "rewards/margins": 8.635519027709961, "rewards/rejected": -9.148202896118164, "step": 68190 }, { "epoch": 0.82, "learning_rate": 4.961612381832995e-07, "logits/chosen": -2.9230523109436035, "logits/rejected": -2.5312185287475586, "logps/chosen": -92.95403289794922, "logps/rejected": -886.7277221679688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.512126088142395, "rewards/margins": 7.963139533996582, "rewards/rejected": -8.475265502929688, "step": 68200 }, { "epoch": 0.82, "learning_rate": 4.955367850564119e-07, "logits/chosen": -2.832191228866577, "logits/rejected": -2.3518924713134766, "logps/chosen": -113.53836822509766, "logps/rejected": -970.3720703125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6460922360420227, "rewards/margins": 8.659769058227539, "rewards/rejected": -9.30586051940918, "step": 68210 }, { "epoch": 0.82, "learning_rate": 4.949126819032349e-07, "logits/chosen": -2.842742681503296, "logits/rejected": -2.1887266635894775, "logps/chosen": -111.3093032836914, "logps/rejected": -1062.1875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5923031568527222, "rewards/margins": 9.64389705657959, "rewards/rejected": -10.236200332641602, "step": 68220 }, { "epoch": 0.82, "learning_rate": 4.942889288327343e-07, "logits/chosen": -2.858060121536255, "logits/rejected": -2.291517734527588, "logps/chosen": -132.3367156982422, "logps/rejected": -1004.6998901367188, "loss": 0.139, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8027631640434265, "rewards/margins": 8.844134330749512, "rewards/rejected": -9.64689826965332, "step": 68230 }, { "epoch": 0.82, "learning_rate": 4.936655259538172e-07, "logits/chosen": -2.8500308990478516, "logits/rejected": -2.320128917694092, "logps/chosen": -119.91839599609375, "logps/rejected": -917.0787353515625, "loss": 0.0942, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7276912331581116, "rewards/margins": 8.058685302734375, "rewards/rejected": -8.786376953125, "step": 68240 }, { "epoch": 0.82, "learning_rate": 4.930424733753261e-07, "logits/chosen": -2.9250731468200684, "logits/rejected": -2.362196683883667, "logps/chosen": -114.21578216552734, "logps/rejected": -974.5115356445312, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": -0.6026408076286316, "rewards/margins": 8.74471664428711, "rewards/rejected": -9.347356796264648, "step": 68250 }, { "epoch": 0.82, "learning_rate": 4.92419771206045e-07, "logits/chosen": -2.8705525398254395, "logits/rejected": -2.692833423614502, "logps/chosen": -88.55900573730469, "logps/rejected": -709.4833984375, "loss": 0.2682, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.508662223815918, "rewards/margins": 6.22367000579834, "rewards/rejected": -6.732332706451416, "step": 68260 }, { "epoch": 0.82, "learning_rate": 4.91797419554696e-07, "logits/chosen": -2.8804497718811035, "logits/rejected": -2.3339104652404785, "logps/chosen": -115.21815490722656, "logps/rejected": -983.6954345703125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6470705270767212, "rewards/margins": 8.800870895385742, "rewards/rejected": -9.447940826416016, "step": 68270 }, { "epoch": 0.82, "learning_rate": 4.9117541852994e-07, "logits/chosen": -2.841047763824463, "logits/rejected": -2.4520535469055176, "logps/chosen": -84.44300842285156, "logps/rejected": -867.2394409179688, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -0.4071555733680725, "rewards/margins": 7.885453224182129, "rewards/rejected": -8.292608261108398, "step": 68280 }, { "epoch": 0.82, "learning_rate": 4.905537682403769e-07, "logits/chosen": -2.8701727390289307, "logits/rejected": -2.0528838634490967, "logps/chosen": -149.68714904785156, "logps/rejected": -1078.9803466796875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9351814389228821, "rewards/margins": 9.440313339233398, "rewards/rejected": -10.375494956970215, "step": 68290 }, { "epoch": 0.82, "learning_rate": 4.899324687945444e-07, "logits/chosen": -2.8712267875671387, "logits/rejected": -2.1502511501312256, "logps/chosen": -104.95719146728516, "logps/rejected": -910.7698974609375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.595971941947937, "rewards/margins": 8.121882438659668, "rewards/rejected": -8.717855453491211, "step": 68300 }, { "epoch": 0.82, "learning_rate": 4.893115203009214e-07, "logits/chosen": -2.8867716789245605, "logits/rejected": -2.297179698944092, "logps/chosen": -139.3264923095703, "logps/rejected": -949.7255859375, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.8897034525871277, "rewards/margins": 8.212322235107422, "rewards/rejected": -9.102025032043457, "step": 68310 }, { "epoch": 0.82, "learning_rate": 4.886909228679213e-07, "logits/chosen": -2.866567611694336, "logits/rejected": -2.199409008026123, "logps/chosen": -128.24041748046875, "logps/rejected": -1085.6636962890625, "loss": 0.1114, "rewards/accuracies": 1.0, "rewards/chosen": -0.7836136817932129, "rewards/margins": 9.664321899414062, "rewards/rejected": -10.447935104370117, "step": 68320 }, { "epoch": 0.82, "learning_rate": 4.880706766039e-07, "logits/chosen": -2.8562417030334473, "logits/rejected": -2.462635040283203, "logps/chosen": -101.10328674316406, "logps/rejected": -787.8456420898438, "loss": 0.1984, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6241141557693481, "rewards/margins": 6.876162528991699, "rewards/rejected": -7.500277042388916, "step": 68330 }, { "epoch": 0.82, "learning_rate": 4.874507816171509e-07, "logits/chosen": -2.8553285598754883, "logits/rejected": -2.3531947135925293, "logps/chosen": -100.57902526855469, "logps/rejected": -964.7086181640625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5262769460678101, "rewards/margins": 8.714433670043945, "rewards/rejected": -9.240711212158203, "step": 68340 }, { "epoch": 0.82, "learning_rate": 4.868312380159057e-07, "logits/chosen": -2.9518024921417236, "logits/rejected": -2.580838441848755, "logps/chosen": -92.97457122802734, "logps/rejected": -924.7525634765625, "loss": 0.0671, "rewards/accuracies": 1.0, "rewards/chosen": -0.48643508553504944, "rewards/margins": 8.36398696899414, "rewards/rejected": -8.850420951843262, "step": 68350 }, { "epoch": 0.82, "learning_rate": 4.862120459083347e-07, "logits/chosen": -2.8647067546844482, "logits/rejected": -2.5737032890319824, "logps/chosen": -74.15194702148438, "logps/rejected": -780.7006225585938, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.3478546142578125, "rewards/margins": 7.089419364929199, "rewards/rejected": -7.437273979187012, "step": 68360 }, { "epoch": 0.82, "learning_rate": 4.855932054025484e-07, "logits/chosen": -2.8758137226104736, "logits/rejected": -2.2390360832214355, "logps/chosen": -112.65826416015625, "logps/rejected": -1026.0352783203125, "loss": 0.1516, "rewards/accuracies": 1.0, "rewards/chosen": -0.630057692527771, "rewards/margins": 9.220587730407715, "rewards/rejected": -9.85064697265625, "step": 68370 }, { "epoch": 0.82, "learning_rate": 4.84974716606593e-07, "logits/chosen": -2.872274875640869, "logits/rejected": -2.0835487842559814, "logps/chosen": -144.71975708007812, "logps/rejected": -1092.0047607421875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.8929265141487122, "rewards/margins": 9.62159538269043, "rewards/rejected": -10.514521598815918, "step": 68380 }, { "epoch": 0.82, "learning_rate": 4.843565796284555e-07, "logits/chosen": -2.8861541748046875, "logits/rejected": -2.1981165409088135, "logps/chosen": -123.34625244140625, "logps/rejected": -948.8330078125, "loss": 0.0512, "rewards/accuracies": 1.0, "rewards/chosen": -0.725868284702301, "rewards/margins": 8.373405456542969, "rewards/rejected": -9.099274635314941, "step": 68390 }, { "epoch": 0.82, "learning_rate": 4.837387945760608e-07, "logits/chosen": -2.8546199798583984, "logits/rejected": -2.033705234527588, "logps/chosen": -135.79135131835938, "logps/rejected": -1109.5592041015625, "loss": 0.1005, "rewards/accuracies": 1.0, "rewards/chosen": -0.7927986979484558, "rewards/margins": 9.9038667678833, "rewards/rejected": -10.696664810180664, "step": 68400 }, { "epoch": 0.82, "learning_rate": 4.831213615572728e-07, "logits/chosen": -2.8942389488220215, "logits/rejected": -2.4472644329071045, "logps/chosen": -81.93687438964844, "logps/rejected": -857.62353515625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.39866989850997925, "rewards/margins": 7.7785749435424805, "rewards/rejected": -8.177245140075684, "step": 68410 }, { "epoch": 0.82, "learning_rate": 4.825042806798935e-07, "logits/chosen": -2.8715739250183105, "logits/rejected": -2.2103335857391357, "logps/chosen": -124.18949890136719, "logps/rejected": -996.7828979492188, "loss": 0.0252, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.744672417640686, "rewards/margins": 8.810674667358398, "rewards/rejected": -9.555346488952637, "step": 68420 }, { "epoch": 0.82, "learning_rate": 4.818875520516633e-07, "logits/chosen": -2.8982226848602295, "logits/rejected": -2.5286524295806885, "logps/chosen": -68.65217590332031, "logps/rejected": -838.7947998046875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.31249475479125977, "rewards/margins": 7.699746608734131, "rewards/rejected": -8.01224136352539, "step": 68430 }, { "epoch": 0.82, "learning_rate": 4.812711757802621e-07, "logits/chosen": -2.871471881866455, "logits/rejected": -2.231543779373169, "logps/chosen": -106.47181701660156, "logps/rejected": -881.43603515625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.47067075967788696, "rewards/margins": 7.944018363952637, "rewards/rejected": -8.414689064025879, "step": 68440 }, { "epoch": 0.82, "learning_rate": 4.806551519733063e-07, "logits/chosen": -2.8461079597473145, "logits/rejected": -2.434155225753784, "logps/chosen": -98.6268081665039, "logps/rejected": -831.8570556640625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5538020133972168, "rewards/margins": 7.3898210525512695, "rewards/rejected": -7.9436235427856445, "step": 68450 }, { "epoch": 0.82, "learning_rate": 4.800394807383524e-07, "logits/chosen": -2.930732011795044, "logits/rejected": -2.296902656555176, "logps/chosen": -95.90718078613281, "logps/rejected": -918.9755859375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4788444936275482, "rewards/margins": 8.339334487915039, "rewards/rejected": -8.8181791305542, "step": 68460 }, { "epoch": 0.82, "learning_rate": 4.794241621828952e-07, "logits/chosen": -2.871896266937256, "logits/rejected": -2.4202628135681152, "logps/chosen": -82.76316833496094, "logps/rejected": -839.8663940429688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4160277247428894, "rewards/margins": 7.59628963470459, "rewards/rejected": -8.012316703796387, "step": 68470 }, { "epoch": 0.82, "learning_rate": 4.788091964143671e-07, "logits/chosen": -2.913689374923706, "logits/rejected": -2.4618027210235596, "logps/chosen": -92.86013793945312, "logps/rejected": -843.2935791015625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5191625952720642, "rewards/margins": 7.535223484039307, "rewards/rejected": -8.054387092590332, "step": 68480 }, { "epoch": 0.82, "learning_rate": 4.781945835401398e-07, "logits/chosen": -2.921016216278076, "logits/rejected": -2.221054792404175, "logps/chosen": -103.18574523925781, "logps/rejected": -947.6282958984375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5908358693122864, "rewards/margins": 8.49223518371582, "rewards/rejected": -9.083070755004883, "step": 68490 }, { "epoch": 0.82, "learning_rate": 4.775803236675228e-07, "logits/chosen": -2.8822784423828125, "logits/rejected": -2.4567952156066895, "logps/chosen": -113.64583587646484, "logps/rejected": -900.3191528320312, "loss": 0.1097, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7093351483345032, "rewards/margins": 7.918038368225098, "rewards/rejected": -8.627372741699219, "step": 68500 }, { "epoch": 0.82, "learning_rate": 4.76966416903765e-07, "logits/chosen": -2.874218463897705, "logits/rejected": -2.253453493118286, "logps/chosen": -115.5770034790039, "logps/rejected": -976.4586181640625, "loss": 0.0795, "rewards/accuracies": 1.0, "rewards/chosen": -0.6443784832954407, "rewards/margins": 8.718952178955078, "rewards/rejected": -9.36332893371582, "step": 68510 }, { "epoch": 0.82, "learning_rate": 4.763528633560513e-07, "logits/chosen": -2.9036223888397217, "logits/rejected": -2.2902262210845947, "logps/chosen": -102.06021881103516, "logps/rejected": -882.9869995117188, "loss": 0.244, "rewards/accuracies": 1.0, "rewards/chosen": -0.5614474415779114, "rewards/margins": 7.866813659667969, "rewards/rejected": -8.428261756896973, "step": 68520 }, { "epoch": 0.82, "learning_rate": 4.7573966313150726e-07, "logits/chosen": -2.891833543777466, "logits/rejected": -2.27799916267395, "logps/chosen": -101.9530258178711, "logps/rejected": -940.7888793945312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5773211121559143, "rewards/margins": 8.441219329833984, "rewards/rejected": -9.01854133605957, "step": 68530 }, { "epoch": 0.82, "learning_rate": 4.751268163371958e-07, "logits/chosen": -2.836216688156128, "logits/rejected": -2.1484215259552, "logps/chosen": -129.58209228515625, "logps/rejected": -942.7843627929688, "loss": 0.103, "rewards/accuracies": 1.0, "rewards/chosen": -0.7565882205963135, "rewards/margins": 8.291059494018555, "rewards/rejected": -9.047648429870605, "step": 68540 }, { "epoch": 0.82, "learning_rate": 4.7451432308011803e-07, "logits/chosen": -2.876035213470459, "logits/rejected": -2.1939330101013184, "logps/chosen": -110.82332611083984, "logps/rejected": -985.5451049804688, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.6301148533821106, "rewards/margins": 8.839303016662598, "rewards/rejected": -9.4694185256958, "step": 68550 }, { "epoch": 0.82, "learning_rate": 4.73902183467215e-07, "logits/chosen": -2.856844425201416, "logits/rejected": -2.0891404151916504, "logps/chosen": -134.79568481445312, "logps/rejected": -1128.5538330078125, "loss": 0.0826, "rewards/accuracies": 1.0, "rewards/chosen": -0.8510411381721497, "rewards/margins": 10.043855667114258, "rewards/rejected": -10.8948974609375, "step": 68560 }, { "epoch": 0.82, "learning_rate": 4.732903976053627e-07, "logits/chosen": -2.873075485229492, "logits/rejected": -2.358262062072754, "logps/chosen": -95.34092712402344, "logps/rejected": -874.99853515625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.43297672271728516, "rewards/margins": 7.933126926422119, "rewards/rejected": -8.366104125976562, "step": 68570 }, { "epoch": 0.82, "learning_rate": 4.7267896560137833e-07, "logits/chosen": -2.909868001937866, "logits/rejected": -2.3649609088897705, "logps/chosen": -89.9798812866211, "logps/rejected": -902.05322265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.41351765394210815, "rewards/margins": 8.209500312805176, "rewards/rejected": -8.623019218444824, "step": 68580 }, { "epoch": 0.82, "learning_rate": 4.7206788756201576e-07, "logits/chosen": -2.872284173965454, "logits/rejected": -2.2196359634399414, "logps/chosen": -129.2374267578125, "logps/rejected": -1042.149658203125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7249537706375122, "rewards/margins": 9.277783393859863, "rewards/rejected": -10.002737045288086, "step": 68590 }, { "epoch": 0.82, "learning_rate": 4.714571635939677e-07, "logits/chosen": -2.8933424949645996, "logits/rejected": -2.48197865486145, "logps/chosen": -90.49575805664062, "logps/rejected": -840.7009887695312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.47675004601478577, "rewards/margins": 7.553404331207275, "rewards/rejected": -8.030153274536133, "step": 68600 }, { "epoch": 0.82, "learning_rate": 4.708467938038647e-07, "logits/chosen": -2.901832103729248, "logits/rejected": -2.27235746383667, "logps/chosen": -114.73182678222656, "logps/rejected": -971.7391357421875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6417277455329895, "rewards/margins": 8.66775131225586, "rewards/rejected": -9.309478759765625, "step": 68610 }, { "epoch": 0.82, "learning_rate": 4.7023677829827584e-07, "logits/chosen": -2.907865047454834, "logits/rejected": -2.4279561042785645, "logps/chosen": -106.77131652832031, "logps/rejected": -953.0499267578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5884431004524231, "rewards/margins": 8.5559663772583, "rewards/rejected": -9.144408226013184, "step": 68620 }, { "epoch": 0.82, "learning_rate": 4.696271171837088e-07, "logits/chosen": -2.8636045455932617, "logits/rejected": -2.364161491394043, "logps/chosen": -122.39383697509766, "logps/rejected": -958.4523315429688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7266180515289307, "rewards/margins": 8.463603019714355, "rewards/rejected": -9.190221786499023, "step": 68630 }, { "epoch": 0.82, "learning_rate": 4.690178105666074e-07, "logits/chosen": -2.8891711235046387, "logits/rejected": -2.5670619010925293, "logps/chosen": -77.02572631835938, "logps/rejected": -800.2281494140625, "loss": 0.0983, "rewards/accuracies": 1.0, "rewards/chosen": -0.3716237246990204, "rewards/margins": 7.264451026916504, "rewards/rejected": -7.636074066162109, "step": 68640 }, { "epoch": 0.82, "learning_rate": 4.684088585533553e-07, "logits/chosen": -2.898775339126587, "logits/rejected": -2.3014960289001465, "logps/chosen": -109.9569320678711, "logps/rejected": -1001.4736328125, "loss": 0.1326, "rewards/accuracies": 1.0, "rewards/chosen": -0.5760194063186646, "rewards/margins": 9.031682968139648, "rewards/rejected": -9.607702255249023, "step": 68650 }, { "epoch": 0.82, "learning_rate": 4.6780026125027414e-07, "logits/chosen": -2.9239516258239746, "logits/rejected": -2.527225971221924, "logps/chosen": -85.04508209228516, "logps/rejected": -937.7655029296875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4045083522796631, "rewards/margins": 8.579290390014648, "rewards/rejected": -8.98379898071289, "step": 68660 }, { "epoch": 0.82, "learning_rate": 4.671920187636231e-07, "logits/chosen": -2.8977503776550293, "logits/rejected": -2.372032880783081, "logps/chosen": -90.08753967285156, "logps/rejected": -879.8460083007812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.4647853374481201, "rewards/margins": 7.938762664794922, "rewards/rejected": -8.403549194335938, "step": 68670 }, { "epoch": 0.82, "learning_rate": 4.665841311995997e-07, "logits/chosen": -2.8798956871032715, "logits/rejected": -2.232727527618408, "logps/chosen": -131.8668670654297, "logps/rejected": -984.9654541015625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8240810632705688, "rewards/margins": 8.619203567504883, "rewards/rejected": -9.44328498840332, "step": 68680 }, { "epoch": 0.82, "learning_rate": 4.6597659866434026e-07, "logits/chosen": -2.83240008354187, "logits/rejected": -2.4259636402130127, "logps/chosen": -97.05280303955078, "logps/rejected": -870.2352294921875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5409247279167175, "rewards/margins": 7.785681247711182, "rewards/rejected": -8.326606750488281, "step": 68690 }, { "epoch": 0.82, "learning_rate": 4.6536942126391647e-07, "logits/chosen": -2.87113356590271, "logits/rejected": -2.265596389770508, "logps/chosen": -115.28253173828125, "logps/rejected": -888.2462158203125, "loss": 0.0977, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6894522309303284, "rewards/margins": 7.802818298339844, "rewards/rejected": -8.492269515991211, "step": 68700 }, { "epoch": 0.82, "learning_rate": 4.6476259910434085e-07, "logits/chosen": -2.86161208152771, "logits/rejected": -2.417825937271118, "logps/chosen": -93.35311126708984, "logps/rejected": -909.5582885742188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4891681671142578, "rewards/margins": 8.209637641906738, "rewards/rejected": -8.69880485534668, "step": 68710 }, { "epoch": 0.82, "learning_rate": 4.6415613229156303e-07, "logits/chosen": -2.8805105686187744, "logits/rejected": -2.204263210296631, "logps/chosen": -119.70987701416016, "logps/rejected": -1055.171142578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.628620445728302, "rewards/margins": 9.499852180480957, "rewards/rejected": -10.128473281860352, "step": 68720 }, { "epoch": 0.82, "learning_rate": 4.6355002093147e-07, "logits/chosen": -2.880211353302002, "logits/rejected": -2.331648588180542, "logps/chosen": -116.008056640625, "logps/rejected": -908.9371948242188, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.703170120716095, "rewards/margins": 7.989398956298828, "rewards/rejected": -8.6925687789917, "step": 68730 }, { "epoch": 0.82, "learning_rate": 4.6294426512988737e-07, "logits/chosen": -2.874101161956787, "logits/rejected": -2.452969789505005, "logps/chosen": -79.58833312988281, "logps/rejected": -786.5037231445312, "loss": 0.092, "rewards/accuracies": 1.0, "rewards/chosen": -0.38969871401786804, "rewards/margins": 7.104142189025879, "rewards/rejected": -7.49384069442749, "step": 68740 }, { "epoch": 0.82, "learning_rate": 4.6233886499257836e-07, "logits/chosen": -2.891324281692505, "logits/rejected": -2.089865207672119, "logps/chosen": -119.01435852050781, "logps/rejected": -1091.183837890625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6594623327255249, "rewards/margins": 9.832964897155762, "rewards/rejected": -10.492425918579102, "step": 68750 }, { "epoch": 0.82, "learning_rate": 4.6173382062524476e-07, "logits/chosen": -2.893718957901001, "logits/rejected": -2.5230774879455566, "logps/chosen": -96.02716827392578, "logps/rejected": -915.3463745117188, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.46922603249549866, "rewards/margins": 8.30152702331543, "rewards/rejected": -8.770753860473633, "step": 68760 }, { "epoch": 0.82, "learning_rate": 4.611291321335243e-07, "logits/chosen": -2.8930985927581787, "logits/rejected": -2.1984493732452393, "logps/chosen": -137.61294555664062, "logps/rejected": -963.2903442382812, "loss": 0.1257, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8661310076713562, "rewards/margins": 8.374795913696289, "rewards/rejected": -9.240926742553711, "step": 68770 }, { "epoch": 0.82, "learning_rate": 4.6052479962299473e-07, "logits/chosen": -2.8691248893737793, "logits/rejected": -2.371676206588745, "logps/chosen": -153.543212890625, "logps/rejected": -832.9794921875, "loss": 0.248, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.0933940410614014, "rewards/margins": 6.8540520668029785, "rewards/rejected": -7.947447299957275, "step": 68780 }, { "epoch": 0.82, "learning_rate": 4.599208231991709e-07, "logits/chosen": -2.883744955062866, "logits/rejected": -2.310441017150879, "logps/chosen": -95.12217712402344, "logps/rejected": -966.2160034179688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5124692916870117, "rewards/margins": 8.768086433410645, "rewards/rejected": -9.28055477142334, "step": 68790 }, { "epoch": 0.82, "learning_rate": 4.59317202967505e-07, "logits/chosen": -2.9130654335021973, "logits/rejected": -2.3300604820251465, "logps/chosen": -113.32041931152344, "logps/rejected": -912.6502075195312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6511552333831787, "rewards/margins": 8.083765983581543, "rewards/rejected": -8.734922409057617, "step": 68800 }, { "epoch": 0.82, "learning_rate": 4.587139390333881e-07, "logits/chosen": -2.8913328647613525, "logits/rejected": -2.410003900527954, "logps/chosen": -92.20767974853516, "logps/rejected": -863.9964599609375, "loss": 0.1378, "rewards/accuracies": 1.0, "rewards/chosen": -0.4664239287376404, "rewards/margins": 7.779455661773682, "rewards/rejected": -8.245879173278809, "step": 68810 }, { "epoch": 0.82, "learning_rate": 4.5811103150214755e-07, "logits/chosen": -2.862948417663574, "logits/rejected": -2.034087657928467, "logps/chosen": -118.40472412109375, "logps/rejected": -1084.0836181640625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6202937960624695, "rewards/margins": 9.828887939453125, "rewards/rejected": -10.449182510375977, "step": 68820 }, { "epoch": 0.82, "learning_rate": 4.5750848047905083e-07, "logits/chosen": -2.941816806793213, "logits/rejected": -2.4313430786132812, "logps/chosen": -100.20121002197266, "logps/rejected": -893.4605712890625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5421575307846069, "rewards/margins": 8.006475448608398, "rewards/rejected": -8.548632621765137, "step": 68830 }, { "epoch": 0.82, "learning_rate": 4.5690628606929964e-07, "logits/chosen": -2.857813596725464, "logits/rejected": -2.2641892433166504, "logps/chosen": -114.22640228271484, "logps/rejected": -959.6599731445312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6009732484817505, "rewards/margins": 8.589279174804688, "rewards/rejected": -9.190253257751465, "step": 68840 }, { "epoch": 0.82, "learning_rate": 4.5630444837803686e-07, "logits/chosen": -2.8580093383789062, "logits/rejected": -2.022533655166626, "logps/chosen": -146.6038360595703, "logps/rejected": -1148.619140625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8725525736808777, "rewards/margins": 10.198551177978516, "rewards/rejected": -11.071104049682617, "step": 68850 }, { "epoch": 0.82, "learning_rate": 4.5570296751034085e-07, "logits/chosen": -2.8880858421325684, "logits/rejected": -2.3929076194763184, "logps/chosen": -109.97579193115234, "logps/rejected": -926.1434326171875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6231855154037476, "rewards/margins": 8.243947982788086, "rewards/rejected": -8.867134094238281, "step": 68860 }, { "epoch": 0.82, "learning_rate": 4.551018435712293e-07, "logits/chosen": -2.8989310264587402, "logits/rejected": -2.4880146980285645, "logps/chosen": -89.97040557861328, "logps/rejected": -880.4189453125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.4348713457584381, "rewards/margins": 7.983043670654297, "rewards/rejected": -8.417916297912598, "step": 68870 }, { "epoch": 0.82, "learning_rate": 4.54501076665656e-07, "logits/chosen": -2.8655455112457275, "logits/rejected": -2.366616725921631, "logps/chosen": -125.20503234863281, "logps/rejected": -837.6033935546875, "loss": 0.1305, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8175169229507446, "rewards/margins": 7.179235935211182, "rewards/rejected": -7.9967522621154785, "step": 68880 }, { "epoch": 0.82, "learning_rate": 4.5390066689851465e-07, "logits/chosen": -2.846764087677002, "logits/rejected": -2.3320162296295166, "logps/chosen": -111.0641098022461, "logps/rejected": -987.2824096679688, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.6470762491226196, "rewards/margins": 8.823437690734863, "rewards/rejected": -9.470513343811035, "step": 68890 }, { "epoch": 0.82, "learning_rate": 4.533006143746332e-07, "logits/chosen": -2.8619134426116943, "logits/rejected": -2.0537846088409424, "logps/chosen": -167.42965698242188, "logps/rejected": -1050.365234375, "loss": 0.1592, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1156965494155884, "rewards/margins": 8.970455169677734, "rewards/rejected": -10.086151123046875, "step": 68900 }, { "epoch": 0.82, "learning_rate": 4.5270091919877997e-07, "logits/chosen": -2.8718788623809814, "logits/rejected": -2.2259178161621094, "logps/chosen": -104.90144348144531, "logps/rejected": -1016.0480346679688, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -0.5706087946891785, "rewards/margins": 9.188313484191895, "rewards/rejected": -9.75892162322998, "step": 68910 }, { "epoch": 0.82, "learning_rate": 4.521015814756602e-07, "logits/chosen": -2.9029476642608643, "logits/rejected": -2.3682687282562256, "logps/chosen": -96.96012878417969, "logps/rejected": -902.1531982421875, "loss": 0.1368, "rewards/accuracies": 1.0, "rewards/chosen": -0.5080402493476868, "rewards/margins": 8.122176170349121, "rewards/rejected": -8.630216598510742, "step": 68920 }, { "epoch": 0.83, "learning_rate": 4.5150260130991633e-07, "logits/chosen": -2.8473801612854004, "logits/rejected": -2.1149299144744873, "logps/chosen": -125.5628662109375, "logps/rejected": -1040.980712890625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7409987449645996, "rewards/margins": 9.251365661621094, "rewards/rejected": -9.992364883422852, "step": 68930 }, { "epoch": 0.83, "learning_rate": 4.509039788061292e-07, "logits/chosen": -2.8584628105163574, "logits/rejected": -2.2152209281921387, "logps/chosen": -129.56301879882812, "logps/rejected": -1005.4195556640625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7480707168579102, "rewards/margins": 8.910531044006348, "rewards/rejected": -9.658601760864258, "step": 68940 }, { "epoch": 0.83, "learning_rate": 4.5030571406881606e-07, "logits/chosen": -2.876755475997925, "logits/rejected": -2.3433620929718018, "logps/chosen": -110.18192291259766, "logps/rejected": -936.2037963867188, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.6333169937133789, "rewards/margins": 8.34355640411377, "rewards/rejected": -8.976873397827148, "step": 68950 }, { "epoch": 0.83, "learning_rate": 4.49707807202433e-07, "logits/chosen": -2.8516433238983154, "logits/rejected": -2.174335479736328, "logps/chosen": -123.10273742675781, "logps/rejected": -1030.4835205078125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.711139976978302, "rewards/margins": 9.194134712219238, "rewards/rejected": -9.9052734375, "step": 68960 }, { "epoch": 0.83, "learning_rate": 4.49110258311371e-07, "logits/chosen": -2.8853204250335693, "logits/rejected": -2.3443455696105957, "logps/chosen": -96.60562896728516, "logps/rejected": -903.9119262695312, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -0.513865053653717, "rewards/margins": 8.142202377319336, "rewards/rejected": -8.65606689453125, "step": 68970 }, { "epoch": 0.83, "learning_rate": 4.485130674999627e-07, "logits/chosen": -2.8802309036254883, "logits/rejected": -2.411475896835327, "logps/chosen": -92.45413970947266, "logps/rejected": -924.6787109375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.4714033603668213, "rewards/margins": 8.381882667541504, "rewards/rejected": -8.853285789489746, "step": 68980 }, { "epoch": 0.83, "learning_rate": 4.47916234872475e-07, "logits/chosen": -2.8777377605438232, "logits/rejected": -2.167971611022949, "logps/chosen": -105.64991760253906, "logps/rejected": -1012.9219970703125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.49536338448524475, "rewards/margins": 9.238497734069824, "rewards/rejected": -9.733861923217773, "step": 68990 }, { "epoch": 0.83, "learning_rate": 4.473197605331131e-07, "logits/chosen": -2.8416268825531006, "logits/rejected": -2.1577694416046143, "logps/chosen": -121.52921295166016, "logps/rejected": -1043.696044921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7225026488304138, "rewards/margins": 9.306135177612305, "rewards/rejected": -10.028636932373047, "step": 69000 }, { "epoch": 0.83, "eval_logits/chosen": -2.885847330093384, "eval_logits/rejected": -1.7619274854660034, "eval_logps/chosen": -241.52198791503906, "eval_logps/rejected": -1140.6275634765625, "eval_loss": 0.0013161799870431423, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -1.8034172058105469, "eval_rewards/margins": 9.135604858398438, "eval_rewards/rejected": -10.939022064208984, "eval_runtime": 1.2147, "eval_samples_per_second": 4.116, "eval_steps_per_second": 2.47, "step": 69000 }, { "epoch": 0.83, "learning_rate": 4.467236445860207e-07, "logits/chosen": -2.896045446395874, "logits/rejected": -2.4732208251953125, "logps/chosen": -103.10444641113281, "logps/rejected": -946.2579345703125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.5834747552871704, "rewards/margins": 8.478194236755371, "rewards/rejected": -9.061668395996094, "step": 69010 }, { "epoch": 0.83, "learning_rate": 4.461278871352767e-07, "logits/chosen": -2.9182357788085938, "logits/rejected": -2.2253475189208984, "logps/chosen": -118.48258972167969, "logps/rejected": -1042.820556640625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6817479133605957, "rewards/margins": 9.336148262023926, "rewards/rejected": -10.017894744873047, "step": 69020 }, { "epoch": 0.83, "learning_rate": 4.455324882848991e-07, "logits/chosen": -2.901425361633301, "logits/rejected": -2.3369014263153076, "logps/chosen": -108.08842468261719, "logps/rejected": -988.2069091796875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5829871296882629, "rewards/margins": 8.879395484924316, "rewards/rejected": -9.462382316589355, "step": 69030 }, { "epoch": 0.83, "learning_rate": 4.4493744813884296e-07, "logits/chosen": -2.870882511138916, "logits/rejected": -2.5451574325561523, "logps/chosen": -82.85826110839844, "logps/rejected": -876.7901611328125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.44109973311424255, "rewards/margins": 7.940039157867432, "rewards/rejected": -8.38113784790039, "step": 69040 }, { "epoch": 0.83, "learning_rate": 4.4434276680100057e-07, "logits/chosen": -2.8330295085906982, "logits/rejected": -1.9611040353775024, "logps/chosen": -148.4724578857422, "logps/rejected": -1064.429931640625, "loss": 0.1287, "rewards/accuracies": 1.0, "rewards/chosen": -0.8716225624084473, "rewards/margins": 9.372270584106445, "rewards/rejected": -10.243891716003418, "step": 69050 }, { "epoch": 0.83, "learning_rate": 4.437484443752019e-07, "logits/chosen": -2.8449440002441406, "logits/rejected": -2.354734420776367, "logps/chosen": -102.07768249511719, "logps/rejected": -918.2979736328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5329558849334717, "rewards/margins": 8.25260066986084, "rewards/rejected": -8.78555679321289, "step": 69060 }, { "epoch": 0.83, "learning_rate": 4.4315448096521384e-07, "logits/chosen": -2.9155731201171875, "logits/rejected": -2.6404199600219727, "logps/chosen": -78.57209777832031, "logps/rejected": -741.3790893554688, "loss": 0.1043, "rewards/accuracies": 1.0, "rewards/chosen": -0.41351431608200073, "rewards/margins": 6.630352973937988, "rewards/rejected": -7.0438666343688965, "step": 69070 }, { "epoch": 0.83, "learning_rate": 4.4256087667474164e-07, "logits/chosen": -2.870577335357666, "logits/rejected": -2.4556803703308105, "logps/chosen": -107.65382385253906, "logps/rejected": -834.1058349609375, "loss": 0.2686, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6401761174201965, "rewards/margins": 7.327415466308594, "rewards/rejected": -7.967591285705566, "step": 69080 }, { "epoch": 0.83, "learning_rate": 4.419676316074251e-07, "logits/chosen": -2.869816303253174, "logits/rejected": -2.147782325744629, "logps/chosen": -127.96418762207031, "logps/rejected": -1017.0062255859375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7489975690841675, "rewards/margins": 9.003811836242676, "rewards/rejected": -9.752809524536133, "step": 69090 }, { "epoch": 0.83, "learning_rate": 4.4137474586684435e-07, "logits/chosen": -2.878920078277588, "logits/rejected": -2.372255802154541, "logps/chosen": -114.1059799194336, "logps/rejected": -943.0012817382812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6922294497489929, "rewards/margins": 8.344157218933105, "rewards/rejected": -9.036388397216797, "step": 69100 }, { "epoch": 0.83, "learning_rate": 4.4078221955651506e-07, "logits/chosen": -2.881028890609741, "logits/rejected": -2.108445882797241, "logps/chosen": -110.05570983886719, "logps/rejected": -1004.7515869140625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6121970415115356, "rewards/margins": 9.04675579071045, "rewards/rejected": -9.658953666687012, "step": 69110 }, { "epoch": 0.83, "learning_rate": 4.4019005277989135e-07, "logits/chosen": -2.913604259490967, "logits/rejected": -2.4037258625030518, "logps/chosen": -109.15946197509766, "logps/rejected": -939.3134765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5767049789428711, "rewards/margins": 8.425790786743164, "rewards/rejected": -9.002495765686035, "step": 69120 }, { "epoch": 0.83, "learning_rate": 4.395982456403633e-07, "logits/chosen": -2.8806300163269043, "logits/rejected": -2.3844966888427734, "logps/chosen": -90.41232299804688, "logps/rejected": -830.4889526367188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.40231984853744507, "rewards/margins": 7.518226623535156, "rewards/rejected": -7.920546054840088, "step": 69130 }, { "epoch": 0.83, "learning_rate": 4.390067982412594e-07, "logits/chosen": -2.9118118286132812, "logits/rejected": -2.4740283489227295, "logps/chosen": -97.94676208496094, "logps/rejected": -908.0479736328125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5373365879058838, "rewards/margins": 8.156550407409668, "rewards/rejected": -8.693887710571289, "step": 69140 }, { "epoch": 0.83, "learning_rate": 4.3841571068584473e-07, "logits/chosen": -2.874432325363159, "logits/rejected": -2.417489528656006, "logps/chosen": -95.38177490234375, "logps/rejected": -905.6650390625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5242877006530762, "rewards/margins": 8.158326148986816, "rewards/rejected": -8.682613372802734, "step": 69150 }, { "epoch": 0.83, "learning_rate": 4.378249830773204e-07, "logits/chosen": -2.87817645072937, "logits/rejected": -2.4309000968933105, "logps/chosen": -90.87623596191406, "logps/rejected": -842.7756958007812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.43439069390296936, "rewards/margins": 7.604681968688965, "rewards/rejected": -8.03907299041748, "step": 69160 }, { "epoch": 0.83, "learning_rate": 4.3723461551882676e-07, "logits/chosen": -2.8625218868255615, "logits/rejected": -2.1786160469055176, "logps/chosen": -108.05882263183594, "logps/rejected": -1047.326171875, "loss": 0.0907, "rewards/accuracies": 1.0, "rewards/chosen": -0.5567020177841187, "rewards/margins": 9.510708808898926, "rewards/rejected": -10.06740951538086, "step": 69170 }, { "epoch": 0.83, "learning_rate": 4.366446081134401e-07, "logits/chosen": -2.8499855995178223, "logits/rejected": -2.282430648803711, "logps/chosen": -94.73033142089844, "logps/rejected": -789.546142578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4337155222892761, "rewards/margins": 7.094250679016113, "rewards/rejected": -7.527965545654297, "step": 69180 }, { "epoch": 0.83, "learning_rate": 4.3605496096417425e-07, "logits/chosen": -2.854499101638794, "logits/rejected": -2.1916160583496094, "logps/chosen": -138.33029174804688, "logps/rejected": -1027.206787109375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8809946179389954, "rewards/margins": 8.9664306640625, "rewards/rejected": -9.847424507141113, "step": 69190 }, { "epoch": 0.83, "learning_rate": 4.354656741739799e-07, "logits/chosen": -2.8827176094055176, "logits/rejected": -2.487215280532837, "logps/chosen": -149.6622772216797, "logps/rejected": -806.3391723632812, "loss": 0.1946, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.006196141242981, "rewards/margins": 6.652535915374756, "rewards/rejected": -7.658730983734131, "step": 69200 }, { "epoch": 0.83, "learning_rate": 4.3487674784574524e-07, "logits/chosen": -2.8885293006896973, "logits/rejected": -1.9724922180175781, "logps/chosen": -145.1946258544922, "logps/rejected": -1009.5955200195312, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.8906525373458862, "rewards/margins": 8.799201965332031, "rewards/rejected": -9.689855575561523, "step": 69210 }, { "epoch": 0.83, "learning_rate": 4.3428818208229447e-07, "logits/chosen": -2.90586519241333, "logits/rejected": -2.3511886596679688, "logps/chosen": -113.35905456542969, "logps/rejected": -931.0283203125, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -0.657821774482727, "rewards/margins": 8.25004768371582, "rewards/rejected": -8.907869338989258, "step": 69220 }, { "epoch": 0.83, "learning_rate": 4.336999769863895e-07, "logits/chosen": -2.8710525035858154, "logits/rejected": -2.375757932662964, "logps/chosen": -82.48246765136719, "logps/rejected": -832.2720947265625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.38853639364242554, "rewards/margins": 7.555192470550537, "rewards/rejected": -7.943729400634766, "step": 69230 }, { "epoch": 0.83, "learning_rate": 4.331121326607299e-07, "logits/chosen": -2.858020305633545, "logits/rejected": -2.368847370147705, "logps/chosen": -117.578125, "logps/rejected": -950.2936401367188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7280415296554565, "rewards/margins": 8.37983226776123, "rewards/rejected": -9.10787296295166, "step": 69240 }, { "epoch": 0.83, "learning_rate": 4.325246492079513e-07, "logits/chosen": -2.902487277984619, "logits/rejected": -2.482046127319336, "logps/chosen": -89.48145294189453, "logps/rejected": -889.1412963867188, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.4696601927280426, "rewards/margins": 8.038801193237305, "rewards/rejected": -8.508461952209473, "step": 69250 }, { "epoch": 0.83, "learning_rate": 4.3193752673062665e-07, "logits/chosen": -2.8649587631225586, "logits/rejected": -1.925598382949829, "logps/chosen": -145.0509033203125, "logps/rejected": -1073.549560546875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8593740463256836, "rewards/margins": 9.475321769714355, "rewards/rejected": -10.334695816040039, "step": 69260 }, { "epoch": 0.83, "learning_rate": 4.3135076533126625e-07, "logits/chosen": -2.902099609375, "logits/rejected": -2.5734124183654785, "logps/chosen": -81.89224243164062, "logps/rejected": -826.2557373046875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4414742887020111, "rewards/margins": 7.443275451660156, "rewards/rejected": -7.8847503662109375, "step": 69270 }, { "epoch": 0.83, "learning_rate": 4.3076436511231724e-07, "logits/chosen": -2.8891282081604004, "logits/rejected": -2.422121286392212, "logps/chosen": -88.25747680664062, "logps/rejected": -865.5743408203125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4252959191799164, "rewards/margins": 7.845114231109619, "rewards/rejected": -8.270410537719727, "step": 69280 }, { "epoch": 0.83, "learning_rate": 4.3017832617616244e-07, "logits/chosen": -2.8811590671539307, "logits/rejected": -2.6334993839263916, "logps/chosen": -95.23448181152344, "logps/rejected": -766.0420532226562, "loss": 0.0938, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5602065324783325, "rewards/margins": 6.727730751037598, "rewards/rejected": -7.287936210632324, "step": 69290 }, { "epoch": 0.83, "learning_rate": 4.295926486251234e-07, "logits/chosen": -2.955501079559326, "logits/rejected": -2.5930116176605225, "logps/chosen": -87.46412658691406, "logps/rejected": -835.0120849609375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4108589291572571, "rewards/margins": 7.562918186187744, "rewards/rejected": -7.973777770996094, "step": 69300 }, { "epoch": 0.83, "learning_rate": 4.2900733256145744e-07, "logits/chosen": -2.9056854248046875, "logits/rejected": -2.3632423877716064, "logps/chosen": -135.50331115722656, "logps/rejected": -1002.2330932617188, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.8616922497749329, "rewards/margins": 8.759428977966309, "rewards/rejected": -9.621121406555176, "step": 69310 }, { "epoch": 0.83, "learning_rate": 4.284223780873592e-07, "logits/chosen": -2.9183080196380615, "logits/rejected": -2.400550365447998, "logps/chosen": -108.76763916015625, "logps/rejected": -940.7789916992188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.655856728553772, "rewards/margins": 8.360311508178711, "rewards/rejected": -9.016168594360352, "step": 69320 }, { "epoch": 0.83, "learning_rate": 4.2783778530496035e-07, "logits/chosen": -2.9257373809814453, "logits/rejected": -2.389744281768799, "logps/chosen": -82.18317413330078, "logps/rejected": -947.6185302734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4069910943508148, "rewards/margins": 8.675541877746582, "rewards/rejected": -9.08253288269043, "step": 69330 }, { "epoch": 0.83, "learning_rate": 4.2725355431632895e-07, "logits/chosen": -2.8973870277404785, "logits/rejected": -2.4222521781921387, "logps/chosen": -79.19224548339844, "logps/rejected": -966.9815673828125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.3384130299091339, "rewards/margins": 8.947357177734375, "rewards/rejected": -9.285770416259766, "step": 69340 }, { "epoch": 0.83, "learning_rate": 4.266696852234706e-07, "logits/chosen": -2.8561604022979736, "logits/rejected": -2.4033467769622803, "logps/chosen": -88.17210388183594, "logps/rejected": -880.2789306640625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4338175654411316, "rewards/margins": 7.993536949157715, "rewards/rejected": -8.427353858947754, "step": 69350 }, { "epoch": 0.83, "learning_rate": 4.2608617812832653e-07, "logits/chosen": -2.8594608306884766, "logits/rejected": -2.288386821746826, "logps/chosen": -149.54730224609375, "logps/rejected": -968.8380737304688, "loss": 0.1307, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.897302508354187, "rewards/margins": 8.38960075378418, "rewards/rejected": -9.286903381347656, "step": 69360 }, { "epoch": 0.83, "learning_rate": 4.255030331327753e-07, "logits/chosen": -2.900728940963745, "logits/rejected": -2.3720481395721436, "logps/chosen": -99.87183380126953, "logps/rejected": -887.3946533203125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5824669003486633, "rewards/margins": 7.911950588226318, "rewards/rejected": -8.494417190551758, "step": 69370 }, { "epoch": 0.83, "learning_rate": 4.249202503386332e-07, "logits/chosen": -2.859386920928955, "logits/rejected": -2.1388659477233887, "logps/chosen": -107.6525650024414, "logps/rejected": -932.1521606445312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5698335766792297, "rewards/margins": 8.348383903503418, "rewards/rejected": -8.918218612670898, "step": 69380 }, { "epoch": 0.83, "learning_rate": 4.243378298476511e-07, "logits/chosen": -2.8110873699188232, "logits/rejected": -2.2265093326568604, "logps/chosen": -113.65145111083984, "logps/rejected": -922.3404541015625, "loss": 0.0839, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6254381537437439, "rewards/margins": 8.207364082336426, "rewards/rejected": -8.832802772521973, "step": 69390 }, { "epoch": 0.83, "learning_rate": 4.2375577176152067e-07, "logits/chosen": -2.867656707763672, "logits/rejected": -2.1662135124206543, "logps/chosen": -128.72328186035156, "logps/rejected": -952.9945068359375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7831825017929077, "rewards/margins": 8.36054515838623, "rewards/rejected": -9.143728256225586, "step": 69400 }, { "epoch": 0.83, "learning_rate": 4.231740761818651e-07, "logits/chosen": -2.9138669967651367, "logits/rejected": -2.4974875450134277, "logps/chosen": -85.28400421142578, "logps/rejected": -864.7440185546875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.4502377510070801, "rewards/margins": 7.812729835510254, "rewards/rejected": -8.262968063354492, "step": 69410 }, { "epoch": 0.83, "learning_rate": 4.225927432102475e-07, "logits/chosen": -2.8392815589904785, "logits/rejected": -2.251193046569824, "logps/chosen": -99.08323669433594, "logps/rejected": -951.6239013671875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5420008897781372, "rewards/margins": 8.579228401184082, "rewards/rejected": -9.12122917175293, "step": 69420 }, { "epoch": 0.83, "learning_rate": 4.220117729481674e-07, "logits/chosen": -2.858320951461792, "logits/rejected": -2.1926803588867188, "logps/chosen": -120.7868881225586, "logps/rejected": -1032.661865234375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6897705793380737, "rewards/margins": 9.225141525268555, "rewards/rejected": -9.914911270141602, "step": 69430 }, { "epoch": 0.83, "learning_rate": 4.214311654970604e-07, "logits/chosen": -2.834338665008545, "logits/rejected": -2.268261432647705, "logps/chosen": -101.72880554199219, "logps/rejected": -986.1755981445312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.542454719543457, "rewards/margins": 8.917221069335938, "rewards/rejected": -9.459674835205078, "step": 69440 }, { "epoch": 0.83, "learning_rate": 4.2085092095829895e-07, "logits/chosen": -2.8858981132507324, "logits/rejected": -1.9999027252197266, "logps/chosen": -143.10726928710938, "logps/rejected": -1084.0438232421875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9122213125228882, "rewards/margins": 9.506182670593262, "rewards/rejected": -10.418403625488281, "step": 69450 }, { "epoch": 0.83, "learning_rate": 4.202710394331927e-07, "logits/chosen": -2.823493719100952, "logits/rejected": -2.1194705963134766, "logps/chosen": -118.01338958740234, "logps/rejected": -1099.7940673828125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6786242127418518, "rewards/margins": 9.916725158691406, "rewards/rejected": -10.595349311828613, "step": 69460 }, { "epoch": 0.83, "learning_rate": 4.196915210229863e-07, "logits/chosen": -2.8620541095733643, "logits/rejected": -2.3085570335388184, "logps/chosen": -110.7175521850586, "logps/rejected": -885.6099853515625, "loss": 0.1124, "rewards/accuracies": 1.0, "rewards/chosen": -0.6406664252281189, "rewards/margins": 7.841426849365234, "rewards/rejected": -8.482091903686523, "step": 69470 }, { "epoch": 0.83, "learning_rate": 4.191123658288621e-07, "logits/chosen": -2.8812479972839355, "logits/rejected": -2.3108372688293457, "logps/chosen": -97.87948608398438, "logps/rejected": -952.5638427734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5091723203659058, "rewards/margins": 8.641054153442383, "rewards/rejected": -9.150224685668945, "step": 69480 }, { "epoch": 0.83, "learning_rate": 4.1853357395193974e-07, "logits/chosen": -2.8300812244415283, "logits/rejected": -2.3031232357025146, "logps/chosen": -110.94911193847656, "logps/rejected": -935.6726684570312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6440186500549316, "rewards/margins": 8.328460693359375, "rewards/rejected": -8.972478866577148, "step": 69490 }, { "epoch": 0.83, "learning_rate": 4.179551454932743e-07, "logits/chosen": -2.917358636856079, "logits/rejected": -2.321706771850586, "logps/chosen": -145.43768310546875, "logps/rejected": -982.1535034179688, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.0192348957061768, "rewards/margins": 8.405688285827637, "rewards/rejected": -9.42492389678955, "step": 69500 }, { "epoch": 0.83, "learning_rate": 4.173770805538577e-07, "logits/chosen": -2.857412338256836, "logits/rejected": -2.4797844886779785, "logps/chosen": -79.05620574951172, "logps/rejected": -845.9659423828125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4006304144859314, "rewards/margins": 7.699221134185791, "rewards/rejected": -8.099851608276367, "step": 69510 }, { "epoch": 0.83, "learning_rate": 4.1679937923461865e-07, "logits/chosen": -2.8918280601501465, "logits/rejected": -2.429206371307373, "logps/chosen": -103.99888610839844, "logps/rejected": -872.4761962890625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5758952498435974, "rewards/margins": 7.750255584716797, "rewards/rejected": -8.326150894165039, "step": 69520 }, { "epoch": 0.83, "learning_rate": 4.162220416364229e-07, "logits/chosen": -2.8868467807769775, "logits/rejected": -2.1195273399353027, "logps/chosen": -117.4202651977539, "logps/rejected": -995.3611450195312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6416985392570496, "rewards/margins": 8.904500961303711, "rewards/rejected": -9.546199798583984, "step": 69530 }, { "epoch": 0.83, "learning_rate": 4.156450678600704e-07, "logits/chosen": -2.888850688934326, "logits/rejected": -2.3491756916046143, "logps/chosen": -122.975830078125, "logps/rejected": -980.783203125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7693372368812561, "rewards/margins": 8.646028518676758, "rewards/rejected": -9.415364265441895, "step": 69540 }, { "epoch": 0.83, "learning_rate": 4.150684580063002e-07, "logits/chosen": -2.8815553188323975, "logits/rejected": -1.9823529720306396, "logps/chosen": -159.44415283203125, "logps/rejected": -1102.570068359375, "loss": 0.0965, "rewards/accuracies": 1.0, "rewards/chosen": -0.9219304323196411, "rewards/margins": 9.704153060913086, "rewards/rejected": -10.626082420349121, "step": 69550 }, { "epoch": 0.83, "learning_rate": 4.1449221217578663e-07, "logits/chosen": -2.8216099739074707, "logits/rejected": -2.2328152656555176, "logps/chosen": -109.95050048828125, "logps/rejected": -853.1099853515625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.6539312601089478, "rewards/margins": 7.491888523101807, "rewards/rejected": -8.145819664001465, "step": 69560 }, { "epoch": 0.83, "learning_rate": 4.139163304691402e-07, "logits/chosen": -2.923405885696411, "logits/rejected": -2.4032552242279053, "logps/chosen": -104.1506576538086, "logps/rejected": -979.4949951171875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5813037157058716, "rewards/margins": 8.81490421295166, "rewards/rejected": -9.396206855773926, "step": 69570 }, { "epoch": 0.83, "learning_rate": 4.1334081298690906e-07, "logits/chosen": -2.8700854778289795, "logits/rejected": -2.1605019569396973, "logps/chosen": -121.69908142089844, "logps/rejected": -1042.52099609375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6683541536331177, "rewards/margins": 9.357109069824219, "rewards/rejected": -10.02546215057373, "step": 69580 }, { "epoch": 0.83, "learning_rate": 4.127656598295762e-07, "logits/chosen": -2.9280362129211426, "logits/rejected": -2.362595796585083, "logps/chosen": -99.14757537841797, "logps/rejected": -980.8828125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4845667779445648, "rewards/margins": 8.949858665466309, "rewards/rejected": -9.434426307678223, "step": 69590 }, { "epoch": 0.83, "learning_rate": 4.1219087109756276e-07, "logits/chosen": -2.8507556915283203, "logits/rejected": -2.3592653274536133, "logps/chosen": -100.70086669921875, "logps/rejected": -965.8737182617188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5431713461875916, "rewards/margins": 8.72691535949707, "rewards/rejected": -9.270085334777832, "step": 69600 }, { "epoch": 0.83, "learning_rate": 4.11616446891224e-07, "logits/chosen": -2.868042230606079, "logits/rejected": -2.242771863937378, "logps/chosen": -96.69219207763672, "logps/rejected": -864.6975708007812, "loss": 0.1046, "rewards/accuracies": 1.0, "rewards/chosen": -0.4966716170310974, "rewards/margins": 7.784318447113037, "rewards/rejected": -8.280990600585938, "step": 69610 }, { "epoch": 0.83, "learning_rate": 4.110423873108535e-07, "logits/chosen": -2.908993721008301, "logits/rejected": -2.4822335243225098, "logps/chosen": -90.42369842529297, "logps/rejected": -855.9811401367188, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.4573717713356018, "rewards/margins": 7.699496269226074, "rewards/rejected": -8.156869888305664, "step": 69620 }, { "epoch": 0.83, "learning_rate": 4.104686924566803e-07, "logits/chosen": -2.9160075187683105, "logits/rejected": -2.428100109100342, "logps/chosen": -128.2857208251953, "logps/rejected": -935.2431640625, "loss": 0.0982, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8311716914176941, "rewards/margins": 8.127992630004883, "rewards/rejected": -8.9591646194458, "step": 69630 }, { "epoch": 0.83, "learning_rate": 4.098953624288698e-07, "logits/chosen": -2.8599331378936768, "logits/rejected": -2.705933094024658, "logps/chosen": -54.62017059326172, "logps/rejected": -710.5027465820312, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.2086574137210846, "rewards/margins": 6.536999702453613, "rewards/rejected": -6.745657920837402, "step": 69640 }, { "epoch": 0.83, "learning_rate": 4.0932239732752427e-07, "logits/chosen": -2.9125816822052, "logits/rejected": -2.309302806854248, "logps/chosen": -143.19552612304688, "logps/rejected": -936.3131713867188, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.9026273488998413, "rewards/margins": 8.07104206085205, "rewards/rejected": -8.973669052124023, "step": 69650 }, { "epoch": 0.83, "learning_rate": 4.087497972526819e-07, "logits/chosen": -2.899872303009033, "logits/rejected": -2.3612937927246094, "logps/chosen": -102.54252624511719, "logps/rejected": -958.3033447265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.558571457862854, "rewards/margins": 8.62724494934082, "rewards/rejected": -9.185815811157227, "step": 69660 }, { "epoch": 0.83, "learning_rate": 4.081775623043163e-07, "logits/chosen": -2.8919835090637207, "logits/rejected": -2.253232479095459, "logps/chosen": -115.84212493896484, "logps/rejected": -918.8240966796875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6590412855148315, "rewards/margins": 8.136428833007812, "rewards/rejected": -8.795470237731934, "step": 69670 }, { "epoch": 0.83, "learning_rate": 4.076056925823385e-07, "logits/chosen": -2.856847047805786, "logits/rejected": -2.5396475791931152, "logps/chosen": -73.3629379272461, "logps/rejected": -785.484130859375, "loss": 0.1595, "rewards/accuracies": 1.0, "rewards/chosen": -0.33413204550743103, "rewards/margins": 7.155413627624512, "rewards/rejected": -7.489546298980713, "step": 69680 }, { "epoch": 0.83, "learning_rate": 4.0703418818659523e-07, "logits/chosen": -2.9074409008026123, "logits/rejected": -2.2420096397399902, "logps/chosen": -107.3239974975586, "logps/rejected": -998.658203125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5864125490188599, "rewards/margins": 9.00448226928711, "rewards/rejected": -9.59089469909668, "step": 69690 }, { "epoch": 0.83, "learning_rate": 4.0646304921687e-07, "logits/chosen": -2.8947014808654785, "logits/rejected": -2.5167922973632812, "logps/chosen": -80.02167510986328, "logps/rejected": -927.8215942382812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.38966208696365356, "rewards/margins": 8.50800895690918, "rewards/rejected": -8.897671699523926, "step": 69700 }, { "epoch": 0.83, "learning_rate": 4.058922757728817e-07, "logits/chosen": -2.8523006439208984, "logits/rejected": -2.2516183853149414, "logps/chosen": -100.82398986816406, "logps/rejected": -953.1817626953125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5117651224136353, "rewards/margins": 8.639822006225586, "rewards/rejected": -9.151586532592773, "step": 69710 }, { "epoch": 0.83, "learning_rate": 4.053218679542859e-07, "logits/chosen": -2.8759331703186035, "logits/rejected": -2.4735827445983887, "logps/chosen": -98.20994567871094, "logps/rejected": -818.4009399414062, "loss": 0.0588, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5535141825675964, "rewards/margins": 7.2429633140563965, "rewards/rejected": -7.796477317810059, "step": 69720 }, { "epoch": 0.83, "learning_rate": 4.04751825860675e-07, "logits/chosen": -2.8828420639038086, "logits/rejected": -2.1805553436279297, "logps/chosen": -122.72039794921875, "logps/rejected": -988.2337036132812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7371745109558105, "rewards/margins": 8.743661880493164, "rewards/rejected": -9.480835914611816, "step": 69730 }, { "epoch": 0.83, "learning_rate": 4.0418214959157514e-07, "logits/chosen": -2.898176670074463, "logits/rejected": -2.313014507293701, "logps/chosen": -108.66365814208984, "logps/rejected": -925.5872802734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.577117383480072, "rewards/margins": 8.27928352355957, "rewards/rejected": -8.856401443481445, "step": 69740 }, { "epoch": 0.83, "learning_rate": 4.036128392464511e-07, "logits/chosen": -2.876215934753418, "logits/rejected": -2.3644204139709473, "logps/chosen": -90.4968490600586, "logps/rejected": -963.806640625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.49951988458633423, "rewards/margins": 8.751484870910645, "rewards/rejected": -9.251005172729492, "step": 69750 }, { "epoch": 0.84, "learning_rate": 4.0304389492470315e-07, "logits/chosen": -2.89184308052063, "logits/rejected": -2.273174285888672, "logps/chosen": -104.67459869384766, "logps/rejected": -979.3614501953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.491870641708374, "rewards/margins": 8.915289878845215, "rewards/rejected": -9.407159805297852, "step": 69760 }, { "epoch": 0.84, "learning_rate": 4.024753167256668e-07, "logits/chosen": -2.8899078369140625, "logits/rejected": -2.4198498725891113, "logps/chosen": -91.05049133300781, "logps/rejected": -814.01904296875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.4731236398220062, "rewards/margins": 7.2926201820373535, "rewards/rejected": -7.765743255615234, "step": 69770 }, { "epoch": 0.84, "learning_rate": 4.0190710474861477e-07, "logits/chosen": -2.8914952278137207, "logits/rejected": -2.350304126739502, "logps/chosen": -106.93336486816406, "logps/rejected": -936.4362182617188, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6243306398391724, "rewards/margins": 8.358160018920898, "rewards/rejected": -8.982490539550781, "step": 69780 }, { "epoch": 0.84, "learning_rate": 4.013392590927548e-07, "logits/chosen": -2.8834986686706543, "logits/rejected": -2.2067809104919434, "logps/chosen": -106.85099029541016, "logps/rejected": -889.4977416992188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6045169830322266, "rewards/margins": 7.902756690979004, "rewards/rejected": -8.507272720336914, "step": 69790 }, { "epoch": 0.84, "learning_rate": 4.007717798572322e-07, "logits/chosen": -2.9114317893981934, "logits/rejected": -2.532869577407837, "logps/chosen": -79.29991912841797, "logps/rejected": -933.26708984375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.39997783303260803, "rewards/margins": 8.552192687988281, "rewards/rejected": -8.952171325683594, "step": 69800 }, { "epoch": 0.84, "learning_rate": 4.0020466714112526e-07, "logits/chosen": -2.922699451446533, "logits/rejected": -2.5862622261047363, "logps/chosen": -73.55204772949219, "logps/rejected": -797.138671875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.3395790457725525, "rewards/margins": 7.269285678863525, "rewards/rejected": -7.608864784240723, "step": 69810 }, { "epoch": 0.84, "learning_rate": 3.99637921043452e-07, "logits/chosen": -2.8593242168426514, "logits/rejected": -2.2096924781799316, "logps/chosen": -116.9415512084961, "logps/rejected": -983.3806762695312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6439305543899536, "rewards/margins": 8.801263809204102, "rewards/rejected": -9.445194244384766, "step": 69820 }, { "epoch": 0.84, "learning_rate": 3.990715416631641e-07, "logits/chosen": -2.833458185195923, "logits/rejected": -2.455467700958252, "logps/chosen": -81.44268035888672, "logps/rejected": -804.08154296875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.42281490564346313, "rewards/margins": 7.2483720779418945, "rewards/rejected": -7.671187400817871, "step": 69830 }, { "epoch": 0.84, "learning_rate": 3.985055290991502e-07, "logits/chosen": -2.8916819095611572, "logits/rejected": -2.15303897857666, "logps/chosen": -120.00025939941406, "logps/rejected": -1030.741943359375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.648482620716095, "rewards/margins": 9.261999130249023, "rewards/rejected": -9.910482406616211, "step": 69840 }, { "epoch": 0.84, "learning_rate": 3.979398834502349e-07, "logits/chosen": -2.9005870819091797, "logits/rejected": -2.495570421218872, "logps/chosen": -98.08123016357422, "logps/rejected": -861.421875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5215100049972534, "rewards/margins": 7.699269771575928, "rewards/rejected": -8.220780372619629, "step": 69850 }, { "epoch": 0.84, "learning_rate": 3.9737460481517693e-07, "logits/chosen": -2.844801425933838, "logits/rejected": -2.239393711090088, "logps/chosen": -119.4109878540039, "logps/rejected": -944.5101318359375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.649358868598938, "rewards/margins": 8.378128051757812, "rewards/rejected": -9.027486801147461, "step": 69860 }, { "epoch": 0.84, "learning_rate": 3.968096932926732e-07, "logits/chosen": -2.8708982467651367, "logits/rejected": -2.1972737312316895, "logps/chosen": -118.27000427246094, "logps/rejected": -1060.7403564453125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6693910360336304, "rewards/margins": 9.537561416625977, "rewards/rejected": -10.206950187683105, "step": 69870 }, { "epoch": 0.84, "learning_rate": 3.962451489813557e-07, "logits/chosen": -2.8683414459228516, "logits/rejected": -2.3018546104431152, "logps/chosen": -103.11749267578125, "logps/rejected": -972.2452392578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.529866099357605, "rewards/margins": 8.799062728881836, "rewards/rejected": -9.32892894744873, "step": 69880 }, { "epoch": 0.84, "learning_rate": 3.9568097197979243e-07, "logits/chosen": -2.891045331954956, "logits/rejected": -2.134812831878662, "logps/chosen": -122.97249603271484, "logps/rejected": -948.9400634765625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6817303895950317, "rewards/margins": 8.369939804077148, "rewards/rejected": -9.051669120788574, "step": 69890 }, { "epoch": 0.84, "learning_rate": 3.951171623864866e-07, "logits/chosen": -2.8436594009399414, "logits/rejected": -2.1963706016540527, "logps/chosen": -115.412353515625, "logps/rejected": -1010.7542724609375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6779887080192566, "rewards/margins": 9.02495002746582, "rewards/rejected": -9.702938079833984, "step": 69900 }, { "epoch": 0.84, "learning_rate": 3.9455372029987835e-07, "logits/chosen": -2.892038345336914, "logits/rejected": -2.4477057456970215, "logps/chosen": -90.81026458740234, "logps/rejected": -914.9568481445312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.46741771697998047, "rewards/margins": 8.300812721252441, "rewards/rejected": -8.768230438232422, "step": 69910 }, { "epoch": 0.84, "learning_rate": 3.9399064581834374e-07, "logits/chosen": -2.8311381340026855, "logits/rejected": -2.217411518096924, "logps/chosen": -118.40960693359375, "logps/rejected": -977.48046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6698318719863892, "rewards/margins": 8.702558517456055, "rewards/rejected": -9.372390747070312, "step": 69920 }, { "epoch": 0.84, "learning_rate": 3.934279390401921e-07, "logits/chosen": -2.8153624534606934, "logits/rejected": -2.094756603240967, "logps/chosen": -126.5316162109375, "logps/rejected": -924.9714965820312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7424582839012146, "rewards/margins": 8.11225414276123, "rewards/rejected": -8.854711532592773, "step": 69930 }, { "epoch": 0.84, "learning_rate": 3.9286560006367186e-07, "logits/chosen": -2.8536486625671387, "logits/rejected": -2.4071249961853027, "logps/chosen": -88.34443664550781, "logps/rejected": -929.6292724609375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.44150620698928833, "rewards/margins": 8.464284896850586, "rewards/rejected": -8.905790328979492, "step": 69940 }, { "epoch": 0.84, "learning_rate": 3.9230362898696556e-07, "logits/chosen": -2.913567304611206, "logits/rejected": -2.5024266242980957, "logps/chosen": -84.16188049316406, "logps/rejected": -784.1903076171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4033777713775635, "rewards/margins": 7.066001892089844, "rewards/rejected": -7.469380855560303, "step": 69950 }, { "epoch": 0.84, "learning_rate": 3.9174202590819166e-07, "logits/chosen": -2.8535869121551514, "logits/rejected": -2.3781189918518066, "logps/chosen": -90.59992218017578, "logps/rejected": -886.4342041015625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4728333353996277, "rewards/margins": 8.025874137878418, "rewards/rejected": -8.49870777130127, "step": 69960 }, { "epoch": 0.84, "learning_rate": 3.911807909254045e-07, "logits/chosen": -2.8701491355895996, "logits/rejected": -2.3307878971099854, "logps/chosen": -97.00215148925781, "logps/rejected": -845.3485107421875, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -0.43486690521240234, "rewards/margins": 7.647651672363281, "rewards/rejected": -8.082517623901367, "step": 69970 }, { "epoch": 0.84, "learning_rate": 3.9061992413659516e-07, "logits/chosen": -2.9367735385894775, "logits/rejected": -2.613412380218506, "logps/chosen": -71.53529357910156, "logps/rejected": -848.8966064453125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.344136506319046, "rewards/margins": 7.765737056732178, "rewards/rejected": -8.109872817993164, "step": 69980 }, { "epoch": 0.84, "learning_rate": 3.900594256396875e-07, "logits/chosen": -2.8415462970733643, "logits/rejected": -1.9963197708129883, "logps/chosen": -139.38726806640625, "logps/rejected": -1177.968017578125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8355057835578918, "rewards/margins": 10.509038925170898, "rewards/rejected": -11.34454345703125, "step": 69990 }, { "epoch": 0.84, "learning_rate": 3.894992955325444e-07, "logits/chosen": -2.8321549892425537, "logits/rejected": -2.271388530731201, "logps/chosen": -127.39459228515625, "logps/rejected": -931.0836791992188, "loss": 0.0994, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7648071050643921, "rewards/margins": 8.17214298248291, "rewards/rejected": -8.936949729919434, "step": 70000 }, { "epoch": 0.84, "learning_rate": 3.889395339129626e-07, "logits/chosen": -2.903348207473755, "logits/rejected": -2.2209486961364746, "logps/chosen": -105.12764739990234, "logps/rejected": -971.6945190429688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5150051712989807, "rewards/margins": 8.812971115112305, "rewards/rejected": -9.327977180480957, "step": 70010 }, { "epoch": 0.84, "learning_rate": 3.8838014087867473e-07, "logits/chosen": -2.8738224506378174, "logits/rejected": -2.4474880695343018, "logps/chosen": -95.47694396972656, "logps/rejected": -862.4324340820312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5357370972633362, "rewards/margins": 7.70025634765625, "rewards/rejected": -8.235994338989258, "step": 70020 }, { "epoch": 0.84, "learning_rate": 3.878211165273496e-07, "logits/chosen": -2.894176959991455, "logits/rejected": -2.139650344848633, "logps/chosen": -112.77076721191406, "logps/rejected": -956.4885864257812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5932247638702393, "rewards/margins": 8.574956893920898, "rewards/rejected": -9.168182373046875, "step": 70030 }, { "epoch": 0.84, "learning_rate": 3.8726246095659117e-07, "logits/chosen": -2.888964891433716, "logits/rejected": -2.5769522190093994, "logps/chosen": -140.71353149414062, "logps/rejected": -769.2499389648438, "loss": 0.3307, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.0119744539260864, "rewards/margins": 6.294029712677002, "rewards/rejected": -7.306003570556641, "step": 70040 }, { "epoch": 0.84, "learning_rate": 3.8670417426393964e-07, "logits/chosen": -2.843684196472168, "logits/rejected": -2.122152805328369, "logps/chosen": -116.7848892211914, "logps/rejected": -1025.52099609375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6469534635543823, "rewards/margins": 9.215070724487305, "rewards/rejected": -9.862022399902344, "step": 70050 }, { "epoch": 0.84, "learning_rate": 3.861462565468696e-07, "logits/chosen": -2.895061492919922, "logits/rejected": -2.1280550956726074, "logps/chosen": -129.0179443359375, "logps/rejected": -1153.5224609375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7714487314224243, "rewards/margins": 10.351922988891602, "rewards/rejected": -11.123371124267578, "step": 70060 }, { "epoch": 0.84, "learning_rate": 3.8558870790279204e-07, "logits/chosen": -2.889590263366699, "logits/rejected": -1.9093048572540283, "logps/chosen": -130.1112823486328, "logps/rejected": -1014.2227783203125, "loss": 0.1466, "rewards/accuracies": 1.0, "rewards/chosen": -0.70924311876297, "rewards/margins": 9.017618179321289, "rewards/rejected": -9.726861953735352, "step": 70070 }, { "epoch": 0.84, "learning_rate": 3.8503152842905346e-07, "logits/chosen": -2.8443243503570557, "logits/rejected": -2.2962794303894043, "logps/chosen": -95.59781646728516, "logps/rejected": -927.6760864257812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4520609974861145, "rewards/margins": 8.447441101074219, "rewards/rejected": -8.899502754211426, "step": 70080 }, { "epoch": 0.84, "learning_rate": 3.8447471822293607e-07, "logits/chosen": -2.8518028259277344, "logits/rejected": -2.4045443534851074, "logps/chosen": -104.40641021728516, "logps/rejected": -884.4344482421875, "loss": 0.0861, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6138173937797546, "rewards/margins": 7.845022678375244, "rewards/rejected": -8.45883846282959, "step": 70090 }, { "epoch": 0.84, "learning_rate": 3.8391827738165717e-07, "logits/chosen": -2.8496439456939697, "logits/rejected": -2.2407925128936768, "logps/chosen": -144.80189514160156, "logps/rejected": -900.88720703125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.964934229850769, "rewards/margins": 7.6461920738220215, "rewards/rejected": -8.611126899719238, "step": 70100 }, { "epoch": 0.84, "learning_rate": 3.833622060023698e-07, "logits/chosen": -2.8717188835144043, "logits/rejected": -2.1609458923339844, "logps/chosen": -112.00439453125, "logps/rejected": -1037.76171875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6146687865257263, "rewards/margins": 9.361968040466309, "rewards/rejected": -9.97663688659668, "step": 70110 }, { "epoch": 0.84, "learning_rate": 3.8280650418216327e-07, "logits/chosen": -2.877786874771118, "logits/rejected": -2.497398853302002, "logps/chosen": -90.01026916503906, "logps/rejected": -920.4544067382812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.45560216903686523, "rewards/margins": 8.365656852722168, "rewards/rejected": -8.821258544921875, "step": 70120 }, { "epoch": 0.84, "learning_rate": 3.822511720180602e-07, "logits/chosen": -2.9087653160095215, "logits/rejected": -2.4266788959503174, "logps/chosen": -94.12083435058594, "logps/rejected": -883.4638671875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.49548521637916565, "rewards/margins": 7.949976444244385, "rewards/rejected": -8.445462226867676, "step": 70130 }, { "epoch": 0.84, "learning_rate": 3.8169620960702065e-07, "logits/chosen": -2.8520667552948, "logits/rejected": -2.327327251434326, "logps/chosen": -95.0368423461914, "logps/rejected": -974.8709716796875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.4886016845703125, "rewards/margins": 8.871820449829102, "rewards/rejected": -9.360421180725098, "step": 70140 }, { "epoch": 0.84, "learning_rate": 3.8114161704593976e-07, "logits/chosen": -2.9201273918151855, "logits/rejected": -2.4379613399505615, "logps/chosen": -100.81105041503906, "logps/rejected": -934.0552978515625, "loss": 0.1572, "rewards/accuracies": 1.0, "rewards/chosen": -0.5436088442802429, "rewards/margins": 8.411006927490234, "rewards/rejected": -8.954615592956543, "step": 70150 }, { "epoch": 0.84, "learning_rate": 3.8058739443164737e-07, "logits/chosen": -2.8769469261169434, "logits/rejected": -2.2447609901428223, "logps/chosen": -133.8598175048828, "logps/rejected": -974.0704956054688, "loss": 0.2355, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9036208391189575, "rewards/margins": 8.446843147277832, "rewards/rejected": -9.350464820861816, "step": 70160 }, { "epoch": 0.84, "learning_rate": 3.800335418609097e-07, "logits/chosen": -2.857006072998047, "logits/rejected": -2.1955699920654297, "logps/chosen": -116.87733459472656, "logps/rejected": -965.0852661132812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6644631624221802, "rewards/margins": 8.582536697387695, "rewards/rejected": -9.246999740600586, "step": 70170 }, { "epoch": 0.84, "learning_rate": 3.794800594304282e-07, "logits/chosen": -2.9217429161071777, "logits/rejected": -2.291250705718994, "logps/chosen": -120.45343017578125, "logps/rejected": -1036.8189697265625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6826203465461731, "rewards/margins": 9.275800704956055, "rewards/rejected": -9.958422660827637, "step": 70180 }, { "epoch": 0.84, "learning_rate": 3.789269472368384e-07, "logits/chosen": -2.8236842155456543, "logits/rejected": -2.4687819480895996, "logps/chosen": -82.41716003417969, "logps/rejected": -745.19482421875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.44895458221435547, "rewards/margins": 6.6433563232421875, "rewards/rejected": -7.092310905456543, "step": 70190 }, { "epoch": 0.84, "learning_rate": 3.7837420537671253e-07, "logits/chosen": -2.937870502471924, "logits/rejected": -2.3772153854370117, "logps/chosen": -113.89395904541016, "logps/rejected": -985.5616455078125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6315978765487671, "rewards/margins": 8.830270767211914, "rewards/rejected": -9.461868286132812, "step": 70200 }, { "epoch": 0.84, "learning_rate": 3.778218339465578e-07, "logits/chosen": -2.861504316329956, "logits/rejected": -2.2642693519592285, "logps/chosen": -101.28779602050781, "logps/rejected": -935.5490112304688, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.5275372862815857, "rewards/margins": 8.44422435760498, "rewards/rejected": -8.971761703491211, "step": 70210 }, { "epoch": 0.84, "learning_rate": 3.7726983304281653e-07, "logits/chosen": -2.854588270187378, "logits/rejected": -2.3472886085510254, "logps/chosen": -92.87020111083984, "logps/rejected": -877.5827026367188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.47511744499206543, "rewards/margins": 7.914811134338379, "rewards/rejected": -8.389927864074707, "step": 70220 }, { "epoch": 0.84, "learning_rate": 3.7671820276186653e-07, "logits/chosen": -2.928316116333008, "logits/rejected": -2.258927822113037, "logps/chosen": -104.2401123046875, "logps/rejected": -980.3972778320312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5964659452438354, "rewards/margins": 8.813376426696777, "rewards/rejected": -9.409842491149902, "step": 70230 }, { "epoch": 0.84, "learning_rate": 3.76166943200022e-07, "logits/chosen": -2.8543035984039307, "logits/rejected": -2.140033006668091, "logps/chosen": -129.64883422851562, "logps/rejected": -1021.2657470703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7576670050621033, "rewards/margins": 9.05136775970459, "rewards/rejected": -9.809035301208496, "step": 70240 }, { "epoch": 0.84, "learning_rate": 3.7561605445353035e-07, "logits/chosen": -2.8950066566467285, "logits/rejected": -2.3953981399536133, "logps/chosen": -81.65180969238281, "logps/rejected": -874.9708862304688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.3772640824317932, "rewards/margins": 7.999286651611328, "rewards/rejected": -8.376550674438477, "step": 70250 }, { "epoch": 0.84, "learning_rate": 3.7506553661857486e-07, "logits/chosen": -2.847470283508301, "logits/rejected": -2.255283832550049, "logps/chosen": -111.22074890136719, "logps/rejected": -944.7478637695312, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6094983816146851, "rewards/margins": 8.448464393615723, "rewards/rejected": -9.057963371276855, "step": 70260 }, { "epoch": 0.84, "learning_rate": 3.7451538979127536e-07, "logits/chosen": -2.886845588684082, "logits/rejected": -2.342236042022705, "logps/chosen": -107.50856018066406, "logps/rejected": -924.0316162109375, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -0.6095012426376343, "rewards/margins": 8.225534439086914, "rewards/rejected": -8.83503532409668, "step": 70270 }, { "epoch": 0.84, "learning_rate": 3.739656140676853e-07, "logits/chosen": -2.9030649662017822, "logits/rejected": -2.414665699005127, "logps/chosen": -110.7298812866211, "logps/rejected": -931.8367309570312, "loss": 0.084, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.628788948059082, "rewards/margins": 8.28808879852295, "rewards/rejected": -8.916879653930664, "step": 70280 }, { "epoch": 0.84, "learning_rate": 3.7341620954379434e-07, "logits/chosen": -2.846834182739258, "logits/rejected": -2.3632726669311523, "logps/chosen": -89.89527893066406, "logps/rejected": -901.1574096679688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4573475420475006, "rewards/margins": 8.16700267791748, "rewards/rejected": -8.624350547790527, "step": 70290 }, { "epoch": 0.84, "learning_rate": 3.728671763155273e-07, "logits/chosen": -2.910334348678589, "logits/rejected": -2.6177945137023926, "logps/chosen": -92.61808776855469, "logps/rejected": -730.5219116210938, "loss": 0.1716, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5014069676399231, "rewards/margins": 6.441407680511475, "rewards/rejected": -6.94281530380249, "step": 70300 }, { "epoch": 0.84, "learning_rate": 3.723185144787428e-07, "logits/chosen": -2.876616954803467, "logits/rejected": -2.5095951557159424, "logps/chosen": -102.52437591552734, "logps/rejected": -867.9089965820312, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.5908258557319641, "rewards/margins": 7.704161167144775, "rewards/rejected": -8.294986724853516, "step": 70310 }, { "epoch": 0.84, "learning_rate": 3.717702241292365e-07, "logits/chosen": -2.9070215225219727, "logits/rejected": -2.202314615249634, "logps/chosen": -134.6937713623047, "logps/rejected": -1056.845947265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7672552466392517, "rewards/margins": 9.384943008422852, "rewards/rejected": -10.152198791503906, "step": 70320 }, { "epoch": 0.84, "learning_rate": 3.712223053627381e-07, "logits/chosen": -2.897024631500244, "logits/rejected": -2.4373559951782227, "logps/chosen": -84.99064636230469, "logps/rejected": -876.1272583007812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.42875251173973083, "rewards/margins": 7.972177028656006, "rewards/rejected": -8.400930404663086, "step": 70330 }, { "epoch": 0.84, "learning_rate": 3.706747582749126e-07, "logits/chosen": -2.8874123096466064, "logits/rejected": -2.491990327835083, "logps/chosen": -90.56631469726562, "logps/rejected": -879.0601806640625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4852883815765381, "rewards/margins": 7.915633201599121, "rewards/rejected": -8.400920867919922, "step": 70340 }, { "epoch": 0.84, "learning_rate": 3.7012758296136043e-07, "logits/chosen": -2.8771066665649414, "logits/rejected": -2.268331289291382, "logps/chosen": -141.90235900878906, "logps/rejected": -1020.03662109375, "loss": 0.1633, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9527098536491394, "rewards/margins": 8.852097511291504, "rewards/rejected": -9.80480670928955, "step": 70350 }, { "epoch": 0.84, "learning_rate": 3.6958077951761704e-07, "logits/chosen": -2.920264720916748, "logits/rejected": -2.2008590698242188, "logps/chosen": -116.85142517089844, "logps/rejected": -1032.715576171875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6447175741195679, "rewards/margins": 9.280047416687012, "rewards/rejected": -9.924764633178711, "step": 70360 }, { "epoch": 0.84, "learning_rate": 3.6903434803915284e-07, "logits/chosen": -2.8892016410827637, "logits/rejected": -2.250300168991089, "logps/chosen": -106.92506408691406, "logps/rejected": -1005.3893432617188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5743086338043213, "rewards/margins": 9.075898170471191, "rewards/rejected": -9.65020751953125, "step": 70370 }, { "epoch": 0.84, "learning_rate": 3.6848828862137254e-07, "logits/chosen": -2.842766523361206, "logits/rejected": -2.0259759426116943, "logps/chosen": -154.0604248046875, "logps/rejected": -1069.74658203125, "loss": 0.1385, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9914697408676147, "rewards/margins": 9.302675247192383, "rewards/rejected": -10.294144630432129, "step": 70380 }, { "epoch": 0.84, "learning_rate": 3.6794260135961685e-07, "logits/chosen": -2.9243922233581543, "logits/rejected": -2.4111266136169434, "logps/chosen": -95.32247924804688, "logps/rejected": -854.3225708007812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.492499440908432, "rewards/margins": 7.678572177886963, "rewards/rejected": -8.171072006225586, "step": 70390 }, { "epoch": 0.84, "learning_rate": 3.673972863491618e-07, "logits/chosen": -2.8656668663024902, "logits/rejected": -2.192551374435425, "logps/chosen": -122.14329528808594, "logps/rejected": -977.9515380859375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6909151673316956, "rewards/margins": 8.688549041748047, "rewards/rejected": -9.379464149475098, "step": 70400 }, { "epoch": 0.84, "learning_rate": 3.66852343685217e-07, "logits/chosen": -2.8949742317199707, "logits/rejected": -2.3200972080230713, "logps/chosen": -138.88722229003906, "logps/rejected": -970.9638671875, "loss": 0.0935, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9525511860847473, "rewards/margins": 8.366647720336914, "rewards/rejected": -9.319199562072754, "step": 70410 }, { "epoch": 0.84, "learning_rate": 3.663077734629289e-07, "logits/chosen": -2.8643221855163574, "logits/rejected": -1.9676882028579712, "logps/chosen": -142.57089233398438, "logps/rejected": -1165.5042724609375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8090858459472656, "rewards/margins": 10.420183181762695, "rewards/rejected": -11.229269027709961, "step": 70420 }, { "epoch": 0.84, "learning_rate": 3.6576357577737726e-07, "logits/chosen": -2.9105923175811768, "logits/rejected": -2.2706212997436523, "logps/chosen": -112.73158264160156, "logps/rejected": -979.04638671875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6033774614334106, "rewards/margins": 8.786080360412598, "rewards/rejected": -9.389456748962402, "step": 70430 }, { "epoch": 0.84, "learning_rate": 3.652197507235783e-07, "logits/chosen": -2.8541810512542725, "logits/rejected": -2.3336246013641357, "logps/chosen": -94.65678405761719, "logps/rejected": -880.3123779296875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.49425262212753296, "rewards/margins": 7.923532009124756, "rewards/rejected": -8.417784690856934, "step": 70440 }, { "epoch": 0.84, "learning_rate": 3.6467629839648106e-07, "logits/chosen": -2.9340925216674805, "logits/rejected": -2.320796489715576, "logps/chosen": -111.13275146484375, "logps/rejected": -926.5032958984375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.6119588017463684, "rewards/margins": 8.276918411254883, "rewards/rejected": -8.888876914978027, "step": 70450 }, { "epoch": 0.84, "learning_rate": 3.641332188909716e-07, "logits/chosen": -2.8529653549194336, "logits/rejected": -2.2659084796905518, "logps/chosen": -129.16180419921875, "logps/rejected": -954.6962890625, "loss": 0.11, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8077186346054077, "rewards/margins": 8.347833633422852, "rewards/rejected": -9.155552864074707, "step": 70460 }, { "epoch": 0.84, "learning_rate": 3.6359051230187015e-07, "logits/chosen": -2.8792812824249268, "logits/rejected": -2.3105504512786865, "logps/chosen": -108.53013610839844, "logps/rejected": -916.8634643554688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6100622415542603, "rewards/margins": 8.162275314331055, "rewards/rejected": -8.772337913513184, "step": 70470 }, { "epoch": 0.84, "learning_rate": 3.630481787239315e-07, "logits/chosen": -2.8917877674102783, "logits/rejected": -2.097045660018921, "logps/chosen": -113.0946044921875, "logps/rejected": -1016.3187255859375, "loss": 0.1508, "rewards/accuracies": 1.0, "rewards/chosen": -0.6247411966323853, "rewards/margins": 9.137250900268555, "rewards/rejected": -9.761991500854492, "step": 70480 }, { "epoch": 0.84, "learning_rate": 3.6250621825184574e-07, "logits/chosen": -2.8654284477233887, "logits/rejected": -2.177004814147949, "logps/chosen": -106.22224426269531, "logps/rejected": -999.1697998046875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.59874427318573, "rewards/margins": 9.017213821411133, "rewards/rejected": -9.615958213806152, "step": 70490 }, { "epoch": 0.84, "learning_rate": 3.61964630980238e-07, "logits/chosen": -2.921330213546753, "logits/rejected": -2.3823142051696777, "logps/chosen": -107.26883697509766, "logps/rejected": -976.9111328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5905518531799316, "rewards/margins": 8.778725624084473, "rewards/rejected": -9.369277954101562, "step": 70500 }, { "epoch": 0.84, "learning_rate": 3.6142341700366736e-07, "logits/chosen": -2.890251874923706, "logits/rejected": -2.313123941421509, "logps/chosen": -104.25433349609375, "logps/rejected": -1006.9373168945312, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.5159545540809631, "rewards/margins": 9.161480903625488, "rewards/rejected": -9.677434921264648, "step": 70510 }, { "epoch": 0.84, "learning_rate": 3.608825764166282e-07, "logits/chosen": -2.893775224685669, "logits/rejected": -2.477267026901245, "logps/chosen": -112.14683532714844, "logps/rejected": -863.6357421875, "loss": 0.084, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7271841168403625, "rewards/margins": 7.526309967041016, "rewards/rejected": -8.253494262695312, "step": 70520 }, { "epoch": 0.84, "learning_rate": 3.603421093135501e-07, "logits/chosen": -2.932370901107788, "logits/rejected": -2.370832681655884, "logps/chosen": -96.89358520507812, "logps/rejected": -936.9703369140625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.47310155630111694, "rewards/margins": 8.507596015930176, "rewards/rejected": -8.980697631835938, "step": 70530 }, { "epoch": 0.84, "learning_rate": 3.5980201578879714e-07, "logits/chosen": -2.8537392616271973, "logits/rejected": -2.2719624042510986, "logps/chosen": -131.8360595703125, "logps/rejected": -923.9114379882812, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7766309380531311, "rewards/margins": 8.068180084228516, "rewards/rejected": -8.844810485839844, "step": 70540 }, { "epoch": 0.84, "learning_rate": 3.592622959366679e-07, "logits/chosen": -2.8870866298675537, "logits/rejected": -2.379246473312378, "logps/chosen": -95.05790710449219, "logps/rejected": -884.5490112304688, "loss": 0.0757, "rewards/accuracies": 1.0, "rewards/chosen": -0.5140591859817505, "rewards/margins": 7.956063747406006, "rewards/rejected": -8.470123291015625, "step": 70550 }, { "epoch": 0.84, "learning_rate": 3.5872294985139636e-07, "logits/chosen": -2.891263484954834, "logits/rejected": -2.381068468093872, "logps/chosen": -102.96402740478516, "logps/rejected": -911.380859375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6119597554206848, "rewards/margins": 8.107186317443848, "rewards/rejected": -8.719144821166992, "step": 70560 }, { "epoch": 0.84, "learning_rate": 3.5818397762715116e-07, "logits/chosen": -2.875332832336426, "logits/rejected": -2.509612560272217, "logps/chosen": -77.67527770996094, "logps/rejected": -831.1434326171875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.36681056022644043, "rewards/margins": 7.568826198577881, "rewards/rejected": -7.9356369972229, "step": 70570 }, { "epoch": 0.84, "learning_rate": 3.576453793580342e-07, "logits/chosen": -2.8697783946990967, "logits/rejected": -2.0159342288970947, "logps/chosen": -136.09378051757812, "logps/rejected": -1167.1744384765625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7785779237747192, "rewards/margins": 10.496150970458984, "rewards/rejected": -11.27472972869873, "step": 70580 }, { "epoch": 0.84, "learning_rate": 3.571071551380839e-07, "logits/chosen": -2.847426652908325, "logits/rejected": -2.274155855178833, "logps/chosen": -108.96537780761719, "logps/rejected": -1010.1857299804688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6512845158576965, "rewards/margins": 9.072702407836914, "rewards/rejected": -9.723986625671387, "step": 70590 }, { "epoch": 0.85, "learning_rate": 3.5656930506127287e-07, "logits/chosen": -2.8827977180480957, "logits/rejected": -2.3758881092071533, "logps/chosen": -97.71431732177734, "logps/rejected": -871.8858642578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.55070960521698, "rewards/margins": 7.788405418395996, "rewards/rejected": -8.339115142822266, "step": 70600 }, { "epoch": 0.85, "learning_rate": 3.5603182922150786e-07, "logits/chosen": -2.8661811351776123, "logits/rejected": -2.4418256282806396, "logps/chosen": -104.27099609375, "logps/rejected": -893.6622314453125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.5916503667831421, "rewards/margins": 7.965732574462891, "rewards/rejected": -8.55738353729248, "step": 70610 }, { "epoch": 0.85, "learning_rate": 3.5549472771263085e-07, "logits/chosen": -2.864584445953369, "logits/rejected": -2.4225587844848633, "logps/chosen": -115.65287780761719, "logps/rejected": -937.6932373046875, "loss": 0.0262, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7226029634475708, "rewards/margins": 8.269820213317871, "rewards/rejected": -8.992423057556152, "step": 70620 }, { "epoch": 0.85, "learning_rate": 3.5495800062841813e-07, "logits/chosen": -2.904874086380005, "logits/rejected": -2.1586270332336426, "logps/chosen": -127.04183197021484, "logps/rejected": -980.26513671875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6801787614822388, "rewards/margins": 8.724570274353027, "rewards/rejected": -9.404748916625977, "step": 70630 }, { "epoch": 0.85, "learning_rate": 3.544216480625817e-07, "logits/chosen": -2.8910911083221436, "logits/rejected": -2.5198569297790527, "logps/chosen": -80.1303939819336, "logps/rejected": -907.3291015625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.3814239203929901, "rewards/margins": 8.313998222351074, "rewards/rejected": -8.695423126220703, "step": 70640 }, { "epoch": 0.85, "learning_rate": 3.538856701087656e-07, "logits/chosen": -2.910890817642212, "logits/rejected": -2.6060357093811035, "logps/chosen": -70.6723861694336, "logps/rejected": -776.8209228515625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3217741847038269, "rewards/margins": 7.072686195373535, "rewards/rejected": -7.394460201263428, "step": 70650 }, { "epoch": 0.85, "learning_rate": 3.5335006686055053e-07, "logits/chosen": -2.8910913467407227, "logits/rejected": -2.5556845664978027, "logps/chosen": -113.4961166381836, "logps/rejected": -824.9851684570312, "loss": 0.1508, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7382300496101379, "rewards/margins": 7.143441677093506, "rewards/rejected": -7.8816728591918945, "step": 70660 }, { "epoch": 0.85, "learning_rate": 3.528148384114524e-07, "logits/chosen": -2.8768646717071533, "logits/rejected": -2.514223575592041, "logps/chosen": -80.79374694824219, "logps/rejected": -839.9208984375, "loss": 0.0945, "rewards/accuracies": 1.0, "rewards/chosen": -0.393049418926239, "rewards/margins": 7.6289496421813965, "rewards/rejected": -8.02199935913086, "step": 70670 }, { "epoch": 0.85, "learning_rate": 3.522799848549199e-07, "logits/chosen": -2.8814339637756348, "logits/rejected": -2.533141851425171, "logps/chosen": -88.7564697265625, "logps/rejected": -824.3349609375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4284074306488037, "rewards/margins": 7.436948299407959, "rewards/rejected": -7.865355491638184, "step": 70680 }, { "epoch": 0.85, "learning_rate": 3.5174550628433754e-07, "logits/chosen": -2.8889451026916504, "logits/rejected": -2.5488173961639404, "logps/chosen": -107.11091613769531, "logps/rejected": -921.21728515625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6317709684371948, "rewards/margins": 8.178866386413574, "rewards/rejected": -8.810637474060059, "step": 70690 }, { "epoch": 0.85, "learning_rate": 3.5121140279302246e-07, "logits/chosen": -2.866309404373169, "logits/rejected": -2.437727451324463, "logps/chosen": -76.23143005371094, "logps/rejected": -851.4912109375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.36909550428390503, "rewards/margins": 7.786130428314209, "rewards/rejected": -8.155226707458496, "step": 70700 }, { "epoch": 0.85, "learning_rate": 3.506776744742288e-07, "logits/chosen": -2.851257085800171, "logits/rejected": -2.47345232963562, "logps/chosen": -88.34500885009766, "logps/rejected": -893.5564575195312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4439353346824646, "rewards/margins": 8.112146377563477, "rewards/rejected": -8.556081771850586, "step": 70710 }, { "epoch": 0.85, "learning_rate": 3.5014432142114334e-07, "logits/chosen": -2.918602228164673, "logits/rejected": -2.344177722930908, "logps/chosen": -114.2546157836914, "logps/rejected": -831.5739135742188, "loss": 0.0977, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6641315221786499, "rewards/margins": 7.257961273193359, "rewards/rejected": -7.922092437744141, "step": 70720 }, { "epoch": 0.85, "learning_rate": 3.496113437268886e-07, "logits/chosen": -2.8496155738830566, "logits/rejected": -2.276994228363037, "logps/chosen": -106.1871109008789, "logps/rejected": -992.1066284179688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6275933980941772, "rewards/margins": 8.891469955444336, "rewards/rejected": -9.519063949584961, "step": 70730 }, { "epoch": 0.85, "learning_rate": 3.4907874148452094e-07, "logits/chosen": -2.9022018909454346, "logits/rejected": -2.437302827835083, "logps/chosen": -95.50322723388672, "logps/rejected": -815.1441650390625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.5376803278923035, "rewards/margins": 7.243321895599365, "rewards/rejected": -7.781002044677734, "step": 70740 }, { "epoch": 0.85, "learning_rate": 3.485465147870309e-07, "logits/chosen": -2.8423190116882324, "logits/rejected": -2.2469093799591064, "logps/chosen": -106.32652282714844, "logps/rejected": -943.6605224609375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5548759698867798, "rewards/margins": 8.495462417602539, "rewards/rejected": -9.050336837768555, "step": 70750 }, { "epoch": 0.85, "learning_rate": 3.4801466372734463e-07, "logits/chosen": -2.880283832550049, "logits/rejected": -2.334228038787842, "logps/chosen": -95.64557647705078, "logps/rejected": -925.1614990234375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5186771750450134, "rewards/margins": 8.34582233428955, "rewards/rejected": -8.86449909210205, "step": 70760 }, { "epoch": 0.85, "learning_rate": 3.4748318839832043e-07, "logits/chosen": -2.899399757385254, "logits/rejected": -2.2817327976226807, "logps/chosen": -114.6002197265625, "logps/rejected": -996.5205078125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6631354093551636, "rewards/margins": 8.900550842285156, "rewards/rejected": -9.56368637084961, "step": 70770 }, { "epoch": 0.85, "learning_rate": 3.4695208889275327e-07, "logits/chosen": -2.881117105484009, "logits/rejected": -2.3442490100860596, "logps/chosen": -98.99974822998047, "logps/rejected": -909.51220703125, "loss": 0.0818, "rewards/accuracies": 1.0, "rewards/chosen": -0.5280276536941528, "rewards/margins": 8.198445320129395, "rewards/rejected": -8.726472854614258, "step": 70780 }, { "epoch": 0.85, "learning_rate": 3.4642136530337155e-07, "logits/chosen": -2.8758294582366943, "logits/rejected": -2.336630344390869, "logps/chosen": -96.87366485595703, "logps/rejected": -883.6329956054688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5271893739700317, "rewards/margins": 7.9342756271362305, "rewards/rejected": -8.461465835571289, "step": 70790 }, { "epoch": 0.85, "learning_rate": 3.4589101772283835e-07, "logits/chosen": -2.89540958404541, "logits/rejected": -2.324957847595215, "logps/chosen": -118.5957260131836, "logps/rejected": -941.376953125, "loss": 0.1143, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7518479824066162, "rewards/margins": 8.260533332824707, "rewards/rejected": -9.012380599975586, "step": 70800 }, { "epoch": 0.85, "learning_rate": 3.453610462437504e-07, "logits/chosen": -2.872389554977417, "logits/rejected": -2.0757555961608887, "logps/chosen": -128.21290588378906, "logps/rejected": -1001.0983276367188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7429514527320862, "rewards/margins": 8.863210678100586, "rewards/rejected": -9.606162071228027, "step": 70810 }, { "epoch": 0.85, "learning_rate": 3.448314509586401e-07, "logits/chosen": -2.862761974334717, "logits/rejected": -2.3970372676849365, "logps/chosen": -104.11827087402344, "logps/rejected": -984.3438720703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6268336176872253, "rewards/margins": 8.821246147155762, "rewards/rejected": -9.448080062866211, "step": 70820 }, { "epoch": 0.85, "learning_rate": 3.443022319599723e-07, "logits/chosen": -2.9453349113464355, "logits/rejected": -2.528613805770874, "logps/chosen": -102.36610412597656, "logps/rejected": -951.3681640625, "loss": 0.0793, "rewards/accuracies": 1.0, "rewards/chosen": -0.581012487411499, "rewards/margins": 8.541865348815918, "rewards/rejected": -9.122878074645996, "step": 70830 }, { "epoch": 0.85, "learning_rate": 3.4377338934014765e-07, "logits/chosen": -2.909804105758667, "logits/rejected": -2.395073413848877, "logps/chosen": -120.05766296386719, "logps/rejected": -918.9210815429688, "loss": 0.0788, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7273600697517395, "rewards/margins": 8.071891784667969, "rewards/rejected": -8.799250602722168, "step": 70840 }, { "epoch": 0.85, "learning_rate": 3.4324492319150056e-07, "logits/chosen": -2.8729848861694336, "logits/rejected": -2.495387315750122, "logps/chosen": -90.27096557617188, "logps/rejected": -841.0245971679688, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4493246078491211, "rewards/margins": 7.58843994140625, "rewards/rejected": -8.037765502929688, "step": 70850 }, { "epoch": 0.85, "learning_rate": 3.4271683360629945e-07, "logits/chosen": -2.9014573097229004, "logits/rejected": -2.0849428176879883, "logps/chosen": -158.5771942138672, "logps/rejected": -1102.078369140625, "loss": 0.0942, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9599162936210632, "rewards/margins": 9.64659309387207, "rewards/rejected": -10.6065092086792, "step": 70860 }, { "epoch": 0.85, "learning_rate": 3.4218912067674804e-07, "logits/chosen": -2.872396469116211, "logits/rejected": -2.2171242237091064, "logps/chosen": -124.05421447753906, "logps/rejected": -1018.7990112304688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6495317816734314, "rewards/margins": 9.129648208618164, "rewards/rejected": -9.779179573059082, "step": 70870 }, { "epoch": 0.85, "learning_rate": 3.4166178449498267e-07, "logits/chosen": -2.8588976860046387, "logits/rejected": -2.3195252418518066, "logps/chosen": -104.00172424316406, "logps/rejected": -876.1905517578125, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.6023426055908203, "rewards/margins": 7.7760419845581055, "rewards/rejected": -8.37838363647461, "step": 70880 }, { "epoch": 0.85, "learning_rate": 3.411348251530761e-07, "logits/chosen": -2.8873589038848877, "logits/rejected": -2.2738747596740723, "logps/chosen": -96.9563217163086, "logps/rejected": -964.3062744140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.49851685762405396, "rewards/margins": 8.734932899475098, "rewards/rejected": -9.233448028564453, "step": 70890 }, { "epoch": 0.85, "learning_rate": 3.406082427430324e-07, "logits/chosen": -2.915146827697754, "logits/rejected": -2.4123640060424805, "logps/chosen": -88.42681884765625, "logps/rejected": -939.2947387695312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.42921900749206543, "rewards/margins": 8.573019027709961, "rewards/rejected": -9.002237319946289, "step": 70900 }, { "epoch": 0.85, "learning_rate": 3.40082037356792e-07, "logits/chosen": -2.8660988807678223, "logits/rejected": -2.541707992553711, "logps/chosen": -81.32215881347656, "logps/rejected": -865.6912231445312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.42793789505958557, "rewards/margins": 7.851849555969238, "rewards/rejected": -8.279787063598633, "step": 70910 }, { "epoch": 0.85, "learning_rate": 3.395562090862289e-07, "logits/chosen": -2.894883632659912, "logits/rejected": -2.590879440307617, "logps/chosen": -76.95899963378906, "logps/rejected": -835.38427734375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.35002806782722473, "rewards/margins": 7.607822418212891, "rewards/rejected": -7.957849979400635, "step": 70920 }, { "epoch": 0.85, "learning_rate": 3.390307580231514e-07, "logits/chosen": -2.910261631011963, "logits/rejected": -2.5427308082580566, "logps/chosen": -91.94046020507812, "logps/rejected": -837.03759765625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.5144695043563843, "rewards/margins": 7.466551303863525, "rewards/rejected": -7.981019496917725, "step": 70930 }, { "epoch": 0.85, "learning_rate": 3.385056842593018e-07, "logits/chosen": -2.8380990028381348, "logits/rejected": -2.1816792488098145, "logps/chosen": -122.9380874633789, "logps/rejected": -1010.0563354492188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.740092396736145, "rewards/margins": 8.964441299438477, "rewards/rejected": -9.704533576965332, "step": 70940 }, { "epoch": 0.85, "learning_rate": 3.379809878863563e-07, "logits/chosen": -2.9129984378814697, "logits/rejected": -2.4628398418426514, "logps/chosen": -99.5543212890625, "logps/rejected": -892.8273315429688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5074622631072998, "rewards/margins": 8.03050422668457, "rewards/rejected": -8.537965774536133, "step": 70950 }, { "epoch": 0.85, "learning_rate": 3.3745666899592603e-07, "logits/chosen": -2.9368488788604736, "logits/rejected": -2.294959545135498, "logps/chosen": -105.5101318359375, "logps/rejected": -974.3414306640625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5684439539909363, "rewards/margins": 8.774176597595215, "rewards/rejected": -9.342620849609375, "step": 70960 }, { "epoch": 0.85, "learning_rate": 3.3693272767955477e-07, "logits/chosen": -2.8457767963409424, "logits/rejected": -2.3954765796661377, "logps/chosen": -118.58329772949219, "logps/rejected": -980.9801025390625, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -0.6871899366378784, "rewards/margins": 8.745574951171875, "rewards/rejected": -9.432765007019043, "step": 70970 }, { "epoch": 0.85, "learning_rate": 3.3640916402872174e-07, "logits/chosen": -2.9610483646392822, "logits/rejected": -2.5637593269348145, "logps/chosen": -84.59620666503906, "logps/rejected": -894.9840087890625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.37802380323410034, "rewards/margins": 8.179460525512695, "rewards/rejected": -8.55748462677002, "step": 70980 }, { "epoch": 0.85, "learning_rate": 3.358859781348392e-07, "logits/chosen": -2.8619863986968994, "logits/rejected": -2.2611851692199707, "logps/chosen": -111.376220703125, "logps/rejected": -916.9967651367188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5542870163917542, "rewards/margins": 8.230168342590332, "rewards/rejected": -8.784455299377441, "step": 70990 }, { "epoch": 0.85, "learning_rate": 3.353631700892546e-07, "logits/chosen": -2.8960297107696533, "logits/rejected": -2.4925577640533447, "logps/chosen": -105.09992980957031, "logps/rejected": -899.0220947265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6156030893325806, "rewards/margins": 7.976208686828613, "rewards/rejected": -8.591812133789062, "step": 71000 }, { "epoch": 0.85, "learning_rate": 3.348407399832485e-07, "logits/chosen": -2.8982322216033936, "logits/rejected": -2.156846284866333, "logps/chosen": -113.81864929199219, "logps/rejected": -1064.234619140625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5588816404342651, "rewards/margins": 9.678323745727539, "rewards/rejected": -10.237203598022461, "step": 71010 }, { "epoch": 0.85, "learning_rate": 3.3431868790803594e-07, "logits/chosen": -2.896470546722412, "logits/rejected": -2.4502367973327637, "logps/chosen": -115.41668701171875, "logps/rejected": -891.3798828125, "loss": 0.0686, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6918801069259644, "rewards/margins": 7.840163230895996, "rewards/rejected": -8.532044410705566, "step": 71020 }, { "epoch": 0.85, "learning_rate": 3.3379701395476527e-07, "logits/chosen": -2.8829495906829834, "logits/rejected": -2.13145112991333, "logps/chosen": -111.98930358886719, "logps/rejected": -997.4659423828125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5990459322929382, "rewards/margins": 8.97626781463623, "rewards/rejected": -9.575313568115234, "step": 71030 }, { "epoch": 0.85, "learning_rate": 3.332757182145196e-07, "logits/chosen": -2.880298614501953, "logits/rejected": -2.3392462730407715, "logps/chosen": -125.7509994506836, "logps/rejected": -984.2483520507812, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -0.7523893117904663, "rewards/margins": 8.698003768920898, "rewards/rejected": -9.450392723083496, "step": 71040 }, { "epoch": 0.85, "learning_rate": 3.3275480077831545e-07, "logits/chosen": -2.8444480895996094, "logits/rejected": -2.2884535789489746, "logps/chosen": -144.1716766357422, "logps/rejected": -920.5670776367188, "loss": 0.0969, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9499089121818542, "rewards/margins": 7.873663902282715, "rewards/rejected": -8.823573112487793, "step": 71050 }, { "epoch": 0.85, "learning_rate": 3.322342617371041e-07, "logits/chosen": -2.9076857566833496, "logits/rejected": -2.366664409637451, "logps/chosen": -88.98658752441406, "logps/rejected": -883.6389770507812, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.42056283354759216, "rewards/margins": 8.025080680847168, "rewards/rejected": -8.445642471313477, "step": 71060 }, { "epoch": 0.85, "learning_rate": 3.3171410118176983e-07, "logits/chosen": -2.9616432189941406, "logits/rejected": -2.4158577919006348, "logps/chosen": -104.5760726928711, "logps/rejected": -962.1306762695312, "loss": 0.1084, "rewards/accuracies": 1.0, "rewards/chosen": -0.5452448129653931, "rewards/margins": 8.67810344696045, "rewards/rejected": -9.223347663879395, "step": 71070 }, { "epoch": 0.85, "learning_rate": 3.311943192031314e-07, "logits/chosen": -2.8922481536865234, "logits/rejected": -2.2588133811950684, "logps/chosen": -120.3166732788086, "logps/rejected": -981.9158935546875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6557660698890686, "rewards/margins": 8.765803337097168, "rewards/rejected": -9.42156982421875, "step": 71080 }, { "epoch": 0.85, "learning_rate": 3.306749158919412e-07, "logits/chosen": -2.9160258769989014, "logits/rejected": -2.0835208892822266, "logps/chosen": -137.7499237060547, "logps/rejected": -985.4143676757812, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.83379065990448, "rewards/margins": 8.611050605773926, "rewards/rejected": -9.444841384887695, "step": 71090 }, { "epoch": 0.85, "learning_rate": 3.3015589133888556e-07, "logits/chosen": -2.892580986022949, "logits/rejected": -2.400078535079956, "logps/chosen": -95.17420959472656, "logps/rejected": -943.1189575195312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4965699315071106, "rewards/margins": 8.52479362487793, "rewards/rejected": -9.021364212036133, "step": 71100 }, { "epoch": 0.85, "learning_rate": 3.296372456345848e-07, "logits/chosen": -2.918670654296875, "logits/rejected": -2.251971483230591, "logps/chosen": -107.3531723022461, "logps/rejected": -977.6316528320312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5715275406837463, "rewards/margins": 8.818194389343262, "rewards/rejected": -9.38972282409668, "step": 71110 }, { "epoch": 0.85, "learning_rate": 3.291189788695931e-07, "logits/chosen": -2.8954362869262695, "logits/rejected": -2.5022339820861816, "logps/chosen": -78.43785095214844, "logps/rejected": -879.3919677734375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.37815865874290466, "rewards/margins": 8.045210838317871, "rewards/rejected": -8.423369407653809, "step": 71120 }, { "epoch": 0.85, "learning_rate": 3.286010911343984e-07, "logits/chosen": -2.8906118869781494, "logits/rejected": -2.163618564605713, "logps/chosen": -111.59449768066406, "logps/rejected": -1032.326904296875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6031821966171265, "rewards/margins": 9.320194244384766, "rewards/rejected": -9.923376083374023, "step": 71130 }, { "epoch": 0.85, "learning_rate": 3.2808358251942287e-07, "logits/chosen": -2.885627508163452, "logits/rejected": -2.3684258460998535, "logps/chosen": -104.04327392578125, "logps/rejected": -937.2781372070312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5684815645217896, "rewards/margins": 8.398866653442383, "rewards/rejected": -8.967348098754883, "step": 71140 }, { "epoch": 0.85, "learning_rate": 3.2756645311502105e-07, "logits/chosen": -2.8313145637512207, "logits/rejected": -2.222294807434082, "logps/chosen": -138.94094848632812, "logps/rejected": -1002.1569213867188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.898465633392334, "rewards/margins": 8.724684715270996, "rewards/rejected": -9.623150825500488, "step": 71150 }, { "epoch": 0.85, "learning_rate": 3.2704970301148274e-07, "logits/chosen": -2.8464853763580322, "logits/rejected": -2.114331007003784, "logps/chosen": -114.50929260253906, "logps/rejected": -980.4631958007812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6433672904968262, "rewards/margins": 8.779036521911621, "rewards/rejected": -9.422403335571289, "step": 71160 }, { "epoch": 0.85, "learning_rate": 3.2653333229903134e-07, "logits/chosen": -2.855896472930908, "logits/rejected": -2.02958345413208, "logps/chosen": -148.08187866210938, "logps/rejected": -1059.245361328125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8674377202987671, "rewards/margins": 9.303990364074707, "rewards/rejected": -10.171427726745605, "step": 71170 }, { "epoch": 0.85, "learning_rate": 3.2601734106782356e-07, "logits/chosen": -2.9135518074035645, "logits/rejected": -2.5257253646850586, "logps/chosen": -95.32005310058594, "logps/rejected": -916.63427734375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.503905713558197, "rewards/margins": 8.281478881835938, "rewards/rejected": -8.785384178161621, "step": 71180 }, { "epoch": 0.85, "learning_rate": 3.255017294079499e-07, "logits/chosen": -2.8702800273895264, "logits/rejected": -2.2953343391418457, "logps/chosen": -115.04390716552734, "logps/rejected": -891.5863037109375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6421456336975098, "rewards/margins": 7.889391899108887, "rewards/rejected": -8.531537055969238, "step": 71190 }, { "epoch": 0.85, "learning_rate": 3.249864974094349e-07, "logits/chosen": -2.8932366371154785, "logits/rejected": -2.3753981590270996, "logps/chosen": -110.99105072021484, "logps/rejected": -861.4080200195312, "loss": 0.0897, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6691402196884155, "rewards/margins": 7.563159942626953, "rewards/rejected": -8.2322998046875, "step": 71200 }, { "epoch": 0.85, "learning_rate": 3.2447164516223733e-07, "logits/chosen": -2.915118455886841, "logits/rejected": -2.46075439453125, "logps/chosen": -84.56657409667969, "logps/rejected": -887.7111206054688, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -0.42871466279029846, "rewards/margins": 8.062275886535645, "rewards/rejected": -8.49099063873291, "step": 71210 }, { "epoch": 0.85, "learning_rate": 3.2395717275624765e-07, "logits/chosen": -2.8755791187286377, "logits/rejected": -2.1662890911102295, "logps/chosen": -126.89652252197266, "logps/rejected": -927.8599853515625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7329842448234558, "rewards/margins": 8.14680290222168, "rewards/rejected": -8.879786491394043, "step": 71220 }, { "epoch": 0.85, "learning_rate": 3.2344308028129185e-07, "logits/chosen": -2.8396899700164795, "logits/rejected": -2.2507195472717285, "logps/chosen": -120.68070220947266, "logps/rejected": -1038.429931640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6866332292556763, "rewards/margins": 9.304055213928223, "rewards/rejected": -9.99068832397461, "step": 71230 }, { "epoch": 0.85, "learning_rate": 3.229293678271289e-07, "logits/chosen": -2.941126585006714, "logits/rejected": -2.3327596187591553, "logps/chosen": -127.0250473022461, "logps/rejected": -972.6419067382812, "loss": 0.0947, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7421720623970032, "rewards/margins": 8.58398723602295, "rewards/rejected": -9.32615852355957, "step": 71240 }, { "epoch": 0.85, "learning_rate": 3.224160354834521e-07, "logits/chosen": -2.879909038543701, "logits/rejected": -2.310131788253784, "logps/chosen": -97.4843978881836, "logps/rejected": -892.5237426757812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5381715893745422, "rewards/margins": 7.997282981872559, "rewards/rejected": -8.535454750061035, "step": 71250 }, { "epoch": 0.85, "learning_rate": 3.219030833398873e-07, "logits/chosen": -2.9129626750946045, "logits/rejected": -2.478644847869873, "logps/chosen": -92.17411041259766, "logps/rejected": -893.7068481445312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.45756882429122925, "rewards/margins": 8.09514045715332, "rewards/rejected": -8.552709579467773, "step": 71260 }, { "epoch": 0.85, "learning_rate": 3.21390511485995e-07, "logits/chosen": -2.8645641803741455, "logits/rejected": -2.234212875366211, "logps/chosen": -146.84066772460938, "logps/rejected": -992.1849365234375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.9910504221916199, "rewards/margins": 8.529916763305664, "rewards/rejected": -9.520965576171875, "step": 71270 }, { "epoch": 0.85, "learning_rate": 3.208783200112689e-07, "logits/chosen": -2.891468048095703, "logits/rejected": -2.4334282875061035, "logps/chosen": -118.70854187011719, "logps/rejected": -818.5604858398438, "loss": 0.1523, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7298242449760437, "rewards/margins": 7.060604095458984, "rewards/rejected": -7.79042911529541, "step": 71280 }, { "epoch": 0.85, "learning_rate": 3.203665090051353e-07, "logits/chosen": -2.876404285430908, "logits/rejected": -2.304584503173828, "logps/chosen": -121.57948303222656, "logps/rejected": -847.9938354492188, "loss": 0.1257, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7470853924751282, "rewards/margins": 7.350119590759277, "rewards/rejected": -8.097204208374023, "step": 71290 }, { "epoch": 0.85, "learning_rate": 3.198550785569557e-07, "logits/chosen": -2.9435648918151855, "logits/rejected": -2.3468050956726074, "logps/chosen": -97.56047058105469, "logps/rejected": -828.9029541015625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.49940046668052673, "rewards/margins": 7.395291805267334, "rewards/rejected": -7.894692897796631, "step": 71300 }, { "epoch": 0.85, "learning_rate": 3.193440287560243e-07, "logits/chosen": -2.8708720207214355, "logits/rejected": -2.342621326446533, "logps/chosen": -100.94418334960938, "logps/rejected": -901.1780395507812, "loss": 0.0394, "rewards/accuracies": 1.0, "rewards/chosen": -0.534473717212677, "rewards/margins": 8.083145141601562, "rewards/rejected": -8.617620468139648, "step": 71310 }, { "epoch": 0.85, "learning_rate": 3.188333596915688e-07, "logits/chosen": -2.868110179901123, "logits/rejected": -2.5098047256469727, "logps/chosen": -112.5870590209961, "logps/rejected": -752.3161010742188, "loss": 0.1226, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7465775609016418, "rewards/margins": 6.415369987487793, "rewards/rejected": -7.161947727203369, "step": 71320 }, { "epoch": 0.85, "learning_rate": 3.183230714527508e-07, "logits/chosen": -2.896601438522339, "logits/rejected": -2.5227835178375244, "logps/chosen": -87.49938201904297, "logps/rejected": -880.9207763671875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4745398461818695, "rewards/margins": 7.947740077972412, "rewards/rejected": -8.422279357910156, "step": 71330 }, { "epoch": 0.85, "learning_rate": 3.178131641286658e-07, "logits/chosen": -2.853705883026123, "logits/rejected": -2.0791211128234863, "logps/chosen": -131.92295837402344, "logps/rejected": -997.11328125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7859228849411011, "rewards/margins": 8.78453540802002, "rewards/rejected": -9.570459365844727, "step": 71340 }, { "epoch": 0.85, "learning_rate": 3.1730363780834104e-07, "logits/chosen": -2.8737213611602783, "logits/rejected": -2.229095935821533, "logps/chosen": -120.9915771484375, "logps/rejected": -995.9539184570312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7479010820388794, "rewards/margins": 8.813232421875, "rewards/rejected": -9.56113338470459, "step": 71350 }, { "epoch": 0.85, "learning_rate": 3.16794492580739e-07, "logits/chosen": -2.8955254554748535, "logits/rejected": -2.4305613040924072, "logps/chosen": -93.33538818359375, "logps/rejected": -965.2373046875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5081139802932739, "rewards/margins": 8.753901481628418, "rewards/rejected": -9.262014389038086, "step": 71360 }, { "epoch": 0.85, "learning_rate": 3.162857285347548e-07, "logits/chosen": -2.911144971847534, "logits/rejected": -2.428056478500366, "logps/chosen": -121.3592300415039, "logps/rejected": -998.7525634765625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.7214409112930298, "rewards/margins": 8.870626449584961, "rewards/rejected": -9.59206771850586, "step": 71370 }, { "epoch": 0.85, "learning_rate": 3.1577734575921754e-07, "logits/chosen": -2.833507537841797, "logits/rejected": -2.374873161315918, "logps/chosen": -103.12327575683594, "logps/rejected": -972.8298950195312, "loss": 0.1281, "rewards/accuracies": 1.0, "rewards/chosen": -0.5804532766342163, "rewards/margins": 8.760890007019043, "rewards/rejected": -9.341344833374023, "step": 71380 }, { "epoch": 0.85, "learning_rate": 3.152693443428892e-07, "logits/chosen": -2.866621732711792, "logits/rejected": -2.359478235244751, "logps/chosen": -121.528564453125, "logps/rejected": -1018.4583740234375, "loss": 0.1119, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7452855706214905, "rewards/margins": 9.04509449005127, "rewards/rejected": -9.790380477905273, "step": 71390 }, { "epoch": 0.85, "learning_rate": 3.147617243744655e-07, "logits/chosen": -2.8881232738494873, "logits/rejected": -2.3793084621429443, "logps/chosen": -95.16699981689453, "logps/rejected": -935.9779052734375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.46611371636390686, "rewards/margins": 8.512724876403809, "rewards/rejected": -8.978837966918945, "step": 71400 }, { "epoch": 0.85, "learning_rate": 3.142544859425761e-07, "logits/chosen": -2.877639055252075, "logits/rejected": -2.389082431793213, "logps/chosen": -93.61341094970703, "logps/rejected": -893.7041015625, "loss": 0.0947, "rewards/accuracies": 1.0, "rewards/chosen": -0.5227338075637817, "rewards/margins": 8.033703804016113, "rewards/rejected": -8.556436538696289, "step": 71410 }, { "epoch": 0.85, "learning_rate": 3.137476291357822e-07, "logits/chosen": -2.861569881439209, "logits/rejected": -2.2317750453948975, "logps/chosen": -108.52973937988281, "logps/rejected": -1003.6519775390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5651017427444458, "rewards/margins": 9.079815864562988, "rewards/rejected": -9.644917488098145, "step": 71420 }, { "epoch": 0.86, "learning_rate": 3.132411540425803e-07, "logits/chosen": -2.907601833343506, "logits/rejected": -2.3657281398773193, "logps/chosen": -94.14836120605469, "logps/rejected": -999.1324462890625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.44819384813308716, "rewards/margins": 9.15918254852295, "rewards/rejected": -9.607378005981445, "step": 71430 }, { "epoch": 0.86, "learning_rate": 3.1273506075139933e-07, "logits/chosen": -2.9093210697174072, "logits/rejected": -2.459728479385376, "logps/chosen": -85.62403106689453, "logps/rejected": -909.0211181640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.43138203024864197, "rewards/margins": 8.268110275268555, "rewards/rejected": -8.699492454528809, "step": 71440 }, { "epoch": 0.86, "learning_rate": 3.1222934935060176e-07, "logits/chosen": -2.943690538406372, "logits/rejected": -2.5858185291290283, "logps/chosen": -67.3128433227539, "logps/rejected": -843.60009765625, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": -0.27887827157974243, "rewards/margins": 7.786616325378418, "rewards/rejected": -8.065494537353516, "step": 71450 }, { "epoch": 0.86, "learning_rate": 3.117240199284838e-07, "logits/chosen": -2.8474535942077637, "logits/rejected": -2.046433687210083, "logps/chosen": -137.82240295410156, "logps/rejected": -1164.473876953125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.7430186867713928, "rewards/margins": 10.481395721435547, "rewards/rejected": -11.22441577911377, "step": 71460 }, { "epoch": 0.86, "learning_rate": 3.1121907257327415e-07, "logits/chosen": -2.879208564758301, "logits/rejected": -2.0524466037750244, "logps/chosen": -138.6619110107422, "logps/rejected": -1030.4638671875, "loss": 0.0989, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8754748106002808, "rewards/margins": 9.040079116821289, "rewards/rejected": -9.915553092956543, "step": 71470 }, { "epoch": 0.86, "learning_rate": 3.1071450737313575e-07, "logits/chosen": -2.871361017227173, "logits/rejected": -2.448657512664795, "logps/chosen": -87.46206665039062, "logps/rejected": -875.6036987304688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4305165410041809, "rewards/margins": 7.956340789794922, "rewards/rejected": -8.386857032775879, "step": 71480 }, { "epoch": 0.86, "learning_rate": 3.102103244161636e-07, "logits/chosen": -2.8514788150787354, "logits/rejected": -2.2989563941955566, "logps/chosen": -123.86967468261719, "logps/rejected": -980.3250732421875, "loss": 0.0286, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7084317207336426, "rewards/margins": 8.703664779663086, "rewards/rejected": -9.412096977233887, "step": 71490 }, { "epoch": 0.86, "learning_rate": 3.097065237903862e-07, "logits/chosen": -2.887024402618408, "logits/rejected": -2.3588218688964844, "logps/chosen": -105.32508850097656, "logps/rejected": -917.7044067382812, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5249125361442566, "rewards/margins": 8.261937141418457, "rewards/rejected": -8.786848068237305, "step": 71500 }, { "epoch": 0.86, "learning_rate": 3.0920310558376714e-07, "logits/chosen": -2.8745226860046387, "logits/rejected": -2.665531635284424, "logps/chosen": -69.94800567626953, "logps/rejected": -725.9969482421875, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -0.3600383400917053, "rewards/margins": 6.529661655426025, "rewards/rejected": -6.8896989822387695, "step": 71510 }, { "epoch": 0.86, "learning_rate": 3.0870006988420106e-07, "logits/chosen": -2.924931764602661, "logits/rejected": -2.0615079402923584, "logps/chosen": -131.3654022216797, "logps/rejected": -1019.62353515625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7518815994262695, "rewards/margins": 9.046445846557617, "rewards/rejected": -9.79832649230957, "step": 71520 }, { "epoch": 0.86, "learning_rate": 3.0819741677951734e-07, "logits/chosen": -2.860713243484497, "logits/rejected": -2.428928852081299, "logps/chosen": -86.76460266113281, "logps/rejected": -848.1939697265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4507528841495514, "rewards/margins": 7.655057430267334, "rewards/rejected": -8.105810165405273, "step": 71530 }, { "epoch": 0.86, "learning_rate": 3.076951463574765e-07, "logits/chosen": -2.8319172859191895, "logits/rejected": -2.049015998840332, "logps/chosen": -156.6048126220703, "logps/rejected": -1060.5181884765625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.0449960231781006, "rewards/margins": 9.14813232421875, "rewards/rejected": -10.193127632141113, "step": 71540 }, { "epoch": 0.86, "learning_rate": 3.071932587057744e-07, "logits/chosen": -2.915097713470459, "logits/rejected": -2.4081199169158936, "logps/chosen": -106.4015121459961, "logps/rejected": -992.2530517578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6046980619430542, "rewards/margins": 8.915617942810059, "rewards/rejected": -9.520317077636719, "step": 71550 }, { "epoch": 0.86, "learning_rate": 3.06691753912039e-07, "logits/chosen": -2.872971534729004, "logits/rejected": -2.178586959838867, "logps/chosen": -110.46627044677734, "logps/rejected": -966.3615112304688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6069338917732239, "rewards/margins": 8.66020679473877, "rewards/rejected": -9.267141342163086, "step": 71560 }, { "epoch": 0.86, "learning_rate": 3.0619063206383164e-07, "logits/chosen": -2.901808500289917, "logits/rejected": -2.5382437705993652, "logps/chosen": -113.52986145019531, "logps/rejected": -823.0537109375, "loss": 0.1133, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6798521280288696, "rewards/margins": 7.171257972717285, "rewards/rejected": -7.851109981536865, "step": 71570 }, { "epoch": 0.86, "learning_rate": 3.056898932486471e-07, "logits/chosen": -2.832127571105957, "logits/rejected": -2.3119893074035645, "logps/chosen": -98.88025665283203, "logps/rejected": -966.2593994140625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4930810332298279, "rewards/margins": 8.770264625549316, "rewards/rejected": -9.263345718383789, "step": 71580 }, { "epoch": 0.86, "learning_rate": 3.051895375539124e-07, "logits/chosen": -2.928987979888916, "logits/rejected": -2.502995491027832, "logps/chosen": -98.66756439208984, "logps/rejected": -843.42578125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5545541048049927, "rewards/margins": 7.510017395019531, "rewards/rejected": -8.06457233428955, "step": 71590 }, { "epoch": 0.86, "learning_rate": 3.0468956506698953e-07, "logits/chosen": -2.8899991512298584, "logits/rejected": -2.534144163131714, "logps/chosen": -110.65213775634766, "logps/rejected": -851.2962646484375, "loss": 0.1171, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6720606088638306, "rewards/margins": 7.452797889709473, "rewards/rejected": -8.124857902526855, "step": 71600 }, { "epoch": 0.86, "learning_rate": 3.041899758751707e-07, "logits/chosen": -2.835472583770752, "logits/rejected": -2.24072003364563, "logps/chosen": -111.57368469238281, "logps/rejected": -1009.48193359375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.607667088508606, "rewards/margins": 9.09621524810791, "rewards/rejected": -9.703882217407227, "step": 71610 }, { "epoch": 0.86, "learning_rate": 3.036907700656835e-07, "logits/chosen": -2.8958423137664795, "logits/rejected": -2.0606703758239746, "logps/chosen": -137.06814575195312, "logps/rejected": -1080.005615234375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8100444674491882, "rewards/margins": 9.56527042388916, "rewards/rejected": -10.375314712524414, "step": 71620 }, { "epoch": 0.86, "learning_rate": 3.0319194772568783e-07, "logits/chosen": -2.9040050506591797, "logits/rejected": -2.374073028564453, "logps/chosen": -90.92544555664062, "logps/rejected": -808.9241943359375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.48716169595718384, "rewards/margins": 7.229408264160156, "rewards/rejected": -7.716570854187012, "step": 71630 }, { "epoch": 0.86, "learning_rate": 3.0269350894227665e-07, "logits/chosen": -2.9158668518066406, "logits/rejected": -2.4963879585266113, "logps/chosen": -84.37628936767578, "logps/rejected": -746.4284057617188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.3942508101463318, "rewards/margins": 6.698797702789307, "rewards/rejected": -7.093049049377441, "step": 71640 }, { "epoch": 0.86, "learning_rate": 3.02195453802476e-07, "logits/chosen": -2.879298686981201, "logits/rejected": -2.2850584983825684, "logps/chosen": -110.33842468261719, "logps/rejected": -1069.627685546875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.5949075818061829, "rewards/margins": 9.707271575927734, "rewards/rejected": -10.302178382873535, "step": 71650 }, { "epoch": 0.86, "learning_rate": 3.016977823932457e-07, "logits/chosen": -2.80875301361084, "logits/rejected": -2.1821844577789307, "logps/chosen": -119.11650085449219, "logps/rejected": -1066.745849609375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6565289497375488, "rewards/margins": 9.611651420593262, "rewards/rejected": -10.268180847167969, "step": 71660 }, { "epoch": 0.86, "learning_rate": 3.0120049480147657e-07, "logits/chosen": -2.870116710662842, "logits/rejected": -2.0087523460388184, "logps/chosen": -126.4030532836914, "logps/rejected": -1105.3338623046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7217347621917725, "rewards/margins": 9.929610252380371, "rewards/rejected": -10.65134334564209, "step": 71670 }, { "epoch": 0.86, "learning_rate": 3.0070359111399384e-07, "logits/chosen": -2.87373685836792, "logits/rejected": -2.3683900833129883, "logps/chosen": -93.46627044677734, "logps/rejected": -920.8525390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4770457148551941, "rewards/margins": 8.346495628356934, "rewards/rejected": -8.823541641235352, "step": 71680 }, { "epoch": 0.86, "learning_rate": 3.002070714175562e-07, "logits/chosen": -2.879204750061035, "logits/rejected": -2.526110887527466, "logps/chosen": -100.90708923339844, "logps/rejected": -836.8859252929688, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.5306746363639832, "rewards/margins": 7.457360744476318, "rewards/rejected": -7.988035678863525, "step": 71690 }, { "epoch": 0.86, "learning_rate": 2.9971093579885396e-07, "logits/chosen": -2.8684446811676025, "logits/rejected": -2.2386598587036133, "logps/chosen": -116.64093017578125, "logps/rejected": -940.9801025390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6677724123001099, "rewards/margins": 8.351747512817383, "rewards/rejected": -9.019518852233887, "step": 71700 }, { "epoch": 0.86, "learning_rate": 2.992151843445112e-07, "logits/chosen": -2.88542103767395, "logits/rejected": -2.5103650093078613, "logps/chosen": -83.94631958007812, "logps/rejected": -914.0597534179688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4044055938720703, "rewards/margins": 8.347816467285156, "rewards/rejected": -8.752222061157227, "step": 71710 }, { "epoch": 0.86, "learning_rate": 2.987198171410852e-07, "logits/chosen": -2.8632657527923584, "logits/rejected": -2.02744460105896, "logps/chosen": -148.22386169433594, "logps/rejected": -1142.005859375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9173290133476257, "rewards/margins": 10.077924728393555, "rewards/rejected": -10.995253562927246, "step": 71720 }, { "epoch": 0.86, "learning_rate": 2.9822483427506574e-07, "logits/chosen": -2.9375100135803223, "logits/rejected": -2.565014362335205, "logps/chosen": -103.42557525634766, "logps/rejected": -792.9818115234375, "loss": 0.0769, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5818035006523132, "rewards/margins": 6.977369785308838, "rewards/rejected": -7.5591721534729, "step": 71730 }, { "epoch": 0.86, "learning_rate": 2.9773023583287433e-07, "logits/chosen": -2.8379883766174316, "logits/rejected": -2.5113091468811035, "logps/chosen": -83.36997985839844, "logps/rejected": -909.7882080078125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.43440189957618713, "rewards/margins": 8.267339706420898, "rewards/rejected": -8.701742172241211, "step": 71740 }, { "epoch": 0.86, "learning_rate": 2.972360219008674e-07, "logits/chosen": -2.8610522747039795, "logits/rejected": -2.440035343170166, "logps/chosen": -89.17420959472656, "logps/rejected": -921.51611328125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4651423394680023, "rewards/margins": 8.354848861694336, "rewards/rejected": -8.819990158081055, "step": 71750 }, { "epoch": 0.86, "learning_rate": 2.96742192565333e-07, "logits/chosen": -2.9257454872131348, "logits/rejected": -2.326720714569092, "logps/chosen": -110.95692443847656, "logps/rejected": -965.5753784179688, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5585666298866272, "rewards/margins": 8.705154418945312, "rewards/rejected": -9.263721466064453, "step": 71760 }, { "epoch": 0.86, "learning_rate": 2.962487479124926e-07, "logits/chosen": -2.8808200359344482, "logits/rejected": -2.395087718963623, "logps/chosen": -87.45255279541016, "logps/rejected": -891.7818603515625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.45738911628723145, "rewards/margins": 8.06462287902832, "rewards/rejected": -8.522012710571289, "step": 71770 }, { "epoch": 0.86, "learning_rate": 2.957556880285001e-07, "logits/chosen": -2.8827555179595947, "logits/rejected": -2.1551296710968018, "logps/chosen": -126.82536315917969, "logps/rejected": -1129.447021484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7452422976493835, "rewards/margins": 10.140409469604492, "rewards/rejected": -10.885652542114258, "step": 71780 }, { "epoch": 0.86, "learning_rate": 2.952630129994424e-07, "logits/chosen": -2.8953471183776855, "logits/rejected": -2.246542453765869, "logps/chosen": -120.05256652832031, "logps/rejected": -970.94873046875, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -0.6864303350448608, "rewards/margins": 8.626925468444824, "rewards/rejected": -9.313356399536133, "step": 71790 }, { "epoch": 0.86, "learning_rate": 2.9477072291133975e-07, "logits/chosen": -2.870187282562256, "logits/rejected": -2.3299756050109863, "logps/chosen": -104.41783142089844, "logps/rejected": -878.0982666015625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5867114663124084, "rewards/margins": 7.804404258728027, "rewards/rejected": -8.39111614227295, "step": 71800 }, { "epoch": 0.86, "learning_rate": 2.942788178501435e-07, "logits/chosen": -2.875466823577881, "logits/rejected": -2.3712916374206543, "logps/chosen": -87.26658630371094, "logps/rejected": -905.1013793945312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4392244815826416, "rewards/margins": 8.231789588928223, "rewards/rejected": -8.671014785766602, "step": 71810 }, { "epoch": 0.86, "learning_rate": 2.9378729790173976e-07, "logits/chosen": -2.8495020866394043, "logits/rejected": -2.3132472038269043, "logps/chosen": -124.9129409790039, "logps/rejected": -1009.693359375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7784307599067688, "rewards/margins": 8.932172775268555, "rewards/rejected": -9.710603713989258, "step": 71820 }, { "epoch": 0.86, "learning_rate": 2.9329616315194624e-07, "logits/chosen": -2.9130377769470215, "logits/rejected": -2.190338134765625, "logps/chosen": -118.63516998291016, "logps/rejected": -1022.7273559570312, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6243396997451782, "rewards/margins": 9.202978134155273, "rewards/rejected": -9.827317237854004, "step": 71830 }, { "epoch": 0.86, "learning_rate": 2.928054136865138e-07, "logits/chosen": -2.8754191398620605, "logits/rejected": -2.218949317932129, "logps/chosen": -131.6890411376953, "logps/rejected": -1027.656005859375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8084503412246704, "rewards/margins": 9.0639009475708, "rewards/rejected": -9.872350692749023, "step": 71840 }, { "epoch": 0.86, "learning_rate": 2.9231504959112587e-07, "logits/chosen": -2.8613762855529785, "logits/rejected": -2.3712470531463623, "logps/chosen": -89.45042419433594, "logps/rejected": -875.5020751953125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.40987467765808105, "rewards/margins": 7.976319789886475, "rewards/rejected": -8.38619327545166, "step": 71850 }, { "epoch": 0.86, "learning_rate": 2.918250709513992e-07, "logits/chosen": -2.8913979530334473, "logits/rejected": -2.5012810230255127, "logps/chosen": -84.0791015625, "logps/rejected": -801.6481323242188, "loss": 0.0846, "rewards/accuracies": 1.0, "rewards/chosen": -0.4135994017124176, "rewards/margins": 7.223427772521973, "rewards/rejected": -7.637026309967041, "step": 71860 }, { "epoch": 0.86, "learning_rate": 2.913354778528815e-07, "logits/chosen": -2.8277485370635986, "logits/rejected": -2.2162413597106934, "logps/chosen": -100.8881607055664, "logps/rejected": -950.5255737304688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5520458817481995, "rewards/margins": 8.559941291809082, "rewards/rejected": -9.111987113952637, "step": 71870 }, { "epoch": 0.86, "learning_rate": 2.908462703810555e-07, "logits/chosen": -2.948002338409424, "logits/rejected": -2.46030855178833, "logps/chosen": -107.07777404785156, "logps/rejected": -1028.65087890625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5716381669044495, "rewards/margins": 9.318961143493652, "rewards/rejected": -9.890600204467773, "step": 71880 }, { "epoch": 0.86, "learning_rate": 2.9035744862133453e-07, "logits/chosen": -2.881120204925537, "logits/rejected": -2.327390193939209, "logps/chosen": -103.39622497558594, "logps/rejected": -974.2565307617188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5377518534660339, "rewards/margins": 8.79987907409668, "rewards/rejected": -9.337630271911621, "step": 71890 }, { "epoch": 0.86, "learning_rate": 2.8986901265906644e-07, "logits/chosen": -2.8982222080230713, "logits/rejected": -2.19177508354187, "logps/chosen": -111.15130615234375, "logps/rejected": -1001.3570556640625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.5859525203704834, "rewards/margins": 9.036087989807129, "rewards/rejected": -9.622039794921875, "step": 71900 }, { "epoch": 0.86, "learning_rate": 2.893809625795302e-07, "logits/chosen": -2.8998539447784424, "logits/rejected": -2.444199323654175, "logps/chosen": -104.02362060546875, "logps/rejected": -897.7586059570312, "loss": 0.0433, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5855010747909546, "rewards/margins": 8.011552810668945, "rewards/rejected": -8.597053527832031, "step": 71910 }, { "epoch": 0.86, "learning_rate": 2.8889329846793826e-07, "logits/chosen": -2.9226346015930176, "logits/rejected": -2.5353283882141113, "logps/chosen": -84.11370849609375, "logps/rejected": -874.7581176757812, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.44542068243026733, "rewards/margins": 7.916770935058594, "rewards/rejected": -8.362192153930664, "step": 71920 }, { "epoch": 0.86, "learning_rate": 2.8840602040943496e-07, "logits/chosen": -2.8613390922546387, "logits/rejected": -2.2201969623565674, "logps/chosen": -131.5519561767578, "logps/rejected": -897.9362182617188, "loss": 0.1323, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8772821426391602, "rewards/margins": 7.712663173675537, "rewards/rejected": -8.589944839477539, "step": 71930 }, { "epoch": 0.86, "learning_rate": 2.879191284890984e-07, "logits/chosen": -2.891368865966797, "logits/rejected": -2.6731743812561035, "logps/chosen": -66.56489562988281, "logps/rejected": -838.474609375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.26516711711883545, "rewards/margins": 7.743809700012207, "rewards/rejected": -8.008976936340332, "step": 71940 }, { "epoch": 0.86, "learning_rate": 2.87432622791938e-07, "logits/chosen": -2.9133267402648926, "logits/rejected": -2.4943366050720215, "logps/chosen": -115.5462875366211, "logps/rejected": -902.7335815429688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.719931960105896, "rewards/margins": 7.92132568359375, "rewards/rejected": -8.64125919342041, "step": 71950 }, { "epoch": 0.86, "learning_rate": 2.8694650340289676e-07, "logits/chosen": -2.8881735801696777, "logits/rejected": -2.5024170875549316, "logps/chosen": -118.42942810058594, "logps/rejected": -877.3302001953125, "loss": 0.1008, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7447064518928528, "rewards/margins": 7.651003360748291, "rewards/rejected": -8.395709991455078, "step": 71960 }, { "epoch": 0.86, "learning_rate": 2.864607704068495e-07, "logits/chosen": -2.8981246948242188, "logits/rejected": -2.4251413345336914, "logps/chosen": -100.91143035888672, "logps/rejected": -888.6242065429688, "loss": 0.111, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5678702592849731, "rewards/margins": 7.938634395599365, "rewards/rejected": -8.506505012512207, "step": 71970 }, { "epoch": 0.86, "learning_rate": 2.859754238886042e-07, "logits/chosen": -2.8609910011291504, "logits/rejected": -2.267395257949829, "logps/chosen": -117.34610748291016, "logps/rejected": -1023.107421875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7111536860466003, "rewards/margins": 9.11542797088623, "rewards/rejected": -9.826581954956055, "step": 71980 }, { "epoch": 0.86, "learning_rate": 2.8549046393290037e-07, "logits/chosen": -2.910900592803955, "logits/rejected": -2.452223300933838, "logps/chosen": -93.55406951904297, "logps/rejected": -916.49609375, "loss": 0.0744, "rewards/accuracies": 1.0, "rewards/chosen": -0.46500667929649353, "rewards/margins": 8.311777114868164, "rewards/rejected": -8.776782989501953, "step": 71990 }, { "epoch": 0.86, "learning_rate": 2.8500589062441104e-07, "logits/chosen": -2.8808746337890625, "logits/rejected": -2.3519437313079834, "logps/chosen": -102.4483642578125, "logps/rejected": -972.9392700195312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5692372918128967, "rewards/margins": 8.76943302154541, "rewards/rejected": -9.338669776916504, "step": 72000 }, { "epoch": 0.86, "eval_logits/chosen": -2.8852379322052, "eval_logits/rejected": -1.760801911354065, "eval_logps/chosen": -240.88677978515625, "eval_logps/rejected": -1137.69140625, "eval_loss": 0.0013450515689328313, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -1.7970651388168335, "eval_rewards/margins": 9.112595558166504, "eval_rewards/rejected": -10.909660339355469, "eval_runtime": 1.2151, "eval_samples_per_second": 4.115, "eval_steps_per_second": 2.469, "step": 72000 }, { "epoch": 0.86, "learning_rate": 2.8452170404774105e-07, "logits/chosen": -2.8943891525268555, "logits/rejected": -2.5402204990386963, "logps/chosen": -86.45478820800781, "logps/rejected": -776.5822143554688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4380033016204834, "rewards/margins": 6.952565670013428, "rewards/rejected": -7.390570163726807, "step": 72010 }, { "epoch": 0.86, "learning_rate": 2.8403790428742863e-07, "logits/chosen": -2.941527843475342, "logits/rejected": -2.523063898086548, "logps/chosen": -90.31871032714844, "logps/rejected": -835.9925537109375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4512256681919098, "rewards/margins": 7.52081823348999, "rewards/rejected": -7.972044467926025, "step": 72020 }, { "epoch": 0.86, "learning_rate": 2.835544914279434e-07, "logits/chosen": -2.926156997680664, "logits/rejected": -2.2240848541259766, "logps/chosen": -121.22601318359375, "logps/rejected": -991.79248046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6971604228019714, "rewards/margins": 8.816119194030762, "rewards/rejected": -9.513279914855957, "step": 72030 }, { "epoch": 0.86, "learning_rate": 2.830714655536879e-07, "logits/chosen": -2.9315695762634277, "logits/rejected": -2.419297695159912, "logps/chosen": -87.39027404785156, "logps/rejected": -883.2506713867188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4048937261104584, "rewards/margins": 8.045549392700195, "rewards/rejected": -8.450443267822266, "step": 72040 }, { "epoch": 0.86, "learning_rate": 2.825888267489979e-07, "logits/chosen": -2.9272913932800293, "logits/rejected": -2.3868112564086914, "logps/chosen": -107.1159439086914, "logps/rejected": -945.4035034179688, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6063658595085144, "rewards/margins": 8.455490112304688, "rewards/rejected": -9.061856269836426, "step": 72050 }, { "epoch": 0.86, "learning_rate": 2.821065750981397e-07, "logits/chosen": -2.8690662384033203, "logits/rejected": -2.3296494483947754, "logps/chosen": -115.8916015625, "logps/rejected": -894.1134033203125, "loss": 0.0227, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7167799472808838, "rewards/margins": 7.840372562408447, "rewards/rejected": -8.55715274810791, "step": 72060 }, { "epoch": 0.86, "learning_rate": 2.8162471068531344e-07, "logits/chosen": -2.8748252391815186, "logits/rejected": -1.9428203105926514, "logps/chosen": -147.90145874023438, "logps/rejected": -1161.653076171875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9261037707328796, "rewards/margins": 10.282276153564453, "rewards/rejected": -11.208379745483398, "step": 72070 }, { "epoch": 0.86, "learning_rate": 2.811432335946515e-07, "logits/chosen": -2.8659071922302246, "logits/rejected": -2.462862491607666, "logps/chosen": -98.13169860839844, "logps/rejected": -851.5849609375, "loss": 0.0904, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5789171457290649, "rewards/margins": 7.556438446044922, "rewards/rejected": -8.135355949401855, "step": 72080 }, { "epoch": 0.86, "learning_rate": 2.8066214391021834e-07, "logits/chosen": -2.797435760498047, "logits/rejected": -2.141932964324951, "logps/chosen": -115.4782485961914, "logps/rejected": -863.0084838867188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6507051587104797, "rewards/margins": 7.592207908630371, "rewards/rejected": -8.242913246154785, "step": 72090 }, { "epoch": 0.86, "learning_rate": 2.801814417160109e-07, "logits/chosen": -2.8625283241271973, "logits/rejected": -2.4797778129577637, "logps/chosen": -95.31573486328125, "logps/rejected": -839.5091552734375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5152239799499512, "rewards/margins": 7.50750732421875, "rewards/rejected": -8.022732734680176, "step": 72100 }, { "epoch": 0.86, "learning_rate": 2.7970112709595847e-07, "logits/chosen": -2.9018924236297607, "logits/rejected": -2.435176372528076, "logps/chosen": -92.68101501464844, "logps/rejected": -855.0943603515625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4645055830478668, "rewards/margins": 7.701667785644531, "rewards/rejected": -8.166172981262207, "step": 72110 }, { "epoch": 0.86, "learning_rate": 2.792212001339231e-07, "logits/chosen": -2.877598524093628, "logits/rejected": -2.3730854988098145, "logps/chosen": -89.54949951171875, "logps/rejected": -854.9072265625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4755043089389801, "rewards/margins": 7.685426235198975, "rewards/rejected": -8.160929679870605, "step": 72120 }, { "epoch": 0.86, "learning_rate": 2.7874166091369783e-07, "logits/chosen": -2.930205821990967, "logits/rejected": -2.262490749359131, "logps/chosen": -118.38993835449219, "logps/rejected": -985.1204833984375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6300808191299438, "rewards/margins": 8.825850486755371, "rewards/rejected": -9.455930709838867, "step": 72130 }, { "epoch": 0.86, "learning_rate": 2.7826250951900917e-07, "logits/chosen": -2.8783810138702393, "logits/rejected": -2.4421255588531494, "logps/chosen": -96.26611328125, "logps/rejected": -946.4421997070312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.482273668050766, "rewards/margins": 8.59361743927002, "rewards/rejected": -9.075891494750977, "step": 72140 }, { "epoch": 0.86, "learning_rate": 2.777837460335159e-07, "logits/chosen": -2.8524186611175537, "logits/rejected": -2.3594794273376465, "logps/chosen": -88.02348327636719, "logps/rejected": -862.0814208984375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.43750423192977905, "rewards/margins": 7.804612636566162, "rewards/rejected": -8.242116928100586, "step": 72150 }, { "epoch": 0.86, "learning_rate": 2.7730537054080854e-07, "logits/chosen": -2.887249231338501, "logits/rejected": -2.4861769676208496, "logps/chosen": -85.68072509765625, "logps/rejected": -886.4397583007812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.442788690328598, "rewards/margins": 8.04275131225586, "rewards/rejected": -8.485540390014648, "step": 72160 }, { "epoch": 0.86, "learning_rate": 2.768273831244103e-07, "logits/chosen": -2.907451629638672, "logits/rejected": -2.1369829177856445, "logps/chosen": -126.16357421875, "logps/rejected": -1036.8900146484375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7599572539329529, "rewards/margins": 9.214491844177246, "rewards/rejected": -9.97445011138916, "step": 72170 }, { "epoch": 0.86, "learning_rate": 2.7634978386777675e-07, "logits/chosen": -2.87742280960083, "logits/rejected": -2.364224672317505, "logps/chosen": -124.1203384399414, "logps/rejected": -921.4559326171875, "loss": 0.1305, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7650404572486877, "rewards/margins": 8.050275802612305, "rewards/rejected": -8.815316200256348, "step": 72180 }, { "epoch": 0.86, "learning_rate": 2.7587257285429436e-07, "logits/chosen": -2.8652844429016113, "logits/rejected": -2.5652453899383545, "logps/chosen": -75.47068786621094, "logps/rejected": -784.9139404296875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.3297392725944519, "rewards/margins": 7.1508378982543945, "rewards/rejected": -7.480576515197754, "step": 72190 }, { "epoch": 0.86, "learning_rate": 2.753957501672838e-07, "logits/chosen": -2.878082275390625, "logits/rejected": -2.310594320297241, "logps/chosen": -86.3871841430664, "logps/rejected": -952.2951049804688, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.4242398142814636, "rewards/margins": 8.715067863464355, "rewards/rejected": -9.139309883117676, "step": 72200 }, { "epoch": 0.86, "learning_rate": 2.7491931588999656e-07, "logits/chosen": -2.9019267559051514, "logits/rejected": -2.2459850311279297, "logps/chosen": -108.6673812866211, "logps/rejected": -931.8197021484375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6066773533821106, "rewards/margins": 8.319964408874512, "rewards/rejected": -8.926641464233398, "step": 72210 }, { "epoch": 0.86, "learning_rate": 2.744432701056168e-07, "logits/chosen": -2.8286945819854736, "logits/rejected": -2.194561004638672, "logps/chosen": -107.97978210449219, "logps/rejected": -896.8787231445312, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5892127156257629, "rewards/margins": 7.981196403503418, "rewards/rejected": -8.570409774780273, "step": 72220 }, { "epoch": 0.86, "learning_rate": 2.7396761289726097e-07, "logits/chosen": -2.885498523712158, "logits/rejected": -2.4220240116119385, "logps/chosen": -99.34211730957031, "logps/rejected": -902.9002075195312, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5283528566360474, "rewards/margins": 8.121326446533203, "rewards/rejected": -8.649678230285645, "step": 72230 }, { "epoch": 0.86, "learning_rate": 2.7349234434797726e-07, "logits/chosen": -2.8730366230010986, "logits/rejected": -2.2028212547302246, "logps/chosen": -123.33259582519531, "logps/rejected": -1006.634765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7769057750701904, "rewards/margins": 8.894081115722656, "rewards/rejected": -9.67098617553711, "step": 72240 }, { "epoch": 0.86, "learning_rate": 2.730174645407471e-07, "logits/chosen": -2.8966293334960938, "logits/rejected": -2.3666274547576904, "logps/chosen": -89.59107971191406, "logps/rejected": -880.6901245117188, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.4377613663673401, "rewards/margins": 7.981167793273926, "rewards/rejected": -8.418929100036621, "step": 72250 }, { "epoch": 0.86, "learning_rate": 2.725429735584817e-07, "logits/chosen": -2.848433017730713, "logits/rejected": -2.406641721725464, "logps/chosen": -109.38447570800781, "logps/rejected": -954.2926025390625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.6218734979629517, "rewards/margins": 8.52637004852295, "rewards/rejected": -9.14824390411377, "step": 72260 }, { "epoch": 0.87, "learning_rate": 2.7206887148402666e-07, "logits/chosen": -2.919238567352295, "logits/rejected": -2.4588332176208496, "logps/chosen": -97.08427429199219, "logps/rejected": -844.8970947265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5041815042495728, "rewards/margins": 7.547338962554932, "rewards/rejected": -8.051519393920898, "step": 72270 }, { "epoch": 0.87, "learning_rate": 2.715951584001589e-07, "logits/chosen": -2.894658088684082, "logits/rejected": -2.581472873687744, "logps/chosen": -75.15370178222656, "logps/rejected": -818.8695678710938, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.3761148154735565, "rewards/margins": 7.434503078460693, "rewards/rejected": -7.8106184005737305, "step": 72280 }, { "epoch": 0.87, "learning_rate": 2.7112183438958745e-07, "logits/chosen": -2.932781934738159, "logits/rejected": -2.4434163570404053, "logps/chosen": -93.88542175292969, "logps/rejected": -882.8099365234375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.516594648361206, "rewards/margins": 7.921006679534912, "rewards/rejected": -8.437601089477539, "step": 72290 }, { "epoch": 0.87, "learning_rate": 2.706488995349532e-07, "logits/chosen": -2.8706653118133545, "logits/rejected": -2.2258472442626953, "logps/chosen": -120.24930572509766, "logps/rejected": -871.3810424804688, "loss": 0.0836, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6790816187858582, "rewards/margins": 7.657035827636719, "rewards/rejected": -8.3361177444458, "step": 72300 }, { "epoch": 0.87, "learning_rate": 2.7017635391882927e-07, "logits/chosen": -2.8904061317443848, "logits/rejected": -2.4788401126861572, "logps/chosen": -82.81336975097656, "logps/rejected": -873.6351318359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.42223405838012695, "rewards/margins": 7.928945064544678, "rewards/rejected": -8.351179122924805, "step": 72310 }, { "epoch": 0.87, "learning_rate": 2.6970419762372136e-07, "logits/chosen": -2.9124650955200195, "logits/rejected": -2.3150060176849365, "logps/chosen": -95.92679595947266, "logps/rejected": -915.8938598632812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5124560594558716, "rewards/margins": 8.255230903625488, "rewards/rejected": -8.76768684387207, "step": 72320 }, { "epoch": 0.87, "learning_rate": 2.6923243073206565e-07, "logits/chosen": -2.8452000617980957, "logits/rejected": -2.222429037094116, "logps/chosen": -110.4839859008789, "logps/rejected": -925.236328125, "loss": 0.1557, "rewards/accuracies": 1.0, "rewards/chosen": -0.5895298719406128, "rewards/margins": 8.269081115722656, "rewards/rejected": -8.858611106872559, "step": 72330 }, { "epoch": 0.87, "learning_rate": 2.6876105332623166e-07, "logits/chosen": -2.897627115249634, "logits/rejected": -2.6213502883911133, "logps/chosen": -98.11076354980469, "logps/rejected": -744.2679443359375, "loss": 0.2166, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6111807227134705, "rewards/margins": 6.465290069580078, "rewards/rejected": -7.076470851898193, "step": 72340 }, { "epoch": 0.87, "learning_rate": 2.6829006548852096e-07, "logits/chosen": -2.9183382987976074, "logits/rejected": -2.2287964820861816, "logps/chosen": -108.3375244140625, "logps/rejected": -971.9990234375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6343773007392883, "rewards/margins": 8.682500839233398, "rewards/rejected": -9.316878318786621, "step": 72350 }, { "epoch": 0.87, "learning_rate": 2.6781946730116664e-07, "logits/chosen": -2.9302725791931152, "logits/rejected": -2.3119781017303467, "logps/chosen": -140.755615234375, "logps/rejected": -885.7658081054688, "loss": 0.1151, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8581458926200867, "rewards/margins": 7.598082065582275, "rewards/rejected": -8.45622730255127, "step": 72360 }, { "epoch": 0.87, "learning_rate": 2.673492588463342e-07, "logits/chosen": -2.8398101329803467, "logits/rejected": -2.279625415802002, "logps/chosen": -102.35066223144531, "logps/rejected": -929.4561767578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5538989305496216, "rewards/margins": 8.358701705932617, "rewards/rejected": -8.91260051727295, "step": 72370 }, { "epoch": 0.87, "learning_rate": 2.668794402061195e-07, "logits/chosen": -2.8880579471588135, "logits/rejected": -2.3407044410705566, "logps/chosen": -101.414794921875, "logps/rejected": -960.1458129882812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.49262088537216187, "rewards/margins": 8.721540451049805, "rewards/rejected": -9.21416187286377, "step": 72380 }, { "epoch": 0.87, "learning_rate": 2.6641001146255257e-07, "logits/chosen": -2.8988070487976074, "logits/rejected": -2.4944605827331543, "logps/chosen": -90.7144775390625, "logps/rejected": -898.0167846679688, "loss": 0.0846, "rewards/accuracies": 1.0, "rewards/chosen": -0.5043137669563293, "rewards/margins": 8.085563659667969, "rewards/rejected": -8.589877128601074, "step": 72390 }, { "epoch": 0.87, "learning_rate": 2.6594097269759393e-07, "logits/chosen": -2.8833413124084473, "logits/rejected": -2.2369415760040283, "logps/chosen": -110.7470703125, "logps/rejected": -983.7379760742188, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.6243123412132263, "rewards/margins": 8.791374206542969, "rewards/rejected": -9.415687561035156, "step": 72400 }, { "epoch": 0.87, "learning_rate": 2.654723239931367e-07, "logits/chosen": -2.881077289581299, "logits/rejected": -2.428579092025757, "logps/chosen": -92.79920959472656, "logps/rejected": -912.6917114257812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4710715711116791, "rewards/margins": 8.272706031799316, "rewards/rejected": -8.743776321411133, "step": 72410 }, { "epoch": 0.87, "learning_rate": 2.650040654310057e-07, "logits/chosen": -2.8919310569763184, "logits/rejected": -2.3606035709381104, "logps/chosen": -111.79066467285156, "logps/rejected": -935.4969482421875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6458468437194824, "rewards/margins": 8.320558547973633, "rewards/rejected": -8.966404914855957, "step": 72420 }, { "epoch": 0.87, "learning_rate": 2.6453619709295745e-07, "logits/chosen": -2.876342535018921, "logits/rejected": -2.3485217094421387, "logps/chosen": -106.78840637207031, "logps/rejected": -876.5660400390625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5987459421157837, "rewards/margins": 7.7754364013671875, "rewards/rejected": -8.374181747436523, "step": 72430 }, { "epoch": 0.87, "learning_rate": 2.640687190606808e-07, "logits/chosen": -2.853288173675537, "logits/rejected": -2.1834187507629395, "logps/chosen": -125.63423919677734, "logps/rejected": -1015.04296875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7345770597457886, "rewards/margins": 9.025729179382324, "rewards/rejected": -9.760305404663086, "step": 72440 }, { "epoch": 0.87, "learning_rate": 2.636016314157952e-07, "logits/chosen": -2.90216326713562, "logits/rejected": -2.3060154914855957, "logps/chosen": -123.21711730957031, "logps/rejected": -998.70703125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7299851179122925, "rewards/margins": 8.849327087402344, "rewards/rejected": -9.579312324523926, "step": 72450 }, { "epoch": 0.87, "learning_rate": 2.631349342398537e-07, "logits/chosen": -2.889796495437622, "logits/rejected": -2.3763720989227295, "logps/chosen": -102.51839447021484, "logps/rejected": -942.0714721679688, "loss": 0.0348, "rewards/accuracies": 1.0, "rewards/chosen": -0.5502869486808777, "rewards/margins": 8.476673126220703, "rewards/rejected": -9.026959419250488, "step": 72460 }, { "epoch": 0.87, "learning_rate": 2.6266862761433977e-07, "logits/chosen": -2.8139684200286865, "logits/rejected": -2.3621926307678223, "logps/chosen": -95.33122253417969, "logps/rejected": -1042.987060546875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5291212201118469, "rewards/margins": 9.513774871826172, "rewards/rejected": -10.04289722442627, "step": 72470 }, { "epoch": 0.87, "learning_rate": 2.6220271162066963e-07, "logits/chosen": -2.8803980350494385, "logits/rejected": -2.4055631160736084, "logps/chosen": -106.0936508178711, "logps/rejected": -890.1795043945312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5736191868782043, "rewards/margins": 7.945687294006348, "rewards/rejected": -8.519305229187012, "step": 72480 }, { "epoch": 0.87, "learning_rate": 2.61737186340191e-07, "logits/chosen": -2.896746873855591, "logits/rejected": -2.1694140434265137, "logps/chosen": -125.78904724121094, "logps/rejected": -945.9122924804688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7525679469108582, "rewards/margins": 8.313236236572266, "rewards/rejected": -9.065805435180664, "step": 72490 }, { "epoch": 0.87, "learning_rate": 2.6127205185418337e-07, "logits/chosen": -2.907803773880005, "logits/rejected": -2.278348445892334, "logps/chosen": -120.83646392822266, "logps/rejected": -892.7277221679688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6876375079154968, "rewards/margins": 7.849183559417725, "rewards/rejected": -8.536821365356445, "step": 72500 }, { "epoch": 0.87, "learning_rate": 2.6080730824385723e-07, "logits/chosen": -2.8767569065093994, "logits/rejected": -1.923784613609314, "logps/chosen": -131.65155029296875, "logps/rejected": -1148.6728515625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7586622834205627, "rewards/margins": 10.31805419921875, "rewards/rejected": -11.076715469360352, "step": 72510 }, { "epoch": 0.87, "learning_rate": 2.603429555903561e-07, "logits/chosen": -2.894767999649048, "logits/rejected": -2.22224497795105, "logps/chosen": -121.83902740478516, "logps/rejected": -972.6220703125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6341342329978943, "rewards/margins": 8.700326919555664, "rewards/rejected": -9.33446216583252, "step": 72520 }, { "epoch": 0.87, "learning_rate": 2.598789939747545e-07, "logits/chosen": -2.8567774295806885, "logits/rejected": -2.0016181468963623, "logps/chosen": -140.18785095214844, "logps/rejected": -1187.17333984375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8085090517997742, "rewards/margins": 10.653961181640625, "rewards/rejected": -11.462471008300781, "step": 72530 }, { "epoch": 0.87, "learning_rate": 2.5941542347805883e-07, "logits/chosen": -2.8404393196105957, "logits/rejected": -2.3253002166748047, "logps/chosen": -118.43440246582031, "logps/rejected": -878.6510009765625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7035125494003296, "rewards/margins": 7.690320014953613, "rewards/rejected": -8.393832206726074, "step": 72540 }, { "epoch": 0.87, "learning_rate": 2.589522441812073e-07, "logits/chosen": -2.8635451793670654, "logits/rejected": -2.1631617546081543, "logps/chosen": -111.82341003417969, "logps/rejected": -886.2462158203125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5960246324539185, "rewards/margins": 7.882132053375244, "rewards/rejected": -8.478156089782715, "step": 72550 }, { "epoch": 0.87, "learning_rate": 2.584894561650697e-07, "logits/chosen": -2.912524461746216, "logits/rejected": -2.4051942825317383, "logps/chosen": -117.59916687011719, "logps/rejected": -948.1900634765625, "loss": 0.0997, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7192775011062622, "rewards/margins": 8.374216079711914, "rewards/rejected": -9.093493461608887, "step": 72560 }, { "epoch": 0.87, "learning_rate": 2.580270595104481e-07, "logits/chosen": -2.8819832801818848, "logits/rejected": -2.3907628059387207, "logps/chosen": -106.69743347167969, "logps/rejected": -878.2579956054688, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.596269965171814, "rewards/margins": 7.793309211730957, "rewards/rejected": -8.389577865600586, "step": 72570 }, { "epoch": 0.87, "learning_rate": 2.5756505429807455e-07, "logits/chosen": -2.8698840141296387, "logits/rejected": -2.567812919616699, "logps/chosen": -88.02743530273438, "logps/rejected": -796.5051879882812, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.48569759726524353, "rewards/margins": 7.111806392669678, "rewards/rejected": -7.597502708435059, "step": 72580 }, { "epoch": 0.87, "learning_rate": 2.5710344060861427e-07, "logits/chosen": -2.8922009468078613, "logits/rejected": -2.2388722896575928, "logps/chosen": -174.26406860351562, "logps/rejected": -931.2146606445312, "loss": 0.1313, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1591116189956665, "rewards/margins": 7.7477569580078125, "rewards/rejected": -8.906867980957031, "step": 72590 }, { "epoch": 0.87, "learning_rate": 2.566422185226641e-07, "logits/chosen": -2.9237313270568848, "logits/rejected": -2.5818467140197754, "logps/chosen": -88.7587890625, "logps/rejected": -845.5103759765625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.43632927536964417, "rewards/margins": 7.637110710144043, "rewards/rejected": -8.073439598083496, "step": 72600 }, { "epoch": 0.87, "learning_rate": 2.561813881207517e-07, "logits/chosen": -2.860985279083252, "logits/rejected": -2.1071362495422363, "logps/chosen": -130.01853942871094, "logps/rejected": -1022.4302978515625, "loss": 0.1077, "rewards/accuracies": 1.0, "rewards/chosen": -0.7177618741989136, "rewards/margins": 9.104063987731934, "rewards/rejected": -9.821825981140137, "step": 72610 }, { "epoch": 0.87, "learning_rate": 2.557209494833371e-07, "logits/chosen": -2.9008500576019287, "logits/rejected": -2.2891736030578613, "logps/chosen": -108.87010192871094, "logps/rejected": -910.0988159179688, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.6264809370040894, "rewards/margins": 8.094025611877441, "rewards/rejected": -8.720505714416504, "step": 72620 }, { "epoch": 0.87, "learning_rate": 2.552609026908115e-07, "logits/chosen": -2.8903374671936035, "logits/rejected": -2.2815675735473633, "logps/chosen": -126.5000991821289, "logps/rejected": -1001.3546752929688, "loss": 0.075, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7668901681900024, "rewards/margins": 8.835836410522461, "rewards/rejected": -9.602727890014648, "step": 72630 }, { "epoch": 0.87, "learning_rate": 2.5480124782349846e-07, "logits/chosen": -2.875713348388672, "logits/rejected": -2.3490991592407227, "logps/chosen": -96.30670928955078, "logps/rejected": -871.7195434570312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5050432085990906, "rewards/margins": 7.828129768371582, "rewards/rejected": -8.333172798156738, "step": 72640 }, { "epoch": 0.87, "learning_rate": 2.5434198496165105e-07, "logits/chosen": -2.880821466445923, "logits/rejected": -2.451097249984741, "logps/chosen": -104.9956283569336, "logps/rejected": -903.2296752929688, "loss": 0.254, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5957919359207153, "rewards/margins": 8.052739143371582, "rewards/rejected": -8.648530960083008, "step": 72650 }, { "epoch": 0.87, "learning_rate": 2.5388311418545613e-07, "logits/chosen": -2.884967803955078, "logits/rejected": -2.367227077484131, "logps/chosen": -91.54879760742188, "logps/rejected": -903.4119262695312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.466693252325058, "rewards/margins": 8.183415412902832, "rewards/rejected": -8.650108337402344, "step": 72660 }, { "epoch": 0.87, "learning_rate": 2.534246355750311e-07, "logits/chosen": -2.9019529819488525, "logits/rejected": -2.3461174964904785, "logps/chosen": -91.70853424072266, "logps/rejected": -886.5159301757812, "loss": 0.0494, "rewards/accuracies": 1.0, "rewards/chosen": -0.5017756819725037, "rewards/margins": 7.979855537414551, "rewards/rejected": -8.481630325317383, "step": 72670 }, { "epoch": 0.87, "learning_rate": 2.529665492104252e-07, "logits/chosen": -2.8928451538085938, "logits/rejected": -2.6058788299560547, "logps/chosen": -68.29907989501953, "logps/rejected": -813.9754028320312, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.3005372881889343, "rewards/margins": 7.4615607261657715, "rewards/rejected": -7.7620978355407715, "step": 72680 }, { "epoch": 0.87, "learning_rate": 2.5250885517161873e-07, "logits/chosen": -2.911496162414551, "logits/rejected": -2.181039333343506, "logps/chosen": -126.27912902832031, "logps/rejected": -1074.007568359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7624815702438354, "rewards/margins": 9.576857566833496, "rewards/rejected": -10.339339256286621, "step": 72690 }, { "epoch": 0.87, "learning_rate": 2.5205155353852455e-07, "logits/chosen": -2.875082015991211, "logits/rejected": -2.4342970848083496, "logps/chosen": -79.6257095336914, "logps/rejected": -881.1375122070312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.36593374609947205, "rewards/margins": 8.06279468536377, "rewards/rejected": -8.428728103637695, "step": 72700 }, { "epoch": 0.87, "learning_rate": 2.515946443909853e-07, "logits/chosen": -2.854975700378418, "logits/rejected": -2.0783562660217285, "logps/chosen": -133.66090393066406, "logps/rejected": -1096.9808349609375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8280625343322754, "rewards/margins": 9.743447303771973, "rewards/rejected": -10.57150936126709, "step": 72710 }, { "epoch": 0.87, "learning_rate": 2.5113812780877654e-07, "logits/chosen": -2.8755111694335938, "logits/rejected": -2.368612289428711, "logps/chosen": -122.63740539550781, "logps/rejected": -859.5914916992188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7830926775932312, "rewards/margins": 7.408744812011719, "rewards/rejected": -8.1918363571167, "step": 72720 }, { "epoch": 0.87, "learning_rate": 2.5068200387160456e-07, "logits/chosen": -2.87450909614563, "logits/rejected": -2.4078197479248047, "logps/chosen": -108.99235534667969, "logps/rejected": -915.9656372070312, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.6319150924682617, "rewards/margins": 8.129742622375488, "rewards/rejected": -8.76165771484375, "step": 72730 }, { "epoch": 0.87, "learning_rate": 2.502262726591076e-07, "logits/chosen": -2.8887367248535156, "logits/rejected": -2.5199832916259766, "logps/chosen": -95.7253646850586, "logps/rejected": -800.0872802734375, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -0.5579680800437927, "rewards/margins": 7.065737247467041, "rewards/rejected": -7.623705863952637, "step": 72740 }, { "epoch": 0.87, "learning_rate": 2.4977093425085505e-07, "logits/chosen": -2.861147403717041, "logits/rejected": -2.3759167194366455, "logps/chosen": -103.10200500488281, "logps/rejected": -929.9249877929688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6028879284858704, "rewards/margins": 8.303194046020508, "rewards/rejected": -8.906082153320312, "step": 72750 }, { "epoch": 0.87, "learning_rate": 2.4931598872634744e-07, "logits/chosen": -2.88936710357666, "logits/rejected": -2.3475475311279297, "logps/chosen": -105.98466491699219, "logps/rejected": -956.2152099609375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.5960540175437927, "rewards/margins": 8.56904125213623, "rewards/rejected": -9.165094375610352, "step": 72760 }, { "epoch": 0.87, "learning_rate": 2.488614361650174e-07, "logits/chosen": -2.943669319152832, "logits/rejected": -2.5170652866363525, "logps/chosen": -87.79861450195312, "logps/rejected": -864.83740234375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.41713395714759827, "rewards/margins": 7.862113952636719, "rewards/rejected": -8.279248237609863, "step": 72770 }, { "epoch": 0.87, "learning_rate": 2.484072766462281e-07, "logits/chosen": -2.946141242980957, "logits/rejected": -2.419503688812256, "logps/chosen": -104.8613052368164, "logps/rejected": -858.7061767578125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5959807634353638, "rewards/margins": 7.594236850738525, "rewards/rejected": -8.190217971801758, "step": 72780 }, { "epoch": 0.87, "learning_rate": 2.479535102492747e-07, "logits/chosen": -2.8207390308380127, "logits/rejected": -2.1456496715545654, "logps/chosen": -122.61808013916016, "logps/rejected": -1034.0511474609375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6847017407417297, "rewards/margins": 9.259674072265625, "rewards/rejected": -9.944375991821289, "step": 72790 }, { "epoch": 0.87, "learning_rate": 2.4750013705338386e-07, "logits/chosen": -2.9436042308807373, "logits/rejected": -2.187161922454834, "logps/chosen": -127.714111328125, "logps/rejected": -1033.499755859375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6932419538497925, "rewards/margins": 9.216575622558594, "rewards/rejected": -9.909818649291992, "step": 72800 }, { "epoch": 0.87, "learning_rate": 2.470471571377131e-07, "logits/chosen": -2.9168851375579834, "logits/rejected": -2.218364953994751, "logps/chosen": -122.15677642822266, "logps/rejected": -971.2448120117188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6724907159805298, "rewards/margins": 8.6366548538208, "rewards/rejected": -9.309146881103516, "step": 72810 }, { "epoch": 0.87, "learning_rate": 2.4659457058135163e-07, "logits/chosen": -2.860762357711792, "logits/rejected": -2.1101880073547363, "logps/chosen": -119.5765380859375, "logps/rejected": -948.2317504882812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6091048121452332, "rewards/margins": 8.466367721557617, "rewards/rejected": -9.075471878051758, "step": 72820 }, { "epoch": 0.87, "learning_rate": 2.4614237746331924e-07, "logits/chosen": -2.8623626232147217, "logits/rejected": -2.143998622894287, "logps/chosen": -131.56198120117188, "logps/rejected": -974.3723754882812, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7688871622085571, "rewards/margins": 8.562832832336426, "rewards/rejected": -9.331719398498535, "step": 72830 }, { "epoch": 0.87, "learning_rate": 2.456905778625679e-07, "logits/chosen": -2.8327622413635254, "logits/rejected": -2.2661967277526855, "logps/chosen": -122.72959899902344, "logps/rejected": -1044.668212890625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.706883430480957, "rewards/margins": 9.332192420959473, "rewards/rejected": -10.03907585144043, "step": 72840 }, { "epoch": 0.87, "learning_rate": 2.4523917185798053e-07, "logits/chosen": -2.8928282260894775, "logits/rejected": -2.494662046432495, "logps/chosen": -79.81189727783203, "logps/rejected": -815.2619018554688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3766304552555084, "rewards/margins": 7.409745693206787, "rewards/rejected": -7.786375522613525, "step": 72850 }, { "epoch": 0.87, "learning_rate": 2.447881595283713e-07, "logits/chosen": -2.8968441486358643, "logits/rejected": -2.4656946659088135, "logps/chosen": -99.19287872314453, "logps/rejected": -914.7022705078125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5300337076187134, "rewards/margins": 8.238116264343262, "rewards/rejected": -8.768150329589844, "step": 72860 }, { "epoch": 0.87, "learning_rate": 2.4433754095248617e-07, "logits/chosen": -2.8949222564697266, "logits/rejected": -2.449296474456787, "logps/chosen": -86.07938385009766, "logps/rejected": -828.3211059570312, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.4411527216434479, "rewards/margins": 7.4669928550720215, "rewards/rejected": -7.908144474029541, "step": 72870 }, { "epoch": 0.87, "learning_rate": 2.4388731620900136e-07, "logits/chosen": -2.849696636199951, "logits/rejected": -2.13804292678833, "logps/chosen": -120.61366271972656, "logps/rejected": -966.44189453125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7407598495483398, "rewards/margins": 8.519888877868652, "rewards/rejected": -9.260650634765625, "step": 72880 }, { "epoch": 0.87, "learning_rate": 2.434374853765256e-07, "logits/chosen": -2.8803486824035645, "logits/rejected": -2.5981783866882324, "logps/chosen": -61.886741638183594, "logps/rejected": -799.3587036132812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.26814013719558716, "rewards/margins": 7.36477518081665, "rewards/rejected": -7.632913112640381, "step": 72890 }, { "epoch": 0.87, "learning_rate": 2.42988048533597e-07, "logits/chosen": -2.843384027481079, "logits/rejected": -2.261744499206543, "logps/chosen": -113.95939636230469, "logps/rejected": -904.181640625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.6915680170059204, "rewards/margins": 7.951773166656494, "rewards/rejected": -8.643341064453125, "step": 72900 }, { "epoch": 0.87, "learning_rate": 2.4253900575868655e-07, "logits/chosen": -2.8718791007995605, "logits/rejected": -2.5830533504486084, "logps/chosen": -73.3221664428711, "logps/rejected": -749.3778076171875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.32243067026138306, "rewards/margins": 6.809854030609131, "rewards/rejected": -7.132285118103027, "step": 72910 }, { "epoch": 0.87, "learning_rate": 2.420903571301958e-07, "logits/chosen": -2.8606441020965576, "logits/rejected": -2.3975653648376465, "logps/chosen": -112.0801773071289, "logps/rejected": -870.0013427734375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6197420358657837, "rewards/margins": 7.688178062438965, "rewards/rejected": -8.307920455932617, "step": 72920 }, { "epoch": 0.87, "learning_rate": 2.416421027264579e-07, "logits/chosen": -2.931910991668701, "logits/rejected": -2.3712284564971924, "logps/chosen": -107.70936584472656, "logps/rejected": -955.4710693359375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5852320194244385, "rewards/margins": 8.577756881713867, "rewards/rejected": -9.162988662719727, "step": 72930 }, { "epoch": 0.87, "learning_rate": 2.411942426257363e-07, "logits/chosen": -2.8984313011169434, "logits/rejected": -2.5340733528137207, "logps/chosen": -82.3561019897461, "logps/rejected": -847.1500244140625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.422060489654541, "rewards/margins": 7.665553092956543, "rewards/rejected": -8.087615013122559, "step": 72940 }, { "epoch": 0.87, "learning_rate": 2.407467769062269e-07, "logits/chosen": -2.851196765899658, "logits/rejected": -2.484501600265503, "logps/chosen": -93.5981216430664, "logps/rejected": -894.404296875, "loss": 0.0942, "rewards/accuracies": 1.0, "rewards/chosen": -0.47750210762023926, "rewards/margins": 8.093294143676758, "rewards/rejected": -8.570796012878418, "step": 72950 }, { "epoch": 0.87, "learning_rate": 2.402997056460549e-07, "logits/chosen": -2.901362895965576, "logits/rejected": -2.4170095920562744, "logps/chosen": -113.2480697631836, "logps/rejected": -940.6788940429688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6589824557304382, "rewards/margins": 8.353409767150879, "rewards/rejected": -9.012392044067383, "step": 72960 }, { "epoch": 0.87, "learning_rate": 2.398530289232784e-07, "logits/chosen": -2.852301597595215, "logits/rejected": -2.220688819885254, "logps/chosen": -121.8208999633789, "logps/rejected": -869.9908447265625, "loss": 0.0881, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7600879073143005, "rewards/margins": 7.541746616363525, "rewards/rejected": -8.301835060119629, "step": 72970 }, { "epoch": 0.87, "learning_rate": 2.394067468158856e-07, "logits/chosen": -2.882721185684204, "logits/rejected": -2.446075201034546, "logps/chosen": -97.89063262939453, "logps/rejected": -904.3215942382812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5279262661933899, "rewards/margins": 8.121284484863281, "rewards/rejected": -8.649211883544922, "step": 72980 }, { "epoch": 0.87, "learning_rate": 2.3896085940179644e-07, "logits/chosen": -2.8984761238098145, "logits/rejected": -2.2228386402130127, "logps/chosen": -131.16563415527344, "logps/rejected": -939.408203125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8195670247077942, "rewards/margins": 8.17712116241455, "rewards/rejected": -8.996687889099121, "step": 72990 }, { "epoch": 0.87, "learning_rate": 2.3851536675886134e-07, "logits/chosen": -2.893921375274658, "logits/rejected": -2.2531425952911377, "logps/chosen": -104.90252685546875, "logps/rejected": -928.4549560546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5574895143508911, "rewards/margins": 8.347139358520508, "rewards/rejected": -8.904627799987793, "step": 73000 }, { "epoch": 0.87, "learning_rate": 2.380702689648623e-07, "logits/chosen": -2.897564649581909, "logits/rejected": -2.106199264526367, "logps/chosen": -131.80615234375, "logps/rejected": -1029.3671875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8295024037361145, "rewards/margins": 9.07136344909668, "rewards/rejected": -9.900866508483887, "step": 73010 }, { "epoch": 0.87, "learning_rate": 2.3762556609751242e-07, "logits/chosen": -2.9298434257507324, "logits/rejected": -2.532426357269287, "logps/chosen": -92.80278015136719, "logps/rejected": -847.6370849609375, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.48979195952415466, "rewards/margins": 7.615300178527832, "rewards/rejected": -8.10509204864502, "step": 73020 }, { "epoch": 0.87, "learning_rate": 2.3718125823445488e-07, "logits/chosen": -2.8711585998535156, "logits/rejected": -2.469823122024536, "logps/chosen": -83.90556335449219, "logps/rejected": -895.7282104492188, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.4414937496185303, "rewards/margins": 8.14540958404541, "rewards/rejected": -8.58690357208252, "step": 73030 }, { "epoch": 0.87, "learning_rate": 2.3673734545326482e-07, "logits/chosen": -2.8755316734313965, "logits/rejected": -2.223327159881592, "logps/chosen": -97.09767150878906, "logps/rejected": -890.7760620117188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.47467002272605896, "rewards/margins": 8.051987648010254, "rewards/rejected": -8.526658058166504, "step": 73040 }, { "epoch": 0.87, "learning_rate": 2.3629382783144833e-07, "logits/chosen": -2.844209909439087, "logits/rejected": -2.4323716163635254, "logps/chosen": -87.35418701171875, "logps/rejected": -862.3458251953125, "loss": 0.1008, "rewards/accuracies": 1.0, "rewards/chosen": -0.41452494263648987, "rewards/margins": 7.8101935386657715, "rewards/rejected": -8.224719047546387, "step": 73050 }, { "epoch": 0.87, "learning_rate": 2.3585070544644235e-07, "logits/chosen": -2.8591182231903076, "logits/rejected": -2.374258518218994, "logps/chosen": -90.47591400146484, "logps/rejected": -924.81787109375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.4551795423030853, "rewards/margins": 8.401179313659668, "rewards/rejected": -8.856358528137207, "step": 73060 }, { "epoch": 0.87, "learning_rate": 2.3540797837561468e-07, "logits/chosen": -2.9083077907562256, "logits/rejected": -2.4469704627990723, "logps/chosen": -105.29750061035156, "logps/rejected": -892.4094848632812, "loss": 0.075, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5690131783485413, "rewards/margins": 7.970107078552246, "rewards/rejected": -8.5391206741333, "step": 73070 }, { "epoch": 0.87, "learning_rate": 2.3496564669626466e-07, "logits/chosen": -2.885957717895508, "logits/rejected": -2.60430908203125, "logps/chosen": -88.21510314941406, "logps/rejected": -775.3373413085938, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4809935986995697, "rewards/margins": 6.898309230804443, "rewards/rejected": -7.379302024841309, "step": 73080 }, { "epoch": 0.87, "learning_rate": 2.3452371048562212e-07, "logits/chosen": -2.879809856414795, "logits/rejected": -2.3768868446350098, "logps/chosen": -96.98870849609375, "logps/rejected": -825.2041015625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.5401221513748169, "rewards/margins": 7.335339546203613, "rewards/rejected": -7.875460147857666, "step": 73090 }, { "epoch": 0.88, "learning_rate": 2.3408216982084703e-07, "logits/chosen": -2.8616278171539307, "logits/rejected": -2.2744879722595215, "logps/chosen": -118.80611419677734, "logps/rejected": -945.3302001953125, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.7410396337509155, "rewards/margins": 8.309553146362305, "rewards/rejected": -9.050593376159668, "step": 73100 }, { "epoch": 0.88, "learning_rate": 2.336410247790319e-07, "logits/chosen": -2.8970789909362793, "logits/rejected": -2.5001440048217773, "logps/chosen": -89.27722930908203, "logps/rejected": -862.3937377929688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.49028831720352173, "rewards/margins": 7.75998067855835, "rewards/rejected": -8.250268936157227, "step": 73110 }, { "epoch": 0.88, "learning_rate": 2.3320027543719925e-07, "logits/chosen": -2.9172050952911377, "logits/rejected": -2.5237364768981934, "logps/chosen": -97.54600524902344, "logps/rejected": -859.2410278320312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5609147548675537, "rewards/margins": 7.6473388671875, "rewards/rejected": -8.208253860473633, "step": 73120 }, { "epoch": 0.88, "learning_rate": 2.3275992187230283e-07, "logits/chosen": -2.897763729095459, "logits/rejected": -2.262230396270752, "logps/chosen": -132.03378295898438, "logps/rejected": -860.3936767578125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8497712016105652, "rewards/margins": 7.3723344802856445, "rewards/rejected": -8.222105026245117, "step": 73130 }, { "epoch": 0.88, "learning_rate": 2.3231996416122728e-07, "logits/chosen": -2.8772220611572266, "logits/rejected": -2.0836801528930664, "logps/chosen": -126.4191665649414, "logps/rejected": -1043.160400390625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7183213233947754, "rewards/margins": 9.301389694213867, "rewards/rejected": -10.019710540771484, "step": 73140 }, { "epoch": 0.88, "learning_rate": 2.3188040238078746e-07, "logits/chosen": -2.861516237258911, "logits/rejected": -2.1339612007141113, "logps/chosen": -126.37944030761719, "logps/rejected": -993.3279418945312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7292708158493042, "rewards/margins": 8.795677185058594, "rewards/rejected": -9.524947166442871, "step": 73150 }, { "epoch": 0.88, "learning_rate": 2.314412366077304e-07, "logits/chosen": -2.8766818046569824, "logits/rejected": -2.267688512802124, "logps/chosen": -104.97599029541016, "logps/rejected": -907.3269653320312, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5680988430976868, "rewards/margins": 8.113449096679688, "rewards/rejected": -8.681547164916992, "step": 73160 }, { "epoch": 0.88, "learning_rate": 2.3100246691873245e-07, "logits/chosen": -2.8900532722473145, "logits/rejected": -2.282285213470459, "logps/chosen": -109.62522888183594, "logps/rejected": -958.580078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5326465964317322, "rewards/margins": 8.65356731414795, "rewards/rejected": -9.186213493347168, "step": 73170 }, { "epoch": 0.88, "learning_rate": 2.3056409339040132e-07, "logits/chosen": -2.8890230655670166, "logits/rejected": -2.4058542251586914, "logps/chosen": -88.67277526855469, "logps/rejected": -887.7197265625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.46949586272239685, "rewards/margins": 8.034262657165527, "rewards/rejected": -8.503759384155273, "step": 73180 }, { "epoch": 0.88, "learning_rate": 2.301261160992771e-07, "logits/chosen": -2.8849549293518066, "logits/rejected": -2.116105556488037, "logps/chosen": -118.8399658203125, "logps/rejected": -990.1710205078125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.6102794408798218, "rewards/margins": 8.888096809387207, "rewards/rejected": -9.498376846313477, "step": 73190 }, { "epoch": 0.88, "learning_rate": 2.2968853512182837e-07, "logits/chosen": -2.893799304962158, "logits/rejected": -2.207228422164917, "logps/chosen": -122.8674087524414, "logps/rejected": -1067.572509765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6564868688583374, "rewards/margins": 9.604543685913086, "rewards/rejected": -10.261029243469238, "step": 73200 }, { "epoch": 0.88, "learning_rate": 2.2925135053445653e-07, "logits/chosen": -2.8588101863861084, "logits/rejected": -2.0115485191345215, "logps/chosen": -129.67361450195312, "logps/rejected": -1074.8428955078125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6923465728759766, "rewards/margins": 9.651535034179688, "rewards/rejected": -10.34388256072998, "step": 73210 }, { "epoch": 0.88, "learning_rate": 2.2881456241349187e-07, "logits/chosen": -2.9036450386047363, "logits/rejected": -2.32474946975708, "logps/chosen": -101.27053833007812, "logps/rejected": -933.5921630859375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5630444288253784, "rewards/margins": 8.374310493469238, "rewards/rejected": -8.93735408782959, "step": 73220 }, { "epoch": 0.88, "learning_rate": 2.2837817083519647e-07, "logits/chosen": -2.8775343894958496, "logits/rejected": -2.15269136428833, "logps/chosen": -121.5613021850586, "logps/rejected": -1008.5928955078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.732143759727478, "rewards/margins": 8.944841384887695, "rewards/rejected": -9.676983833312988, "step": 73230 }, { "epoch": 0.88, "learning_rate": 2.279421758757633e-07, "logits/chosen": -2.8708133697509766, "logits/rejected": -2.237964153289795, "logps/chosen": -113.02763366699219, "logps/rejected": -959.68359375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6504203081130981, "rewards/margins": 8.540966033935547, "rewards/rejected": -9.191386222839355, "step": 73240 }, { "epoch": 0.88, "learning_rate": 2.2750657761131583e-07, "logits/chosen": -2.8863558769226074, "logits/rejected": -2.4563803672790527, "logps/chosen": -89.15885925292969, "logps/rejected": -901.2523193359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4658084809780121, "rewards/margins": 8.159242630004883, "rewards/rejected": -8.62505054473877, "step": 73250 }, { "epoch": 0.88, "learning_rate": 2.270713761179083e-07, "logits/chosen": -2.844698667526245, "logits/rejected": -2.141456127166748, "logps/chosen": -105.9463119506836, "logps/rejected": -992.1004028320312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5266542434692383, "rewards/margins": 8.991363525390625, "rewards/rejected": -9.51801872253418, "step": 73260 }, { "epoch": 0.88, "learning_rate": 2.26636571471526e-07, "logits/chosen": -2.8885295391082764, "logits/rejected": -2.500492811203003, "logps/chosen": -87.96772003173828, "logps/rejected": -855.8536987304688, "loss": 0.1021, "rewards/accuracies": 1.0, "rewards/chosen": -0.46795886754989624, "rewards/margins": 7.702078342437744, "rewards/rejected": -8.170037269592285, "step": 73270 }, { "epoch": 0.88, "learning_rate": 2.2620216374808434e-07, "logits/chosen": -2.821044683456421, "logits/rejected": -2.2753708362579346, "logps/chosen": -98.57731628417969, "logps/rejected": -967.2619018554688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5124672651290894, "rewards/margins": 8.769350051879883, "rewards/rejected": -9.281817436218262, "step": 73280 }, { "epoch": 0.88, "learning_rate": 2.2576815302342931e-07, "logits/chosen": -2.8661880493164062, "logits/rejected": -2.129599094390869, "logps/chosen": -126.09809875488281, "logps/rejected": -971.9774169921875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6934213042259216, "rewards/margins": 8.615400314331055, "rewards/rejected": -9.308821678161621, "step": 73290 }, { "epoch": 0.88, "learning_rate": 2.2533453937333832e-07, "logits/chosen": -2.848698854446411, "logits/rejected": -2.1088240146636963, "logps/chosen": -137.56344604492188, "logps/rejected": -1009.3040771484375, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.843370795249939, "rewards/margins": 8.854925155639648, "rewards/rejected": -9.698297500610352, "step": 73300 }, { "epoch": 0.88, "learning_rate": 2.2490132287351947e-07, "logits/chosen": -2.867637872695923, "logits/rejected": -2.284273147583008, "logps/chosen": -130.23585510253906, "logps/rejected": -827.8819580078125, "loss": 0.067, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8171302080154419, "rewards/margins": 7.074765205383301, "rewards/rejected": -7.891895294189453, "step": 73310 }, { "epoch": 0.88, "learning_rate": 2.2446850359961054e-07, "logits/chosen": -2.8870902061462402, "logits/rejected": -2.3789823055267334, "logps/chosen": -91.52510070800781, "logps/rejected": -937.19091796875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.453838586807251, "rewards/margins": 8.535604476928711, "rewards/rejected": -8.989444732666016, "step": 73320 }, { "epoch": 0.88, "learning_rate": 2.240360816271811e-07, "logits/chosen": -2.8872528076171875, "logits/rejected": -2.4137449264526367, "logps/chosen": -104.27783203125, "logps/rejected": -963.4271240234375, "loss": 0.0934, "rewards/accuracies": 1.0, "rewards/chosen": -0.5669050812721252, "rewards/margins": 8.669857025146484, "rewards/rejected": -9.236762046813965, "step": 73330 }, { "epoch": 0.88, "learning_rate": 2.2360405703173077e-07, "logits/chosen": -2.8622286319732666, "logits/rejected": -2.335352659225464, "logps/chosen": -89.16682434082031, "logps/rejected": -868.4353637695312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4285219609737396, "rewards/margins": 7.870769500732422, "rewards/rejected": -8.299291610717773, "step": 73340 }, { "epoch": 0.88, "learning_rate": 2.2317242988868947e-07, "logits/chosen": -2.8685243129730225, "logits/rejected": -2.3624966144561768, "logps/chosen": -96.53910827636719, "logps/rejected": -947.9066162109375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5029894113540649, "rewards/margins": 8.598701477050781, "rewards/rejected": -9.101691246032715, "step": 73350 }, { "epoch": 0.88, "learning_rate": 2.2274120027341806e-07, "logits/chosen": -2.8741114139556885, "logits/rejected": -2.3281078338623047, "logps/chosen": -106.1601791381836, "logps/rejected": -1093.0537109375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5698967576026917, "rewards/margins": 9.954652786254883, "rewards/rejected": -10.52454948425293, "step": 73360 }, { "epoch": 0.88, "learning_rate": 2.223103682612085e-07, "logits/chosen": -2.859867572784424, "logits/rejected": -2.5059139728546143, "logps/chosen": -158.49185180664062, "logps/rejected": -709.67578125, "loss": 0.3695, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.1120030879974365, "rewards/margins": 5.625685214996338, "rewards/rejected": -6.7376885414123535, "step": 73370 }, { "epoch": 0.88, "learning_rate": 2.2187993392728256e-07, "logits/chosen": -2.892880916595459, "logits/rejected": -2.2236251831054688, "logps/chosen": -111.63276672363281, "logps/rejected": -958.3390502929688, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6087959408760071, "rewards/margins": 8.582136154174805, "rewards/rejected": -9.190933227539062, "step": 73380 }, { "epoch": 0.88, "learning_rate": 2.2144989734679266e-07, "logits/chosen": -2.91573166847229, "logits/rejected": -2.349693775177002, "logps/chosen": -109.6047134399414, "logps/rejected": -959.2630004882812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6101435422897339, "rewards/margins": 8.600153923034668, "rewards/rejected": -9.210296630859375, "step": 73390 }, { "epoch": 0.88, "learning_rate": 2.2102025859482263e-07, "logits/chosen": -2.867248296737671, "logits/rejected": -2.196756362915039, "logps/chosen": -105.9020004272461, "logps/rejected": -993.2879028320312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5483813881874084, "rewards/margins": 8.990205764770508, "rewards/rejected": -9.538586616516113, "step": 73400 }, { "epoch": 0.88, "learning_rate": 2.205910177463863e-07, "logits/chosen": -2.8895504474639893, "logits/rejected": -2.430635929107666, "logps/chosen": -94.19282531738281, "logps/rejected": -881.1437377929688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4943709969520569, "rewards/margins": 7.938869476318359, "rewards/rejected": -8.43324089050293, "step": 73410 }, { "epoch": 0.88, "learning_rate": 2.2016217487642682e-07, "logits/chosen": -2.8497304916381836, "logits/rejected": -2.261599063873291, "logps/chosen": -112.77640533447266, "logps/rejected": -990.3304443359375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6674857139587402, "rewards/margins": 8.842764854431152, "rewards/rejected": -9.51025104522705, "step": 73420 }, { "epoch": 0.88, "learning_rate": 2.1973373005981985e-07, "logits/chosen": -2.8676106929779053, "logits/rejected": -2.168755054473877, "logps/chosen": -118.7483139038086, "logps/rejected": -989.7459106445312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7017465829849243, "rewards/margins": 8.79001235961914, "rewards/rejected": -9.491758346557617, "step": 73430 }, { "epoch": 0.88, "learning_rate": 2.1930568337137025e-07, "logits/chosen": -2.8429296016693115, "logits/rejected": -2.3621416091918945, "logps/chosen": -106.62516784667969, "logps/rejected": -911.4990234375, "loss": 0.0834, "rewards/accuracies": 1.0, "rewards/chosen": -0.6011587977409363, "rewards/margins": 8.131121635437012, "rewards/rejected": -8.732280731201172, "step": 73440 }, { "epoch": 0.88, "learning_rate": 2.188780348858141e-07, "logits/chosen": -2.909400224685669, "logits/rejected": -2.357011318206787, "logps/chosen": -132.3505096435547, "logps/rejected": -954.3206787109375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.8715642690658569, "rewards/margins": 8.267990112304688, "rewards/rejected": -9.139554977416992, "step": 73450 }, { "epoch": 0.88, "learning_rate": 2.184507846778175e-07, "logits/chosen": -2.8571619987487793, "logits/rejected": -2.5156760215759277, "logps/chosen": -102.69146728515625, "logps/rejected": -794.5810546875, "loss": 0.1477, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6404932141304016, "rewards/margins": 6.929072380065918, "rewards/rejected": -7.569565773010254, "step": 73460 }, { "epoch": 0.88, "learning_rate": 2.1802393282197743e-07, "logits/chosen": -2.873711347579956, "logits/rejected": -2.4854323863983154, "logps/chosen": -128.88922119140625, "logps/rejected": -794.8209838867188, "loss": 0.2118, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8721638917922974, "rewards/margins": 6.7012529373168945, "rewards/rejected": -7.573416233062744, "step": 73470 }, { "epoch": 0.88, "learning_rate": 2.1759747939282066e-07, "logits/chosen": -2.8802692890167236, "logits/rejected": -2.334503650665283, "logps/chosen": -95.89506530761719, "logps/rejected": -932.2686767578125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5034117698669434, "rewards/margins": 8.428447723388672, "rewards/rejected": -8.931859016418457, "step": 73480 }, { "epoch": 0.88, "learning_rate": 2.171714244648046e-07, "logits/chosen": -2.8563430309295654, "logits/rejected": -2.415959119796753, "logps/chosen": -95.27723693847656, "logps/rejected": -995.7717895507812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5347110629081726, "rewards/margins": 9.041141510009766, "rewards/rejected": -9.57585334777832, "step": 73490 }, { "epoch": 0.88, "learning_rate": 2.167457681123178e-07, "logits/chosen": -2.853217363357544, "logits/rejected": -2.1754708290100098, "logps/chosen": -121.97422790527344, "logps/rejected": -937.5126953125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7192881107330322, "rewards/margins": 8.234992980957031, "rewards/rejected": -8.954279899597168, "step": 73500 }, { "epoch": 0.88, "learning_rate": 2.1632051040967828e-07, "logits/chosen": -2.8114380836486816, "logits/rejected": -2.2980425357818604, "logps/chosen": -99.19984436035156, "logps/rejected": -937.8255615234375, "loss": 0.1529, "rewards/accuracies": 1.0, "rewards/chosen": -0.49651065468788147, "rewards/margins": 8.488809585571289, "rewards/rejected": -8.985319137573242, "step": 73510 }, { "epoch": 0.88, "learning_rate": 2.1589565143113474e-07, "logits/chosen": -2.9086711406707764, "logits/rejected": -2.261894464492798, "logps/chosen": -111.1257095336914, "logps/rejected": -1074.6351318359375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5803989171981812, "rewards/margins": 9.770179748535156, "rewards/rejected": -10.350578308105469, "step": 73520 }, { "epoch": 0.88, "learning_rate": 2.1547119125086695e-07, "logits/chosen": -2.929837465286255, "logits/rejected": -2.378570079803467, "logps/chosen": -99.63053131103516, "logps/rejected": -889.5987548828125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.5815407037734985, "rewards/margins": 7.928702354431152, "rewards/rejected": -8.510244369506836, "step": 73530 }, { "epoch": 0.88, "learning_rate": 2.150471299429846e-07, "logits/chosen": -2.8820395469665527, "logits/rejected": -2.4933791160583496, "logps/chosen": -98.77439880371094, "logps/rejected": -861.7457275390625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5387963652610779, "rewards/margins": 7.688735008239746, "rewards/rejected": -8.227532386779785, "step": 73540 }, { "epoch": 0.88, "learning_rate": 2.1462346758152674e-07, "logits/chosen": -2.922330379486084, "logits/rejected": -2.4586410522460938, "logps/chosen": -108.5342788696289, "logps/rejected": -979.6277465820312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6345723867416382, "rewards/margins": 8.764127731323242, "rewards/rejected": -9.398700714111328, "step": 73550 }, { "epoch": 0.88, "learning_rate": 2.1420020424046394e-07, "logits/chosen": -2.871887683868408, "logits/rejected": -2.3899383544921875, "logps/chosen": -96.4743423461914, "logps/rejected": -859.1588134765625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.48947674036026, "rewards/margins": 7.716090202331543, "rewards/rejected": -8.205568313598633, "step": 73560 }, { "epoch": 0.88, "learning_rate": 2.1377733999369682e-07, "logits/chosen": -2.860935926437378, "logits/rejected": -2.216245651245117, "logps/chosen": -131.2985382080078, "logps/rejected": -992.5499877929688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7976155281066895, "rewards/margins": 8.732102394104004, "rewards/rejected": -9.529717445373535, "step": 73570 }, { "epoch": 0.88, "learning_rate": 2.1335487491505685e-07, "logits/chosen": -2.9201111793518066, "logits/rejected": -2.307217597961426, "logps/chosen": -111.14627838134766, "logps/rejected": -890.3394775390625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6102232933044434, "rewards/margins": 7.905702114105225, "rewards/rejected": -8.515925407409668, "step": 73580 }, { "epoch": 0.88, "learning_rate": 2.129328090783045e-07, "logits/chosen": -2.876603364944458, "logits/rejected": -2.3848347663879395, "logps/chosen": -101.1187515258789, "logps/rejected": -936.7874145507812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5223644971847534, "rewards/margins": 8.449263572692871, "rewards/rejected": -8.97162914276123, "step": 73590 }, { "epoch": 0.88, "learning_rate": 2.1251114255713163e-07, "logits/chosen": -2.9069931507110596, "logits/rejected": -2.364896297454834, "logps/chosen": -109.1570053100586, "logps/rejected": -916.1466064453125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.621286928653717, "rewards/margins": 8.15259838104248, "rewards/rejected": -8.773885726928711, "step": 73600 }, { "epoch": 0.88, "learning_rate": 2.1208987542516018e-07, "logits/chosen": -2.901869773864746, "logits/rejected": -2.3628337383270264, "logps/chosen": -102.00157165527344, "logps/rejected": -823.4007568359375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5439273715019226, "rewards/margins": 7.315080165863037, "rewards/rejected": -7.859007358551025, "step": 73610 }, { "epoch": 0.88, "learning_rate": 2.116690077559422e-07, "logits/chosen": -2.8351545333862305, "logits/rejected": -2.3872668743133545, "logps/chosen": -98.6166000366211, "logps/rejected": -883.8203125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5327851176261902, "rewards/margins": 7.925252437591553, "rewards/rejected": -8.458036422729492, "step": 73620 }, { "epoch": 0.88, "learning_rate": 2.1124853962295966e-07, "logits/chosen": -2.8844830989837646, "logits/rejected": -2.1518681049346924, "logps/chosen": -106.87510681152344, "logps/rejected": -912.67236328125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5711180567741394, "rewards/margins": 8.147661209106445, "rewards/rejected": -8.718778610229492, "step": 73630 }, { "epoch": 0.88, "learning_rate": 2.108284710996253e-07, "logits/chosen": -2.8490560054779053, "logits/rejected": -2.505723237991333, "logps/chosen": -91.11393737792969, "logps/rejected": -862.5910034179688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5005461573600769, "rewards/margins": 7.746743202209473, "rewards/rejected": -8.247289657592773, "step": 73640 }, { "epoch": 0.88, "learning_rate": 2.1040880225928233e-07, "logits/chosen": -2.9074325561523438, "logits/rejected": -2.5308332443237305, "logps/chosen": -93.21504974365234, "logps/rejected": -841.1961059570312, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.49427732825279236, "rewards/margins": 7.538794040679932, "rewards/rejected": -8.033071517944336, "step": 73650 }, { "epoch": 0.88, "learning_rate": 2.099895331752036e-07, "logits/chosen": -2.885542392730713, "logits/rejected": -2.163879156112671, "logps/chosen": -130.04824829101562, "logps/rejected": -1025.5399169921875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7394611239433289, "rewards/margins": 9.10766887664795, "rewards/rejected": -9.847129821777344, "step": 73660 }, { "epoch": 0.88, "learning_rate": 2.095706639205916e-07, "logits/chosen": -2.8904223442077637, "logits/rejected": -2.2042829990386963, "logps/chosen": -120.42622375488281, "logps/rejected": -896.5847778320312, "loss": 0.0881, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7333287596702576, "rewards/margins": 7.837287902832031, "rewards/rejected": -8.57061767578125, "step": 73670 }, { "epoch": 0.88, "learning_rate": 2.0915219456858067e-07, "logits/chosen": -2.8303909301757812, "logits/rejected": -2.3523054122924805, "logps/chosen": -113.4782485961914, "logps/rejected": -935.7255859375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6664693355560303, "rewards/margins": 8.308571815490723, "rewards/rejected": -8.975040435791016, "step": 73680 }, { "epoch": 0.88, "learning_rate": 2.0873412519223368e-07, "logits/chosen": -2.833963632583618, "logits/rejected": -2.2587759494781494, "logps/chosen": -127.49131774902344, "logps/rejected": -980.0730590820312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7070215940475464, "rewards/margins": 8.705358505249023, "rewards/rejected": -9.412381172180176, "step": 73690 }, { "epoch": 0.88, "learning_rate": 2.083164558645448e-07, "logits/chosen": -2.922304153442383, "logits/rejected": -2.3330111503601074, "logps/chosen": -97.48866271972656, "logps/rejected": -923.2952880859375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.46512261033058167, "rewards/margins": 8.382074356079102, "rewards/rejected": -8.847196578979492, "step": 73700 }, { "epoch": 0.88, "learning_rate": 2.078991866584379e-07, "logits/chosen": -2.8667914867401123, "logits/rejected": -2.413175106048584, "logps/chosen": -120.78865814208984, "logps/rejected": -903.1199951171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7704959511756897, "rewards/margins": 7.8687005043029785, "rewards/rejected": -8.639196395874023, "step": 73710 }, { "epoch": 0.88, "learning_rate": 2.074823176467669e-07, "logits/chosen": -2.8835129737854004, "logits/rejected": -2.468782901763916, "logps/chosen": -87.81844329833984, "logps/rejected": -903.5587158203125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.4531695246696472, "rewards/margins": 8.210494041442871, "rewards/rejected": -8.663663864135742, "step": 73720 }, { "epoch": 0.88, "learning_rate": 2.070658489023164e-07, "logits/chosen": -2.8760578632354736, "logits/rejected": -2.452601909637451, "logps/chosen": -105.75309753417969, "logps/rejected": -842.8363037109375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.6132932901382446, "rewards/margins": 7.429052829742432, "rewards/rejected": -8.04234504699707, "step": 73730 }, { "epoch": 0.88, "learning_rate": 2.0664978049780016e-07, "logits/chosen": -2.818835973739624, "logits/rejected": -2.4395928382873535, "logps/chosen": -80.02653503417969, "logps/rejected": -826.6156005859375, "loss": 0.1749, "rewards/accuracies": 1.0, "rewards/chosen": -0.3898414075374603, "rewards/margins": 7.499495506286621, "rewards/rejected": -7.889337062835693, "step": 73740 }, { "epoch": 0.88, "learning_rate": 2.0623411250586256e-07, "logits/chosen": -2.9000232219696045, "logits/rejected": -2.3397531509399414, "logps/chosen": -98.39627838134766, "logps/rejected": -843.4464721679688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5267378091812134, "rewards/margins": 7.531842231750488, "rewards/rejected": -8.05858039855957, "step": 73750 }, { "epoch": 0.88, "learning_rate": 2.0581884499907834e-07, "logits/chosen": -2.8223319053649902, "logits/rejected": -2.4730684757232666, "logps/chosen": -76.33087921142578, "logps/rejected": -855.8203125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.38495495915412903, "rewards/margins": 7.804884433746338, "rewards/rejected": -8.189840316772461, "step": 73760 }, { "epoch": 0.88, "learning_rate": 2.054039780499517e-07, "logits/chosen": -2.8758654594421387, "logits/rejected": -2.3802685737609863, "logps/chosen": -98.64318084716797, "logps/rejected": -874.42822265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5100600719451904, "rewards/margins": 7.857654571533203, "rewards/rejected": -8.367715835571289, "step": 73770 }, { "epoch": 0.88, "learning_rate": 2.049895117309178e-07, "logits/chosen": -2.836740016937256, "logits/rejected": -2.5004923343658447, "logps/chosen": -97.72679138183594, "logps/rejected": -873.7556762695312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5861592888832092, "rewards/margins": 7.7706122398376465, "rewards/rejected": -8.356772422790527, "step": 73780 }, { "epoch": 0.88, "learning_rate": 2.045754461143415e-07, "logits/chosen": -2.848175048828125, "logits/rejected": -2.271082878112793, "logps/chosen": -108.740966796875, "logps/rejected": -933.1905517578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5806916356086731, "rewards/margins": 8.355112075805664, "rewards/rejected": -8.93580436706543, "step": 73790 }, { "epoch": 0.88, "learning_rate": 2.0416178127251662e-07, "logits/chosen": -2.893152952194214, "logits/rejected": -2.1688225269317627, "logps/chosen": -117.8960189819336, "logps/rejected": -1118.952392578125, "loss": 0.104, "rewards/accuracies": 1.0, "rewards/chosen": -0.6190026998519897, "rewards/margins": 10.168131828308105, "rewards/rejected": -10.787134170532227, "step": 73800 }, { "epoch": 0.88, "learning_rate": 2.037485172776682e-07, "logits/chosen": -2.865544080734253, "logits/rejected": -2.5360679626464844, "logps/chosen": -79.985107421875, "logps/rejected": -802.3262329101562, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.41368213295936584, "rewards/margins": 7.240102291107178, "rewards/rejected": -7.653784275054932, "step": 73810 }, { "epoch": 0.88, "learning_rate": 2.03335654201951e-07, "logits/chosen": -2.8375115394592285, "logits/rejected": -2.1330487728118896, "logps/chosen": -117.40916442871094, "logps/rejected": -1092.032958984375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6373661756515503, "rewards/margins": 9.87584114074707, "rewards/rejected": -10.513208389282227, "step": 73820 }, { "epoch": 0.88, "learning_rate": 2.0292319211744988e-07, "logits/chosen": -2.877686023712158, "logits/rejected": -2.2238802909851074, "logps/chosen": -134.878662109375, "logps/rejected": -1035.907958984375, "loss": 0.1504, "rewards/accuracies": 1.0, "rewards/chosen": -0.8259049654006958, "rewards/margins": 9.142513275146484, "rewards/rejected": -9.968420028686523, "step": 73830 }, { "epoch": 0.88, "learning_rate": 2.025111310961797e-07, "logits/chosen": -2.8551747798919678, "logits/rejected": -2.158285617828369, "logps/chosen": -114.38226318359375, "logps/rejected": -914.1370849609375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6435477137565613, "rewards/margins": 8.108444213867188, "rewards/rejected": -8.751991271972656, "step": 73840 }, { "epoch": 0.88, "learning_rate": 2.0209947121008487e-07, "logits/chosen": -2.8967432975769043, "logits/rejected": -2.0260825157165527, "logps/chosen": -132.8555450439453, "logps/rejected": -1093.1126708984375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7924435138702393, "rewards/margins": 9.724370956420898, "rewards/rejected": -10.516813278198242, "step": 73850 }, { "epoch": 0.88, "learning_rate": 2.0168821253104098e-07, "logits/chosen": -2.8713831901550293, "logits/rejected": -2.260782480239868, "logps/chosen": -145.2219696044922, "logps/rejected": -1043.926513671875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.9401365518569946, "rewards/margins": 9.088354110717773, "rewards/rejected": -10.028491973876953, "step": 73860 }, { "epoch": 0.88, "learning_rate": 2.012773551308511e-07, "logits/chosen": -2.9450743198394775, "logits/rejected": -2.553445816040039, "logps/chosen": -65.2916488647461, "logps/rejected": -847.7989501953125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.2662428915500641, "rewards/margins": 7.84459924697876, "rewards/rejected": -8.110841751098633, "step": 73870 }, { "epoch": 0.88, "learning_rate": 2.0086689908125068e-07, "logits/chosen": -2.8980300426483154, "logits/rejected": -2.549586772918701, "logps/chosen": -97.98737335205078, "logps/rejected": -844.9105224609375, "loss": 0.0517, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5689452290534973, "rewards/margins": 7.50390625, "rewards/rejected": -8.072851181030273, "step": 73880 }, { "epoch": 0.88, "learning_rate": 2.0045684445390428e-07, "logits/chosen": -2.9133288860321045, "logits/rejected": -2.158398389816284, "logps/chosen": -133.2089385986328, "logps/rejected": -1010.4365234375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7693447470664978, "rewards/margins": 8.9290189743042, "rewards/rejected": -9.698363304138184, "step": 73890 }, { "epoch": 0.88, "learning_rate": 2.000471913204058e-07, "logits/chosen": -2.9468581676483154, "logits/rejected": -2.614854335784912, "logps/chosen": -83.85630798339844, "logps/rejected": -851.3616943359375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.44177061319351196, "rewards/margins": 7.683003902435303, "rewards/rejected": -8.124774932861328, "step": 73900 }, { "epoch": 0.88, "learning_rate": 1.9963793975228018e-07, "logits/chosen": -2.925255537033081, "logits/rejected": -2.2833826541900635, "logps/chosen": -110.0370101928711, "logps/rejected": -870.3878784179688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5973936319351196, "rewards/margins": 7.723193168640137, "rewards/rejected": -8.320586204528809, "step": 73910 }, { "epoch": 0.88, "learning_rate": 1.9922908982098115e-07, "logits/chosen": -2.789897918701172, "logits/rejected": -1.907334566116333, "logps/chosen": -136.0136260986328, "logps/rejected": -1138.13623046875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7807551622390747, "rewards/margins": 10.191807746887207, "rewards/rejected": -10.972561836242676, "step": 73920 }, { "epoch": 0.88, "learning_rate": 1.9882064159789356e-07, "logits/chosen": -2.866100788116455, "logits/rejected": -2.1056885719299316, "logps/chosen": -110.3084945678711, "logps/rejected": -1086.2032470703125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6052355766296387, "rewards/margins": 9.856985092163086, "rewards/rejected": -10.462220191955566, "step": 73930 }, { "epoch": 0.89, "learning_rate": 1.9841259515433004e-07, "logits/chosen": -2.8992292881011963, "logits/rejected": -2.2342710494995117, "logps/chosen": -107.71546936035156, "logps/rejected": -1000.6783447265625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.546245276927948, "rewards/margins": 9.064580917358398, "rewards/rejected": -9.61082649230957, "step": 73940 }, { "epoch": 0.89, "learning_rate": 1.9800495056153502e-07, "logits/chosen": -2.8794233798980713, "logits/rejected": -2.5031471252441406, "logps/chosen": -95.63236236572266, "logps/rejected": -908.9871826171875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5135759115219116, "rewards/margins": 8.193313598632812, "rewards/rejected": -8.706890106201172, "step": 73950 }, { "epoch": 0.89, "learning_rate": 1.975977078906821e-07, "logits/chosen": -2.8637397289276123, "logits/rejected": -2.3359358310699463, "logps/chosen": -107.04219818115234, "logps/rejected": -926.99951171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6336251497268677, "rewards/margins": 8.25139045715332, "rewards/rejected": -8.885015487670898, "step": 73960 }, { "epoch": 0.89, "learning_rate": 1.97190867212875e-07, "logits/chosen": -2.9218785762786865, "logits/rejected": -2.189957618713379, "logps/chosen": -118.86235046386719, "logps/rejected": -952.8145751953125, "loss": 0.1206, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7128103971481323, "rewards/margins": 8.431364059448242, "rewards/rejected": -9.14417552947998, "step": 73970 }, { "epoch": 0.89, "learning_rate": 1.967844285991463e-07, "logits/chosen": -2.8946003913879395, "logits/rejected": -2.395750045776367, "logps/chosen": -90.16422271728516, "logps/rejected": -889.0859375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.42444324493408203, "rewards/margins": 8.082934379577637, "rewards/rejected": -8.507377624511719, "step": 73980 }, { "epoch": 0.89, "learning_rate": 1.963783921204601e-07, "logits/chosen": -2.9377074241638184, "logits/rejected": -2.592712879180908, "logps/chosen": -81.0342025756836, "logps/rejected": -908.3841552734375, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.3753553330898285, "rewards/margins": 8.332090377807617, "rewards/rejected": -8.70744514465332, "step": 73990 }, { "epoch": 0.89, "learning_rate": 1.959727578477083e-07, "logits/chosen": -2.8910937309265137, "logits/rejected": -2.5093281269073486, "logps/chosen": -76.85527038574219, "logps/rejected": -839.79296875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.3576640486717224, "rewards/margins": 7.66106653213501, "rewards/rejected": -8.018730163574219, "step": 74000 }, { "epoch": 0.89, "learning_rate": 1.9556752585171367e-07, "logits/chosen": -2.8920340538024902, "logits/rejected": -2.2524211406707764, "logps/chosen": -119.65409851074219, "logps/rejected": -972.765625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6825875043869019, "rewards/margins": 8.652588844299316, "rewards/rejected": -9.335175514221191, "step": 74010 }, { "epoch": 0.89, "learning_rate": 1.9516269620322827e-07, "logits/chosen": -2.943477153778076, "logits/rejected": -2.601046085357666, "logps/chosen": -81.240478515625, "logps/rejected": -887.3883056640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.39738184213638306, "rewards/margins": 8.095541000366211, "rewards/rejected": -8.492921829223633, "step": 74020 }, { "epoch": 0.89, "learning_rate": 1.947582689729352e-07, "logits/chosen": -2.855597972869873, "logits/rejected": -2.390676498413086, "logps/chosen": -80.50794982910156, "logps/rejected": -853.2984619140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.38484206795692444, "rewards/margins": 7.754547119140625, "rewards/rejected": -8.139389991760254, "step": 74030 }, { "epoch": 0.89, "learning_rate": 1.9435424423144583e-07, "logits/chosen": -2.873016595840454, "logits/rejected": -2.2084085941314697, "logps/chosen": -114.88330078125, "logps/rejected": -944.0650634765625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6414649486541748, "rewards/margins": 8.408510208129883, "rewards/rejected": -9.04997444152832, "step": 74040 }, { "epoch": 0.89, "learning_rate": 1.939506220493023e-07, "logits/chosen": -2.8996450901031494, "logits/rejected": -2.296393394470215, "logps/chosen": -119.09814453125, "logps/rejected": -1002.3541259765625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7086290121078491, "rewards/margins": 8.894905090332031, "rewards/rejected": -9.603533744812012, "step": 74050 }, { "epoch": 0.89, "learning_rate": 1.9354740249697467e-07, "logits/chosen": -2.8960092067718506, "logits/rejected": -2.2854673862457275, "logps/chosen": -97.26935577392578, "logps/rejected": -965.6106567382812, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.5244855880737305, "rewards/margins": 8.73322868347168, "rewards/rejected": -9.257715225219727, "step": 74060 }, { "epoch": 0.89, "learning_rate": 1.9314458564486493e-07, "logits/chosen": -2.8742833137512207, "logits/rejected": -2.458718776702881, "logps/chosen": -91.77250671386719, "logps/rejected": -844.3234252929688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.49906212091445923, "rewards/margins": 7.564430236816406, "rewards/rejected": -8.063491821289062, "step": 74070 }, { "epoch": 0.89, "learning_rate": 1.927421715633035e-07, "logits/chosen": -2.913081645965576, "logits/rejected": -2.473740816116333, "logps/chosen": -105.63856506347656, "logps/rejected": -957.9493408203125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5866535902023315, "rewards/margins": 8.593828201293945, "rewards/rejected": -9.180481910705566, "step": 74080 }, { "epoch": 0.89, "learning_rate": 1.9234016032255053e-07, "logits/chosen": -2.868163585662842, "logits/rejected": -2.0816609859466553, "logps/chosen": -133.557861328125, "logps/rejected": -1171.71875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.7629349827766418, "rewards/margins": 10.543701171875, "rewards/rejected": -11.306635856628418, "step": 74090 }, { "epoch": 0.89, "learning_rate": 1.919385519927966e-07, "logits/chosen": -2.8627986907958984, "logits/rejected": -2.3415369987487793, "logps/chosen": -120.7628173828125, "logps/rejected": -979.9931640625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7407098412513733, "rewards/margins": 8.665788650512695, "rewards/rejected": -9.406499862670898, "step": 74100 }, { "epoch": 0.89, "learning_rate": 1.915373466441614e-07, "logits/chosen": -2.8582160472869873, "logits/rejected": -2.2153234481811523, "logps/chosen": -119.36845397949219, "logps/rejected": -906.9054565429688, "loss": 0.0812, "rewards/accuracies": 1.0, "rewards/chosen": -0.727819561958313, "rewards/margins": 7.940154075622559, "rewards/rejected": -8.667974472045898, "step": 74110 }, { "epoch": 0.89, "learning_rate": 1.9113654434669364e-07, "logits/chosen": -2.8866353034973145, "logits/rejected": -2.234135389328003, "logps/chosen": -124.9475326538086, "logps/rejected": -955.4332275390625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7292289137840271, "rewards/margins": 8.420659065246582, "rewards/rejected": -9.149887084960938, "step": 74120 }, { "epoch": 0.89, "learning_rate": 1.9073614517037265e-07, "logits/chosen": -2.831728458404541, "logits/rejected": -2.194307565689087, "logps/chosen": -127.1246109008789, "logps/rejected": -968.2742919921875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.7943310737609863, "rewards/margins": 8.499251365661621, "rewards/rejected": -9.293582916259766, "step": 74130 }, { "epoch": 0.89, "learning_rate": 1.903361491851069e-07, "logits/chosen": -2.865712881088257, "logits/rejected": -2.3742547035217285, "logps/chosen": -108.011962890625, "logps/rejected": -832.2628784179688, "loss": 0.1533, "rewards/accuracies": 1.0, "rewards/chosen": -0.6629406213760376, "rewards/margins": 7.2911858558654785, "rewards/rejected": -7.954125881195068, "step": 74140 }, { "epoch": 0.89, "learning_rate": 1.8993655646073472e-07, "logits/chosen": -2.9185073375701904, "logits/rejected": -2.3946194648742676, "logps/chosen": -108.42950439453125, "logps/rejected": -959.43115234375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.546407163143158, "rewards/margins": 8.651196479797363, "rewards/rejected": -9.197602272033691, "step": 74150 }, { "epoch": 0.89, "learning_rate": 1.895373670670239e-07, "logits/chosen": -2.9075074195861816, "logits/rejected": -2.543290376663208, "logps/chosen": -81.67156982421875, "logps/rejected": -824.8612060546875, "loss": 0.0925, "rewards/accuracies": 1.0, "rewards/chosen": -0.35719436407089233, "rewards/margins": 7.516291618347168, "rewards/rejected": -7.873486518859863, "step": 74160 }, { "epoch": 0.89, "learning_rate": 1.8913858107367178e-07, "logits/chosen": -2.9042880535125732, "logits/rejected": -2.325535535812378, "logps/chosen": -101.6192855834961, "logps/rejected": -873.9953002929688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5484305620193481, "rewards/margins": 7.80334997177124, "rewards/rejected": -8.351780891418457, "step": 74170 }, { "epoch": 0.89, "learning_rate": 1.8874019855030567e-07, "logits/chosen": -2.9025399684906006, "logits/rejected": -2.6611855030059814, "logps/chosen": -87.83316040039062, "logps/rejected": -744.52197265625, "loss": 0.0967, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5096535086631775, "rewards/margins": 6.560386657714844, "rewards/rejected": -7.070040702819824, "step": 74180 }, { "epoch": 0.89, "learning_rate": 1.8834221956648108e-07, "logits/chosen": -2.8609070777893066, "logits/rejected": -2.33192777633667, "logps/chosen": -167.6279296875, "logps/rejected": -941.8092041015625, "loss": 0.2079, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.113065481185913, "rewards/margins": 7.8887457847595215, "rewards/rejected": -9.001811027526855, "step": 74190 }, { "epoch": 0.89, "learning_rate": 1.879446441916849e-07, "logits/chosen": -2.887421131134033, "logits/rejected": -2.481502056121826, "logps/chosen": -84.29987335205078, "logps/rejected": -877.5431518554688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.42340248823165894, "rewards/margins": 7.957598686218262, "rewards/rejected": -8.380999565124512, "step": 74200 }, { "epoch": 0.89, "learning_rate": 1.8754747249533219e-07, "logits/chosen": -2.8831002712249756, "logits/rejected": -2.3617053031921387, "logps/chosen": -93.91950988769531, "logps/rejected": -900.13916015625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5264866352081299, "rewards/margins": 8.08379077911377, "rewards/rejected": -8.61027717590332, "step": 74210 }, { "epoch": 0.89, "learning_rate": 1.87150704546768e-07, "logits/chosen": -2.883021116256714, "logits/rejected": -2.5892109870910645, "logps/chosen": -76.25202941894531, "logps/rejected": -860.9940185546875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.3929899334907532, "rewards/margins": 7.834031581878662, "rewards/rejected": -8.227021217346191, "step": 74220 }, { "epoch": 0.89, "learning_rate": 1.8675434041526745e-07, "logits/chosen": -2.8329203128814697, "logits/rejected": -2.323059558868408, "logps/chosen": -127.67857360839844, "logps/rejected": -937.7154541015625, "loss": 0.1502, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8128799200057983, "rewards/margins": 8.17276668548584, "rewards/rejected": -8.985647201538086, "step": 74230 }, { "epoch": 0.89, "learning_rate": 1.8635838017003417e-07, "logits/chosen": -2.891418933868408, "logits/rejected": -2.193233013153076, "logps/chosen": -129.92189025878906, "logps/rejected": -1015.05517578125, "loss": 0.0959, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7760143280029297, "rewards/margins": 8.981103897094727, "rewards/rejected": -9.75711727142334, "step": 74240 }, { "epoch": 0.89, "learning_rate": 1.8596282388020247e-07, "logits/chosen": -2.851254463195801, "logits/rejected": -2.2594094276428223, "logps/chosen": -106.91435241699219, "logps/rejected": -1035.06689453125, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": -0.6078232526779175, "rewards/margins": 9.34889030456543, "rewards/rejected": -9.956713676452637, "step": 74250 }, { "epoch": 0.89, "learning_rate": 1.8556767161483412e-07, "logits/chosen": -2.8632893562316895, "logits/rejected": -2.2658536434173584, "logps/chosen": -100.68263244628906, "logps/rejected": -910.1708984375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5426512956619263, "rewards/margins": 8.168272018432617, "rewards/rejected": -8.710922241210938, "step": 74260 }, { "epoch": 0.89, "learning_rate": 1.8517292344292226e-07, "logits/chosen": -2.9037365913391113, "logits/rejected": -2.3891825675964355, "logps/chosen": -107.65177154541016, "logps/rejected": -962.2283935546875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.5598634481430054, "rewards/margins": 8.6693696975708, "rewards/rejected": -9.229233741760254, "step": 74270 }, { "epoch": 0.89, "learning_rate": 1.8477857943338896e-07, "logits/chosen": -2.8987693786621094, "logits/rejected": -2.4280784130096436, "logps/chosen": -94.62371063232422, "logps/rejected": -861.8115234375, "loss": 0.0915, "rewards/accuracies": 1.0, "rewards/chosen": -0.5206429362297058, "rewards/margins": 7.724989414215088, "rewards/rejected": -8.245631217956543, "step": 74280 }, { "epoch": 0.89, "learning_rate": 1.8438463965508523e-07, "logits/chosen": -2.932542562484741, "logits/rejected": -2.5227012634277344, "logps/chosen": -89.9182357788086, "logps/rejected": -898.3126220703125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4949184060096741, "rewards/margins": 8.095937728881836, "rewards/rejected": -8.590856552124023, "step": 74290 }, { "epoch": 0.89, "learning_rate": 1.8399110417679222e-07, "logits/chosen": -2.853604793548584, "logits/rejected": -1.998289704322815, "logps/chosen": -109.82438659667969, "logps/rejected": -962.4049682617188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5880102515220642, "rewards/margins": 8.629338264465332, "rewards/rejected": -9.217348098754883, "step": 74300 }, { "epoch": 0.89, "learning_rate": 1.8359797306722023e-07, "logits/chosen": -2.856480121612549, "logits/rejected": -2.250962257385254, "logps/chosen": -97.7197494506836, "logps/rejected": -933.8455200195312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5140827298164368, "rewards/margins": 8.431882858276367, "rewards/rejected": -8.945966720581055, "step": 74310 }, { "epoch": 0.89, "learning_rate": 1.8320524639500847e-07, "logits/chosen": -2.857020139694214, "logits/rejected": -2.056082248687744, "logps/chosen": -116.7503433227539, "logps/rejected": -1052.912353515625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6369501948356628, "rewards/margins": 9.48419189453125, "rewards/rejected": -10.121143341064453, "step": 74320 }, { "epoch": 0.89, "learning_rate": 1.8281292422872576e-07, "logits/chosen": -2.8946762084960938, "logits/rejected": -2.513158082962036, "logps/chosen": -85.01533508300781, "logps/rejected": -896.5382690429688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4403678774833679, "rewards/margins": 8.142048835754395, "rewards/rejected": -8.582414627075195, "step": 74330 }, { "epoch": 0.89, "learning_rate": 1.8242100663687117e-07, "logits/chosen": -2.902141571044922, "logits/rejected": -2.384938955307007, "logps/chosen": -107.4865493774414, "logps/rejected": -918.0169677734375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.6467085480690002, "rewards/margins": 8.147897720336914, "rewards/rejected": -8.794607162475586, "step": 74340 }, { "epoch": 0.89, "learning_rate": 1.8202949368787165e-07, "logits/chosen": -2.894495964050293, "logits/rejected": -2.521348714828491, "logps/chosen": -100.81831359863281, "logps/rejected": -864.52783203125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5624122023582458, "rewards/margins": 7.701437473297119, "rewards/rejected": -8.263849258422852, "step": 74350 }, { "epoch": 0.89, "learning_rate": 1.8163838545008472e-07, "logits/chosen": -2.8718667030334473, "logits/rejected": -2.175318717956543, "logps/chosen": -120.49542236328125, "logps/rejected": -990.7174072265625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6806521415710449, "rewards/margins": 8.819883346557617, "rewards/rejected": -9.50053596496582, "step": 74360 }, { "epoch": 0.89, "learning_rate": 1.8124768199179688e-07, "logits/chosen": -2.8797969818115234, "logits/rejected": -2.4140522480010986, "logps/chosen": -106.8183822631836, "logps/rejected": -917.1941528320312, "loss": 0.0832, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5799452662467957, "rewards/margins": 8.213445663452148, "rewards/rejected": -8.793391227722168, "step": 74370 }, { "epoch": 0.89, "learning_rate": 1.8085738338122383e-07, "logits/chosen": -2.863844394683838, "logits/rejected": -2.2965354919433594, "logps/chosen": -114.14764404296875, "logps/rejected": -961.3902587890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.699400782585144, "rewards/margins": 8.518794059753418, "rewards/rejected": -9.218194961547852, "step": 74380 }, { "epoch": 0.89, "learning_rate": 1.8046748968651022e-07, "logits/chosen": -2.8922340869903564, "logits/rejected": -2.3775219917297363, "logps/chosen": -90.86106872558594, "logps/rejected": -959.3681640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.45418041944503784, "rewards/margins": 8.762642860412598, "rewards/rejected": -9.216822624206543, "step": 74390 }, { "epoch": 0.89, "learning_rate": 1.800780009757308e-07, "logits/chosen": -2.918565511703491, "logits/rejected": -2.495692014694214, "logps/chosen": -78.40204620361328, "logps/rejected": -769.0745849609375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.3626593053340912, "rewards/margins": 6.967672824859619, "rewards/rejected": -7.330332279205322, "step": 74400 }, { "epoch": 0.89, "learning_rate": 1.796889173168892e-07, "logits/chosen": -2.868041515350342, "logits/rejected": -2.2097606658935547, "logps/chosen": -107.9864273071289, "logps/rejected": -937.0029296875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.6006268262863159, "rewards/margins": 8.390236854553223, "rewards/rejected": -8.990863800048828, "step": 74410 }, { "epoch": 0.89, "learning_rate": 1.79300238777918e-07, "logits/chosen": -2.906402826309204, "logits/rejected": -2.4663634300231934, "logps/chosen": -100.72401428222656, "logps/rejected": -918.29345703125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5471414923667908, "rewards/margins": 8.257102966308594, "rewards/rejected": -8.804244995117188, "step": 74420 }, { "epoch": 0.89, "learning_rate": 1.7891196542667965e-07, "logits/chosen": -2.8896164894104004, "logits/rejected": -2.2231693267822266, "logps/chosen": -122.05940246582031, "logps/rejected": -991.5244140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6804989576339722, "rewards/margins": 8.836522102355957, "rewards/rejected": -9.517021179199219, "step": 74430 }, { "epoch": 0.89, "learning_rate": 1.785240973309657e-07, "logits/chosen": -2.9364216327667236, "logits/rejected": -2.431499481201172, "logps/chosen": -117.6156997680664, "logps/rejected": -880.6554565429688, "loss": 0.1116, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7271224856376648, "rewards/margins": 7.696700096130371, "rewards/rejected": -8.423822402954102, "step": 74440 }, { "epoch": 0.89, "learning_rate": 1.7813663455849673e-07, "logits/chosen": -2.867999315261841, "logits/rejected": -2.4062013626098633, "logps/chosen": -91.13489532470703, "logps/rejected": -873.8748779296875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.4417497515678406, "rewards/margins": 7.914458274841309, "rewards/rejected": -8.356208801269531, "step": 74450 }, { "epoch": 0.89, "learning_rate": 1.7774957717692249e-07, "logits/chosen": -2.8491222858428955, "logits/rejected": -2.2017886638641357, "logps/chosen": -121.078857421875, "logps/rejected": -981.7639770507812, "loss": 0.0321, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7435177564620972, "rewards/margins": 8.683196067810059, "rewards/rejected": -9.426713943481445, "step": 74460 }, { "epoch": 0.89, "learning_rate": 1.7736292525382255e-07, "logits/chosen": -2.8579275608062744, "logits/rejected": -2.3013205528259277, "logps/chosen": -101.57305145263672, "logps/rejected": -952.9251708984375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.5373435020446777, "rewards/margins": 8.618351936340332, "rewards/rejected": -9.155694961547852, "step": 74470 }, { "epoch": 0.89, "learning_rate": 1.7697667885670483e-07, "logits/chosen": -2.885319232940674, "logits/rejected": -2.5338568687438965, "logps/chosen": -74.7059326171875, "logps/rejected": -844.2139892578125, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -0.36123818159103394, "rewards/margins": 7.717149257659912, "rewards/rejected": -8.078387260437012, "step": 74480 }, { "epoch": 0.89, "learning_rate": 1.7659083805300707e-07, "logits/chosen": -2.8654568195343018, "logits/rejected": -2.1031479835510254, "logps/chosen": -145.1419219970703, "logps/rejected": -1128.4298095703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8353842496871948, "rewards/margins": 10.026629447937012, "rewards/rejected": -10.86201286315918, "step": 74490 }, { "epoch": 0.89, "learning_rate": 1.7620540291009652e-07, "logits/chosen": -2.8867859840393066, "logits/rejected": -2.4128880500793457, "logps/chosen": -88.37442779541016, "logps/rejected": -887.8773193359375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.4540330469608307, "rewards/margins": 8.019200325012207, "rewards/rejected": -8.473234176635742, "step": 74500 }, { "epoch": 0.89, "learning_rate": 1.758203734952682e-07, "logits/chosen": -2.8810935020446777, "logits/rejected": -2.2696540355682373, "logps/chosen": -116.05964660644531, "logps/rejected": -982.5731201171875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6403197646141052, "rewards/margins": 8.781229019165039, "rewards/rejected": -9.421547889709473, "step": 74510 }, { "epoch": 0.89, "learning_rate": 1.7543574987574752e-07, "logits/chosen": -2.8583502769470215, "logits/rejected": -2.195819139480591, "logps/chosen": -104.25306701660156, "logps/rejected": -1025.884521484375, "loss": 0.125, "rewards/accuracies": 1.0, "rewards/chosen": -0.575276255607605, "rewards/margins": 9.299447059631348, "rewards/rejected": -9.874723434448242, "step": 74520 }, { "epoch": 0.89, "learning_rate": 1.7505153211868853e-07, "logits/chosen": -2.84157133102417, "logits/rejected": -2.2780938148498535, "logps/chosen": -119.7025375366211, "logps/rejected": -994.1735229492188, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.720004141330719, "rewards/margins": 8.820121765136719, "rewards/rejected": -9.540125846862793, "step": 74530 }, { "epoch": 0.89, "learning_rate": 1.746677202911748e-07, "logits/chosen": -2.891226053237915, "logits/rejected": -2.18538761138916, "logps/chosen": -114.4725570678711, "logps/rejected": -1017.8991088867188, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.6185621023178101, "rewards/margins": 9.151789665222168, "rewards/rejected": -9.77035140991211, "step": 74540 }, { "epoch": 0.89, "learning_rate": 1.742843144602188e-07, "logits/chosen": -2.866875410079956, "logits/rejected": -2.3583931922912598, "logps/chosen": -82.32133483886719, "logps/rejected": -874.2360229492188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.3876190483570099, "rewards/margins": 7.973931789398193, "rewards/rejected": -8.361551284790039, "step": 74550 }, { "epoch": 0.89, "learning_rate": 1.7390131469276206e-07, "logits/chosen": -2.903529405593872, "logits/rejected": -2.4681763648986816, "logps/chosen": -111.46250915527344, "logps/rejected": -878.18798828125, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.6401311159133911, "rewards/margins": 7.76798152923584, "rewards/rejected": -8.408113479614258, "step": 74560 }, { "epoch": 0.89, "learning_rate": 1.7351872105567603e-07, "logits/chosen": -2.873689651489258, "logits/rejected": -2.379329204559326, "logps/chosen": -108.62286376953125, "logps/rejected": -852.5208129882812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6406583786010742, "rewards/margins": 7.489445686340332, "rewards/rejected": -8.13010311126709, "step": 74570 }, { "epoch": 0.89, "learning_rate": 1.7313653361575894e-07, "logits/chosen": -2.8724253177642822, "logits/rejected": -2.069387435913086, "logps/chosen": -149.70840454101562, "logps/rejected": -1141.2388916015625, "loss": 0.0836, "rewards/accuracies": 1.0, "rewards/chosen": -0.9144997596740723, "rewards/margins": 10.065845489501953, "rewards/rejected": -10.980344772338867, "step": 74580 }, { "epoch": 0.89, "learning_rate": 1.7275475243974077e-07, "logits/chosen": -2.878528118133545, "logits/rejected": -2.3366494178771973, "logps/chosen": -103.3931884765625, "logps/rejected": -882.80810546875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.5783417224884033, "rewards/margins": 7.868855953216553, "rewards/rejected": -8.447198867797852, "step": 74590 }, { "epoch": 0.89, "learning_rate": 1.7237337759427903e-07, "logits/chosen": -2.8917770385742188, "logits/rejected": -2.332921266555786, "logps/chosen": -101.12984466552734, "logps/rejected": -888.7879028320312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5133441686630249, "rewards/margins": 7.995095729827881, "rewards/rejected": -8.508440017700195, "step": 74600 }, { "epoch": 0.89, "learning_rate": 1.7199240914596098e-07, "logits/chosen": -2.8254904747009277, "logits/rejected": -1.9466819763183594, "logps/chosen": -114.87025451660156, "logps/rejected": -956.3743286132812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5751566886901855, "rewards/margins": 8.608224868774414, "rewards/rejected": -9.183382034301758, "step": 74610 }, { "epoch": 0.89, "learning_rate": 1.7161184716130235e-07, "logits/chosen": -2.8579325675964355, "logits/rejected": -2.339393138885498, "logps/chosen": -101.53772735595703, "logps/rejected": -968.6776123046875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5336045026779175, "rewards/margins": 8.761518478393555, "rewards/rejected": -9.295124053955078, "step": 74620 }, { "epoch": 0.89, "learning_rate": 1.7123169170674886e-07, "logits/chosen": -2.8392627239227295, "logits/rejected": -2.3911616802215576, "logps/chosen": -103.30712890625, "logps/rejected": -889.22265625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.5982283353805542, "rewards/margins": 7.9157257080078125, "rewards/rejected": -8.51395320892334, "step": 74630 }, { "epoch": 0.89, "learning_rate": 1.7085194284867378e-07, "logits/chosen": -2.8993401527404785, "logits/rejected": -2.4914400577545166, "logps/chosen": -99.76527404785156, "logps/rejected": -903.58447265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5558421611785889, "rewards/margins": 8.092556953430176, "rewards/rejected": -8.648399353027344, "step": 74640 }, { "epoch": 0.89, "learning_rate": 1.7047260065338051e-07, "logits/chosen": -2.8984344005584717, "logits/rejected": -2.4649085998535156, "logps/chosen": -88.89616394042969, "logps/rejected": -851.1270751953125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.44711384177207947, "rewards/margins": 7.686448097229004, "rewards/rejected": -8.133562088012695, "step": 74650 }, { "epoch": 0.89, "learning_rate": 1.700936651871013e-07, "logits/chosen": -2.8887360095977783, "logits/rejected": -2.2698466777801514, "logps/chosen": -99.06602478027344, "logps/rejected": -898.97021484375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.5387272238731384, "rewards/margins": 8.05693531036377, "rewards/rejected": -8.595663070678711, "step": 74660 }, { "epoch": 0.89, "learning_rate": 1.6971513651599685e-07, "logits/chosen": -2.9086670875549316, "logits/rejected": -2.4847264289855957, "logps/chosen": -74.72987365722656, "logps/rejected": -869.9049072265625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.3245411515235901, "rewards/margins": 7.990912437438965, "rewards/rejected": -8.315452575683594, "step": 74670 }, { "epoch": 0.89, "learning_rate": 1.6933701470615766e-07, "logits/chosen": -2.9222497940063477, "logits/rejected": -2.0804781913757324, "logps/chosen": -138.73635864257812, "logps/rejected": -1121.0858154296875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.798845648765564, "rewards/margins": 9.982274055480957, "rewards/rejected": -10.781118392944336, "step": 74680 }, { "epoch": 0.89, "learning_rate": 1.6895929982360253e-07, "logits/chosen": -2.8400914669036865, "logits/rejected": -2.5558855533599854, "logps/chosen": -88.87527465820312, "logps/rejected": -760.2061767578125, "loss": 0.0949, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5406617522239685, "rewards/margins": 6.698750972747803, "rewards/rejected": -7.239412784576416, "step": 74690 }, { "epoch": 0.89, "learning_rate": 1.6858199193427987e-07, "logits/chosen": -2.900315761566162, "logits/rejected": -2.440432548522949, "logps/chosen": -90.31974792480469, "logps/rejected": -832.7359619140625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.49325379729270935, "rewards/margins": 7.453449249267578, "rewards/rejected": -7.946702480316162, "step": 74700 }, { "epoch": 0.89, "learning_rate": 1.6820509110406586e-07, "logits/chosen": -2.9115028381347656, "logits/rejected": -2.449660062789917, "logps/chosen": -95.33467864990234, "logps/rejected": -902.2630615234375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.4986537992954254, "rewards/margins": 8.150650978088379, "rewards/rejected": -8.649304389953613, "step": 74710 }, { "epoch": 0.89, "learning_rate": 1.6782859739876673e-07, "logits/chosen": -2.9075875282287598, "logits/rejected": -2.658282995223999, "logps/chosen": -59.250640869140625, "logps/rejected": -831.1795043945312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.22011248767375946, "rewards/margins": 7.713118553161621, "rewards/rejected": -7.933230400085449, "step": 74720 }, { "epoch": 0.89, "learning_rate": 1.6745251088411685e-07, "logits/chosen": -2.813936710357666, "logits/rejected": -2.3700077533721924, "logps/chosen": -96.38768005371094, "logps/rejected": -842.4107666015625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5350126624107361, "rewards/margins": 7.518532752990723, "rewards/rejected": -8.053544998168945, "step": 74730 }, { "epoch": 0.89, "learning_rate": 1.6707683162578037e-07, "logits/chosen": -2.916797399520874, "logits/rejected": -2.583118200302124, "logps/chosen": -88.08678436279297, "logps/rejected": -857.9962768554688, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.4359658658504486, "rewards/margins": 7.767953395843506, "rewards/rejected": -8.203919410705566, "step": 74740 }, { "epoch": 0.89, "learning_rate": 1.6670155968934953e-07, "logits/chosen": -2.8953471183776855, "logits/rejected": -2.2597458362579346, "logps/chosen": -116.41950988769531, "logps/rejected": -978.4172973632812, "loss": 0.0801, "rewards/accuracies": 1.0, "rewards/chosen": -0.6740066409111023, "rewards/margins": 8.711849212646484, "rewards/rejected": -9.385855674743652, "step": 74750 }, { "epoch": 0.89, "learning_rate": 1.6632669514034612e-07, "logits/chosen": -2.9119973182678223, "logits/rejected": -2.391430616378784, "logps/chosen": -97.37742614746094, "logps/rejected": -961.8820190429688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5626426339149475, "rewards/margins": 8.673101425170898, "rewards/rejected": -9.235745429992676, "step": 74760 }, { "epoch": 0.9, "learning_rate": 1.659522380442205e-07, "logits/chosen": -2.9245002269744873, "logits/rejected": -2.4955806732177734, "logps/chosen": -76.30552673339844, "logps/rejected": -840.8772583007812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.3358429968357086, "rewards/margins": 7.684399604797363, "rewards/rejected": -8.020241737365723, "step": 74770 }, { "epoch": 0.9, "learning_rate": 1.6557818846635127e-07, "logits/chosen": -2.9119815826416016, "logits/rejected": -2.568267822265625, "logps/chosen": -71.33949279785156, "logps/rejected": -887.1627197265625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.30008286237716675, "rewards/margins": 8.189878463745117, "rewards/rejected": -8.489960670471191, "step": 74780 }, { "epoch": 0.9, "learning_rate": 1.652045464720467e-07, "logits/chosen": -2.908076524734497, "logits/rejected": -2.054560899734497, "logps/chosen": -122.8364486694336, "logps/rejected": -1080.4046630859375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6806968450546265, "rewards/margins": 9.729293823242188, "rewards/rejected": -10.409990310668945, "step": 74790 }, { "epoch": 0.9, "learning_rate": 1.648313121265438e-07, "logits/chosen": -2.847733974456787, "logits/rejected": -2.160492420196533, "logps/chosen": -119.69112396240234, "logps/rejected": -991.93408203125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7083562016487122, "rewards/margins": 8.824419021606445, "rewards/rejected": -9.532773971557617, "step": 74800 }, { "epoch": 0.9, "learning_rate": 1.6445848549500821e-07, "logits/chosen": -2.788926601409912, "logits/rejected": -2.262709140777588, "logps/chosen": -113.36578369140625, "logps/rejected": -889.50830078125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6501267552375793, "rewards/margins": 7.8722405433654785, "rewards/rejected": -8.522367477416992, "step": 74810 }, { "epoch": 0.9, "learning_rate": 1.6408606664253395e-07, "logits/chosen": -2.88568115234375, "logits/rejected": -2.2268669605255127, "logps/chosen": -111.3783187866211, "logps/rejected": -920.861328125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6244170069694519, "rewards/margins": 8.193265914916992, "rewards/rejected": -8.817682266235352, "step": 74820 }, { "epoch": 0.9, "learning_rate": 1.6371405563414545e-07, "logits/chosen": -2.8405561447143555, "logits/rejected": -1.921661376953125, "logps/chosen": -150.27981567382812, "logps/rejected": -1233.4620361328125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.895612359046936, "rewards/margins": 11.010435104370117, "rewards/rejected": -11.906045913696289, "step": 74830 }, { "epoch": 0.9, "learning_rate": 1.6334245253479374e-07, "logits/chosen": -2.8897616863250732, "logits/rejected": -2.1419894695281982, "logps/chosen": -128.50283813476562, "logps/rejected": -975.8251953125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7680361270904541, "rewards/margins": 8.577764511108398, "rewards/rejected": -9.345800399780273, "step": 74840 }, { "epoch": 0.9, "learning_rate": 1.6297125740936003e-07, "logits/chosen": -2.900177478790283, "logits/rejected": -2.4358561038970947, "logps/chosen": -87.4365463256836, "logps/rejected": -834.0118408203125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4142359793186188, "rewards/margins": 7.5424370765686035, "rewards/rejected": -7.956673622131348, "step": 74850 }, { "epoch": 0.9, "learning_rate": 1.6260047032265385e-07, "logits/chosen": -2.896979570388794, "logits/rejected": -2.334519386291504, "logps/chosen": -90.4443359375, "logps/rejected": -997.2316284179688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.43729934096336365, "rewards/margins": 9.147557258605957, "rewards/rejected": -9.584856033325195, "step": 74860 }, { "epoch": 0.9, "learning_rate": 1.6223009133941315e-07, "logits/chosen": -2.867300510406494, "logits/rejected": -2.279189109802246, "logps/chosen": -131.48471069335938, "logps/rejected": -911.59375, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -0.8220508694648743, "rewards/margins": 7.882554054260254, "rewards/rejected": -8.704605102539062, "step": 74870 }, { "epoch": 0.9, "learning_rate": 1.6186012052430617e-07, "logits/chosen": -2.8823368549346924, "logits/rejected": -2.485623836517334, "logps/chosen": -76.57848358154297, "logps/rejected": -848.1644287109375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.3591218888759613, "rewards/margins": 7.7384934425354, "rewards/rejected": -8.097615242004395, "step": 74880 }, { "epoch": 0.9, "learning_rate": 1.6149055794192876e-07, "logits/chosen": -2.900455951690674, "logits/rejected": -2.319441318511963, "logps/chosen": -97.7699966430664, "logps/rejected": -942.01904296875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.517408549785614, "rewards/margins": 8.517172813415527, "rewards/rejected": -9.034582138061523, "step": 74890 }, { "epoch": 0.9, "learning_rate": 1.6112140365680429e-07, "logits/chosen": -2.893866777420044, "logits/rejected": -2.318026065826416, "logps/chosen": -114.38093566894531, "logps/rejected": -960.7166748046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6263777613639832, "rewards/margins": 8.583990097045898, "rewards/rejected": -9.210370063781738, "step": 74900 }, { "epoch": 0.9, "learning_rate": 1.6075265773338705e-07, "logits/chosen": -2.881639003753662, "logits/rejected": -2.4179282188415527, "logps/chosen": -91.28780364990234, "logps/rejected": -900.1610107421875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4458250403404236, "rewards/margins": 8.161328315734863, "rewards/rejected": -8.607152938842773, "step": 74910 }, { "epoch": 0.9, "learning_rate": 1.6038432023605888e-07, "logits/chosen": -2.894484758377075, "logits/rejected": -2.4793193340301514, "logps/chosen": -100.24079895019531, "logps/rejected": -922.001953125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5397946238517761, "rewards/margins": 8.28773021697998, "rewards/rejected": -8.827524185180664, "step": 74920 }, { "epoch": 0.9, "learning_rate": 1.6001639122913e-07, "logits/chosen": -2.9070115089416504, "logits/rejected": -2.3744659423828125, "logps/chosen": -89.26103973388672, "logps/rejected": -850.2606201171875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.4722104072570801, "rewards/margins": 7.6591033935546875, "rewards/rejected": -8.131312370300293, "step": 74930 }, { "epoch": 0.9, "learning_rate": 1.5964887077684066e-07, "logits/chosen": -2.851142406463623, "logits/rejected": -2.1029391288757324, "logps/chosen": -128.04605102539062, "logps/rejected": -986.6932373046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7379422783851624, "rewards/margins": 8.709600448608398, "rewards/rejected": -9.447543144226074, "step": 74940 }, { "epoch": 0.9, "learning_rate": 1.5928175894335847e-07, "logits/chosen": -2.868152141571045, "logits/rejected": -2.199686050415039, "logps/chosen": -125.40028381347656, "logps/rejected": -1085.5538330078125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7004464864730835, "rewards/margins": 9.761521339416504, "rewards/rejected": -10.461965560913086, "step": 74950 }, { "epoch": 0.9, "learning_rate": 1.5891505579277988e-07, "logits/chosen": -2.8880743980407715, "logits/rejected": -2.1864724159240723, "logps/chosen": -131.09591674804688, "logps/rejected": -1018.6233520507812, "loss": 0.1035, "rewards/accuracies": 1.0, "rewards/chosen": -0.7805667519569397, "rewards/margins": 9.001863479614258, "rewards/rejected": -9.782430648803711, "step": 74960 }, { "epoch": 0.9, "learning_rate": 1.5854876138913038e-07, "logits/chosen": -2.834036350250244, "logits/rejected": -2.267023801803589, "logps/chosen": -115.9035873413086, "logps/rejected": -1080.32958984375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6451951861381531, "rewards/margins": 9.74482536315918, "rewards/rejected": -10.390019416809082, "step": 74970 }, { "epoch": 0.9, "learning_rate": 1.581828757963641e-07, "logits/chosen": -2.9222254753112793, "logits/rejected": -2.3883039951324463, "logps/chosen": -145.4200897216797, "logps/rejected": -885.4793701171875, "loss": 0.0929, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9576184153556824, "rewards/margins": 7.4924163818359375, "rewards/rejected": -8.450035095214844, "step": 74980 }, { "epoch": 0.9, "learning_rate": 1.5781739907836353e-07, "logits/chosen": -2.844101667404175, "logits/rejected": -2.5290274620056152, "logps/chosen": -110.76206970214844, "logps/rejected": -734.6329345703125, "loss": 0.1994, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7330238819122314, "rewards/margins": 6.249833106994629, "rewards/rejected": -6.982856750488281, "step": 74990 }, { "epoch": 0.9, "learning_rate": 1.5745233129893983e-07, "logits/chosen": -2.899728298187256, "logits/rejected": -2.2938754558563232, "logps/chosen": -113.6786880493164, "logps/rejected": -893.5673828125, "loss": 0.0239, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6757320165634155, "rewards/margins": 7.870002746582031, "rewards/rejected": -8.545735359191895, "step": 75000 }, { "epoch": 0.9, "eval_logits/chosen": -2.8872220516204834, "eval_logits/rejected": -1.7772589921951294, "eval_logps/chosen": -240.9354705810547, "eval_logps/rejected": -1140.7237548828125, "eval_loss": 0.0013282055733725429, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -1.7975519895553589, "eval_rewards/margins": 9.14243221282959, "eval_rewards/rejected": -10.939983367919922, "eval_runtime": 1.2163, "eval_samples_per_second": 4.111, "eval_steps_per_second": 2.467, "step": 75000 }, { "epoch": 0.9, "learning_rate": 1.5708767252183316e-07, "logits/chosen": -2.8664402961730957, "logits/rejected": -2.387831449508667, "logps/chosen": -94.72005462646484, "logps/rejected": -889.517578125, "loss": 0.1141, "rewards/accuracies": 1.0, "rewards/chosen": -0.49496570229530334, "rewards/margins": 8.01404094696045, "rewards/rejected": -8.509007453918457, "step": 75010 }, { "epoch": 0.9, "learning_rate": 1.56723422810712e-07, "logits/chosen": -2.874025821685791, "logits/rejected": -2.154231548309326, "logps/chosen": -142.7093505859375, "logps/rejected": -1047.567138671875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9183799028396606, "rewards/margins": 9.161273002624512, "rewards/rejected": -10.079652786254883, "step": 75020 }, { "epoch": 0.9, "learning_rate": 1.5635958222917276e-07, "logits/chosen": -2.86924409866333, "logits/rejected": -2.2109222412109375, "logps/chosen": -106.49871826171875, "logps/rejected": -917.93505859375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5763159990310669, "rewards/margins": 8.214092254638672, "rewards/rejected": -8.790407180786133, "step": 75030 }, { "epoch": 0.9, "learning_rate": 1.5599615084074122e-07, "logits/chosen": -2.8506557941436768, "logits/rejected": -2.246030330657959, "logps/chosen": -111.59223937988281, "logps/rejected": -965.2400512695312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6365269422531128, "rewards/margins": 8.61803913116455, "rewards/rejected": -9.254565238952637, "step": 75040 }, { "epoch": 0.9, "learning_rate": 1.5563312870887138e-07, "logits/chosen": -2.8884940147399902, "logits/rejected": -2.504687786102295, "logps/chosen": -82.61607360839844, "logps/rejected": -809.5506591796875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.4088786244392395, "rewards/margins": 7.308237552642822, "rewards/rejected": -7.717115879058838, "step": 75050 }, { "epoch": 0.9, "learning_rate": 1.552705158969464e-07, "logits/chosen": -2.8956165313720703, "logits/rejected": -2.181309938430786, "logps/chosen": -118.06368255615234, "logps/rejected": -976.7337646484375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.6782069802284241, "rewards/margins": 8.704663276672363, "rewards/rejected": -9.3828706741333, "step": 75060 }, { "epoch": 0.9, "learning_rate": 1.549083124682768e-07, "logits/chosen": -2.8853440284729004, "logits/rejected": -2.364539623260498, "logps/chosen": -100.32791137695312, "logps/rejected": -998.4528198242188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5372711420059204, "rewards/margins": 9.051170349121094, "rewards/rejected": -9.588441848754883, "step": 75070 }, { "epoch": 0.9, "learning_rate": 1.545465184861028e-07, "logits/chosen": -2.8708877563476562, "logits/rejected": -2.4919955730438232, "logps/chosen": -100.24992370605469, "logps/rejected": -895.6306762695312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5758346319198608, "rewards/margins": 7.995961666107178, "rewards/rejected": -8.571795463562012, "step": 75080 }, { "epoch": 0.9, "learning_rate": 1.5418513401359274e-07, "logits/chosen": -2.842170000076294, "logits/rejected": -2.19398832321167, "logps/chosen": -126.38145446777344, "logps/rejected": -1021.4710083007812, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.765966534614563, "rewards/margins": 9.057581901550293, "rewards/rejected": -9.823548316955566, "step": 75090 }, { "epoch": 0.9, "learning_rate": 1.5382415911384285e-07, "logits/chosen": -2.900717258453369, "logits/rejected": -2.043034553527832, "logps/chosen": -135.22607421875, "logps/rejected": -1098.0025634765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7707043290138245, "rewards/margins": 9.80384635925293, "rewards/rejected": -10.574551582336426, "step": 75100 }, { "epoch": 0.9, "learning_rate": 1.534635938498785e-07, "logits/chosen": -2.875804901123047, "logits/rejected": -2.1580920219421387, "logps/chosen": -124.5333023071289, "logps/rejected": -1069.658447265625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7057968378067017, "rewards/margins": 9.588669776916504, "rewards/rejected": -10.294466018676758, "step": 75110 }, { "epoch": 0.9, "learning_rate": 1.531034382846533e-07, "logits/chosen": -2.88124418258667, "logits/rejected": -2.3131299018859863, "logps/chosen": -91.6709976196289, "logps/rejected": -920.5877685546875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.45562058687210083, "rewards/margins": 8.3600435256958, "rewards/rejected": -8.815664291381836, "step": 75120 }, { "epoch": 0.9, "learning_rate": 1.5274369248104965e-07, "logits/chosen": -2.9001073837280273, "logits/rejected": -2.4594712257385254, "logps/chosen": -121.72279357910156, "logps/rejected": -910.9730224609375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7727903723716736, "rewards/margins": 7.956620693206787, "rewards/rejected": -8.729411125183105, "step": 75130 }, { "epoch": 0.9, "learning_rate": 1.5238435650187816e-07, "logits/chosen": -2.851217269897461, "logits/rejected": -2.2953977584838867, "logps/chosen": -103.78215026855469, "logps/rejected": -968.9299926757812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5706979036331177, "rewards/margins": 8.722143173217773, "rewards/rejected": -9.292840957641602, "step": 75140 }, { "epoch": 0.9, "learning_rate": 1.5202543040987812e-07, "logits/chosen": -2.8615288734436035, "logits/rejected": -2.37453031539917, "logps/chosen": -97.27333068847656, "logps/rejected": -904.1809692382812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5423384308815002, "rewards/margins": 8.119451522827148, "rewards/rejected": -8.66179084777832, "step": 75150 }, { "epoch": 0.9, "learning_rate": 1.516669142677163e-07, "logits/chosen": -2.8855433464050293, "logits/rejected": -2.329512596130371, "logps/chosen": -110.34654235839844, "logps/rejected": -984.5245971679688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6186314821243286, "rewards/margins": 8.852286338806152, "rewards/rejected": -9.470918655395508, "step": 75160 }, { "epoch": 0.9, "learning_rate": 1.5130880813798904e-07, "logits/chosen": -2.9108574390411377, "logits/rejected": -2.2575769424438477, "logps/chosen": -113.75467681884766, "logps/rejected": -1077.7353515625, "loss": 0.0462, "rewards/accuracies": 1.0, "rewards/chosen": -0.6026877164840698, "rewards/margins": 9.768585205078125, "rewards/rejected": -10.371273040771484, "step": 75170 }, { "epoch": 0.9, "learning_rate": 1.509511120832205e-07, "logits/chosen": -2.864495277404785, "logits/rejected": -2.497328281402588, "logps/chosen": -93.08657836914062, "logps/rejected": -827.7916870117188, "loss": 0.0267, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.507266640663147, "rewards/margins": 7.373729705810547, "rewards/rejected": -7.8809967041015625, "step": 75180 }, { "epoch": 0.9, "learning_rate": 1.5059382616586378e-07, "logits/chosen": -2.8863472938537598, "logits/rejected": -2.224064588546753, "logps/chosen": -115.34452056884766, "logps/rejected": -1034.7421875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6793811917304993, "rewards/margins": 9.264326095581055, "rewards/rejected": -9.943705558776855, "step": 75190 }, { "epoch": 0.9, "learning_rate": 1.5023695044829979e-07, "logits/chosen": -2.905071258544922, "logits/rejected": -2.499887228012085, "logps/chosen": -93.78864288330078, "logps/rejected": -875.6474609375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5005303621292114, "rewards/margins": 7.868517875671387, "rewards/rejected": -8.369047164916992, "step": 75200 }, { "epoch": 0.9, "learning_rate": 1.4988048499283813e-07, "logits/chosen": -2.888622760772705, "logits/rejected": -2.169579267501831, "logps/chosen": -114.11320495605469, "logps/rejected": -972.8045043945312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.622541069984436, "rewards/margins": 8.708854675292969, "rewards/rejected": -9.33139705657959, "step": 75210 }, { "epoch": 0.9, "learning_rate": 1.4952442986171682e-07, "logits/chosen": -2.822669506072998, "logits/rejected": -2.172913074493408, "logps/chosen": -112.52796936035156, "logps/rejected": -1071.174560546875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6527518630027771, "rewards/margins": 9.653148651123047, "rewards/rejected": -10.305900573730469, "step": 75220 }, { "epoch": 0.9, "learning_rate": 1.491687851171017e-07, "logits/chosen": -2.8507206439971924, "logits/rejected": -2.28248929977417, "logps/chosen": -115.73481750488281, "logps/rejected": -974.0125122070312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6727586984634399, "rewards/margins": 8.669947624206543, "rewards/rejected": -9.342707633972168, "step": 75230 }, { "epoch": 0.9, "learning_rate": 1.4881355082108722e-07, "logits/chosen": -2.9512314796447754, "logits/rejected": -2.2258846759796143, "logps/chosen": -124.64083099365234, "logps/rejected": -1007.3771362304688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6646470427513123, "rewards/margins": 9.012892723083496, "rewards/rejected": -9.67754077911377, "step": 75240 }, { "epoch": 0.9, "learning_rate": 1.4845872703569686e-07, "logits/chosen": -2.906555414199829, "logits/rejected": -2.418483257293701, "logps/chosen": -99.18116760253906, "logps/rejected": -920.3931884765625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5756303071975708, "rewards/margins": 8.229031562805176, "rewards/rejected": -8.80466079711914, "step": 75250 }, { "epoch": 0.9, "learning_rate": 1.4810431382288188e-07, "logits/chosen": -2.859912395477295, "logits/rejected": -2.4732182025909424, "logps/chosen": -75.26898956298828, "logps/rejected": -800.26513671875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.3445020020008087, "rewards/margins": 7.286097049713135, "rewards/rejected": -7.630599021911621, "step": 75260 }, { "epoch": 0.9, "learning_rate": 1.477503112445211e-07, "logits/chosen": -2.846464157104492, "logits/rejected": -2.3711161613464355, "logps/chosen": -103.7216567993164, "logps/rejected": -900.9793090820312, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.6003974676132202, "rewards/margins": 8.024301528930664, "rewards/rejected": -8.624699592590332, "step": 75270 }, { "epoch": 0.9, "learning_rate": 1.4739671936242316e-07, "logits/chosen": -2.8327255249023438, "logits/rejected": -2.3338656425476074, "logps/chosen": -109.83052825927734, "logps/rejected": -931.9118041992188, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.6373481750488281, "rewards/margins": 8.289093971252441, "rewards/rejected": -8.926443099975586, "step": 75280 }, { "epoch": 0.9, "learning_rate": 1.4704353823832396e-07, "logits/chosen": -2.8873209953308105, "logits/rejected": -2.468961238861084, "logps/chosen": -104.91043853759766, "logps/rejected": -900.9010009765625, "loss": 0.0877, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6391477584838867, "rewards/margins": 7.994425296783447, "rewards/rejected": -8.633572578430176, "step": 75290 }, { "epoch": 0.9, "learning_rate": 1.4669076793388802e-07, "logits/chosen": -2.880537509918213, "logits/rejected": -2.143944263458252, "logps/chosen": -138.5967254638672, "logps/rejected": -1056.7080078125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8092383146286011, "rewards/margins": 9.3534574508667, "rewards/rejected": -10.16269588470459, "step": 75300 }, { "epoch": 0.9, "learning_rate": 1.463384085107078e-07, "logits/chosen": -2.890716552734375, "logits/rejected": -2.364262104034424, "logps/chosen": -104.1219253540039, "logps/rejected": -949.7078857421875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5715906620025635, "rewards/margins": 8.526717185974121, "rewards/rejected": -9.098307609558105, "step": 75310 }, { "epoch": 0.9, "learning_rate": 1.4598646003030432e-07, "logits/chosen": -2.8659236431121826, "logits/rejected": -2.15911602973938, "logps/chosen": -117.40155029296875, "logps/rejected": -1053.476318359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6589847803115845, "rewards/margins": 9.469826698303223, "rewards/rejected": -10.128812789916992, "step": 75320 }, { "epoch": 0.9, "learning_rate": 1.4563492255412702e-07, "logits/chosen": -2.8768091201782227, "logits/rejected": -2.565645694732666, "logps/chosen": -83.17288970947266, "logps/rejected": -807.5137329101562, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -0.40906181931495667, "rewards/margins": 7.292824745178223, "rewards/rejected": -7.701886177062988, "step": 75330 }, { "epoch": 0.9, "learning_rate": 1.4528379614355381e-07, "logits/chosen": -2.8891263008117676, "logits/rejected": -2.5799167156219482, "logps/chosen": -88.38334655761719, "logps/rejected": -833.8228759765625, "loss": 0.1044, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.4656497538089752, "rewards/margins": 7.505002498626709, "rewards/rejected": -7.970652103424072, "step": 75340 }, { "epoch": 0.9, "learning_rate": 1.4493308085988923e-07, "logits/chosen": -2.8515522480010986, "logits/rejected": -2.260331392288208, "logps/chosen": -112.81352233886719, "logps/rejected": -1086.1614990234375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6551095247268677, "rewards/margins": 9.804166793823242, "rewards/rejected": -10.459278106689453, "step": 75350 }, { "epoch": 0.9, "learning_rate": 1.4458277676436817e-07, "logits/chosen": -2.8698573112487793, "logits/rejected": -2.1505043506622314, "logps/chosen": -131.57455444335938, "logps/rejected": -985.4183349609375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7625836730003357, "rewards/margins": 8.68237018585205, "rewards/rejected": -9.444952011108398, "step": 75360 }, { "epoch": 0.9, "learning_rate": 1.44232883918152e-07, "logits/chosen": -2.8563570976257324, "logits/rejected": -2.117083787918091, "logps/chosen": -158.64950561523438, "logps/rejected": -1087.336181640625, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -1.0096323490142822, "rewards/margins": 9.443107604980469, "rewards/rejected": -10.452739715576172, "step": 75370 }, { "epoch": 0.9, "learning_rate": 1.4388340238233156e-07, "logits/chosen": -2.8404383659362793, "logits/rejected": -2.4581360816955566, "logps/chosen": -99.74365997314453, "logps/rejected": -758.3626708984375, "loss": 0.1255, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6072534918785095, "rewards/margins": 6.610718727111816, "rewards/rejected": -7.217970848083496, "step": 75380 }, { "epoch": 0.9, "learning_rate": 1.4353433221792528e-07, "logits/chosen": -2.9155516624450684, "logits/rejected": -2.525097370147705, "logps/chosen": -104.95928955078125, "logps/rejected": -806.4706420898438, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.5950444340705872, "rewards/margins": 7.076097011566162, "rewards/rejected": -7.671141147613525, "step": 75390 }, { "epoch": 0.9, "learning_rate": 1.431856734858797e-07, "logits/chosen": -2.8792519569396973, "logits/rejected": -2.156176805496216, "logps/chosen": -153.39947509765625, "logps/rejected": -933.1236572265625, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.0164813995361328, "rewards/margins": 7.9405317306518555, "rewards/rejected": -8.957014083862305, "step": 75400 }, { "epoch": 0.9, "learning_rate": 1.4283742624707025e-07, "logits/chosen": -2.879385471343994, "logits/rejected": -2.3506619930267334, "logps/chosen": -111.32149505615234, "logps/rejected": -967.6878662109375, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/chosen": -0.6371585130691528, "rewards/margins": 8.651021957397461, "rewards/rejected": -9.288179397583008, "step": 75410 }, { "epoch": 0.9, "learning_rate": 1.4248959056229916e-07, "logits/chosen": -2.909939765930176, "logits/rejected": -2.393650531768799, "logps/chosen": -100.76232147216797, "logps/rejected": -932.3796997070312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5599902868270874, "rewards/margins": 8.373361587524414, "rewards/rejected": -8.933350563049316, "step": 75420 }, { "epoch": 0.9, "learning_rate": 1.4214216649229756e-07, "logits/chosen": -2.9237864017486572, "logits/rejected": -2.2664732933044434, "logps/chosen": -103.69026947021484, "logps/rejected": -953.1267700195312, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.54090815782547, "rewards/margins": 8.59650707244873, "rewards/rejected": -9.137415885925293, "step": 75430 }, { "epoch": 0.9, "learning_rate": 1.4179515409772527e-07, "logits/chosen": -2.8972511291503906, "logits/rejected": -2.400177478790283, "logps/chosen": -92.163330078125, "logps/rejected": -850.8826293945312, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.48011723160743713, "rewards/margins": 7.647479057312012, "rewards/rejected": -8.127595901489258, "step": 75440 }, { "epoch": 0.9, "learning_rate": 1.4144855343916936e-07, "logits/chosen": -2.8764843940734863, "logits/rejected": -2.413898468017578, "logps/chosen": -81.96973419189453, "logps/rejected": -857.2249755859375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.4132705628871918, "rewards/margins": 7.778372287750244, "rewards/rejected": -8.191642761230469, "step": 75450 }, { "epoch": 0.9, "learning_rate": 1.411023645771453e-07, "logits/chosen": -2.8544363975524902, "logits/rejected": -2.4420838356018066, "logps/chosen": -82.05024719238281, "logps/rejected": -900.9454345703125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.3445351719856262, "rewards/margins": 8.274629592895508, "rewards/rejected": -8.619165420532227, "step": 75460 }, { "epoch": 0.9, "learning_rate": 1.407565875720973e-07, "logits/chosen": -2.8824234008789062, "logits/rejected": -2.368511199951172, "logps/chosen": -98.5540771484375, "logps/rejected": -884.5679931640625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5576497316360474, "rewards/margins": 7.897787570953369, "rewards/rejected": -8.455437660217285, "step": 75470 }, { "epoch": 0.9, "learning_rate": 1.4041122248439647e-07, "logits/chosen": -2.9265925884246826, "logits/rejected": -2.5778207778930664, "logps/chosen": -79.28121948242188, "logps/rejected": -765.3291015625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.3553539216518402, "rewards/margins": 6.92417049407959, "rewards/rejected": -7.2795233726501465, "step": 75480 }, { "epoch": 0.9, "learning_rate": 1.4006626937434237e-07, "logits/chosen": -2.8415446281433105, "logits/rejected": -2.345677614212036, "logps/chosen": -104.28421783447266, "logps/rejected": -957.826171875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5944012403488159, "rewards/margins": 8.610857009887695, "rewards/rejected": -9.2052583694458, "step": 75490 }, { "epoch": 0.9, "learning_rate": 1.3972172830216347e-07, "logits/chosen": -2.8967676162719727, "logits/rejected": -2.356480360031128, "logps/chosen": -123.5012435913086, "logps/rejected": -1025.7381591796875, "loss": 0.0264, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7724534869194031, "rewards/margins": 9.084739685058594, "rewards/rejected": -9.857194900512695, "step": 75500 }, { "epoch": 0.9, "learning_rate": 1.3937759932801525e-07, "logits/chosen": -2.916865110397339, "logits/rejected": -2.272858142852783, "logps/chosen": -123.11589050292969, "logps/rejected": -914.1937255859375, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -0.7271197438240051, "rewards/margins": 8.019207000732422, "rewards/rejected": -8.746325492858887, "step": 75510 }, { "epoch": 0.9, "learning_rate": 1.3903388251198162e-07, "logits/chosen": -2.8812451362609863, "logits/rejected": -2.5402991771698, "logps/chosen": -89.84722900390625, "logps/rejected": -814.7689819335938, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4623265266418457, "rewards/margins": 7.298134803771973, "rewards/rejected": -7.760461330413818, "step": 75520 }, { "epoch": 0.9, "learning_rate": 1.3869057791407514e-07, "logits/chosen": -2.9188332557678223, "logits/rejected": -2.598853588104248, "logps/chosen": -93.57157897949219, "logps/rejected": -781.6162719726562, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.5706208348274231, "rewards/margins": 6.869549751281738, "rewards/rejected": -7.440171241760254, "step": 75530 }, { "epoch": 0.9, "learning_rate": 1.383476855942356e-07, "logits/chosen": -2.9056506156921387, "logits/rejected": -2.5756380558013916, "logps/chosen": -66.86085510253906, "logps/rejected": -811.2265625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.30280035734176636, "rewards/margins": 7.434284210205078, "rewards/rejected": -7.73708438873291, "step": 75540 }, { "epoch": 0.9, "learning_rate": 1.38005205612331e-07, "logits/chosen": -2.8634908199310303, "logits/rejected": -2.2968554496765137, "logps/chosen": -132.5513916015625, "logps/rejected": -954.4932861328125, "loss": 0.2604, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8762673139572144, "rewards/margins": 8.291776657104492, "rewards/rejected": -9.16804313659668, "step": 75550 }, { "epoch": 0.9, "learning_rate": 1.3766313802815707e-07, "logits/chosen": -2.9597654342651367, "logits/rejected": -2.6130878925323486, "logps/chosen": -80.83464050292969, "logps/rejected": -866.9239501953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.39883458614349365, "rewards/margins": 7.900196075439453, "rewards/rejected": -8.299030303955078, "step": 75560 }, { "epoch": 0.9, "learning_rate": 1.37321482901438e-07, "logits/chosen": -2.9048240184783936, "logits/rejected": -2.2417757511138916, "logps/chosen": -138.81765747070312, "logps/rejected": -1055.6678466796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7818289399147034, "rewards/margins": 9.348366737365723, "rewards/rejected": -10.130194664001465, "step": 75570 }, { "epoch": 0.9, "learning_rate": 1.3698024029182606e-07, "logits/chosen": -2.878070831298828, "logits/rejected": -2.346073865890503, "logps/chosen": -92.40937805175781, "logps/rejected": -974.9684448242188, "loss": 0.0529, "rewards/accuracies": 1.0, "rewards/chosen": -0.4851051867008209, "rewards/margins": 8.866870880126953, "rewards/rejected": -9.35197639465332, "step": 75580 }, { "epoch": 0.9, "learning_rate": 1.3663941025890138e-07, "logits/chosen": -2.8490312099456787, "logits/rejected": -2.06911301612854, "logps/chosen": -131.90602111816406, "logps/rejected": -984.2809448242188, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7742785215377808, "rewards/margins": 8.648757934570312, "rewards/rejected": -9.423035621643066, "step": 75590 }, { "epoch": 0.9, "learning_rate": 1.3629899286217135e-07, "logits/chosen": -2.8616185188293457, "logits/rejected": -2.397845983505249, "logps/chosen": -92.7401351928711, "logps/rejected": -923.2086791992188, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.45994478464126587, "rewards/margins": 8.386934280395508, "rewards/rejected": -8.846879005432129, "step": 75600 }, { "epoch": 0.91, "learning_rate": 1.3595898816107289e-07, "logits/chosen": -2.9257938861846924, "logits/rejected": -2.1935858726501465, "logps/chosen": -121.6133041381836, "logps/rejected": -959.1033935546875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.6957887411117554, "rewards/margins": 8.507633209228516, "rewards/rejected": -9.203421592712402, "step": 75610 }, { "epoch": 0.91, "learning_rate": 1.3561939621496878e-07, "logits/chosen": -2.914015531539917, "logits/rejected": -2.4370222091674805, "logps/chosen": -95.87542724609375, "logps/rejected": -934.27734375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5131354331970215, "rewards/margins": 8.454144477844238, "rewards/rejected": -8.967279434204102, "step": 75620 }, { "epoch": 0.91, "learning_rate": 1.3528021708315132e-07, "logits/chosen": -2.871741533279419, "logits/rejected": -2.0748209953308105, "logps/chosen": -140.15695190429688, "logps/rejected": -1136.8455810546875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8550143241882324, "rewards/margins": 10.100852012634277, "rewards/rejected": -10.955867767333984, "step": 75630 }, { "epoch": 0.91, "learning_rate": 1.3494145082484035e-07, "logits/chosen": -2.9212424755096436, "logits/rejected": -2.396771192550659, "logps/chosen": -105.22115325927734, "logps/rejected": -963.8897705078125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5523802042007446, "rewards/margins": 8.690154075622559, "rewards/rejected": -9.242533683776855, "step": 75640 }, { "epoch": 0.91, "learning_rate": 1.3460309749918326e-07, "logits/chosen": -2.87021803855896, "logits/rejected": -2.374326467514038, "logps/chosen": -100.7763900756836, "logps/rejected": -1006.8125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5614050030708313, "rewards/margins": 9.113139152526855, "rewards/rejected": -9.674543380737305, "step": 75650 }, { "epoch": 0.91, "learning_rate": 1.3426515716525563e-07, "logits/chosen": -2.8724870681762695, "logits/rejected": -2.1877634525299072, "logps/chosen": -124.09727478027344, "logps/rejected": -1013.7630615234375, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -0.7033296823501587, "rewards/margins": 9.034342765808105, "rewards/rejected": -9.737672805786133, "step": 75660 }, { "epoch": 0.91, "learning_rate": 1.3392762988206158e-07, "logits/chosen": -2.9125874042510986, "logits/rejected": -2.3070147037506104, "logps/chosen": -105.33940124511719, "logps/rejected": -900.53076171875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5420247912406921, "rewards/margins": 8.070328712463379, "rewards/rejected": -8.612353324890137, "step": 75670 }, { "epoch": 0.91, "learning_rate": 1.3359051570853156e-07, "logits/chosen": -2.861891508102417, "logits/rejected": -2.0040557384490967, "logps/chosen": -136.85635375976562, "logps/rejected": -1081.227294921875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7998589277267456, "rewards/margins": 9.593265533447266, "rewards/rejected": -10.393125534057617, "step": 75680 }, { "epoch": 0.91, "learning_rate": 1.3325381470352478e-07, "logits/chosen": -2.8942198753356934, "logits/rejected": -2.294360399246216, "logps/chosen": -102.96223449707031, "logps/rejected": -1001.8404541015625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5355567336082458, "rewards/margins": 9.080137252807617, "rewards/rejected": -9.615694046020508, "step": 75690 }, { "epoch": 0.91, "learning_rate": 1.3291752692582898e-07, "logits/chosen": -2.8584344387054443, "logits/rejected": -2.1052536964416504, "logps/chosen": -116.38387298583984, "logps/rejected": -968.8177490234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6406008005142212, "rewards/margins": 8.654406547546387, "rewards/rejected": -9.295007705688477, "step": 75700 }, { "epoch": 0.91, "learning_rate": 1.3258165243415804e-07, "logits/chosen": -2.8956308364868164, "logits/rejected": -2.249321937561035, "logps/chosen": -107.1821517944336, "logps/rejected": -1012.8602294921875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5455061197280884, "rewards/margins": 9.19540023803711, "rewards/rejected": -9.740907669067383, "step": 75710 }, { "epoch": 0.91, "learning_rate": 1.3224619128715588e-07, "logits/chosen": -2.8943123817443848, "logits/rejected": -2.451115608215332, "logps/chosen": -95.33778381347656, "logps/rejected": -801.1315307617188, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5543211102485657, "rewards/margins": 7.0866217613220215, "rewards/rejected": -7.6409430503845215, "step": 75720 }, { "epoch": 0.91, "learning_rate": 1.3191114354339285e-07, "logits/chosen": -2.865617275238037, "logits/rejected": -2.2546510696411133, "logps/chosen": -108.1163558959961, "logps/rejected": -995.45654296875, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -0.6047554612159729, "rewards/margins": 8.948282241821289, "rewards/rejected": -9.553037643432617, "step": 75730 }, { "epoch": 0.91, "learning_rate": 1.315765092613666e-07, "logits/chosen": -2.9285435676574707, "logits/rejected": -2.430997848510742, "logps/chosen": -123.34383392333984, "logps/rejected": -906.158203125, "loss": 0.1372, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8124040365219116, "rewards/margins": 7.855584144592285, "rewards/rejected": -8.667987823486328, "step": 75740 }, { "epoch": 0.91, "learning_rate": 1.3124228849950405e-07, "logits/chosen": -2.8811514377593994, "logits/rejected": -2.2763962745666504, "logps/chosen": -124.85966491699219, "logps/rejected": -1016.5174560546875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7853630781173706, "rewards/margins": 8.983179092407227, "rewards/rejected": -9.768542289733887, "step": 75750 }, { "epoch": 0.91, "learning_rate": 1.3090848131615875e-07, "logits/chosen": -2.8435494899749756, "logits/rejected": -2.2998883724212646, "logps/chosen": -99.47439575195312, "logps/rejected": -1055.9786376953125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5444725155830383, "rewards/margins": 9.620893478393555, "rewards/rejected": -10.165367126464844, "step": 75760 }, { "epoch": 0.91, "learning_rate": 1.3057508776961299e-07, "logits/chosen": -2.84367036819458, "logits/rejected": -2.359527587890625, "logps/chosen": -106.67166900634766, "logps/rejected": -962.9171142578125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.5792173147201538, "rewards/margins": 8.660228729248047, "rewards/rejected": -9.239445686340332, "step": 75770 }, { "epoch": 0.91, "learning_rate": 1.3024210791807574e-07, "logits/chosen": -2.8929619789123535, "logits/rejected": -2.3625245094299316, "logps/chosen": -86.08623504638672, "logps/rejected": -895.2738037109375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.43975120782852173, "rewards/margins": 8.141332626342773, "rewards/rejected": -8.581084251403809, "step": 75780 }, { "epoch": 0.91, "learning_rate": 1.2990954181968551e-07, "logits/chosen": -2.8290624618530273, "logits/rejected": -2.540987491607666, "logps/chosen": -77.15171813964844, "logps/rejected": -806.4744873046875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.3795776963233948, "rewards/margins": 7.308938503265381, "rewards/rejected": -7.688517093658447, "step": 75790 }, { "epoch": 0.91, "learning_rate": 1.2957738953250587e-07, "logits/chosen": -2.8569140434265137, "logits/rejected": -2.3775930404663086, "logps/chosen": -107.84769439697266, "logps/rejected": -994.4859619140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6236470341682434, "rewards/margins": 8.943912506103516, "rewards/rejected": -9.567560195922852, "step": 75800 }, { "epoch": 0.91, "learning_rate": 1.2924565111453068e-07, "logits/chosen": -2.8470237255096436, "logits/rejected": -2.3770954608917236, "logps/chosen": -96.29510498046875, "logps/rejected": -879.0203857421875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5204912424087524, "rewards/margins": 7.890206813812256, "rewards/rejected": -8.410697937011719, "step": 75810 }, { "epoch": 0.91, "learning_rate": 1.2891432662368032e-07, "logits/chosen": -2.8807742595672607, "logits/rejected": -2.293501853942871, "logps/chosen": -131.29489135742188, "logps/rejected": -823.4984130859375, "loss": 0.1527, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7818142175674438, "rewards/margins": 7.07205867767334, "rewards/rejected": -7.853873252868652, "step": 75820 }, { "epoch": 0.91, "learning_rate": 1.2858341611780318e-07, "logits/chosen": -2.87506365776062, "logits/rejected": -2.583672285079956, "logps/chosen": -63.85107421875, "logps/rejected": -787.7598876953125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.26864510774612427, "rewards/margins": 7.2427520751953125, "rewards/rejected": -7.511396884918213, "step": 75830 }, { "epoch": 0.91, "learning_rate": 1.2825291965467529e-07, "logits/chosen": -2.8689751625061035, "logits/rejected": -2.3197543621063232, "logps/chosen": -96.23545837402344, "logps/rejected": -895.1134033203125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5264196991920471, "rewards/margins": 8.035821914672852, "rewards/rejected": -8.56224250793457, "step": 75840 }, { "epoch": 0.91, "learning_rate": 1.2792283729200023e-07, "logits/chosen": -2.8818917274475098, "logits/rejected": -2.3561689853668213, "logps/chosen": -102.0813980102539, "logps/rejected": -886.7791137695312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5353023409843445, "rewards/margins": 7.94000244140625, "rewards/rejected": -8.475305557250977, "step": 75850 }, { "epoch": 0.91, "learning_rate": 1.2759316908740994e-07, "logits/chosen": -2.870887279510498, "logits/rejected": -2.0817811489105225, "logps/chosen": -128.27450561523438, "logps/rejected": -985.7037963867188, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.7545324563980103, "rewards/margins": 8.70054817199707, "rewards/rejected": -9.45508098602295, "step": 75860 }, { "epoch": 0.91, "learning_rate": 1.2726391509846308e-07, "logits/chosen": -2.854520797729492, "logits/rejected": -2.4408717155456543, "logps/chosen": -79.6485595703125, "logps/rejected": -960.9654541015625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.3660183846950531, "rewards/margins": 8.84894847869873, "rewards/rejected": -9.214967727661133, "step": 75870 }, { "epoch": 0.91, "learning_rate": 1.269350753826465e-07, "logits/chosen": -2.8936126232147217, "logits/rejected": -2.373739242553711, "logps/chosen": -105.76615905761719, "logps/rejected": -885.7216796875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.6250301599502563, "rewards/margins": 7.855676174163818, "rewards/rejected": -8.480707168579102, "step": 75880 }, { "epoch": 0.91, "learning_rate": 1.2660664999737477e-07, "logits/chosen": -2.82625150680542, "logits/rejected": -2.421560525894165, "logps/chosen": -93.59780883789062, "logps/rejected": -874.9945068359375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5360899567604065, "rewards/margins": 7.829242706298828, "rewards/rejected": -8.365331649780273, "step": 75890 }, { "epoch": 0.91, "learning_rate": 1.2627863899999037e-07, "logits/chosen": -2.8790018558502197, "logits/rejected": -2.469853639602661, "logps/chosen": -80.83320617675781, "logps/rejected": -830.1732177734375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.41037535667419434, "rewards/margins": 7.5114946365356445, "rewards/rejected": -7.921870231628418, "step": 75900 }, { "epoch": 0.91, "learning_rate": 1.259510424477628e-07, "logits/chosen": -2.8908791542053223, "logits/rejected": -2.144927501678467, "logps/chosen": -132.8652801513672, "logps/rejected": -1086.566162109375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.772873044013977, "rewards/margins": 9.672040939331055, "rewards/rejected": -10.444913864135742, "step": 75910 }, { "epoch": 0.91, "learning_rate": 1.256238603978896e-07, "logits/chosen": -2.8702807426452637, "logits/rejected": -2.15545916557312, "logps/chosen": -133.90953063964844, "logps/rejected": -1016.2799072265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7641316652297974, "rewards/margins": 8.989965438842773, "rewards/rejected": -9.754096984863281, "step": 75920 }, { "epoch": 0.91, "learning_rate": 1.252970929074959e-07, "logits/chosen": -2.8993515968322754, "logits/rejected": -2.4761481285095215, "logps/chosen": -84.88489532470703, "logps/rejected": -933.0270385742188, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.39394837617874146, "rewards/margins": 8.553827285766602, "rewards/rejected": -8.947774887084961, "step": 75930 }, { "epoch": 0.91, "learning_rate": 1.2497074003363446e-07, "logits/chosen": -2.911057472229004, "logits/rejected": -2.5002264976501465, "logps/chosen": -89.71483612060547, "logps/rejected": -891.2648315429688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4232162535190582, "rewards/margins": 8.107595443725586, "rewards/rejected": -8.530811309814453, "step": 75940 }, { "epoch": 0.91, "learning_rate": 1.2464480183328521e-07, "logits/chosen": -2.8580996990203857, "logits/rejected": -2.501553535461426, "logps/chosen": -118.78614807128906, "logps/rejected": -900.7303466796875, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -0.7442739009857178, "rewards/margins": 7.876486301422119, "rewards/rejected": -8.620759963989258, "step": 75950 }, { "epoch": 0.91, "learning_rate": 1.2431927836335622e-07, "logits/chosen": -2.84686279296875, "logits/rejected": -2.4294888973236084, "logps/chosen": -122.59439849853516, "logps/rejected": -852.3143310546875, "loss": 0.0938, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7812227606773376, "rewards/margins": 7.3600754737854, "rewards/rejected": -8.141298294067383, "step": 75960 }, { "epoch": 0.91, "learning_rate": 1.239941696806829e-07, "logits/chosen": -2.869927167892456, "logits/rejected": -2.5157485008239746, "logps/chosen": -87.15042877197266, "logps/rejected": -869.1868286132812, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.39497464895248413, "rewards/margins": 7.906085014343262, "rewards/rejected": -8.301058769226074, "step": 75970 }, { "epoch": 0.91, "learning_rate": 1.236694758420287e-07, "logits/chosen": -2.8748412132263184, "logits/rejected": -2.3931093215942383, "logps/chosen": -95.54564666748047, "logps/rejected": -961.0611572265625, "loss": 0.0998, "rewards/accuracies": 1.0, "rewards/chosen": -0.4897783398628235, "rewards/margins": 8.734695434570312, "rewards/rejected": -9.22447395324707, "step": 75980 }, { "epoch": 0.91, "learning_rate": 1.2334519690408442e-07, "logits/chosen": -2.8322596549987793, "logits/rejected": -2.208627939224243, "logps/chosen": -127.11502838134766, "logps/rejected": -880.1378173828125, "loss": 0.0809, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.753564715385437, "rewards/margins": 7.6711626052856445, "rewards/rejected": -8.424727439880371, "step": 75990 }, { "epoch": 0.91, "learning_rate": 1.2302133292346752e-07, "logits/chosen": -2.8587348461151123, "logits/rejected": -2.2604498863220215, "logps/chosen": -97.84190368652344, "logps/rejected": -886.9610595703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5351977944374084, "rewards/margins": 7.9436445236206055, "rewards/rejected": -8.478841781616211, "step": 76000 }, { "epoch": 0.91, "learning_rate": 1.2269788395672417e-07, "logits/chosen": -2.878268241882324, "logits/rejected": -2.172464370727539, "logps/chosen": -113.7501220703125, "logps/rejected": -1012.6906127929688, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6356045007705688, "rewards/margins": 9.091742515563965, "rewards/rejected": -9.727346420288086, "step": 76010 }, { "epoch": 0.91, "learning_rate": 1.223748500603275e-07, "logits/chosen": -2.9083080291748047, "logits/rejected": -2.4172489643096924, "logps/chosen": -91.32803344726562, "logps/rejected": -925.1027221679688, "loss": 0.1557, "rewards/accuracies": 1.0, "rewards/chosen": -0.41684097051620483, "rewards/margins": 8.445028305053711, "rewards/rejected": -8.861870765686035, "step": 76020 }, { "epoch": 0.91, "learning_rate": 1.2205223129067855e-07, "logits/chosen": -2.8315296173095703, "logits/rejected": -2.1344187259674072, "logps/chosen": -143.06834411621094, "logps/rejected": -962.0177612304688, "loss": 0.118, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9290529489517212, "rewards/margins": 8.311686515808105, "rewards/rejected": -9.240739822387695, "step": 76030 }, { "epoch": 0.91, "learning_rate": 1.217300277041053e-07, "logits/chosen": -2.87304949760437, "logits/rejected": -2.4155004024505615, "logps/chosen": -105.80311584472656, "logps/rejected": -927.8845825195312, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.5297524333000183, "rewards/margins": 8.382513046264648, "rewards/rejected": -8.91226577758789, "step": 76040 }, { "epoch": 0.91, "learning_rate": 1.2140823935686413e-07, "logits/chosen": -2.8865132331848145, "logits/rejected": -2.0634775161743164, "logps/chosen": -143.91903686523438, "logps/rejected": -1079.097900390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.863747775554657, "rewards/margins": 9.507770538330078, "rewards/rejected": -10.3715181350708, "step": 76050 }, { "epoch": 0.91, "learning_rate": 1.2108686630513816e-07, "logits/chosen": -2.8641273975372314, "logits/rejected": -2.2189509868621826, "logps/chosen": -133.00277709960938, "logps/rejected": -921.3455200195312, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -0.8344151377677917, "rewards/margins": 7.9990668296813965, "rewards/rejected": -8.833481788635254, "step": 76060 }, { "epoch": 0.91, "learning_rate": 1.2076590860503806e-07, "logits/chosen": -2.8803133964538574, "logits/rejected": -2.593608856201172, "logps/chosen": -113.52925872802734, "logps/rejected": -764.1229248046875, "loss": 0.1636, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7358709573745728, "rewards/margins": 6.531607151031494, "rewards/rejected": -7.267477989196777, "step": 76070 }, { "epoch": 0.91, "learning_rate": 1.2044536631260235e-07, "logits/chosen": -2.891610622406006, "logits/rejected": -2.3177382946014404, "logps/chosen": -101.6996078491211, "logps/rejected": -966.5592651367188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5605575442314148, "rewards/margins": 8.723088264465332, "rewards/rejected": -9.283644676208496, "step": 76080 }, { "epoch": 0.91, "learning_rate": 1.2012523948379652e-07, "logits/chosen": -2.900296688079834, "logits/rejected": -2.3791675567626953, "logps/chosen": -118.99122619628906, "logps/rejected": -946.1868896484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6840693950653076, "rewards/margins": 8.370625495910645, "rewards/rejected": -9.054695129394531, "step": 76090 }, { "epoch": 0.91, "learning_rate": 1.198055281745142e-07, "logits/chosen": -2.8805601596832275, "logits/rejected": -2.2983462810516357, "logps/chosen": -103.66748046875, "logps/rejected": -900.9287109375, "loss": 0.118, "rewards/accuracies": 1.0, "rewards/chosen": -0.5584476590156555, "rewards/margins": 8.056886672973633, "rewards/rejected": -8.615335464477539, "step": 76100 }, { "epoch": 0.91, "learning_rate": 1.19486232440576e-07, "logits/chosen": -2.893367052078247, "logits/rejected": -2.3412017822265625, "logps/chosen": -104.48822021484375, "logps/rejected": -873.603515625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5958654880523682, "rewards/margins": 7.754249572753906, "rewards/rejected": -8.350114822387695, "step": 76110 }, { "epoch": 0.91, "learning_rate": 1.1916735233772985e-07, "logits/chosen": -2.8996005058288574, "logits/rejected": -2.385493755340576, "logps/chosen": -94.3221664428711, "logps/rejected": -754.8832397460938, "loss": 0.1495, "rewards/accuracies": 1.0, "rewards/chosen": -0.47729673981666565, "rewards/margins": 6.705076694488525, "rewards/rejected": -7.182373046875, "step": 76120 }, { "epoch": 0.91, "learning_rate": 1.1884888792165178e-07, "logits/chosen": -2.8960399627685547, "logits/rejected": -2.266688823699951, "logps/chosen": -99.06385040283203, "logps/rejected": -944.8095703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5238810777664185, "rewards/margins": 8.533509254455566, "rewards/rejected": -9.057390213012695, "step": 76130 }, { "epoch": 0.91, "learning_rate": 1.1853083924794423e-07, "logits/chosen": -2.830655097961426, "logits/rejected": -2.4585466384887695, "logps/chosen": -107.32859802246094, "logps/rejected": -956.8707885742188, "loss": 0.155, "rewards/accuracies": 1.0, "rewards/chosen": -0.6591184735298157, "rewards/margins": 8.53272819519043, "rewards/rejected": -9.191848754882812, "step": 76140 }, { "epoch": 0.91, "learning_rate": 1.1821320637213807e-07, "logits/chosen": -2.8652446269989014, "logits/rejected": -2.2397379875183105, "logps/chosen": -120.44892883300781, "logps/rejected": -971.9827880859375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7155315279960632, "rewards/margins": 8.609865188598633, "rewards/rejected": -9.325396537780762, "step": 76150 }, { "epoch": 0.91, "learning_rate": 1.1789598934969088e-07, "logits/chosen": -2.882727861404419, "logits/rejected": -2.637791156768799, "logps/chosen": -69.61365509033203, "logps/rejected": -803.2500610351562, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.34239810705184937, "rewards/margins": 7.3140974044799805, "rewards/rejected": -7.656496524810791, "step": 76160 }, { "epoch": 0.91, "learning_rate": 1.1757918823598779e-07, "logits/chosen": -2.8833115100860596, "logits/rejected": -2.3144774436950684, "logps/chosen": -98.03253936767578, "logps/rejected": -930.4903564453125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5000202059745789, "rewards/margins": 8.41608715057373, "rewards/rejected": -8.916109085083008, "step": 76170 }, { "epoch": 0.91, "learning_rate": 1.1726280308634207e-07, "logits/chosen": -2.850942611694336, "logits/rejected": -2.2334117889404297, "logps/chosen": -149.1046905517578, "logps/rejected": -974.7932739257812, "loss": 0.1542, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0099276304244995, "rewards/margins": 8.336195945739746, "rewards/rejected": -9.346123695373535, "step": 76180 }, { "epoch": 0.91, "learning_rate": 1.1694683395599288e-07, "logits/chosen": -2.8825173377990723, "logits/rejected": -2.2197232246398926, "logps/chosen": -125.19709777832031, "logps/rejected": -1071.1749267578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7292864322662354, "rewards/margins": 9.572049140930176, "rewards/rejected": -10.301336288452148, "step": 76190 }, { "epoch": 0.91, "learning_rate": 1.1663128090010772e-07, "logits/chosen": -2.8827805519104004, "logits/rejected": -2.358585834503174, "logps/chosen": -96.48087310791016, "logps/rejected": -900.33544921875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4823020398616791, "rewards/margins": 8.139628410339355, "rewards/rejected": -8.621931076049805, "step": 76200 }, { "epoch": 0.91, "learning_rate": 1.1631614397378143e-07, "logits/chosen": -2.875209331512451, "logits/rejected": -2.4348347187042236, "logps/chosen": -86.60128021240234, "logps/rejected": -835.8048095703125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.44278091192245483, "rewards/margins": 7.543717861175537, "rewards/rejected": -7.986498832702637, "step": 76210 }, { "epoch": 0.91, "learning_rate": 1.1600142323203612e-07, "logits/chosen": -2.950104236602783, "logits/rejected": -2.332132577896118, "logps/chosen": -105.97203063964844, "logps/rejected": -1021.5911865234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6044008135795593, "rewards/margins": 9.217617988586426, "rewards/rejected": -9.82201862335205, "step": 76220 }, { "epoch": 0.91, "learning_rate": 1.1568711872982086e-07, "logits/chosen": -2.896472215652466, "logits/rejected": -2.2626712322235107, "logps/chosen": -121.02490234375, "logps/rejected": -923.5681762695312, "loss": 0.1567, "rewards/accuracies": 1.0, "rewards/chosen": -0.7175347208976746, "rewards/margins": 8.124265670776367, "rewards/rejected": -8.841800689697266, "step": 76230 }, { "epoch": 0.91, "learning_rate": 1.153732305220126e-07, "logits/chosen": -2.931936264038086, "logits/rejected": -2.3315227031707764, "logps/chosen": -108.60716247558594, "logps/rejected": -1010.990234375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6164757013320923, "rewards/margins": 9.098749160766602, "rewards/rejected": -9.71522331237793, "step": 76240 }, { "epoch": 0.91, "learning_rate": 1.1505975866341556e-07, "logits/chosen": -2.857569694519043, "logits/rejected": -2.4740331172943115, "logps/chosen": -85.90919494628906, "logps/rejected": -849.5817260742188, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.41274747252464294, "rewards/margins": 7.692020416259766, "rewards/rejected": -8.104766845703125, "step": 76250 }, { "epoch": 0.91, "learning_rate": 1.1474670320876041e-07, "logits/chosen": -2.8536033630371094, "logits/rejected": -2.4463560581207275, "logps/chosen": -90.11717987060547, "logps/rejected": -800.8986206054688, "loss": 0.0816, "rewards/accuracies": 1.0, "rewards/chosen": -0.5187082290649414, "rewards/margins": 7.113983154296875, "rewards/rejected": -7.632691383361816, "step": 76260 }, { "epoch": 0.91, "learning_rate": 1.144340642127062e-07, "logits/chosen": -2.873903751373291, "logits/rejected": -2.3156542778015137, "logps/chosen": -108.59297943115234, "logps/rejected": -943.8369140625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6223742961883545, "rewards/margins": 8.428696632385254, "rewards/rejected": -9.051071166992188, "step": 76270 }, { "epoch": 0.91, "learning_rate": 1.141218417298387e-07, "logits/chosen": -2.881726026535034, "logits/rejected": -2.2936816215515137, "logps/chosen": -107.65156555175781, "logps/rejected": -1020.5750732421875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5773254036903381, "rewards/margins": 9.231181144714355, "rewards/rejected": -9.808506965637207, "step": 76280 }, { "epoch": 0.91, "learning_rate": 1.1381003581467127e-07, "logits/chosen": -2.841176748275757, "logits/rejected": -2.1971914768218994, "logps/chosen": -130.77474975585938, "logps/rejected": -945.61865234375, "loss": 0.0662, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.854040801525116, "rewards/margins": 8.205412864685059, "rewards/rejected": -9.059453964233398, "step": 76290 }, { "epoch": 0.91, "learning_rate": 1.1349864652164395e-07, "logits/chosen": -2.8754160404205322, "logits/rejected": -2.363849639892578, "logps/chosen": -109.45594787597656, "logps/rejected": -847.13623046875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6223641037940979, "rewards/margins": 7.464900970458984, "rewards/rejected": -8.087265014648438, "step": 76300 }, { "epoch": 0.91, "learning_rate": 1.1318767390512519e-07, "logits/chosen": -2.8804104328155518, "logits/rejected": -2.395918369293213, "logps/chosen": -97.62144470214844, "logps/rejected": -938.4117431640625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.528561532497406, "rewards/margins": 8.453039169311523, "rewards/rejected": -8.981600761413574, "step": 76310 }, { "epoch": 0.91, "learning_rate": 1.1287711801940937e-07, "logits/chosen": -2.8762781620025635, "logits/rejected": -2.2751128673553467, "logps/chosen": -102.1138916015625, "logps/rejected": -976.95703125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5128906965255737, "rewards/margins": 8.868690490722656, "rewards/rejected": -9.38158130645752, "step": 76320 }, { "epoch": 0.91, "learning_rate": 1.1256697891871865e-07, "logits/chosen": -2.8766262531280518, "logits/rejected": -2.1446728706359863, "logps/chosen": -120.0954818725586, "logps/rejected": -959.048828125, "loss": 0.0856, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7156206369400024, "rewards/margins": 8.476408958435059, "rewards/rejected": -9.192028999328613, "step": 76330 }, { "epoch": 0.91, "learning_rate": 1.122572566572025e-07, "logits/chosen": -2.8837971687316895, "logits/rejected": -2.620460033416748, "logps/chosen": -77.99415588378906, "logps/rejected": -826.5537109375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4144589900970459, "rewards/margins": 7.465062141418457, "rewards/rejected": -7.879521369934082, "step": 76340 }, { "epoch": 0.91, "learning_rate": 1.1194795128893765e-07, "logits/chosen": -2.867086410522461, "logits/rejected": -2.2627780437469482, "logps/chosen": -122.3432846069336, "logps/rejected": -956.3858642578125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.6686095595359802, "rewards/margins": 8.500493049621582, "rewards/rejected": -9.169102668762207, "step": 76350 }, { "epoch": 0.91, "learning_rate": 1.1163906286792814e-07, "logits/chosen": -2.9037394523620605, "logits/rejected": -2.3914968967437744, "logps/chosen": -110.24996185302734, "logps/rejected": -935.5308837890625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6076270341873169, "rewards/margins": 8.356139183044434, "rewards/rejected": -8.963767051696777, "step": 76360 }, { "epoch": 0.91, "learning_rate": 1.1133059144810471e-07, "logits/chosen": -2.925330638885498, "logits/rejected": -2.540224313735962, "logps/chosen": -94.0134506225586, "logps/rejected": -892.5966796875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.48110485076904297, "rewards/margins": 8.048173904418945, "rewards/rejected": -8.529278755187988, "step": 76370 }, { "epoch": 0.91, "learning_rate": 1.1102253708332622e-07, "logits/chosen": -2.846457004547119, "logits/rejected": -2.2319140434265137, "logps/chosen": -131.38597106933594, "logps/rejected": -885.0916748046875, "loss": 0.1109, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8034541010856628, "rewards/margins": 7.653893947601318, "rewards/rejected": -8.457348823547363, "step": 76380 }, { "epoch": 0.91, "learning_rate": 1.1071489982737715e-07, "logits/chosen": -2.8643805980682373, "logits/rejected": -2.2723968029022217, "logps/chosen": -110.9029769897461, "logps/rejected": -848.0108642578125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6151968240737915, "rewards/margins": 7.457493782043457, "rewards/rejected": -8.0726900100708, "step": 76390 }, { "epoch": 0.91, "learning_rate": 1.1040767973397093e-07, "logits/chosen": -2.894549608230591, "logits/rejected": -2.371586561203003, "logps/chosen": -128.61605834960938, "logps/rejected": -887.0398559570312, "loss": 0.092, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7840641736984253, "rewards/margins": 7.711185455322266, "rewards/rejected": -8.49524974822998, "step": 76400 }, { "epoch": 0.91, "learning_rate": 1.1010087685674659e-07, "logits/chosen": -2.8217146396636963, "logits/rejected": -2.194901943206787, "logps/chosen": -96.66658020019531, "logps/rejected": -896.85986328125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5398333668708801, "rewards/margins": 8.04436206817627, "rewards/rejected": -8.584196090698242, "step": 76410 }, { "epoch": 0.91, "learning_rate": 1.0979449124927182e-07, "logits/chosen": -2.8993802070617676, "logits/rejected": -2.405259132385254, "logps/chosen": -130.17861938476562, "logps/rejected": -851.7998046875, "loss": 0.1354, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7782169580459595, "rewards/margins": 7.3533935546875, "rewards/rejected": -8.131610870361328, "step": 76420 }, { "epoch": 0.91, "learning_rate": 1.0948852296504026e-07, "logits/chosen": -2.8666841983795166, "logits/rejected": -2.397724151611328, "logps/chosen": -89.68253326416016, "logps/rejected": -917.3709716796875, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.4495786726474762, "rewards/margins": 8.345144271850586, "rewards/rejected": -8.794722557067871, "step": 76430 }, { "epoch": 0.92, "learning_rate": 1.091829720574733e-07, "logits/chosen": -2.899017572402954, "logits/rejected": -2.364102840423584, "logps/chosen": -103.67413330078125, "logps/rejected": -968.533203125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5456486940383911, "rewards/margins": 8.740484237670898, "rewards/rejected": -9.2861328125, "step": 76440 }, { "epoch": 0.92, "learning_rate": 1.088778385799194e-07, "logits/chosen": -2.9057769775390625, "logits/rejected": -2.2397773265838623, "logps/chosen": -118.48683166503906, "logps/rejected": -974.8289794921875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7188440561294556, "rewards/margins": 8.621658325195312, "rewards/rejected": -9.340502738952637, "step": 76450 }, { "epoch": 0.92, "learning_rate": 1.0857312258565373e-07, "logits/chosen": -2.8866889476776123, "logits/rejected": -2.4259564876556396, "logps/chosen": -82.9249267578125, "logps/rejected": -856.1611328125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.3929532468318939, "rewards/margins": 7.791586399078369, "rewards/rejected": -8.184540748596191, "step": 76460 }, { "epoch": 0.92, "learning_rate": 1.082688241278787e-07, "logits/chosen": -2.914412498474121, "logits/rejected": -2.38200306892395, "logps/chosen": -93.39521789550781, "logps/rejected": -880.3653564453125, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5100647211074829, "rewards/margins": 7.918465614318848, "rewards/rejected": -8.428529739379883, "step": 76470 }, { "epoch": 0.92, "learning_rate": 1.0796494325972434e-07, "logits/chosen": -2.878533124923706, "logits/rejected": -2.208707332611084, "logps/chosen": -110.86856842041016, "logps/rejected": -893.0812377929688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.672512412071228, "rewards/margins": 7.879416465759277, "rewards/rejected": -8.551929473876953, "step": 76480 }, { "epoch": 0.92, "learning_rate": 1.0766148003424709e-07, "logits/chosen": -2.873244285583496, "logits/rejected": -2.311185359954834, "logps/chosen": -110.64176940917969, "logps/rejected": -956.8724365234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6556392908096313, "rewards/margins": 8.52790641784668, "rewards/rejected": -9.18354606628418, "step": 76490 }, { "epoch": 0.92, "learning_rate": 1.0735843450443123e-07, "logits/chosen": -2.9037461280822754, "logits/rejected": -2.3586554527282715, "logps/chosen": -104.6582260131836, "logps/rejected": -945.9566650390625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6224182844161987, "rewards/margins": 8.459108352661133, "rewards/rejected": -9.081525802612305, "step": 76500 }, { "epoch": 0.92, "learning_rate": 1.0705580672318777e-07, "logits/chosen": -2.8389084339141846, "logits/rejected": -2.094306468963623, "logps/chosen": -130.4456024169922, "logps/rejected": -957.85791015625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.735306441783905, "rewards/margins": 8.45511245727539, "rewards/rejected": -9.19041919708252, "step": 76510 }, { "epoch": 0.92, "learning_rate": 1.067535967433539e-07, "logits/chosen": -2.8564600944519043, "logits/rejected": -2.0095555782318115, "logps/chosen": -168.94131469726562, "logps/rejected": -1035.3394775390625, "loss": 0.1397, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1069839000701904, "rewards/margins": 8.856273651123047, "rewards/rejected": -9.9632568359375, "step": 76520 }, { "epoch": 0.92, "learning_rate": 1.0645180461769516e-07, "logits/chosen": -2.90421724319458, "logits/rejected": -2.5183918476104736, "logps/chosen": -94.50923156738281, "logps/rejected": -878.9620971679688, "loss": 0.0863, "rewards/accuracies": 1.0, "rewards/chosen": -0.51760333776474, "rewards/margins": 7.883070468902588, "rewards/rejected": -8.400673866271973, "step": 76530 }, { "epoch": 0.92, "learning_rate": 1.0615043039890361e-07, "logits/chosen": -2.921635150909424, "logits/rejected": -2.3491392135620117, "logps/chosen": -115.4976806640625, "logps/rejected": -1040.3934326171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6365576982498169, "rewards/margins": 9.374631881713867, "rewards/rejected": -10.011189460754395, "step": 76540 }, { "epoch": 0.92, "learning_rate": 1.0584947413959767e-07, "logits/chosen": -2.8932347297668457, "logits/rejected": -2.203150987625122, "logps/chosen": -111.81413269042969, "logps/rejected": -985.3402099609375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6204209327697754, "rewards/margins": 8.823484420776367, "rewards/rejected": -9.4439058303833, "step": 76550 }, { "epoch": 0.92, "learning_rate": 1.0554893589232506e-07, "logits/chosen": -2.871699094772339, "logits/rejected": -2.0893359184265137, "logps/chosen": -118.59100341796875, "logps/rejected": -1113.95263671875, "loss": 0.0655, "rewards/accuracies": 1.0, "rewards/chosen": -0.6660245656967163, "rewards/margins": 10.066370010375977, "rewards/rejected": -10.73239517211914, "step": 76560 }, { "epoch": 0.92, "learning_rate": 1.0524881570955742e-07, "logits/chosen": -2.928966999053955, "logits/rejected": -2.4590978622436523, "logps/chosen": -95.61405181884766, "logps/rejected": -864.88671875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5419970750808716, "rewards/margins": 7.736865997314453, "rewards/rejected": -8.278863906860352, "step": 76570 }, { "epoch": 0.92, "learning_rate": 1.0494911364369559e-07, "logits/chosen": -2.9053893089294434, "logits/rejected": -2.2341103553771973, "logps/chosen": -117.12086486816406, "logps/rejected": -976.3319091796875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7112685441970825, "rewards/margins": 8.656484603881836, "rewards/rejected": -9.367752075195312, "step": 76580 }, { "epoch": 0.92, "learning_rate": 1.0464982974706633e-07, "logits/chosen": -2.864314317703247, "logits/rejected": -2.380082368850708, "logps/chosen": -102.64994812011719, "logps/rejected": -953.3646240234375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5454620122909546, "rewards/margins": 8.583956718444824, "rewards/rejected": -9.129419326782227, "step": 76590 }, { "epoch": 0.92, "learning_rate": 1.0435096407192397e-07, "logits/chosen": -2.901124954223633, "logits/rejected": -2.270402431488037, "logps/chosen": -114.17097473144531, "logps/rejected": -915.7916259765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6647036075592041, "rewards/margins": 8.105249404907227, "rewards/rejected": -8.769953727722168, "step": 76600 }, { "epoch": 0.92, "learning_rate": 1.040525166704498e-07, "logits/chosen": -2.901050090789795, "logits/rejected": -2.390984058380127, "logps/chosen": -123.36512756347656, "logps/rejected": -942.6326293945312, "loss": 0.1066, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7457648515701294, "rewards/margins": 8.280445098876953, "rewards/rejected": -9.026209831237793, "step": 76610 }, { "epoch": 0.92, "learning_rate": 1.0375448759475187e-07, "logits/chosen": -2.8824470043182373, "logits/rejected": -2.315051317214966, "logps/chosen": -119.0052490234375, "logps/rejected": -889.4268798828125, "loss": 0.0745, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7035462856292725, "rewards/margins": 7.8058600425720215, "rewards/rejected": -8.509405136108398, "step": 76620 }, { "epoch": 0.92, "learning_rate": 1.0345687689686523e-07, "logits/chosen": -2.917032241821289, "logits/rejected": -2.469900369644165, "logps/chosen": -129.21762084960938, "logps/rejected": -895.9793701171875, "loss": 0.1064, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8469604253768921, "rewards/margins": 7.745048522949219, "rewards/rejected": -8.592009544372559, "step": 76630 }, { "epoch": 0.92, "learning_rate": 1.0315968462875136e-07, "logits/chosen": -2.883126974105835, "logits/rejected": -2.090977668762207, "logps/chosen": -125.94999694824219, "logps/rejected": -1063.32275390625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6944049000740051, "rewards/margins": 9.533723831176758, "rewards/rejected": -10.228128433227539, "step": 76640 }, { "epoch": 0.92, "learning_rate": 1.0286291084229988e-07, "logits/chosen": -2.875218391418457, "logits/rejected": -2.253213882446289, "logps/chosen": -110.0786361694336, "logps/rejected": -946.2982177734375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6347222328186035, "rewards/margins": 8.429018020629883, "rewards/rejected": -9.063739776611328, "step": 76650 }, { "epoch": 0.92, "learning_rate": 1.0256655558932627e-07, "logits/chosen": -2.8797149658203125, "logits/rejected": -2.2504618167877197, "logps/chosen": -102.89144134521484, "logps/rejected": -947.8156127929688, "loss": 0.1149, "rewards/accuracies": 1.0, "rewards/chosen": -0.5563105940818787, "rewards/margins": 8.526456832885742, "rewards/rejected": -9.08276653289795, "step": 76660 }, { "epoch": 0.92, "learning_rate": 1.0227061892157359e-07, "logits/chosen": -2.9701449871063232, "logits/rejected": -2.446990966796875, "logps/chosen": -109.63932800292969, "logps/rejected": -913.7306518554688, "loss": 0.091, "rewards/accuracies": 1.0, "rewards/chosen": -0.6523693203926086, "rewards/margins": 8.105402946472168, "rewards/rejected": -8.757772445678711, "step": 76670 }, { "epoch": 0.92, "learning_rate": 1.0197510089071133e-07, "logits/chosen": -2.8812713623046875, "logits/rejected": -2.595106840133667, "logps/chosen": -74.6294174194336, "logps/rejected": -800.2884521484375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.3644399046897888, "rewards/margins": 7.27115535736084, "rewards/rejected": -7.635595798492432, "step": 76680 }, { "epoch": 0.92, "learning_rate": 1.0168000154833629e-07, "logits/chosen": -2.8884711265563965, "logits/rejected": -2.1582984924316406, "logps/chosen": -116.98149108886719, "logps/rejected": -1005.1292724609375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6593627333641052, "rewards/margins": 9.0083646774292, "rewards/rejected": -9.66772747039795, "step": 76690 }, { "epoch": 0.92, "learning_rate": 1.0138532094597198e-07, "logits/chosen": -2.8964219093322754, "logits/rejected": -2.2702548503875732, "logps/chosen": -115.1373062133789, "logps/rejected": -915.0098876953125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6384239196777344, "rewards/margins": 8.106766700744629, "rewards/rejected": -8.745190620422363, "step": 76700 }, { "epoch": 0.92, "learning_rate": 1.010910591350689e-07, "logits/chosen": -2.8612873554229736, "logits/rejected": -2.2742438316345215, "logps/chosen": -109.57051849365234, "logps/rejected": -962.4052734375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.5438581705093384, "rewards/margins": 8.67934513092041, "rewards/rejected": -9.223204612731934, "step": 76710 }, { "epoch": 0.92, "learning_rate": 1.00797216167004e-07, "logits/chosen": -2.833982229232788, "logits/rejected": -2.3784329891204834, "logps/chosen": -111.27726745605469, "logps/rejected": -943.8739013671875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6154999732971191, "rewards/margins": 8.437335014343262, "rewards/rejected": -9.052835464477539, "step": 76720 }, { "epoch": 0.92, "learning_rate": 1.0050379209308153e-07, "logits/chosen": -2.920506000518799, "logits/rejected": -2.2938714027404785, "logps/chosen": -110.86224365234375, "logps/rejected": -938.0699462890625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6509239077568054, "rewards/margins": 8.333176612854004, "rewards/rejected": -8.984100341796875, "step": 76730 }, { "epoch": 0.92, "learning_rate": 1.0021078696453274e-07, "logits/chosen": -2.8769826889038086, "logits/rejected": -2.322140693664551, "logps/chosen": -131.20755004882812, "logps/rejected": -986.9371337890625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7799323201179504, "rewards/margins": 8.695460319519043, "rewards/rejected": -9.47539234161377, "step": 76740 }, { "epoch": 0.92, "learning_rate": 9.99182008325153e-08, "logits/chosen": -2.899484157562256, "logits/rejected": -2.6005825996398926, "logps/chosen": -81.3451156616211, "logps/rejected": -848.7059326171875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.40686431527137756, "rewards/margins": 7.701286315917969, "rewards/rejected": -8.108149528503418, "step": 76750 }, { "epoch": 0.92, "learning_rate": 9.962603374811448e-08, "logits/chosen": -2.859462261199951, "logits/rejected": -2.1974430084228516, "logps/chosen": -118.25386810302734, "logps/rejected": -1018.2352294921875, "loss": 0.1633, "rewards/accuracies": 1.0, "rewards/chosen": -0.6206339597702026, "rewards/margins": 9.163952827453613, "rewards/rejected": -9.784585952758789, "step": 76760 }, { "epoch": 0.92, "learning_rate": 9.933428576234111e-08, "logits/chosen": -2.912165403366089, "logits/rejected": -2.498283624649048, "logps/chosen": -83.65231323242188, "logps/rejected": -887.8243408203125, "loss": 0.0885, "rewards/accuracies": 1.0, "rewards/chosen": -0.39822331070899963, "rewards/margins": 8.106958389282227, "rewards/rejected": -8.505182266235352, "step": 76770 }, { "epoch": 0.92, "learning_rate": 9.904295692613392e-08, "logits/chosen": -2.912327527999878, "logits/rejected": -2.3243906497955322, "logps/chosen": -103.8486557006836, "logps/rejected": -1018.1246948242188, "loss": 0.1247, "rewards/accuracies": 1.0, "rewards/chosen": -0.5629270672798157, "rewards/margins": 9.219282150268555, "rewards/rejected": -9.782209396362305, "step": 76780 }, { "epoch": 0.92, "learning_rate": 9.875204729035803e-08, "logits/chosen": -2.9319348335266113, "logits/rejected": -2.260629177093506, "logps/chosen": -117.3553695678711, "logps/rejected": -945.0374755859375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.6598416566848755, "rewards/margins": 8.386373519897461, "rewards/rejected": -9.046216011047363, "step": 76790 }, { "epoch": 0.92, "learning_rate": 9.846155690580561e-08, "logits/chosen": -2.8806450366973877, "logits/rejected": -2.473822832107544, "logps/chosen": -87.03621673583984, "logps/rejected": -859.3131103515625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.4538392424583435, "rewards/margins": 7.763765811920166, "rewards/rejected": -8.217604637145996, "step": 76800 }, { "epoch": 0.92, "learning_rate": 9.817148582319524e-08, "logits/chosen": -2.8842577934265137, "logits/rejected": -2.219522476196289, "logps/chosen": -128.92433166503906, "logps/rejected": -1020.3055419921875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7619462609291077, "rewards/margins": 9.032389640808105, "rewards/rejected": -9.794336318969727, "step": 76810 }, { "epoch": 0.92, "learning_rate": 9.788183409317281e-08, "logits/chosen": -2.8505566120147705, "logits/rejected": -2.009434938430786, "logps/chosen": -134.82998657226562, "logps/rejected": -1012.8279418945312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7929984331130981, "rewards/margins": 8.933418273925781, "rewards/rejected": -9.726417541503906, "step": 76820 }, { "epoch": 0.92, "learning_rate": 9.759260176631063e-08, "logits/chosen": -2.866570234298706, "logits/rejected": -2.3976712226867676, "logps/chosen": -82.13187408447266, "logps/rejected": -857.0857543945312, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.3968636691570282, "rewards/margins": 7.793351173400879, "rewards/rejected": -8.190214157104492, "step": 76830 }, { "epoch": 0.92, "learning_rate": 9.730378889310748e-08, "logits/chosen": -2.9111876487731934, "logits/rejected": -2.253094434738159, "logps/chosen": -117.99739074707031, "logps/rejected": -969.79736328125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6765117049217224, "rewards/margins": 8.618556022644043, "rewards/rejected": -9.29506778717041, "step": 76840 }, { "epoch": 0.92, "learning_rate": 9.70153955239897e-08, "logits/chosen": -2.889836549758911, "logits/rejected": -2.2306015491485596, "logps/chosen": -108.00439453125, "logps/rejected": -1015.9186401367188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5741812586784363, "rewards/margins": 9.190389633178711, "rewards/rejected": -9.764570236206055, "step": 76850 }, { "epoch": 0.92, "learning_rate": 9.672742170931004e-08, "logits/chosen": -2.916815996170044, "logits/rejected": -2.4909119606018066, "logps/chosen": -86.32294464111328, "logps/rejected": -890.7107543945312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.44288158416748047, "rewards/margins": 8.07983112335205, "rewards/rejected": -8.522711753845215, "step": 76860 }, { "epoch": 0.92, "learning_rate": 9.643986749934747e-08, "logits/chosen": -2.9241855144500732, "logits/rejected": -2.371371269226074, "logps/chosen": -129.0713348388672, "logps/rejected": -801.757080078125, "loss": 0.164, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.841889500617981, "rewards/margins": 6.801436424255371, "rewards/rejected": -7.643326759338379, "step": 76870 }, { "epoch": 0.92, "learning_rate": 9.61527329443085e-08, "logits/chosen": -2.8368847370147705, "logits/rejected": -2.1947600841522217, "logps/chosen": -167.4479217529297, "logps/rejected": -958.2855224609375, "loss": 0.2534, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.1498985290527344, "rewards/margins": 8.043771743774414, "rewards/rejected": -9.193669319152832, "step": 76880 }, { "epoch": 0.92, "learning_rate": 9.586601809432606e-08, "logits/chosen": -2.8975272178649902, "logits/rejected": -2.30562424659729, "logps/chosen": -95.6422119140625, "logps/rejected": -933.5745239257812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4922742247581482, "rewards/margins": 8.441944122314453, "rewards/rejected": -8.934219360351562, "step": 76890 }, { "epoch": 0.92, "learning_rate": 9.557972299945983e-08, "logits/chosen": -2.822471857070923, "logits/rejected": -2.3820149898529053, "logps/chosen": -85.15858459472656, "logps/rejected": -836.3076171875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.42243799567222595, "rewards/margins": 7.567918300628662, "rewards/rejected": -7.9903564453125, "step": 76900 }, { "epoch": 0.92, "learning_rate": 9.529384770969568e-08, "logits/chosen": -2.914527177810669, "logits/rejected": -2.4252524375915527, "logps/chosen": -100.753662109375, "logps/rejected": -914.6456909179688, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.5106777548789978, "rewards/margins": 8.240303039550781, "rewards/rejected": -8.750982284545898, "step": 76910 }, { "epoch": 0.92, "learning_rate": 9.500839227494702e-08, "logits/chosen": -2.923119306564331, "logits/rejected": -2.484910488128662, "logps/chosen": -84.84199523925781, "logps/rejected": -913.9011840820312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4174065589904785, "rewards/margins": 8.333857536315918, "rewards/rejected": -8.751263618469238, "step": 76920 }, { "epoch": 0.92, "learning_rate": 9.47233567450534e-08, "logits/chosen": -2.9168899059295654, "logits/rejected": -2.438081741333008, "logps/chosen": -107.5573501586914, "logps/rejected": -943.7066650390625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.592430830001831, "rewards/margins": 8.448819160461426, "rewards/rejected": -9.041250228881836, "step": 76930 }, { "epoch": 0.92, "learning_rate": 9.443874116978142e-08, "logits/chosen": -2.907762050628662, "logits/rejected": -2.2395787239074707, "logps/chosen": -127.77870178222656, "logps/rejected": -994.99267578125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6923686265945435, "rewards/margins": 8.858919143676758, "rewards/rejected": -9.551286697387695, "step": 76940 }, { "epoch": 0.92, "learning_rate": 9.415454559882381e-08, "logits/chosen": -2.919651746749878, "logits/rejected": -2.5238144397735596, "logps/chosen": -102.46046447753906, "logps/rejected": -914.6692504882812, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.5339937806129456, "rewards/margins": 8.214306831359863, "rewards/rejected": -8.74830150604248, "step": 76950 }, { "epoch": 0.92, "learning_rate": 9.38707700818009e-08, "logits/chosen": -2.842259407043457, "logits/rejected": -2.504077434539795, "logps/chosen": -69.62370300292969, "logps/rejected": -810.0849609375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.327987939119339, "rewards/margins": 7.407826900482178, "rewards/rejected": -7.735814571380615, "step": 76960 }, { "epoch": 0.92, "learning_rate": 9.35874146682586e-08, "logits/chosen": -2.851893901824951, "logits/rejected": -2.3934290409088135, "logps/chosen": -109.7085189819336, "logps/rejected": -877.6015625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6546968221664429, "rewards/margins": 7.722193241119385, "rewards/rejected": -8.376890182495117, "step": 76970 }, { "epoch": 0.92, "learning_rate": 9.33044794076704e-08, "logits/chosen": -2.886600971221924, "logits/rejected": -2.155534505844116, "logps/chosen": -126.59068298339844, "logps/rejected": -1044.0836181640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7401875853538513, "rewards/margins": 9.301916122436523, "rewards/rejected": -10.04210376739502, "step": 76980 }, { "epoch": 0.92, "learning_rate": 9.302196434943567e-08, "logits/chosen": -2.8767430782318115, "logits/rejected": -2.2770273685455322, "logps/chosen": -120.15446472167969, "logps/rejected": -1001.0911865234375, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -0.7014701962471008, "rewards/margins": 8.921808242797852, "rewards/rejected": -9.623279571533203, "step": 76990 }, { "epoch": 0.92, "learning_rate": 9.273986954288078e-08, "logits/chosen": -2.9163081645965576, "logits/rejected": -2.5089175701141357, "logps/chosen": -95.49069213867188, "logps/rejected": -866.6902465820312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5435168147087097, "rewards/margins": 7.748379707336426, "rewards/rejected": -8.291895866394043, "step": 77000 }, { "epoch": 0.92, "learning_rate": 9.245819503725883e-08, "logits/chosen": -2.906116008758545, "logits/rejected": -2.260758876800537, "logps/chosen": -107.9581527709961, "logps/rejected": -886.7288208007812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5985926389694214, "rewards/margins": 7.8744049072265625, "rewards/rejected": -8.472997665405273, "step": 77010 }, { "epoch": 0.92, "learning_rate": 9.217694088174966e-08, "logits/chosen": -2.8773884773254395, "logits/rejected": -2.0914275646209717, "logps/chosen": -112.976318359375, "logps/rejected": -1041.254638671875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6141258478164673, "rewards/margins": 9.417987823486328, "rewards/rejected": -10.032114028930664, "step": 77020 }, { "epoch": 0.92, "learning_rate": 9.1896107125459e-08, "logits/chosen": -2.8674983978271484, "logits/rejected": -2.5846798419952393, "logps/chosen": -101.84004974365234, "logps/rejected": -838.7340087890625, "loss": 0.107, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6137683987617493, "rewards/margins": 7.384465217590332, "rewards/rejected": -7.99823522567749, "step": 77030 }, { "epoch": 0.92, "learning_rate": 9.161569381741953e-08, "logits/chosen": -2.8507180213928223, "logits/rejected": -2.27436900138855, "logps/chosen": -109.47651672363281, "logps/rejected": -1023.2443237304688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5977602601051331, "rewards/margins": 9.232915878295898, "rewards/rejected": -9.83067798614502, "step": 77040 }, { "epoch": 0.92, "learning_rate": 9.133570100659128e-08, "logits/chosen": -2.8969497680664062, "logits/rejected": -2.379284381866455, "logps/chosen": -100.7066421508789, "logps/rejected": -901.01123046875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.567836344242096, "rewards/margins": 8.049760818481445, "rewards/rejected": -8.617597579956055, "step": 77050 }, { "epoch": 0.92, "learning_rate": 9.10561287418596e-08, "logits/chosen": -2.878274440765381, "logits/rejected": -2.3236594200134277, "logps/chosen": -106.6084213256836, "logps/rejected": -984.7630615234375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.502648115158081, "rewards/margins": 8.949131965637207, "rewards/rejected": -9.451780319213867, "step": 77060 }, { "epoch": 0.92, "learning_rate": 9.077697707203736e-08, "logits/chosen": -2.8893895149230957, "logits/rejected": -2.404961585998535, "logps/chosen": -121.91546630859375, "logps/rejected": -890.7366333007812, "loss": 0.1166, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.771713137626648, "rewards/margins": 7.750541687011719, "rewards/rejected": -8.522255897521973, "step": 77070 }, { "epoch": 0.92, "learning_rate": 9.049824604586394e-08, "logits/chosen": -2.8603084087371826, "logits/rejected": -2.2352452278137207, "logps/chosen": -121.3064193725586, "logps/rejected": -984.6383666992188, "loss": 0.157, "rewards/accuracies": 1.0, "rewards/chosen": -0.725304901599884, "rewards/margins": 8.738478660583496, "rewards/rejected": -9.463784217834473, "step": 77080 }, { "epoch": 0.92, "learning_rate": 9.021993571200427e-08, "logits/chosen": -2.8506081104278564, "logits/rejected": -2.262739419937134, "logps/chosen": -121.42927551269531, "logps/rejected": -1041.6458740234375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7207781076431274, "rewards/margins": 9.296565055847168, "rewards/rejected": -10.017343521118164, "step": 77090 }, { "epoch": 0.92, "learning_rate": 8.994204611905088e-08, "logits/chosen": -2.874964952468872, "logits/rejected": -2.232480764389038, "logps/chosen": -128.89329528808594, "logps/rejected": -956.193359375, "loss": 0.1178, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7962828874588013, "rewards/margins": 8.376665115356445, "rewards/rejected": -9.172948837280273, "step": 77100 }, { "epoch": 0.92, "learning_rate": 8.966457731552275e-08, "logits/chosen": -2.833681106567383, "logits/rejected": -2.107876777648926, "logps/chosen": -112.1333999633789, "logps/rejected": -920.06103515625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6675182580947876, "rewards/margins": 8.14421272277832, "rewards/rejected": -8.811731338500977, "step": 77110 }, { "epoch": 0.92, "learning_rate": 8.9387529349865e-08, "logits/chosen": -2.8833155632019043, "logits/rejected": -2.37974214553833, "logps/chosen": -79.20491790771484, "logps/rejected": -963.9593505859375, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.3637726306915283, "rewards/margins": 8.888311386108398, "rewards/rejected": -9.252082824707031, "step": 77120 }, { "epoch": 0.92, "learning_rate": 8.911090227044949e-08, "logits/chosen": -2.923360586166382, "logits/rejected": -2.445946216583252, "logps/chosen": -85.97683715820312, "logps/rejected": -828.4611206054688, "loss": 0.0856, "rewards/accuracies": 1.0, "rewards/chosen": -0.41852444410324097, "rewards/margins": 7.484082221984863, "rewards/rejected": -7.9026055335998535, "step": 77130 }, { "epoch": 0.92, "learning_rate": 8.883469612557427e-08, "logits/chosen": -2.876168727874756, "logits/rejected": -2.4841601848602295, "logps/chosen": -73.9913101196289, "logps/rejected": -877.0994873046875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.3309388756752014, "rewards/margins": 8.055038452148438, "rewards/rejected": -8.38597583770752, "step": 77140 }, { "epoch": 0.92, "learning_rate": 8.855891096346464e-08, "logits/chosen": -2.879368782043457, "logits/rejected": -2.523416042327881, "logps/chosen": -74.99097442626953, "logps/rejected": -903.32666015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.35791799426078796, "rewards/margins": 8.300432205200195, "rewards/rejected": -8.658349990844727, "step": 77150 }, { "epoch": 0.92, "learning_rate": 8.828354683227152e-08, "logits/chosen": -2.8931164741516113, "logits/rejected": -2.3004801273345947, "logps/chosen": -119.63859558105469, "logps/rejected": -893.6427001953125, "loss": 0.1048, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7197333574295044, "rewards/margins": 7.833456516265869, "rewards/rejected": -8.553189277648926, "step": 77160 }, { "epoch": 0.92, "learning_rate": 8.800860378007287e-08, "logits/chosen": -2.913616895675659, "logits/rejected": -2.718555450439453, "logps/chosen": -77.60368347167969, "logps/rejected": -745.1165771484375, "loss": 0.1353, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.4245903491973877, "rewards/margins": 6.671311378479004, "rewards/rejected": -7.0959014892578125, "step": 77170 }, { "epoch": 0.92, "learning_rate": 8.773408185487303e-08, "logits/chosen": -2.908146619796753, "logits/rejected": -2.2505383491516113, "logps/chosen": -117.3057632446289, "logps/rejected": -959.3942260742188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.638071596622467, "rewards/margins": 8.569822311401367, "rewards/rejected": -9.207893371582031, "step": 77180 }, { "epoch": 0.92, "learning_rate": 8.745998110460285e-08, "logits/chosen": -2.851970672607422, "logits/rejected": -2.0787887573242188, "logps/chosen": -128.9259490966797, "logps/rejected": -984.4456787109375, "loss": 0.165, "rewards/accuracies": 1.0, "rewards/chosen": -0.711816668510437, "rewards/margins": 8.737586975097656, "rewards/rejected": -9.449403762817383, "step": 77190 }, { "epoch": 0.92, "learning_rate": 8.718630157711905e-08, "logits/chosen": -2.869502544403076, "logits/rejected": -2.4670987129211426, "logps/chosen": -81.31645965576172, "logps/rejected": -874.3605346679688, "loss": 0.1361, "rewards/accuracies": 1.0, "rewards/chosen": -0.37129274010658264, "rewards/margins": 7.98708438873291, "rewards/rejected": -8.358376502990723, "step": 77200 }, { "epoch": 0.92, "learning_rate": 8.691304332020589e-08, "logits/chosen": -2.8237030506134033, "logits/rejected": -2.336930513381958, "logps/chosen": -115.38502502441406, "logps/rejected": -858.9661865234375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6987800598144531, "rewards/margins": 7.502367973327637, "rewards/rejected": -8.201147079467773, "step": 77210 }, { "epoch": 0.92, "learning_rate": 8.664020638157355e-08, "logits/chosen": -2.8671462535858154, "logits/rejected": -2.386320114135742, "logps/chosen": -87.38416290283203, "logps/rejected": -791.3176879882812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.4117801785469055, "rewards/margins": 7.127754211425781, "rewards/rejected": -7.539534091949463, "step": 77220 }, { "epoch": 0.92, "learning_rate": 8.63677908088581e-08, "logits/chosen": -2.892758846282959, "logits/rejected": -2.2063746452331543, "logps/chosen": -115.71207427978516, "logps/rejected": -967.2610473632812, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -0.6254323124885559, "rewards/margins": 8.645101547241211, "rewards/rejected": -9.270535469055176, "step": 77230 }, { "epoch": 0.92, "learning_rate": 8.609579664962286e-08, "logits/chosen": -2.8520989418029785, "logits/rejected": -2.0610620975494385, "logps/chosen": -116.94419860839844, "logps/rejected": -1066.46533203125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6521762609481812, "rewards/margins": 9.607522964477539, "rewards/rejected": -10.259698867797852, "step": 77240 }, { "epoch": 0.92, "learning_rate": 8.582422395135709e-08, "logits/chosen": -2.9269256591796875, "logits/rejected": -2.3310656547546387, "logps/chosen": -107.82698059082031, "logps/rejected": -940.9710083007812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5427035093307495, "rewards/margins": 8.473093032836914, "rewards/rejected": -9.015796661376953, "step": 77250 }, { "epoch": 0.92, "learning_rate": 8.555307276147673e-08, "logits/chosen": -2.8863749504089355, "logits/rejected": -2.4361507892608643, "logps/chosen": -108.15855407714844, "logps/rejected": -903.7620239257812, "loss": 0.1419, "rewards/accuracies": 1.0, "rewards/chosen": -0.6360528469085693, "rewards/margins": 8.000246047973633, "rewards/rejected": -8.636299133300781, "step": 77260 }, { "epoch": 0.92, "learning_rate": 8.52823431273242e-08, "logits/chosen": -2.891310214996338, "logits/rejected": -2.5375170707702637, "logps/chosen": -86.32132720947266, "logps/rejected": -866.5001831054688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.401310533285141, "rewards/margins": 7.885440826416016, "rewards/rejected": -8.286750793457031, "step": 77270 }, { "epoch": 0.93, "learning_rate": 8.501203509616807e-08, "logits/chosen": -2.836300849914551, "logits/rejected": -2.458604335784912, "logps/chosen": -101.53204345703125, "logps/rejected": -903.1507568359375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5693838000297546, "rewards/margins": 8.071878433227539, "rewards/rejected": -8.64126205444336, "step": 77280 }, { "epoch": 0.93, "learning_rate": 8.474214871520308e-08, "logits/chosen": -2.9111454486846924, "logits/rejected": -2.4822471141815186, "logps/chosen": -100.68818664550781, "logps/rejected": -977.0833129882812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5268164277076721, "rewards/margins": 8.850801467895508, "rewards/rejected": -9.377617835998535, "step": 77290 }, { "epoch": 0.93, "learning_rate": 8.447268403155073e-08, "logits/chosen": -2.911649227142334, "logits/rejected": -2.453003168106079, "logps/chosen": -109.30216979980469, "logps/rejected": -963.1477661132812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5911332964897156, "rewards/margins": 8.644159317016602, "rewards/rejected": -9.235292434692383, "step": 77300 }, { "epoch": 0.93, "learning_rate": 8.420364109225892e-08, "logits/chosen": -2.8883070945739746, "logits/rejected": -2.4212987422943115, "logps/chosen": -89.79996490478516, "logps/rejected": -878.0685424804688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4041334092617035, "rewards/margins": 7.986081600189209, "rewards/rejected": -8.390214920043945, "step": 77310 }, { "epoch": 0.93, "learning_rate": 8.393501994430203e-08, "logits/chosen": -2.8951592445373535, "logits/rejected": -2.152944564819336, "logps/chosen": -119.19148254394531, "logps/rejected": -1018.39013671875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.606057345867157, "rewards/margins": 9.176814079284668, "rewards/rejected": -9.78287124633789, "step": 77320 }, { "epoch": 0.93, "learning_rate": 8.366682063458004e-08, "logits/chosen": -2.887641429901123, "logits/rejected": -2.3617520332336426, "logps/chosen": -111.41374206542969, "logps/rejected": -937.3375854492188, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.6310543417930603, "rewards/margins": 8.351094245910645, "rewards/rejected": -8.982147216796875, "step": 77330 }, { "epoch": 0.93, "learning_rate": 8.33990432099202e-08, "logits/chosen": -2.8854286670684814, "logits/rejected": -2.388486862182617, "logps/chosen": -87.54158020019531, "logps/rejected": -836.3128051757812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.43081822991371155, "rewards/margins": 7.556606292724609, "rewards/rejected": -7.987425327301025, "step": 77340 }, { "epoch": 0.93, "learning_rate": 8.31316877170757e-08, "logits/chosen": -2.901700496673584, "logits/rejected": -2.3437247276306152, "logps/chosen": -101.4924545288086, "logps/rejected": -874.5339965820312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.51555335521698, "rewards/margins": 7.831007957458496, "rewards/rejected": -8.346561431884766, "step": 77350 }, { "epoch": 0.93, "learning_rate": 8.286475420272582e-08, "logits/chosen": -2.8869786262512207, "logits/rejected": -2.279818058013916, "logps/chosen": -106.90672302246094, "logps/rejected": -998.416015625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5991848707199097, "rewards/margins": 8.981057167053223, "rewards/rejected": -9.580241203308105, "step": 77360 }, { "epoch": 0.93, "learning_rate": 8.259824271347666e-08, "logits/chosen": -2.9400925636291504, "logits/rejected": -2.3884506225585938, "logps/chosen": -119.67619323730469, "logps/rejected": -1011.4191284179688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6832270622253418, "rewards/margins": 9.022256851196289, "rewards/rejected": -9.705483436584473, "step": 77370 }, { "epoch": 0.93, "learning_rate": 8.233215329586014e-08, "logits/chosen": -2.8952836990356445, "logits/rejected": -2.231924295425415, "logps/chosen": -130.7109375, "logps/rejected": -964.6287231445312, "loss": 0.1157, "rewards/accuracies": 1.0, "rewards/chosen": -0.7828419208526611, "rewards/margins": 8.465658187866211, "rewards/rejected": -9.24850082397461, "step": 77380 }, { "epoch": 0.93, "learning_rate": 8.206648599633465e-08, "logits/chosen": -2.853684663772583, "logits/rejected": -2.052194833755493, "logps/chosen": -117.03311920166016, "logps/rejected": -956.2252197265625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.651538610458374, "rewards/margins": 8.519086837768555, "rewards/rejected": -9.170625686645508, "step": 77390 }, { "epoch": 0.93, "learning_rate": 8.180124086128588e-08, "logits/chosen": -2.8541321754455566, "logits/rejected": -2.244081974029541, "logps/chosen": -129.92623901367188, "logps/rejected": -1003.0496215820312, "loss": 0.1417, "rewards/accuracies": 1.0, "rewards/chosen": -0.8171176910400391, "rewards/margins": 8.807085990905762, "rewards/rejected": -9.6242036819458, "step": 77400 }, { "epoch": 0.93, "learning_rate": 8.153641793702371e-08, "logits/chosen": -2.8622944355010986, "logits/rejected": -2.3666651248931885, "logps/chosen": -125.9615707397461, "logps/rejected": -950.6306762695312, "loss": 0.1863, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8086129426956177, "rewards/margins": 8.30601692199707, "rewards/rejected": -9.114629745483398, "step": 77410 }, { "epoch": 0.93, "learning_rate": 8.127201726978589e-08, "logits/chosen": -2.879887819290161, "logits/rejected": -2.430424213409424, "logps/chosen": -120.91282653808594, "logps/rejected": -876.2615966796875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7182059288024902, "rewards/margins": 7.654895782470703, "rewards/rejected": -8.373102188110352, "step": 77420 }, { "epoch": 0.93, "learning_rate": 8.100803890573633e-08, "logits/chosen": -2.8687098026275635, "logits/rejected": -2.3729300498962402, "logps/chosen": -89.56324005126953, "logps/rejected": -938.8826904296875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4396025538444519, "rewards/margins": 8.55922794342041, "rewards/rejected": -8.998830795288086, "step": 77430 }, { "epoch": 0.93, "learning_rate": 8.074448289096454e-08, "logits/chosen": -2.9074323177337646, "logits/rejected": -2.3509316444396973, "logps/chosen": -121.13188171386719, "logps/rejected": -925.5445556640625, "loss": 0.0527, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7402140498161316, "rewards/margins": 8.13769817352295, "rewards/rejected": -8.877911567687988, "step": 77440 }, { "epoch": 0.93, "learning_rate": 8.048134927148704e-08, "logits/chosen": -2.879711151123047, "logits/rejected": -2.3037707805633545, "logps/chosen": -124.76560974121094, "logps/rejected": -955.2620849609375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.7843063473701477, "rewards/margins": 8.377633094787598, "rewards/rejected": -9.161940574645996, "step": 77450 }, { "epoch": 0.93, "learning_rate": 8.021863809324599e-08, "logits/chosen": -2.930309295654297, "logits/rejected": -2.5022988319396973, "logps/chosen": -78.60942077636719, "logps/rejected": -876.6760864257812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.39834535121917725, "rewards/margins": 7.994397163391113, "rewards/rejected": -8.392742156982422, "step": 77460 }, { "epoch": 0.93, "learning_rate": 7.995634940211022e-08, "logits/chosen": -2.860365390777588, "logits/rejected": -2.3012466430664062, "logps/chosen": -101.7419204711914, "logps/rejected": -920.2615356445312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.526770830154419, "rewards/margins": 8.284342765808105, "rewards/rejected": -8.811113357543945, "step": 77470 }, { "epoch": 0.93, "learning_rate": 7.969448324387425e-08, "logits/chosen": -2.846355438232422, "logits/rejected": -2.383333683013916, "logps/chosen": -101.67228698730469, "logps/rejected": -972.6669921875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5588165521621704, "rewards/margins": 8.762845039367676, "rewards/rejected": -9.321660995483398, "step": 77480 }, { "epoch": 0.93, "learning_rate": 7.943303966425952e-08, "logits/chosen": -2.8473401069641113, "logits/rejected": -2.3112826347351074, "logps/chosen": -93.8291244506836, "logps/rejected": -946.7183837890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4737037718296051, "rewards/margins": 8.610433578491211, "rewards/rejected": -9.084137916564941, "step": 77490 }, { "epoch": 0.93, "learning_rate": 7.917201870891316e-08, "logits/chosen": -2.899188280105591, "logits/rejected": -2.301236391067505, "logps/chosen": -117.19754791259766, "logps/rejected": -874.6189575195312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6581563949584961, "rewards/margins": 7.706142425537109, "rewards/rejected": -8.364298820495605, "step": 77500 }, { "epoch": 0.93, "learning_rate": 7.891142042340871e-08, "logits/chosen": -2.9324448108673096, "logits/rejected": -2.3681538105010986, "logps/chosen": -118.67375183105469, "logps/rejected": -954.90576171875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6524745225906372, "rewards/margins": 8.483428001403809, "rewards/rejected": -9.135903358459473, "step": 77510 }, { "epoch": 0.93, "learning_rate": 7.865124485324587e-08, "logits/chosen": -2.891878128051758, "logits/rejected": -2.411346912384033, "logps/chosen": -77.41266632080078, "logps/rejected": -846.42138671875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3712611496448517, "rewards/margins": 7.706179618835449, "rewards/rejected": -8.07744026184082, "step": 77520 }, { "epoch": 0.93, "learning_rate": 7.839149204385083e-08, "logits/chosen": -2.8754167556762695, "logits/rejected": -2.6141529083251953, "logps/chosen": -98.7660140991211, "logps/rejected": -685.0572509765625, "loss": 0.1165, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6031737327575684, "rewards/margins": 5.880804061889648, "rewards/rejected": -6.483977317810059, "step": 77530 }, { "epoch": 0.93, "learning_rate": 7.813216204057589e-08, "logits/chosen": -2.8620197772979736, "logits/rejected": -2.401848316192627, "logps/chosen": -88.20838165283203, "logps/rejected": -917.5599365234375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.444180428981781, "rewards/margins": 8.34531307220459, "rewards/rejected": -8.7894926071167, "step": 77540 }, { "epoch": 0.93, "learning_rate": 7.787325488869845e-08, "logits/chosen": -2.8397624492645264, "logits/rejected": -2.4135162830352783, "logps/chosen": -77.52674865722656, "logps/rejected": -852.8978271484375, "loss": 0.108, "rewards/accuracies": 1.0, "rewards/chosen": -0.3773627281188965, "rewards/margins": 7.778465270996094, "rewards/rejected": -8.155828475952148, "step": 77550 }, { "epoch": 0.93, "learning_rate": 7.761477063342377e-08, "logits/chosen": -2.9009692668914795, "logits/rejected": -2.295776128768921, "logps/chosen": -112.4500961303711, "logps/rejected": -927.9788818359375, "loss": 0.1289, "rewards/accuracies": 1.0, "rewards/chosen": -0.6203035116195679, "rewards/margins": 8.264822006225586, "rewards/rejected": -8.885124206542969, "step": 77560 }, { "epoch": 0.93, "learning_rate": 7.73567093198821e-08, "logits/chosen": -2.8560752868652344, "logits/rejected": -2.3006930351257324, "logps/chosen": -119.03271484375, "logps/rejected": -891.5985107421875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6955119967460632, "rewards/margins": 7.823098182678223, "rewards/rejected": -8.518610000610352, "step": 77570 }, { "epoch": 0.93, "learning_rate": 7.709907099313047e-08, "logits/chosen": -2.913809299468994, "logits/rejected": -2.345322847366333, "logps/chosen": -128.77001953125, "logps/rejected": -1025.0052490234375, "loss": 0.092, "rewards/accuracies": 1.0, "rewards/chosen": -0.7071784734725952, "rewards/margins": 9.149658203125, "rewards/rejected": -9.856837272644043, "step": 77580 }, { "epoch": 0.93, "learning_rate": 7.684185569815178e-08, "logits/chosen": -2.9341394901275635, "logits/rejected": -2.5961644649505615, "logps/chosen": -78.4539794921875, "logps/rejected": -838.1038208007812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.3647342026233673, "rewards/margins": 7.6307806968688965, "rewards/rejected": -7.9955153465271, "step": 77590 }, { "epoch": 0.93, "learning_rate": 7.65850634798554e-08, "logits/chosen": -2.868349075317383, "logits/rejected": -2.0697906017303467, "logps/chosen": -142.47946166992188, "logps/rejected": -1038.113037109375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.8479711413383484, "rewards/margins": 9.098573684692383, "rewards/rejected": -9.946542739868164, "step": 77600 }, { "epoch": 0.93, "learning_rate": 7.632869438307572e-08, "logits/chosen": -2.8876900672912598, "logits/rejected": -2.24149751663208, "logps/chosen": -94.51056671142578, "logps/rejected": -968.2433471679688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4948176443576813, "rewards/margins": 8.799976348876953, "rewards/rejected": -9.294794082641602, "step": 77610 }, { "epoch": 0.93, "learning_rate": 7.607274845257445e-08, "logits/chosen": -2.885281801223755, "logits/rejected": -2.4382059574127197, "logps/chosen": -88.86373901367188, "logps/rejected": -886.7117919921875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.41810980439186096, "rewards/margins": 8.073612213134766, "rewards/rejected": -8.491722106933594, "step": 77620 }, { "epoch": 0.93, "learning_rate": 7.581722573303946e-08, "logits/chosen": -2.890707492828369, "logits/rejected": -2.4889132976531982, "logps/chosen": -98.61827087402344, "logps/rejected": -865.1368408203125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5025713443756104, "rewards/margins": 7.770570278167725, "rewards/rejected": -8.273141860961914, "step": 77630 }, { "epoch": 0.93, "learning_rate": 7.556212626908366e-08, "logits/chosen": -2.8652782440185547, "logits/rejected": -2.5158286094665527, "logps/chosen": -89.41693878173828, "logps/rejected": -863.2708740234375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4765750467777252, "rewards/margins": 7.784615993499756, "rewards/rejected": -8.261190414428711, "step": 77640 }, { "epoch": 0.93, "learning_rate": 7.530745010524726e-08, "logits/chosen": -2.8323915004730225, "logits/rejected": -2.253931760787964, "logps/chosen": -115.8856430053711, "logps/rejected": -931.6202392578125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6728655099868774, "rewards/margins": 8.236613273620605, "rewards/rejected": -8.909479141235352, "step": 77650 }, { "epoch": 0.93, "learning_rate": 7.505319728599581e-08, "logits/chosen": -2.9266202449798584, "logits/rejected": -2.3581645488739014, "logps/chosen": -105.9242935180664, "logps/rejected": -905.6494140625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6266071200370789, "rewards/margins": 8.037919998168945, "rewards/rejected": -8.664527893066406, "step": 77660 }, { "epoch": 0.93, "learning_rate": 7.4799367855721e-08, "logits/chosen": -2.819636106491089, "logits/rejected": -1.9137938022613525, "logps/chosen": -169.35903930664062, "logps/rejected": -1030.8465576171875, "loss": 0.1207, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1579469442367554, "rewards/margins": 8.751091957092285, "rewards/rejected": -9.909037590026855, "step": 77670 }, { "epoch": 0.93, "learning_rate": 7.454596185874102e-08, "logits/chosen": -2.889395236968994, "logits/rejected": -2.219388484954834, "logps/chosen": -116.72794342041016, "logps/rejected": -852.0452880859375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.6456943154335022, "rewards/margins": 7.486512660980225, "rewards/rejected": -8.132206916809082, "step": 77680 }, { "epoch": 0.93, "learning_rate": 7.429297933929935e-08, "logits/chosen": -2.870028257369995, "logits/rejected": -2.2964580059051514, "logps/chosen": -98.79338073730469, "logps/rejected": -885.7357177734375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5242804884910583, "rewards/margins": 7.9476518630981445, "rewards/rejected": -8.471933364868164, "step": 77690 }, { "epoch": 0.93, "learning_rate": 7.404042034156677e-08, "logits/chosen": -2.8975512981414795, "logits/rejected": -1.951865553855896, "logps/chosen": -148.3939208984375, "logps/rejected": -1161.6978759765625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.826917827129364, "rewards/margins": 10.38935375213623, "rewards/rejected": -11.21627140045166, "step": 77700 }, { "epoch": 0.93, "learning_rate": 7.378828490963858e-08, "logits/chosen": -2.915468692779541, "logits/rejected": -2.08616042137146, "logps/chosen": -151.96131896972656, "logps/rejected": -1038.3956298828125, "loss": 0.1479, "rewards/accuracies": 1.0, "rewards/chosen": -0.9480310678482056, "rewards/margins": 9.023526191711426, "rewards/rejected": -9.9715576171875, "step": 77710 }, { "epoch": 0.93, "learning_rate": 7.353657308753759e-08, "logits/chosen": -2.8827404975891113, "logits/rejected": -2.3305633068084717, "logps/chosen": -117.56783294677734, "logps/rejected": -974.685546875, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.7218040823936462, "rewards/margins": 8.630526542663574, "rewards/rejected": -9.352331161499023, "step": 77720 }, { "epoch": 0.93, "learning_rate": 7.328528491921172e-08, "logits/chosen": -2.864746332168579, "logits/rejected": -2.4436697959899902, "logps/chosen": -89.47685241699219, "logps/rejected": -853.4517822265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4402318000793457, "rewards/margins": 7.702902317047119, "rewards/rejected": -8.143133163452148, "step": 77730 }, { "epoch": 0.93, "learning_rate": 7.303442044853532e-08, "logits/chosen": -2.9097278118133545, "logits/rejected": -2.501694440841675, "logps/chosen": -131.7266387939453, "logps/rejected": -761.4607543945312, "loss": 0.1395, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.9097034335136414, "rewards/margins": 6.329447269439697, "rewards/rejected": -7.2391510009765625, "step": 77740 }, { "epoch": 0.93, "learning_rate": 7.278397971930833e-08, "logits/chosen": -2.891939640045166, "logits/rejected": -2.3640923500061035, "logps/chosen": -90.6862564086914, "logps/rejected": -881.0906982421875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.43178147077560425, "rewards/margins": 7.984443664550781, "rewards/rejected": -8.416224479675293, "step": 77750 }, { "epoch": 0.93, "learning_rate": 7.253396277525692e-08, "logits/chosen": -2.8581125736236572, "logits/rejected": -2.126220703125, "logps/chosen": -107.74885559082031, "logps/rejected": -1095.230712890625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.540780246257782, "rewards/margins": 10.026403427124023, "rewards/rejected": -10.567184448242188, "step": 77760 }, { "epoch": 0.93, "learning_rate": 7.228436966003394e-08, "logits/chosen": -2.886862277984619, "logits/rejected": -2.1327567100524902, "logps/chosen": -117.6373519897461, "logps/rejected": -1059.527099609375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.6745850443840027, "rewards/margins": 9.524896621704102, "rewards/rejected": -10.199481010437012, "step": 77770 }, { "epoch": 0.93, "learning_rate": 7.203520041721728e-08, "logits/chosen": -2.911278009414673, "logits/rejected": -2.584876537322998, "logps/chosen": -104.9768295288086, "logps/rejected": -772.786376953125, "loss": 0.0202, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6395349502563477, "rewards/margins": 6.710491180419922, "rewards/rejected": -7.3500261306762695, "step": 77780 }, { "epoch": 0.93, "learning_rate": 7.178645509031106e-08, "logits/chosen": -2.9233531951904297, "logits/rejected": -2.4266982078552246, "logps/chosen": -90.33600616455078, "logps/rejected": -934.94677734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.4826053977012634, "rewards/margins": 8.491344451904297, "rewards/rejected": -8.973950386047363, "step": 77790 }, { "epoch": 0.93, "learning_rate": 7.153813372274609e-08, "logits/chosen": -2.925652027130127, "logits/rejected": -2.5125467777252197, "logps/chosen": -81.56661987304688, "logps/rejected": -840.0926513671875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4179714620113373, "rewards/margins": 7.595394134521484, "rewards/rejected": -8.013365745544434, "step": 77800 }, { "epoch": 0.93, "learning_rate": 7.129023635787797e-08, "logits/chosen": -2.86814546585083, "logits/rejected": -2.3870668411254883, "logps/chosen": -101.5057144165039, "logps/rejected": -916.2650146484375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5660721063613892, "rewards/margins": 8.21619701385498, "rewards/rejected": -8.782269477844238, "step": 77810 }, { "epoch": 0.93, "learning_rate": 7.104276303898899e-08, "logits/chosen": -2.9322431087493896, "logits/rejected": -2.3334853649139404, "logps/chosen": -113.83258056640625, "logps/rejected": -888.6998901367188, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6761909127235413, "rewards/margins": 7.8267011642456055, "rewards/rejected": -8.50289249420166, "step": 77820 }, { "epoch": 0.93, "learning_rate": 7.07957138092874e-08, "logits/chosen": -2.8510892391204834, "logits/rejected": -2.3301823139190674, "logps/chosen": -105.1693115234375, "logps/rejected": -957.58837890625, "loss": 0.0955, "rewards/accuracies": 1.0, "rewards/chosen": -0.5782166719436646, "rewards/margins": 8.614251136779785, "rewards/rejected": -9.19246768951416, "step": 77830 }, { "epoch": 0.93, "learning_rate": 7.054908871190757e-08, "logits/chosen": -2.88486647605896, "logits/rejected": -2.05338716506958, "logps/chosen": -125.68818664550781, "logps/rejected": -1042.7293701171875, "loss": 0.0853, "rewards/accuracies": 1.0, "rewards/chosen": -0.6782010793685913, "rewards/margins": 9.347423553466797, "rewards/rejected": -10.02562427520752, "step": 77840 }, { "epoch": 0.93, "learning_rate": 7.030288778990924e-08, "logits/chosen": -2.8597970008850098, "logits/rejected": -2.4268925189971924, "logps/chosen": -105.79096984863281, "logps/rejected": -937.5838012695312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5514353513717651, "rewards/margins": 8.425095558166504, "rewards/rejected": -8.976531982421875, "step": 77850 }, { "epoch": 0.93, "learning_rate": 7.005711108627882e-08, "logits/chosen": -2.882460117340088, "logits/rejected": -2.1809725761413574, "logps/chosen": -132.16094970703125, "logps/rejected": -996.73486328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8230490684509277, "rewards/margins": 8.736061096191406, "rewards/rejected": -9.559110641479492, "step": 77860 }, { "epoch": 0.93, "learning_rate": 6.981175864392759e-08, "logits/chosen": -2.8554913997650146, "logits/rejected": -2.352236270904541, "logps/chosen": -89.51795959472656, "logps/rejected": -875.3279418945312, "loss": 0.0871, "rewards/accuracies": 1.0, "rewards/chosen": -0.4065566658973694, "rewards/margins": 7.971477508544922, "rewards/rejected": -8.378035545349121, "step": 77870 }, { "epoch": 0.93, "learning_rate": 6.956683050569402e-08, "logits/chosen": -2.875617265701294, "logits/rejected": -2.4064688682556152, "logps/chosen": -98.62479400634766, "logps/rejected": -851.4762573242188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5250009298324585, "rewards/margins": 7.599625587463379, "rewards/rejected": -8.124626159667969, "step": 77880 }, { "epoch": 0.93, "learning_rate": 6.932232671434114e-08, "logits/chosen": -2.8826773166656494, "logits/rejected": -2.3055641651153564, "logps/chosen": -111.93682861328125, "logps/rejected": -951.1160278320312, "loss": 0.0895, "rewards/accuracies": 1.0, "rewards/chosen": -0.6328872442245483, "rewards/margins": 8.491913795471191, "rewards/rejected": -9.124801635742188, "step": 77890 }, { "epoch": 0.93, "learning_rate": 6.907824731255953e-08, "logits/chosen": -2.8593695163726807, "logits/rejected": -2.252476692199707, "logps/chosen": -122.28794860839844, "logps/rejected": -889.9757080078125, "loss": 0.0371, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.708023190498352, "rewards/margins": 7.821054935455322, "rewards/rejected": -8.529078483581543, "step": 77900 }, { "epoch": 0.93, "learning_rate": 6.883459234296425e-08, "logits/chosen": -2.9076309204101562, "logits/rejected": -2.3215930461883545, "logps/chosen": -132.11160278320312, "logps/rejected": -866.8273315429688, "loss": 0.0966, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8665281534194946, "rewards/margins": 7.432208061218262, "rewards/rejected": -8.298736572265625, "step": 77910 }, { "epoch": 0.93, "learning_rate": 6.859136184809684e-08, "logits/chosen": -2.886439323425293, "logits/rejected": -2.266669988632202, "logps/chosen": -115.0667724609375, "logps/rejected": -985.19873046875, "loss": 0.0893, "rewards/accuracies": 1.0, "rewards/chosen": -0.684375524520874, "rewards/margins": 8.765950202941895, "rewards/rejected": -9.450325965881348, "step": 77920 }, { "epoch": 0.93, "learning_rate": 6.834855587042471e-08, "logits/chosen": -2.8859710693359375, "logits/rejected": -2.274812698364258, "logps/chosen": -117.06767272949219, "logps/rejected": -952.8740234375, "loss": 0.1906, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7024277448654175, "rewards/margins": 8.427029609680176, "rewards/rejected": -9.129457473754883, "step": 77930 }, { "epoch": 0.93, "learning_rate": 6.810617445234119e-08, "logits/chosen": -2.8737452030181885, "logits/rejected": -2.331491231918335, "logps/chosen": -98.41703796386719, "logps/rejected": -988.1942138671875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.51910799741745, "rewards/margins": 8.961202621459961, "rewards/rejected": -9.480310440063477, "step": 77940 }, { "epoch": 0.93, "learning_rate": 6.786421763616491e-08, "logits/chosen": -2.930227518081665, "logits/rejected": -2.3541793823242188, "logps/chosen": -144.072509765625, "logps/rejected": -921.0906982421875, "loss": 0.1523, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9641658067703247, "rewards/margins": 7.862971305847168, "rewards/rejected": -8.827136993408203, "step": 77950 }, { "epoch": 0.93, "learning_rate": 6.762268546414153e-08, "logits/chosen": -2.879955291748047, "logits/rejected": -2.4428703784942627, "logps/chosen": -95.57865905761719, "logps/rejected": -924.1834716796875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5195735096931458, "rewards/margins": 8.33504867553711, "rewards/rejected": -8.854623794555664, "step": 77960 }, { "epoch": 0.93, "learning_rate": 6.738157797844175e-08, "logits/chosen": -2.892993211746216, "logits/rejected": -2.2412283420562744, "logps/chosen": -133.89707946777344, "logps/rejected": -1038.1842041015625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7973334789276123, "rewards/margins": 9.173376083374023, "rewards/rejected": -9.970709800720215, "step": 77970 }, { "epoch": 0.93, "learning_rate": 6.714089522116191e-08, "logits/chosen": -2.836120843887329, "logits/rejected": -2.4779229164123535, "logps/chosen": -72.8519287109375, "logps/rejected": -800.4982299804688, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.32511523365974426, "rewards/margins": 7.308829307556152, "rewards/rejected": -7.633944511413574, "step": 77980 }, { "epoch": 0.93, "learning_rate": 6.690063723432533e-08, "logits/chosen": -2.9314420223236084, "logits/rejected": -2.0964741706848145, "logps/chosen": -152.57687377929688, "logps/rejected": -1057.9852294921875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.9650722742080688, "rewards/margins": 9.200836181640625, "rewards/rejected": -10.165908813476562, "step": 77990 }, { "epoch": 0.93, "learning_rate": 6.66608040598793e-08, "logits/chosen": -2.9007818698883057, "logits/rejected": -2.3184993267059326, "logps/chosen": -111.3721694946289, "logps/rejected": -934.42919921875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.6089698672294617, "rewards/margins": 8.338366508483887, "rewards/rejected": -8.94733715057373, "step": 78000 }, { "epoch": 0.93, "eval_logits/chosen": -2.886115789413452, "eval_logits/rejected": -1.7732505798339844, "eval_logps/chosen": -239.80364990234375, "eval_logps/rejected": -1138.6900634765625, "eval_loss": 0.001312321168370545, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -1.786233901977539, "eval_rewards/margins": 9.133413314819336, "eval_rewards/rejected": -10.919647216796875, "eval_runtime": 1.2156, "eval_samples_per_second": 4.113, "eval_steps_per_second": 2.468, "step": 78000 }, { "epoch": 0.93, "learning_rate": 6.642139573969864e-08, "logits/chosen": -2.90291166305542, "logits/rejected": -2.625034809112549, "logps/chosen": -81.64732360839844, "logps/rejected": -828.302734375, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -0.4327712953090668, "rewards/margins": 7.4718122482299805, "rewards/rejected": -7.904582977294922, "step": 78010 }, { "epoch": 0.93, "learning_rate": 6.618241231558325e-08, "logits/chosen": -2.893217086791992, "logits/rejected": -2.4075064659118652, "logps/chosen": -119.6391372680664, "logps/rejected": -862.3170166015625, "loss": 0.022, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6937206387519836, "rewards/margins": 7.524266242980957, "rewards/rejected": -8.217986106872559, "step": 78020 }, { "epoch": 0.93, "learning_rate": 6.594385382925921e-08, "logits/chosen": -2.932126760482788, "logits/rejected": -2.5443978309631348, "logps/chosen": -74.12147521972656, "logps/rejected": -808.1467895507812, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.3422073721885681, "rewards/margins": 7.371960639953613, "rewards/rejected": -7.714167594909668, "step": 78030 }, { "epoch": 0.93, "learning_rate": 6.570572032237793e-08, "logits/chosen": -2.8992819786071777, "logits/rejected": -2.393083095550537, "logps/chosen": -119.98661804199219, "logps/rejected": -921.4537353515625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.718951940536499, "rewards/margins": 8.098977088928223, "rewards/rejected": -8.817927360534668, "step": 78040 }, { "epoch": 0.93, "learning_rate": 6.546801183651697e-08, "logits/chosen": -2.8969905376434326, "logits/rejected": -2.193696975708008, "logps/chosen": -150.52883911132812, "logps/rejected": -878.52734375, "loss": 0.121, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0077053308486938, "rewards/margins": 7.3882622718811035, "rewards/rejected": -8.395967483520508, "step": 78050 }, { "epoch": 0.93, "learning_rate": 6.523072841317979e-08, "logits/chosen": -2.8905446529388428, "logits/rejected": -2.188046932220459, "logps/chosen": -119.9087905883789, "logps/rejected": -954.1494140625, "loss": 0.0786, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6841563582420349, "rewards/margins": 8.450017929077148, "rewards/rejected": -9.134175300598145, "step": 78060 }, { "epoch": 0.93, "learning_rate": 6.499387009379466e-08, "logits/chosen": -2.8581433296203613, "logits/rejected": -2.3913357257843018, "logps/chosen": -92.18269348144531, "logps/rejected": -807.3934936523438, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.44861871004104614, "rewards/margins": 7.247161865234375, "rewards/rejected": -7.6957807540893555, "step": 78070 }, { "epoch": 0.93, "learning_rate": 6.475743691971709e-08, "logits/chosen": -2.855135679244995, "logits/rejected": -2.4230260848999023, "logps/chosen": -90.49604034423828, "logps/rejected": -911.6951293945312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.46205058693885803, "rewards/margins": 8.280014038085938, "rewards/rejected": -8.742063522338867, "step": 78080 }, { "epoch": 0.93, "learning_rate": 6.45214289322274e-08, "logits/chosen": -2.904484272003174, "logits/rejected": -2.400235176086426, "logps/chosen": -97.47461700439453, "logps/rejected": -923.5247192382812, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.5038896203041077, "rewards/margins": 8.337540626525879, "rewards/rejected": -8.841428756713867, "step": 78090 }, { "epoch": 0.93, "learning_rate": 6.42858461725318e-08, "logits/chosen": -2.899782419204712, "logits/rejected": -2.4467897415161133, "logps/chosen": -96.1402587890625, "logps/rejected": -826.921875, "loss": 0.1811, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5525726079940796, "rewards/margins": 7.339603424072266, "rewards/rejected": -7.892176151275635, "step": 78100 }, { "epoch": 0.94, "learning_rate": 6.405068868176267e-08, "logits/chosen": -2.8689181804656982, "logits/rejected": -2.0294816493988037, "logps/chosen": -155.40939331054688, "logps/rejected": -1139.830810546875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.012717604637146, "rewards/margins": 9.965597152709961, "rewards/rejected": -10.978316307067871, "step": 78110 }, { "epoch": 0.94, "learning_rate": 6.381595650097799e-08, "logits/chosen": -2.8923792839050293, "logits/rejected": -2.519146680831909, "logps/chosen": -98.18656158447266, "logps/rejected": -929.2333984375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5130296945571899, "rewards/margins": 8.384970664978027, "rewards/rejected": -8.898000717163086, "step": 78120 }, { "epoch": 0.94, "learning_rate": 6.358164967116081e-08, "logits/chosen": -2.871352434158325, "logits/rejected": -2.3583924770355225, "logps/chosen": -98.08869171142578, "logps/rejected": -890.3654174804688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5321487188339233, "rewards/margins": 7.989048004150391, "rewards/rejected": -8.521196365356445, "step": 78130 }, { "epoch": 0.94, "learning_rate": 6.334776823322092e-08, "logits/chosen": -2.91614031791687, "logits/rejected": -2.6965415477752686, "logps/chosen": -62.74580764770508, "logps/rejected": -759.88916015625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.2647785544395447, "rewards/margins": 6.959084987640381, "rewards/rejected": -7.223863124847412, "step": 78140 }, { "epoch": 0.94, "learning_rate": 6.311431222799285e-08, "logits/chosen": -2.7981557846069336, "logits/rejected": -1.9810190200805664, "logps/chosen": -120.4760971069336, "logps/rejected": -1088.4127197265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6394718289375305, "rewards/margins": 9.842227935791016, "rewards/rejected": -10.48169994354248, "step": 78150 }, { "epoch": 0.94, "learning_rate": 6.288128169623791e-08, "logits/chosen": -2.852055549621582, "logits/rejected": -2.299816131591797, "logps/chosen": -114.1937026977539, "logps/rejected": -998.5164184570312, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6744444966316223, "rewards/margins": 8.922696113586426, "rewards/rejected": -9.59714126586914, "step": 78160 }, { "epoch": 0.94, "learning_rate": 6.264867667864244e-08, "logits/chosen": -2.911315441131592, "logits/rejected": -2.330303192138672, "logps/chosen": -107.57242584228516, "logps/rejected": -940.1068115234375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5427271723747253, "rewards/margins": 8.47500991821289, "rewards/rejected": -9.01773738861084, "step": 78170 }, { "epoch": 0.94, "learning_rate": 6.241649721581866e-08, "logits/chosen": -2.922947406768799, "logits/rejected": -2.5801711082458496, "logps/chosen": -75.49629974365234, "logps/rejected": -796.6740112304688, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.3172522783279419, "rewards/margins": 7.2723822593688965, "rewards/rejected": -7.589634895324707, "step": 78180 }, { "epoch": 0.94, "learning_rate": 6.218474334830444e-08, "logits/chosen": -2.900812864303589, "logits/rejected": -2.3989222049713135, "logps/chosen": -111.2132797241211, "logps/rejected": -850.15673828125, "loss": 0.0317, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.673379123210907, "rewards/margins": 7.437585353851318, "rewards/rejected": -8.110963821411133, "step": 78190 }, { "epoch": 0.94, "learning_rate": 6.195341511656294e-08, "logits/chosen": -2.8954145908355713, "logits/rejected": -2.391169786453247, "logps/chosen": -83.73699951171875, "logps/rejected": -864.1436767578125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.3927229046821594, "rewards/margins": 7.848355293273926, "rewards/rejected": -8.24107837677002, "step": 78200 }, { "epoch": 0.94, "learning_rate": 6.172251256098383e-08, "logits/chosen": -2.9112188816070557, "logits/rejected": -2.537569999694824, "logps/chosen": -121.48735046386719, "logps/rejected": -906.7083129882812, "loss": 0.089, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.77117919921875, "rewards/margins": 7.903616905212402, "rewards/rejected": -8.674795150756836, "step": 78210 }, { "epoch": 0.94, "learning_rate": 6.149203572188206e-08, "logits/chosen": -2.888667583465576, "logits/rejected": -2.375565528869629, "logps/chosen": -97.9354019165039, "logps/rejected": -904.9080810546875, "loss": 0.0215, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5897996425628662, "rewards/margins": 8.06794548034668, "rewards/rejected": -8.657743453979492, "step": 78220 }, { "epoch": 0.94, "learning_rate": 6.126198463949823e-08, "logits/chosen": -2.925868034362793, "logits/rejected": -2.5009665489196777, "logps/chosen": -90.36503601074219, "logps/rejected": -893.5930786132812, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.41939234733581543, "rewards/margins": 8.141410827636719, "rewards/rejected": -8.560802459716797, "step": 78230 }, { "epoch": 0.94, "learning_rate": 6.103235935399854e-08, "logits/chosen": -2.881058931350708, "logits/rejected": -2.477680206298828, "logps/chosen": -108.27545166015625, "logps/rejected": -820.064453125, "loss": 0.2174, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6359556913375854, "rewards/margins": 7.184144496917725, "rewards/rejected": -7.820099830627441, "step": 78240 }, { "epoch": 0.94, "learning_rate": 6.08031599054748e-08, "logits/chosen": -2.8699164390563965, "logits/rejected": -2.543247699737549, "logps/chosen": -76.59847259521484, "logps/rejected": -851.966796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.37450969219207764, "rewards/margins": 7.773359775543213, "rewards/rejected": -8.147869110107422, "step": 78250 }, { "epoch": 0.94, "learning_rate": 6.057438633394502e-08, "logits/chosen": -2.9289498329162598, "logits/rejected": -2.380385398864746, "logps/chosen": -97.07832336425781, "logps/rejected": -878.4343872070312, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": -0.5415626764297485, "rewards/margins": 7.866374969482422, "rewards/rejected": -8.407938003540039, "step": 78260 }, { "epoch": 0.94, "learning_rate": 6.034603867935196e-08, "logits/chosen": -2.8856277465820312, "logits/rejected": -2.59470796585083, "logps/chosen": -71.88190460205078, "logps/rejected": -756.1126708984375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.3273133933544159, "rewards/margins": 6.868314266204834, "rewards/rejected": -7.195627689361572, "step": 78270 }, { "epoch": 0.94, "learning_rate": 6.011811698156483e-08, "logits/chosen": -2.8630549907684326, "logits/rejected": -2.483959913253784, "logps/chosen": -83.26731872558594, "logps/rejected": -908.9075317382812, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.41722288727760315, "rewards/margins": 8.3064546585083, "rewards/rejected": -8.723676681518555, "step": 78280 }, { "epoch": 0.94, "learning_rate": 5.989062128037821e-08, "logits/chosen": -2.9256629943847656, "logits/rejected": -2.4845385551452637, "logps/chosen": -74.06531524658203, "logps/rejected": -934.0084228515625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.31712859869003296, "rewards/margins": 8.636817932128906, "rewards/rejected": -8.953946113586426, "step": 78290 }, { "epoch": 0.94, "learning_rate": 5.966355161551168e-08, "logits/chosen": -2.874711275100708, "logits/rejected": -2.2562694549560547, "logps/chosen": -113.78125, "logps/rejected": -951.3176879882812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6060323119163513, "rewards/margins": 8.511656761169434, "rewards/rejected": -9.117690086364746, "step": 78300 }, { "epoch": 0.94, "learning_rate": 5.943690802661162e-08, "logits/chosen": -2.871246576309204, "logits/rejected": -2.399040460586548, "logps/chosen": -96.46946716308594, "logps/rejected": -944.72314453125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5322328209877014, "rewards/margins": 8.52994155883789, "rewards/rejected": -9.062174797058105, "step": 78310 }, { "epoch": 0.94, "learning_rate": 5.921069055324885e-08, "logits/chosen": -2.860593318939209, "logits/rejected": -2.564587116241455, "logps/chosen": -76.51737976074219, "logps/rejected": -834.6480712890625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.3465871214866638, "rewards/margins": 7.622913360595703, "rewards/rejected": -7.969500541687012, "step": 78320 }, { "epoch": 0.94, "learning_rate": 5.89848992349204e-08, "logits/chosen": -2.8713650703430176, "logits/rejected": -2.4789862632751465, "logps/chosen": -85.45142364501953, "logps/rejected": -863.2064208984375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.43126019835472107, "rewards/margins": 7.817181587219238, "rewards/rejected": -8.248441696166992, "step": 78330 }, { "epoch": 0.94, "learning_rate": 5.87595341110489e-08, "logits/chosen": -2.8605105876922607, "logits/rejected": -2.214775562286377, "logps/chosen": -124.60709381103516, "logps/rejected": -1044.5460205078125, "loss": 0.1105, "rewards/accuracies": 1.0, "rewards/chosen": -0.7221511602401733, "rewards/margins": 9.340479850769043, "rewards/rejected": -10.062631607055664, "step": 78340 }, { "epoch": 0.94, "learning_rate": 5.853459522098259e-08, "logits/chosen": -2.8582942485809326, "logits/rejected": -2.2714574337005615, "logps/chosen": -118.57694244384766, "logps/rejected": -959.9080200195312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6627553105354309, "rewards/margins": 8.529420852661133, "rewards/rejected": -9.192174911499023, "step": 78350 }, { "epoch": 0.94, "learning_rate": 5.8310082603995065e-08, "logits/chosen": -2.8939623832702637, "logits/rejected": -2.3351876735687256, "logps/chosen": -107.55943298339844, "logps/rejected": -957.0948486328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5945224165916443, "rewards/margins": 8.594430923461914, "rewards/rejected": -9.188953399658203, "step": 78360 }, { "epoch": 0.94, "learning_rate": 5.808599629928552e-08, "logits/chosen": -2.8916239738464355, "logits/rejected": -1.941653847694397, "logps/chosen": -143.52410888671875, "logps/rejected": -1136.6318359375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8578319549560547, "rewards/margins": 10.088537216186523, "rewards/rejected": -10.946371078491211, "step": 78370 }, { "epoch": 0.94, "learning_rate": 5.786233634597932e-08, "logits/chosen": -2.861361265182495, "logits/rejected": -2.355292797088623, "logps/chosen": -99.42473602294922, "logps/rejected": -868.3259887695312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.49058079719543457, "rewards/margins": 7.7985734939575195, "rewards/rejected": -8.289155006408691, "step": 78380 }, { "epoch": 0.94, "learning_rate": 5.763910278312607e-08, "logits/chosen": -2.9024932384490967, "logits/rejected": -2.252431869506836, "logps/chosen": -120.81790924072266, "logps/rejected": -928.4674072265625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6958004236221313, "rewards/margins": 8.189521789550781, "rewards/rejected": -8.885323524475098, "step": 78390 }, { "epoch": 0.94, "learning_rate": 5.741629564970208e-08, "logits/chosen": -2.9036152362823486, "logits/rejected": -2.484243869781494, "logps/chosen": -89.23835754394531, "logps/rejected": -935.9761962890625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.44499310851097107, "rewards/margins": 8.533205032348633, "rewards/rejected": -8.97819709777832, "step": 78400 }, { "epoch": 0.94, "learning_rate": 5.719391498460902e-08, "logits/chosen": -2.813408851623535, "logits/rejected": -2.072174549102783, "logps/chosen": -131.1338653564453, "logps/rejected": -990.3048095703125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7608928084373474, "rewards/margins": 8.74788761138916, "rewards/rejected": -9.508780479431152, "step": 78410 }, { "epoch": 0.94, "learning_rate": 5.697196082667361e-08, "logits/chosen": -2.8808789253234863, "logits/rejected": -2.2941269874572754, "logps/chosen": -114.14317321777344, "logps/rejected": -916.3314208984375, "loss": 0.1035, "rewards/accuracies": 1.0, "rewards/chosen": -0.639177143573761, "rewards/margins": 8.139280319213867, "rewards/rejected": -8.778457641601562, "step": 78420 }, { "epoch": 0.94, "learning_rate": 5.6750433214648734e-08, "logits/chosen": -2.8236327171325684, "logits/rejected": -2.245302438735962, "logps/chosen": -103.22093200683594, "logps/rejected": -960.2918090820312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5547798871994019, "rewards/margins": 8.64405345916748, "rewards/rejected": -9.198832511901855, "step": 78430 }, { "epoch": 0.94, "learning_rate": 5.652933218721263e-08, "logits/chosen": -2.8811609745025635, "logits/rejected": -2.30861234664917, "logps/chosen": -110.44203186035156, "logps/rejected": -962.5442504882812, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.612239420413971, "rewards/margins": 8.611244201660156, "rewards/rejected": -9.223482131958008, "step": 78440 }, { "epoch": 0.94, "learning_rate": 5.630865778296829e-08, "logits/chosen": -2.9184911251068115, "logits/rejected": -2.3795313835144043, "logps/chosen": -132.58718872070312, "logps/rejected": -1000.7951049804688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8592463731765747, "rewards/margins": 8.749417304992676, "rewards/rejected": -9.608663558959961, "step": 78450 }, { "epoch": 0.94, "learning_rate": 5.608841004044546e-08, "logits/chosen": -2.9051523208618164, "logits/rejected": -2.214172124862671, "logps/chosen": -142.4889373779297, "logps/rejected": -1039.16748046875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.8861501812934875, "rewards/margins": 9.101705551147461, "rewards/rejected": -9.987855911254883, "step": 78460 }, { "epoch": 0.94, "learning_rate": 5.5868588998098385e-08, "logits/chosen": -2.901642322540283, "logits/rejected": -2.539498805999756, "logps/chosen": -79.43707275390625, "logps/rejected": -847.5989990234375, "loss": 0.1223, "rewards/accuracies": 1.0, "rewards/chosen": -0.3797987699508667, "rewards/margins": 7.711236476898193, "rewards/rejected": -8.091035842895508, "step": 78470 }, { "epoch": 0.94, "learning_rate": 5.564919469430746e-08, "logits/chosen": -2.9289002418518066, "logits/rejected": -2.6228671073913574, "logps/chosen": -76.6412124633789, "logps/rejected": -843.5667724609375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.3608016073703766, "rewards/margins": 7.693306922912598, "rewards/rejected": -8.054108619689941, "step": 78480 }, { "epoch": 0.94, "learning_rate": 5.5430227167378445e-08, "logits/chosen": -2.9259960651397705, "logits/rejected": -2.638322353363037, "logps/chosen": -76.7524185180664, "logps/rejected": -768.9168701171875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.389076292514801, "rewards/margins": 6.924005031585693, "rewards/rejected": -7.31308126449585, "step": 78490 }, { "epoch": 0.94, "learning_rate": 5.5211686455542145e-08, "logits/chosen": -2.8809762001037598, "logits/rejected": -2.3822314739227295, "logps/chosen": -102.1176986694336, "logps/rejected": -930.6287841796875, "loss": 0.0859, "rewards/accuracies": 1.0, "rewards/chosen": -0.5895869135856628, "rewards/margins": 8.336931228637695, "rewards/rejected": -8.926518440246582, "step": 78500 }, { "epoch": 0.94, "learning_rate": 5.4993572596955256e-08, "logits/chosen": -2.8828494548797607, "logits/rejected": -2.156351089477539, "logps/chosen": -121.3046646118164, "logps/rejected": -1053.208251953125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6434661746025085, "rewards/margins": 9.474431991577148, "rewards/rejected": -10.117898941040039, "step": 78510 }, { "epoch": 0.94, "learning_rate": 5.4775885629699824e-08, "logits/chosen": -2.8933589458465576, "logits/rejected": -2.4088149070739746, "logps/chosen": -102.91495513916016, "logps/rejected": -891.3515625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5737866759300232, "rewards/margins": 7.95281982421875, "rewards/rejected": -8.526605606079102, "step": 78520 }, { "epoch": 0.94, "learning_rate": 5.455862559178349e-08, "logits/chosen": -2.875962972640991, "logits/rejected": -2.10235857963562, "logps/chosen": -132.45550537109375, "logps/rejected": -1125.899658203125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7651897072792053, "rewards/margins": 10.091611862182617, "rewards/rejected": -10.856801986694336, "step": 78530 }, { "epoch": 0.94, "learning_rate": 5.434179252113925e-08, "logits/chosen": -2.904209852218628, "logits/rejected": -2.4299025535583496, "logps/chosen": -79.44839477539062, "logps/rejected": -882.4617919921875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.3692653775215149, "rewards/margins": 8.060541152954102, "rewards/rejected": -8.429807662963867, "step": 78540 }, { "epoch": 0.94, "learning_rate": 5.412538645562543e-08, "logits/chosen": -2.875955104827881, "logits/rejected": -2.1940102577209473, "logps/chosen": -124.22650146484375, "logps/rejected": -958.2821044921875, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -0.705155611038208, "rewards/margins": 8.489156723022461, "rewards/rejected": -9.194311141967773, "step": 78550 }, { "epoch": 0.94, "learning_rate": 5.390940743302597e-08, "logits/chosen": -2.915501356124878, "logits/rejected": -2.2155096530914307, "logps/chosen": -132.18284606933594, "logps/rejected": -1079.09228515625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7783594727516174, "rewards/margins": 9.602181434631348, "rewards/rejected": -10.38054084777832, "step": 78560 }, { "epoch": 0.94, "learning_rate": 5.369385549105044e-08, "logits/chosen": -2.887800455093384, "logits/rejected": -2.5040202140808105, "logps/chosen": -108.1030044555664, "logps/rejected": -797.7352294921875, "loss": 0.106, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6170030236244202, "rewards/margins": 6.987987518310547, "rewards/rejected": -7.604990482330322, "step": 78570 }, { "epoch": 0.94, "learning_rate": 5.3478730667333444e-08, "logits/chosen": -2.897249698638916, "logits/rejected": -2.5267975330352783, "logps/chosen": -67.20799255371094, "logps/rejected": -822.1798095703125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.3095118999481201, "rewards/margins": 7.535996437072754, "rewards/rejected": -7.845507621765137, "step": 78580 }, { "epoch": 0.94, "learning_rate": 5.326403299943494e-08, "logits/chosen": -2.840144395828247, "logits/rejected": -2.2875516414642334, "logps/chosen": -117.73978424072266, "logps/rejected": -982.8170166015625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6403967142105103, "rewards/margins": 8.789710998535156, "rewards/rejected": -9.430109024047852, "step": 78590 }, { "epoch": 0.94, "learning_rate": 5.3049762524840784e-08, "logits/chosen": -2.8660144805908203, "logits/rejected": -2.293463945388794, "logps/chosen": -115.49995422363281, "logps/rejected": -997.9310302734375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6730888485908508, "rewards/margins": 8.908271789550781, "rewards/rejected": -9.58135986328125, "step": 78600 }, { "epoch": 0.94, "learning_rate": 5.2835919280961876e-08, "logits/chosen": -2.8740487098693848, "logits/rejected": -2.6115283966064453, "logps/chosen": -70.46200561523438, "logps/rejected": -834.5126953125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.2932441532611847, "rewards/margins": 7.6815690994262695, "rewards/rejected": -7.974812984466553, "step": 78610 }, { "epoch": 0.94, "learning_rate": 5.262250330513502e-08, "logits/chosen": -2.8757472038269043, "logits/rejected": -2.368931531906128, "logps/chosen": -111.9214096069336, "logps/rejected": -909.8102416992188, "loss": 0.0209, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6135158538818359, "rewards/margins": 8.0872802734375, "rewards/rejected": -8.700796127319336, "step": 78620 }, { "epoch": 0.94, "learning_rate": 5.240951463462152e-08, "logits/chosen": -2.8641934394836426, "logits/rejected": -2.0776619911193848, "logps/chosen": -109.83653259277344, "logps/rejected": -945.2737426757812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6081910729408264, "rewards/margins": 8.451837539672852, "rewards/rejected": -9.060029029846191, "step": 78630 }, { "epoch": 0.94, "learning_rate": 5.219695330660912e-08, "logits/chosen": -2.8694772720336914, "logits/rejected": -2.3206372261047363, "logps/chosen": -97.08707427978516, "logps/rejected": -927.9078979492188, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.50501549243927, "rewards/margins": 8.389947891235352, "rewards/rejected": -8.894963264465332, "step": 78640 }, { "epoch": 0.94, "learning_rate": 5.198481935821009e-08, "logits/chosen": -2.89094614982605, "logits/rejected": -2.411567211151123, "logps/chosen": -121.95406341552734, "logps/rejected": -805.9667358398438, "loss": 0.0763, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7492856383323669, "rewards/margins": 6.910421848297119, "rewards/rejected": -7.659706115722656, "step": 78650 }, { "epoch": 0.94, "learning_rate": 5.17731128264623e-08, "logits/chosen": -2.9442172050476074, "logits/rejected": -2.5631296634674072, "logps/chosen": -78.95685577392578, "logps/rejected": -778.8567504882812, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.3539772629737854, "rewards/margins": 7.0607404708862305, "rewards/rejected": -7.414717674255371, "step": 78660 }, { "epoch": 0.94, "learning_rate": 5.156183374832951e-08, "logits/chosen": -2.856458902359009, "logits/rejected": -2.5173754692077637, "logps/chosen": -70.36595153808594, "logps/rejected": -857.7730712890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.3067001402378082, "rewards/margins": 7.895050048828125, "rewards/rejected": -8.201749801635742, "step": 78670 }, { "epoch": 0.94, "learning_rate": 5.1350982160700555e-08, "logits/chosen": -2.8745830059051514, "logits/rejected": -2.38259220123291, "logps/chosen": -115.71412658691406, "logps/rejected": -867.9498901367188, "loss": 0.0747, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7169878482818604, "rewards/margins": 7.565064430236816, "rewards/rejected": -8.282052993774414, "step": 78680 }, { "epoch": 0.94, "learning_rate": 5.114055810038904e-08, "logits/chosen": -2.864285707473755, "logits/rejected": -2.44905948638916, "logps/chosen": -99.00138854980469, "logps/rejected": -923.0021362304688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5835093259811401, "rewards/margins": 8.254610061645508, "rewards/rejected": -8.838117599487305, "step": 78690 }, { "epoch": 0.94, "learning_rate": 5.093056160413473e-08, "logits/chosen": -2.8876256942749023, "logits/rejected": -2.4375967979431152, "logps/chosen": -88.733154296875, "logps/rejected": -877.1995849609375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4339107573032379, "rewards/margins": 7.957205772399902, "rewards/rejected": -8.391117095947266, "step": 78700 }, { "epoch": 0.94, "learning_rate": 5.072099270860248e-08, "logits/chosen": -2.8420231342315674, "logits/rejected": -2.2451412677764893, "logps/chosen": -100.11011505126953, "logps/rejected": -888.0807495117188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5760161280632019, "rewards/margins": 7.918290615081787, "rewards/rejected": -8.494305610656738, "step": 78710 }, { "epoch": 0.94, "learning_rate": 5.051185145038218e-08, "logits/chosen": -2.869337797164917, "logits/rejected": -2.391695737838745, "logps/chosen": -89.3573989868164, "logps/rejected": -896.3972778320312, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.49313482642173767, "rewards/margins": 8.101771354675293, "rewards/rejected": -8.5949068069458, "step": 78720 }, { "epoch": 0.94, "learning_rate": 5.030313786598934e-08, "logits/chosen": -2.8460171222686768, "logits/rejected": -2.413731098175049, "logps/chosen": -139.92282104492188, "logps/rejected": -788.1271362304688, "loss": 0.2362, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9354081153869629, "rewards/margins": 6.57205057144165, "rewards/rejected": -7.5074591636657715, "step": 78730 }, { "epoch": 0.94, "learning_rate": 5.009485199186509e-08, "logits/chosen": -2.901669502258301, "logits/rejected": -2.3334624767303467, "logps/chosen": -102.54600524902344, "logps/rejected": -955.33740234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5214582681655884, "rewards/margins": 8.632076263427734, "rewards/rejected": -9.153534889221191, "step": 78740 }, { "epoch": 0.94, "learning_rate": 4.988699386437506e-08, "logits/chosen": -2.929715633392334, "logits/rejected": -2.1481471061706543, "logps/chosen": -132.44161987304688, "logps/rejected": -1013.0462646484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7980842590332031, "rewards/margins": 8.9262056350708, "rewards/rejected": -9.724289894104004, "step": 78750 }, { "epoch": 0.94, "learning_rate": 4.967956351981162e-08, "logits/chosen": -2.886727809906006, "logits/rejected": -2.2859623432159424, "logps/chosen": -121.9014663696289, "logps/rejected": -1020.90869140625, "loss": 0.0947, "rewards/accuracies": 1.0, "rewards/chosen": -0.7331648468971252, "rewards/margins": 9.073671340942383, "rewards/rejected": -9.806836128234863, "step": 78760 }, { "epoch": 0.94, "learning_rate": 4.9472560994390516e-08, "logits/chosen": -2.8699193000793457, "logits/rejected": -2.1306920051574707, "logps/chosen": -125.5373306274414, "logps/rejected": -938.6061401367188, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -0.8001738786697388, "rewards/margins": 8.19693374633789, "rewards/rejected": -8.997109413146973, "step": 78770 }, { "epoch": 0.94, "learning_rate": 4.926598632425422e-08, "logits/chosen": -2.9005961418151855, "logits/rejected": -2.6242196559906006, "logps/chosen": -67.31153106689453, "logps/rejected": -814.7184448242188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.29184311628341675, "rewards/margins": 7.475837707519531, "rewards/rejected": -7.7676801681518555, "step": 78780 }, { "epoch": 0.94, "learning_rate": 4.905983954547e-08, "logits/chosen": -2.958874464035034, "logits/rejected": -2.5031447410583496, "logps/chosen": -128.1722412109375, "logps/rejected": -889.20166015625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.8506015539169312, "rewards/margins": 7.65121603012085, "rewards/rejected": -8.50181770324707, "step": 78790 }, { "epoch": 0.94, "learning_rate": 4.8854120694030726e-08, "logits/chosen": -2.8795390129089355, "logits/rejected": -2.3178210258483887, "logps/chosen": -97.65547943115234, "logps/rejected": -974.3759765625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.5020112991333008, "rewards/margins": 8.856557846069336, "rewards/rejected": -9.358569145202637, "step": 78800 }, { "epoch": 0.94, "learning_rate": 4.864882980585406e-08, "logits/chosen": -2.8607449531555176, "logits/rejected": -2.358764886856079, "logps/chosen": -117.6014633178711, "logps/rejected": -940.0859375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6929891109466553, "rewards/margins": 8.323770523071289, "rewards/rejected": -9.016758918762207, "step": 78810 }, { "epoch": 0.94, "learning_rate": 4.8443966916783546e-08, "logits/chosen": -2.8259036540985107, "logits/rejected": -2.323448419570923, "logps/chosen": -87.12364959716797, "logps/rejected": -835.89306640625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4594349265098572, "rewards/margins": 7.527653694152832, "rewards/rejected": -7.987088203430176, "step": 78820 }, { "epoch": 0.94, "learning_rate": 4.8239532062587526e-08, "logits/chosen": -2.903031826019287, "logits/rejected": -2.2357404232025146, "logps/chosen": -140.92425537109375, "logps/rejected": -1093.6234130859375, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.8673070073127747, "rewards/margins": 9.637243270874023, "rewards/rejected": -10.50455093383789, "step": 78830 }, { "epoch": 0.94, "learning_rate": 4.803552527895966e-08, "logits/chosen": -2.8522019386291504, "logits/rejected": -2.2569267749786377, "logps/chosen": -112.02017974853516, "logps/rejected": -969.1697387695312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6083752512931824, "rewards/margins": 8.68691635131836, "rewards/rejected": -9.295293807983398, "step": 78840 }, { "epoch": 0.94, "learning_rate": 4.783194660151896e-08, "logits/chosen": -2.866816997528076, "logits/rejected": -2.3789258003234863, "logps/chosen": -143.66116333007812, "logps/rejected": -933.8468017578125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.9720951914787292, "rewards/margins": 7.9687604904174805, "rewards/rejected": -8.940855026245117, "step": 78850 }, { "epoch": 0.94, "learning_rate": 4.762879606580978e-08, "logits/chosen": -2.869356155395508, "logits/rejected": -2.4921813011169434, "logps/chosen": -115.735107421875, "logps/rejected": -813.74755859375, "loss": 0.0989, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7340649366378784, "rewards/margins": 7.0239667892456055, "rewards/rejected": -7.758031368255615, "step": 78860 }, { "epoch": 0.94, "learning_rate": 4.742607370730179e-08, "logits/chosen": -2.8740391731262207, "logits/rejected": -2.283491373062134, "logps/chosen": -115.06291198730469, "logps/rejected": -827.43017578125, "loss": 0.0527, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7004370093345642, "rewards/margins": 7.191417694091797, "rewards/rejected": -7.891855716705322, "step": 78870 }, { "epoch": 0.94, "learning_rate": 4.722377956138946e-08, "logits/chosen": -2.9006125926971436, "logits/rejected": -2.417696714401245, "logps/chosen": -92.68000793457031, "logps/rejected": -938.8816528320312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.45759525895118713, "rewards/margins": 8.559375762939453, "rewards/rejected": -9.016972541809082, "step": 78880 }, { "epoch": 0.94, "learning_rate": 4.7021913663392874e-08, "logits/chosen": -2.8698623180389404, "logits/rejected": -2.2883472442626953, "logps/chosen": -97.60270690917969, "logps/rejected": -863.6463623046875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5232614278793335, "rewards/margins": 7.749081611633301, "rewards/rejected": -8.272343635559082, "step": 78890 }, { "epoch": 0.94, "learning_rate": 4.682047604855772e-08, "logits/chosen": -2.9012672901153564, "logits/rejected": -2.605313777923584, "logps/chosen": -85.2635726928711, "logps/rejected": -817.4292602539062, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.43835797905921936, "rewards/margins": 7.366755485534668, "rewards/rejected": -7.805113792419434, "step": 78900 }, { "epoch": 0.94, "learning_rate": 4.6619466752053646e-08, "logits/chosen": -2.897820234298706, "logits/rejected": -2.3496243953704834, "logps/chosen": -120.2678451538086, "logps/rejected": -969.1632080078125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7222094535827637, "rewards/margins": 8.582415580749512, "rewards/rejected": -9.304624557495117, "step": 78910 }, { "epoch": 0.94, "learning_rate": 4.641888580897674e-08, "logits/chosen": -2.875120162963867, "logits/rejected": -2.3530678749084473, "logps/chosen": -105.38011169433594, "logps/rejected": -900.8621215820312, "loss": 0.103, "rewards/accuracies": 1.0, "rewards/chosen": -0.517452597618103, "rewards/margins": 8.092450141906738, "rewards/rejected": -8.609903335571289, "step": 78920 }, { "epoch": 0.94, "learning_rate": 4.621873325434789e-08, "logits/chosen": -2.838075637817383, "logits/rejected": -1.992334008216858, "logps/chosen": -137.1001434326172, "logps/rejected": -1097.140380859375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7923148274421692, "rewards/margins": 9.768760681152344, "rewards/rejected": -10.561075210571289, "step": 78930 }, { "epoch": 0.94, "learning_rate": 4.6019009123113304e-08, "logits/chosen": -2.8414855003356934, "logits/rejected": -2.1987297534942627, "logps/chosen": -116.8018798828125, "logps/rejected": -944.6683349609375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7132757306098938, "rewards/margins": 8.339099884033203, "rewards/rejected": -9.052375793457031, "step": 78940 }, { "epoch": 0.95, "learning_rate": 4.581971345014369e-08, "logits/chosen": -2.9408926963806152, "logits/rejected": -2.5384302139282227, "logps/chosen": -93.82604217529297, "logps/rejected": -848.8844604492188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5005384683609009, "rewards/margins": 7.604567050933838, "rewards/rejected": -8.10510540008545, "step": 78950 }, { "epoch": 0.95, "learning_rate": 4.56208462702365e-08, "logits/chosen": -2.9103732109069824, "logits/rejected": -2.417088508605957, "logps/chosen": -74.21339416503906, "logps/rejected": -817.3073120117188, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -0.33063435554504395, "rewards/margins": 7.474093437194824, "rewards/rejected": -7.804728031158447, "step": 78960 }, { "epoch": 0.95, "learning_rate": 4.542240761811228e-08, "logits/chosen": -2.8647301197052, "logits/rejected": -2.316567897796631, "logps/chosen": -146.50775146484375, "logps/rejected": -1034.158935546875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.9347610473632812, "rewards/margins": 8.999442100524902, "rewards/rejected": -9.934202194213867, "step": 78970 }, { "epoch": 0.95, "learning_rate": 4.522439752841862e-08, "logits/chosen": -2.9137375354766846, "logits/rejected": -2.5458645820617676, "logps/chosen": -85.06612396240234, "logps/rejected": -860.7058715820312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4690406322479248, "rewards/margins": 7.776553153991699, "rewards/rejected": -8.24559497833252, "step": 78980 }, { "epoch": 0.95, "learning_rate": 4.502681603572701e-08, "logits/chosen": -2.902282476425171, "logits/rejected": -2.429483413696289, "logps/chosen": -85.00425720214844, "logps/rejected": -816.8922119140625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.41381049156188965, "rewards/margins": 7.3774213790893555, "rewards/rejected": -7.791231632232666, "step": 78990 }, { "epoch": 0.95, "learning_rate": 4.482966317453513e-08, "logits/chosen": -2.894496202468872, "logits/rejected": -2.1432132720947266, "logps/chosen": -134.43324279785156, "logps/rejected": -1002.541015625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8024152517318726, "rewards/margins": 8.82948112487793, "rewards/rejected": -9.631896018981934, "step": 79000 }, { "epoch": 0.95, "learning_rate": 4.4632938979265186e-08, "logits/chosen": -2.8580710887908936, "logits/rejected": -2.125887632369995, "logps/chosen": -147.19082641601562, "logps/rejected": -978.0997314453125, "loss": 0.0965, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9558225870132446, "rewards/margins": 8.419682502746582, "rewards/rejected": -9.375504493713379, "step": 79010 }, { "epoch": 0.95, "learning_rate": 4.443664348426468e-08, "logits/chosen": -2.885648250579834, "logits/rejected": -2.4152302742004395, "logps/chosen": -85.17964935302734, "logps/rejected": -872.40380859375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.41488194465637207, "rewards/margins": 7.926393032073975, "rewards/rejected": -8.341276168823242, "step": 79020 }, { "epoch": 0.95, "learning_rate": 4.4240776723806224e-08, "logits/chosen": -2.9041457176208496, "logits/rejected": -2.4228832721710205, "logps/chosen": -85.53031158447266, "logps/rejected": -860.8753051757812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4047587811946869, "rewards/margins": 7.814595699310303, "rewards/rejected": -8.219354629516602, "step": 79030 }, { "epoch": 0.95, "learning_rate": 4.4045338732087437e-08, "logits/chosen": -2.862067699432373, "logits/rejected": -2.054774284362793, "logps/chosen": -123.1823959350586, "logps/rejected": -1004.1851806640625, "loss": 0.1291, "rewards/accuracies": 1.0, "rewards/chosen": -0.693808913230896, "rewards/margins": 8.950414657592773, "rewards/rejected": -9.644224166870117, "step": 79040 }, { "epoch": 0.95, "learning_rate": 4.385032954323132e-08, "logits/chosen": -2.852733612060547, "logits/rejected": -2.3169264793395996, "logps/chosen": -109.72566986083984, "logps/rejected": -918.3564453125, "loss": 0.1223, "rewards/accuracies": 1.0, "rewards/chosen": -0.5971958637237549, "rewards/margins": 8.197736740112305, "rewards/rejected": -8.794933319091797, "step": 79050 }, { "epoch": 0.95, "learning_rate": 4.36557491912859e-08, "logits/chosen": -2.8831520080566406, "logits/rejected": -2.4326086044311523, "logps/chosen": -98.83661651611328, "logps/rejected": -952.8724365234375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5409498810768127, "rewards/margins": 8.599207878112793, "rewards/rejected": -9.140157699584961, "step": 79060 }, { "epoch": 0.95, "learning_rate": 4.346159771022457e-08, "logits/chosen": -2.892329216003418, "logits/rejected": -2.492297649383545, "logps/chosen": -83.63463592529297, "logps/rejected": -811.0535278320312, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.40133315324783325, "rewards/margins": 7.3362860679626465, "rewards/rejected": -7.737618923187256, "step": 79070 }, { "epoch": 0.95, "learning_rate": 4.326787513394548e-08, "logits/chosen": -2.8817648887634277, "logits/rejected": -2.1801846027374268, "logps/chosen": -137.2412872314453, "logps/rejected": -1072.468017578125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.784748375415802, "rewards/margins": 9.525388717651367, "rewards/rejected": -10.310136795043945, "step": 79080 }, { "epoch": 0.95, "learning_rate": 4.307458149627214e-08, "logits/chosen": -2.897735118865967, "logits/rejected": -2.5829532146453857, "logps/chosen": -72.04600524902344, "logps/rejected": -800.0185546875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.29284924268722534, "rewards/margins": 7.307662010192871, "rewards/rejected": -7.600511074066162, "step": 79090 }, { "epoch": 0.95, "learning_rate": 4.28817168309531e-08, "logits/chosen": -2.90010142326355, "logits/rejected": -2.3650431632995605, "logps/chosen": -99.01468658447266, "logps/rejected": -935.3043212890625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5246062278747559, "rewards/margins": 8.447504043579102, "rewards/rejected": -8.9721097946167, "step": 79100 }, { "epoch": 0.95, "learning_rate": 4.2689281171661986e-08, "logits/chosen": -2.8747670650482178, "logits/rejected": -2.213331937789917, "logps/chosen": -109.0782241821289, "logps/rejected": -982.6466064453125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5770339965820312, "rewards/margins": 8.857025146484375, "rewards/rejected": -9.434060096740723, "step": 79110 }, { "epoch": 0.95, "learning_rate": 4.249727455199748e-08, "logits/chosen": -2.9265122413635254, "logits/rejected": -2.4041390419006348, "logps/chosen": -99.39234161376953, "logps/rejected": -909.2935791015625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5643433332443237, "rewards/margins": 8.144845962524414, "rewards/rejected": -8.709188461303711, "step": 79120 }, { "epoch": 0.95, "learning_rate": 4.2305697005483315e-08, "logits/chosen": -2.8718037605285645, "logits/rejected": -2.330712080001831, "logps/chosen": -101.26319885253906, "logps/rejected": -884.5731201171875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5202781558036804, "rewards/margins": 7.940208435058594, "rewards/rejected": -8.46048641204834, "step": 79130 }, { "epoch": 0.95, "learning_rate": 4.211454856556857e-08, "logits/chosen": -2.937126636505127, "logits/rejected": -2.5328660011291504, "logps/chosen": -90.39457702636719, "logps/rejected": -919.3239135742188, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4717448651790619, "rewards/margins": 8.356664657592773, "rewards/rejected": -8.828410148620605, "step": 79140 }, { "epoch": 0.95, "learning_rate": 4.192382926562738e-08, "logits/chosen": -2.876396656036377, "logits/rejected": -2.1959891319274902, "logps/chosen": -110.17497253417969, "logps/rejected": -1081.8575439453125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6249162554740906, "rewards/margins": 9.800638198852539, "rewards/rejected": -10.425554275512695, "step": 79150 }, { "epoch": 0.95, "learning_rate": 4.173353913895839e-08, "logits/chosen": -2.8836300373077393, "logits/rejected": -2.4593467712402344, "logps/chosen": -95.48358154296875, "logps/rejected": -872.8821411132812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5019861459732056, "rewards/margins": 7.84139347076416, "rewards/rejected": -8.343379974365234, "step": 79160 }, { "epoch": 0.95, "learning_rate": 4.154367821878585e-08, "logits/chosen": -2.908720016479492, "logits/rejected": -2.2552707195281982, "logps/chosen": -102.29682922363281, "logps/rejected": -895.6964111328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5542095899581909, "rewards/margins": 8.025147438049316, "rewards/rejected": -8.579357147216797, "step": 79170 }, { "epoch": 0.95, "learning_rate": 4.135424653825909e-08, "logits/chosen": -2.908175468444824, "logits/rejected": -2.481548547744751, "logps/chosen": -89.38863372802734, "logps/rejected": -852.8189697265625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.46121373772621155, "rewards/margins": 7.68216609954834, "rewards/rejected": -8.143379211425781, "step": 79180 }, { "epoch": 0.95, "learning_rate": 4.1165244130452474e-08, "logits/chosen": -2.872077703475952, "logits/rejected": -2.2440638542175293, "logps/chosen": -106.44708251953125, "logps/rejected": -1021.7952880859375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5758927464485168, "rewards/margins": 9.239304542541504, "rewards/rejected": -9.815197944641113, "step": 79190 }, { "epoch": 0.95, "learning_rate": 4.097667102836489e-08, "logits/chosen": -2.873314380645752, "logits/rejected": -2.188356876373291, "logps/chosen": -126.28360748291016, "logps/rejected": -893.5609130859375, "loss": 0.1047, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7943108081817627, "rewards/margins": 7.754185676574707, "rewards/rejected": -8.548498153686523, "step": 79200 }, { "epoch": 0.95, "learning_rate": 4.078852726492083e-08, "logits/chosen": -2.8614296913146973, "logits/rejected": -2.2700371742248535, "logps/chosen": -152.18743896484375, "logps/rejected": -898.3382568359375, "loss": 0.2308, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.0683554410934448, "rewards/margins": 7.511805057525635, "rewards/rejected": -8.580161094665527, "step": 79210 }, { "epoch": 0.95, "learning_rate": 4.060081287297013e-08, "logits/chosen": -2.890361785888672, "logits/rejected": -2.227475643157959, "logps/chosen": -110.6890640258789, "logps/rejected": -1031.0455322265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5930946469306946, "rewards/margins": 9.31828498840332, "rewards/rejected": -9.911377906799316, "step": 79220 }, { "epoch": 0.95, "learning_rate": 4.04135278852863e-08, "logits/chosen": -2.8359744548797607, "logits/rejected": -2.352619171142578, "logps/chosen": -99.2298812866211, "logps/rejected": -860.8045043945312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5221333503723145, "rewards/margins": 7.681765556335449, "rewards/rejected": -8.203898429870605, "step": 79230 }, { "epoch": 0.95, "learning_rate": 4.022667233456956e-08, "logits/chosen": -2.8354434967041016, "logits/rejected": -2.0652756690979004, "logps/chosen": -111.3585205078125, "logps/rejected": -1040.3577880859375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6209567785263062, "rewards/margins": 9.393030166625977, "rewards/rejected": -10.01398754119873, "step": 79240 }, { "epoch": 0.95, "learning_rate": 4.00402462534441e-08, "logits/chosen": -2.8716320991516113, "logits/rejected": -2.3404381275177, "logps/chosen": -131.21728515625, "logps/rejected": -903.4112548828125, "loss": 0.1057, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.855697751045227, "rewards/margins": 7.791309356689453, "rewards/rejected": -8.647007942199707, "step": 79250 }, { "epoch": 0.95, "learning_rate": 3.9854249674459445e-08, "logits/chosen": -2.8861632347106934, "logits/rejected": -2.6427266597747803, "logps/chosen": -92.96241760253906, "logps/rejected": -771.5667114257812, "loss": 0.1473, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5837297439575195, "rewards/margins": 6.765052795410156, "rewards/rejected": -7.348782539367676, "step": 79260 }, { "epoch": 0.95, "learning_rate": 3.9668682630089883e-08, "logits/chosen": -2.900481939315796, "logits/rejected": -2.3348565101623535, "logps/chosen": -106.65129089355469, "logps/rejected": -969.3914794921875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.5996063947677612, "rewards/margins": 8.698251724243164, "rewards/rejected": -9.297857284545898, "step": 79270 }, { "epoch": 0.95, "learning_rate": 3.9483545152735335e-08, "logits/chosen": -2.8720602989196777, "logits/rejected": -2.2619035243988037, "logps/chosen": -114.80142974853516, "logps/rejected": -1034.7672119140625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.668980062007904, "rewards/margins": 9.286623001098633, "rewards/rejected": -9.955602645874023, "step": 79280 }, { "epoch": 0.95, "learning_rate": 3.929883727471967e-08, "logits/chosen": -2.8778178691864014, "logits/rejected": -2.352013111114502, "logps/chosen": -98.7049789428711, "logps/rejected": -904.6759643554688, "loss": 0.122, "rewards/accuracies": 1.0, "rewards/chosen": -0.5000125169754028, "rewards/margins": 8.161738395690918, "rewards/rejected": -8.661750793457031, "step": 79290 }, { "epoch": 0.95, "learning_rate": 3.911455902829292e-08, "logits/chosen": -2.8813061714172363, "logits/rejected": -2.4672961235046387, "logps/chosen": -81.57379150390625, "logps/rejected": -781.7171630859375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.32746824622154236, "rewards/margins": 7.1286115646362305, "rewards/rejected": -7.456079959869385, "step": 79300 }, { "epoch": 0.95, "learning_rate": 3.893071044562907e-08, "logits/chosen": -2.893594264984131, "logits/rejected": -2.2911269664764404, "logps/chosen": -108.57010650634766, "logps/rejected": -910.2750854492188, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6003615260124207, "rewards/margins": 8.118171691894531, "rewards/rejected": -8.718533515930176, "step": 79310 }, { "epoch": 0.95, "learning_rate": 3.874729155882828e-08, "logits/chosen": -2.8758010864257812, "logits/rejected": -2.2202281951904297, "logps/chosen": -116.37440490722656, "logps/rejected": -978.5208129882812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6489182114601135, "rewards/margins": 8.73965072631836, "rewards/rejected": -9.388569831848145, "step": 79320 }, { "epoch": 0.95, "learning_rate": 3.8564302399914097e-08, "logits/chosen": -2.898944139480591, "logits/rejected": -2.6469969749450684, "logps/chosen": -63.33280563354492, "logps/rejected": -787.9423828125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.25750869512557983, "rewards/margins": 7.264431953430176, "rewards/rejected": -7.5219407081604, "step": 79330 }, { "epoch": 0.95, "learning_rate": 3.8381743000836524e-08, "logits/chosen": -2.90592622756958, "logits/rejected": -2.247472047805786, "logps/chosen": -107.049560546875, "logps/rejected": -994.1046142578125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.5309659242630005, "rewards/margins": 9.015729904174805, "rewards/rejected": -9.546695709228516, "step": 79340 }, { "epoch": 0.95, "learning_rate": 3.819961339347006e-08, "logits/chosen": -2.8871161937713623, "logits/rejected": -2.320842742919922, "logps/chosen": -99.99281311035156, "logps/rejected": -960.32177734375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5318637490272522, "rewards/margins": 8.694040298461914, "rewards/rejected": -9.22590446472168, "step": 79350 }, { "epoch": 0.95, "learning_rate": 3.8017913609613436e-08, "logits/chosen": -2.866281747817993, "logits/rejected": -2.2519032955169678, "logps/chosen": -117.950927734375, "logps/rejected": -892.8004760742188, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6886706352233887, "rewards/margins": 7.863066673278809, "rewards/rejected": -8.551736831665039, "step": 79360 }, { "epoch": 0.95, "learning_rate": 3.7836643680991546e-08, "logits/chosen": -2.8526833057403564, "logits/rejected": -2.4158875942230225, "logps/chosen": -107.80134582519531, "logps/rejected": -844.1951293945312, "loss": 0.1157, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6358758807182312, "rewards/margins": 7.422624111175537, "rewards/rejected": -8.058499336242676, "step": 79370 }, { "epoch": 0.95, "learning_rate": 3.7655803639252976e-08, "logits/chosen": -2.8413403034210205, "logits/rejected": -2.058690309524536, "logps/chosen": -123.90201568603516, "logps/rejected": -1051.8658447265625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.7164983749389648, "rewards/margins": 9.395426750183105, "rewards/rejected": -10.11192512512207, "step": 79380 }, { "epoch": 0.95, "learning_rate": 3.7475393515972184e-08, "logits/chosen": -2.929037094116211, "logits/rejected": -2.4033706188201904, "logps/chosen": -90.38356018066406, "logps/rejected": -934.7392578125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4444397985935211, "rewards/margins": 8.516433715820312, "rewards/rejected": -8.960872650146484, "step": 79390 }, { "epoch": 0.95, "learning_rate": 3.729541334264841e-08, "logits/chosen": -2.874438524246216, "logits/rejected": -2.220081090927124, "logps/chosen": -102.50860595703125, "logps/rejected": -999.14208984375, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -0.5272909998893738, "rewards/margins": 9.059938430786133, "rewards/rejected": -9.587228775024414, "step": 79400 }, { "epoch": 0.95, "learning_rate": 3.71158631507057e-08, "logits/chosen": -2.9059274196624756, "logits/rejected": -2.398918867111206, "logps/chosen": -91.18583679199219, "logps/rejected": -893.7371215820312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4701566696166992, "rewards/margins": 8.086143493652344, "rewards/rejected": -8.556299209594727, "step": 79410 }, { "epoch": 0.95, "learning_rate": 3.6936742971493135e-08, "logits/chosen": -2.8096306324005127, "logits/rejected": -2.190258502960205, "logps/chosen": -112.93756103515625, "logps/rejected": -908.6505737304688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5848493576049805, "rewards/margins": 8.113204956054688, "rewards/rejected": -8.698054313659668, "step": 79420 }, { "epoch": 0.95, "learning_rate": 3.6758052836284034e-08, "logits/chosen": -2.8477301597595215, "logits/rejected": -2.1803932189941406, "logps/chosen": -137.75970458984375, "logps/rejected": -969.84912109375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8264917135238647, "rewards/margins": 8.470069885253906, "rewards/rejected": -9.296561241149902, "step": 79430 }, { "epoch": 0.95, "learning_rate": 3.65797927762776e-08, "logits/chosen": -2.852609157562256, "logits/rejected": -2.175776243209839, "logps/chosen": -149.4417266845703, "logps/rejected": -949.85546875, "loss": 0.2138, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9694867134094238, "rewards/margins": 8.140348434448242, "rewards/rejected": -9.109837532043457, "step": 79440 }, { "epoch": 0.95, "learning_rate": 3.640196282259728e-08, "logits/chosen": -2.91345477104187, "logits/rejected": -2.449873447418213, "logps/chosen": -98.8622055053711, "logps/rejected": -944.3128662109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5636979341506958, "rewards/margins": 8.48802661895752, "rewards/rejected": -9.051724433898926, "step": 79450 }, { "epoch": 0.95, "learning_rate": 3.622456300629212e-08, "logits/chosen": -2.8883986473083496, "logits/rejected": -2.2309422492980957, "logps/chosen": -111.37629699707031, "logps/rejected": -936.8013916015625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5851387977600098, "rewards/margins": 8.390523910522461, "rewards/rejected": -8.975664138793945, "step": 79460 }, { "epoch": 0.95, "learning_rate": 3.604759335833541e-08, "logits/chosen": -2.868461847305298, "logits/rejected": -2.408364772796631, "logps/chosen": -82.89522552490234, "logps/rejected": -870.1198120117188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.412830650806427, "rewards/margins": 7.925169467926025, "rewards/rejected": -8.338001251220703, "step": 79470 }, { "epoch": 0.95, "learning_rate": 3.5871053909625755e-08, "logits/chosen": -2.9047439098358154, "logits/rejected": -2.1697468757629395, "logps/chosen": -116.77862548828125, "logps/rejected": -1002.2957153320312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6708012819290161, "rewards/margins": 8.952089309692383, "rewards/rejected": -9.622892379760742, "step": 79480 }, { "epoch": 0.95, "learning_rate": 3.5694944690986276e-08, "logits/chosen": -2.8699004650115967, "logits/rejected": -2.175856351852417, "logps/chosen": -112.12525939941406, "logps/rejected": -1015.4671020507812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6334246397018433, "rewards/margins": 9.10639762878418, "rewards/rejected": -9.739822387695312, "step": 79490 }, { "epoch": 0.95, "learning_rate": 3.551926573316461e-08, "logits/chosen": -2.9196622371673584, "logits/rejected": -2.6336333751678467, "logps/chosen": -91.12938690185547, "logps/rejected": -752.741943359375, "loss": 0.0331, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.4770030975341797, "rewards/margins": 6.678131103515625, "rewards/rejected": -7.1551337242126465, "step": 79500 }, { "epoch": 0.95, "learning_rate": 3.534401706683454e-08, "logits/chosen": -2.9343600273132324, "logits/rejected": -2.5091490745544434, "logps/chosen": -82.69895935058594, "logps/rejected": -819.2315673828125, "loss": 0.1158, "rewards/accuracies": 1.0, "rewards/chosen": -0.3888363540172577, "rewards/margins": 7.423787593841553, "rewards/rejected": -7.812624454498291, "step": 79510 }, { "epoch": 0.95, "learning_rate": 3.516919872259411e-08, "logits/chosen": -2.8711423873901367, "logits/rejected": -2.476871967315674, "logps/chosen": -87.52565002441406, "logps/rejected": -832.0283203125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.46897751092910767, "rewards/margins": 7.475825309753418, "rewards/rejected": -7.944802284240723, "step": 79520 }, { "epoch": 0.95, "learning_rate": 3.4994810730965553e-08, "logits/chosen": -2.870291233062744, "logits/rejected": -2.3835291862487793, "logps/chosen": -97.89122772216797, "logps/rejected": -854.5574951171875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5261183381080627, "rewards/margins": 7.636868476867676, "rewards/rejected": -8.162986755371094, "step": 79530 }, { "epoch": 0.95, "learning_rate": 3.482085312239702e-08, "logits/chosen": -2.8729870319366455, "logits/rejected": -2.369835138320923, "logps/chosen": -115.13822937011719, "logps/rejected": -873.8004150390625, "loss": 0.0572, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6460627913475037, "rewards/margins": 7.718161106109619, "rewards/rejected": -8.364224433898926, "step": 79540 }, { "epoch": 0.95, "learning_rate": 3.464732592726061e-08, "logits/chosen": -2.9031169414520264, "logits/rejected": -2.5589230060577393, "logps/chosen": -73.99592590332031, "logps/rejected": -763.359130859375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3357470631599426, "rewards/margins": 6.9236955642700195, "rewards/rejected": -7.259442329406738, "step": 79550 }, { "epoch": 0.95, "learning_rate": 3.4474229175854024e-08, "logits/chosen": -2.890089273452759, "logits/rejected": -2.350215435028076, "logps/chosen": -123.16302490234375, "logps/rejected": -905.5885009765625, "loss": 0.0764, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7494732737541199, "rewards/margins": 7.913399696350098, "rewards/rejected": -8.662873268127441, "step": 79560 }, { "epoch": 0.95, "learning_rate": 3.43015628983992e-08, "logits/chosen": -2.879848003387451, "logits/rejected": -2.224778175354004, "logps/chosen": -132.67161560058594, "logps/rejected": -968.9255981445312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7621302604675293, "rewards/margins": 8.53363037109375, "rewards/rejected": -9.295761108398438, "step": 79570 }, { "epoch": 0.95, "learning_rate": 3.4129327125043135e-08, "logits/chosen": -2.8967044353485107, "logits/rejected": -2.4257264137268066, "logps/chosen": -95.80606079101562, "logps/rejected": -855.7438354492188, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -0.5332919955253601, "rewards/margins": 7.650362968444824, "rewards/rejected": -8.183655738830566, "step": 79580 }, { "epoch": 0.95, "learning_rate": 3.395752188585816e-08, "logits/chosen": -2.89739990234375, "logits/rejected": -2.450500726699829, "logps/chosen": -105.29681396484375, "logps/rejected": -863.9889526367188, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6109968423843384, "rewards/margins": 7.638882637023926, "rewards/rejected": -8.249879837036133, "step": 79590 }, { "epoch": 0.95, "learning_rate": 3.3786147210840555e-08, "logits/chosen": -2.910292863845825, "logits/rejected": -2.3899121284484863, "logps/chosen": -118.7308349609375, "logps/rejected": -1006.9149169921875, "loss": 0.1599, "rewards/accuracies": 1.0, "rewards/chosen": -0.6509145498275757, "rewards/margins": 9.023719787597656, "rewards/rejected": -9.674633979797363, "step": 79600 }, { "epoch": 0.95, "learning_rate": 3.361520312991223e-08, "logits/chosen": -2.884937286376953, "logits/rejected": -2.3221356868743896, "logps/chosen": -103.24530029296875, "logps/rejected": -867.6436767578125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5608066320419312, "rewards/margins": 7.724236965179443, "rewards/rejected": -8.285043716430664, "step": 79610 }, { "epoch": 0.95, "learning_rate": 3.3444689672919306e-08, "logits/chosen": -2.9117119312286377, "logits/rejected": -2.1694083213806152, "logps/chosen": -130.53457641601562, "logps/rejected": -1001.3499755859375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6836045980453491, "rewards/margins": 8.923813819885254, "rewards/rejected": -9.607418060302734, "step": 79620 }, { "epoch": 0.95, "learning_rate": 3.3274606869632685e-08, "logits/chosen": -2.862241744995117, "logits/rejected": -2.23115873336792, "logps/chosen": -141.9705047607422, "logps/rejected": -911.3560791015625, "loss": 0.1126, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8719167709350586, "rewards/margins": 7.849087715148926, "rewards/rejected": -8.7210054397583, "step": 79630 }, { "epoch": 0.95, "learning_rate": 3.3104954749748893e-08, "logits/chosen": -2.874798536300659, "logits/rejected": -2.3787598609924316, "logps/chosen": -110.71229553222656, "logps/rejected": -933.27001953125, "loss": 0.1016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6392992734909058, "rewards/margins": 8.298093795776367, "rewards/rejected": -8.937393188476562, "step": 79640 }, { "epoch": 0.95, "learning_rate": 3.293573334288869e-08, "logits/chosen": -2.870851755142212, "logits/rejected": -2.2941722869873047, "logps/chosen": -97.5445556640625, "logps/rejected": -894.6702880859375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.4677616059780121, "rewards/margins": 8.099930763244629, "rewards/rejected": -8.567692756652832, "step": 79650 }, { "epoch": 0.95, "learning_rate": 3.276694267859704e-08, "logits/chosen": -2.927504777908325, "logits/rejected": -2.1952903270721436, "logps/chosen": -116.29960632324219, "logps/rejected": -1013.2423706054688, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6296958923339844, "rewards/margins": 9.089916229248047, "rewards/rejected": -9.719612121582031, "step": 79660 }, { "epoch": 0.95, "learning_rate": 3.259858278634509e-08, "logits/chosen": -2.8911209106445312, "logits/rejected": -2.3083293437957764, "logps/chosen": -115.6051025390625, "logps/rejected": -981.3508911132812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6297520399093628, "rewards/margins": 8.797643661499023, "rewards/rejected": -9.427395820617676, "step": 79670 }, { "epoch": 0.95, "learning_rate": 3.243065369552767e-08, "logits/chosen": -2.8896713256835938, "logits/rejected": -2.2956717014312744, "logps/chosen": -121.66792297363281, "logps/rejected": -979.1358642578125, "loss": 0.135, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7337490320205688, "rewards/margins": 8.669960975646973, "rewards/rejected": -9.40371036529541, "step": 79680 }, { "epoch": 0.95, "learning_rate": 3.2263155435464656e-08, "logits/chosen": -2.855897903442383, "logits/rejected": -2.26503849029541, "logps/chosen": -106.47285461425781, "logps/rejected": -969.2880859375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6186816096305847, "rewards/margins": 8.681744575500488, "rewards/rejected": -9.300426483154297, "step": 79690 }, { "epoch": 0.95, "learning_rate": 3.209608803540071e-08, "logits/chosen": -2.8815388679504395, "logits/rejected": -2.3406808376312256, "logps/chosen": -104.74562072753906, "logps/rejected": -936.8762817382812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5824167132377625, "rewards/margins": 8.38524341583252, "rewards/rejected": -8.967658996582031, "step": 79700 }, { "epoch": 0.95, "learning_rate": 3.192945152450555e-08, "logits/chosen": -2.931640148162842, "logits/rejected": -2.343400478363037, "logps/chosen": -120.16109466552734, "logps/rejected": -941.8538818359375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.6310214996337891, "rewards/margins": 8.374807357788086, "rewards/rejected": -9.005826950073242, "step": 79710 }, { "epoch": 0.95, "learning_rate": 3.176324593187341e-08, "logits/chosen": -2.875127077102661, "logits/rejected": -2.1597564220428467, "logps/chosen": -147.83651733398438, "logps/rejected": -1016.6209716796875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.922995924949646, "rewards/margins": 8.834094047546387, "rewards/rejected": -9.75709056854248, "step": 79720 }, { "epoch": 0.95, "learning_rate": 3.15974712865233e-08, "logits/chosen": -2.8775386810302734, "logits/rejected": -2.192030906677246, "logps/chosen": -115.83961486816406, "logps/rejected": -1034.099609375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6621607542037964, "rewards/margins": 9.28797435760498, "rewards/rejected": -9.950135231018066, "step": 79730 }, { "epoch": 0.95, "learning_rate": 3.1432127617399275e-08, "logits/chosen": -2.877474308013916, "logits/rejected": -2.296738386154175, "logps/chosen": -114.9861068725586, "logps/rejected": -884.2391357421875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.647149920463562, "rewards/margins": 7.806179046630859, "rewards/rejected": -8.453328132629395, "step": 79740 }, { "epoch": 0.95, "learning_rate": 3.126721495336937e-08, "logits/chosen": -2.8829007148742676, "logits/rejected": -2.334717273712158, "logps/chosen": -110.28865051269531, "logps/rejected": -1043.730712890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6001967191696167, "rewards/margins": 9.4307222366333, "rewards/rejected": -10.030920028686523, "step": 79750 }, { "epoch": 0.95, "learning_rate": 3.110273332322722e-08, "logits/chosen": -2.8923816680908203, "logits/rejected": -2.5410609245300293, "logps/chosen": -70.81254577636719, "logps/rejected": -827.0452270507812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.330081045627594, "rewards/margins": 7.562033176422119, "rewards/rejected": -7.892115116119385, "step": 79760 }, { "epoch": 0.95, "learning_rate": 3.0938682755690676e-08, "logits/chosen": -2.895456075668335, "logits/rejected": -2.296035051345825, "logps/chosen": -86.48486328125, "logps/rejected": -863.5567626953125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.4214855134487152, "rewards/margins": 7.8404107093811035, "rewards/rejected": -8.261896133422852, "step": 79770 }, { "epoch": 0.95, "learning_rate": 3.077506327940239e-08, "logits/chosen": -2.8882057666778564, "logits/rejected": -2.294949769973755, "logps/chosen": -98.197021484375, "logps/rejected": -910.6936645507812, "loss": 0.1632, "rewards/accuracies": 1.0, "rewards/chosen": -0.5214394330978394, "rewards/margins": 8.209115982055664, "rewards/rejected": -8.730554580688477, "step": 79780 }, { "epoch": 0.96, "learning_rate": 3.061187492293033e-08, "logits/chosen": -2.8878703117370605, "logits/rejected": -2.459651231765747, "logps/chosen": -121.77351379394531, "logps/rejected": -890.8216552734375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7951276898384094, "rewards/margins": 7.712960243225098, "rewards/rejected": -8.508089065551758, "step": 79790 }, { "epoch": 0.96, "learning_rate": 3.044911771476672e-08, "logits/chosen": -2.9094367027282715, "logits/rejected": -2.280759334564209, "logps/chosen": -105.04978942871094, "logps/rejected": -1035.134765625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5326849818229675, "rewards/margins": 9.4249849319458, "rewards/rejected": -9.957670211791992, "step": 79800 }, { "epoch": 0.96, "learning_rate": 3.028679168332771e-08, "logits/chosen": -2.8841891288757324, "logits/rejected": -2.4383721351623535, "logps/chosen": -121.0959243774414, "logps/rejected": -800.688720703125, "loss": 0.2811, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7896953225135803, "rewards/margins": 6.832337856292725, "rewards/rejected": -7.622033596038818, "step": 79810 }, { "epoch": 0.96, "learning_rate": 3.012489685695591e-08, "logits/chosen": -2.906794309616089, "logits/rejected": -2.5541484355926514, "logps/chosen": -84.59525299072266, "logps/rejected": -859.3956909179688, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.41148558259010315, "rewards/margins": 7.809584617614746, "rewards/rejected": -8.221070289611816, "step": 79820 }, { "epoch": 0.96, "learning_rate": 2.996343326391732e-08, "logits/chosen": -2.897226095199585, "logits/rejected": -2.415520191192627, "logps/chosen": -96.28824615478516, "logps/rejected": -867.6096801757812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5053143501281738, "rewards/margins": 7.783830165863037, "rewards/rejected": -8.289144515991211, "step": 79830 }, { "epoch": 0.96, "learning_rate": 2.980240093240272e-08, "logits/chosen": -2.9177093505859375, "logits/rejected": -2.408738136291504, "logps/chosen": -104.27079772949219, "logps/rejected": -922.7306518554688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5141127705574036, "rewards/margins": 8.320686340332031, "rewards/rejected": -8.834798812866211, "step": 79840 }, { "epoch": 0.96, "learning_rate": 2.964179989052851e-08, "logits/chosen": -2.8643200397491455, "logits/rejected": -2.244818687438965, "logps/chosen": -121.0573501586914, "logps/rejected": -939.66455078125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.7210680246353149, "rewards/margins": 8.29588794708252, "rewards/rejected": -9.01695442199707, "step": 79850 }, { "epoch": 0.96, "learning_rate": 2.948163016633504e-08, "logits/chosen": -2.8959414958953857, "logits/rejected": -2.531017780303955, "logps/chosen": -76.45094299316406, "logps/rejected": -825.7132568359375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.39987340569496155, "rewards/margins": 7.493348121643066, "rewards/rejected": -7.893221855163574, "step": 79860 }, { "epoch": 0.96, "learning_rate": 2.932189178778716e-08, "logits/chosen": -2.8701541423797607, "logits/rejected": -2.151796340942383, "logps/chosen": -136.1808319091797, "logps/rejected": -1013.6022338867188, "loss": 0.0775, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8188652992248535, "rewards/margins": 8.914502143859863, "rewards/rejected": -9.733366012573242, "step": 79870 }, { "epoch": 0.96, "learning_rate": 2.916258478277506e-08, "logits/chosen": -2.870138645172119, "logits/rejected": -2.489659547805786, "logps/chosen": -90.27375030517578, "logps/rejected": -911.3250732421875, "loss": 0.1007, "rewards/accuracies": 1.0, "rewards/chosen": -0.49547433853149414, "rewards/margins": 8.248929977416992, "rewards/rejected": -8.744405746459961, "step": 79880 }, { "epoch": 0.96, "learning_rate": 2.900370917911316e-08, "logits/chosen": -2.8880763053894043, "logits/rejected": -2.6744399070739746, "logps/chosen": -82.8347396850586, "logps/rejected": -764.4212036132812, "loss": 0.0989, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.4657810628414154, "rewards/margins": 6.800956726074219, "rewards/rejected": -7.266737461090088, "step": 79890 }, { "epoch": 0.96, "learning_rate": 2.8845265004540655e-08, "logits/chosen": -2.891852855682373, "logits/rejected": -2.3473408222198486, "logps/chosen": -90.26223754882812, "logps/rejected": -904.58544921875, "loss": 0.102, "rewards/accuracies": 1.0, "rewards/chosen": -0.4381721615791321, "rewards/margins": 8.202312469482422, "rewards/rejected": -8.640483856201172, "step": 79900 }, { "epoch": 0.96, "learning_rate": 2.8687252286721534e-08, "logits/chosen": -2.901637554168701, "logits/rejected": -2.244530439376831, "logps/chosen": -129.72488403320312, "logps/rejected": -1012.943359375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.782647967338562, "rewards/margins": 8.934423446655273, "rewards/rejected": -9.717073440551758, "step": 79910 }, { "epoch": 0.96, "learning_rate": 2.8529671053244556e-08, "logits/chosen": -2.8723912239074707, "logits/rejected": -2.2437744140625, "logps/chosen": -136.51223754882812, "logps/rejected": -987.0574951171875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8362824320793152, "rewards/margins": 8.63315200805664, "rewards/rejected": -9.46943473815918, "step": 79920 }, { "epoch": 0.96, "learning_rate": 2.8372521331622714e-08, "logits/chosen": -2.8484013080596924, "logits/rejected": -2.341550827026367, "logps/chosen": -105.10540771484375, "logps/rejected": -831.3464965820312, "loss": 0.0548, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6282868981361389, "rewards/margins": 7.303017616271973, "rewards/rejected": -7.931303977966309, "step": 79930 }, { "epoch": 0.96, "learning_rate": 2.8215803149294063e-08, "logits/chosen": -2.9087865352630615, "logits/rejected": -2.3469793796539307, "logps/chosen": -115.94991302490234, "logps/rejected": -914.7960815429688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6014866232872009, "rewards/margins": 8.160810470581055, "rewards/rejected": -8.762297630310059, "step": 79940 }, { "epoch": 0.96, "learning_rate": 2.805951653362088e-08, "logits/chosen": -2.9418559074401855, "logits/rejected": -2.4763169288635254, "logps/chosen": -136.8557891845703, "logps/rejected": -885.1340942382812, "loss": 0.184, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8823131322860718, "rewards/margins": 7.57061243057251, "rewards/rejected": -8.452925682067871, "step": 79950 }, { "epoch": 0.96, "learning_rate": 2.7903661511890777e-08, "logits/chosen": -2.825087308883667, "logits/rejected": -2.0621747970581055, "logps/chosen": -128.24374389648438, "logps/rejected": -1022.41650390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7434380650520325, "rewards/margins": 9.077024459838867, "rewards/rejected": -9.820463180541992, "step": 79960 }, { "epoch": 0.96, "learning_rate": 2.7748238111315327e-08, "logits/chosen": -2.902613401412964, "logits/rejected": -2.2093770503997803, "logps/chosen": -128.37380981445312, "logps/rejected": -936.79443359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7526618838310242, "rewards/margins": 8.217938423156738, "rewards/rejected": -8.970599174499512, "step": 79970 }, { "epoch": 0.96, "learning_rate": 2.7593246359031157e-08, "logits/chosen": -2.8676156997680664, "logits/rejected": -2.262479782104492, "logps/chosen": -121.57706451416016, "logps/rejected": -1006.2308349609375, "loss": 0.1057, "rewards/accuracies": 1.0, "rewards/chosen": -0.7296522259712219, "rewards/margins": 8.929265022277832, "rewards/rejected": -9.658917427062988, "step": 79980 }, { "epoch": 0.96, "learning_rate": 2.743868628209939e-08, "logits/chosen": -2.8834519386291504, "logits/rejected": -2.3922221660614014, "logps/chosen": -100.35941314697266, "logps/rejected": -957.6554565429688, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5598341822624207, "rewards/margins": 8.620735168457031, "rewards/rejected": -9.18056869506836, "step": 79990 }, { "epoch": 0.96, "learning_rate": 2.728455790750567e-08, "logits/chosen": -2.888089179992676, "logits/rejected": -2.3747706413269043, "logps/chosen": -137.28860473632812, "logps/rejected": -932.28271484375, "loss": 0.1478, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8664326667785645, "rewards/margins": 8.058194160461426, "rewards/rejected": -8.924627304077148, "step": 80000 }, { "epoch": 0.96, "learning_rate": 2.713086126216069e-08, "logits/chosen": -2.902338743209839, "logits/rejected": -2.5284831523895264, "logps/chosen": -69.59764862060547, "logps/rejected": -850.0345458984375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.28482526540756226, "rewards/margins": 7.845715522766113, "rewards/rejected": -8.13054084777832, "step": 80010 }, { "epoch": 0.96, "learning_rate": 2.6977596372899094e-08, "logits/chosen": -2.853024959564209, "logits/rejected": -2.2465996742248535, "logps/chosen": -123.50069427490234, "logps/rejected": -1013.0125122070312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7686467170715332, "rewards/margins": 8.962121963500977, "rewards/rejected": -9.730769157409668, "step": 80020 }, { "epoch": 0.96, "learning_rate": 2.6824763266480593e-08, "logits/chosen": -2.916808605194092, "logits/rejected": -2.187648057937622, "logps/chosen": -115.4177474975586, "logps/rejected": -929.07861328125, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -0.6099276542663574, "rewards/margins": 8.279176712036133, "rewards/rejected": -8.889104843139648, "step": 80030 }, { "epoch": 0.96, "learning_rate": 2.6672361969589388e-08, "logits/chosen": -2.8662326335906982, "logits/rejected": -2.3066749572753906, "logps/chosen": -90.59443664550781, "logps/rejected": -910.5451049804688, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.4472530782222748, "rewards/margins": 8.265399932861328, "rewards/rejected": -8.712652206420898, "step": 80040 }, { "epoch": 0.96, "learning_rate": 2.652039250883476e-08, "logits/chosen": -2.866429328918457, "logits/rejected": -2.2435829639434814, "logps/chosen": -134.44039916992188, "logps/rejected": -1060.0494384765625, "loss": 0.0554, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8316210508346558, "rewards/margins": 9.370879173278809, "rewards/rejected": -10.202500343322754, "step": 80050 }, { "epoch": 0.96, "learning_rate": 2.6368854910749365e-08, "logits/chosen": -2.880622386932373, "logits/rejected": -2.3227438926696777, "logps/chosen": -107.2285385131836, "logps/rejected": -914.7576904296875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.5862089395523071, "rewards/margins": 8.17207145690918, "rewards/rejected": -8.758280754089355, "step": 80060 }, { "epoch": 0.96, "learning_rate": 2.6217749201792043e-08, "logits/chosen": -2.8993639945983887, "logits/rejected": -2.2730541229248047, "logps/chosen": -120.27272033691406, "logps/rejected": -857.9401245117188, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.7211229205131531, "rewards/margins": 7.4830193519592285, "rewards/rejected": -8.204141616821289, "step": 80070 }, { "epoch": 0.96, "learning_rate": 2.6067075408344746e-08, "logits/chosen": -2.835545539855957, "logits/rejected": -2.2862770557403564, "logps/chosen": -94.85221862792969, "logps/rejected": -881.2569580078125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5041936635971069, "rewards/margins": 7.924777984619141, "rewards/rejected": -8.428971290588379, "step": 80080 }, { "epoch": 0.96, "learning_rate": 2.5916833556715316e-08, "logits/chosen": -2.897090435028076, "logits/rejected": -2.635477304458618, "logps/chosen": -67.32894134521484, "logps/rejected": -812.81640625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.28597506880760193, "rewards/margins": 7.455693244934082, "rewards/rejected": -7.741668701171875, "step": 80090 }, { "epoch": 0.96, "learning_rate": 2.5767023673134994e-08, "logits/chosen": -2.897825241088867, "logits/rejected": -2.537203788757324, "logps/chosen": -79.607666015625, "logps/rejected": -847.18896484375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.37834346294403076, "rewards/margins": 7.712333679199219, "rewards/rejected": -8.090677261352539, "step": 80100 }, { "epoch": 0.96, "learning_rate": 2.5617645783760636e-08, "logits/chosen": -2.8965673446655273, "logits/rejected": -2.384835720062256, "logps/chosen": -101.4376220703125, "logps/rejected": -814.0213623046875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5413723587989807, "rewards/margins": 7.208197116851807, "rewards/rejected": -7.749568939208984, "step": 80110 }, { "epoch": 0.96, "learning_rate": 2.5468699914673324e-08, "logits/chosen": -2.86814284324646, "logits/rejected": -2.3117294311523438, "logps/chosen": -107.86201477050781, "logps/rejected": -974.0169067382812, "loss": 0.0918, "rewards/accuracies": 1.0, "rewards/chosen": -0.5850175023078918, "rewards/margins": 8.764687538146973, "rewards/rejected": -9.34970474243164, "step": 80120 }, { "epoch": 0.96, "learning_rate": 2.5320186091877807e-08, "logits/chosen": -2.859621047973633, "logits/rejected": -2.420440435409546, "logps/chosen": -123.47674560546875, "logps/rejected": -837.4136962890625, "loss": 0.1011, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7837883234024048, "rewards/margins": 7.180848598480225, "rewards/rejected": -7.964637756347656, "step": 80130 }, { "epoch": 0.96, "learning_rate": 2.5172104341305016e-08, "logits/chosen": -2.9084794521331787, "logits/rejected": -2.2836997509002686, "logps/chosen": -113.560546875, "logps/rejected": -967.38818359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.605622410774231, "rewards/margins": 8.669367790222168, "rewards/rejected": -9.274989128112793, "step": 80140 }, { "epoch": 0.96, "learning_rate": 2.5024454688808985e-08, "logits/chosen": -2.8539421558380127, "logits/rejected": -2.556311845779419, "logps/chosen": -77.84713745117188, "logps/rejected": -849.29345703125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.36700189113616943, "rewards/margins": 7.747167110443115, "rewards/rejected": -8.11417007446289, "step": 80150 }, { "epoch": 0.96, "learning_rate": 2.4877237160169375e-08, "logits/chosen": -2.8926711082458496, "logits/rejected": -2.001262664794922, "logps/chosen": -129.5086669921875, "logps/rejected": -1059.5565185546875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7315167188644409, "rewards/margins": 9.458699226379395, "rewards/rejected": -10.190214157104492, "step": 80160 }, { "epoch": 0.96, "learning_rate": 2.4730451781089515e-08, "logits/chosen": -2.8691375255584717, "logits/rejected": -2.2109410762786865, "logps/chosen": -105.42039489746094, "logps/rejected": -1069.244873046875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5172246098518372, "rewards/margins": 9.770753860473633, "rewards/rejected": -10.28797721862793, "step": 80170 }, { "epoch": 0.96, "learning_rate": 2.4584098577198345e-08, "logits/chosen": -2.894178867340088, "logits/rejected": -2.321464776992798, "logps/chosen": -120.02305603027344, "logps/rejected": -931.5178833007812, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6992332339286804, "rewards/margins": 8.220086097717285, "rewards/rejected": -8.919318199157715, "step": 80180 }, { "epoch": 0.96, "learning_rate": 2.4438177574048483e-08, "logits/chosen": -2.860304355621338, "logits/rejected": -2.207784652709961, "logps/chosen": -125.3786849975586, "logps/rejected": -985.8982543945312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.742831289768219, "rewards/margins": 8.706721305847168, "rewards/rejected": -9.449552536010742, "step": 80190 }, { "epoch": 0.96, "learning_rate": 2.429268879711677e-08, "logits/chosen": -2.8790299892425537, "logits/rejected": -2.2819864749908447, "logps/chosen": -105.0416488647461, "logps/rejected": -921.9710693359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.572311282157898, "rewards/margins": 8.261277198791504, "rewards/rejected": -8.833587646484375, "step": 80200 }, { "epoch": 0.96, "learning_rate": 2.4147632271805942e-08, "logits/chosen": -2.873394250869751, "logits/rejected": -2.30519437789917, "logps/chosen": -120.9601821899414, "logps/rejected": -916.9688720703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6583935022354126, "rewards/margins": 8.118961334228516, "rewards/rejected": -8.777355194091797, "step": 80210 }, { "epoch": 0.96, "learning_rate": 2.4003008023442132e-08, "logits/chosen": -2.8985648155212402, "logits/rejected": -2.612055778503418, "logps/chosen": -84.52667999267578, "logps/rejected": -845.0989990234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.3928089439868927, "rewards/margins": 7.667170524597168, "rewards/rejected": -8.059979438781738, "step": 80220 }, { "epoch": 0.96, "learning_rate": 2.3858816077276248e-08, "logits/chosen": -2.8783535957336426, "logits/rejected": -2.3026318550109863, "logps/chosen": -130.982666015625, "logps/rejected": -1007.1033325195312, "loss": 0.1019, "rewards/accuracies": 1.0, "rewards/chosen": -0.8379908800125122, "rewards/margins": 8.830761909484863, "rewards/rejected": -9.668752670288086, "step": 80230 }, { "epoch": 0.96, "learning_rate": 2.371505645848371e-08, "logits/chosen": -2.8674468994140625, "logits/rejected": -2.438786506652832, "logps/chosen": -87.07342529296875, "logps/rejected": -891.5436401367188, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -0.46296030282974243, "rewards/margins": 8.067361831665039, "rewards/rejected": -8.53032112121582, "step": 80240 }, { "epoch": 0.96, "learning_rate": 2.3571729192164992e-08, "logits/chosen": -2.9198157787323, "logits/rejected": -2.5772647857666016, "logps/chosen": -97.11164093017578, "logps/rejected": -877.3176879882812, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.5454160571098328, "rewards/margins": 7.845132350921631, "rewards/rejected": -8.390547752380371, "step": 80250 }, { "epoch": 0.96, "learning_rate": 2.342883430334453e-08, "logits/chosen": -2.9014389514923096, "logits/rejected": -2.4348301887512207, "logps/chosen": -111.79072570800781, "logps/rejected": -906.470703125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6558749675750732, "rewards/margins": 8.00836181640625, "rewards/rejected": -8.664237022399902, "step": 80260 }, { "epoch": 0.96, "learning_rate": 2.3286371816970964e-08, "logits/chosen": -2.864048719406128, "logits/rejected": -2.3162569999694824, "logps/chosen": -101.18106079101562, "logps/rejected": -993.6398315429688, "loss": 0.0975, "rewards/accuracies": 1.0, "rewards/chosen": -0.488883912563324, "rewards/margins": 9.059679985046387, "rewards/rejected": -9.548563003540039, "step": 80270 }, { "epoch": 0.96, "learning_rate": 2.3144341757918022e-08, "logits/chosen": -2.8842597007751465, "logits/rejected": -2.4686849117279053, "logps/chosen": -91.25114440917969, "logps/rejected": -869.3640747070312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.48164457082748413, "rewards/margins": 7.8301215171813965, "rewards/rejected": -8.311765670776367, "step": 80280 }, { "epoch": 0.96, "learning_rate": 2.3002744150983913e-08, "logits/chosen": -2.8614234924316406, "logits/rejected": -2.009119987487793, "logps/chosen": -141.69903564453125, "logps/rejected": -1089.587158203125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.8503175973892212, "rewards/margins": 9.631147384643555, "rewards/rejected": -10.481464385986328, "step": 80290 }, { "epoch": 0.96, "learning_rate": 2.286157902089109e-08, "logits/chosen": -2.8828957080841064, "logits/rejected": -2.279576063156128, "logps/chosen": -118.5, "logps/rejected": -925.6573486328125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6769765615463257, "rewards/margins": 8.177966117858887, "rewards/rejected": -8.854942321777344, "step": 80300 }, { "epoch": 0.96, "learning_rate": 2.272084639228678e-08, "logits/chosen": -2.8758773803710938, "logits/rejected": -2.2146263122558594, "logps/chosen": -126.475830078125, "logps/rejected": -1034.0584716796875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.745134711265564, "rewards/margins": 9.173128128051758, "rewards/rejected": -9.918262481689453, "step": 80310 }, { "epoch": 0.96, "learning_rate": 2.2580546289742432e-08, "logits/chosen": -2.9174740314483643, "logits/rejected": -2.325108289718628, "logps/chosen": -84.26487731933594, "logps/rejected": -769.561767578125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.42155131697654724, "rewards/margins": 6.9127349853515625, "rewards/rejected": -7.334285736083984, "step": 80320 }, { "epoch": 0.96, "learning_rate": 2.244067873775374e-08, "logits/chosen": -2.8434455394744873, "logits/rejected": -2.3240771293640137, "logps/chosen": -99.88078308105469, "logps/rejected": -986.8416748046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5434752702713013, "rewards/margins": 8.935602188110352, "rewards/rejected": -9.479076385498047, "step": 80330 }, { "epoch": 0.96, "learning_rate": 2.2301243760741165e-08, "logits/chosen": -2.8872082233428955, "logits/rejected": -2.5520946979522705, "logps/chosen": -91.47206115722656, "logps/rejected": -879.0579833984375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.4932866096496582, "rewards/margins": 7.906856536865234, "rewards/rejected": -8.40014362335205, "step": 80340 }, { "epoch": 0.96, "learning_rate": 2.216224138305023e-08, "logits/chosen": -2.867649555206299, "logits/rejected": -2.159959316253662, "logps/chosen": -115.17045593261719, "logps/rejected": -966.3997192382812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.667377233505249, "rewards/margins": 8.61644172668457, "rewards/rejected": -9.283819198608398, "step": 80350 }, { "epoch": 0.96, "learning_rate": 2.202367162894986e-08, "logits/chosen": -2.883368492126465, "logits/rejected": -2.4522793292999268, "logps/chosen": -84.79344177246094, "logps/rejected": -822.5436401367188, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.4015340209007263, "rewards/margins": 7.444195747375488, "rewards/rejected": -7.845728874206543, "step": 80360 }, { "epoch": 0.96, "learning_rate": 2.1885534522634312e-08, "logits/chosen": -2.87541127204895, "logits/rejected": -2.3594534397125244, "logps/chosen": -104.01362609863281, "logps/rejected": -904.9920043945312, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5612069964408875, "rewards/margins": 8.087160110473633, "rewards/rejected": -8.648366928100586, "step": 80370 }, { "epoch": 0.96, "learning_rate": 2.174783008822179e-08, "logits/chosen": -2.8565077781677246, "logits/rejected": -2.3449931144714355, "logps/chosen": -98.06867218017578, "logps/rejected": -926.2374267578125, "loss": 0.1066, "rewards/accuracies": 1.0, "rewards/chosen": -0.5439151525497437, "rewards/margins": 8.326848030090332, "rewards/rejected": -8.870762825012207, "step": 80380 }, { "epoch": 0.96, "learning_rate": 2.1610558349755007e-08, "logits/chosen": -2.9168384075164795, "logits/rejected": -2.463620901107788, "logps/chosen": -87.94136047363281, "logps/rejected": -907.4454956054688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.46502724289894104, "rewards/margins": 8.228532791137695, "rewards/rejected": -8.693559646606445, "step": 80390 }, { "epoch": 0.96, "learning_rate": 2.147371933120146e-08, "logits/chosen": -2.8950018882751465, "logits/rejected": -2.2155559062957764, "logps/chosen": -109.74853515625, "logps/rejected": -1046.7769775390625, "loss": 0.0866, "rewards/accuracies": 1.0, "rewards/chosen": -0.5897092819213867, "rewards/margins": 9.48167610168457, "rewards/rejected": -10.071383476257324, "step": 80400 }, { "epoch": 0.96, "learning_rate": 2.133731305645259e-08, "logits/chosen": -2.873922824859619, "logits/rejected": -2.5266780853271484, "logps/chosen": -78.8219223022461, "logps/rejected": -828.3722534179688, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.37149542570114136, "rewards/margins": 7.5358428955078125, "rewards/rejected": -7.9073381423950195, "step": 80410 }, { "epoch": 0.96, "learning_rate": 2.1201339549324627e-08, "logits/chosen": -2.898341655731201, "logits/rejected": -2.3409862518310547, "logps/chosen": -101.15865325927734, "logps/rejected": -894.47265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5496554374694824, "rewards/margins": 8.018294334411621, "rewards/rejected": -8.567949295043945, "step": 80420 }, { "epoch": 0.96, "learning_rate": 2.1065798833558026e-08, "logits/chosen": -2.8822288513183594, "logits/rejected": -2.2899577617645264, "logps/chosen": -143.22198486328125, "logps/rejected": -839.48583984375, "loss": 0.4273, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9611527323722839, "rewards/margins": 7.058152675628662, "rewards/rejected": -8.019305229187012, "step": 80430 }, { "epoch": 0.96, "learning_rate": 2.0930690932818298e-08, "logits/chosen": -2.907723903656006, "logits/rejected": -2.442206859588623, "logps/chosen": -109.44498443603516, "logps/rejected": -931.93505859375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6295266151428223, "rewards/margins": 8.295854568481445, "rewards/rejected": -8.925379753112793, "step": 80440 }, { "epoch": 0.96, "learning_rate": 2.0796015870694632e-08, "logits/chosen": -2.886937379837036, "logits/rejected": -2.2567124366760254, "logps/chosen": -100.52983856201172, "logps/rejected": -915.4362182617188, "loss": 0.0846, "rewards/accuracies": 1.0, "rewards/chosen": -0.55093914270401, "rewards/margins": 8.215922355651855, "rewards/rejected": -8.766862869262695, "step": 80450 }, { "epoch": 0.96, "learning_rate": 2.0661773670700712e-08, "logits/chosen": -2.8749325275421143, "logits/rejected": -2.4755115509033203, "logps/chosen": -96.6640625, "logps/rejected": -888.07275390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5053151249885559, "rewards/margins": 7.972290992736816, "rewards/rejected": -8.477605819702148, "step": 80460 }, { "epoch": 0.96, "learning_rate": 2.052796435627502e-08, "logits/chosen": -2.88389253616333, "logits/rejected": -2.421816349029541, "logps/chosen": -112.36930847167969, "logps/rejected": -894.7646484375, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -0.6767473220825195, "rewards/margins": 7.86818790435791, "rewards/rejected": -8.54493522644043, "step": 80470 }, { "epoch": 0.96, "learning_rate": 2.0394587950780533e-08, "logits/chosen": -2.908048152923584, "logits/rejected": -2.2028541564941406, "logps/chosen": -108.852294921875, "logps/rejected": -981.5177612304688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5501325726509094, "rewards/margins": 8.85377311706543, "rewards/rejected": -9.403905868530273, "step": 80480 }, { "epoch": 0.96, "learning_rate": 2.0261644477504172e-08, "logits/chosen": -2.9027862548828125, "logits/rejected": -2.5254156589508057, "logps/chosen": -83.29889678955078, "logps/rejected": -844.00927734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4359237551689148, "rewards/margins": 7.624242305755615, "rewards/rejected": -8.06016731262207, "step": 80490 }, { "epoch": 0.96, "learning_rate": 2.0129133959657377e-08, "logits/chosen": -2.8918724060058594, "logits/rejected": -2.2825305461883545, "logps/chosen": -102.02378845214844, "logps/rejected": -1001.5006103515625, "loss": 0.1061, "rewards/accuracies": 1.0, "rewards/chosen": -0.5438159704208374, "rewards/margins": 9.069093704223633, "rewards/rejected": -9.612909317016602, "step": 80500 }, { "epoch": 0.96, "learning_rate": 1.999705642037636e-08, "logits/chosen": -2.8783118724823, "logits/rejected": -2.4177207946777344, "logps/chosen": -77.0167007446289, "logps/rejected": -846.2745971679688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.35226041078567505, "rewards/margins": 7.741687774658203, "rewards/rejected": -8.093948364257812, "step": 80510 }, { "epoch": 0.96, "learning_rate": 1.9865411882721286e-08, "logits/chosen": -2.8914923667907715, "logits/rejected": -2.3682126998901367, "logps/chosen": -121.97056579589844, "logps/rejected": -920.5099487304688, "loss": 0.0876, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7248458862304688, "rewards/margins": 8.09315299987793, "rewards/rejected": -8.817998886108398, "step": 80520 }, { "epoch": 0.96, "learning_rate": 1.9734200369676827e-08, "logits/chosen": -2.851905345916748, "logits/rejected": -2.069809675216675, "logps/chosen": -132.80914306640625, "logps/rejected": -1042.108642578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7631959915161133, "rewards/margins": 9.249299049377441, "rewards/rejected": -10.012495040893555, "step": 80530 }, { "epoch": 0.96, "learning_rate": 1.960342190415271e-08, "logits/chosen": -2.8444108963012695, "logits/rejected": -2.2802090644836426, "logps/chosen": -110.43524169921875, "logps/rejected": -998.4559326171875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6265895962715149, "rewards/margins": 8.962345123291016, "rewards/rejected": -9.588933944702148, "step": 80540 }, { "epoch": 0.96, "learning_rate": 1.947307650898178e-08, "logits/chosen": -2.8727235794067383, "logits/rejected": -2.2565624713897705, "logps/chosen": -112.39752197265625, "logps/rejected": -855.8262939453125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6711071729660034, "rewards/margins": 7.501959800720215, "rewards/rejected": -8.173068046569824, "step": 80550 }, { "epoch": 0.96, "learning_rate": 1.9343164206922505e-08, "logits/chosen": -2.90734601020813, "logits/rejected": -2.356412410736084, "logps/chosen": -95.70587921142578, "logps/rejected": -971.82568359375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.4998219907283783, "rewards/margins": 8.832258224487305, "rewards/rejected": -9.33207893371582, "step": 80560 }, { "epoch": 0.96, "learning_rate": 1.9213685020657014e-08, "logits/chosen": -2.8954455852508545, "logits/rejected": -2.3396737575531006, "logps/chosen": -102.86701965332031, "logps/rejected": -953.2278442382812, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.5494550466537476, "rewards/margins": 8.593209266662598, "rewards/rejected": -9.142662048339844, "step": 80570 }, { "epoch": 0.96, "learning_rate": 1.9084638972792226e-08, "logits/chosen": -2.8438169956207275, "logits/rejected": -2.2235543727874756, "logps/chosen": -127.85188293457031, "logps/rejected": -967.6945190429688, "loss": 0.0852, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8166348338127136, "rewards/margins": 8.471404075622559, "rewards/rejected": -9.288040161132812, "step": 80580 }, { "epoch": 0.96, "learning_rate": 1.8956026085858727e-08, "logits/chosen": -2.8868956565856934, "logits/rejected": -2.4664292335510254, "logps/chosen": -94.81431579589844, "logps/rejected": -854.2086181640625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5018694996833801, "rewards/margins": 7.6673126220703125, "rewards/rejected": -8.169181823730469, "step": 80590 }, { "epoch": 0.96, "learning_rate": 1.8827846382312442e-08, "logits/chosen": -2.9050393104553223, "logits/rejected": -2.4445555210113525, "logps/chosen": -88.6093521118164, "logps/rejected": -851.0304565429688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.42546406388282776, "rewards/margins": 7.701519966125488, "rewards/rejected": -8.126983642578125, "step": 80600 }, { "epoch": 0.96, "learning_rate": 1.8700099884532972e-08, "logits/chosen": -2.900538444519043, "logits/rejected": -2.1405770778656006, "logps/chosen": -122.52703857421875, "logps/rejected": -1070.525146484375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7107561826705933, "rewards/margins": 9.583646774291992, "rewards/rejected": -10.294404983520508, "step": 80610 }, { "epoch": 0.97, "learning_rate": 1.8572786614824422e-08, "logits/chosen": -2.842452049255371, "logits/rejected": -2.0176119804382324, "logps/chosen": -147.75204467773438, "logps/rejected": -1003.6473388671875, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": -0.9057775735855103, "rewards/margins": 8.752891540527344, "rewards/rejected": -9.658670425415039, "step": 80620 }, { "epoch": 0.97, "learning_rate": 1.8445906595415396e-08, "logits/chosen": -2.817594528198242, "logits/rejected": -1.7017955780029297, "logps/chosen": -167.80682373046875, "logps/rejected": -1111.7197265625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0309836864471436, "rewards/margins": 9.674835205078125, "rewards/rejected": -10.705819129943848, "step": 80630 }, { "epoch": 0.97, "learning_rate": 1.8319459848459285e-08, "logits/chosen": -2.911118745803833, "logits/rejected": -2.428497076034546, "logps/chosen": -101.80169677734375, "logps/rejected": -894.7735595703125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5169885754585266, "rewards/margins": 8.03825855255127, "rewards/rejected": -8.555246353149414, "step": 80640 }, { "epoch": 0.97, "learning_rate": 1.81934463960326e-08, "logits/chosen": -2.885148525238037, "logits/rejected": -2.313995361328125, "logps/chosen": -102.8097152709961, "logps/rejected": -992.77001953125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.556588888168335, "rewards/margins": 8.98560905456543, "rewards/rejected": -9.542198181152344, "step": 80650 }, { "epoch": 0.97, "learning_rate": 1.806786626013718e-08, "logits/chosen": -2.915163516998291, "logits/rejected": -2.452986717224121, "logps/chosen": -99.79046630859375, "logps/rejected": -919.1282348632812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5404191613197327, "rewards/margins": 8.2689847946167, "rewards/rejected": -8.809404373168945, "step": 80660 }, { "epoch": 0.97, "learning_rate": 1.79427194626991e-08, "logits/chosen": -2.8764560222625732, "logits/rejected": -2.3703343868255615, "logps/chosen": -95.04740905761719, "logps/rejected": -854.0548706054688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4786652624607086, "rewards/margins": 7.671906471252441, "rewards/rejected": -8.1505708694458, "step": 80670 }, { "epoch": 0.97, "learning_rate": 1.781800602556866e-08, "logits/chosen": -2.8796846866607666, "logits/rejected": -2.516995429992676, "logps/chosen": -115.08573913574219, "logps/rejected": -874.4425048828125, "loss": 0.0932, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7412611246109009, "rewards/margins": 7.612094879150391, "rewards/rejected": -8.35335636138916, "step": 80680 }, { "epoch": 0.97, "learning_rate": 1.769372597052038e-08, "logits/chosen": -2.9010589122772217, "logits/rejected": -2.439730167388916, "logps/chosen": -111.2242660522461, "logps/rejected": -894.75390625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.5933731198310852, "rewards/margins": 7.956018924713135, "rewards/rejected": -8.549392700195312, "step": 80690 }, { "epoch": 0.97, "learning_rate": 1.75698793192533e-08, "logits/chosen": -2.8397650718688965, "logits/rejected": -2.215014696121216, "logps/chosen": -156.67884826660156, "logps/rejected": -891.8035278320312, "loss": 0.0894, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.042075514793396, "rewards/margins": 7.495156764984131, "rewards/rejected": -8.537233352661133, "step": 80700 }, { "epoch": 0.97, "learning_rate": 1.744646609339068e-08, "logits/chosen": -2.9052419662475586, "logits/rejected": -2.620861530303955, "logps/chosen": -69.59514617919922, "logps/rejected": -807.3048095703125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.32967591285705566, "rewards/margins": 7.3737287521362305, "rewards/rejected": -7.703404903411865, "step": 80710 }, { "epoch": 0.97, "learning_rate": 1.732348631448e-08, "logits/chosen": -2.905723810195923, "logits/rejected": -2.481593608856201, "logps/chosen": -99.52140808105469, "logps/rejected": -890.6848754882812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5147387981414795, "rewards/margins": 8.009198188781738, "rewards/rejected": -8.523937225341797, "step": 80720 }, { "epoch": 0.97, "learning_rate": 1.7200940003993248e-08, "logits/chosen": -2.864807605743408, "logits/rejected": -2.230605125427246, "logps/chosen": -119.01972961425781, "logps/rejected": -1086.1981201171875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6753968000411987, "rewards/margins": 9.783557891845703, "rewards/rejected": -10.458955764770508, "step": 80730 }, { "epoch": 0.97, "learning_rate": 1.707882718332665e-08, "logits/chosen": -2.9016361236572266, "logits/rejected": -2.4304275512695312, "logps/chosen": -111.8457260131836, "logps/rejected": -858.7496337890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6860803365707397, "rewards/margins": 7.527838706970215, "rewards/rejected": -8.213918685913086, "step": 80740 }, { "epoch": 0.97, "learning_rate": 1.6957147873800927e-08, "logits/chosen": -2.8935582637786865, "logits/rejected": -2.468991279602051, "logps/chosen": -89.35608673095703, "logps/rejected": -860.8289794921875, "loss": 0.1523, "rewards/accuracies": 1.0, "rewards/chosen": -0.4128074049949646, "rewards/margins": 7.820019721984863, "rewards/rejected": -8.232828140258789, "step": 80750 }, { "epoch": 0.97, "learning_rate": 1.683590209666075e-08, "logits/chosen": -2.9001259803771973, "logits/rejected": -2.2450335025787354, "logps/chosen": -126.84623718261719, "logps/rejected": -1056.4293212890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7197661995887756, "rewards/margins": 9.452962875366211, "rewards/rejected": -10.1727294921875, "step": 80760 }, { "epoch": 0.97, "learning_rate": 1.6715089873075296e-08, "logits/chosen": -2.9006428718566895, "logits/rejected": -2.380202293395996, "logps/chosen": -97.16780090332031, "logps/rejected": -895.1632690429688, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.49951276183128357, "rewards/margins": 8.070012092590332, "rewards/rejected": -8.569524765014648, "step": 80770 }, { "epoch": 0.97, "learning_rate": 1.659471122413825e-08, "logits/chosen": -2.853637218475342, "logits/rejected": -2.3067164421081543, "logps/chosen": -126.94873046875, "logps/rejected": -961.3455810546875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7011400461196899, "rewards/margins": 8.513223648071289, "rewards/rejected": -9.214363098144531, "step": 80780 }, { "epoch": 0.97, "learning_rate": 1.6474766170866962e-08, "logits/chosen": -2.892186403274536, "logits/rejected": -2.1649439334869385, "logps/chosen": -129.94969177246094, "logps/rejected": -1087.5836181640625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7353451251983643, "rewards/margins": 9.730155944824219, "rewards/rejected": -10.465500831604004, "step": 80790 }, { "epoch": 0.97, "learning_rate": 1.635525473420413e-08, "logits/chosen": -2.9021172523498535, "logits/rejected": -2.151822328567505, "logps/chosen": -105.7925796508789, "logps/rejected": -927.9306640625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5045472383499146, "rewards/margins": 8.396196365356445, "rewards/rejected": -8.90074348449707, "step": 80800 }, { "epoch": 0.97, "learning_rate": 1.6236176935015835e-08, "logits/chosen": -2.876647472381592, "logits/rejected": -2.352048873901367, "logps/chosen": -103.8669662475586, "logps/rejected": -910.5667114257812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5464105606079102, "rewards/margins": 8.173974990844727, "rewards/rejected": -8.720385551452637, "step": 80810 }, { "epoch": 0.97, "learning_rate": 1.6117532794092394e-08, "logits/chosen": -2.8666257858276367, "logits/rejected": -2.093541145324707, "logps/chosen": -100.86748504638672, "logps/rejected": -889.61328125, "loss": 0.1069, "rewards/accuracies": 1.0, "rewards/chosen": -0.5093253254890442, "rewards/margins": 8.006257057189941, "rewards/rejected": -8.515581130981445, "step": 80820 }, { "epoch": 0.97, "learning_rate": 1.5999322332149458e-08, "logits/chosen": -2.8768150806427, "logits/rejected": -2.403595447540283, "logps/chosen": -99.62307739257812, "logps/rejected": -930.3577270507812, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.48240384459495544, "rewards/margins": 8.441039085388184, "rewards/rejected": -8.923443794250488, "step": 80830 }, { "epoch": 0.97, "learning_rate": 1.5881545569825517e-08, "logits/chosen": -2.8936448097229004, "logits/rejected": -2.3986871242523193, "logps/chosen": -107.70878601074219, "logps/rejected": -1002.9503784179688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.63884037733078, "rewards/margins": 8.992525100708008, "rewards/rejected": -9.631365776062012, "step": 80840 }, { "epoch": 0.97, "learning_rate": 1.5764202527684403e-08, "logits/chosen": -2.847752809524536, "logits/rejected": -2.501063823699951, "logps/chosen": -93.02488708496094, "logps/rejected": -834.04541015625, "loss": 0.1003, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5251372456550598, "rewards/margins": 7.44201135635376, "rewards/rejected": -7.9671478271484375, "step": 80850 }, { "epoch": 0.97, "learning_rate": 1.5647293226213888e-08, "logits/chosen": -2.8613853454589844, "logits/rejected": -1.840593695640564, "logps/chosen": -158.55886840820312, "logps/rejected": -1106.7457275390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.959692656993866, "rewards/margins": 9.692130088806152, "rewards/rejected": -10.651823043823242, "step": 80860 }, { "epoch": 0.97, "learning_rate": 1.5530817685825984e-08, "logits/chosen": -2.8826141357421875, "logits/rejected": -2.205723524093628, "logps/chosen": -110.1729736328125, "logps/rejected": -955.27685546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5825379490852356, "rewards/margins": 8.571489334106445, "rewards/rejected": -9.154027938842773, "step": 80870 }, { "epoch": 0.97, "learning_rate": 1.541477592685692e-08, "logits/chosen": -2.891028881072998, "logits/rejected": -2.4092326164245605, "logps/chosen": -92.15006256103516, "logps/rejected": -844.4176635742188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.504743218421936, "rewards/margins": 7.553762912750244, "rewards/rejected": -8.05850601196289, "step": 80880 }, { "epoch": 0.97, "learning_rate": 1.529916796956771e-08, "logits/chosen": -2.795517683029175, "logits/rejected": -1.9871165752410889, "logps/chosen": -130.0041961669922, "logps/rejected": -1070.897705078125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7372392416000366, "rewards/margins": 9.539998054504395, "rewards/rejected": -10.277237892150879, "step": 80890 }, { "epoch": 0.97, "learning_rate": 1.5183993834142485e-08, "logits/chosen": -2.921294927597046, "logits/rejected": -2.550196647644043, "logps/chosen": -84.55226135253906, "logps/rejected": -892.642578125, "loss": 0.0951, "rewards/accuracies": 1.0, "rewards/chosen": -0.42360830307006836, "rewards/margins": 8.118584632873535, "rewards/rejected": -8.542192459106445, "step": 80900 }, { "epoch": 0.97, "learning_rate": 1.506925354069072e-08, "logits/chosen": -2.8677196502685547, "logits/rejected": -2.395812511444092, "logps/chosen": -90.00645446777344, "logps/rejected": -942.845703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.47895199060440063, "rewards/margins": 8.552262306213379, "rewards/rejected": -9.031214714050293, "step": 80910 }, { "epoch": 0.97, "learning_rate": 1.4954947109245555e-08, "logits/chosen": -2.892857789993286, "logits/rejected": -2.6888718605041504, "logps/chosen": -65.84849548339844, "logps/rejected": -809.8016967773438, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3109647333621979, "rewards/margins": 7.418496131896973, "rewards/rejected": -7.729461669921875, "step": 80920 }, { "epoch": 0.97, "learning_rate": 1.4841074559764634e-08, "logits/chosen": -2.844723701477051, "logits/rejected": -2.4073922634124756, "logps/chosen": -109.99691009521484, "logps/rejected": -834.2243041992188, "loss": 0.1427, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6172013282775879, "rewards/margins": 7.347315311431885, "rewards/rejected": -7.964515686035156, "step": 80930 }, { "epoch": 0.97, "learning_rate": 1.4727635912130112e-08, "logits/chosen": -2.8631198406219482, "logits/rejected": -2.4113306999206543, "logps/chosen": -114.02558898925781, "logps/rejected": -794.3521118164062, "loss": 0.0883, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7188795208930969, "rewards/margins": 6.844626426696777, "rewards/rejected": -7.563506126403809, "step": 80940 }, { "epoch": 0.97, "learning_rate": 1.4614631186147809e-08, "logits/chosen": -2.874791383743286, "logits/rejected": -2.24877667427063, "logps/chosen": -124.0125732421875, "logps/rejected": -998.6320190429688, "loss": 0.0453, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7506101727485657, "rewards/margins": 8.846013069152832, "rewards/rejected": -9.596622467041016, "step": 80950 }, { "epoch": 0.97, "learning_rate": 1.4502060401548058e-08, "logits/chosen": -2.893380641937256, "logits/rejected": -2.469639539718628, "logps/chosen": -90.40028381347656, "logps/rejected": -915.0921630859375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.42684465646743774, "rewards/margins": 8.331903457641602, "rewards/rejected": -8.758747100830078, "step": 80960 }, { "epoch": 0.97, "learning_rate": 1.4389923577985133e-08, "logits/chosen": -2.89452862739563, "logits/rejected": -2.2643351554870605, "logps/chosen": -111.72802734375, "logps/rejected": -928.9982299804688, "loss": 0.1392, "rewards/accuracies": 1.0, "rewards/chosen": -0.6498815417289734, "rewards/margins": 8.24669361114502, "rewards/rejected": -8.896576881408691, "step": 80970 }, { "epoch": 0.97, "learning_rate": 1.4278220735038373e-08, "logits/chosen": -2.8433146476745605, "logits/rejected": -2.5176682472229004, "logps/chosen": -79.88054656982422, "logps/rejected": -848.9978637695312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4113023281097412, "rewards/margins": 7.705729007720947, "rewards/rejected": -8.11703109741211, "step": 80980 }, { "epoch": 0.97, "learning_rate": 1.4166951892210235e-08, "logits/chosen": -2.876951217651367, "logits/rejected": -2.36149525642395, "logps/chosen": -98.0750503540039, "logps/rejected": -910.6609497070312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.49630841612815857, "rewards/margins": 8.211615562438965, "rewards/rejected": -8.707923889160156, "step": 80990 }, { "epoch": 0.97, "learning_rate": 1.4056117068928232e-08, "logits/chosen": -2.8853652477264404, "logits/rejected": -2.7013802528381348, "logps/chosen": -47.85320281982422, "logps/rejected": -746.1895751953125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.14784054458141327, "rewards/margins": 6.955595970153809, "rewards/rejected": -7.1034369468688965, "step": 81000 }, { "epoch": 0.97, "eval_logits/chosen": -2.8859634399414062, "eval_logits/rejected": -1.7594223022460938, "eval_logps/chosen": -243.4639434814453, "eval_logps/rejected": -1144.7491455078125, "eval_loss": 0.0013252641074359417, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -1.8228368759155273, "eval_rewards/margins": 9.157401084899902, "eval_rewards/rejected": -10.980237007141113, "eval_runtime": 1.2166, "eval_samples_per_second": 4.11, "eval_steps_per_second": 2.466, "step": 81000 }, { "epoch": 0.97, "learning_rate": 1.3945716284543831e-08, "logits/chosen": -2.854933261871338, "logits/rejected": -2.219388484954834, "logps/chosen": -96.439453125, "logps/rejected": -931.7398681640625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.518595814704895, "rewards/margins": 8.40029525756836, "rewards/rejected": -8.918889999389648, "step": 81010 }, { "epoch": 0.97, "learning_rate": 1.3835749558332722e-08, "logits/chosen": -2.9020652770996094, "logits/rejected": -2.2576892375946045, "logps/chosen": -143.78358459472656, "logps/rejected": -1051.3736572265625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.9017874002456665, "rewards/margins": 9.215580940246582, "rewards/rejected": -10.117368698120117, "step": 81020 }, { "epoch": 0.97, "learning_rate": 1.3726216909494827e-08, "logits/chosen": -2.8870456218719482, "logits/rejected": -2.3613228797912598, "logps/chosen": -103.91230773925781, "logps/rejected": -925.4111328125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5922924876213074, "rewards/margins": 8.269659042358398, "rewards/rejected": -8.86195182800293, "step": 81030 }, { "epoch": 0.97, "learning_rate": 1.3617118357153735e-08, "logits/chosen": -2.9250388145446777, "logits/rejected": -2.1147401332855225, "logps/chosen": -152.0341796875, "logps/rejected": -1039.8897705078125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9112136960029602, "rewards/margins": 9.060441970825195, "rewards/rejected": -9.97165584564209, "step": 81040 }, { "epoch": 0.97, "learning_rate": 1.3508453920358377e-08, "logits/chosen": -2.8947949409484863, "logits/rejected": -2.1739933490753174, "logps/chosen": -120.95097351074219, "logps/rejected": -946.2356567382812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.69969642162323, "rewards/margins": 8.36478328704834, "rewards/rejected": -9.06447982788086, "step": 81050 }, { "epoch": 0.97, "learning_rate": 1.3400223618081076e-08, "logits/chosen": -2.8621413707733154, "logits/rejected": -2.176703691482544, "logps/chosen": -117.83451843261719, "logps/rejected": -1066.4422607421875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6717011332511902, "rewards/margins": 9.598867416381836, "rewards/rejected": -10.270567893981934, "step": 81060 }, { "epoch": 0.97, "learning_rate": 1.3292427469218382e-08, "logits/chosen": -2.822509765625, "logits/rejected": -2.043041944503784, "logps/chosen": -130.74856567382812, "logps/rejected": -1035.4537353515625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6930164098739624, "rewards/margins": 9.264518737792969, "rewards/rejected": -9.957536697387695, "step": 81070 }, { "epoch": 0.97, "learning_rate": 1.3185065492591353e-08, "logits/chosen": -2.85005521774292, "logits/rejected": -2.3953216075897217, "logps/chosen": -102.94184875488281, "logps/rejected": -878.0099487304688, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.5659044981002808, "rewards/margins": 7.832221031188965, "rewards/rejected": -8.398125648498535, "step": 81080 }, { "epoch": 0.97, "learning_rate": 1.307813770694527e-08, "logits/chosen": -2.921642303466797, "logits/rejected": -2.4040374755859375, "logps/chosen": -107.8842544555664, "logps/rejected": -981.0075073242188, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.6386640667915344, "rewards/margins": 8.775684356689453, "rewards/rejected": -9.414348602294922, "step": 81090 }, { "epoch": 0.97, "learning_rate": 1.2971644130948813e-08, "logits/chosen": -2.8750081062316895, "logits/rejected": -2.400564193725586, "logps/chosen": -95.50777435302734, "logps/rejected": -906.2178955078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5346312522888184, "rewards/margins": 8.145134925842285, "rewards/rejected": -8.679765701293945, "step": 81100 }, { "epoch": 0.97, "learning_rate": 1.2865584783195994e-08, "logits/chosen": -2.937736749649048, "logits/rejected": -2.189548969268799, "logps/chosen": -135.18115234375, "logps/rejected": -993.2698364257812, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.712912917137146, "rewards/margins": 8.821381568908691, "rewards/rejected": -9.534295082092285, "step": 81110 }, { "epoch": 0.97, "learning_rate": 1.2759959682204503e-08, "logits/chosen": -2.9361300468444824, "logits/rejected": -2.207578420639038, "logps/chosen": -134.10317993164062, "logps/rejected": -1040.060302734375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.8002769351005554, "rewards/margins": 9.188211441040039, "rewards/rejected": -9.988487243652344, "step": 81120 }, { "epoch": 0.97, "learning_rate": 1.2654768846415977e-08, "logits/chosen": -2.9082303047180176, "logits/rejected": -2.243337631225586, "logps/chosen": -104.66996002197266, "logps/rejected": -954.3201904296875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5391185283660889, "rewards/margins": 8.609219551086426, "rewards/rejected": -9.148338317871094, "step": 81130 }, { "epoch": 0.97, "learning_rate": 1.2550012294196556e-08, "logits/chosen": -2.85145902633667, "logits/rejected": -2.499910831451416, "logps/chosen": -97.05500030517578, "logps/rejected": -886.4891357421875, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.5681637525558472, "rewards/margins": 7.912909507751465, "rewards/rejected": -8.481074333190918, "step": 81140 }, { "epoch": 0.97, "learning_rate": 1.2445690043836334e-08, "logits/chosen": -2.9014644622802734, "logits/rejected": -2.352179765701294, "logps/chosen": -115.31672668457031, "logps/rejected": -966.7353515625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6310214400291443, "rewards/margins": 8.62610149383545, "rewards/rejected": -9.257122993469238, "step": 81150 }, { "epoch": 0.97, "learning_rate": 1.2341802113549627e-08, "logits/chosen": -2.92866849899292, "logits/rejected": -2.5444905757904053, "logps/chosen": -101.56132507324219, "logps/rejected": -847.6149291992188, "loss": 0.0807, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.58305823802948, "rewards/margins": 7.503645420074463, "rewards/rejected": -8.086702346801758, "step": 81160 }, { "epoch": 0.97, "learning_rate": 1.2238348521475263e-08, "logits/chosen": -2.873624324798584, "logits/rejected": -2.1595826148986816, "logps/chosen": -154.84811401367188, "logps/rejected": -1005.6535034179688, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.9967271685600281, "rewards/margins": 8.666069030761719, "rewards/rejected": -9.662796020507812, "step": 81170 }, { "epoch": 0.97, "learning_rate": 1.2135329285675735e-08, "logits/chosen": -2.880030393600464, "logits/rejected": -2.453670024871826, "logps/chosen": -85.00709533691406, "logps/rejected": -909.6486206054688, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.37606287002563477, "rewards/margins": 8.332536697387695, "rewards/rejected": -8.708600044250488, "step": 81180 }, { "epoch": 0.97, "learning_rate": 1.2032744424137766e-08, "logits/chosen": -2.9149677753448486, "logits/rejected": -2.4273674488067627, "logps/chosen": -105.06275939941406, "logps/rejected": -819.2630004882812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.547782838344574, "rewards/margins": 7.269654750823975, "rewards/rejected": -7.817437171936035, "step": 81190 }, { "epoch": 0.97, "learning_rate": 1.1930593954772862e-08, "logits/chosen": -2.8561511039733887, "logits/rejected": -2.356802463531494, "logps/chosen": -93.2853012084961, "logps/rejected": -879.2966918945312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4951603412628174, "rewards/margins": 7.9189934730529785, "rewards/rejected": -8.414154052734375, "step": 81200 }, { "epoch": 0.97, "learning_rate": 1.1828877895415924e-08, "logits/chosen": -2.8495559692382812, "logits/rejected": -2.2373218536376953, "logps/chosen": -143.2585906982422, "logps/rejected": -933.0712890625, "loss": 0.111, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8993932008743286, "rewards/margins": 8.023394584655762, "rewards/rejected": -8.9227876663208, "step": 81210 }, { "epoch": 0.97, "learning_rate": 1.172759626382608e-08, "logits/chosen": -2.8908498287200928, "logits/rejected": -2.106410503387451, "logps/chosen": -129.94639587402344, "logps/rejected": -1060.4483642578125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7847418785095215, "rewards/margins": 9.418021202087402, "rewards/rejected": -10.202764511108398, "step": 81220 }, { "epoch": 0.97, "learning_rate": 1.1626749077687238e-08, "logits/chosen": -2.9007790088653564, "logits/rejected": -2.4834463596343994, "logps/chosen": -93.9062728881836, "logps/rejected": -853.0521240234375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.521713376045227, "rewards/margins": 7.629547119140625, "rewards/rejected": -8.151261329650879, "step": 81230 }, { "epoch": 0.97, "learning_rate": 1.1526336354606703e-08, "logits/chosen": -2.8767871856689453, "logits/rejected": -2.333038806915283, "logps/chosen": -99.72972106933594, "logps/rejected": -938.0256958007812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5388530492782593, "rewards/margins": 8.447797775268555, "rewards/rejected": -8.986651420593262, "step": 81240 }, { "epoch": 0.97, "learning_rate": 1.1426358112116564e-08, "logits/chosen": -2.8709404468536377, "logits/rejected": -2.5737216472625732, "logps/chosen": -68.29386138916016, "logps/rejected": -811.0509033203125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.3184170126914978, "rewards/margins": 7.420198917388916, "rewards/rejected": -7.738615989685059, "step": 81250 }, { "epoch": 0.97, "learning_rate": 1.1326814367672579e-08, "logits/chosen": -2.9064197540283203, "logits/rejected": -2.536811351776123, "logps/chosen": -89.05330657958984, "logps/rejected": -756.246826171875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.470132976770401, "rewards/margins": 6.7170562744140625, "rewards/rejected": -7.18718957901001, "step": 81260 }, { "epoch": 0.97, "learning_rate": 1.1227705138654732e-08, "logits/chosen": -2.8918745517730713, "logits/rejected": -2.1943068504333496, "logps/chosen": -105.7523193359375, "logps/rejected": -982.2864990234375, "loss": 0.1612, "rewards/accuracies": 1.0, "rewards/chosen": -0.5635223984718323, "rewards/margins": 8.848958015441895, "rewards/rejected": -9.412480354309082, "step": 81270 }, { "epoch": 0.97, "learning_rate": 1.112903044236724e-08, "logits/chosen": -2.8512611389160156, "logits/rejected": -2.0957815647125244, "logps/chosen": -129.04782104492188, "logps/rejected": -1007.8250732421875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7542259693145752, "rewards/margins": 8.930705070495605, "rewards/rejected": -9.684930801391602, "step": 81280 }, { "epoch": 0.97, "learning_rate": 1.103079029603854e-08, "logits/chosen": -2.878685712814331, "logits/rejected": -2.4518256187438965, "logps/chosen": -84.66865539550781, "logps/rejected": -886.0065307617188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.42359527945518494, "rewards/margins": 8.067840576171875, "rewards/rejected": -8.491436004638672, "step": 81290 }, { "epoch": 0.97, "learning_rate": 1.0932984716821027e-08, "logits/chosen": -2.8878259658813477, "logits/rejected": -2.3185782432556152, "logps/chosen": -116.6221923828125, "logps/rejected": -992.7791137695312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6519155502319336, "rewards/margins": 8.869461059570312, "rewards/rejected": -9.521376609802246, "step": 81300 }, { "epoch": 0.97, "learning_rate": 1.0835613721791593e-08, "logits/chosen": -2.8728415966033936, "logits/rejected": -2.36759614944458, "logps/chosen": -94.89955139160156, "logps/rejected": -892.6748046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5184081196784973, "rewards/margins": 8.027872085571289, "rewards/rejected": -8.546281814575195, "step": 81310 }, { "epoch": 0.97, "learning_rate": 1.0738677327950253e-08, "logits/chosen": -2.8768203258514404, "logits/rejected": -2.4298789501190186, "logps/chosen": -87.54074096679688, "logps/rejected": -895.5255737304688, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.4577558934688568, "rewards/margins": 8.1178617477417, "rewards/rejected": -8.575617790222168, "step": 81320 }, { "epoch": 0.97, "learning_rate": 1.0642175552222356e-08, "logits/chosen": -2.9027178287506104, "logits/rejected": -2.353502035140991, "logps/chosen": -99.23973083496094, "logps/rejected": -916.0281372070312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5132519602775574, "rewards/margins": 8.250991821289062, "rewards/rejected": -8.764243125915527, "step": 81330 }, { "epoch": 0.97, "learning_rate": 1.0546108411456645e-08, "logits/chosen": -2.877220869064331, "logits/rejected": -2.281564235687256, "logps/chosen": -106.5712890625, "logps/rejected": -978.3873901367188, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5856693983078003, "rewards/margins": 8.794026374816895, "rewards/rejected": -9.379694938659668, "step": 81340 }, { "epoch": 0.97, "learning_rate": 1.0450475922426372e-08, "logits/chosen": -2.8612589836120605, "logits/rejected": -2.3903377056121826, "logps/chosen": -82.3445053100586, "logps/rejected": -847.9456176757812, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.3866731524467468, "rewards/margins": 7.722342014312744, "rewards/rejected": -8.109014511108398, "step": 81350 }, { "epoch": 0.97, "learning_rate": 1.0355278101828458e-08, "logits/chosen": -2.8599183559417725, "logits/rejected": -2.3616440296173096, "logps/chosen": -90.97085571289062, "logps/rejected": -921.8069458007812, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.4742838740348816, "rewards/margins": 8.352418899536133, "rewards/rejected": -8.826703071594238, "step": 81360 }, { "epoch": 0.97, "learning_rate": 1.026051496628433e-08, "logits/chosen": -2.8785629272460938, "logits/rejected": -2.5033059120178223, "logps/chosen": -79.33598327636719, "logps/rejected": -912.2095947265625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.37862688302993774, "rewards/margins": 8.372467994689941, "rewards/rejected": -8.751094818115234, "step": 81370 }, { "epoch": 0.97, "learning_rate": 1.0166186532339362e-08, "logits/chosen": -2.861856460571289, "logits/rejected": -2.371506690979004, "logps/chosen": -112.07979583740234, "logps/rejected": -892.3753051757812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6614474058151245, "rewards/margins": 7.884240627288818, "rewards/rejected": -8.54568862915039, "step": 81380 }, { "epoch": 0.97, "learning_rate": 1.0072292816463159e-08, "logits/chosen": -2.8475403785705566, "logits/rejected": -2.3236570358276367, "logps/chosen": -98.49193572998047, "logps/rejected": -973.5979614257812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5340974926948547, "rewards/margins": 8.818853378295898, "rewards/rejected": -9.352949142456055, "step": 81390 }, { "epoch": 0.97, "learning_rate": 9.978833835048995e-09, "logits/chosen": -2.864985704421997, "logits/rejected": -2.4738242626190186, "logps/chosen": -79.47584533691406, "logps/rejected": -831.9876708984375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.40579062700271606, "rewards/margins": 7.537686824798584, "rewards/rejected": -7.943476676940918, "step": 81400 }, { "epoch": 0.97, "learning_rate": 9.885809604414654e-09, "logits/chosen": -2.9078359603881836, "logits/rejected": -2.269418478012085, "logps/chosen": -110.4411392211914, "logps/rejected": -994.498046875, "loss": 0.0865, "rewards/accuracies": 1.0, "rewards/chosen": -0.6341773271560669, "rewards/margins": 8.922142028808594, "rewards/rejected": -9.556318283081055, "step": 81410 }, { "epoch": 0.97, "learning_rate": 9.79322014080214e-09, "logits/chosen": -2.858553171157837, "logits/rejected": -2.3180227279663086, "logps/chosen": -102.36552429199219, "logps/rejected": -859.2188720703125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.577616274356842, "rewards/margins": 7.628783226013184, "rewards/rejected": -8.206399917602539, "step": 81420 }, { "epoch": 0.97, "learning_rate": 9.701065460376857e-09, "logits/chosen": -2.902866840362549, "logits/rejected": -2.414966344833374, "logps/chosen": -105.09024810791016, "logps/rejected": -908.0978393554688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6219261884689331, "rewards/margins": 8.087224960327148, "rewards/rejected": -8.709150314331055, "step": 81430 }, { "epoch": 0.97, "learning_rate": 9.609345579229268e-09, "logits/chosen": -2.8199338912963867, "logits/rejected": -2.3597638607025146, "logps/chosen": -98.64885711669922, "logps/rejected": -941.4656982421875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5499075055122375, "rewards/margins": 8.477924346923828, "rewards/rejected": -9.027831077575684, "step": 81440 }, { "epoch": 0.97, "learning_rate": 9.51806051337295e-09, "logits/chosen": -2.889814853668213, "logits/rejected": -2.3570942878723145, "logps/chosen": -104.16119384765625, "logps/rejected": -934.1404418945312, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.5356674790382385, "rewards/margins": 8.425776481628418, "rewards/rejected": -8.961444854736328, "step": 81450 }, { "epoch": 0.98, "learning_rate": 9.427210278746545e-09, "logits/chosen": -2.8835721015930176, "logits/rejected": -2.459245443344116, "logps/chosen": -85.45018005371094, "logps/rejected": -915.5735473632812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4242408275604248, "rewards/margins": 8.342893600463867, "rewards/rejected": -8.767134666442871, "step": 81460 }, { "epoch": 0.98, "learning_rate": 9.336794891211532e-09, "logits/chosen": -2.898343801498413, "logits/rejected": -2.3651256561279297, "logps/chosen": -106.22164154052734, "logps/rejected": -899.2108154296875, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -0.634334921836853, "rewards/margins": 7.975241184234619, "rewards/rejected": -8.609576225280762, "step": 81470 }, { "epoch": 0.98, "learning_rate": 9.246814366555002e-09, "logits/chosen": -2.8891241550445557, "logits/rejected": -2.5674610137939453, "logps/chosen": -93.00907897949219, "logps/rejected": -780.3259887695312, "loss": 0.0806, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5410817265510559, "rewards/margins": 6.8956193923950195, "rewards/rejected": -7.436700344085693, "step": 81480 }, { "epoch": 0.98, "learning_rate": 9.15726872048689e-09, "logits/chosen": -2.8915469646453857, "logits/rejected": -2.6137092113494873, "logps/chosen": -94.4599838256836, "logps/rejected": -793.7838134765625, "loss": 0.1027, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5554496049880981, "rewards/margins": 6.999804496765137, "rewards/rejected": -7.555253505706787, "step": 81490 }, { "epoch": 0.98, "learning_rate": 9.068157968641634e-09, "logits/chosen": -2.914203405380249, "logits/rejected": -2.494960308074951, "logps/chosen": -88.94834899902344, "logps/rejected": -823.1922607421875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.44260019063949585, "rewards/margins": 7.409224033355713, "rewards/rejected": -7.851823329925537, "step": 81500 }, { "epoch": 0.98, "learning_rate": 8.979482126577621e-09, "logits/chosen": -2.8270606994628906, "logits/rejected": -2.2203171253204346, "logps/chosen": -113.40335845947266, "logps/rejected": -1047.4566650390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6419254541397095, "rewards/margins": 9.437551498413086, "rewards/rejected": -10.079476356506348, "step": 81510 }, { "epoch": 0.98, "learning_rate": 8.891241209777468e-09, "logits/chosen": -2.8626036643981934, "logits/rejected": -2.428449869155884, "logps/chosen": -84.52450561523438, "logps/rejected": -896.4890747070312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.4347590506076813, "rewards/margins": 8.14552116394043, "rewards/rejected": -8.580280303955078, "step": 81520 }, { "epoch": 0.98, "learning_rate": 8.80343523364774e-09, "logits/chosen": -2.895043134689331, "logits/rejected": -2.389286518096924, "logps/chosen": -111.16097259521484, "logps/rejected": -1011.2550659179688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6433250308036804, "rewards/margins": 9.067222595214844, "rewards/rejected": -9.710548400878906, "step": 81530 }, { "epoch": 0.98, "learning_rate": 8.716064213519504e-09, "logits/chosen": -2.91510272026062, "logits/rejected": -2.288252115249634, "logps/chosen": -123.5668716430664, "logps/rejected": -1004.2346801757812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6480039358139038, "rewards/margins": 8.993207931518555, "rewards/rejected": -9.64121150970459, "step": 81540 }, { "epoch": 0.98, "learning_rate": 8.629128164647227e-09, "logits/chosen": -2.866577625274658, "logits/rejected": -2.115442991256714, "logps/chosen": -139.86102294921875, "logps/rejected": -1050.333740234375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8483592867851257, "rewards/margins": 9.238256454467773, "rewards/rejected": -10.086614608764648, "step": 81550 }, { "epoch": 0.98, "learning_rate": 8.542627102209323e-09, "logits/chosen": -2.94852352142334, "logits/rejected": -2.399031162261963, "logps/chosen": -92.7716293334961, "logps/rejected": -974.6112060546875, "loss": 0.1482, "rewards/accuracies": 1.0, "rewards/chosen": -0.46399134397506714, "rewards/margins": 8.888315200805664, "rewards/rejected": -9.352306365966797, "step": 81560 }, { "epoch": 0.98, "learning_rate": 8.456561041309264e-09, "logits/chosen": -2.8799703121185303, "logits/rejected": -2.2497825622558594, "logps/chosen": -124.4178237915039, "logps/rejected": -914.8552856445312, "loss": 0.0391, "rewards/accuracies": 1.0, "rewards/chosen": -0.7458555698394775, "rewards/margins": 8.011747360229492, "rewards/rejected": -8.757603645324707, "step": 81570 }, { "epoch": 0.98, "learning_rate": 8.370929996973642e-09, "logits/chosen": -2.857997179031372, "logits/rejected": -2.4302706718444824, "logps/chosen": -88.53472900390625, "logps/rejected": -895.0115356445312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.44421276450157166, "rewards/margins": 8.116331100463867, "rewards/rejected": -8.560543060302734, "step": 81580 }, { "epoch": 0.98, "learning_rate": 8.285733984153554e-09, "logits/chosen": -2.9358272552490234, "logits/rejected": -2.5671513080596924, "logps/chosen": -100.2641830444336, "logps/rejected": -950.3045043945312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5584529638290405, "rewards/margins": 8.554868698120117, "rewards/rejected": -9.113320350646973, "step": 81590 }, { "epoch": 0.98, "learning_rate": 8.200973017723767e-09, "logits/chosen": -2.9073686599731445, "logits/rejected": -2.4493088722229004, "logps/chosen": -112.08976745605469, "logps/rejected": -914.8858642578125, "loss": 0.0987, "rewards/accuracies": 1.0, "rewards/chosen": -0.6186189651489258, "rewards/margins": 8.157111167907715, "rewards/rejected": -8.775731086730957, "step": 81600 }, { "epoch": 0.98, "learning_rate": 8.116647112483278e-09, "logits/chosen": -2.8402440547943115, "logits/rejected": -2.366121768951416, "logps/chosen": -131.65469360351562, "logps/rejected": -947.1897583007812, "loss": 0.155, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8031134605407715, "rewards/margins": 8.287251472473145, "rewards/rejected": -9.090363502502441, "step": 81610 }, { "epoch": 0.98, "learning_rate": 8.032756283155307e-09, "logits/chosen": -2.9109740257263184, "logits/rejected": -2.4580657482147217, "logps/chosen": -94.16046905517578, "logps/rejected": -855.8734130859375, "loss": 0.0956, "rewards/accuracies": 1.0, "rewards/chosen": -0.47567057609558105, "rewards/margins": 7.677533149719238, "rewards/rejected": -8.153203010559082, "step": 81620 }, { "epoch": 0.98, "learning_rate": 7.949300544387028e-09, "logits/chosen": -2.9015920162200928, "logits/rejected": -2.271029233932495, "logps/chosen": -106.2795639038086, "logps/rejected": -1002.76513671875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5641668438911438, "rewards/margins": 9.067560195922852, "rewards/rejected": -9.631728172302246, "step": 81630 }, { "epoch": 0.98, "learning_rate": 7.866279910749563e-09, "logits/chosen": -2.8821423053741455, "logits/rejected": -2.0963692665100098, "logps/chosen": -128.8809814453125, "logps/rejected": -1062.6199951171875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7889412045478821, "rewards/margins": 9.429750442504883, "rewards/rejected": -10.21868896484375, "step": 81640 }, { "epoch": 0.98, "learning_rate": 7.783694396737984e-09, "logits/chosen": -2.8882975578308105, "logits/rejected": -2.2391180992126465, "logps/chosen": -115.34742736816406, "logps/rejected": -979.9498901367188, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.6789870262145996, "rewards/margins": 8.734465599060059, "rewards/rejected": -9.413453102111816, "step": 81650 }, { "epoch": 0.98, "learning_rate": 7.70154401677159e-09, "logits/chosen": -2.916119337081909, "logits/rejected": -2.2301411628723145, "logps/chosen": -121.59770202636719, "logps/rejected": -967.0076293945312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6533936262130737, "rewards/margins": 8.609031677246094, "rewards/rejected": -9.262426376342773, "step": 81660 }, { "epoch": 0.98, "learning_rate": 7.619828785193629e-09, "logits/chosen": -2.886758327484131, "logits/rejected": -2.1660380363464355, "logps/chosen": -112.159423828125, "logps/rejected": -1042.03076171875, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -0.6149859428405762, "rewards/margins": 9.416032791137695, "rewards/rejected": -10.03101921081543, "step": 81670 }, { "epoch": 0.98, "learning_rate": 7.538548716271022e-09, "logits/chosen": -2.865297794342041, "logits/rejected": -2.2901346683502197, "logps/chosen": -122.71795654296875, "logps/rejected": -949.5062255859375, "loss": 0.0956, "rewards/accuracies": 1.0, "rewards/chosen": -0.720670759677887, "rewards/margins": 8.395517349243164, "rewards/rejected": -9.116189002990723, "step": 81680 }, { "epoch": 0.98, "learning_rate": 7.457703824195473e-09, "logits/chosen": -2.8764538764953613, "logits/rejected": -2.2189078330993652, "logps/chosen": -118.90354919433594, "logps/rejected": -1024.4974365234375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6591792106628418, "rewards/margins": 9.197943687438965, "rewards/rejected": -9.857123374938965, "step": 81690 }, { "epoch": 0.98, "learning_rate": 7.3772941230820795e-09, "logits/chosen": -2.9520320892333984, "logits/rejected": -2.6238417625427246, "logps/chosen": -119.3448715209961, "logps/rejected": -847.5611572265625, "loss": 0.1407, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7456451654434204, "rewards/margins": 7.35312032699585, "rewards/rejected": -8.09876537322998, "step": 81700 }, { "epoch": 0.98, "learning_rate": 7.297319626970168e-09, "logits/chosen": -2.8504395484924316, "logits/rejected": -2.3289523124694824, "logps/chosen": -104.6157455444336, "logps/rejected": -843.4183349609375, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.5829662084579468, "rewards/margins": 7.4909348487854, "rewards/rejected": -8.073901176452637, "step": 81710 }, { "epoch": 0.98, "learning_rate": 7.2177803498230114e-09, "logits/chosen": -2.828430652618408, "logits/rejected": -2.096190929412842, "logps/chosen": -135.66232299804688, "logps/rejected": -1056.850830078125, "loss": 0.0911, "rewards/accuracies": 1.0, "rewards/chosen": -0.7926823496818542, "rewards/margins": 9.373023986816406, "rewards/rejected": -10.165704727172852, "step": 81720 }, { "epoch": 0.98, "learning_rate": 7.138676305527836e-09, "logits/chosen": -2.8991456031799316, "logits/rejected": -2.2365188598632812, "logps/chosen": -130.49559020996094, "logps/rejected": -979.72802734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7877848744392395, "rewards/margins": 8.619054794311523, "rewards/rejected": -9.406839370727539, "step": 81730 }, { "epoch": 0.98, "learning_rate": 7.060007507896371e-09, "logits/chosen": -2.8822383880615234, "logits/rejected": -2.092167377471924, "logps/chosen": -134.635986328125, "logps/rejected": -1098.576416015625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8600686192512512, "rewards/margins": 9.716938018798828, "rewards/rejected": -10.577006340026855, "step": 81740 }, { "epoch": 0.98, "learning_rate": 6.981773970663741e-09, "logits/chosen": -2.89866042137146, "logits/rejected": -2.3066463470458984, "logps/chosen": -107.3642349243164, "logps/rejected": -889.5244140625, "loss": 0.1345, "rewards/accuracies": 1.0, "rewards/chosen": -0.6099386215209961, "rewards/margins": 7.896646976470947, "rewards/rejected": -8.506586074829102, "step": 81750 }, { "epoch": 0.98, "learning_rate": 6.9039757074892964e-09, "logits/chosen": -2.8598244190216064, "logits/rejected": -2.34122633934021, "logps/chosen": -118.10929107666016, "logps/rejected": -897.0, "loss": 0.0307, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7223321199417114, "rewards/margins": 7.855672359466553, "rewards/rejected": -8.578004837036133, "step": 81760 }, { "epoch": 0.98, "learning_rate": 6.826612731956339e-09, "logits/chosen": -2.9102587699890137, "logits/rejected": -2.684844970703125, "logps/chosen": -107.62870788574219, "logps/rejected": -717.861572265625, "loss": 0.1481, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7185945510864258, "rewards/margins": 6.1010847091674805, "rewards/rejected": -6.819679260253906, "step": 81770 }, { "epoch": 0.98, "learning_rate": 6.749685057572397e-09, "logits/chosen": -2.8988163471221924, "logits/rejected": -2.2027883529663086, "logps/chosen": -107.1438980102539, "logps/rejected": -977.6982421875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5679789781570435, "rewards/margins": 8.814443588256836, "rewards/rejected": -9.382423400878906, "step": 81780 }, { "epoch": 0.98, "learning_rate": 6.673192697768671e-09, "logits/chosen": -2.9197745323181152, "logits/rejected": -2.191889762878418, "logps/chosen": -118.0968246459961, "logps/rejected": -989.46142578125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.6659923791885376, "rewards/margins": 8.844228744506836, "rewards/rejected": -9.510220527648926, "step": 81790 }, { "epoch": 0.98, "learning_rate": 6.597135665900589e-09, "logits/chosen": -2.8700194358825684, "logits/rejected": -2.1364967823028564, "logps/chosen": -135.57467651367188, "logps/rejected": -1081.42822265625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7635923624038696, "rewards/margins": 9.637483596801758, "rewards/rejected": -10.40107536315918, "step": 81800 }, { "epoch": 0.98, "learning_rate": 6.521513975247529e-09, "logits/chosen": -2.8465323448181152, "logits/rejected": -2.113647222518921, "logps/chosen": -138.06924438476562, "logps/rejected": -1085.078125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7982884645462036, "rewards/margins": 9.644372940063477, "rewards/rejected": -10.442660331726074, "step": 81810 }, { "epoch": 0.98, "learning_rate": 6.446327639012817e-09, "logits/chosen": -2.890106439590454, "logits/rejected": -2.413573741912842, "logps/chosen": -76.95671081542969, "logps/rejected": -794.4427490234375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.3579068183898926, "rewards/margins": 7.205499172210693, "rewards/rejected": -7.563406467437744, "step": 81820 }, { "epoch": 0.98, "learning_rate": 6.3715766703237315e-09, "logits/chosen": -2.930537700653076, "logits/rejected": -2.1977686882019043, "logps/chosen": -118.42594146728516, "logps/rejected": -1002.6746826171875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6503585577011108, "rewards/margins": 8.986371994018555, "rewards/rejected": -9.636730194091797, "step": 81830 }, { "epoch": 0.98, "learning_rate": 6.2972610822314985e-09, "logits/chosen": -2.901623487472534, "logits/rejected": -2.4199540615081787, "logps/chosen": -108.73783874511719, "logps/rejected": -952.4027099609375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5882531404495239, "rewards/margins": 8.546753883361816, "rewards/rejected": -9.13500690460205, "step": 81840 }, { "epoch": 0.98, "learning_rate": 6.223380887711572e-09, "logits/chosen": -2.8560612201690674, "logits/rejected": -2.3318569660186768, "logps/chosen": -94.3981704711914, "logps/rejected": -901.2097778320312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5135117769241333, "rewards/margins": 8.131882667541504, "rewards/rejected": -8.645395278930664, "step": 81850 }, { "epoch": 0.98, "learning_rate": 6.1499360996633565e-09, "logits/chosen": -2.91977858543396, "logits/rejected": -2.0207269191741943, "logps/chosen": -135.8677978515625, "logps/rejected": -1104.03076171875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8203972578048706, "rewards/margins": 9.798197746276855, "rewards/rejected": -10.6185941696167, "step": 81860 }, { "epoch": 0.98, "learning_rate": 6.0769267309096494e-09, "logits/chosen": -2.933960437774658, "logits/rejected": -2.4474236965179443, "logps/chosen": -111.98995208740234, "logps/rejected": -1008.26123046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6721990704536438, "rewards/margins": 9.019603729248047, "rewards/rejected": -9.691802978515625, "step": 81870 }, { "epoch": 0.98, "learning_rate": 6.0043527941980314e-09, "logits/chosen": -2.864966869354248, "logits/rejected": -2.4439074993133545, "logps/chosen": -96.29402923583984, "logps/rejected": -887.86865234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5294519662857056, "rewards/margins": 7.971275329589844, "rewards/rejected": -8.500726699829102, "step": 81880 }, { "epoch": 0.98, "learning_rate": 5.932214302199757e-09, "logits/chosen": -2.8757381439208984, "logits/rejected": -2.2174699306488037, "logps/chosen": -119.07659912109375, "logps/rejected": -1045.39306640625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.69267737865448, "rewards/margins": 9.360661506652832, "rewards/rejected": -10.053337097167969, "step": 81890 }, { "epoch": 0.98, "learning_rate": 5.86051126750975e-09, "logits/chosen": -2.920053005218506, "logits/rejected": -2.531078815460205, "logps/chosen": -85.09373474121094, "logps/rejected": -900.21435546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4186830520629883, "rewards/margins": 8.19529914855957, "rewards/rejected": -8.613981246948242, "step": 81900 }, { "epoch": 0.98, "learning_rate": 5.78924370264744e-09, "logits/chosen": -2.905538558959961, "logits/rejected": -2.3798022270202637, "logps/chosen": -121.7985610961914, "logps/rejected": -868.72509765625, "loss": 0.0984, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7375177145004272, "rewards/margins": 7.568568229675293, "rewards/rejected": -8.306085586547852, "step": 81910 }, { "epoch": 0.98, "learning_rate": 5.718411620055652e-09, "logits/chosen": -2.8979923725128174, "logits/rejected": -2.592984437942505, "logps/chosen": -79.9234390258789, "logps/rejected": -853.3624877929688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.36936232447624207, "rewards/margins": 7.778868675231934, "rewards/rejected": -8.148232460021973, "step": 81920 }, { "epoch": 0.98, "learning_rate": 5.648015032101717e-09, "logits/chosen": -2.916893720626831, "logits/rejected": -2.5542187690734863, "logps/chosen": -81.04186248779297, "logps/rejected": -821.5291748046875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.41293033957481384, "rewards/margins": 7.430184364318848, "rewards/rejected": -7.843115329742432, "step": 81930 }, { "epoch": 0.98, "learning_rate": 5.578053951076634e-09, "logits/chosen": -2.8847293853759766, "logits/rejected": -2.3848161697387695, "logps/chosen": -99.76249694824219, "logps/rejected": -862.7786865234375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5497299432754517, "rewards/margins": 7.69125509262085, "rewards/rejected": -8.240984916687012, "step": 81940 }, { "epoch": 0.98, "learning_rate": 5.508528389195355e-09, "logits/chosen": -2.8596746921539307, "logits/rejected": -2.45180082321167, "logps/chosen": -126.71626281738281, "logps/rejected": -782.9636840820312, "loss": 0.2068, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8450890779495239, "rewards/margins": 6.605849266052246, "rewards/rejected": -7.4509382247924805, "step": 81950 }, { "epoch": 0.98, "learning_rate": 5.439438358596782e-09, "logits/chosen": -2.873521089553833, "logits/rejected": -2.3451056480407715, "logps/chosen": -96.98060607910156, "logps/rejected": -864.4759521484375, "loss": 0.1004, "rewards/accuracies": 1.0, "rewards/chosen": -0.5188750624656677, "rewards/margins": 7.7398810386657715, "rewards/rejected": -8.258756637573242, "step": 81960 }, { "epoch": 0.98, "learning_rate": 5.370783871344043e-09, "logits/chosen": -2.858060359954834, "logits/rejected": -2.1204357147216797, "logps/chosen": -134.2464599609375, "logps/rejected": -903.6194458007812, "loss": 0.0707, "rewards/accuracies": 1.0, "rewards/chosen": -0.8040646314620972, "rewards/margins": 7.8440704345703125, "rewards/rejected": -8.6481351852417, "step": 81970 }, { "epoch": 0.98, "learning_rate": 5.30256493942366e-09, "logits/chosen": -2.8906407356262207, "logits/rejected": -2.4330554008483887, "logps/chosen": -87.60740661621094, "logps/rejected": -884.2975463867188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.458691269159317, "rewards/margins": 8.009316444396973, "rewards/rejected": -8.468008041381836, "step": 81980 }, { "epoch": 0.98, "learning_rate": 5.234781574746939e-09, "logits/chosen": -2.885289430618286, "logits/rejected": -2.3756940364837646, "logps/chosen": -91.26537322998047, "logps/rejected": -902.6701049804688, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.4922068119049072, "rewards/margins": 8.16648006439209, "rewards/rejected": -8.658686637878418, "step": 81990 }, { "epoch": 0.98, "learning_rate": 5.167433789148579e-09, "logits/chosen": -2.8720085620880127, "logits/rejected": -2.5177853107452393, "logps/chosen": -135.21905517578125, "logps/rejected": -875.1085205078125, "loss": 0.1492, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8997453451156616, "rewards/margins": 7.4613823890686035, "rewards/rejected": -8.361127853393555, "step": 82000 }, { "epoch": 0.98, "learning_rate": 5.1005215943869535e-09, "logits/chosen": -2.8917887210845947, "logits/rejected": -2.341567039489746, "logps/chosen": -104.6871337890625, "logps/rejected": -996.8009643554688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.566062867641449, "rewards/margins": 9.019416809082031, "rewards/rejected": -9.585479736328125, "step": 82010 }, { "epoch": 0.98, "learning_rate": 5.034045002144938e-09, "logits/chosen": -2.844907283782959, "logits/rejected": -2.405547618865967, "logps/chosen": -109.15431213378906, "logps/rejected": -924.6854248046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6800175905227661, "rewards/margins": 8.176783561706543, "rewards/rejected": -8.856801986694336, "step": 82020 }, { "epoch": 0.98, "learning_rate": 4.968004024029083e-09, "logits/chosen": -2.8986828327178955, "logits/rejected": -2.18125581741333, "logps/chosen": -127.35323333740234, "logps/rejected": -933.0787963867188, "loss": 0.0741, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7357385158538818, "rewards/margins": 8.202648162841797, "rewards/rejected": -8.938386917114258, "step": 82030 }, { "epoch": 0.98, "learning_rate": 4.9023986715704405e-09, "logits/chosen": -2.851506471633911, "logits/rejected": -2.3635318279266357, "logps/chosen": -98.78038787841797, "logps/rejected": -919.3424072265625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5016714334487915, "rewards/margins": 8.293844223022461, "rewards/rejected": -8.795515060424805, "step": 82040 }, { "epoch": 0.98, "learning_rate": 4.837228956222906e-09, "logits/chosen": -2.863424777984619, "logits/rejected": -2.134935140609741, "logps/chosen": -127.03802490234375, "logps/rejected": -904.4519653320312, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.7423629760742188, "rewards/margins": 7.908509254455566, "rewards/rejected": -8.650872230529785, "step": 82050 }, { "epoch": 0.98, "learning_rate": 4.772494889365153e-09, "logits/chosen": -2.885377883911133, "logits/rejected": -2.4473395347595215, "logps/chosen": -81.93672943115234, "logps/rejected": -882.8082275390625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.40046700835227966, "rewards/margins": 8.054051399230957, "rewards/rejected": -8.45451831817627, "step": 82060 }, { "epoch": 0.98, "learning_rate": 4.708196482299809e-09, "logits/chosen": -2.8891782760620117, "logits/rejected": -2.1904072761535645, "logps/chosen": -117.68804931640625, "logps/rejected": -869.7262573242188, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.6308343410491943, "rewards/margins": 7.677803039550781, "rewards/rejected": -8.308637619018555, "step": 82070 }, { "epoch": 0.98, "learning_rate": 4.644333746252894e-09, "logits/chosen": -2.9205214977264404, "logits/rejected": -2.3149166107177734, "logps/chosen": -108.4891586303711, "logps/rejected": -972.80322265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6504790186882019, "rewards/margins": 8.680429458618164, "rewards/rejected": -9.330907821655273, "step": 82080 }, { "epoch": 0.98, "learning_rate": 4.5809066923746536e-09, "logits/chosen": -2.8769121170043945, "logits/rejected": -2.435574769973755, "logps/chosen": -115.78556060791016, "logps/rejected": -793.5466918945312, "loss": 0.141, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6897880434989929, "rewards/margins": 6.875291347503662, "rewards/rejected": -7.565079689025879, "step": 82090 }, { "epoch": 0.98, "learning_rate": 4.517915331739286e-09, "logits/chosen": -2.902339458465576, "logits/rejected": -2.2245712280273438, "logps/chosen": -108.53901672363281, "logps/rejected": -1002.8780517578125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5946251749992371, "rewards/margins": 9.041425704956055, "rewards/rejected": -9.636051177978516, "step": 82100 }, { "epoch": 0.98, "learning_rate": 4.4553596753452165e-09, "logits/chosen": -2.9261341094970703, "logits/rejected": -2.4193673133850098, "logps/chosen": -87.31465148925781, "logps/rejected": -863.8931884765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.44929060339927673, "rewards/margins": 7.794406890869141, "rewards/rejected": -8.243697166442871, "step": 82110 }, { "epoch": 0.98, "learning_rate": 4.393239734114263e-09, "logits/chosen": -2.8535892963409424, "logits/rejected": -2.3402833938598633, "logps/chosen": -109.9458999633789, "logps/rejected": -845.1932373046875, "loss": 0.1536, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6938897371292114, "rewards/margins": 7.380925178527832, "rewards/rejected": -8.074813842773438, "step": 82120 }, { "epoch": 0.98, "learning_rate": 4.331555518892194e-09, "logits/chosen": -2.8648922443389893, "logits/rejected": -2.1932239532470703, "logps/chosen": -113.9332046508789, "logps/rejected": -860.6033325195312, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.6463114023208618, "rewards/margins": 7.5720696449279785, "rewards/rejected": -8.21838092803955, "step": 82130 }, { "epoch": 0.98, "learning_rate": 4.270307040449284e-09, "logits/chosen": -2.8942911624908447, "logits/rejected": -2.358464479446411, "logps/chosen": -118.34742736816406, "logps/rejected": -878.1224365234375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7081519365310669, "rewards/margins": 7.672576904296875, "rewards/rejected": -8.380727767944336, "step": 82140 }, { "epoch": 0.98, "learning_rate": 4.2094943094792005e-09, "logits/chosen": -2.879408359527588, "logits/rejected": -2.4183671474456787, "logps/chosen": -99.55410766601562, "logps/rejected": -848.3936767578125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5245335698127747, "rewards/margins": 7.583338737487793, "rewards/rejected": -8.107872009277344, "step": 82150 }, { "epoch": 0.98, "learning_rate": 4.149117336599562e-09, "logits/chosen": -2.873065710067749, "logits/rejected": -2.3858959674835205, "logps/chosen": -75.0503158569336, "logps/rejected": -843.1287231445312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.35868722200393677, "rewards/margins": 7.687723636627197, "rewards/rejected": -8.046411514282227, "step": 82160 }, { "epoch": 0.98, "learning_rate": 4.089176132352213e-09, "logits/chosen": -2.8588316440582275, "logits/rejected": -2.4674060344696045, "logps/chosen": -88.24417114257812, "logps/rejected": -855.2847900390625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4709417223930359, "rewards/margins": 7.699085235595703, "rewards/rejected": -8.170026779174805, "step": 82170 }, { "epoch": 0.98, "learning_rate": 4.0296707072023935e-09, "logits/chosen": -2.9225287437438965, "logits/rejected": -2.233341693878174, "logps/chosen": -131.58633422851562, "logps/rejected": -941.29150390625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7938497066497803, "rewards/margins": 8.232205390930176, "rewards/rejected": -9.026054382324219, "step": 82180 }, { "epoch": 0.98, "learning_rate": 3.9706010715401254e-09, "logits/chosen": -2.8863189220428467, "logits/rejected": -2.4733548164367676, "logps/chosen": -75.96595764160156, "logps/rejected": -792.7689819335938, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.35771483182907104, "rewards/margins": 7.207575798034668, "rewards/rejected": -7.5652899742126465, "step": 82190 }, { "epoch": 0.98, "learning_rate": 3.911967235678271e-09, "logits/chosen": -2.8591575622558594, "logits/rejected": -2.3384945392608643, "logps/chosen": -148.52761840820312, "logps/rejected": -895.8177490234375, "loss": 0.1819, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9849305152893066, "rewards/margins": 7.588674068450928, "rewards/rejected": -8.57360553741455, "step": 82200 }, { "epoch": 0.98, "learning_rate": 3.853769209854197e-09, "logits/chosen": -2.870312213897705, "logits/rejected": -2.198995351791382, "logps/chosen": -136.6923065185547, "logps/rejected": -1093.1708984375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.8428829312324524, "rewards/margins": 9.674848556518555, "rewards/rejected": -10.517732620239258, "step": 82210 }, { "epoch": 0.98, "learning_rate": 3.796007004229496e-09, "logits/chosen": -2.8945698738098145, "logits/rejected": -2.1745545864105225, "logps/chosen": -128.423583984375, "logps/rejected": -1039.1468505859375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6747127175331116, "rewards/margins": 9.292012214660645, "rewards/rejected": -9.966724395751953, "step": 82220 }, { "epoch": 0.98, "learning_rate": 3.7386806288891575e-09, "logits/chosen": -2.8597779273986816, "logits/rejected": -2.3644094467163086, "logps/chosen": -101.00647735595703, "logps/rejected": -916.7327270507812, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5728787183761597, "rewards/margins": 8.201696395874023, "rewards/rejected": -8.774574279785156, "step": 82230 }, { "epoch": 0.98, "learning_rate": 3.6817900938418417e-09, "logits/chosen": -2.8485164642333984, "logits/rejected": -2.1150407791137695, "logps/chosen": -145.43026733398438, "logps/rejected": -1034.000732421875, "loss": 0.0974, "rewards/accuracies": 1.0, "rewards/chosen": -0.9288977384567261, "rewards/margins": 9.003410339355469, "rewards/rejected": -9.9323091506958, "step": 82240 }, { "epoch": 0.98, "learning_rate": 3.6253354090209915e-09, "logits/chosen": -2.908862829208374, "logits/rejected": -2.436511516571045, "logps/chosen": -87.61278533935547, "logps/rejected": -944.5107421875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.44632309675216675, "rewards/margins": 8.627344131469727, "rewards/rejected": -9.073667526245117, "step": 82250 }, { "epoch": 0.98, "learning_rate": 3.5693165842831668e-09, "logits/chosen": -2.86379337310791, "logits/rejected": -2.5622920989990234, "logps/chosen": -65.26533508300781, "logps/rejected": -866.2981567382812, "loss": 0.1464, "rewards/accuracies": 1.0, "rewards/chosen": -0.2793058454990387, "rewards/margins": 8.007499694824219, "rewards/rejected": -8.286805152893066, "step": 82260 }, { "epoch": 0.98, "learning_rate": 3.5137336294088776e-09, "logits/chosen": -2.847527027130127, "logits/rejected": -2.1515281200408936, "logps/chosen": -142.59027099609375, "logps/rejected": -1002.8362426757812, "loss": 0.072, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8839357495307922, "rewards/margins": 8.726762771606445, "rewards/rejected": -9.610698699951172, "step": 82270 }, { "epoch": 0.98, "learning_rate": 3.4585865541031383e-09, "logits/chosen": -2.889219284057617, "logits/rejected": -1.9567285776138306, "logps/chosen": -144.639892578125, "logps/rejected": -1147.488037109375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8277467489242554, "rewards/margins": 10.232894897460938, "rewards/rejected": -11.06064224243164, "step": 82280 }, { "epoch": 0.99, "learning_rate": 3.4038753679943582e-09, "logits/chosen": -2.8830111026763916, "logits/rejected": -2.3572826385498047, "logps/chosen": -100.78337097167969, "logps/rejected": -927.4645385742188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5520293116569519, "rewards/margins": 8.333823204040527, "rewards/rejected": -8.885851860046387, "step": 82290 }, { "epoch": 0.99, "learning_rate": 3.349600080634896e-09, "logits/chosen": -2.9165375232696533, "logits/rejected": -2.4289498329162598, "logps/chosen": -93.56263732910156, "logps/rejected": -828.8330078125, "loss": 0.2416, "rewards/accuracies": 1.0, "rewards/chosen": -0.4466800093650818, "rewards/margins": 7.467458248138428, "rewards/rejected": -7.9141387939453125, "step": 82300 }, { "epoch": 0.99, "learning_rate": 3.295760701501338e-09, "logits/chosen": -2.8822741508483887, "logits/rejected": -2.1657557487487793, "logps/chosen": -134.47122192382812, "logps/rejected": -1083.07666015625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7937294244766235, "rewards/margins": 9.622953414916992, "rewards/rejected": -10.416682243347168, "step": 82310 }, { "epoch": 0.99, "learning_rate": 3.242357239993388e-09, "logits/chosen": -2.879750967025757, "logits/rejected": -2.0744214057922363, "logps/chosen": -124.44486236572266, "logps/rejected": -1103.4981689453125, "loss": 0.1352, "rewards/accuracies": 1.0, "rewards/chosen": -0.695334255695343, "rewards/margins": 9.940582275390625, "rewards/rejected": -10.63591480255127, "step": 82320 }, { "epoch": 0.99, "learning_rate": 3.1893897054355306e-09, "logits/chosen": -2.8885817527770996, "logits/rejected": -2.2132644653320312, "logps/chosen": -139.9477996826172, "logps/rejected": -1043.1971435546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8524128794670105, "rewards/margins": 9.168235778808594, "rewards/rejected": -10.020648002624512, "step": 82330 }, { "epoch": 0.99, "learning_rate": 3.1368581070756464e-09, "logits/chosen": -2.906557321548462, "logits/rejected": -2.582230567932129, "logps/chosen": -75.30948638916016, "logps/rejected": -870.8405151367188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.396183580160141, "rewards/margins": 7.9315361976623535, "rewards/rejected": -8.327719688415527, "step": 82340 }, { "epoch": 0.99, "learning_rate": 3.0847624540855658e-09, "logits/chosen": -2.861067533493042, "logits/rejected": -2.397775173187256, "logps/chosen": -106.87864685058594, "logps/rejected": -951.6134033203125, "loss": 0.1461, "rewards/accuracies": 1.0, "rewards/chosen": -0.6272919178009033, "rewards/margins": 8.493586540222168, "rewards/rejected": -9.120878219604492, "step": 82350 }, { "epoch": 0.99, "learning_rate": 3.033102755561068e-09, "logits/chosen": -2.860861301422119, "logits/rejected": -2.3931756019592285, "logps/chosen": -106.6261215209961, "logps/rejected": -885.9968872070312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6450824737548828, "rewards/margins": 7.820802211761475, "rewards/rejected": -8.4658842086792, "step": 82360 }, { "epoch": 0.99, "learning_rate": 2.981879020521883e-09, "logits/chosen": -2.8658714294433594, "logits/rejected": -2.0118637084960938, "logps/chosen": -132.3253631591797, "logps/rejected": -1075.040283203125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.8158256411552429, "rewards/margins": 9.51527214050293, "rewards/rejected": -10.331098556518555, "step": 82370 }, { "epoch": 0.99, "learning_rate": 2.9310912579114114e-09, "logits/chosen": -2.903769016265869, "logits/rejected": -2.4806718826293945, "logps/chosen": -98.7734603881836, "logps/rejected": -888.9386596679688, "loss": 0.1384, "rewards/accuracies": 1.0, "rewards/chosen": -0.5274950265884399, "rewards/margins": 7.975869655609131, "rewards/rejected": -8.503364562988281, "step": 82380 }, { "epoch": 0.99, "learning_rate": 2.8807394765970053e-09, "logits/chosen": -2.876880407333374, "logits/rejected": -2.3154451847076416, "logps/chosen": -115.606689453125, "logps/rejected": -913.4090576171875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6729649305343628, "rewards/margins": 8.0786771774292, "rewards/rejected": -8.751642227172852, "step": 82390 }, { "epoch": 0.99, "learning_rate": 2.8308236853702433e-09, "logits/chosen": -2.826777935028076, "logits/rejected": -2.149141788482666, "logps/chosen": -115.509033203125, "logps/rejected": -922.3311767578125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6971348524093628, "rewards/margins": 8.124009132385254, "rewards/rejected": -8.82114315032959, "step": 82400 }, { "epoch": 0.99, "learning_rate": 2.78134389294582e-09, "logits/chosen": -2.9003262519836426, "logits/rejected": -2.366276741027832, "logps/chosen": -117.88516998291016, "logps/rejected": -836.61279296875, "loss": 0.0283, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7122265100479126, "rewards/margins": 7.263688087463379, "rewards/rejected": -7.975914001464844, "step": 82410 }, { "epoch": 0.99, "learning_rate": 2.732300107963215e-09, "logits/chosen": -2.8475565910339355, "logits/rejected": -2.14847731590271, "logps/chosen": -105.88868713378906, "logps/rejected": -966.2354736328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5782300233840942, "rewards/margins": 8.678628921508789, "rewards/rejected": -9.256858825683594, "step": 82420 }, { "epoch": 0.99, "learning_rate": 2.683692338985022e-09, "logits/chosen": -2.8856496810913086, "logits/rejected": -2.7054224014282227, "logps/chosen": -62.69976043701172, "logps/rejected": -758.1175537109375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.2730702757835388, "rewards/margins": 6.940981864929199, "rewards/rejected": -7.214051723480225, "step": 82430 }, { "epoch": 0.99, "learning_rate": 2.6355205944980646e-09, "logits/chosen": -2.909348964691162, "logits/rejected": -2.295527935028076, "logps/chosen": -93.74859619140625, "logps/rejected": -879.7491455078125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5075174570083618, "rewards/margins": 7.913964748382568, "rewards/rejected": -8.421483039855957, "step": 82440 }, { "epoch": 0.99, "learning_rate": 2.587784882913391e-09, "logits/chosen": -2.8712730407714844, "logits/rejected": -2.2079739570617676, "logps/chosen": -131.90786743164062, "logps/rejected": -973.5641479492188, "loss": 0.1063, "rewards/accuracies": 1.0, "rewards/chosen": -0.7762260437011719, "rewards/margins": 8.554099082946777, "rewards/rejected": -9.330324172973633, "step": 82450 }, { "epoch": 0.99, "learning_rate": 2.5404852125648916e-09, "logits/chosen": -2.855463743209839, "logits/rejected": -2.197023868560791, "logps/chosen": -121.48319244384766, "logps/rejected": -1000.8504028320312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7097718119621277, "rewards/margins": 8.897436141967773, "rewards/rejected": -9.607209205627441, "step": 82460 }, { "epoch": 0.99, "learning_rate": 2.4936215917115146e-09, "logits/chosen": -2.8925538063049316, "logits/rejected": -2.60444974899292, "logps/chosen": -61.2681999206543, "logps/rejected": -789.1820068359375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.2509123384952545, "rewards/margins": 7.2794671058654785, "rewards/rejected": -7.530379295349121, "step": 82470 }, { "epoch": 0.99, "learning_rate": 2.447194028535049e-09, "logits/chosen": -2.8703951835632324, "logits/rejected": -1.9669986963272095, "logps/chosen": -150.9781951904297, "logps/rejected": -983.3556518554688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8865588307380676, "rewards/margins": 8.530903816223145, "rewards/rejected": -9.417463302612305, "step": 82480 }, { "epoch": 0.99, "learning_rate": 2.401202531142066e-09, "logits/chosen": -2.9004921913146973, "logits/rejected": -2.4804258346557617, "logps/chosen": -82.1052474975586, "logps/rejected": -892.0523681640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.42070022225379944, "rewards/margins": 8.117606163024902, "rewards/rejected": -8.53830623626709, "step": 82490 }, { "epoch": 0.99, "learning_rate": 2.3556471075622532e-09, "logits/chosen": -2.848975658416748, "logits/rejected": -2.3582425117492676, "logps/chosen": -99.30123901367188, "logps/rejected": -881.8640747070312, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4962059557437897, "rewards/margins": 7.939614772796631, "rewards/rejected": -8.435819625854492, "step": 82500 }, { "epoch": 0.99, "learning_rate": 2.3105277657495263e-09, "logits/chosen": -2.8460030555725098, "logits/rejected": -2.1938745975494385, "logps/chosen": -131.4679718017578, "logps/rejected": -956.9954833984375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.77082759141922, "rewards/margins": 8.397217750549316, "rewards/rejected": -9.168045043945312, "step": 82510 }, { "epoch": 0.99, "learning_rate": 2.2658445135817497e-09, "logits/chosen": -2.876478433609009, "logits/rejected": -2.0685362815856934, "logps/chosen": -132.93258666992188, "logps/rejected": -986.1409301757812, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.7563624382019043, "rewards/margins": 8.700180053710938, "rewards/rejected": -9.456543922424316, "step": 82520 }, { "epoch": 0.99, "learning_rate": 2.2215973588601837e-09, "logits/chosen": -2.8766372203826904, "logits/rejected": -2.3713760375976562, "logps/chosen": -107.97886657714844, "logps/rejected": -809.3292846679688, "loss": 0.0922, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6153691411018372, "rewards/margins": 7.086320400238037, "rewards/rejected": -7.701689720153809, "step": 82530 }, { "epoch": 0.99, "learning_rate": 2.177786309310592e-09, "logits/chosen": -2.839649200439453, "logits/rejected": -2.2862839698791504, "logps/chosen": -130.78402709960938, "logps/rejected": -993.1207275390625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8409018516540527, "rewards/margins": 8.685212135314941, "rewards/rejected": -9.526113510131836, "step": 82540 }, { "epoch": 0.99, "learning_rate": 2.1344113725818573e-09, "logits/chosen": -2.8563194274902344, "logits/rejected": -2.3129630088806152, "logps/chosen": -100.51444244384766, "logps/rejected": -928.38037109375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5325231552124023, "rewards/margins": 8.347681999206543, "rewards/rejected": -8.880206108093262, "step": 82550 }, { "epoch": 0.99, "learning_rate": 2.0914725562476422e-09, "logits/chosen": -2.928757905960083, "logits/rejected": -2.5709190368652344, "logps/chosen": -100.19181060791016, "logps/rejected": -864.5333251953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5732071399688721, "rewards/margins": 7.689328193664551, "rewards/rejected": -8.262535095214844, "step": 82560 }, { "epoch": 0.99, "learning_rate": 2.048969867804451e-09, "logits/chosen": -2.912684202194214, "logits/rejected": -2.4946446418762207, "logps/chosen": -103.55503845214844, "logps/rejected": -930.4805908203125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5887395739555359, "rewards/margins": 8.336126327514648, "rewards/rejected": -8.92486572265625, "step": 82570 }, { "epoch": 0.99, "learning_rate": 2.0069033146735695e-09, "logits/chosen": -2.8965985774993896, "logits/rejected": -2.4606544971466064, "logps/chosen": -99.74308013916016, "logps/rejected": -935.3685302734375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5003165006637573, "rewards/margins": 8.467828750610352, "rewards/rejected": -8.968145370483398, "step": 82580 }, { "epoch": 0.99, "learning_rate": 1.9652729041994002e-09, "logits/chosen": -2.8646979331970215, "logits/rejected": -2.215029716491699, "logps/chosen": -147.28848266601562, "logps/rejected": -1055.2847900390625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9330056309700012, "rewards/margins": 9.220221519470215, "rewards/rejected": -10.153227806091309, "step": 82590 }, { "epoch": 0.99, "learning_rate": 1.924078643650573e-09, "logits/chosen": -2.8891444206237793, "logits/rejected": -2.157243251800537, "logps/chosen": -135.00819396972656, "logps/rejected": -1024.0743408203125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7698008418083191, "rewards/margins": 9.073759078979492, "rewards/rejected": -9.843560218811035, "step": 82600 }, { "epoch": 0.99, "learning_rate": 1.883320540219391e-09, "logits/chosen": -2.9027533531188965, "logits/rejected": -2.520369052886963, "logps/chosen": -105.87834167480469, "logps/rejected": -860.7119140625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6295424103736877, "rewards/margins": 7.5909247398376465, "rewards/rejected": -8.220467567443848, "step": 82610 }, { "epoch": 0.99, "learning_rate": 1.8429986010223834e-09, "logits/chosen": -2.9145195484161377, "logits/rejected": -2.557577610015869, "logps/chosen": -81.90902709960938, "logps/rejected": -890.1904296875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.43803349137306213, "rewards/margins": 8.090758323669434, "rewards/rejected": -8.528791427612305, "step": 82620 }, { "epoch": 0.99, "learning_rate": 1.803112833099474e-09, "logits/chosen": -2.9068703651428223, "logits/rejected": -2.186474323272705, "logps/chosen": -117.27313232421875, "logps/rejected": -894.4544067382812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7113191485404968, "rewards/margins": 7.848387241363525, "rewards/rejected": -8.55970573425293, "step": 82630 }, { "epoch": 0.99, "learning_rate": 1.7636632434145373e-09, "logits/chosen": -2.861046075820923, "logits/rejected": -2.501141309738159, "logps/chosen": -82.30436706542969, "logps/rejected": -890.6251220703125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3902765214443207, "rewards/margins": 8.133283615112305, "rewards/rejected": -8.5235595703125, "step": 82640 }, { "epoch": 0.99, "learning_rate": 1.7246498388553966e-09, "logits/chosen": -2.8944599628448486, "logits/rejected": -2.527902126312256, "logps/chosen": -93.43355560302734, "logps/rejected": -801.9992065429688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4715103209018707, "rewards/margins": 7.178351402282715, "rewards/rejected": -7.649862766265869, "step": 82650 }, { "epoch": 0.99, "learning_rate": 1.6860726262338257e-09, "logits/chosen": -2.905433416366577, "logits/rejected": -2.4168803691864014, "logps/chosen": -94.21307373046875, "logps/rejected": -927.7122802734375, "loss": 0.1551, "rewards/accuracies": 1.0, "rewards/chosen": -0.4542859196662903, "rewards/margins": 8.432182312011719, "rewards/rejected": -8.886468887329102, "step": 82660 }, { "epoch": 0.99, "learning_rate": 1.6479316122852695e-09, "logits/chosen": -2.886186122894287, "logits/rejected": -2.502488374710083, "logps/chosen": -97.89802551269531, "logps/rejected": -842.9232177734375, "loss": 0.1511, "rewards/accuracies": 1.0, "rewards/chosen": -0.5672707557678223, "rewards/margins": 7.464814186096191, "rewards/rejected": -8.032084465026855, "step": 82670 }, { "epoch": 0.99, "learning_rate": 1.6102268036688462e-09, "logits/chosen": -2.853347063064575, "logits/rejected": -2.110396146774292, "logps/chosen": -125.6161117553711, "logps/rejected": -1062.2811279296875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7129459381103516, "rewards/margins": 9.504656791687012, "rewards/rejected": -10.21760368347168, "step": 82680 }, { "epoch": 0.99, "learning_rate": 1.5729582069679006e-09, "logits/chosen": -2.8919289112091064, "logits/rejected": -2.3539021015167236, "logps/chosen": -128.99468994140625, "logps/rejected": -898.5266723632812, "loss": 0.1239, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8413770794868469, "rewards/margins": 7.761909484863281, "rewards/rejected": -8.60328483581543, "step": 82690 }, { "epoch": 0.99, "learning_rate": 1.5361258286894498e-09, "logits/chosen": -2.861757516860962, "logits/rejected": -2.2905285358428955, "logps/chosen": -105.294189453125, "logps/rejected": -973.6900634765625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5977043509483337, "rewards/margins": 8.744437217712402, "rewards/rejected": -9.342142105102539, "step": 82700 }, { "epoch": 0.99, "learning_rate": 1.4997296752641831e-09, "logits/chosen": -2.866577625274658, "logits/rejected": -2.460541009902954, "logps/chosen": -81.9642333984375, "logps/rejected": -853.1296997070312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.41909533739089966, "rewards/margins": 7.7243218421936035, "rewards/rejected": -8.143416404724121, "step": 82710 }, { "epoch": 0.99, "learning_rate": 1.4637697530467398e-09, "logits/chosen": -2.8490381240844727, "logits/rejected": -2.2515320777893066, "logps/chosen": -120.25457763671875, "logps/rejected": -986.0849609375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.687182605266571, "rewards/margins": 8.783308982849121, "rewards/rejected": -9.470492362976074, "step": 82720 }, { "epoch": 0.99, "learning_rate": 1.428246068315986e-09, "logits/chosen": -2.892246961593628, "logits/rejected": -2.341207981109619, "logps/chosen": -103.42500305175781, "logps/rejected": -941.0120849609375, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -0.5535879135131836, "rewards/margins": 8.478549003601074, "rewards/rejected": -9.032137870788574, "step": 82730 }, { "epoch": 0.99, "learning_rate": 1.3931586272736275e-09, "logits/chosen": -2.921327590942383, "logits/rejected": -2.541919231414795, "logps/chosen": -72.37120819091797, "logps/rejected": -842.6844482421875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.3026842474937439, "rewards/margins": 7.746913909912109, "rewards/rejected": -8.049598693847656, "step": 82740 }, { "epoch": 0.99, "learning_rate": 1.3585074360464302e-09, "logits/chosen": -2.8919262886047363, "logits/rejected": -2.4398586750030518, "logps/chosen": -86.9664306640625, "logps/rejected": -918.4542846679688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4100170135498047, "rewards/margins": 8.383166313171387, "rewards/rejected": -8.793184280395508, "step": 82750 }, { "epoch": 0.99, "learning_rate": 1.3242925006839991e-09, "logits/chosen": -2.822983980178833, "logits/rejected": -2.1696574687957764, "logps/chosen": -123.74491882324219, "logps/rejected": -1025.35400390625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7460613250732422, "rewards/margins": 9.093101501464844, "rewards/rejected": -9.839162826538086, "step": 82760 }, { "epoch": 0.99, "learning_rate": 1.2905138271604445e-09, "logits/chosen": -2.846074104309082, "logits/rejected": -2.1673970222473145, "logps/chosen": -114.96238708496094, "logps/rejected": -980.1094970703125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6788445711135864, "rewards/margins": 8.718367576599121, "rewards/rejected": -9.397211074829102, "step": 82770 }, { "epoch": 0.99, "learning_rate": 1.257171421373271e-09, "logits/chosen": -2.905863046646118, "logits/rejected": -2.2250680923461914, "logps/chosen": -140.61569213867188, "logps/rejected": -1046.7965087890625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8066850900650024, "rewards/margins": 9.259454727172852, "rewards/rejected": -10.066140174865723, "step": 82780 }, { "epoch": 0.99, "learning_rate": 1.2242652891439333e-09, "logits/chosen": -2.9213948249816895, "logits/rejected": -2.2271902561187744, "logps/chosen": -113.74495697021484, "logps/rejected": -1018.48828125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6349298357963562, "rewards/margins": 9.147079467773438, "rewards/rejected": -9.78200912475586, "step": 82790 }, { "epoch": 0.99, "learning_rate": 1.1917954362178353e-09, "logits/chosen": -2.9159693717956543, "logits/rejected": -2.379981517791748, "logps/chosen": -112.08329772949219, "logps/rejected": -895.98681640625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.627062201499939, "rewards/margins": 7.943594455718994, "rewards/rejected": -8.570657730102539, "step": 82800 }, { "epoch": 0.99, "learning_rate": 1.1597618682643308e-09, "logits/chosen": -2.909987211227417, "logits/rejected": -2.45904803276062, "logps/chosen": -103.7174072265625, "logps/rejected": -955.5404052734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5800825357437134, "rewards/margins": 8.58433723449707, "rewards/rejected": -9.164419174194336, "step": 82810 }, { "epoch": 0.99, "learning_rate": 1.128164590875891e-09, "logits/chosen": -2.8788490295410156, "logits/rejected": -2.1424670219421387, "logps/chosen": -119.01729583740234, "logps/rejected": -1005.8646240234375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7122591733932495, "rewards/margins": 8.94788646697998, "rewards/rejected": -9.660146713256836, "step": 82820 }, { "epoch": 0.99, "learning_rate": 1.0970036095697689e-09, "logits/chosen": -2.8626089096069336, "logits/rejected": -2.469974994659424, "logps/chosen": -90.91877746582031, "logps/rejected": -883.7042236328125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5010807514190674, "rewards/margins": 7.9525861740112305, "rewards/rejected": -8.453667640686035, "step": 82830 }, { "epoch": 0.99, "learning_rate": 1.0662789297863353e-09, "logits/chosen": -2.898834228515625, "logits/rejected": -2.3456802368164062, "logps/chosen": -93.5479736328125, "logps/rejected": -827.5036010742188, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.4531398415565491, "rewards/margins": 7.4534711837768555, "rewards/rejected": -7.906611442565918, "step": 82840 }, { "epoch": 0.99, "learning_rate": 1.0359905568901873e-09, "logits/chosen": -2.9135403633117676, "logits/rejected": -2.508688449859619, "logps/chosen": -82.7918472290039, "logps/rejected": -889.9168090820312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.3838704228401184, "rewards/margins": 8.124040603637695, "rewards/rejected": -8.50791072845459, "step": 82850 }, { "epoch": 0.99, "learning_rate": 1.0061384961693176e-09, "logits/chosen": -2.9514400959014893, "logits/rejected": -2.3626601696014404, "logps/chosen": -107.01322174072266, "logps/rejected": -988.7374877929688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5887050032615662, "rewards/margins": 8.88741683959961, "rewards/rejected": -9.47612190246582, "step": 82860 }, { "epoch": 0.99, "learning_rate": 9.767227528362234e-10, "logits/chosen": -2.91176176071167, "logits/rejected": -2.1754701137542725, "logps/chosen": -115.31187438964844, "logps/rejected": -979.322265625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6994444131851196, "rewards/margins": 8.681137084960938, "rewards/rejected": -9.38058090209961, "step": 82870 }, { "epoch": 0.99, "learning_rate": 9.477433320265183e-10, "logits/chosen": -2.914126396179199, "logits/rejected": -2.458636522293091, "logps/chosen": -103.82392883300781, "logps/rejected": -975.0105590820312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5793439149856567, "rewards/margins": 8.780448913574219, "rewards/rejected": -9.359792709350586, "step": 82880 }, { "epoch": 0.99, "learning_rate": 9.192002388000443e-10, "logits/chosen": -2.878164529800415, "logits/rejected": -2.409942865371704, "logps/chosen": -127.0948257446289, "logps/rejected": -810.2423706054688, "loss": 0.1567, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7919900417327881, "rewards/margins": 6.93878698348999, "rewards/rejected": -7.730776309967041, "step": 82890 }, { "epoch": 0.99, "learning_rate": 8.910934781400371e-10, "logits/chosen": -2.840559482574463, "logits/rejected": -2.1050798892974854, "logps/chosen": -99.92375946044922, "logps/rejected": -980.5762939453125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5382863879203796, "rewards/margins": 8.863986015319824, "rewards/rejected": -9.402273178100586, "step": 82900 }, { "epoch": 0.99, "learning_rate": 8.634230549545153e-10, "logits/chosen": -2.8911526203155518, "logits/rejected": -2.4885668754577637, "logps/chosen": -79.65296173095703, "logps/rejected": -867.6868896484375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4128569960594177, "rewards/margins": 7.883138179779053, "rewards/rejected": -8.295995712280273, "step": 82910 }, { "epoch": 0.99, "learning_rate": 8.361889740740592e-10, "logits/chosen": -2.9202475547790527, "logits/rejected": -2.2169442176818848, "logps/chosen": -116.32100677490234, "logps/rejected": -1066.336181640625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6559396386146545, "rewards/margins": 9.597994804382324, "rewards/rejected": -10.253934860229492, "step": 82920 }, { "epoch": 0.99, "learning_rate": 8.093912402537541e-10, "logits/chosen": -2.921140193939209, "logits/rejected": -2.104538917541504, "logps/chosen": -116.2317123413086, "logps/rejected": -970.0906982421875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6510971784591675, "rewards/margins": 8.650116920471191, "rewards/rejected": -9.301214218139648, "step": 82930 }, { "epoch": 0.99, "learning_rate": 7.830298581729123e-10, "logits/chosen": -2.8958706855773926, "logits/rejected": -2.209423065185547, "logps/chosen": -110.5473403930664, "logps/rejected": -1026.799072265625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6011340022087097, "rewards/margins": 9.265867233276367, "rewards/rejected": -9.8670015335083, "step": 82940 }, { "epoch": 0.99, "learning_rate": 7.571048324334085e-10, "logits/chosen": -2.8106093406677246, "logits/rejected": -2.01713228225708, "logps/chosen": -141.4168701171875, "logps/rejected": -992.7589721679688, "loss": 0.1789, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9001796841621399, "rewards/margins": 8.634273529052734, "rewards/rejected": -9.534453392028809, "step": 82950 }, { "epoch": 0.99, "learning_rate": 7.316161675624545e-10, "logits/chosen": -2.911754608154297, "logits/rejected": -2.5492606163024902, "logps/chosen": -76.41966247558594, "logps/rejected": -801.0389404296875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.34256112575531006, "rewards/margins": 7.29648494720459, "rewards/rejected": -7.639046669006348, "step": 82960 }, { "epoch": 0.99, "learning_rate": 7.065638680095466e-10, "logits/chosen": -2.9120876789093018, "logits/rejected": -2.6255006790161133, "logps/chosen": -69.91670227050781, "logps/rejected": -850.2359619140625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.3184775412082672, "rewards/margins": 7.809708595275879, "rewards/rejected": -8.12818717956543, "step": 82970 }, { "epoch": 0.99, "learning_rate": 6.819479381492411e-10, "logits/chosen": -2.8772387504577637, "logits/rejected": -2.1634743213653564, "logps/chosen": -115.2857894897461, "logps/rejected": -1050.4407958984375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6055485606193542, "rewards/margins": 9.488469123840332, "rewards/rejected": -10.094017028808594, "step": 82980 }, { "epoch": 0.99, "learning_rate": 6.577683822792114e-10, "logits/chosen": -2.9314846992492676, "logits/rejected": -2.4614810943603516, "logps/chosen": -93.31605529785156, "logps/rejected": -874.4476318359375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.49394330382347107, "rewards/margins": 7.852682590484619, "rewards/rejected": -8.346624374389648, "step": 82990 }, { "epoch": 0.99, "learning_rate": 6.340252046210804e-10, "logits/chosen": -2.9013848304748535, "logits/rejected": -2.53090500831604, "logps/chosen": -97.5177993774414, "logps/rejected": -786.4139404296875, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -0.539337158203125, "rewards/margins": 6.930808067321777, "rewards/rejected": -7.470146179199219, "step": 83000 }, { "epoch": 0.99, "learning_rate": 6.107184093206986e-10, "logits/chosen": -2.903168201446533, "logits/rejected": -2.343863010406494, "logps/chosen": -97.99470520019531, "logps/rejected": -985.8435668945312, "loss": 0.135, "rewards/accuracies": 1.0, "rewards/chosen": -0.4523981511592865, "rewards/margins": 9.009593963623047, "rewards/rejected": -9.461994171142578, "step": 83010 }, { "epoch": 0.99, "learning_rate": 5.878480004470333e-10, "logits/chosen": -2.8927013874053955, "logits/rejected": -2.4192910194396973, "logps/chosen": -97.25872802734375, "logps/rejected": -919.2608642578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4894983172416687, "rewards/margins": 8.305347442626953, "rewards/rejected": -8.794845581054688, "step": 83020 }, { "epoch": 0.99, "learning_rate": 5.654139819932791e-10, "logits/chosen": -2.9032299518585205, "logits/rejected": -2.3972012996673584, "logps/chosen": -91.00544738769531, "logps/rejected": -841.0836791992188, "loss": 0.0848, "rewards/accuracies": 1.0, "rewards/chosen": -0.4510424733161926, "rewards/margins": 7.570717811584473, "rewards/rejected": -8.021759986877441, "step": 83030 }, { "epoch": 0.99, "learning_rate": 5.43416357876303e-10, "logits/chosen": -2.8323636054992676, "logits/rejected": -2.2706923484802246, "logps/chosen": -115.69002532958984, "logps/rejected": -879.3433837890625, "loss": 0.1404, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6685601472854614, "rewards/margins": 7.743152618408203, "rewards/rejected": -8.411712646484375, "step": 83040 }, { "epoch": 0.99, "learning_rate": 5.218551319369214e-10, "logits/chosen": -2.9159951210021973, "logits/rejected": -2.4223923683166504, "logps/chosen": -100.88253021240234, "logps/rejected": -936.6266479492188, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.5248039960861206, "rewards/margins": 8.462952613830566, "rewards/rejected": -8.98775577545166, "step": 83050 }, { "epoch": 0.99, "learning_rate": 5.007303079396231e-10, "logits/chosen": -2.8836874961853027, "logits/rejected": -2.33186674118042, "logps/chosen": -117.9248275756836, "logps/rejected": -902.8350830078125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6982932090759277, "rewards/margins": 7.965813636779785, "rewards/rejected": -8.664106369018555, "step": 83060 }, { "epoch": 0.99, "learning_rate": 4.800418895728465e-10, "logits/chosen": -2.8859338760375977, "logits/rejected": -2.410048484802246, "logps/chosen": -102.7608871459961, "logps/rejected": -991.2989501953125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5577398538589478, "rewards/margins": 8.97224235534668, "rewards/rejected": -9.529982566833496, "step": 83070 }, { "epoch": 0.99, "learning_rate": 4.597898804487022e-10, "logits/chosen": -2.8504650592803955, "logits/rejected": -2.255276679992676, "logps/chosen": -113.6657485961914, "logps/rejected": -976.9923095703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5818172693252563, "rewards/margins": 8.779932975769043, "rewards/rejected": -9.361749649047852, "step": 83080 }, { "epoch": 0.99, "learning_rate": 4.39974284102973e-10, "logits/chosen": -2.863412380218506, "logits/rejected": -2.384880542755127, "logps/chosen": -90.44929504394531, "logps/rejected": -905.9783325195312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.45565709471702576, "rewards/margins": 8.217920303344727, "rewards/rejected": -8.673577308654785, "step": 83090 }, { "epoch": 0.99, "learning_rate": 4.205951039953915e-10, "logits/chosen": -2.8547556400299072, "logits/rejected": -2.434068202972412, "logps/chosen": -85.00597381591797, "logps/rejected": -906.0421142578125, "loss": 0.0915, "rewards/accuracies": 1.0, "rewards/chosen": -0.43393391370773315, "rewards/margins": 8.242082595825195, "rewards/rejected": -8.676016807556152, "step": 83100 }, { "epoch": 0.99, "learning_rate": 4.016523435096398e-10, "logits/chosen": -2.9053635597229004, "logits/rejected": -2.237818479537964, "logps/chosen": -116.36607360839844, "logps/rejected": -1024.673828125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6569705605506897, "rewards/margins": 9.191160202026367, "rewards/rejected": -9.848130226135254, "step": 83110 }, { "epoch": 0.99, "learning_rate": 3.831460059530723e-10, "logits/chosen": -2.8773093223571777, "logits/rejected": -2.365884780883789, "logps/chosen": -99.0569839477539, "logps/rejected": -935.55615234375, "loss": 0.069, "rewards/accuracies": 1.0, "rewards/chosen": -0.5195714235305786, "rewards/margins": 8.452899932861328, "rewards/rejected": -8.972471237182617, "step": 83120 }, { "epoch": 1.0, "learning_rate": 3.650760945569931e-10, "logits/chosen": -2.9106202125549316, "logits/rejected": -2.437744379043579, "logps/chosen": -113.45234680175781, "logps/rejected": -903.5359497070312, "loss": 0.0864, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7067767381668091, "rewards/margins": 7.942749977111816, "rewards/rejected": -8.649526596069336, "step": 83130 }, { "epoch": 1.0, "learning_rate": 3.47442612476101e-10, "logits/chosen": -2.8781683444976807, "logits/rejected": -2.364675998687744, "logps/chosen": -103.70228576660156, "logps/rejected": -923.7969970703125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5955203771591187, "rewards/margins": 8.262046813964844, "rewards/rejected": -8.857566833496094, "step": 83140 }, { "epoch": 1.0, "learning_rate": 3.302455627890444e-10, "logits/chosen": -2.8960607051849365, "logits/rejected": -2.260733127593994, "logps/chosen": -98.99092102050781, "logps/rejected": -986.291015625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4812200665473938, "rewards/margins": 8.995810508728027, "rewards/rejected": -9.477029800415039, "step": 83150 }, { "epoch": 1.0, "learning_rate": 3.1348494849869905e-10, "logits/chosen": -2.8586387634277344, "logits/rejected": -2.3855209350585938, "logps/chosen": -105.26192474365234, "logps/rejected": -843.0823974609375, "loss": 0.0885, "rewards/accuracies": 1.0, "rewards/chosen": -0.5818008184432983, "rewards/margins": 7.455986976623535, "rewards/rejected": -8.037787437438965, "step": 83160 }, { "epoch": 1.0, "learning_rate": 2.971607725313352e-10, "logits/chosen": -2.8853461742401123, "logits/rejected": -2.260122537612915, "logps/chosen": -102.74821472167969, "logps/rejected": -941.4560546875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4900003969669342, "rewards/margins": 8.538415908813477, "rewards/rejected": -9.02841567993164, "step": 83170 }, { "epoch": 1.0, "learning_rate": 2.812730377371731e-10, "logits/chosen": -2.856607437133789, "logits/rejected": -2.315840482711792, "logps/chosen": -103.9743423461914, "logps/rejected": -1011.5886840820312, "loss": 0.0819, "rewards/accuracies": 1.0, "rewards/chosen": -0.5435409545898438, "rewards/margins": 9.16071891784668, "rewards/rejected": -9.70426082611084, "step": 83180 }, { "epoch": 1.0, "learning_rate": 2.658217468898272e-10, "logits/chosen": -2.8902344703674316, "logits/rejected": -2.4528543949127197, "logps/chosen": -78.63014221191406, "logps/rejected": -904.8843994140625, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -0.36800462007522583, "rewards/margins": 8.283018112182617, "rewards/rejected": -8.651023864746094, "step": 83190 }, { "epoch": 1.0, "learning_rate": 2.5080690268741714e-10, "logits/chosen": -2.851290464401245, "logits/rejected": -2.3802242279052734, "logps/chosen": -97.51617431640625, "logps/rejected": -831.5115966796875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4843662679195404, "rewards/margins": 7.445712089538574, "rewards/rejected": -7.930078983306885, "step": 83200 }, { "epoch": 1.0, "learning_rate": 2.36228507751457e-10, "logits/chosen": -2.932776927947998, "logits/rejected": -2.3973958492279053, "logps/chosen": -106.5042953491211, "logps/rejected": -853.2822265625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6082861423492432, "rewards/margins": 7.546866416931152, "rewards/rejected": -8.155153274536133, "step": 83210 }, { "epoch": 1.0, "learning_rate": 2.2208656462713307e-10, "logits/chosen": -2.8811910152435303, "logits/rejected": -2.3511927127838135, "logps/chosen": -127.93681335449219, "logps/rejected": -772.7227783203125, "loss": 0.209, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8508505821228027, "rewards/margins": 6.500341892242432, "rewards/rejected": -7.351192474365234, "step": 83220 }, { "epoch": 1.0, "learning_rate": 2.0838107578358136e-10, "logits/chosen": -2.868396759033203, "logits/rejected": -2.291100025177002, "logps/chosen": -113.45540618896484, "logps/rejected": -909.5535278320312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5998015999794006, "rewards/margins": 8.108137130737305, "rewards/rejected": -8.707939147949219, "step": 83230 }, { "epoch": 1.0, "learning_rate": 1.9511204361388758e-10, "logits/chosen": -2.8832926750183105, "logits/rejected": -2.1819779872894287, "logps/chosen": -115.0123291015625, "logps/rejected": -954.8626098632812, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6185125708580017, "rewards/margins": 8.547235488891602, "rewards/rejected": -9.16574764251709, "step": 83240 }, { "epoch": 1.0, "learning_rate": 1.8227947043480964e-10, "logits/chosen": -2.908257246017456, "logits/rejected": -2.453920841217041, "logps/chosen": -88.61509704589844, "logps/rejected": -885.2498168945312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.44565311074256897, "rewards/margins": 8.020062446594238, "rewards/rejected": -8.465715408325195, "step": 83250 }, { "epoch": 1.0, "learning_rate": 1.6988335848677762e-10, "logits/chosen": -2.8707938194274902, "logits/rejected": -2.4549880027770996, "logps/chosen": -84.39739990234375, "logps/rejected": -869.6511840820312, "loss": 0.1227, "rewards/accuracies": 1.0, "rewards/chosen": -0.44346684217453003, "rewards/margins": 7.875851631164551, "rewards/rejected": -8.319318771362305, "step": 83260 }, { "epoch": 1.0, "learning_rate": 1.579237099338937e-10, "logits/chosen": -2.9315452575683594, "logits/rejected": -2.1752068996429443, "logps/chosen": -123.68392181396484, "logps/rejected": -1045.2674560546875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7781901359558105, "rewards/margins": 9.263910293579102, "rewards/rejected": -10.04210090637207, "step": 83270 }, { "epoch": 1.0, "learning_rate": 1.4640052686476502e-10, "logits/chosen": -2.896519184112549, "logits/rejected": -2.402496099472046, "logps/chosen": -97.0119400024414, "logps/rejected": -944.4111328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.51221764087677, "rewards/margins": 8.545512199401855, "rewards/rejected": -9.057729721069336, "step": 83280 }, { "epoch": 1.0, "learning_rate": 1.353138112908381e-10, "logits/chosen": -2.864142894744873, "logits/rejected": -2.3268983364105225, "logps/chosen": -99.78334045410156, "logps/rejected": -920.9027099609375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.5087956190109253, "rewards/margins": 8.310678482055664, "rewards/rejected": -8.819474220275879, "step": 83290 }, { "epoch": 1.0, "learning_rate": 1.246635651483419e-10, "logits/chosen": -2.8739166259765625, "logits/rejected": -2.4932971000671387, "logps/chosen": -129.97332763671875, "logps/rejected": -859.07763671875, "loss": 0.1831, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8023029565811157, "rewards/margins": 7.405725955963135, "rewards/rejected": -8.208027839660645, "step": 83300 }, { "epoch": 1.0, "learning_rate": 1.1444979029634485e-10, "logits/chosen": -2.9299888610839844, "logits/rejected": -2.4290709495544434, "logps/chosen": -78.2322998046875, "logps/rejected": -923.2444458007812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.37028974294662476, "rewards/margins": 8.485156059265137, "rewards/rejected": -8.855446815490723, "step": 83310 }, { "epoch": 1.0, "learning_rate": 1.0467248851814272e-10, "logits/chosen": -2.8120338916778564, "logits/rejected": -2.1558682918548584, "logps/chosen": -125.89445495605469, "logps/rejected": -941.9520263671875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7083415985107422, "rewards/margins": 8.308059692382812, "rewards/rejected": -9.016401290893555, "step": 83320 }, { "epoch": 1.0, "learning_rate": 9.533166152098095e-11, "logits/chosen": -2.881221055984497, "logits/rejected": -2.4179112911224365, "logps/chosen": -93.23969268798828, "logps/rejected": -937.2491455078125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.48742371797561646, "rewards/margins": 8.503988265991211, "rewards/rejected": -8.991413116455078, "step": 83330 }, { "epoch": 1.0, "learning_rate": 8.642731093577717e-11, "logits/chosen": -2.874610662460327, "logits/rejected": -2.226656436920166, "logps/chosen": -104.3104019165039, "logps/rejected": -950.5020751953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5361546277999878, "rewards/margins": 8.583544731140137, "rewards/rejected": -9.11970043182373, "step": 83340 }, { "epoch": 1.0, "learning_rate": 7.795943831739872e-11, "logits/chosen": -2.880373954772949, "logits/rejected": -2.4754750728607178, "logps/chosen": -100.58067321777344, "logps/rejected": -806.6677856445312, "loss": 0.1108, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5661286115646362, "rewards/margins": 7.1226654052734375, "rewards/rejected": -7.688794136047363, "step": 83350 }, { "epoch": 1.0, "learning_rate": 6.992804514383e-11, "logits/chosen": -2.907050609588623, "logits/rejected": -2.3977367877960205, "logps/chosen": -83.57711791992188, "logps/rejected": -862.2890625, "loss": 0.1564, "rewards/accuracies": 1.0, "rewards/chosen": -0.3860594928264618, "rewards/margins": 7.857491970062256, "rewards/rejected": -8.243550300598145, "step": 83360 }, { "epoch": 1.0, "learning_rate": 6.233313281756026e-11, "logits/chosen": -2.867867946624756, "logits/rejected": -2.5491485595703125, "logps/chosen": -66.12623596191406, "logps/rejected": -741.3837890625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.28156062960624695, "rewards/margins": 6.759153842926025, "rewards/rejected": -7.040713310241699, "step": 83370 }, { "epoch": 1.0, "learning_rate": 5.5174702664473335e-11, "logits/chosen": -2.8541417121887207, "logits/rejected": -2.3130881786346436, "logps/chosen": -158.82669067382812, "logps/rejected": -964.1256713867188, "loss": 0.0503, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0866644382476807, "rewards/margins": 8.149736404418945, "rewards/rejected": -9.236400604248047, "step": 83380 }, { "epoch": 1.0, "learning_rate": 4.845275593495791e-11, "logits/chosen": -2.880969524383545, "logits/rejected": -2.3126304149627686, "logps/chosen": -105.14796447753906, "logps/rejected": -1000.7341918945312, "loss": 0.2143, "rewards/accuracies": 1.0, "rewards/chosen": -0.5687073469161987, "rewards/margins": 9.056818008422852, "rewards/rejected": -9.625526428222656, "step": 83390 }, { "epoch": 1.0, "learning_rate": 4.21672938019646e-11, "logits/chosen": -2.8774938583374023, "logits/rejected": -2.225128650665283, "logps/chosen": -112.8341064453125, "logps/rejected": -1015.51171875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.5614093542098999, "rewards/margins": 9.192459106445312, "rewards/rejected": -9.753868103027344, "step": 83400 }, { "epoch": 1.0, "learning_rate": 3.6318317363504e-11, "logits/chosen": -2.8753294944763184, "logits/rejected": -2.3072702884674072, "logps/chosen": -124.73795318603516, "logps/rejected": -1005.4281005859375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7308323979377747, "rewards/margins": 8.912421226501465, "rewards/rejected": -9.643255233764648, "step": 83410 }, { "epoch": 1.0, "learning_rate": 3.09058276401486e-11, "logits/chosen": -2.9019222259521484, "logits/rejected": -2.3590450286865234, "logps/chosen": -115.0085678100586, "logps/rejected": -955.05615234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6489714980125427, "rewards/margins": 8.505485534667969, "rewards/rejected": -9.15445613861084, "step": 83420 }, { "epoch": 1.0, "learning_rate": 2.5929825577530877e-11, "logits/chosen": -2.8987159729003906, "logits/rejected": -2.4579198360443115, "logps/chosen": -106.73500061035156, "logps/rejected": -881.9306640625, "loss": 0.1038, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6440009474754333, "rewards/margins": 7.789483547210693, "rewards/rejected": -8.433484077453613, "step": 83430 }, { "epoch": 1.0, "learning_rate": 2.1390312044122786e-11, "logits/chosen": -2.922356367111206, "logits/rejected": -2.3477373123168945, "logps/chosen": -97.96113586425781, "logps/rejected": -910.0281372070312, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.5041831731796265, "rewards/margins": 8.223858833312988, "rewards/rejected": -8.728042602539062, "step": 83440 }, { "epoch": 1.0, "learning_rate": 1.7287287832623568e-11, "logits/chosen": -2.8645172119140625, "logits/rejected": -2.3069963455200195, "logps/chosen": -120.5834732055664, "logps/rejected": -954.46728515625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7162612676620483, "rewards/margins": 8.436420440673828, "rewards/rejected": -9.152681350708008, "step": 83450 }, { "epoch": 1.0, "learning_rate": 1.362075365940463e-11, "logits/chosen": -2.917692184448242, "logits/rejected": -2.476179599761963, "logps/chosen": -87.86246490478516, "logps/rejected": -831.36572265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4171516001224518, "rewards/margins": 7.517359256744385, "rewards/rejected": -7.934510707855225, "step": 83460 }, { "epoch": 1.0, "learning_rate": 1.0390710164509543e-11, "logits/chosen": -2.9238829612731934, "logits/rejected": -2.5896637439727783, "logps/chosen": -81.65977478027344, "logps/rejected": -859.0618286132812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.3855884075164795, "rewards/margins": 7.8317670822143555, "rewards/rejected": -8.217355728149414, "step": 83470 }, { "epoch": 1.0, "learning_rate": 7.597157911931608e-12, "logits/chosen": -2.8977532386779785, "logits/rejected": -2.4066901206970215, "logps/chosen": -112.44022369384766, "logps/rejected": -937.2418212890625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.640956699848175, "rewards/margins": 8.324621200561523, "rewards/rejected": -8.965578079223633, "step": 83480 }, { "epoch": 1.0, "learning_rate": 5.240097389613841e-12, "logits/chosen": -2.913175344467163, "logits/rejected": -2.4559457302093506, "logps/chosen": -98.59709167480469, "logps/rejected": -883.7835083007812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5443757772445679, "rewards/margins": 7.914473533630371, "rewards/rejected": -8.458847999572754, "step": 83490 }, { "epoch": 1.0, "learning_rate": 3.319529008616318e-12, "logits/chosen": -2.8431484699249268, "logits/rejected": -2.1134791374206543, "logps/chosen": -146.08639526367188, "logps/rejected": -918.7960815429688, "loss": 0.0975, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9466089010238647, "rewards/margins": 7.863497257232666, "rewards/rejected": -8.81010627746582, "step": 83500 }, { "epoch": 1.0, "learning_rate": 1.8354531047815038e-12, "logits/chosen": -2.8738200664520264, "logits/rejected": -2.313246965408325, "logps/chosen": -118.60623931884766, "logps/rejected": -877.6036987304688, "loss": 0.0883, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7198641300201416, "rewards/margins": 7.675858974456787, "rewards/rejected": -8.395723342895508, "step": 83510 }, { "epoch": 1.0, "learning_rate": 7.878699370689192e-13, "logits/chosen": -2.8894753456115723, "logits/rejected": -2.439892292022705, "logps/chosen": -81.86724853515625, "logps/rejected": -867.4264526367188, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.42033594846725464, "rewards/margins": 7.872345924377441, "rewards/rejected": -8.292681694030762, "step": 83520 }, { "epoch": 1.0, "learning_rate": 1.76779688387807e-13, "logits/chosen": -2.884983539581299, "logits/rejected": -2.320037364959717, "logps/chosen": -126.31254577636719, "logps/rejected": -918.9298095703125, "loss": 0.0873, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.785901665687561, "rewards/margins": 8.031806945800781, "rewards/rejected": -8.817708969116211, "step": 83530 }, { "epoch": 1.0, "step": 83539, "total_flos": 0.0, "train_loss": 0.05805659896943479, "train_runtime": 136192.1568, "train_samples_per_second": 2.454, "train_steps_per_second": 0.613 } ], "logging_steps": 10, "max_steps": 83539, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }