{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9997265418482124, "eval_steps": 100, "global_step": 10284, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.859086491739552e-10, "logits/chosen": -2.7380757331848145, "logits/rejected": -2.824676990509033, "logps/chosen": -110.21537780761719, "logps/rejected": -136.8306427001953, "loss": 1.0, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.859086491739553e-09, "logits/chosen": -2.7822985649108887, "logits/rejected": -2.781153917312622, "logps/chosen": -221.65379333496094, "logps/rejected": -202.54539489746094, "loss": 1.0001, "rewards/accuracies": 0.3576388955116272, "rewards/chosen": 0.0013104991521686316, "rewards/margins": 0.0014001904055476189, "rewards/rejected": -8.969102054834366e-05, "step": 10 }, { "epoch": 0.01, "learning_rate": 9.718172983479106e-09, "logits/chosen": -2.7663846015930176, "logits/rejected": -2.757770299911499, "logps/chosen": -199.00106811523438, "logps/rejected": -185.31143188476562, "loss": 1.0015, "rewards/accuracies": 0.5, "rewards/chosen": -0.001889596926048398, "rewards/margins": -0.00209548557177186, "rewards/rejected": 0.0002058891550404951, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.4577259475218657e-08, "logits/chosen": -2.8105146884918213, "logits/rejected": -2.7981715202331543, "logps/chosen": -208.05624389648438, "logps/rejected": -192.64044189453125, "loss": 1.0002, "rewards/accuracies": 0.47187501192092896, "rewards/chosen": -0.003374215215444565, "rewards/margins": -0.0016945224488154054, "rewards/rejected": -0.0016796926502138376, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.943634596695821e-08, "logits/chosen": -2.7664506435394287, "logits/rejected": -2.7862141132354736, "logps/chosen": -197.5291290283203, "logps/rejected": -188.8165740966797, "loss": 1.002, "rewards/accuracies": 0.4468750059604645, "rewards/chosen": -0.0014407314592972398, "rewards/margins": -0.00430614547803998, "rewards/rejected": 0.0028654143679887056, "step": 40 }, { "epoch": 0.01, "learning_rate": 2.4295432458697764e-08, "logits/chosen": -2.776463270187378, "logits/rejected": -2.7792251110076904, "logps/chosen": -201.38197326660156, "logps/rejected": -189.93276977539062, "loss": 0.999, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": 0.0018952597165480256, "rewards/margins": 0.00021468903287313879, "rewards/rejected": 0.0016805704217404127, "step": 50 }, { "epoch": 0.02, "learning_rate": 2.9154518950437314e-08, "logits/chosen": -2.7934350967407227, "logits/rejected": -2.7988104820251465, "logps/chosen": -206.02767944335938, "logps/rejected": -189.4266357421875, "loss": 1.0014, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.0006429950590245426, "rewards/margins": -0.0019359359284862876, "rewards/rejected": 0.0012929405784234405, "step": 60 }, { "epoch": 0.02, "learning_rate": 3.4013605442176873e-08, "logits/chosen": -2.78006649017334, "logits/rejected": -2.7711257934570312, "logps/chosen": -200.0697784423828, "logps/rejected": -180.75758361816406, "loss": 0.9966, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -9.763417619979009e-05, "rewards/margins": 0.0023461231030523777, "rewards/rejected": -0.002443757839500904, "step": 70 }, { "epoch": 0.02, "learning_rate": 3.887269193391642e-08, "logits/chosen": -2.7850944995880127, "logits/rejected": -2.7837047576904297, "logps/chosen": -192.77597045898438, "logps/rejected": -182.43698120117188, "loss": 0.9967, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.003315262496471405, "rewards/margins": 0.005001432728022337, "rewards/rejected": -0.0016861699987202883, "step": 80 }, { "epoch": 0.03, "learning_rate": 4.373177842565597e-08, "logits/chosen": -2.7849223613739014, "logits/rejected": -2.765354633331299, "logps/chosen": -200.2301788330078, "logps/rejected": -183.25489807128906, "loss": 0.9955, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": 0.002481408417224884, "rewards/margins": 0.0061470940709114075, "rewards/rejected": -0.0036656856536865234, "step": 90 }, { "epoch": 0.03, "learning_rate": 4.859086491739553e-08, "logits/chosen": -2.7850663661956787, "logits/rejected": -2.777864694595337, "logps/chosen": -211.11416625976562, "logps/rejected": -191.1077423095703, "loss": 0.9958, "rewards/accuracies": 0.512499988079071, "rewards/chosen": 0.001670264988206327, "rewards/margins": 0.0035112425684928894, "rewards/rejected": -0.0018409776967018843, "step": 100 }, { "epoch": 0.03, "eval_logits/chosen": -2.6924479007720947, "eval_logits/rejected": -2.6876208782196045, "eval_logps/chosen": -195.60777282714844, "eval_logps/rejected": -180.92320251464844, "eval_loss": 1.0002543926239014, "eval_rewards/accuracies": 0.49303194880485535, "eval_rewards/chosen": -0.000245044706389308, "eval_rewards/margins": -5.4176409321371466e-05, "eval_rewards/rejected": -0.00019086812972091138, "eval_runtime": 444.5306, "eval_samples_per_second": 26.466, "eval_steps_per_second": 3.309, "step": 100 }, { "epoch": 0.03, "learning_rate": 5.344995140913508e-08, "logits/chosen": -2.7685182094573975, "logits/rejected": -2.7815494537353516, "logps/chosen": -203.09451293945312, "logps/rejected": -192.09835815429688, "loss": 1.0022, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -0.0015572088304907084, "rewards/margins": -0.0013193445047363639, "rewards/rejected": -0.00023786406381987035, "step": 110 }, { "epoch": 0.04, "learning_rate": 5.830903790087463e-08, "logits/chosen": -2.7377212047576904, "logits/rejected": -2.7259089946746826, "logps/chosen": -197.40304565429688, "logps/rejected": -177.4573211669922, "loss": 1.0007, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.0015263364184647799, "rewards/margins": 0.0014055164065212011, "rewards/rejected": -0.0029318523593246937, "step": 120 }, { "epoch": 0.04, "learning_rate": 6.316812439261418e-08, "logits/chosen": -2.781754970550537, "logits/rejected": -2.780665397644043, "logps/chosen": -206.015869140625, "logps/rejected": -194.5734405517578, "loss": 1.001, "rewards/accuracies": 0.528124988079071, "rewards/chosen": 0.0026487892027944326, "rewards/margins": 0.001911659142933786, "rewards/rejected": 0.000737129885237664, "step": 130 }, { "epoch": 0.04, "learning_rate": 6.802721088435375e-08, "logits/chosen": -2.7300639152526855, "logits/rejected": -2.7538230419158936, "logps/chosen": -193.50584411621094, "logps/rejected": -173.8282012939453, "loss": 1.0005, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": -0.0014676448190584779, "rewards/margins": -0.001340946415439248, "rewards/rejected": -0.0001266980543732643, "step": 140 }, { "epoch": 0.04, "learning_rate": 7.28862973760933e-08, "logits/chosen": -2.780795097351074, "logits/rejected": -2.7789599895477295, "logps/chosen": -186.2657012939453, "logps/rejected": -176.09500122070312, "loss": 0.9997, "rewards/accuracies": 0.46562498807907104, "rewards/chosen": -0.006670904811471701, "rewards/margins": -0.003007309976965189, "rewards/rejected": -0.0036635962314903736, "step": 150 }, { "epoch": 0.05, "learning_rate": 7.774538386783285e-08, "logits/chosen": -2.7663471698760986, "logits/rejected": -2.7770943641662598, "logps/chosen": -170.55154418945312, "logps/rejected": -173.17251586914062, "loss": 1.0013, "rewards/accuracies": 0.4781250059604645, "rewards/chosen": -0.0022262814454734325, "rewards/margins": -0.002861147280782461, "rewards/rejected": 0.0006348658935166895, "step": 160 }, { "epoch": 0.05, "learning_rate": 8.26044703595724e-08, "logits/chosen": -2.7730824947357178, "logits/rejected": -2.769960403442383, "logps/chosen": -221.2248077392578, "logps/rejected": -201.74874877929688, "loss": 0.9999, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.002443634672090411, "rewards/margins": 0.0040279231034219265, "rewards/rejected": -0.0015842880820855498, "step": 170 }, { "epoch": 0.05, "learning_rate": 8.746355685131194e-08, "logits/chosen": -2.76259183883667, "logits/rejected": -2.7266428470611572, "logps/chosen": -207.81698608398438, "logps/rejected": -173.13430786132812, "loss": 1.0002, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": 0.002216937020421028, "rewards/margins": 0.001683462760411203, "rewards/rejected": 0.0005334746092557907, "step": 180 }, { "epoch": 0.06, "learning_rate": 9.23226433430515e-08, "logits/chosen": -2.7751212120056152, "logits/rejected": -2.7650701999664307, "logps/chosen": -186.60348510742188, "logps/rejected": -167.40306091308594, "loss": 0.9997, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": 0.000979422009550035, "rewards/margins": -0.0013022356433793902, "rewards/rejected": 0.0022816576529294252, "step": 190 }, { "epoch": 0.06, "learning_rate": 9.718172983479106e-08, "logits/chosen": -2.757312297821045, "logits/rejected": -2.7516403198242188, "logps/chosen": -202.6693572998047, "logps/rejected": -188.0704345703125, "loss": 0.9984, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": 0.0023301562760025263, "rewards/margins": 0.0031469545792788267, "rewards/rejected": -0.0008167977211996913, "step": 200 }, { "epoch": 0.06, "eval_logits/chosen": -2.6837515830993652, "eval_logits/rejected": -2.6787424087524414, "eval_logps/chosen": -195.6126708984375, "eval_logps/rejected": -180.9346923828125, "eval_loss": 0.9995196461677551, "eval_rewards/accuracies": 0.4988103210926056, "eval_rewards/chosen": -0.0007361570023931563, "eval_rewards/margins": 0.0006052808603271842, "eval_rewards/rejected": -0.0013414378045126796, "eval_runtime": 443.7511, "eval_samples_per_second": 26.513, "eval_steps_per_second": 3.315, "step": 200 }, { "epoch": 0.06, "learning_rate": 1.0204081632653061e-07, "logits/chosen": -2.759521007537842, "logits/rejected": -2.762321949005127, "logps/chosen": -192.76931762695312, "logps/rejected": -187.10482788085938, "loss": 0.999, "rewards/accuracies": 0.4468750059604645, "rewards/chosen": -0.0026286139618605375, "rewards/margins": -0.003410499542951584, "rewards/rejected": 0.0007818859303370118, "step": 210 }, { "epoch": 0.06, "learning_rate": 1.0689990281827016e-07, "logits/chosen": -2.746939182281494, "logits/rejected": -2.776545763015747, "logps/chosen": -178.4153289794922, "logps/rejected": -182.76333618164062, "loss": 1.0002, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": -0.001649575075134635, "rewards/margins": 0.0006654064636677504, "rewards/rejected": -0.0023149813059717417, "step": 220 }, { "epoch": 0.07, "learning_rate": 1.117589893100097e-07, "logits/chosen": -2.767946720123291, "logits/rejected": -2.777435779571533, "logps/chosen": -185.25881958007812, "logps/rejected": -184.54998779296875, "loss": 1.0031, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.0033680996857583523, "rewards/margins": -0.002976976800709963, "rewards/rejected": -0.00039112247759476304, "step": 230 }, { "epoch": 0.07, "learning_rate": 1.1661807580174926e-07, "logits/chosen": -2.761780023574829, "logits/rejected": -2.804439067840576, "logps/chosen": -192.47836303710938, "logps/rejected": -198.77731323242188, "loss": 0.997, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.0017596991965547204, "rewards/margins": -0.0007232691277749836, "rewards/rejected": -0.0010364304762333632, "step": 240 }, { "epoch": 0.07, "learning_rate": 1.2147716229348882e-07, "logits/chosen": -2.7784340381622314, "logits/rejected": -2.77835750579834, "logps/chosen": -192.2952423095703, "logps/rejected": -178.8677520751953, "loss": 0.9982, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.0018869973719120026, "rewards/margins": 0.001274367212317884, "rewards/rejected": -0.003161365631967783, "step": 250 }, { "epoch": 0.08, "learning_rate": 1.2633624878522837e-07, "logits/chosen": -2.7689738273620605, "logits/rejected": -2.777843952178955, "logps/chosen": -207.0860595703125, "logps/rejected": -197.57162475585938, "loss": 1.0012, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": 0.00039492687210440636, "rewards/margins": 0.0005041843978688121, "rewards/rejected": -0.00010925752576440573, "step": 260 }, { "epoch": 0.08, "learning_rate": 1.3119533527696792e-07, "logits/chosen": -2.788908004760742, "logits/rejected": -2.7591609954833984, "logps/chosen": -226.4412841796875, "logps/rejected": -188.81068420410156, "loss": 0.9992, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.002002813620492816, "rewards/margins": 0.0019938002806156874, "rewards/rejected": -0.003996613435447216, "step": 270 }, { "epoch": 0.08, "learning_rate": 1.360544217687075e-07, "logits/chosen": -2.7908740043640137, "logits/rejected": -2.779235601425171, "logps/chosen": -202.7140655517578, "logps/rejected": -184.89212036132812, "loss": 0.9963, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": -0.0004921169602312148, "rewards/margins": 0.0021473595406860113, "rewards/rejected": -0.002639476442709565, "step": 280 }, { "epoch": 0.08, "learning_rate": 1.4091350826044704e-07, "logits/chosen": -2.7945303916931152, "logits/rejected": -2.7873740196228027, "logps/chosen": -206.4544219970703, "logps/rejected": -197.7809295654297, "loss": 0.9997, "rewards/accuracies": 0.515625, "rewards/chosen": -0.00042502378346398473, "rewards/margins": -0.0009619802003726363, "rewards/rejected": 0.0005369562422856688, "step": 290 }, { "epoch": 0.09, "learning_rate": 1.457725947521866e-07, "logits/chosen": -2.7498443126678467, "logits/rejected": -2.750371217727661, "logps/chosen": -192.40003967285156, "logps/rejected": -173.42335510253906, "loss": 0.9982, "rewards/accuracies": 0.503125011920929, "rewards/chosen": 0.0006312422920018435, "rewards/margins": -3.3728498237906024e-05, "rewards/rejected": 0.0006649707793258131, "step": 300 }, { "epoch": 0.09, "eval_logits/chosen": -2.689706802368164, "eval_logits/rejected": -2.6848087310791016, "eval_logps/chosen": -195.61361694335938, "eval_logps/rejected": -180.93612670898438, "eval_loss": 0.9996600151062012, "eval_rewards/accuracies": 0.4983004629611969, "eval_rewards/chosen": -0.0008316952735185623, "eval_rewards/margins": 0.000652860093396157, "eval_rewards/rejected": -0.001484555541537702, "eval_runtime": 443.5992, "eval_samples_per_second": 26.522, "eval_steps_per_second": 3.316, "step": 300 }, { "epoch": 0.09, "learning_rate": 1.5063168124392614e-07, "logits/chosen": -2.750749111175537, "logits/rejected": -2.7541632652282715, "logps/chosen": -194.2585906982422, "logps/rejected": -173.24342346191406, "loss": 1.0014, "rewards/accuracies": 0.484375, "rewards/chosen": -0.004035498481243849, "rewards/margins": -0.003319731680676341, "rewards/rejected": -0.0007157664513215423, "step": 310 }, { "epoch": 0.09, "learning_rate": 1.554907677356657e-07, "logits/chosen": -2.7510886192321777, "logits/rejected": -2.7908897399902344, "logps/chosen": -163.4540557861328, "logps/rejected": -183.45425415039062, "loss": 0.9994, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": 0.0019172386964783072, "rewards/margins": 0.0012772420886904001, "rewards/rejected": 0.0006399970734491944, "step": 320 }, { "epoch": 0.1, "learning_rate": 1.6034985422740524e-07, "logits/chosen": -2.730457067489624, "logits/rejected": -2.754711627960205, "logps/chosen": -173.8527069091797, "logps/rejected": -189.8472900390625, "loss": 0.9978, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.0029088188894093037, "rewards/margins": 0.0033104638569056988, "rewards/rejected": -0.006219283677637577, "step": 330 }, { "epoch": 0.1, "learning_rate": 1.652089407191448e-07, "logits/chosen": -2.76922607421875, "logits/rejected": -2.7422194480895996, "logps/chosen": -194.23680114746094, "logps/rejected": -160.4568328857422, "loss": 1.0014, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": -0.0046670883893966675, "rewards/margins": -0.00291821570135653, "rewards/rejected": -0.0017488717567175627, "step": 340 }, { "epoch": 0.1, "learning_rate": 1.7006802721088434e-07, "logits/chosen": -2.771911382675171, "logits/rejected": -2.798151969909668, "logps/chosen": -181.0185546875, "logps/rejected": -178.7841339111328, "loss": 1.0032, "rewards/accuracies": 0.528124988079071, "rewards/chosen": -0.0018037393456324935, "rewards/margins": -0.001680979155935347, "rewards/rejected": -0.00012276046618353575, "step": 350 }, { "epoch": 0.11, "learning_rate": 1.749271137026239e-07, "logits/chosen": -2.812619209289551, "logits/rejected": -2.79461669921875, "logps/chosen": -223.05697631835938, "logps/rejected": -200.67198181152344, "loss": 1.0019, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -0.004876718390733004, "rewards/margins": -0.0017235095147043467, "rewards/rejected": -0.0031532091088593006, "step": 360 }, { "epoch": 0.11, "learning_rate": 1.7978620019436344e-07, "logits/chosen": -2.7639615535736084, "logits/rejected": -2.761749029159546, "logps/chosen": -191.48397827148438, "logps/rejected": -174.09100341796875, "loss": 0.9953, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": -0.0004648994654417038, "rewards/margins": 0.003035474568605423, "rewards/rejected": -0.003500374499708414, "step": 370 }, { "epoch": 0.11, "learning_rate": 1.84645286686103e-07, "logits/chosen": -2.7765164375305176, "logits/rejected": -2.7844157218933105, "logps/chosen": -183.25009155273438, "logps/rejected": -182.92747497558594, "loss": 1.0008, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.0028205744456499815, "rewards/margins": -0.003933761268854141, "rewards/rejected": 0.0011131864739581943, "step": 380 }, { "epoch": 0.11, "learning_rate": 1.8950437317784256e-07, "logits/chosen": -2.7915711402893066, "logits/rejected": -2.7761950492858887, "logps/chosen": -198.72650146484375, "logps/rejected": -186.8263702392578, "loss": 0.9976, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": -0.0008961642161011696, "rewards/margins": 0.0019898409955203533, "rewards/rejected": -0.002886005211621523, "step": 390 }, { "epoch": 0.12, "learning_rate": 1.9436345966958211e-07, "logits/chosen": -2.768798351287842, "logits/rejected": -2.805954933166504, "logps/chosen": -187.5673065185547, "logps/rejected": -199.96156311035156, "loss": 0.9966, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.003487077308818698, "rewards/margins": 0.0017222666647285223, "rewards/rejected": -0.0052093444392085075, "step": 400 }, { "epoch": 0.12, "eval_logits/chosen": -2.6913726329803467, "eval_logits/rejected": -2.6865181922912598, "eval_logps/chosen": -195.6290740966797, "eval_logps/rejected": -180.9485321044922, "eval_loss": 0.9998844265937805, "eval_rewards/accuracies": 0.4994901418685913, "eval_rewards/chosen": -0.0023770283441990614, "eval_rewards/margins": 0.0003477816062513739, "eval_rewards/rejected": -0.0027248100377619267, "eval_runtime": 443.4885, "eval_samples_per_second": 26.528, "eval_steps_per_second": 3.317, "step": 400 }, { "epoch": 0.12, "learning_rate": 1.9922254616132166e-07, "logits/chosen": -2.7575571537017822, "logits/rejected": -2.7523703575134277, "logps/chosen": -186.25596618652344, "logps/rejected": -171.8108673095703, "loss": 1.0021, "rewards/accuracies": 0.4593749940395355, "rewards/chosen": -0.0011599508579820395, "rewards/margins": 0.0005078490357846022, "rewards/rejected": -0.0016678001265972853, "step": 410 }, { "epoch": 0.12, "learning_rate": 2.0408163265306121e-07, "logits/chosen": -2.7704014778137207, "logits/rejected": -2.745540142059326, "logps/chosen": -188.27548217773438, "logps/rejected": -167.63754272460938, "loss": 0.9996, "rewards/accuracies": 0.5, "rewards/chosen": -0.005081222392618656, "rewards/margins": -0.0013192463666200638, "rewards/rejected": -0.0037619750946760178, "step": 420 }, { "epoch": 0.13, "learning_rate": 2.0894071914480076e-07, "logits/chosen": -2.7747130393981934, "logits/rejected": -2.773439407348633, "logps/chosen": -201.019775390625, "logps/rejected": -184.5673065185547, "loss": 0.9988, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.004013075493276119, "rewards/margins": 0.0002744763041846454, "rewards/rejected": -0.004287551622837782, "step": 430 }, { "epoch": 0.13, "learning_rate": 2.137998056365403e-07, "logits/chosen": -2.776340961456299, "logits/rejected": -2.7776546478271484, "logps/chosen": -194.1025390625, "logps/rejected": -183.6971893310547, "loss": 0.9964, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": -0.0036060716956853867, "rewards/margins": -0.00018001366697717458, "rewards/rejected": -0.0034260577522218227, "step": 440 }, { "epoch": 0.13, "learning_rate": 2.1865889212827986e-07, "logits/chosen": -2.745941638946533, "logits/rejected": -2.7574284076690674, "logps/chosen": -183.247802734375, "logps/rejected": -176.6983184814453, "loss": 0.9968, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.0049474723637104034, "rewards/margins": -0.001293553039431572, "rewards/rejected": -0.0036539186257869005, "step": 450 }, { "epoch": 0.13, "learning_rate": 2.235179786200194e-07, "logits/chosen": -2.769151210784912, "logits/rejected": -2.7719428539276123, "logps/chosen": -178.2666015625, "logps/rejected": -168.8743438720703, "loss": 0.9979, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.0005593408131971955, "rewards/margins": 0.002134414855390787, "rewards/rejected": -0.002693755552172661, "step": 460 }, { "epoch": 0.14, "learning_rate": 2.2837706511175896e-07, "logits/chosen": -2.783510684967041, "logits/rejected": -2.7758355140686035, "logps/chosen": -219.9918975830078, "logps/rejected": -187.12355041503906, "loss": 1.0, "rewards/accuracies": 0.484375, "rewards/chosen": -0.0005246883956715465, "rewards/margins": 0.002583832014352083, "rewards/rejected": -0.0031085205264389515, "step": 470 }, { "epoch": 0.14, "learning_rate": 2.332361516034985e-07, "logits/chosen": -2.7807018756866455, "logits/rejected": -2.769526481628418, "logps/chosen": -191.76307678222656, "logps/rejected": -172.55288696289062, "loss": 0.9995, "rewards/accuracies": 0.4593749940395355, "rewards/chosen": -0.0057826414704322815, "rewards/margins": -0.003593266010284424, "rewards/rejected": -0.002189375925809145, "step": 480 }, { "epoch": 0.14, "learning_rate": 2.3809523809523806e-07, "logits/chosen": -2.7531919479370117, "logits/rejected": -2.7485499382019043, "logps/chosen": -211.37136840820312, "logps/rejected": -183.17626953125, "loss": 0.9984, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.002168575767427683, "rewards/margins": 0.0029125846922397614, "rewards/rejected": -0.005081160459667444, "step": 490 }, { "epoch": 0.15, "learning_rate": 2.4295432458697764e-07, "logits/chosen": -2.8140952587127686, "logits/rejected": -2.7944469451904297, "logps/chosen": -215.98110961914062, "logps/rejected": -196.25465393066406, "loss": 0.9992, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -0.00415486516430974, "rewards/margins": -8.980366692412645e-05, "rewards/rejected": -0.004065061453729868, "step": 500 }, { "epoch": 0.15, "eval_logits/chosen": -2.6694087982177734, "eval_logits/rejected": -2.6641385555267334, "eval_logps/chosen": -195.6439971923828, "eval_logps/rejected": -180.9752960205078, "eval_loss": 0.9983938932418823, "eval_rewards/accuracies": 0.5122365951538086, "eval_rewards/chosen": -0.003868211293593049, "eval_rewards/margins": 0.0015316897770389915, "eval_rewards/rejected": -0.005399900488555431, "eval_runtime": 443.5731, "eval_samples_per_second": 26.523, "eval_steps_per_second": 3.316, "step": 500 }, { "epoch": 0.15, "learning_rate": 2.478134110787172e-07, "logits/chosen": -2.7879891395568848, "logits/rejected": -2.7984836101531982, "logps/chosen": -203.31301879882812, "logps/rejected": -215.4292755126953, "loss": 0.9961, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.003516793716698885, "rewards/margins": 0.005363177973777056, "rewards/rejected": -0.00887997169047594, "step": 510 }, { "epoch": 0.15, "learning_rate": 2.5267249757045674e-07, "logits/chosen": -2.7599759101867676, "logits/rejected": -2.75394868850708, "logps/chosen": -167.49600219726562, "logps/rejected": -158.7400665283203, "loss": 1.0014, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": -0.0029489509761333466, "rewards/margins": 0.004199314396828413, "rewards/rejected": -0.007148265838623047, "step": 520 }, { "epoch": 0.15, "learning_rate": 2.575315840621963e-07, "logits/chosen": -2.7890686988830566, "logits/rejected": -2.7895076274871826, "logps/chosen": -210.8441162109375, "logps/rejected": -190.7526397705078, "loss": 1.0027, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": -0.008806548081338406, "rewards/margins": -0.006835663225501776, "rewards/rejected": -0.0019708843901753426, "step": 530 }, { "epoch": 0.16, "learning_rate": 2.6239067055393583e-07, "logits/chosen": -2.7971768379211426, "logits/rejected": -2.7915546894073486, "logps/chosen": -206.54354858398438, "logps/rejected": -194.00369262695312, "loss": 0.9982, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": -0.005383323412388563, "rewards/margins": 9.738374501466751e-06, "rewards/rejected": -0.005393061321228743, "step": 540 }, { "epoch": 0.16, "learning_rate": 2.6724975704567544e-07, "logits/chosen": -2.7736599445343018, "logits/rejected": -2.764831781387329, "logps/chosen": -214.5034942626953, "logps/rejected": -188.35311889648438, "loss": 0.9938, "rewards/accuracies": 0.53125, "rewards/chosen": -0.003384556155651808, "rewards/margins": 0.004990326706320047, "rewards/rejected": -0.008374882861971855, "step": 550 }, { "epoch": 0.16, "learning_rate": 2.72108843537415e-07, "logits/chosen": -2.7674479484558105, "logits/rejected": -2.767453670501709, "logps/chosen": -185.07174682617188, "logps/rejected": -179.09393310546875, "loss": 0.9974, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": -0.005933644715696573, "rewards/margins": 0.0022817221470177174, "rewards/rejected": -0.00821536686271429, "step": 560 }, { "epoch": 0.17, "learning_rate": 2.7696793002915454e-07, "logits/chosen": -2.7519259452819824, "logits/rejected": -2.7778701782226562, "logps/chosen": -177.8760986328125, "logps/rejected": -182.38775634765625, "loss": 0.9915, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.005426911171525717, "rewards/margins": 0.010609583929181099, "rewards/rejected": -0.016036493703722954, "step": 570 }, { "epoch": 0.17, "learning_rate": 2.818270165208941e-07, "logits/chosen": -2.7523977756500244, "logits/rejected": -2.7401721477508545, "logps/chosen": -192.66586303710938, "logps/rejected": -169.11270141601562, "loss": 0.9977, "rewards/accuracies": 0.503125011920929, "rewards/chosen": -0.0048529463820159435, "rewards/margins": 0.004674314521253109, "rewards/rejected": -0.00952726136893034, "step": 580 }, { "epoch": 0.17, "learning_rate": 2.8668610301263364e-07, "logits/chosen": -2.7547593116760254, "logits/rejected": -2.7581233978271484, "logps/chosen": -192.94606018066406, "logps/rejected": -190.77268981933594, "loss": 0.9969, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": -0.0011147389886900783, "rewards/margins": 0.004964867141097784, "rewards/rejected": -0.006079606246203184, "step": 590 }, { "epoch": 0.18, "learning_rate": 2.915451895043732e-07, "logits/chosen": -2.7692792415618896, "logits/rejected": -2.765631914138794, "logps/chosen": -190.91696166992188, "logps/rejected": -173.8136444091797, "loss": 0.9983, "rewards/accuracies": 0.484375, "rewards/chosen": -0.009905226528644562, "rewards/margins": 0.000821866444312036, "rewards/rejected": -0.010727094486355782, "step": 600 }, { "epoch": 0.18, "eval_logits/chosen": -2.6910696029663086, "eval_logits/rejected": -2.686223030090332, "eval_logps/chosen": -195.6588592529297, "eval_logps/rejected": -180.99447631835938, "eval_loss": 0.9981443285942078, "eval_rewards/accuracies": 0.5127464532852173, "eval_rewards/chosen": -0.005352581851184368, "eval_rewards/margins": 0.0019660205580294132, "eval_rewards/rejected": -0.007318601943552494, "eval_runtime": 443.4182, "eval_samples_per_second": 26.533, "eval_steps_per_second": 3.317, "step": 600 }, { "epoch": 0.18, "learning_rate": 2.9640427599611273e-07, "logits/chosen": -2.7499747276306152, "logits/rejected": -2.7771193981170654, "logps/chosen": -170.37429809570312, "logps/rejected": -174.40542602539062, "loss": 0.9969, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": -0.0075881704688072205, "rewards/margins": -0.0003000420401804149, "rewards/rejected": -0.007288129068911076, "step": 610 }, { "epoch": 0.18, "learning_rate": 3.012633624878523e-07, "logits/chosen": -2.7855448722839355, "logits/rejected": -2.774034023284912, "logps/chosen": -199.66506958007812, "logps/rejected": -181.94955444335938, "loss": 0.9963, "rewards/accuracies": 0.5, "rewards/chosen": -0.00368059566244483, "rewards/margins": 0.001826319145038724, "rewards/rejected": -0.0055069150403141975, "step": 620 }, { "epoch": 0.18, "learning_rate": 3.0612244897959183e-07, "logits/chosen": -2.7524774074554443, "logits/rejected": -2.7855701446533203, "logps/chosen": -176.02008056640625, "logps/rejected": -177.9148712158203, "loss": 0.9963, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": -0.004544141236692667, "rewards/margins": 0.0032399215269833803, "rewards/rejected": -0.007784062065184116, "step": 630 }, { "epoch": 0.19, "learning_rate": 3.109815354713314e-07, "logits/chosen": -2.7449309825897217, "logits/rejected": -2.750732183456421, "logps/chosen": -181.66734313964844, "logps/rejected": -168.88841247558594, "loss": 0.9945, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.0030544609762728214, "rewards/margins": 0.00868634507060051, "rewards/rejected": -0.011740805581212044, "step": 640 }, { "epoch": 0.19, "learning_rate": 3.1584062196307093e-07, "logits/chosen": -2.777808904647827, "logits/rejected": -2.7611007690429688, "logps/chosen": -202.5792999267578, "logps/rejected": -188.46258544921875, "loss": 0.9955, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.005264073144644499, "rewards/margins": 0.005189122632145882, "rewards/rejected": -0.010453195311129093, "step": 650 }, { "epoch": 0.19, "learning_rate": 3.206997084548105e-07, "logits/chosen": -2.788951873779297, "logits/rejected": -2.7755539417266846, "logps/chosen": -198.77944946289062, "logps/rejected": -177.9780731201172, "loss": 0.9997, "rewards/accuracies": 0.503125011920929, "rewards/chosen": -0.00859787967056036, "rewards/margins": 0.0023240833543241024, "rewards/rejected": -0.01092196349054575, "step": 660 }, { "epoch": 0.2, "learning_rate": 3.2555879494655003e-07, "logits/chosen": -2.771275281906128, "logits/rejected": -2.778935432434082, "logps/chosen": -208.5783233642578, "logps/rejected": -192.476318359375, "loss": 0.998, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": -0.009211702272295952, "rewards/margins": 0.0008993959054350853, "rewards/rejected": -0.010111097246408463, "step": 670 }, { "epoch": 0.2, "learning_rate": 3.304178814382896e-07, "logits/chosen": -2.7745091915130615, "logits/rejected": -2.7752413749694824, "logps/chosen": -186.6476593017578, "logps/rejected": -172.84085083007812, "loss": 0.9931, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.00590853113681078, "rewards/margins": 0.007229860872030258, "rewards/rejected": -0.013138392940163612, "step": 680 }, { "epoch": 0.2, "learning_rate": 3.3527696793002913e-07, "logits/chosen": -2.7487292289733887, "logits/rejected": -2.753268241882324, "logps/chosen": -194.5361785888672, "logps/rejected": -178.95840454101562, "loss": 0.9953, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.009363415651023388, "rewards/margins": 0.007163605652749538, "rewards/rejected": -0.016527021303772926, "step": 690 }, { "epoch": 0.2, "learning_rate": 3.401360544217687e-07, "logits/chosen": -2.735527515411377, "logits/rejected": -2.7788593769073486, "logps/chosen": -190.52822875976562, "logps/rejected": -198.77719116210938, "loss": 0.9968, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.006488684564828873, "rewards/margins": 0.003683714661747217, "rewards/rejected": -0.010172399692237377, "step": 700 }, { "epoch": 0.2, "eval_logits/chosen": -2.6803455352783203, "eval_logits/rejected": -2.67529559135437, "eval_logps/chosen": -195.698486328125, "eval_logps/rejected": -181.0484619140625, "eval_loss": 0.997223436832428, "eval_rewards/accuracies": 0.5241332650184631, "eval_rewards/chosen": -0.009316603653132915, "eval_rewards/margins": 0.003401679452508688, "eval_rewards/rejected": -0.01271828357130289, "eval_runtime": 443.593, "eval_samples_per_second": 26.522, "eval_steps_per_second": 3.316, "step": 700 }, { "epoch": 0.21, "learning_rate": 3.4499514091350823e-07, "logits/chosen": -2.774488687515259, "logits/rejected": -2.774510145187378, "logps/chosen": -203.57730102539062, "logps/rejected": -191.86526489257812, "loss": 0.9927, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.007745369337499142, "rewards/margins": 0.007565264590084553, "rewards/rejected": -0.015310634858906269, "step": 710 }, { "epoch": 0.21, "learning_rate": 3.498542274052478e-07, "logits/chosen": -2.7597362995147705, "logits/rejected": -2.7930312156677246, "logps/chosen": -195.6759033203125, "logps/rejected": -205.12081909179688, "loss": 0.9979, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.008192415349185467, "rewards/margins": 0.007148954086005688, "rewards/rejected": -0.015341369435191154, "step": 720 }, { "epoch": 0.21, "learning_rate": 3.5471331389698733e-07, "logits/chosen": -2.7899694442749023, "logits/rejected": -2.7742934226989746, "logps/chosen": -236.7986297607422, "logps/rejected": -213.5732421875, "loss": 0.9908, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.0075895837508141994, "rewards/margins": 0.007327827624976635, "rewards/rejected": -0.014917412772774696, "step": 730 }, { "epoch": 0.22, "learning_rate": 3.595724003887269e-07, "logits/chosen": -2.775095224380493, "logits/rejected": -2.7730841636657715, "logps/chosen": -196.58554077148438, "logps/rejected": -191.58370971679688, "loss": 0.9989, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": -0.016460472717881203, "rewards/margins": -0.00014977165847085416, "rewards/rejected": -0.01631070114672184, "step": 740 }, { "epoch": 0.22, "learning_rate": 3.6443148688046643e-07, "logits/chosen": -2.747368335723877, "logits/rejected": -2.7749929428100586, "logps/chosen": -182.0715789794922, "logps/rejected": -181.74368286132812, "loss": 0.994, "rewards/accuracies": 0.515625, "rewards/chosen": -0.007714638952165842, "rewards/margins": 0.008383364416658878, "rewards/rejected": -0.016098003834486008, "step": 750 }, { "epoch": 0.22, "learning_rate": 3.69290573372206e-07, "logits/chosen": -2.7606258392333984, "logits/rejected": -2.7410714626312256, "logps/chosen": -195.3123016357422, "logps/rejected": -167.84469604492188, "loss": 1.0003, "rewards/accuracies": 0.515625, "rewards/chosen": -0.014703328721225262, "rewards/margins": 0.0018827319145202637, "rewards/rejected": -0.01658605970442295, "step": 760 }, { "epoch": 0.22, "learning_rate": 3.741496598639456e-07, "logits/chosen": -2.8003480434417725, "logits/rejected": -2.801697015762329, "logps/chosen": -200.1820068359375, "logps/rejected": -193.44102478027344, "loss": 0.9936, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.012818267568945885, "rewards/margins": 0.005492014344781637, "rewards/rejected": -0.01831028237938881, "step": 770 }, { "epoch": 0.23, "learning_rate": 3.7900874635568513e-07, "logits/chosen": -2.736542224884033, "logits/rejected": -2.7550294399261475, "logps/chosen": -184.42022705078125, "logps/rejected": -178.26968383789062, "loss": 0.9981, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.011115864850580692, "rewards/margins": 0.005478517152369022, "rewards/rejected": -0.016594382002949715, "step": 780 }, { "epoch": 0.23, "learning_rate": 3.838678328474247e-07, "logits/chosen": -2.7842249870300293, "logits/rejected": -2.7937369346618652, "logps/chosen": -192.46023559570312, "logps/rejected": -186.9135284423828, "loss": 0.9998, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.012151426635682583, "rewards/margins": 0.0024247639812529087, "rewards/rejected": -0.014576191082596779, "step": 790 }, { "epoch": 0.23, "learning_rate": 3.8872691933916423e-07, "logits/chosen": -2.7808454036712646, "logits/rejected": -2.7431931495666504, "logps/chosen": -218.29647827148438, "logps/rejected": -184.58609008789062, "loss": 0.9893, "rewards/accuracies": 0.546875, "rewards/chosen": -0.009516356512904167, "rewards/margins": 0.007908456958830357, "rewards/rejected": -0.0174248106777668, "step": 800 }, { "epoch": 0.23, "eval_logits/chosen": -2.672806978225708, "eval_logits/rejected": -2.667616844177246, "eval_logps/chosen": -195.71884155273438, "eval_logps/rejected": -181.0857696533203, "eval_loss": 0.9950501322746277, "eval_rewards/accuracies": 0.5248130559921265, "eval_rewards/chosen": -0.011351874098181725, "eval_rewards/margins": 0.005094607826322317, "eval_rewards/rejected": -0.01644648239016533, "eval_runtime": 443.5917, "eval_samples_per_second": 26.522, "eval_steps_per_second": 3.316, "step": 800 }, { "epoch": 0.24, "learning_rate": 3.935860058309038e-07, "logits/chosen": -2.7565319538116455, "logits/rejected": -2.772705554962158, "logps/chosen": -175.65234375, "logps/rejected": -171.27127075195312, "loss": 0.9918, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.010509233921766281, "rewards/margins": 0.008544095791876316, "rewards/rejected": -0.019053328782320023, "step": 810 }, { "epoch": 0.24, "learning_rate": 3.9844509232264333e-07, "logits/chosen": -2.7437987327575684, "logits/rejected": -2.760740280151367, "logps/chosen": -180.13516235351562, "logps/rejected": -173.41995239257812, "loss": 0.9922, "rewards/accuracies": 0.53125, "rewards/chosen": -0.012019792571663857, "rewards/margins": 0.0075903395190835, "rewards/rejected": -0.01961013302206993, "step": 820 }, { "epoch": 0.24, "learning_rate": 4.033041788143829e-07, "logits/chosen": -2.754300117492676, "logits/rejected": -2.754885196685791, "logps/chosen": -198.19288635253906, "logps/rejected": -176.8773651123047, "loss": 0.9977, "rewards/accuracies": 0.484375, "rewards/chosen": -0.01481813658028841, "rewards/margins": 0.0010406129295006394, "rewards/rejected": -0.01585875079035759, "step": 830 }, { "epoch": 0.25, "learning_rate": 4.0816326530612243e-07, "logits/chosen": -2.742445230484009, "logits/rejected": -2.761491298675537, "logps/chosen": -180.978271484375, "logps/rejected": -187.919677734375, "loss": 0.9908, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.009284690022468567, "rewards/margins": 0.009516666643321514, "rewards/rejected": -0.018801355734467506, "step": 840 }, { "epoch": 0.25, "learning_rate": 4.13022351797862e-07, "logits/chosen": -2.7756142616271973, "logits/rejected": -2.761234760284424, "logps/chosen": -205.8830108642578, "logps/rejected": -187.29832458496094, "loss": 0.9929, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.015474356710910797, "rewards/margins": 0.0059659467078745365, "rewards/rejected": -0.021440301090478897, "step": 850 }, { "epoch": 0.25, "learning_rate": 4.178814382896015e-07, "logits/chosen": -2.7605714797973633, "logits/rejected": -2.768223285675049, "logps/chosen": -193.89329528808594, "logps/rejected": -186.303955078125, "loss": 0.9917, "rewards/accuracies": 0.53125, "rewards/chosen": -0.015928097069263458, "rewards/margins": 0.00672167306765914, "rewards/rejected": -0.022649768739938736, "step": 860 }, { "epoch": 0.25, "learning_rate": 4.227405247813411e-07, "logits/chosen": -2.753310441970825, "logits/rejected": -2.7532548904418945, "logps/chosen": -200.5699005126953, "logps/rejected": -190.7694549560547, "loss": 0.993, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.01844353973865509, "rewards/margins": 0.0066141290590167046, "rewards/rejected": -0.02505766786634922, "step": 870 }, { "epoch": 0.26, "learning_rate": 4.275996112730806e-07, "logits/chosen": -2.786644458770752, "logits/rejected": -2.776003360748291, "logps/chosen": -194.55020141601562, "logps/rejected": -184.32540893554688, "loss": 0.9896, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.013445606455206871, "rewards/margins": 0.011986644938588142, "rewards/rejected": -0.025432255119085312, "step": 880 }, { "epoch": 0.26, "learning_rate": 4.324586977648202e-07, "logits/chosen": -2.754173994064331, "logits/rejected": -2.7404377460479736, "logps/chosen": -206.9376983642578, "logps/rejected": -178.90286254882812, "loss": 0.9932, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.015037769451737404, "rewards/margins": 0.006139338947832584, "rewards/rejected": -0.021177105605602264, "step": 890 }, { "epoch": 0.26, "learning_rate": 4.373177842565597e-07, "logits/chosen": -2.8073904514312744, "logits/rejected": -2.7982370853424072, "logps/chosen": -201.30996704101562, "logps/rejected": -180.21197509765625, "loss": 0.988, "rewards/accuracies": 0.546875, "rewards/chosen": -0.005904694087803364, "rewards/margins": 0.011741106398403645, "rewards/rejected": -0.01764579862356186, "step": 900 }, { "epoch": 0.26, "eval_logits/chosen": -2.6813623905181885, "eval_logits/rejected": -2.6763410568237305, "eval_logps/chosen": -195.7743682861328, "eval_logps/rejected": -181.16632080078125, "eval_loss": 0.9923923015594482, "eval_rewards/accuracies": 0.5421481728553772, "eval_rewards/chosen": -0.016905097290873528, "eval_rewards/margins": 0.007597530260682106, "eval_rewards/rejected": -0.024502631276845932, "eval_runtime": 443.5463, "eval_samples_per_second": 26.525, "eval_steps_per_second": 3.316, "step": 900 }, { "epoch": 0.27, "learning_rate": 4.421768707482993e-07, "logits/chosen": -2.7671663761138916, "logits/rejected": -2.755331516265869, "logps/chosen": -209.4878692626953, "logps/rejected": -188.00961303710938, "loss": 0.9968, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.019240865483880043, "rewards/margins": -0.0007847605156712234, "rewards/rejected": -0.018456105142831802, "step": 910 }, { "epoch": 0.27, "learning_rate": 4.470359572400388e-07, "logits/chosen": -2.7682743072509766, "logits/rejected": -2.7791781425476074, "logps/chosen": -197.57215881347656, "logps/rejected": -187.62045288085938, "loss": 0.9943, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.021333549171686172, "rewards/margins": 0.005782320164144039, "rewards/rejected": -0.027115870267152786, "step": 920 }, { "epoch": 0.27, "learning_rate": 4.5189504373177837e-07, "logits/chosen": -2.752847194671631, "logits/rejected": -2.76066255569458, "logps/chosen": -159.60269165039062, "logps/rejected": -163.90760803222656, "loss": 0.9949, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": -0.017810573801398277, "rewards/margins": 0.006514446344226599, "rewards/rejected": -0.024325022473931313, "step": 930 }, { "epoch": 0.27, "learning_rate": 4.567541302235179e-07, "logits/chosen": -2.7474169731140137, "logits/rejected": -2.760066509246826, "logps/chosen": -190.1791534423828, "logps/rejected": -181.58018493652344, "loss": 0.9964, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.020396294072270393, "rewards/margins": 0.001476737903431058, "rewards/rejected": -0.021873032674193382, "step": 940 }, { "epoch": 0.28, "learning_rate": 4.6161321671525747e-07, "logits/chosen": -2.768737316131592, "logits/rejected": -2.7523348331451416, "logps/chosen": -204.35702514648438, "logps/rejected": -174.8519287109375, "loss": 0.9941, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.019180381670594215, "rewards/margins": 0.002315108897164464, "rewards/rejected": -0.02149549126625061, "step": 950 }, { "epoch": 0.28, "learning_rate": 4.66472303206997e-07, "logits/chosen": -2.756478786468506, "logits/rejected": -2.737335443496704, "logps/chosen": -204.95266723632812, "logps/rejected": -176.6099090576172, "loss": 0.9906, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.02747512422502041, "rewards/margins": 0.0013099886709824204, "rewards/rejected": -0.028785113245248795, "step": 960 }, { "epoch": 0.28, "learning_rate": 4.7133138969873657e-07, "logits/chosen": -2.793271780014038, "logits/rejected": -2.7566933631896973, "logps/chosen": -211.52218627929688, "logps/rejected": -174.21127319335938, "loss": 0.9904, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.0174846388399601, "rewards/margins": 0.011544780805706978, "rewards/rejected": -0.029029419645667076, "step": 970 }, { "epoch": 0.29, "learning_rate": 4.761904761904761e-07, "logits/chosen": -2.7469844818115234, "logits/rejected": -2.754621982574463, "logps/chosen": -196.5634002685547, "logps/rejected": -180.66799926757812, "loss": 0.9948, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.02481316402554512, "rewards/margins": 0.004062502179294825, "rewards/rejected": -0.02887566387653351, "step": 980 }, { "epoch": 0.29, "learning_rate": 4.810495626822157e-07, "logits/chosen": -2.7565131187438965, "logits/rejected": -2.7494494915008545, "logps/chosen": -210.0545196533203, "logps/rejected": -196.57020568847656, "loss": 0.9908, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.01970556564629078, "rewards/margins": 0.010368332266807556, "rewards/rejected": -0.030073896050453186, "step": 990 }, { "epoch": 0.29, "learning_rate": 4.859086491739553e-07, "logits/chosen": -2.799431800842285, "logits/rejected": -2.784318447113037, "logps/chosen": -223.51602172851562, "logps/rejected": -199.84771728515625, "loss": 0.9879, "rewards/accuracies": 0.578125, "rewards/chosen": -0.021287377923727036, "rewards/margins": 0.012864580377936363, "rewards/rejected": -0.03415196016430855, "step": 1000 }, { "epoch": 0.29, "eval_logits/chosen": -2.6796457767486572, "eval_logits/rejected": -2.6746058464050293, "eval_logps/chosen": -195.8248291015625, "eval_logps/rejected": -181.2388458251953, "eval_loss": 0.9906548857688904, "eval_rewards/accuracies": 0.5475866794586182, "eval_rewards/chosen": -0.02195058949291706, "eval_rewards/margins": 0.009805315174162388, "eval_rewards/rejected": -0.03175590559840202, "eval_runtime": 443.5499, "eval_samples_per_second": 26.525, "eval_steps_per_second": 3.316, "step": 1000 }, { "epoch": 0.29, "learning_rate": 4.907677356656948e-07, "logits/chosen": -2.7441210746765137, "logits/rejected": -2.77170467376709, "logps/chosen": -180.41897583007812, "logps/rejected": -174.46829223632812, "loss": 0.9895, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.019895415753126144, "rewards/margins": 0.00860919151455164, "rewards/rejected": -0.02850460447371006, "step": 1010 }, { "epoch": 0.3, "learning_rate": 4.956268221574344e-07, "logits/chosen": -2.783003330230713, "logits/rejected": -2.8088173866271973, "logps/chosen": -196.19674682617188, "logps/rejected": -198.5745391845703, "loss": 0.9864, "rewards/accuracies": 0.5625, "rewards/chosen": -0.024192538112401962, "rewards/margins": 0.017691707238554955, "rewards/rejected": -0.041884247213602066, "step": 1020 }, { "epoch": 0.3, "learning_rate": 4.999999855968691e-07, "logits/chosen": -2.7660329341888428, "logits/rejected": -2.7447166442871094, "logps/chosen": -231.9657745361328, "logps/rejected": -195.4416046142578, "loss": 0.9941, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.027889683842658997, "rewards/margins": 0.007279851473867893, "rewards/rejected": -0.035169534385204315, "step": 1030 }, { "epoch": 0.3, "learning_rate": 4.999982572231678e-07, "logits/chosen": -2.7660796642303467, "logits/rejected": -2.7666707038879395, "logps/chosen": -184.3852996826172, "logps/rejected": -176.80551147460938, "loss": 0.992, "rewards/accuracies": 0.53125, "rewards/chosen": -0.023688379675149918, "rewards/margins": 0.009250967763364315, "rewards/rejected": -0.03293934464454651, "step": 1040 }, { "epoch": 0.31, "learning_rate": 4.999936482461037e-07, "logits/chosen": -2.759528160095215, "logits/rejected": -2.7832791805267334, "logps/chosen": -165.89126586914062, "logps/rejected": -167.88478088378906, "loss": 0.9909, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.019904453307390213, "rewards/margins": 0.014452459290623665, "rewards/rejected": -0.03435691073536873, "step": 1050 }, { "epoch": 0.31, "learning_rate": 4.999861587187839e-07, "logits/chosen": -2.7523863315582275, "logits/rejected": -2.730530023574829, "logps/chosen": -182.0239715576172, "logps/rejected": -153.8279266357422, "loss": 0.9911, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.02682223916053772, "rewards/margins": 0.008590024895966053, "rewards/rejected": -0.0354122593998909, "step": 1060 }, { "epoch": 0.31, "learning_rate": 4.999757887275061e-07, "logits/chosen": -2.7594218254089355, "logits/rejected": -2.75547456741333, "logps/chosen": -200.0524139404297, "logps/rejected": -185.8358612060547, "loss": 0.991, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": -0.027161534875631332, "rewards/margins": 0.004652016330510378, "rewards/rejected": -0.03181355446577072, "step": 1070 }, { "epoch": 0.32, "learning_rate": 4.999625383917586e-07, "logits/chosen": -2.747807741165161, "logits/rejected": -2.7675487995147705, "logps/chosen": -195.44952392578125, "logps/rejected": -187.72470092773438, "loss": 0.9884, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.02705690823495388, "rewards/margins": 0.012591761536896229, "rewards/rejected": -0.039648670703172684, "step": 1080 }, { "epoch": 0.32, "learning_rate": 4.999464078642184e-07, "logits/chosen": -2.7693164348602295, "logits/rejected": -2.7496602535247803, "logps/chosen": -198.76596069335938, "logps/rejected": -169.71527099609375, "loss": 0.992, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.027691161260008812, "rewards/margins": 0.009209704585373402, "rewards/rejected": -0.03690087050199509, "step": 1090 }, { "epoch": 0.32, "learning_rate": 4.999273973307493e-07, "logits/chosen": -2.764065980911255, "logits/rejected": -2.7625510692596436, "logps/chosen": -198.4870147705078, "logps/rejected": -185.19239807128906, "loss": 0.9882, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.02553977072238922, "rewards/margins": 0.015339975245296955, "rewards/rejected": -0.0408797450363636, "step": 1100 }, { "epoch": 0.32, "eval_logits/chosen": -2.669912815093994, "eval_logits/rejected": -2.6647205352783203, "eval_logps/chosen": -195.86611938476562, "eval_logps/rejected": -181.32003784179688, "eval_loss": 0.9869112372398376, "eval_rewards/accuracies": 0.5598232746124268, "eval_rewards/chosen": -0.026083307340741158, "eval_rewards/margins": 0.013792227022349834, "eval_rewards/rejected": -0.039875537157058716, "eval_runtime": 443.4502, "eval_samples_per_second": 26.531, "eval_steps_per_second": 3.317, "step": 1100 }, { "epoch": 0.32, "learning_rate": 4.999055070104e-07, "logits/chosen": -2.764817714691162, "logits/rejected": -2.7909953594207764, "logps/chosen": -184.2716522216797, "logps/rejected": -188.51341247558594, "loss": 0.989, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.03175400570034981, "rewards/margins": 0.015587709844112396, "rewards/rejected": -0.047341711819171906, "step": 1110 }, { "epoch": 0.33, "learning_rate": 4.998807371554017e-07, "logits/chosen": -2.7454075813293457, "logits/rejected": -2.748185396194458, "logps/chosen": -189.30145263671875, "logps/rejected": -183.9518280029297, "loss": 0.9874, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.028897082433104515, "rewards/margins": 0.016509678214788437, "rewards/rejected": -0.045406755059957504, "step": 1120 }, { "epoch": 0.33, "learning_rate": 4.998530880511649e-07, "logits/chosen": -2.7354488372802734, "logits/rejected": -2.7205042839050293, "logps/chosen": -194.57437133789062, "logps/rejected": -168.87326049804688, "loss": 0.9852, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -0.031062299385666847, "rewards/margins": 0.014078726060688496, "rewards/rejected": -0.04514102265238762, "step": 1130 }, { "epoch": 0.33, "learning_rate": 4.998225600162761e-07, "logits/chosen": -2.7497661113739014, "logits/rejected": -2.765693426132202, "logps/chosen": -183.91720581054688, "logps/rejected": -189.841796875, "loss": 0.9812, "rewards/accuracies": 0.59375, "rewards/chosen": -0.032630693167448044, "rewards/margins": 0.017434656620025635, "rewards/rejected": -0.05006534606218338, "step": 1140 }, { "epoch": 0.34, "learning_rate": 4.997891534024945e-07, "logits/chosen": -2.7627580165863037, "logits/rejected": -2.7716870307922363, "logps/chosen": -184.07809448242188, "logps/rejected": -187.72488403320312, "loss": 0.9814, "rewards/accuracies": 0.578125, "rewards/chosen": -0.0297432541847229, "rewards/margins": 0.018422532826662064, "rewards/rejected": -0.048165787011384964, "step": 1150 }, { "epoch": 0.34, "learning_rate": 4.997528685947477e-07, "logits/chosen": -2.781460762023926, "logits/rejected": -2.7639803886413574, "logps/chosen": -186.93238830566406, "logps/rejected": -169.6712646484375, "loss": 0.9864, "rewards/accuracies": 0.53125, "rewards/chosen": -0.031463898718357086, "rewards/margins": 0.009577763266861439, "rewards/rejected": -0.04104166105389595, "step": 1160 }, { "epoch": 0.34, "learning_rate": 4.997137060111269e-07, "logits/chosen": -2.729543447494507, "logits/rejected": -2.760732889175415, "logps/chosen": -170.96646118164062, "logps/rejected": -179.94125366210938, "loss": 0.9854, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03276781365275383, "rewards/margins": 0.018309885635972023, "rewards/rejected": -0.051077693700790405, "step": 1170 }, { "epoch": 0.34, "learning_rate": 4.996716661028829e-07, "logits/chosen": -2.753207206726074, "logits/rejected": -2.7457656860351562, "logps/chosen": -197.0430450439453, "logps/rejected": -188.0629425048828, "loss": 0.9888, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.04059111326932907, "rewards/margins": 0.007697828114032745, "rewards/rejected": -0.04828894883394241, "step": 1180 }, { "epoch": 0.35, "learning_rate": 4.996267493544203e-07, "logits/chosen": -2.805116653442383, "logits/rejected": -2.80474853515625, "logps/chosen": -195.44107055664062, "logps/rejected": -189.65139770507812, "loss": 0.989, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04101649299263954, "rewards/margins": 0.009648426435887814, "rewards/rejected": -0.05066491290926933, "step": 1190 }, { "epoch": 0.35, "learning_rate": 4.99578956283292e-07, "logits/chosen": -2.7761738300323486, "logits/rejected": -2.792396306991577, "logps/chosen": -202.1172332763672, "logps/rejected": -195.01736450195312, "loss": 0.979, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.028962809592485428, "rewards/margins": 0.02408483624458313, "rewards/rejected": -0.053047649562358856, "step": 1200 }, { "epoch": 0.35, "eval_logits/chosen": -2.6734869480133057, "eval_logits/rejected": -2.6683530807495117, "eval_logps/chosen": -195.96934509277344, "eval_logps/rejected": -181.4419403076172, "eval_loss": 0.9851318597793579, "eval_rewards/accuracies": 0.5562542676925659, "eval_rewards/chosen": -0.0364016555249691, "eval_rewards/margins": 0.015664031729102135, "eval_rewards/rejected": -0.052065689116716385, "eval_runtime": 443.4919, "eval_samples_per_second": 26.528, "eval_steps_per_second": 3.317, "step": 1200 }, { "epoch": 0.35, "learning_rate": 4.995282874401933e-07, "logits/chosen": -2.790585994720459, "logits/rejected": -2.7970879077911377, "logps/chosen": -195.25216674804688, "logps/rejected": -186.48037719726562, "loss": 0.9838, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.033409614115953445, "rewards/margins": 0.017419548705220222, "rewards/rejected": -0.05082916095852852, "step": 1210 }, { "epoch": 0.36, "learning_rate": 4.994747434089559e-07, "logits/chosen": -2.771557331085205, "logits/rejected": -2.754660129547119, "logps/chosen": -200.41087341308594, "logps/rejected": -188.08175659179688, "loss": 0.9935, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.045268464833498, "rewards/margins": 0.007542738225311041, "rewards/rejected": -0.05281120538711548, "step": 1220 }, { "epoch": 0.36, "learning_rate": 4.994183248065402e-07, "logits/chosen": -2.7263119220733643, "logits/rejected": -2.7358851432800293, "logps/chosen": -195.21957397460938, "logps/rejected": -181.23416137695312, "loss": 0.9856, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.035321272909641266, "rewards/margins": 0.01393541507422924, "rewards/rejected": -0.049256689846515656, "step": 1230 }, { "epoch": 0.36, "learning_rate": 4.993590322830295e-07, "logits/chosen": -2.7576966285705566, "logits/rejected": -2.754138946533203, "logps/chosen": -201.56475830078125, "logps/rejected": -175.058349609375, "loss": 0.9867, "rewards/accuracies": 0.47187501192092896, "rewards/chosen": -0.048004359006881714, "rewards/margins": 0.007500568870455027, "rewards/rejected": -0.055504925549030304, "step": 1240 }, { "epoch": 0.36, "learning_rate": 4.992968665216213e-07, "logits/chosen": -2.742941379547119, "logits/rejected": -2.740309238433838, "logps/chosen": -191.94155883789062, "logps/rejected": -180.1873016357422, "loss": 0.9865, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.03951577842235565, "rewards/margins": 0.014310412108898163, "rewards/rejected": -0.053826190531253815, "step": 1250 }, { "epoch": 0.37, "learning_rate": 4.992318282386203e-07, "logits/chosen": -2.796257495880127, "logits/rejected": -2.7801544666290283, "logps/chosen": -198.45379638671875, "logps/rejected": -180.01675415039062, "loss": 0.9877, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.043470896780490875, "rewards/margins": 0.011671255342662334, "rewards/rejected": -0.055142153054475784, "step": 1260 }, { "epoch": 0.37, "learning_rate": 4.991639181834298e-07, "logits/chosen": -2.7639927864074707, "logits/rejected": -2.7846803665161133, "logps/chosen": -188.55496215820312, "logps/rejected": -180.0243377685547, "loss": 0.9848, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04286808520555496, "rewards/margins": 0.01618005894124508, "rewards/rejected": -0.05904814600944519, "step": 1270 }, { "epoch": 0.37, "learning_rate": 4.990931371385427e-07, "logits/chosen": -2.751796245574951, "logits/rejected": -2.749959707260132, "logps/chosen": -171.72152709960938, "logps/rejected": -165.4966583251953, "loss": 0.9802, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.03991890698671341, "rewards/margins": 0.01386339496821165, "rewards/rejected": -0.053782302886247635, "step": 1280 }, { "epoch": 0.38, "learning_rate": 4.990194859195335e-07, "logits/chosen": -2.8110146522521973, "logits/rejected": -2.799238681793213, "logps/chosen": -190.9245147705078, "logps/rejected": -185.66787719726562, "loss": 0.9916, "rewards/accuracies": 0.515625, "rewards/chosen": -0.046778354793787, "rewards/margins": 0.009815091267228127, "rewards/rejected": -0.05659344792366028, "step": 1290 }, { "epoch": 0.38, "learning_rate": 4.989429653750478e-07, "logits/chosen": -2.7405948638916016, "logits/rejected": -2.764275550842285, "logps/chosen": -186.57777404785156, "logps/rejected": -181.9051055908203, "loss": 0.985, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.04000754654407501, "rewards/margins": 0.023956280201673508, "rewards/rejected": -0.06396382302045822, "step": 1300 }, { "epoch": 0.38, "eval_logits/chosen": -2.692143678665161, "eval_logits/rejected": -2.687408447265625, "eval_logps/chosen": -195.9900360107422, "eval_logps/rejected": -181.49777221679688, "eval_loss": 0.9818428158760071, "eval_rewards/accuracies": 0.5608429908752441, "eval_rewards/chosen": -0.038472600281238556, "eval_rewards/margins": 0.01917639747262001, "eval_rewards/rejected": -0.057648997753858566, "eval_runtime": 443.4605, "eval_samples_per_second": 26.53, "eval_steps_per_second": 3.317, "step": 1300 }, { "epoch": 0.38, "learning_rate": 4.988635763867929e-07, "logits/chosen": -2.754769802093506, "logits/rejected": -2.7642273902893066, "logps/chosen": -171.62371826171875, "logps/rejected": -173.17213439941406, "loss": 0.9828, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.0436868891119957, "rewards/margins": 0.013914274051785469, "rewards/rejected": -0.05760116130113602, "step": 1310 }, { "epoch": 0.39, "learning_rate": 4.987813198695282e-07, "logits/chosen": -2.799032688140869, "logits/rejected": -2.7754616737365723, "logps/chosen": -239.6453399658203, "logps/rejected": -194.0479278564453, "loss": 0.9799, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -0.042483363300561905, "rewards/margins": 0.020803894847631454, "rewards/rejected": -0.06328727304935455, "step": 1320 }, { "epoch": 0.39, "learning_rate": 4.986961967710538e-07, "logits/chosen": -2.751368761062622, "logits/rejected": -2.775388717651367, "logps/chosen": -190.04867553710938, "logps/rejected": -184.562255859375, "loss": 0.9826, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.04249824956059456, "rewards/margins": 0.01044423133134842, "rewards/rejected": -0.05294247716665268, "step": 1330 }, { "epoch": 0.39, "learning_rate": 4.986082080722e-07, "logits/chosen": -2.718585729598999, "logits/rejected": -2.753647804260254, "logps/chosen": -155.80125427246094, "logps/rejected": -167.8536834716797, "loss": 0.9767, "rewards/accuracies": 0.5625, "rewards/chosen": -0.041705384850502014, "rewards/margins": 0.018204566091299057, "rewards/rejected": -0.05990995094180107, "step": 1340 }, { "epoch": 0.39, "learning_rate": 4.985173547868161e-07, "logits/chosen": -2.782245397567749, "logits/rejected": -2.7602505683898926, "logps/chosen": -193.46780395507812, "logps/rejected": -168.34664916992188, "loss": 0.9872, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.05123595520853996, "rewards/margins": 0.011545160785317421, "rewards/rejected": -0.06278111785650253, "step": 1350 }, { "epoch": 0.4, "learning_rate": 4.984236379617585e-07, "logits/chosen": -2.762777805328369, "logits/rejected": -2.7644848823547363, "logps/chosen": -191.2324676513672, "logps/rejected": -175.95669555664062, "loss": 0.9746, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.041074853390455246, "rewards/margins": 0.027508120983839035, "rewards/rejected": -0.06858296692371368, "step": 1360 }, { "epoch": 0.4, "learning_rate": 4.983270586768788e-07, "logits/chosen": -2.7665207386016846, "logits/rejected": -2.7342450618743896, "logps/chosen": -208.2240753173828, "logps/rejected": -171.34295654296875, "loss": 0.9826, "rewards/accuracies": 0.528124988079071, "rewards/chosen": -0.04937398433685303, "rewards/margins": 0.010283073410391808, "rewards/rejected": -0.059657059609889984, "step": 1370 }, { "epoch": 0.4, "learning_rate": 4.982276180450112e-07, "logits/chosen": -2.771615505218506, "logits/rejected": -2.75608491897583, "logps/chosen": -190.74603271484375, "logps/rejected": -178.79782104492188, "loss": 0.9848, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.046520378440618515, "rewards/margins": 0.014152820222079754, "rewards/rejected": -0.060673199594020844, "step": 1380 }, { "epoch": 0.41, "learning_rate": 4.981253172119596e-07, "logits/chosen": -2.7729361057281494, "logits/rejected": -2.775089740753174, "logps/chosen": -183.19204711914062, "logps/rejected": -174.96011352539062, "loss": 0.9854, "rewards/accuracies": 0.53125, "rewards/chosen": -0.05265643075108528, "rewards/margins": 0.012122412212193012, "rewards/rejected": -0.06477884203195572, "step": 1390 }, { "epoch": 0.41, "learning_rate": 4.980201573564849e-07, "logits/chosen": -2.7582387924194336, "logits/rejected": -2.7580008506774902, "logps/chosen": -186.2787322998047, "logps/rejected": -192.34915161132812, "loss": 0.9821, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.05323047563433647, "rewards/margins": 0.02109154500067234, "rewards/rejected": -0.07432201504707336, "step": 1400 }, { "epoch": 0.41, "eval_logits/chosen": -2.6810476779937744, "eval_logits/rejected": -2.676079511642456, "eval_logps/chosen": -196.06719970703125, "eval_logps/rejected": -181.589111328125, "eval_loss": 0.98047935962677, "eval_rewards/accuracies": 0.558973491191864, "eval_rewards/chosen": -0.04618801176548004, "eval_rewards/margins": 0.020594673231244087, "eval_rewards/rejected": -0.06678267568349838, "eval_runtime": 443.5004, "eval_samples_per_second": 26.528, "eval_steps_per_second": 3.317, "step": 1400 }, { "epoch": 0.41, "learning_rate": 4.979121396902908e-07, "logits/chosen": -2.7379727363586426, "logits/rejected": -2.7336249351501465, "logps/chosen": -203.15322875976562, "logps/rejected": -181.72555541992188, "loss": 0.9673, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.050723861902952194, "rewards/margins": 0.030538350343704224, "rewards/rejected": -0.08126221597194672, "step": 1410 }, { "epoch": 0.41, "learning_rate": 4.978012654580102e-07, "logits/chosen": -2.766727924346924, "logits/rejected": -2.7514896392822266, "logps/chosen": -203.69638061523438, "logps/rejected": -176.80108642578125, "loss": 0.9777, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.04542822390794754, "rewards/margins": 0.02188730053603649, "rewards/rejected": -0.06731553375720978, "step": 1420 }, { "epoch": 0.42, "learning_rate": 4.976875359371907e-07, "logits/chosen": -2.730456590652466, "logits/rejected": -2.741044759750366, "logps/chosen": -170.23345947265625, "logps/rejected": -165.81503295898438, "loss": 0.9798, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.05172818899154663, "rewards/margins": 0.020575672388076782, "rewards/rejected": -0.07230386137962341, "step": 1430 }, { "epoch": 0.42, "learning_rate": 4.9757095243828e-07, "logits/chosen": -2.7576966285705566, "logits/rejected": -2.767139434814453, "logps/chosen": -184.7630615234375, "logps/rejected": -174.48568725585938, "loss": 0.9791, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.048875968903303146, "rewards/margins": 0.019326506182551384, "rewards/rejected": -0.06820248067378998, "step": 1440 }, { "epoch": 0.42, "learning_rate": 4.974515163046109e-07, "logits/chosen": -2.755999803543091, "logits/rejected": -2.7733826637268066, "logps/chosen": -208.53012084960938, "logps/rejected": -187.8560333251953, "loss": 0.9791, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.05815325304865837, "rewards/margins": 0.02635633386671543, "rewards/rejected": -0.08450958877801895, "step": 1450 }, { "epoch": 0.43, "learning_rate": 4.973292289123853e-07, "logits/chosen": -2.7847931385040283, "logits/rejected": -2.7798149585723877, "logps/chosen": -192.0679168701172, "logps/rejected": -189.32138061523438, "loss": 0.9796, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.045699235051870346, "rewards/margins": 0.02493266388773918, "rewards/rejected": -0.07063189893960953, "step": 1460 }, { "epoch": 0.43, "learning_rate": 4.972040916706591e-07, "logits/chosen": -2.7773451805114746, "logits/rejected": -2.7763779163360596, "logps/chosen": -181.1774139404297, "logps/rejected": -177.46780395507812, "loss": 0.9744, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.051887236535549164, "rewards/margins": 0.019978413358330727, "rewards/rejected": -0.07186565548181534, "step": 1470 }, { "epoch": 0.43, "learning_rate": 4.970761060213252e-07, "logits/chosen": -2.7489306926727295, "logits/rejected": -2.7517669200897217, "logps/chosen": -191.70501708984375, "logps/rejected": -186.7254180908203, "loss": 0.9782, "rewards/accuracies": 0.5625, "rewards/chosen": -0.053087152540683746, "rewards/margins": 0.023641914129257202, "rewards/rejected": -0.07672907412052155, "step": 1480 }, { "epoch": 0.43, "learning_rate": 4.969452734390976e-07, "logits/chosen": -2.7764415740966797, "logits/rejected": -2.740410566329956, "logps/chosen": -223.160400390625, "logps/rejected": -195.7806854248047, "loss": 0.9679, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.05532132834196091, "rewards/margins": 0.03490416705608368, "rewards/rejected": -0.09022549539804459, "step": 1490 }, { "epoch": 0.44, "learning_rate": 4.968115954314938e-07, "logits/chosen": -2.7740657329559326, "logits/rejected": -2.74410343170166, "logps/chosen": -209.5076141357422, "logps/rejected": -175.87014770507812, "loss": 0.9822, "rewards/accuracies": 0.528124988079071, "rewards/chosen": -0.06310389190912247, "rewards/margins": 0.008666287176311016, "rewards/rejected": -0.07177017629146576, "step": 1500 }, { "epoch": 0.44, "eval_logits/chosen": -2.6813409328460693, "eval_logits/rejected": -2.6763975620269775, "eval_logps/chosen": -196.1554412841797, "eval_logps/rejected": -181.69834899902344, "eval_loss": 0.9778910279273987, "eval_rewards/accuracies": 0.5632222890853882, "eval_rewards/chosen": -0.05501263216137886, "eval_rewards/margins": 0.02269531972706318, "eval_rewards/rejected": -0.07770795375108719, "eval_runtime": 443.474, "eval_samples_per_second": 26.529, "eval_steps_per_second": 3.317, "step": 1500 }, { "epoch": 0.44, "learning_rate": 4.966750735388179e-07, "logits/chosen": -2.7591936588287354, "logits/rejected": -2.750657558441162, "logps/chosen": -192.50819396972656, "logps/rejected": -178.97280883789062, "loss": 0.9742, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.06468276679515839, "rewards/margins": 0.017507528886198997, "rewards/rejected": -0.08219029009342194, "step": 1510 }, { "epoch": 0.44, "learning_rate": 4.965357093341425e-07, "logits/chosen": -2.7657759189605713, "logits/rejected": -2.724945306777954, "logps/chosen": -214.63888549804688, "logps/rejected": -174.35433959960938, "loss": 0.9775, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.055322058498859406, "rewards/margins": 0.0185992531478405, "rewards/rejected": -0.0739213079214096, "step": 1520 }, { "epoch": 0.45, "learning_rate": 4.963935044232909e-07, "logits/chosen": -2.7615058422088623, "logits/rejected": -2.72468900680542, "logps/chosen": -201.26620483398438, "logps/rejected": -174.67568969726562, "loss": 0.9842, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": -0.05784177780151367, "rewards/margins": 0.015006395988166332, "rewards/rejected": -0.07284817099571228, "step": 1530 }, { "epoch": 0.45, "learning_rate": 4.96248460444818e-07, "logits/chosen": -2.7774100303649902, "logits/rejected": -2.782388210296631, "logps/chosen": -174.90695190429688, "logps/rejected": -176.49539184570312, "loss": 0.9718, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.050717033445835114, "rewards/margins": 0.02875804901123047, "rewards/rejected": -0.07947508245706558, "step": 1540 }, { "epoch": 0.45, "learning_rate": 4.961005790699925e-07, "logits/chosen": -2.7708535194396973, "logits/rejected": -2.760714054107666, "logps/chosen": -197.136962890625, "logps/rejected": -178.37271118164062, "loss": 0.9687, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04507363587617874, "rewards/margins": 0.033439092338085175, "rewards/rejected": -0.07851273566484451, "step": 1550 }, { "epoch": 0.46, "learning_rate": 4.959498620027765e-07, "logits/chosen": -2.738436222076416, "logits/rejected": -2.746307849884033, "logps/chosen": -203.83351135253906, "logps/rejected": -183.6167755126953, "loss": 0.9772, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.07137466967105865, "rewards/margins": 0.01522110402584076, "rewards/rejected": -0.08659576624631882, "step": 1560 }, { "epoch": 0.46, "learning_rate": 4.957963109798064e-07, "logits/chosen": -2.7385504245758057, "logits/rejected": -2.754218816757202, "logps/chosen": -183.70143127441406, "logps/rejected": -180.86209106445312, "loss": 0.9792, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.06631606817245483, "rewards/margins": 0.027938008308410645, "rewards/rejected": -0.09425408393144608, "step": 1570 }, { "epoch": 0.46, "learning_rate": 4.956399277703729e-07, "logits/chosen": -2.732557535171509, "logits/rejected": -2.718478202819824, "logps/chosen": -201.7747802734375, "logps/rejected": -166.88467407226562, "loss": 0.9817, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.06500306725502014, "rewards/margins": 0.017368188127875328, "rewards/rejected": -0.08237125724554062, "step": 1580 }, { "epoch": 0.46, "learning_rate": 4.954807141764006e-07, "logits/chosen": -2.7428674697875977, "logits/rejected": -2.744955062866211, "logps/chosen": -192.12753295898438, "logps/rejected": -179.4935760498047, "loss": 0.9814, "rewards/accuracies": 0.53125, "rewards/chosen": -0.0626605898141861, "rewards/margins": 0.025600705295801163, "rewards/rejected": -0.08826129138469696, "step": 1590 }, { "epoch": 0.47, "learning_rate": 4.953186720324272e-07, "logits/chosen": -2.78163480758667, "logits/rejected": -2.754462718963623, "logps/chosen": -207.11032104492188, "logps/rejected": -178.58656311035156, "loss": 0.9755, "rewards/accuracies": 0.578125, "rewards/chosen": -0.05041077733039856, "rewards/margins": 0.030925389379262924, "rewards/rejected": -0.08133616298437119, "step": 1600 }, { "epoch": 0.47, "eval_logits/chosen": -2.65568208694458, "eval_logits/rejected": -2.6502463817596436, "eval_logps/chosen": -196.205810546875, "eval_logps/rejected": -181.77638244628906, "eval_loss": 0.9756138920783997, "eval_rewards/accuracies": 0.565601646900177, "eval_rewards/chosen": -0.06004924699664116, "eval_rewards/margins": 0.02545936405658722, "eval_rewards/rejected": -0.08550861477851868, "eval_runtime": 443.4965, "eval_samples_per_second": 26.528, "eval_steps_per_second": 3.317, "step": 1600 }, { "epoch": 0.47, "learning_rate": 4.951538032055822e-07, "logits/chosen": -2.7657768726348877, "logits/rejected": -2.76108980178833, "logps/chosen": -204.87576293945312, "logps/rejected": -192.89125061035156, "loss": 0.9764, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.05869390815496445, "rewards/margins": 0.03274776414036751, "rewards/rejected": -0.09144166857004166, "step": 1610 }, { "epoch": 0.47, "learning_rate": 4.949861095955656e-07, "logits/chosen": -2.7711009979248047, "logits/rejected": -2.7802529335021973, "logps/chosen": -198.54647827148438, "logps/rejected": -204.2147216796875, "loss": 0.9831, "rewards/accuracies": 0.528124988079071, "rewards/chosen": -0.07690480351448059, "rewards/margins": 0.008501519449055195, "rewards/rejected": -0.08540631830692291, "step": 1620 }, { "epoch": 0.48, "learning_rate": 4.948155931346262e-07, "logits/chosen": -2.752509117126465, "logits/rejected": -2.750129222869873, "logps/chosen": -204.9178009033203, "logps/rejected": -195.44839477539062, "loss": 0.9764, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.06369360536336899, "rewards/margins": 0.022426560521125793, "rewards/rejected": -0.08612016588449478, "step": 1630 }, { "epoch": 0.48, "learning_rate": 4.946422557875386e-07, "logits/chosen": -2.765320062637329, "logits/rejected": -2.7900614738464355, "logps/chosen": -183.15676879882812, "logps/rejected": -191.41493225097656, "loss": 0.9725, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.06169677525758743, "rewards/margins": 0.025588322430849075, "rewards/rejected": -0.0872851014137268, "step": 1640 }, { "epoch": 0.48, "learning_rate": 4.944660995515814e-07, "logits/chosen": -2.745473623275757, "logits/rejected": -2.7588276863098145, "logps/chosen": -195.170166015625, "logps/rejected": -193.9084014892578, "loss": 0.9754, "rewards/accuracies": 0.515625, "rewards/chosen": -0.07741503417491913, "rewards/margins": 0.019316289573907852, "rewards/rejected": -0.09673131257295609, "step": 1650 }, { "epoch": 0.48, "learning_rate": 4.942871264565139e-07, "logits/chosen": -2.734675407409668, "logits/rejected": -2.739234447479248, "logps/chosen": -188.9669647216797, "logps/rejected": -180.78250122070312, "loss": 0.9774, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.06605865061283112, "rewards/margins": 0.022786004468798637, "rewards/rejected": -0.0888446569442749, "step": 1660 }, { "epoch": 0.49, "learning_rate": 4.941053385645522e-07, "logits/chosen": -2.7543911933898926, "logits/rejected": -2.7284069061279297, "logps/chosen": -200.8400421142578, "logps/rejected": -168.43988037109375, "loss": 0.9813, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.0760786384344101, "rewards/margins": 0.01815926656126976, "rewards/rejected": -0.09423790872097015, "step": 1670 }, { "epoch": 0.49, "learning_rate": 4.939207379703462e-07, "logits/chosen": -2.7847373485565186, "logits/rejected": -2.7574844360351562, "logps/chosen": -199.1935272216797, "logps/rejected": -177.27557373046875, "loss": 0.971, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.06233362480998039, "rewards/margins": 0.026879101991653442, "rewards/rejected": -0.08921272307634354, "step": 1680 }, { "epoch": 0.49, "learning_rate": 4.937333268009552e-07, "logits/chosen": -2.748115062713623, "logits/rejected": -2.726240873336792, "logps/chosen": -204.0299835205078, "logps/rejected": -180.19448852539062, "loss": 0.9743, "rewards/accuracies": 0.578125, "rewards/chosen": -0.05886361747980118, "rewards/margins": 0.024648474529385567, "rewards/rejected": -0.0835120901465416, "step": 1690 }, { "epoch": 0.5, "learning_rate": 4.935431072158234e-07, "logits/chosen": -2.7623496055603027, "logits/rejected": -2.7553372383117676, "logps/chosen": -189.81854248046875, "logps/rejected": -180.30911254882812, "loss": 0.9697, "rewards/accuracies": 0.5625, "rewards/chosen": -0.07516200840473175, "rewards/margins": 0.023591557517647743, "rewards/rejected": -0.09875356405973434, "step": 1700 }, { "epoch": 0.5, "eval_logits/chosen": -2.680124044418335, "eval_logits/rejected": -2.675168037414551, "eval_logps/chosen": -196.2568817138672, "eval_logps/rejected": -181.85256958007812, "eval_loss": 0.9731392860412598, "eval_rewards/accuracies": 0.5650917887687683, "eval_rewards/chosen": -0.06515874713659286, "eval_rewards/margins": 0.02797050215303898, "eval_rewards/rejected": -0.09312925487756729, "eval_runtime": 443.319, "eval_samples_per_second": 26.538, "eval_steps_per_second": 3.318, "step": 1700 }, { "epoch": 0.5, "learning_rate": 4.933500814067543e-07, "logits/chosen": -2.7594799995422363, "logits/rejected": -2.7589800357818604, "logps/chosen": -212.92599487304688, "logps/rejected": -188.29454040527344, "loss": 0.96, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05867772176861763, "rewards/margins": 0.041473548859357834, "rewards/rejected": -0.10015126317739487, "step": 1710 }, { "epoch": 0.5, "learning_rate": 4.931542515978871e-07, "logits/chosen": -2.7504467964172363, "logits/rejected": -2.756544828414917, "logps/chosen": -212.72036743164062, "logps/rejected": -204.91036987304688, "loss": 0.9724, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07226310670375824, "rewards/margins": 0.038937319070100784, "rewards/rejected": -0.11120043694972992, "step": 1720 }, { "epoch": 0.5, "learning_rate": 4.929556200456692e-07, "logits/chosen": -2.745439052581787, "logits/rejected": -2.7529444694519043, "logps/chosen": -170.9637908935547, "logps/rejected": -165.9134521484375, "loss": 0.9841, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.06306852400302887, "rewards/margins": 0.024060754105448723, "rewards/rejected": -0.08712927997112274, "step": 1730 }, { "epoch": 0.51, "learning_rate": 4.927541890388315e-07, "logits/chosen": -2.75010085105896, "logits/rejected": -2.741394519805908, "logps/chosen": -213.6536102294922, "logps/rejected": -198.53201293945312, "loss": 0.9682, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.0757746696472168, "rewards/margins": 0.02875874564051628, "rewards/rejected": -0.10453341156244278, "step": 1740 }, { "epoch": 0.51, "learning_rate": 4.925499608983617e-07, "logits/chosen": -2.7459850311279297, "logits/rejected": -2.754770040512085, "logps/chosen": -208.39181518554688, "logps/rejected": -188.983642578125, "loss": 0.9612, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.0683453232049942, "rewards/margins": 0.03796737641096115, "rewards/rejected": -0.10631269216537476, "step": 1750 }, { "epoch": 0.51, "learning_rate": 4.92342937977477e-07, "logits/chosen": -2.7780871391296387, "logits/rejected": -2.769228458404541, "logps/chosen": -236.0298309326172, "logps/rejected": -210.1380157470703, "loss": 0.9676, "rewards/accuracies": 0.546875, "rewards/chosen": -0.07248321920633316, "rewards/margins": 0.0332036130130291, "rewards/rejected": -0.10568682849407196, "step": 1760 }, { "epoch": 0.52, "learning_rate": 4.921331226615981e-07, "logits/chosen": -2.7420806884765625, "logits/rejected": -2.7397849559783936, "logps/chosen": -196.43295288085938, "logps/rejected": -178.20077514648438, "loss": 0.9717, "rewards/accuracies": 0.578125, "rewards/chosen": -0.07191386073827744, "rewards/margins": 0.029133638367056847, "rewards/rejected": -0.10104749351739883, "step": 1770 }, { "epoch": 0.52, "learning_rate": 4.919205173683202e-07, "logits/chosen": -2.7878201007843018, "logits/rejected": -2.7463841438293457, "logps/chosen": -224.4694366455078, "logps/rejected": -186.58062744140625, "loss": 0.9756, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.06282098591327667, "rewards/margins": 0.02961428463459015, "rewards/rejected": -0.09243526309728622, "step": 1780 }, { "epoch": 0.52, "learning_rate": 4.917051245473868e-07, "logits/chosen": -2.756880283355713, "logits/rejected": -2.7773396968841553, "logps/chosen": -186.11907958984375, "logps/rejected": -195.0716094970703, "loss": 0.9748, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.07742653787136078, "rewards/margins": 0.024380305781960487, "rewards/rejected": -0.10180683434009552, "step": 1790 }, { "epoch": 0.53, "learning_rate": 4.914869466806603e-07, "logits/chosen": -2.7491137981414795, "logits/rejected": -2.757794141769409, "logps/chosen": -178.83447265625, "logps/rejected": -172.74996948242188, "loss": 0.969, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.07571407407522202, "rewards/margins": 0.029796475544571877, "rewards/rejected": -0.10551054775714874, "step": 1800 }, { "epoch": 0.53, "eval_logits/chosen": -2.6686718463897705, "eval_logits/rejected": -2.6634953022003174, "eval_logps/chosen": -196.30665588378906, "eval_logps/rejected": -181.93798828125, "eval_loss": 0.9698388576507568, "eval_rewards/accuracies": 0.5686607956886292, "eval_rewards/chosen": -0.07013525068759918, "eval_rewards/margins": 0.03153569623827934, "eval_rewards/rejected": -0.10167094320058823, "eval_runtime": 443.4506, "eval_samples_per_second": 26.531, "eval_steps_per_second": 3.317, "step": 1800 }, { "epoch": 0.53, "learning_rate": 4.912659862820937e-07, "logits/chosen": -2.7059569358825684, "logits/rejected": -2.734696865081787, "logps/chosen": -178.76712036132812, "logps/rejected": -186.81173706054688, "loss": 0.9687, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.08366887271404266, "rewards/margins": 0.02821827493607998, "rewards/rejected": -0.1118871420621872, "step": 1810 }, { "epoch": 0.53, "learning_rate": 4.910422458977018e-07, "logits/chosen": -2.7654166221618652, "logits/rejected": -2.7601962089538574, "logps/chosen": -189.85423278808594, "logps/rejected": -185.08970642089844, "loss": 0.9745, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -0.07412820309400558, "rewards/margins": 0.026077458634972572, "rewards/rejected": -0.1002056747674942, "step": 1820 }, { "epoch": 0.53, "learning_rate": 4.90815728105532e-07, "logits/chosen": -2.7707886695861816, "logits/rejected": -2.7747719287872314, "logps/chosen": -186.23532104492188, "logps/rejected": -183.20777893066406, "loss": 0.9594, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.06935007870197296, "rewards/margins": 0.030855247750878334, "rewards/rejected": -0.10020531713962555, "step": 1830 }, { "epoch": 0.54, "learning_rate": 4.90586435515634e-07, "logits/chosen": -2.7859041690826416, "logits/rejected": -2.7700035572052, "logps/chosen": -213.6102752685547, "logps/rejected": -195.71310424804688, "loss": 0.9682, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.07400950789451599, "rewards/margins": 0.03597418963909149, "rewards/rejected": -0.10998369753360748, "step": 1840 }, { "epoch": 0.54, "learning_rate": 4.903543707700302e-07, "logits/chosen": -2.761504650115967, "logits/rejected": -2.7782986164093018, "logps/chosen": -198.09092712402344, "logps/rejected": -186.11976623535156, "loss": 0.9752, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.07849571853876114, "rewards/margins": 0.02638430893421173, "rewards/rejected": -0.10488002002239227, "step": 1850 }, { "epoch": 0.54, "learning_rate": 4.901195365426851e-07, "logits/chosen": -2.744083881378174, "logits/rejected": -2.745847225189209, "logps/chosen": -196.6710662841797, "logps/rejected": -183.01327514648438, "loss": 0.9633, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.06702904403209686, "rewards/margins": 0.0462227538228035, "rewards/rejected": -0.11325179040431976, "step": 1860 }, { "epoch": 0.55, "learning_rate": 4.89881935539475e-07, "logits/chosen": -2.761718273162842, "logits/rejected": -2.7792365550994873, "logps/chosen": -204.8824005126953, "logps/rejected": -189.04917907714844, "loss": 0.9684, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.07677263021469116, "rewards/margins": 0.03511573746800423, "rewards/rejected": -0.11188837140798569, "step": 1870 }, { "epoch": 0.55, "learning_rate": 4.896415704981556e-07, "logits/chosen": -2.7535223960876465, "logits/rejected": -2.7572989463806152, "logps/chosen": -205.97634887695312, "logps/rejected": -198.9563751220703, "loss": 0.9586, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.06817978620529175, "rewards/margins": 0.04087362438440323, "rewards/rejected": -0.10905341804027557, "step": 1880 }, { "epoch": 0.55, "learning_rate": 4.893984441883317e-07, "logits/chosen": -2.737229347229004, "logits/rejected": -2.7194113731384277, "logps/chosen": -184.98281860351562, "logps/rejected": -170.07290649414062, "loss": 0.9744, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.075322724878788, "rewards/margins": 0.025637894868850708, "rewards/rejected": -0.1009606122970581, "step": 1890 }, { "epoch": 0.55, "learning_rate": 4.891525594114248e-07, "logits/chosen": -2.759535789489746, "logits/rejected": -2.748884677886963, "logps/chosen": -206.9478759765625, "logps/rejected": -191.83497619628906, "loss": 0.9643, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.08102191984653473, "rewards/margins": 0.0340164490044117, "rewards/rejected": -0.11503837257623672, "step": 1900 }, { "epoch": 0.55, "eval_logits/chosen": -2.6642489433288574, "eval_logits/rejected": -2.659011125564575, "eval_logps/chosen": -196.3668670654297, "eval_logps/rejected": -182.01368713378906, "eval_loss": 0.9684566855430603, "eval_rewards/accuracies": 0.5676410794258118, "eval_rewards/chosen": -0.07615655660629272, "eval_rewards/margins": 0.03308200463652611, "eval_rewards/rejected": -0.10923856496810913, "eval_runtime": 443.3461, "eval_samples_per_second": 26.537, "eval_steps_per_second": 3.318, "step": 1900 }, { "epoch": 0.56, "learning_rate": 4.889039190006407e-07, "logits/chosen": -2.7680182456970215, "logits/rejected": -2.7183163166046143, "logps/chosen": -196.04763793945312, "logps/rejected": -171.93922424316406, "loss": 0.9648, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.073929563164711, "rewards/margins": 0.03020293638110161, "rewards/rejected": -0.10413248836994171, "step": 1910 }, { "epoch": 0.56, "learning_rate": 4.886525258209368e-07, "logits/chosen": -2.789874315261841, "logits/rejected": -2.7898380756378174, "logps/chosen": -203.37677001953125, "logps/rejected": -184.8866729736328, "loss": 0.9636, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.08382896333932877, "rewards/margins": 0.03888505697250366, "rewards/rejected": -0.12271402031183243, "step": 1920 }, { "epoch": 0.56, "learning_rate": 4.883983827689896e-07, "logits/chosen": -2.7781100273132324, "logits/rejected": -2.7623000144958496, "logps/chosen": -224.9477081298828, "logps/rejected": -200.4647674560547, "loss": 0.9727, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.07715074717998505, "rewards/margins": 0.026067961007356644, "rewards/rejected": -0.1032186970114708, "step": 1930 }, { "epoch": 0.57, "learning_rate": 4.881414927731608e-07, "logits/chosen": -2.766394853591919, "logits/rejected": -2.778665781021118, "logps/chosen": -213.1951446533203, "logps/rejected": -201.0144500732422, "loss": 0.9584, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.08078450709581375, "rewards/margins": 0.035427432507276535, "rewards/rejected": -0.11621192842721939, "step": 1940 }, { "epoch": 0.57, "learning_rate": 4.878818587934638e-07, "logits/chosen": -2.7382991313934326, "logits/rejected": -2.7401552200317383, "logps/chosen": -182.39071655273438, "logps/rejected": -173.4800567626953, "loss": 0.9522, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.06153035908937454, "rewards/margins": 0.051566898822784424, "rewards/rejected": -0.11309723556041718, "step": 1950 }, { "epoch": 0.57, "learning_rate": 4.876194838215295e-07, "logits/chosen": -2.759721040725708, "logits/rejected": -2.756985902786255, "logps/chosen": -179.3950653076172, "logps/rejected": -173.28369140625, "loss": 0.9635, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0704420655965805, "rewards/margins": 0.03925835341215134, "rewards/rejected": -0.10970041900873184, "step": 1960 }, { "epoch": 0.57, "learning_rate": 4.873543708805718e-07, "logits/chosen": -2.767444133758545, "logits/rejected": -2.728198766708374, "logps/chosen": -218.9180145263672, "logps/rejected": -189.20535278320312, "loss": 0.9681, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.087130606174469, "rewards/margins": 0.035440556704998016, "rewards/rejected": -0.12257115542888641, "step": 1970 }, { "epoch": 0.58, "learning_rate": 4.870865230253532e-07, "logits/chosen": -2.7817773818969727, "logits/rejected": -2.7752878665924072, "logps/chosen": -197.40106201171875, "logps/rejected": -178.65773010253906, "loss": 0.9687, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.08426558971405029, "rewards/margins": 0.043002355843782425, "rewards/rejected": -0.12726792693138123, "step": 1980 }, { "epoch": 0.58, "learning_rate": 4.868159433421485e-07, "logits/chosen": -2.7358546257019043, "logits/rejected": -2.738718032836914, "logps/chosen": -201.25308227539062, "logps/rejected": -195.88258361816406, "loss": 0.9672, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.09167347848415375, "rewards/margins": 0.030366484075784683, "rewards/rejected": -0.12203995883464813, "step": 1990 }, { "epoch": 0.58, "learning_rate": 4.865426349487108e-07, "logits/chosen": -2.8001911640167236, "logits/rejected": -2.7797961235046387, "logps/chosen": -221.62557983398438, "logps/rejected": -200.5688934326172, "loss": 0.9655, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.08506297320127487, "rewards/margins": 0.04144131764769554, "rewards/rejected": -0.12650427222251892, "step": 2000 }, { "epoch": 0.58, "eval_logits/chosen": -2.685004234313965, "eval_logits/rejected": -2.680168390274048, "eval_logps/chosen": -196.42654418945312, "eval_logps/rejected": -182.10122680664062, "eval_loss": 0.9663463234901428, "eval_rewards/accuracies": 0.5756288170814514, "eval_rewards/chosen": -0.08212257921695709, "eval_rewards/margins": 0.03587257117033005, "eval_rewards/rejected": -0.11799515038728714, "eval_runtime": 443.3293, "eval_samples_per_second": 26.538, "eval_steps_per_second": 3.318, "step": 2000 }, { "epoch": 0.59, "learning_rate": 4.862666009942341e-07, "logits/chosen": -2.7604477405548096, "logits/rejected": -2.767010450363159, "logps/chosen": -203.9152069091797, "logps/rejected": -190.85159301757812, "loss": 0.9659, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09103678911924362, "rewards/margins": 0.03718984127044678, "rewards/rejected": -0.1282266229391098, "step": 2010 }, { "epoch": 0.59, "learning_rate": 4.859878446593181e-07, "logits/chosen": -2.7273712158203125, "logits/rejected": -2.7808773517608643, "logps/chosen": -181.61282348632812, "logps/rejected": -196.00958251953125, "loss": 0.9574, "rewards/accuracies": 0.640625, "rewards/chosen": -0.07144895195960999, "rewards/margins": 0.059774626046419144, "rewards/rejected": -0.13122357428073883, "step": 2020 }, { "epoch": 0.59, "learning_rate": 4.857063691559309e-07, "logits/chosen": -2.7482104301452637, "logits/rejected": -2.7654850482940674, "logps/chosen": -194.5123291015625, "logps/rejected": -180.79283142089844, "loss": 0.9564, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.08377393335103989, "rewards/margins": 0.050066135823726654, "rewards/rejected": -0.13384008407592773, "step": 2030 }, { "epoch": 0.6, "learning_rate": 4.854221777273722e-07, "logits/chosen": -2.752758741378784, "logits/rejected": -2.755716562271118, "logps/chosen": -170.12339782714844, "logps/rejected": -163.80215454101562, "loss": 0.9652, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.07802867144346237, "rewards/margins": 0.03308725357055664, "rewards/rejected": -0.11111593246459961, "step": 2040 }, { "epoch": 0.6, "learning_rate": 4.851352736482359e-07, "logits/chosen": -2.743976354598999, "logits/rejected": -2.7437987327575684, "logps/chosen": -209.16943359375, "logps/rejected": -186.8437042236328, "loss": 0.9593, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.07512355595827103, "rewards/margins": 0.04379050433635712, "rewards/rejected": -0.11891404539346695, "step": 2050 }, { "epoch": 0.6, "learning_rate": 4.848456602243726e-07, "logits/chosen": -2.76847243309021, "logits/rejected": -2.7415621280670166, "logps/chosen": -215.87521362304688, "logps/rejected": -188.3891143798828, "loss": 0.9646, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09460041671991348, "rewards/margins": 0.02119792439043522, "rewards/rejected": -0.11579833179712296, "step": 2060 }, { "epoch": 0.6, "learning_rate": 4.84553340792851e-07, "logits/chosen": -2.728362560272217, "logits/rejected": -2.7296814918518066, "logps/chosen": -198.44586181640625, "logps/rejected": -184.37933349609375, "loss": 0.971, "rewards/accuracies": 0.515625, "rewards/chosen": -0.10106942802667618, "rewards/margins": 0.027541017159819603, "rewards/rejected": -0.12861044704914093, "step": 2070 }, { "epoch": 0.61, "learning_rate": 4.842583187219201e-07, "logits/chosen": -2.7220025062561035, "logits/rejected": -2.715425491333008, "logps/chosen": -184.50009155273438, "logps/rejected": -179.82077026367188, "loss": 0.97, "rewards/accuracies": 0.53125, "rewards/chosen": -0.08983436971902847, "rewards/margins": 0.02670256420969963, "rewards/rejected": -0.1165369376540184, "step": 2080 }, { "epoch": 0.61, "learning_rate": 4.839605974109698e-07, "logits/chosen": -2.754271984100342, "logits/rejected": -2.7299962043762207, "logps/chosen": -193.97775268554688, "logps/rejected": -174.15480041503906, "loss": 0.9539, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08133591711521149, "rewards/margins": 0.050822339951992035, "rewards/rejected": -0.13215824961662292, "step": 2090 }, { "epoch": 0.61, "learning_rate": 4.836601802904922e-07, "logits/chosen": -2.7427401542663574, "logits/rejected": -2.75071382522583, "logps/chosen": -188.09854125976562, "logps/rejected": -172.60829162597656, "loss": 0.9719, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.09669375419616699, "rewards/margins": 0.03804296255111694, "rewards/rejected": -0.13473671674728394, "step": 2100 }, { "epoch": 0.61, "eval_logits/chosen": -2.6727218627929688, "eval_logits/rejected": -2.6676571369171143, "eval_logps/chosen": -196.51327514648438, "eval_logps/rejected": -182.2023468017578, "eval_loss": 0.9645021557807922, "eval_rewards/accuracies": 0.5676410794258118, "eval_rewards/chosen": -0.09079542011022568, "eval_rewards/margins": 0.037310030311346054, "eval_rewards/rejected": -0.12810543179512024, "eval_runtime": 443.4647, "eval_samples_per_second": 26.53, "eval_steps_per_second": 3.317, "step": 2100 }, { "epoch": 0.62, "learning_rate": 4.833570708220415e-07, "logits/chosen": -2.764371156692505, "logits/rejected": -2.7658863067626953, "logps/chosen": -213.55459594726562, "logps/rejected": -204.02517700195312, "loss": 0.9497, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.08745203167200089, "rewards/margins": 0.05170099064707756, "rewards/rejected": -0.13915303349494934, "step": 2110 }, { "epoch": 0.62, "learning_rate": 4.830512724981947e-07, "logits/chosen": -2.758815288543701, "logits/rejected": -2.7648801803588867, "logps/chosen": -191.48367309570312, "logps/rejected": -177.66061401367188, "loss": 0.9642, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.09611859172582626, "rewards/margins": 0.026732753962278366, "rewards/rejected": -0.12285135686397552, "step": 2120 }, { "epoch": 0.62, "learning_rate": 4.827427888425111e-07, "logits/chosen": -2.7500061988830566, "logits/rejected": -2.7405474185943604, "logps/chosen": -186.7626953125, "logps/rejected": -175.85812377929688, "loss": 0.9665, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.08782060444355011, "rewards/margins": 0.04283968359231949, "rewards/rejected": -0.130660280585289, "step": 2130 }, { "epoch": 0.62, "learning_rate": 4.824316234094918e-07, "logits/chosen": -2.7345998287200928, "logits/rejected": -2.734236478805542, "logps/chosen": -179.12454223632812, "logps/rejected": -177.08853149414062, "loss": 0.9586, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08842668682336807, "rewards/margins": 0.031327299773693085, "rewards/rejected": -0.11975400149822235, "step": 2140 }, { "epoch": 0.63, "learning_rate": 4.821177797845383e-07, "logits/chosen": -2.7678306102752686, "logits/rejected": -2.7621452808380127, "logps/chosen": -210.3707275390625, "logps/rejected": -183.60426330566406, "loss": 0.9621, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09107877314090729, "rewards/margins": 0.04685630649328232, "rewards/rejected": -0.1379350870847702, "step": 2150 }, { "epoch": 0.63, "learning_rate": 4.818012615839122e-07, "logits/chosen": -2.7913317680358887, "logits/rejected": -2.773393154144287, "logps/chosen": -204.22744750976562, "logps/rejected": -177.96424865722656, "loss": 0.9603, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.10166385024785995, "rewards/margins": 0.01808108761906624, "rewards/rejected": -0.11974494159221649, "step": 2160 }, { "epoch": 0.63, "learning_rate": 4.814820724546923e-07, "logits/chosen": -2.7454721927642822, "logits/rejected": -2.732819080352783, "logps/chosen": -180.0576171875, "logps/rejected": -170.5610809326172, "loss": 0.9626, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.09514407813549042, "rewards/margins": 0.03504454344511032, "rewards/rejected": -0.13018861413002014, "step": 2170 }, { "epoch": 0.64, "learning_rate": 4.811602160747332e-07, "logits/chosen": -2.7451462745666504, "logits/rejected": -2.7270007133483887, "logps/chosen": -206.5254364013672, "logps/rejected": -194.58749389648438, "loss": 0.9776, "rewards/accuracies": 0.47187501192092896, "rewards/chosen": -0.1110968142747879, "rewards/margins": 0.020342206582427025, "rewards/rejected": -0.13143901526927948, "step": 2180 }, { "epoch": 0.64, "learning_rate": 4.808356961526233e-07, "logits/chosen": -2.7640247344970703, "logits/rejected": -2.760708808898926, "logps/chosen": -213.62130737304688, "logps/rejected": -186.7675018310547, "loss": 0.9709, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.10413794219493866, "rewards/margins": 0.03650267794728279, "rewards/rejected": -0.14064063131809235, "step": 2190 }, { "epoch": 0.64, "learning_rate": 4.805085164276413e-07, "logits/chosen": -2.7213828563690186, "logits/rejected": -2.7300868034362793, "logps/chosen": -202.58807373046875, "logps/rejected": -183.22645568847656, "loss": 0.9576, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.09867699444293976, "rewards/margins": 0.04763052612543106, "rewards/rejected": -0.14630751311779022, "step": 2200 }, { "epoch": 0.64, "eval_logits/chosen": -2.6729633808135986, "eval_logits/rejected": -2.6679437160491943, "eval_logps/chosen": -196.5585479736328, "eval_logps/rejected": -182.2709197998047, "eval_loss": 0.9625018239021301, "eval_rewards/accuracies": 0.5729095935821533, "eval_rewards/chosen": -0.09532498568296432, "eval_rewards/margins": 0.03963753208518028, "eval_rewards/rejected": -0.1349625140428543, "eval_runtime": 443.5452, "eval_samples_per_second": 26.525, "eval_steps_per_second": 3.316, "step": 2200 }, { "epoch": 0.64, "learning_rate": 4.801786806697134e-07, "logits/chosen": -2.754920721054077, "logits/rejected": -2.762866973876953, "logps/chosen": -196.8423309326172, "logps/rejected": -195.01043701171875, "loss": 0.9552, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.1039787083864212, "rewards/margins": 0.040667999535799026, "rewards/rejected": -0.14464668929576874, "step": 2210 }, { "epoch": 0.65, "learning_rate": 4.798461926793703e-07, "logits/chosen": -2.7615623474121094, "logits/rejected": -2.7736001014709473, "logps/chosen": -209.5021209716797, "logps/rejected": -202.2715301513672, "loss": 0.9615, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.1150670200586319, "rewards/margins": 0.040112510323524475, "rewards/rejected": -0.15517953038215637, "step": 2220 }, { "epoch": 0.65, "learning_rate": 4.795110562877026e-07, "logits/chosen": -2.7603373527526855, "logits/rejected": -2.7503154277801514, "logps/chosen": -187.4466552734375, "logps/rejected": -181.63955688476562, "loss": 0.9569, "rewards/accuracies": 0.528124988079071, "rewards/chosen": -0.11598215252161026, "rewards/margins": 0.035527873784303665, "rewards/rejected": -0.15151001513004303, "step": 2230 }, { "epoch": 0.65, "learning_rate": 4.791732753563174e-07, "logits/chosen": -2.7604167461395264, "logits/rejected": -2.7718167304992676, "logps/chosen": -192.2855682373047, "logps/rejected": -194.53607177734375, "loss": 0.9622, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.12070713192224503, "rewards/margins": 0.022978540509939194, "rewards/rejected": -0.14368568360805511, "step": 2240 }, { "epoch": 0.66, "learning_rate": 4.788328537772933e-07, "logits/chosen": -2.78924298286438, "logits/rejected": -2.799626350402832, "logps/chosen": -204.7958984375, "logps/rejected": -193.9917449951172, "loss": 0.9451, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.09752093255519867, "rewards/margins": 0.055007584393024445, "rewards/rejected": -0.15252849459648132, "step": 2250 }, { "epoch": 0.66, "learning_rate": 4.784897954731359e-07, "logits/chosen": -2.73913836479187, "logits/rejected": -2.7504515647888184, "logps/chosen": -188.52005004882812, "logps/rejected": -196.14346313476562, "loss": 0.9666, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.09183724224567413, "rewards/margins": 0.04959854856133461, "rewards/rejected": -0.14143578708171844, "step": 2260 }, { "epoch": 0.66, "learning_rate": 4.781441043967323e-07, "logits/chosen": -2.7440826892852783, "logits/rejected": -2.760591983795166, "logps/chosen": -203.64828491210938, "logps/rejected": -200.84938049316406, "loss": 0.9672, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.11135001480579376, "rewards/margins": 0.037941206246614456, "rewards/rejected": -0.14929120242595673, "step": 2270 }, { "epoch": 0.67, "learning_rate": 4.777957845313058e-07, "logits/chosen": -2.7185616493225098, "logits/rejected": -2.7239575386047363, "logps/chosen": -179.6100311279297, "logps/rejected": -172.4264678955078, "loss": 0.9655, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.10776621103286743, "rewards/margins": 0.03039861097931862, "rewards/rejected": -0.13816483318805695, "step": 2280 }, { "epoch": 0.67, "learning_rate": 4.7744483989037e-07, "logits/chosen": -2.7633557319641113, "logits/rejected": -2.7489027976989746, "logps/chosen": -192.33351135253906, "logps/rejected": -172.5152130126953, "loss": 0.9547, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.10020127147436142, "rewards/margins": 0.042753200978040695, "rewards/rejected": -0.142954483628273, "step": 2290 }, { "epoch": 0.67, "learning_rate": 4.770912745176822e-07, "logits/chosen": -2.7870826721191406, "logits/rejected": -2.753652572631836, "logps/chosen": -221.02529907226562, "logps/rejected": -181.46377563476562, "loss": 0.9619, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.10642800480127335, "rewards/margins": 0.022537609562277794, "rewards/rejected": -0.1289656013250351, "step": 2300 }, { "epoch": 0.67, "eval_logits/chosen": -2.6579878330230713, "eval_logits/rejected": -2.6526882648468018, "eval_logps/chosen": -196.61697387695312, "eval_logps/rejected": -182.3572235107422, "eval_loss": 0.9603249430656433, "eval_rewards/accuracies": 0.5783480405807495, "eval_rewards/chosen": -0.10116615891456604, "eval_rewards/margins": 0.04242768511176109, "eval_rewards/rejected": -0.14359383285045624, "eval_runtime": 443.5502, "eval_samples_per_second": 26.525, "eval_steps_per_second": 3.316, "step": 2300 }, { "epoch": 0.67, "learning_rate": 4.7673509248719737e-07, "logits/chosen": -2.7467334270477295, "logits/rejected": -2.7274622917175293, "logps/chosen": -189.3142547607422, "logps/rejected": -175.53521728515625, "loss": 0.9534, "rewards/accuracies": 0.578125, "rewards/chosen": -0.11234588921070099, "rewards/margins": 0.029966553673148155, "rewards/rejected": -0.1423124372959137, "step": 2310 }, { "epoch": 0.68, "learning_rate": 4.763762979030205e-07, "logits/chosen": -2.7524502277374268, "logits/rejected": -2.760484218597412, "logps/chosen": -206.99856567382812, "logps/rejected": -195.05409240722656, "loss": 0.9601, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.08634912222623825, "rewards/margins": 0.04963558167219162, "rewards/rejected": -0.13598468899726868, "step": 2320 }, { "epoch": 0.68, "learning_rate": 4.760148948993601e-07, "logits/chosen": -2.780813217163086, "logits/rejected": -2.7848782539367676, "logps/chosen": -194.63572692871094, "logps/rejected": -186.0827178955078, "loss": 0.9648, "rewards/accuracies": 0.5625, "rewards/chosen": -0.11056170612573624, "rewards/margins": 0.04150097444653511, "rewards/rejected": -0.15206268429756165, "step": 2330 }, { "epoch": 0.68, "learning_rate": 4.7565088764047993e-07, "logits/chosen": -2.740149974822998, "logits/rejected": -2.730266571044922, "logps/chosen": -185.96807861328125, "logps/rejected": -171.44134521484375, "loss": 0.9529, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.10003244876861572, "rewards/margins": 0.04513604938983917, "rewards/rejected": -0.1451684981584549, "step": 2340 }, { "epoch": 0.69, "learning_rate": 4.752842803206515e-07, "logits/chosen": -2.768859624862671, "logits/rejected": -2.765820264816284, "logps/chosen": -183.59072875976562, "logps/rejected": -177.88339233398438, "loss": 0.9768, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.13758380711078644, "rewards/margins": 0.017323657870292664, "rewards/rejected": -0.1549074649810791, "step": 2350 }, { "epoch": 0.69, "learning_rate": 4.749150771641053e-07, "logits/chosen": -2.745387315750122, "logits/rejected": -2.743863582611084, "logps/chosen": -192.3404083251953, "logps/rejected": -175.23135375976562, "loss": 0.9608, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.09886828809976578, "rewards/margins": 0.03992761671543121, "rewards/rejected": -0.1387959122657776, "step": 2360 }, { "epoch": 0.69, "learning_rate": 4.7454328242498243e-07, "logits/chosen": -2.7137324810028076, "logits/rejected": -2.741093158721924, "logps/chosen": -190.41773986816406, "logps/rejected": -194.13595581054688, "loss": 0.9667, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.12575829029083252, "rewards/margins": 0.03708643093705177, "rewards/rejected": -0.1628447026014328, "step": 2370 }, { "epoch": 0.69, "learning_rate": 4.7416890038728543e-07, "logits/chosen": -2.7767727375030518, "logits/rejected": -2.7499849796295166, "logps/chosen": -216.6398468017578, "logps/rejected": -189.55067443847656, "loss": 0.952, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.10577182471752167, "rewards/margins": 0.04436280578374863, "rewards/rejected": -0.1501346379518509, "step": 2380 }, { "epoch": 0.7, "learning_rate": 4.73791935364829e-07, "logits/chosen": -2.7677741050720215, "logits/rejected": -2.7710769176483154, "logps/chosen": -196.3310089111328, "logps/rejected": -189.33282470703125, "loss": 0.9504, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.11472728103399277, "rewards/margins": 0.04461818188428879, "rewards/rejected": -0.15934546291828156, "step": 2390 }, { "epoch": 0.7, "learning_rate": 4.734123917011903e-07, "logits/chosen": -2.7767577171325684, "logits/rejected": -2.7674083709716797, "logps/chosen": -196.1704864501953, "logps/rejected": -187.2528533935547, "loss": 0.9511, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.12227902561426163, "rewards/margins": 0.048698049038648605, "rewards/rejected": -0.17097707092761993, "step": 2400 }, { "epoch": 0.7, "eval_logits/chosen": -2.6617283821105957, "eval_logits/rejected": -2.656481981277466, "eval_logps/chosen": -196.710693359375, "eval_logps/rejected": -182.46116638183594, "eval_loss": 0.9600609540939331, "eval_rewards/accuracies": 0.57222980260849, "eval_rewards/chosen": -0.11053957790136337, "eval_rewards/margins": 0.043447259813547134, "eval_rewards/rejected": -0.1539868265390396, "eval_runtime": 443.5718, "eval_samples_per_second": 26.523, "eval_steps_per_second": 3.316, "step": 2400 }, { "epoch": 0.7, "learning_rate": 4.7303027376965874e-07, "logits/chosen": -2.746814012527466, "logits/rejected": -2.731839895248413, "logps/chosen": -209.3233642578125, "logps/rejected": -186.90347290039062, "loss": 0.9485, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.10549162328243256, "rewards/margins": 0.0548577681183815, "rewards/rejected": -0.16034939885139465, "step": 2410 }, { "epoch": 0.71, "learning_rate": 4.726455859731859e-07, "logits/chosen": -2.7607133388519287, "logits/rejected": -2.7557640075683594, "logps/chosen": -180.23956298828125, "logps/rejected": -178.86196899414062, "loss": 0.9443, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.11721359193325043, "rewards/margins": 0.05561182647943497, "rewards/rejected": -0.172825425863266, "step": 2420 }, { "epoch": 0.71, "learning_rate": 4.7225833274433455e-07, "logits/chosen": -2.7584927082061768, "logits/rejected": -2.7558186054229736, "logps/chosen": -189.36634826660156, "logps/rejected": -185.4894256591797, "loss": 0.9589, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.11889169365167618, "rewards/margins": 0.04156076908111572, "rewards/rejected": -0.1604524850845337, "step": 2430 }, { "epoch": 0.71, "learning_rate": 4.718685185452275e-07, "logits/chosen": -2.7491304874420166, "logits/rejected": -2.7356982231140137, "logps/chosen": -193.890380859375, "logps/rejected": -172.22006225585938, "loss": 0.9558, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.11748484522104263, "rewards/margins": 0.04917275533080101, "rewards/rejected": -0.16665759682655334, "step": 2440 }, { "epoch": 0.71, "learning_rate": 4.7147614786749656e-07, "logits/chosen": -2.785374164581299, "logits/rejected": -2.7864601612091064, "logps/chosen": -204.22335815429688, "logps/rejected": -191.0725860595703, "loss": 0.9432, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.12457302957773209, "rewards/margins": 0.047984734177589417, "rewards/rejected": -0.1725577563047409, "step": 2450 }, { "epoch": 0.72, "learning_rate": 4.710812252322303e-07, "logits/chosen": -2.7331464290618896, "logits/rejected": -2.725174903869629, "logps/chosen": -196.36265563964844, "logps/rejected": -177.1366729736328, "loss": 0.966, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.12758466601371765, "rewards/margins": 0.020772898569703102, "rewards/rejected": -0.1483575403690338, "step": 2460 }, { "epoch": 0.72, "learning_rate": 4.706837551899223e-07, "logits/chosen": -2.7659404277801514, "logits/rejected": -2.747095823287964, "logps/chosen": -216.7389373779297, "logps/rejected": -189.22409057617188, "loss": 0.9534, "rewards/accuracies": 0.5625, "rewards/chosen": -0.12142902612686157, "rewards/margins": 0.040458451956510544, "rewards/rejected": -0.16188748180866241, "step": 2470 }, { "epoch": 0.72, "learning_rate": 4.7028374232041877e-07, "logits/chosen": -2.722733736038208, "logits/rejected": -2.7091987133026123, "logps/chosen": -174.03915405273438, "logps/rejected": -176.1728057861328, "loss": 0.9594, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.10743166506290436, "rewards/margins": 0.04605314880609512, "rewards/rejected": -0.15348480641841888, "step": 2480 }, { "epoch": 0.73, "learning_rate": 4.698811912328655e-07, "logits/chosen": -2.7615768909454346, "logits/rejected": -2.772902011871338, "logps/chosen": -179.90347290039062, "logps/rejected": -184.03897094726562, "loss": 0.9568, "rewards/accuracies": 0.578125, "rewards/chosen": -0.11061780154705048, "rewards/margins": 0.05398521572351456, "rewards/rejected": -0.16460299491882324, "step": 2490 }, { "epoch": 0.73, "learning_rate": 4.6947610656565485e-07, "logits/chosen": -2.7560980319976807, "logits/rejected": -2.746638298034668, "logps/chosen": -207.52249145507812, "logps/rejected": -189.99624633789062, "loss": 0.9516, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.11252021789550781, "rewards/margins": 0.053376246243715286, "rewards/rejected": -0.1658964604139328, "step": 2500 }, { "epoch": 0.73, "eval_logits/chosen": -2.6664488315582275, "eval_logits/rejected": -2.6613011360168457, "eval_logps/chosen": -196.7630157470703, "eval_logps/rejected": -182.53892517089844, "eval_loss": 0.957039475440979, "eval_rewards/accuracies": 0.5715499520301819, "eval_rewards/chosen": -0.1157722994685173, "eval_rewards/margins": 0.04599176347255707, "eval_rewards/rejected": -0.16176405549049377, "eval_runtime": 443.4481, "eval_samples_per_second": 26.531, "eval_steps_per_second": 3.317, "step": 2500 }, { "epoch": 0.73, "learning_rate": 4.690684929863723e-07, "logits/chosen": -2.7579269409179688, "logits/rejected": -2.763580083847046, "logps/chosen": -193.8256378173828, "logps/rejected": -179.65748596191406, "loss": 0.967, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": -0.1388111412525177, "rewards/margins": 0.021634388715028763, "rewards/rejected": -0.16044552624225616, "step": 2510 }, { "epoch": 0.74, "learning_rate": 4.68658355191743e-07, "logits/chosen": -2.7525057792663574, "logits/rejected": -2.733154535293579, "logps/chosen": -215.12722778320312, "logps/rejected": -178.46194458007812, "loss": 0.9572, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.12127050012350082, "rewards/margins": 0.050067853182554245, "rewards/rejected": -0.17133836448192596, "step": 2520 }, { "epoch": 0.74, "learning_rate": 4.6824569790757683e-07, "logits/chosen": -2.758488416671753, "logits/rejected": -2.771643877029419, "logps/chosen": -207.5240020751953, "logps/rejected": -196.0047149658203, "loss": 0.96, "rewards/accuracies": 0.59375, "rewards/chosen": -0.13123732805252075, "rewards/margins": 0.05722743272781372, "rewards/rejected": -0.18846476078033447, "step": 2530 }, { "epoch": 0.74, "learning_rate": 4.678305258887151e-07, "logits/chosen": -2.734318256378174, "logits/rejected": -2.7332804203033447, "logps/chosen": -198.74063110351562, "logps/rejected": -180.7388458251953, "loss": 0.9637, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.1199951022863388, "rewards/margins": 0.0437748059630394, "rewards/rejected": -0.1637698858976364, "step": 2540 }, { "epoch": 0.74, "learning_rate": 4.674128439189745e-07, "logits/chosen": -2.738473415374756, "logits/rejected": -2.7565932273864746, "logps/chosen": -199.6444549560547, "logps/rejected": -204.21682739257812, "loss": 0.9474, "rewards/accuracies": 0.5625, "rewards/chosen": -0.12135820090770721, "rewards/margins": 0.0532490611076355, "rewards/rejected": -0.1746072620153427, "step": 2550 }, { "epoch": 0.75, "learning_rate": 4.669926568110932e-07, "logits/chosen": -2.737910747528076, "logits/rejected": -2.720459461212158, "logps/chosen": -183.45167541503906, "logps/rejected": -174.4886474609375, "loss": 0.9577, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.12640735507011414, "rewards/margins": 0.04174577444791794, "rewards/rejected": -0.16815313696861267, "step": 2560 }, { "epoch": 0.75, "learning_rate": 4.6656996940667436e-07, "logits/chosen": -2.728344440460205, "logits/rejected": -2.7403762340545654, "logps/chosen": -180.70962524414062, "logps/rejected": -175.8680419921875, "loss": 0.95, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.11759184300899506, "rewards/margins": 0.053888868540525436, "rewards/rejected": -0.17148073017597198, "step": 2570 }, { "epoch": 0.75, "learning_rate": 4.661447865761311e-07, "logits/chosen": -2.731985569000244, "logits/rejected": -2.7103681564331055, "logps/chosen": -197.1103057861328, "logps/rejected": -169.7650909423828, "loss": 0.9609, "rewards/accuracies": 0.528124988079071, "rewards/chosen": -0.13190656900405884, "rewards/margins": 0.030604299157857895, "rewards/rejected": -0.16251085698604584, "step": 2580 }, { "epoch": 0.76, "learning_rate": 4.6571711321862977e-07, "logits/chosen": -2.730292558670044, "logits/rejected": -2.730025053024292, "logps/chosen": -169.65896606445312, "logps/rejected": -171.60116577148438, "loss": 0.9588, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": -0.13009211421012878, "rewards/margins": 0.03937167674303055, "rewards/rejected": -0.16946378350257874, "step": 2590 }, { "epoch": 0.76, "learning_rate": 4.652869542620341e-07, "logits/chosen": -2.709564447402954, "logits/rejected": -2.7069900035858154, "logps/chosen": -187.7020721435547, "logps/rejected": -160.48951721191406, "loss": 0.9577, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.10107941925525665, "rewards/margins": 0.047139011323451996, "rewards/rejected": -0.14821843802928925, "step": 2600 }, { "epoch": 0.76, "eval_logits/chosen": -2.664644718170166, "eval_logits/rejected": -2.6595005989074707, "eval_logps/chosen": -196.8412628173828, "eval_logps/rejected": -182.63865661621094, "eval_loss": 0.9554187655448914, "eval_rewards/accuracies": 0.5718898773193359, "eval_rewards/chosen": -0.12359517812728882, "eval_rewards/margins": 0.0481419675052166, "eval_rewards/rejected": -0.1717371642589569, "eval_runtime": 443.3935, "eval_samples_per_second": 26.534, "eval_steps_per_second": 3.318, "step": 2600 }, { "epoch": 0.76, "learning_rate": 4.64854314662848e-07, "logits/chosen": -2.7113401889801025, "logits/rejected": -2.726046323776245, "logps/chosen": -177.04306030273438, "logps/rejected": -169.5409698486328, "loss": 0.9594, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.13913336396217346, "rewards/margins": 0.03179800882935524, "rewards/rejected": -0.1709313690662384, "step": 2610 }, { "epoch": 0.76, "learning_rate": 4.644191994061584e-07, "logits/chosen": -2.7433714866638184, "logits/rejected": -2.7490386962890625, "logps/chosen": -198.26492309570312, "logps/rejected": -195.8823699951172, "loss": 0.9551, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.12107695639133453, "rewards/margins": 0.05229382589459419, "rewards/rejected": -0.17337077856063843, "step": 2620 }, { "epoch": 0.77, "learning_rate": 4.639816135055783e-07, "logits/chosen": -2.7246880531311035, "logits/rejected": -2.7300329208374023, "logps/chosen": -187.84384155273438, "logps/rejected": -170.01132202148438, "loss": 0.9486, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": -0.11736071109771729, "rewards/margins": 0.04356401413679123, "rewards/rejected": -0.16092471778392792, "step": 2630 }, { "epoch": 0.77, "learning_rate": 4.635415620031885e-07, "logits/chosen": -2.7280688285827637, "logits/rejected": -2.7262918949127197, "logps/chosen": -199.6859893798828, "logps/rejected": -175.3978271484375, "loss": 0.963, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.1443082094192505, "rewards/margins": 0.03426407650113106, "rewards/rejected": -0.17857226729393005, "step": 2640 }, { "epoch": 0.77, "learning_rate": 4.6309904996947966e-07, "logits/chosen": -2.7617874145507812, "logits/rejected": -2.779418706893921, "logps/chosen": -204.58631896972656, "logps/rejected": -197.4747314453125, "loss": 0.95, "rewards/accuracies": 0.528124988079071, "rewards/chosen": -0.12481949478387833, "rewards/margins": 0.058627624064683914, "rewards/rejected": -0.18344712257385254, "step": 2650 }, { "epoch": 0.78, "learning_rate": 4.626540825032939e-07, "logits/chosen": -2.779951572418213, "logits/rejected": -2.7412378787994385, "logps/chosen": -213.5417022705078, "logps/rejected": -175.49853515625, "loss": 0.961, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14209267497062683, "rewards/margins": 0.045297764241695404, "rewards/rejected": -0.18739044666290283, "step": 2660 }, { "epoch": 0.78, "learning_rate": 4.622066647317662e-07, "logits/chosen": -2.7636003494262695, "logits/rejected": -2.745131731033325, "logps/chosen": -195.9354248046875, "logps/rejected": -181.21420288085938, "loss": 0.962, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.1278540939092636, "rewards/margins": 0.03553072363138199, "rewards/rejected": -0.163384810090065, "step": 2670 }, { "epoch": 0.78, "learning_rate": 4.617568018102649e-07, "logits/chosen": -2.8005611896514893, "logits/rejected": -2.7828357219696045, "logps/chosen": -214.6331329345703, "logps/rejected": -191.2391357421875, "loss": 0.9445, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.13087083399295807, "rewards/margins": 0.05404329299926758, "rewards/rejected": -0.18491411209106445, "step": 2680 }, { "epoch": 0.78, "learning_rate": 4.613044989223327e-07, "logits/chosen": -2.764817953109741, "logits/rejected": -2.769461154937744, "logps/chosen": -198.07208251953125, "logps/rejected": -188.2695770263672, "loss": 0.9588, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.12125615030527115, "rewards/margins": 0.041098788380622864, "rewards/rejected": -0.1623549461364746, "step": 2690 }, { "epoch": 0.79, "learning_rate": 4.6084976127962694e-07, "logits/chosen": -2.7548482418060303, "logits/rejected": -2.7405712604522705, "logps/chosen": -190.39096069335938, "logps/rejected": -180.38926696777344, "loss": 0.9471, "rewards/accuracies": 0.59375, "rewards/chosen": -0.12370000034570694, "rewards/margins": 0.05232849717140198, "rewards/rejected": -0.17602849006652832, "step": 2700 }, { "epoch": 0.79, "eval_logits/chosen": -2.6672322750091553, "eval_logits/rejected": -2.6621241569519043, "eval_logps/chosen": -196.8730926513672, "eval_logps/rejected": -182.68402099609375, "eval_loss": 0.9541336297988892, "eval_rewards/accuracies": 0.5735893845558167, "eval_rewards/chosen": -0.1267787367105484, "eval_rewards/margins": 0.04949454218149185, "eval_rewards/rejected": -0.17627327144145966, "eval_runtime": 443.3932, "eval_samples_per_second": 26.534, "eval_steps_per_second": 3.318, "step": 2700 }, { "epoch": 0.79, "learning_rate": 4.603925941218593e-07, "logits/chosen": -2.731152296066284, "logits/rejected": -2.7341597080230713, "logps/chosen": -212.43447875976562, "logps/rejected": -197.4214630126953, "loss": 0.9455, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.1260325014591217, "rewards/margins": 0.06015104055404663, "rewards/rejected": -0.18618355691432953, "step": 2710 }, { "epoch": 0.79, "learning_rate": 4.5993300271673535e-07, "logits/chosen": -2.74223256111145, "logits/rejected": -2.7303566932678223, "logps/chosen": -188.73492431640625, "logps/rejected": -185.53005981445312, "loss": 0.9645, "rewards/accuracies": 0.5625, "rewards/chosen": -0.15222349762916565, "rewards/margins": 0.033357687294483185, "rewards/rejected": -0.18558119237422943, "step": 2720 }, { "epoch": 0.8, "learning_rate": 4.5947099235989426e-07, "logits/chosen": -2.741570472717285, "logits/rejected": -2.756967067718506, "logps/chosen": -202.7544403076172, "logps/rejected": -191.4830322265625, "loss": 0.9544, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.1407163292169571, "rewards/margins": 0.0526912696659565, "rewards/rejected": -0.1934075951576233, "step": 2730 }, { "epoch": 0.8, "learning_rate": 4.590065683748476e-07, "logits/chosen": -2.761409044265747, "logits/rejected": -2.759230852127075, "logps/chosen": -190.07643127441406, "logps/rejected": -184.67315673828125, "loss": 0.9524, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.13507774472236633, "rewards/margins": 0.043145522475242615, "rewards/rejected": -0.17822325229644775, "step": 2740 }, { "epoch": 0.8, "learning_rate": 4.585397361129177e-07, "logits/chosen": -2.7489280700683594, "logits/rejected": -2.730376720428467, "logps/chosen": -191.56483459472656, "logps/rejected": -171.06887817382812, "loss": 0.9452, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.13877546787261963, "rewards/margins": 0.05284839868545532, "rewards/rejected": -0.19162388145923615, "step": 2750 }, { "epoch": 0.81, "learning_rate": 4.5807050095317643e-07, "logits/chosen": -2.7506136894226074, "logits/rejected": -2.760101795196533, "logps/chosen": -185.88739013671875, "logps/rejected": -184.04281616210938, "loss": 0.9573, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -0.15197591483592987, "rewards/margins": 0.04332758113741875, "rewards/rejected": -0.1953035145998001, "step": 2760 }, { "epoch": 0.81, "learning_rate": 4.575988683023831e-07, "logits/chosen": -2.750823736190796, "logits/rejected": -2.737844467163086, "logps/chosen": -186.29476928710938, "logps/rejected": -161.99533081054688, "loss": 0.9633, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.1368028223514557, "rewards/margins": 0.04006402567028999, "rewards/rejected": -0.17686684429645538, "step": 2770 }, { "epoch": 0.81, "learning_rate": 4.5712484359492185e-07, "logits/chosen": -2.755030870437622, "logits/rejected": -2.7267005443573, "logps/chosen": -188.34628295898438, "logps/rejected": -176.9173583984375, "loss": 0.9636, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.14116336405277252, "rewards/margins": 0.041894249618053436, "rewards/rejected": -0.18305759131908417, "step": 2780 }, { "epoch": 0.81, "learning_rate": 4.5664843229273954e-07, "logits/chosen": -2.727405309677124, "logits/rejected": -2.7636685371398926, "logps/chosen": -185.30831909179688, "logps/rejected": -189.11410522460938, "loss": 0.9475, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.11881668865680695, "rewards/margins": 0.062104128301143646, "rewards/rejected": -0.18092080950737, "step": 2790 }, { "epoch": 0.82, "learning_rate": 4.561696398852823e-07, "logits/chosen": -2.721590995788574, "logits/rejected": -2.7140235900878906, "logps/chosen": -187.95919799804688, "logps/rejected": -173.23416137695312, "loss": 0.9519, "rewards/accuracies": 0.546875, "rewards/chosen": -0.13266701996326447, "rewards/margins": 0.05463450402021408, "rewards/rejected": -0.18730153143405914, "step": 2800 }, { "epoch": 0.82, "eval_logits/chosen": -2.681025981903076, "eval_logits/rejected": -2.6762020587921143, "eval_logps/chosen": -196.94139099121094, "eval_logps/rejected": -182.77047729492188, "eval_loss": 0.9523664712905884, "eval_rewards/accuracies": 0.5737593770027161, "eval_rewards/chosen": -0.13360761106014252, "eval_rewards/margins": 0.051313430070877075, "eval_rewards/rejected": -0.1849210411310196, "eval_runtime": 443.4513, "eval_samples_per_second": 26.531, "eval_steps_per_second": 3.317, "step": 2800 }, { "epoch": 0.82, "learning_rate": 4.556884718894327e-07, "logits/chosen": -2.7339940071105957, "logits/rejected": -2.716157913208008, "logps/chosen": -201.26852416992188, "logps/rejected": -188.23138427734375, "loss": 0.9512, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.14071759581565857, "rewards/margins": 0.039004649966955185, "rewards/rejected": -0.17972226440906525, "step": 2810 }, { "epoch": 0.82, "learning_rate": 4.5520493384944614e-07, "logits/chosen": -2.7608866691589355, "logits/rejected": -2.7515382766723633, "logps/chosen": -208.635009765625, "logps/rejected": -188.59561157226562, "loss": 0.9467, "rewards/accuracies": 0.578125, "rewards/chosen": -0.12841913104057312, "rewards/margins": 0.06464473158121109, "rewards/rejected": -0.193063884973526, "step": 2820 }, { "epoch": 0.83, "learning_rate": 4.547190313368865e-07, "logits/chosen": -2.750911235809326, "logits/rejected": -2.7666754722595215, "logps/chosen": -187.2324981689453, "logps/rejected": -188.1076202392578, "loss": 0.9577, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.13645856082439423, "rewards/margins": 0.05990775674581528, "rewards/rejected": -0.1963663399219513, "step": 2830 }, { "epoch": 0.83, "learning_rate": 4.5423076995056255e-07, "logits/chosen": -2.72402024269104, "logits/rejected": -2.7575836181640625, "logps/chosen": -180.29605102539062, "logps/rejected": -186.4771728515625, "loss": 0.9581, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.1337813436985016, "rewards/margins": 0.03869408741593361, "rewards/rejected": -0.1724754273891449, "step": 2840 }, { "epoch": 0.83, "learning_rate": 4.537401553164629e-07, "logits/chosen": -2.7499241828918457, "logits/rejected": -2.77961802482605, "logps/chosen": -180.1635284423828, "logps/rejected": -189.73565673828125, "loss": 0.9433, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.14217296242713928, "rewards/margins": 0.06596062332391739, "rewards/rejected": -0.20813357830047607, "step": 2850 }, { "epoch": 0.83, "learning_rate": 4.532471930876919e-07, "logits/chosen": -2.756718158721924, "logits/rejected": -2.7450459003448486, "logps/chosen": -214.15200805664062, "logps/rejected": -197.15908813476562, "loss": 0.9513, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1372036188840866, "rewards/margins": 0.05283326655626297, "rewards/rejected": -0.19003687798976898, "step": 2860 }, { "epoch": 0.84, "learning_rate": 4.527518889444035e-07, "logits/chosen": -2.746441602706909, "logits/rejected": -2.756840229034424, "logps/chosen": -189.22317504882812, "logps/rejected": -187.7545166015625, "loss": 0.9476, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.13749714195728302, "rewards/margins": 0.06224555894732475, "rewards/rejected": -0.19974270462989807, "step": 2870 }, { "epoch": 0.84, "learning_rate": 4.5225424859373684e-07, "logits/chosen": -2.7592597007751465, "logits/rejected": -2.7454373836517334, "logps/chosen": -192.59786987304688, "logps/rejected": -177.7920684814453, "loss": 0.9703, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.14666634798049927, "rewards/margins": 0.03436069190502167, "rewards/rejected": -0.18102705478668213, "step": 2880 }, { "epoch": 0.84, "learning_rate": 4.517542777697496e-07, "logits/chosen": -2.7579264640808105, "logits/rejected": -2.717677593231201, "logps/chosen": -205.26953125, "logps/rejected": -170.2499237060547, "loss": 0.9531, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.12789583206176758, "rewards/margins": 0.052336446940898895, "rewards/rejected": -0.18023227155208588, "step": 2890 }, { "epoch": 0.85, "learning_rate": 4.512519822333525e-07, "logits/chosen": -2.740565299987793, "logits/rejected": -2.716698169708252, "logps/chosen": -222.6794891357422, "logps/rejected": -183.50570678710938, "loss": 0.9522, "rewards/accuracies": 0.515625, "rewards/chosen": -0.14425984025001526, "rewards/margins": 0.031176995486021042, "rewards/rejected": -0.1754368394613266, "step": 2900 }, { "epoch": 0.85, "eval_logits/chosen": -2.6654722690582275, "eval_logits/rejected": -2.660351037979126, "eval_logps/chosen": -196.96963500976562, "eval_logps/rejected": -182.81700134277344, "eval_loss": 0.9514912962913513, "eval_rewards/accuracies": 0.5723997354507446, "eval_rewards/chosen": -0.13643421232700348, "eval_rewards/margins": 0.05313733592629433, "eval_rewards/rejected": -0.1895715445280075, "eval_runtime": 443.3239, "eval_samples_per_second": 26.538, "eval_steps_per_second": 3.318, "step": 2900 }, { "epoch": 0.85, "learning_rate": 4.507473677722428e-07, "logits/chosen": -2.754894733428955, "logits/rejected": -2.723676919937134, "logps/chosen": -225.0426025390625, "logps/rejected": -184.7257843017578, "loss": 0.9566, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.13445214927196503, "rewards/margins": 0.03854208067059517, "rewards/rejected": -0.1729942411184311, "step": 2910 }, { "epoch": 0.85, "learning_rate": 4.502404402008374e-07, "logits/chosen": -2.7293784618377686, "logits/rejected": -2.7325520515441895, "logps/chosen": -184.0533905029297, "logps/rejected": -168.29550170898438, "loss": 0.9458, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.12156828492879868, "rewards/margins": 0.05183269828557968, "rewards/rejected": -0.17340096831321716, "step": 2920 }, { "epoch": 0.85, "learning_rate": 4.4973120536020623e-07, "logits/chosen": -2.7345995903015137, "logits/rejected": -2.755232572555542, "logps/chosen": -181.9169158935547, "logps/rejected": -173.37290954589844, "loss": 0.9411, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.12848785519599915, "rewards/margins": 0.05240977555513382, "rewards/rejected": -0.18089762330055237, "step": 2930 }, { "epoch": 0.86, "learning_rate": 4.4921966911800446e-07, "logits/chosen": -2.768277645111084, "logits/rejected": -2.783160924911499, "logps/chosen": -200.125, "logps/rejected": -195.50100708007812, "loss": 0.9381, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.14732368290424347, "rewards/margins": 0.04739189147949219, "rewards/rejected": -0.19471557438373566, "step": 2940 }, { "epoch": 0.86, "learning_rate": 4.487058373684052e-07, "logits/chosen": -2.760740280151367, "logits/rejected": -2.747847557067871, "logps/chosen": -190.6141815185547, "logps/rejected": -180.71969604492188, "loss": 0.936, "rewards/accuracies": 0.5625, "rewards/chosen": -0.13626626133918762, "rewards/margins": 0.05940115451812744, "rewards/rejected": -0.19566740095615387, "step": 2950 }, { "epoch": 0.86, "learning_rate": 4.4818971603203174e-07, "logits/chosen": -2.764630079269409, "logits/rejected": -2.7455592155456543, "logps/chosen": -207.49392700195312, "logps/rejected": -179.84259033203125, "loss": 0.9377, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.14047829806804657, "rewards/margins": 0.05755741521716118, "rewards/rejected": -0.19803571701049805, "step": 2960 }, { "epoch": 0.87, "learning_rate": 4.4767131105588885e-07, "logits/chosen": -2.747560739517212, "logits/rejected": -2.7539334297180176, "logps/chosen": -187.84457397460938, "logps/rejected": -182.74752807617188, "loss": 0.9376, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.14399246871471405, "rewards/margins": 0.07578577101230621, "rewards/rejected": -0.21977825462818146, "step": 2970 }, { "epoch": 0.87, "learning_rate": 4.471506284132948e-07, "logits/chosen": -2.7605197429656982, "logits/rejected": -2.7485010623931885, "logps/chosen": -203.68710327148438, "logps/rejected": -191.30636596679688, "loss": 0.9585, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.15663865208625793, "rewards/margins": 0.03506855294108391, "rewards/rejected": -0.19170720875263214, "step": 2980 }, { "epoch": 0.87, "learning_rate": 4.466276741038118e-07, "logits/chosen": -2.731755256652832, "logits/rejected": -2.7256150245666504, "logps/chosen": -183.74453735351562, "logps/rejected": -173.6746368408203, "loss": 0.9604, "rewards/accuracies": 0.515625, "rewards/chosen": -0.16540805995464325, "rewards/margins": 0.03276645019650459, "rewards/rejected": -0.19817450642585754, "step": 2990 }, { "epoch": 0.88, "learning_rate": 4.461024541531779e-07, "logits/chosen": -2.7541847229003906, "logits/rejected": -2.7689316272735596, "logps/chosen": -212.4781951904297, "logps/rejected": -209.14669799804688, "loss": 0.9414, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.1305633783340454, "rewards/margins": 0.06333880126476288, "rewards/rejected": -0.19390219449996948, "step": 3000 }, { "epoch": 0.88, "eval_logits/chosen": -2.675511360168457, "eval_logits/rejected": -2.67060923576355, "eval_logps/chosen": -197.00001525878906, "eval_logps/rejected": -182.87057495117188, "eval_loss": 0.9491233229637146, "eval_rewards/accuracies": 0.5744391679763794, "eval_rewards/chosen": -0.13946956396102905, "eval_rewards/margins": 0.05546097829937935, "eval_rewards/rejected": -0.1949305385351181, "eval_runtime": 443.5187, "eval_samples_per_second": 26.527, "eval_steps_per_second": 3.317, "step": 3000 }, { "epoch": 0.88, "learning_rate": 4.455749746132366e-07, "logits/chosen": -2.7444159984588623, "logits/rejected": -2.7448792457580566, "logps/chosen": -190.2972412109375, "logps/rejected": -169.44912719726562, "loss": 0.9336, "rewards/accuracies": 0.5625, "rewards/chosen": -0.14413055777549744, "rewards/margins": 0.049498431384563446, "rewards/rejected": -0.19362899661064148, "step": 3010 }, { "epoch": 0.88, "learning_rate": 4.4504524156186763e-07, "logits/chosen": -2.7625129222869873, "logits/rejected": -2.753511905670166, "logps/chosen": -179.7624053955078, "logps/rejected": -167.31228637695312, "loss": 0.9557, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.13771577179431915, "rewards/margins": 0.04483487457036972, "rewards/rejected": -0.18255063891410828, "step": 3020 }, { "epoch": 0.88, "learning_rate": 4.4451326110291675e-07, "logits/chosen": -2.7575528621673584, "logits/rejected": -2.7579586505889893, "logps/chosen": -196.14010620117188, "logps/rejected": -191.85595703125, "loss": 0.942, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.15023425221443176, "rewards/margins": 0.06083030626177788, "rewards/rejected": -0.21106454730033875, "step": 3030 }, { "epoch": 0.89, "learning_rate": 4.439790393661256e-07, "logits/chosen": -2.7624666690826416, "logits/rejected": -2.76863956451416, "logps/chosen": -193.34310913085938, "logps/rejected": -193.1276092529297, "loss": 0.9555, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.15546974539756775, "rewards/margins": 0.06791901588439941, "rewards/rejected": -0.22338874638080597, "step": 3040 }, { "epoch": 0.89, "learning_rate": 4.4344258250706066e-07, "logits/chosen": -2.7380971908569336, "logits/rejected": -2.7569963932037354, "logps/chosen": -181.44793701171875, "logps/rejected": -173.0232391357422, "loss": 0.9425, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.14029046893119812, "rewards/margins": 0.06983927637338638, "rewards/rejected": -0.2101297378540039, "step": 3050 }, { "epoch": 0.89, "learning_rate": 4.429038967070429e-07, "logits/chosen": -2.748533010482788, "logits/rejected": -2.738431215286255, "logps/chosen": -197.2476348876953, "logps/rejected": -192.76535034179688, "loss": 0.9452, "rewards/accuracies": 0.546875, "rewards/chosen": -0.15091857314109802, "rewards/margins": 0.05640494078397751, "rewards/rejected": -0.20732350647449493, "step": 3060 }, { "epoch": 0.9, "learning_rate": 4.423629881730759e-07, "logits/chosen": -2.7502188682556152, "logits/rejected": -2.7514498233795166, "logps/chosen": -206.1704864501953, "logps/rejected": -199.50942993164062, "loss": 0.9488, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.16290298104286194, "rewards/margins": 0.060902971774339676, "rewards/rejected": -0.22380594909191132, "step": 3070 }, { "epoch": 0.9, "learning_rate": 4.418198631377751e-07, "logits/chosen": -2.752500295639038, "logits/rejected": -2.7435920238494873, "logps/chosen": -190.84780883789062, "logps/rejected": -180.72679138183594, "loss": 0.9516, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.13359172642230988, "rewards/margins": 0.06508227437734604, "rewards/rejected": -0.19867399334907532, "step": 3080 }, { "epoch": 0.9, "learning_rate": 4.412745278592952e-07, "logits/chosen": -2.7263596057891846, "logits/rejected": -2.739581346511841, "logps/chosen": -199.06126403808594, "logps/rejected": -200.32713317871094, "loss": 0.9414, "rewards/accuracies": 0.59375, "rewards/chosen": -0.1425991952419281, "rewards/margins": 0.06533292680978775, "rewards/rejected": -0.20793208479881287, "step": 3090 }, { "epoch": 0.9, "learning_rate": 4.407269886212586e-07, "logits/chosen": -2.72841215133667, "logits/rejected": -2.7648680210113525, "logps/chosen": -184.0097198486328, "logps/rejected": -183.879150390625, "loss": 0.9509, "rewards/accuracies": 0.609375, "rewards/chosen": -0.1576574146747589, "rewards/margins": 0.05096912384033203, "rewards/rejected": -0.20862650871276855, "step": 3100 }, { "epoch": 0.9, "eval_logits/chosen": -2.6625335216522217, "eval_logits/rejected": -2.657381534576416, "eval_logps/chosen": -197.05508422851562, "eval_logps/rejected": -182.9411163330078, "eval_loss": 0.9482882618904114, "eval_rewards/accuracies": 0.5798776149749756, "eval_rewards/chosen": -0.14497847855091095, "eval_rewards/margins": 0.057004500180482864, "eval_rewards/rejected": -0.2019829899072647, "eval_runtime": 443.4978, "eval_samples_per_second": 26.528, "eval_steps_per_second": 3.317, "step": 3100 }, { "epoch": 0.91, "learning_rate": 4.401772517326827e-07, "logits/chosen": -2.743058204650879, "logits/rejected": -2.736036777496338, "logps/chosen": -200.5432586669922, "logps/rejected": -182.67279052734375, "loss": 0.9512, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.16279372572898865, "rewards/margins": 0.03614386171102524, "rewards/rejected": -0.1989375799894333, "step": 3110 }, { "epoch": 0.91, "learning_rate": 4.3962532352790746e-07, "logits/chosen": -2.7184717655181885, "logits/rejected": -2.7647883892059326, "logps/chosen": -194.09921264648438, "logps/rejected": -216.4038848876953, "loss": 0.9343, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.1559465378522873, "rewards/margins": 0.08612390607595444, "rewards/rejected": -0.24207043647766113, "step": 3120 }, { "epoch": 0.91, "learning_rate": 4.390712103665222e-07, "logits/chosen": -2.726545810699463, "logits/rejected": -2.714825391769409, "logps/chosen": -193.67857360839844, "logps/rejected": -175.10110473632812, "loss": 0.9599, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.15397819876670837, "rewards/margins": 0.04705255106091499, "rewards/rejected": -0.20103076100349426, "step": 3130 }, { "epoch": 0.92, "learning_rate": 4.385149186332923e-07, "logits/chosen": -2.766066551208496, "logits/rejected": -2.7510006427764893, "logps/chosen": -197.1205596923828, "logps/rejected": -180.7372589111328, "loss": 0.942, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.15654006600379944, "rewards/margins": 0.056927651166915894, "rewards/rejected": -0.21346771717071533, "step": 3140 }, { "epoch": 0.92, "learning_rate": 4.379564547380858e-07, "logits/chosen": -2.735912799835205, "logits/rejected": -2.7432703971862793, "logps/chosen": -193.31373596191406, "logps/rejected": -179.3289337158203, "loss": 0.9546, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.14612646400928497, "rewards/margins": 0.04988929629325867, "rewards/rejected": -0.19601577520370483, "step": 3150 }, { "epoch": 0.92, "learning_rate": 4.373958251157995e-07, "logits/chosen": -2.728239059448242, "logits/rejected": -2.742741346359253, "logps/chosen": -182.5937957763672, "logps/rejected": -183.79812622070312, "loss": 0.9485, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.16978135704994202, "rewards/margins": 0.04071738198399544, "rewards/rejected": -0.21049876511096954, "step": 3160 }, { "epoch": 0.92, "learning_rate": 4.3683303622628467e-07, "logits/chosen": -2.7599949836730957, "logits/rejected": -2.7678418159484863, "logps/chosen": -205.2645721435547, "logps/rejected": -194.56536865234375, "loss": 0.9322, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.14348538219928741, "rewards/margins": 0.09094350785017014, "rewards/rejected": -0.23442888259887695, "step": 3170 }, { "epoch": 0.93, "learning_rate": 4.3626809455427284e-07, "logits/chosen": -2.7088184356689453, "logits/rejected": -2.737748622894287, "logps/chosen": -182.29830932617188, "logps/rejected": -185.71241760253906, "loss": 0.9458, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1563442051410675, "rewards/margins": 0.06264548003673553, "rewards/rejected": -0.21898965537548065, "step": 3180 }, { "epoch": 0.93, "learning_rate": 4.357010066093009e-07, "logits/chosen": -2.7586493492126465, "logits/rejected": -2.738326072692871, "logps/chosen": -212.00320434570312, "logps/rejected": -179.95960998535156, "loss": 0.9345, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.13312071561813354, "rewards/margins": 0.07820292562246323, "rewards/rejected": -0.21132364869117737, "step": 3190 }, { "epoch": 0.93, "learning_rate": 4.351317789256361e-07, "logits/chosen": -2.725133180618286, "logits/rejected": -2.711580753326416, "logps/chosen": -213.65029907226562, "logps/rejected": -186.83413696289062, "loss": 0.9453, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.15071360766887665, "rewards/margins": 0.07792054116725922, "rewards/rejected": -0.22863414883613586, "step": 3200 }, { "epoch": 0.93, "eval_logits/chosen": -2.647813558578491, "eval_logits/rejected": -2.6423654556274414, "eval_logps/chosen": -197.0772247314453, "eval_logps/rejected": -182.98223876953125, "eval_loss": 0.9472061395645142, "eval_rewards/accuracies": 0.5834466218948364, "eval_rewards/chosen": -0.14719060063362122, "eval_rewards/margins": 0.05890314280986786, "eval_rewards/rejected": -0.20609375834465027, "eval_runtime": 443.5189, "eval_samples_per_second": 26.526, "eval_steps_per_second": 3.317, "step": 3200 }, { "epoch": 0.94, "learning_rate": 4.3456041806220105e-07, "logits/chosen": -2.710390090942383, "logits/rejected": -2.732726573944092, "logps/chosen": -174.5506134033203, "logps/rejected": -184.31295776367188, "loss": 0.9507, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.14880771934986115, "rewards/margins": 0.04055916517972946, "rewards/rejected": -0.18936687707901, "step": 3210 }, { "epoch": 0.94, "learning_rate": 4.3398693060249757e-07, "logits/chosen": -2.7572438716888428, "logits/rejected": -2.780435562133789, "logps/chosen": -205.3828582763672, "logps/rejected": -194.72537231445312, "loss": 0.9523, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.159606471657753, "rewards/margins": 0.03793240338563919, "rewards/rejected": -0.19753886759281158, "step": 3220 }, { "epoch": 0.94, "learning_rate": 4.334113231545314e-07, "logits/chosen": -2.7653536796569824, "logits/rejected": -2.7734832763671875, "logps/chosen": -209.9667205810547, "logps/rejected": -196.0996856689453, "loss": 0.9555, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.1710664927959442, "rewards/margins": 0.03884775564074516, "rewards/rejected": -0.20991425216197968, "step": 3230 }, { "epoch": 0.95, "learning_rate": 4.3283360235073584e-07, "logits/chosen": -2.781524181365967, "logits/rejected": -2.7697031497955322, "logps/chosen": -213.2795867919922, "logps/rejected": -185.36959838867188, "loss": 0.96, "rewards/accuracies": 0.546875, "rewards/chosen": -0.16060218214988708, "rewards/margins": 0.04485396295785904, "rewards/rejected": -0.20545610785484314, "step": 3240 }, { "epoch": 0.95, "learning_rate": 4.322537748478952e-07, "logits/chosen": -2.727961301803589, "logits/rejected": -2.7541677951812744, "logps/chosen": -183.5811309814453, "logps/rejected": -186.97708129882812, "loss": 0.9641, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.16682754456996918, "rewards/margins": 0.05004224181175232, "rewards/rejected": -0.2168697863817215, "step": 3250 }, { "epoch": 0.95, "learning_rate": 4.3167184732706825e-07, "logits/chosen": -2.712656021118164, "logits/rejected": -2.727965831756592, "logps/chosen": -190.7420654296875, "logps/rejected": -185.13853454589844, "loss": 0.9528, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -0.16370153427124023, "rewards/margins": 0.04779518395662308, "rewards/rejected": -0.2114967405796051, "step": 3260 }, { "epoch": 0.95, "learning_rate": 4.310878264935113e-07, "logits/chosen": -2.74177885055542, "logits/rejected": -2.743264675140381, "logps/chosen": -176.8439483642578, "logps/rejected": -179.67626953125, "loss": 0.9477, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.15423059463500977, "rewards/margins": 0.049925509840250015, "rewards/rejected": -0.20415611565113068, "step": 3270 }, { "epoch": 0.96, "learning_rate": 4.305017190766006e-07, "logits/chosen": -2.724313497543335, "logits/rejected": -2.76788067817688, "logps/chosen": -168.16397094726562, "logps/rejected": -183.28399658203125, "loss": 0.943, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.14488880336284637, "rewards/margins": 0.07369550317525864, "rewards/rejected": -0.21858429908752441, "step": 3280 }, { "epoch": 0.96, "learning_rate": 4.2991353182975545e-07, "logits/chosen": -2.7469019889831543, "logits/rejected": -2.71189546585083, "logps/chosen": -204.70333862304688, "logps/rejected": -180.01437377929688, "loss": 0.9454, "rewards/accuracies": 0.484375, "rewards/chosen": -0.13744714856147766, "rewards/margins": 0.05579395964741707, "rewards/rejected": -0.19324110448360443, "step": 3290 }, { "epoch": 0.96, "learning_rate": 4.293232715303595e-07, "logits/chosen": -2.7478322982788086, "logits/rejected": -2.740370988845825, "logps/chosen": -197.2376251220703, "logps/rejected": -188.1304168701172, "loss": 0.9577, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.1363435834646225, "rewards/margins": 0.036770425736904144, "rewards/rejected": -0.17311401665210724, "step": 3300 }, { "epoch": 0.96, "eval_logits/chosen": -2.662210702896118, "eval_logits/rejected": -2.65704345703125, "eval_logps/chosen": -197.0956268310547, "eval_logps/rejected": -183.0017852783203, "eval_loss": 0.9461437463760376, "eval_rewards/accuracies": 0.5793677568435669, "eval_rewards/chosen": -0.14903247356414795, "eval_rewards/margins": 0.05901965871453285, "eval_rewards/rejected": -0.2080521434545517, "eval_runtime": 443.584, "eval_samples_per_second": 26.523, "eval_steps_per_second": 3.316, "step": 3300 }, { "epoch": 0.97, "learning_rate": 4.2873094497968364e-07, "logits/chosen": -2.7464559078216553, "logits/rejected": -2.7323105335235596, "logps/chosen": -203.46339416503906, "logps/rejected": -184.08560180664062, "loss": 0.9466, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.14876006543636322, "rewards/margins": 0.04546096548438072, "rewards/rejected": -0.19422104954719543, "step": 3310 }, { "epoch": 0.97, "learning_rate": 4.281365590028067e-07, "logits/chosen": -2.7063145637512207, "logits/rejected": -2.7181246280670166, "logps/chosen": -197.8404998779297, "logps/rejected": -197.62437438964844, "loss": 0.9218, "rewards/accuracies": 0.625, "rewards/chosen": -0.12893405556678772, "rewards/margins": 0.09074068069458008, "rewards/rejected": -0.2196747362613678, "step": 3320 }, { "epoch": 0.97, "learning_rate": 4.2754012044853734e-07, "logits/chosen": -2.7540042400360107, "logits/rejected": -2.7615749835968018, "logps/chosen": -190.9739227294922, "logps/rejected": -186.8544158935547, "loss": 0.9485, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.1605357974767685, "rewards/margins": 0.049000561237335205, "rewards/rejected": -0.2095363587141037, "step": 3330 }, { "epoch": 0.97, "learning_rate": 4.269416361893352e-07, "logits/chosen": -2.721992254257202, "logits/rejected": -2.7304000854492188, "logps/chosen": -206.56936645507812, "logps/rejected": -191.93504333496094, "loss": 0.9345, "rewards/accuracies": 0.546875, "rewards/chosen": -0.15713246166706085, "rewards/margins": 0.05960817262530327, "rewards/rejected": -0.2167406529188156, "step": 3340 }, { "epoch": 0.98, "learning_rate": 4.2634111312123144e-07, "logits/chosen": -2.729740858078003, "logits/rejected": -2.6976797580718994, "logps/chosen": -203.6077423095703, "logps/rejected": -181.80348205566406, "loss": 0.941, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.1732880175113678, "rewards/margins": 0.04767068475484848, "rewards/rejected": -0.22095870971679688, "step": 3350 }, { "epoch": 0.98, "learning_rate": 4.257385581637493e-07, "logits/chosen": -2.7473340034484863, "logits/rejected": -2.7488176822662354, "logps/chosen": -197.68312072753906, "logps/rejected": -198.27090454101562, "loss": 0.9564, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.15684029459953308, "rewards/margins": 0.04104867950081825, "rewards/rejected": -0.19788897037506104, "step": 3360 }, { "epoch": 0.98, "learning_rate": 4.2513397825982463e-07, "logits/chosen": -2.755300760269165, "logits/rejected": -2.751363515853882, "logps/chosen": -199.49459838867188, "logps/rejected": -185.002685546875, "loss": 0.9472, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.15579402446746826, "rewards/margins": 0.06800516694784164, "rewards/rejected": -0.2237991988658905, "step": 3370 }, { "epoch": 0.99, "learning_rate": 4.245273803757254e-07, "logits/chosen": -2.779808521270752, "logits/rejected": -2.763307571411133, "logps/chosen": -220.1051788330078, "logps/rejected": -205.69338989257812, "loss": 0.9421, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.14048513770103455, "rewards/margins": 0.047556690871715546, "rewards/rejected": -0.18804185092449188, "step": 3380 }, { "epoch": 0.99, "learning_rate": 4.239187715009722e-07, "logits/chosen": -2.752995014190674, "logits/rejected": -2.762915849685669, "logps/chosen": -217.9800567626953, "logps/rejected": -202.8637237548828, "loss": 0.9429, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.17609527707099915, "rewards/margins": 0.05705328658223152, "rewards/rejected": -0.23314857482910156, "step": 3390 }, { "epoch": 0.99, "learning_rate": 4.2330815864825696e-07, "logits/chosen": -2.7742648124694824, "logits/rejected": -2.774212598800659, "logps/chosen": -226.41854858398438, "logps/rejected": -207.75296020507812, "loss": 0.9374, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.15242572128772736, "rewards/margins": 0.06987457722425461, "rewards/rejected": -0.22230032086372375, "step": 3400 }, { "epoch": 0.99, "eval_logits/chosen": -2.6551883220672607, "eval_logits/rejected": -2.649892807006836, "eval_logps/chosen": -197.13763427734375, "eval_logps/rejected": -183.06625366210938, "eval_loss": 0.9452260136604309, "eval_rewards/accuracies": 0.5769884586334229, "eval_rewards/chosen": -0.15323300659656525, "eval_rewards/margins": 0.061263974756002426, "eval_rewards/rejected": -0.21449698507785797, "eval_runtime": 443.4932, "eval_samples_per_second": 26.528, "eval_steps_per_second": 3.317, "step": 3400 }, { "epoch": 0.99, "learning_rate": 4.2269554885336234e-07, "logits/chosen": -2.780066728591919, "logits/rejected": -2.7521700859069824, "logps/chosen": -226.03341674804688, "logps/rejected": -197.58273315429688, "loss": 0.9342, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.17001919448375702, "rewards/margins": 0.0716620534658432, "rewards/rejected": -0.24168124794960022, "step": 3410 }, { "epoch": 1.0, "learning_rate": 4.2208094917508095e-07, "logits/chosen": -2.7597529888153076, "logits/rejected": -2.7499496936798096, "logps/chosen": -178.80166625976562, "logps/rejected": -175.30052185058594, "loss": 0.9548, "rewards/accuracies": 0.528124988079071, "rewards/chosen": -0.18084251880645752, "rewards/margins": 0.033990949392318726, "rewards/rejected": -0.21483345329761505, "step": 3420 }, { "epoch": 1.0, "learning_rate": 4.214643666951338e-07, "logits/chosen": -2.7603445053100586, "logits/rejected": -2.770897388458252, "logps/chosen": -203.36367797851562, "logps/rejected": -199.06788635253906, "loss": 0.9237, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.14491590857505798, "rewards/margins": 0.09012307226657867, "rewards/rejected": -0.23503899574279785, "step": 3430 }, { "epoch": 1.0, "learning_rate": 4.2084580851808866e-07, "logits/chosen": -2.758740186691284, "logits/rejected": -2.7576069831848145, "logps/chosen": -188.8221893310547, "logps/rejected": -176.63290405273438, "loss": 0.9499, "rewards/accuracies": 0.578125, "rewards/chosen": -0.15238967537879944, "rewards/margins": 0.05928494781255722, "rewards/rejected": -0.21167464554309845, "step": 3440 }, { "epoch": 1.01, "learning_rate": 4.2022528177127827e-07, "logits/chosen": -2.7145676612854004, "logits/rejected": -2.71001935005188, "logps/chosen": -187.4506072998047, "logps/rejected": -177.14111328125, "loss": 0.9288, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.16252227127552032, "rewards/margins": 0.0730685144662857, "rewards/rejected": -0.23559077084064484, "step": 3450 }, { "epoch": 1.01, "learning_rate": 4.196027936047182e-07, "logits/chosen": -2.7500064373016357, "logits/rejected": -2.721895217895508, "logps/chosen": -209.0021209716797, "logps/rejected": -185.7652130126953, "loss": 0.9354, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.16328108310699463, "rewards/margins": 0.08029235899448395, "rewards/rejected": -0.24357345700263977, "step": 3460 }, { "epoch": 1.01, "learning_rate": 4.189783511910244e-07, "logits/chosen": -2.7528345584869385, "logits/rejected": -2.764251232147217, "logps/chosen": -181.8857879638672, "logps/rejected": -176.1710205078125, "loss": 0.9325, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.14209429919719696, "rewards/margins": 0.08681829273700714, "rewards/rejected": -0.2289125621318817, "step": 3470 }, { "epoch": 1.02, "learning_rate": 4.1835196172533083e-07, "logits/chosen": -2.7215676307678223, "logits/rejected": -2.691948175430298, "logps/chosen": -220.85800170898438, "logps/rejected": -192.20065307617188, "loss": 0.9396, "rewards/accuracies": 0.5625, "rewards/chosen": -0.16459617018699646, "rewards/margins": 0.06939554214477539, "rewards/rejected": -0.23399169743061066, "step": 3480 }, { "epoch": 1.02, "learning_rate": 4.1772363242520615e-07, "logits/chosen": -2.709977626800537, "logits/rejected": -2.7163503170013428, "logps/chosen": -192.06781005859375, "logps/rejected": -177.94094848632812, "loss": 0.9346, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.16086141765117645, "rewards/margins": 0.07237172871828079, "rewards/rejected": -0.23323316872119904, "step": 3490 }, { "epoch": 1.02, "learning_rate": 4.1709337053057083e-07, "logits/chosen": -2.753588914871216, "logits/rejected": -2.756035566329956, "logps/chosen": -190.80001831054688, "logps/rejected": -191.4000701904297, "loss": 0.9299, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.14268064498901367, "rewards/margins": 0.08039744943380356, "rewards/rejected": -0.22307810187339783, "step": 3500 }, { "epoch": 1.02, "eval_logits/chosen": -2.666295051574707, "eval_logits/rejected": -2.661217212677002, "eval_logps/chosen": -197.17547607421875, "eval_logps/rejected": -183.11599731445312, "eval_loss": 0.9439437389373779, "eval_rewards/accuracies": 0.5769884586334229, "eval_rewards/chosen": -0.15701434016227722, "eval_rewards/margins": 0.06245831400156021, "eval_rewards/rejected": -0.21947267651557922, "eval_runtime": 443.578, "eval_samples_per_second": 26.523, "eval_steps_per_second": 3.316, "step": 3500 }, { "epoch": 1.02, "learning_rate": 4.164611833036136e-07, "logits/chosen": -2.714778423309326, "logits/rejected": -2.7263901233673096, "logps/chosen": -201.15415954589844, "logps/rejected": -174.53749084472656, "loss": 0.9326, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.1574777513742447, "rewards/margins": 0.06711246818304062, "rewards/rejected": -0.2245902270078659, "step": 3510 }, { "epoch": 1.03, "learning_rate": 4.1582707802870777e-07, "logits/chosen": -2.730517864227295, "logits/rejected": -2.725821018218994, "logps/chosen": -210.6570281982422, "logps/rejected": -185.85055541992188, "loss": 0.9278, "rewards/accuracies": 0.5625, "rewards/chosen": -0.15748251974582672, "rewards/margins": 0.057965390384197235, "rewards/rejected": -0.21544790267944336, "step": 3520 }, { "epoch": 1.03, "learning_rate": 4.151910620123276e-07, "logits/chosen": -2.756896734237671, "logits/rejected": -2.740691661834717, "logps/chosen": -202.3911590576172, "logps/rejected": -182.94061279296875, "loss": 0.9473, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.185185968875885, "rewards/margins": 0.03288014605641365, "rewards/rejected": -0.21806609630584717, "step": 3530 }, { "epoch": 1.03, "learning_rate": 4.145531425829636e-07, "logits/chosen": -2.747807264328003, "logits/rejected": -2.7508645057678223, "logps/chosen": -191.0756072998047, "logps/rejected": -178.44808959960938, "loss": 0.9394, "rewards/accuracies": 0.578125, "rewards/chosen": -0.1534716784954071, "rewards/margins": 0.0547831654548645, "rewards/rejected": -0.2082548439502716, "step": 3540 }, { "epoch": 1.04, "learning_rate": 4.139133270910384e-07, "logits/chosen": -2.7193522453308105, "logits/rejected": -2.721191644668579, "logps/chosen": -201.148681640625, "logps/rejected": -175.58279418945312, "loss": 0.9385, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14847466349601746, "rewards/margins": 0.06052703410387039, "rewards/rejected": -0.20900170505046844, "step": 3550 }, { "epoch": 1.04, "learning_rate": 4.13271622908822e-07, "logits/chosen": -2.7018117904663086, "logits/rejected": -2.705580711364746, "logps/chosen": -173.38417053222656, "logps/rejected": -177.74185180664062, "loss": 0.9373, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.1533229649066925, "rewards/margins": 0.07361769676208496, "rewards/rejected": -0.22694067656993866, "step": 3560 }, { "epoch": 1.04, "learning_rate": 4.126280374303469e-07, "logits/chosen": -2.767444372177124, "logits/rejected": -2.7558300495147705, "logps/chosen": -197.06289672851562, "logps/rejected": -181.7204132080078, "loss": 0.941, "rewards/accuracies": 0.546875, "rewards/chosen": -0.15346328914165497, "rewards/margins": 0.06391434371471405, "rewards/rejected": -0.21737761795520782, "step": 3570 }, { "epoch": 1.04, "learning_rate": 4.1198257807132276e-07, "logits/chosen": -2.734412670135498, "logits/rejected": -2.743535041809082, "logps/chosen": -186.5696563720703, "logps/rejected": -192.47348022460938, "loss": 0.9151, "rewards/accuracies": 0.6656249761581421, "rewards/chosen": -0.1432243436574936, "rewards/margins": 0.08326814323663712, "rewards/rejected": -0.2264924943447113, "step": 3580 }, { "epoch": 1.05, "learning_rate": 4.11335252269051e-07, "logits/chosen": -2.715536594390869, "logits/rejected": -2.7261719703674316, "logps/chosen": -189.0170135498047, "logps/rejected": -176.01217651367188, "loss": 0.9335, "rewards/accuracies": 0.621874988079071, "rewards/chosen": -0.1607503592967987, "rewards/margins": 0.07931359112262726, "rewards/rejected": -0.24006398022174835, "step": 3590 }, { "epoch": 1.05, "learning_rate": 4.1068606748233916e-07, "logits/chosen": -2.704840898513794, "logits/rejected": -2.71500301361084, "logps/chosen": -195.28549194335938, "logps/rejected": -186.14892578125, "loss": 0.936, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.16414105892181396, "rewards/margins": 0.054746825248003006, "rewards/rejected": -0.21888788044452667, "step": 3600 }, { "epoch": 1.05, "eval_logits/chosen": -2.658426284790039, "eval_logits/rejected": -2.6531946659088135, "eval_logps/chosen": -197.23300170898438, "eval_logps/rejected": -183.18643188476562, "eval_loss": 0.9437562823295593, "eval_rewards/accuracies": 0.5788578987121582, "eval_rewards/chosen": -0.16277101635932922, "eval_rewards/margins": 0.06374562531709671, "eval_rewards/rejected": -0.22651664912700653, "eval_runtime": 443.5371, "eval_samples_per_second": 26.525, "eval_steps_per_second": 3.317, "step": 3600 }, { "epoch": 1.05, "learning_rate": 4.100350311914149e-07, "logits/chosen": -2.742522954940796, "logits/rejected": -2.7588882446289062, "logps/chosen": -218.0732421875, "logps/rejected": -205.04379272460938, "loss": 0.9188, "rewards/accuracies": 0.640625, "rewards/chosen": -0.14045368134975433, "rewards/margins": 0.11588656902313232, "rewards/rejected": -0.25634023547172546, "step": 3610 }, { "epoch": 1.06, "learning_rate": 4.093821508978399e-07, "logits/chosen": -2.7190682888031006, "logits/rejected": -2.7327933311462402, "logps/chosen": -193.43785095214844, "logps/rejected": -187.7996826171875, "loss": 0.9622, "rewards/accuracies": 0.546875, "rewards/chosen": -0.17929328978061676, "rewards/margins": 0.03454384207725525, "rewards/rejected": -0.2138371467590332, "step": 3620 }, { "epoch": 1.06, "learning_rate": 4.087274341244232e-07, "logits/chosen": -2.733224868774414, "logits/rejected": -2.7335586547851562, "logps/chosen": -196.0847625732422, "logps/rejected": -187.5789031982422, "loss": 0.9411, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.17219600081443787, "rewards/margins": 0.07483509182929993, "rewards/rejected": -0.24703112244606018, "step": 3630 }, { "epoch": 1.06, "learning_rate": 4.0807088841513473e-07, "logits/chosen": -2.76577091217041, "logits/rejected": -2.7651515007019043, "logps/chosen": -209.15121459960938, "logps/rejected": -203.23410034179688, "loss": 0.9332, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.14608857035636902, "rewards/margins": 0.09846082329750061, "rewards/rejected": -0.24454942345619202, "step": 3640 }, { "epoch": 1.06, "learning_rate": 4.074125213350184e-07, "logits/chosen": -2.7061800956726074, "logits/rejected": -2.7219319343566895, "logps/chosen": -189.35731506347656, "logps/rejected": -191.7716522216797, "loss": 0.9241, "rewards/accuracies": 0.625, "rewards/chosen": -0.14296016097068787, "rewards/margins": 0.09905602782964706, "rewards/rejected": -0.24201619625091553, "step": 3650 }, { "epoch": 1.07, "learning_rate": 4.0675234047010475e-07, "logits/chosen": -2.757671356201172, "logits/rejected": -2.7647621631622314, "logps/chosen": -211.3065643310547, "logps/rejected": -206.5069122314453, "loss": 0.9335, "rewards/accuracies": 0.609375, "rewards/chosen": -0.13889974355697632, "rewards/margins": 0.09706652909517288, "rewards/rejected": -0.2359662801027298, "step": 3660 }, { "epoch": 1.07, "learning_rate": 4.0609035342732374e-07, "logits/chosen": -2.792123556137085, "logits/rejected": -2.7674877643585205, "logps/chosen": -222.4933624267578, "logps/rejected": -188.7619171142578, "loss": 0.9397, "rewards/accuracies": 0.5625, "rewards/chosen": -0.19772221148014069, "rewards/margins": 0.05392972379922867, "rewards/rejected": -0.25165191292762756, "step": 3670 }, { "epoch": 1.07, "learning_rate": 4.0542656783441685e-07, "logits/chosen": -2.74934458732605, "logits/rejected": -2.7401318550109863, "logps/chosen": -199.81399536132812, "logps/rejected": -187.61581420898438, "loss": 0.9394, "rewards/accuracies": 0.578125, "rewards/chosen": -0.18191394209861755, "rewards/margins": 0.0601019486784935, "rewards/rejected": -0.24201588332653046, "step": 3680 }, { "epoch": 1.08, "learning_rate": 4.047609913398496e-07, "logits/chosen": -2.7838268280029297, "logits/rejected": -2.793952703475952, "logps/chosen": -219.28515625, "logps/rejected": -220.8399658203125, "loss": 0.925, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.18908637762069702, "rewards/margins": 0.0818186104297638, "rewards/rejected": -0.2709049582481384, "step": 3690 }, { "epoch": 1.08, "learning_rate": 4.0409363161272283e-07, "logits/chosen": -2.730079174041748, "logits/rejected": -2.7240207195281982, "logps/chosen": -204.08016967773438, "logps/rejected": -179.5956573486328, "loss": 0.9435, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.17038798332214355, "rewards/margins": 0.05574061721563339, "rewards/rejected": -0.22612860798835754, "step": 3700 }, { "epoch": 1.08, "eval_logits/chosen": -2.6722583770751953, "eval_logits/rejected": -2.6673152446746826, "eval_logps/chosen": -197.2606658935547, "eval_logps/rejected": -183.226318359375, "eval_loss": 0.9420310258865356, "eval_rewards/accuracies": 0.5807273983955383, "eval_rewards/chosen": -0.16553470492362976, "eval_rewards/margins": 0.06496822834014893, "eval_rewards/rejected": -0.2305029332637787, "eval_runtime": 443.582, "eval_samples_per_second": 26.523, "eval_steps_per_second": 3.316, "step": 3700 }, { "epoch": 1.08, "learning_rate": 4.03424496342685e-07, "logits/chosen": -2.7483789920806885, "logits/rejected": -2.7456538677215576, "logps/chosen": -175.35150146484375, "logps/rejected": -163.5673065185547, "loss": 0.9311, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.1589987576007843, "rewards/margins": 0.056345902383327484, "rewards/rejected": -0.21534466743469238, "step": 3710 }, { "epoch": 1.09, "learning_rate": 4.02753593239843e-07, "logits/chosen": -2.747954845428467, "logits/rejected": -2.7363548278808594, "logps/chosen": -216.8974609375, "logps/rejected": -190.09544372558594, "loss": 0.9216, "rewards/accuracies": 0.546875, "rewards/chosen": -0.17782175540924072, "rewards/margins": 0.06778020411729813, "rewards/rejected": -0.24560198187828064, "step": 3720 }, { "epoch": 1.09, "learning_rate": 4.0208093003467366e-07, "logits/chosen": -2.734060525894165, "logits/rejected": -2.7579092979431152, "logps/chosen": -182.96588134765625, "logps/rejected": -188.7209930419922, "loss": 0.9241, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.17049555480480194, "rewards/margins": 0.07502481341362, "rewards/rejected": -0.24552038311958313, "step": 3730 }, { "epoch": 1.09, "learning_rate": 4.014065144779345e-07, "logits/chosen": -2.7681238651275635, "logits/rejected": -2.757328748703003, "logps/chosen": -201.3914794921875, "logps/rejected": -186.22134399414062, "loss": 0.9316, "rewards/accuracies": 0.578125, "rewards/chosen": -0.15492196381092072, "rewards/margins": 0.07273847609758377, "rewards/rejected": -0.2276604175567627, "step": 3740 }, { "epoch": 1.09, "learning_rate": 4.0073035434057477e-07, "logits/chosen": -2.7487292289733887, "logits/rejected": -2.7397453784942627, "logps/chosen": -182.2095184326172, "logps/rejected": -174.14088439941406, "loss": 0.9317, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.14754052460193634, "rewards/margins": 0.0717361718416214, "rewards/rejected": -0.21927671134471893, "step": 3750 }, { "epoch": 1.1, "learning_rate": 4.0005245741364537e-07, "logits/chosen": -2.7562036514282227, "logits/rejected": -2.7581675052642822, "logps/chosen": -207.8866729736328, "logps/rejected": -186.6951141357422, "loss": 0.9442, "rewards/accuracies": 0.546875, "rewards/chosen": -0.16531476378440857, "rewards/margins": 0.05598212406039238, "rewards/rejected": -0.22129687666893005, "step": 3760 }, { "epoch": 1.1, "learning_rate": 3.9937283150820935e-07, "logits/chosen": -2.742161273956299, "logits/rejected": -2.717721939086914, "logps/chosen": -191.58201599121094, "logps/rejected": -175.09475708007812, "loss": 0.9318, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.16936281323432922, "rewards/margins": 0.06095327064394951, "rewards/rejected": -0.23031607270240784, "step": 3770 }, { "epoch": 1.1, "learning_rate": 3.9869148445525195e-07, "logits/chosen": -2.7626867294311523, "logits/rejected": -2.7525551319122314, "logps/chosen": -212.2379913330078, "logps/rejected": -193.54745483398438, "loss": 0.9094, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.17974770069122314, "rewards/margins": 0.08587310463190079, "rewards/rejected": -0.26562076807022095, "step": 3780 }, { "epoch": 1.11, "learning_rate": 3.980084241055905e-07, "logits/chosen": -2.7718071937561035, "logits/rejected": -2.7361700534820557, "logps/chosen": -220.29745483398438, "logps/rejected": -180.39019775390625, "loss": 0.9433, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.18197306990623474, "rewards/margins": 0.05966731905937195, "rewards/rejected": -0.2416403740644455, "step": 3790 }, { "epoch": 1.11, "learning_rate": 3.973236583297835e-07, "logits/chosen": -2.736232042312622, "logits/rejected": -2.781214475631714, "logps/chosen": -199.220947265625, "logps/rejected": -213.5320281982422, "loss": 0.9341, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16228926181793213, "rewards/margins": 0.1069878339767456, "rewards/rejected": -0.26927709579467773, "step": 3800 }, { "epoch": 1.11, "eval_logits/chosen": -2.663583993911743, "eval_logits/rejected": -2.6584584712982178, "eval_logps/chosen": -197.3029327392578, "eval_logps/rejected": -183.2721405029297, "eval_loss": 0.9421671032905579, "eval_rewards/accuracies": 0.581237256526947, "eval_rewards/chosen": -0.1697637140750885, "eval_rewards/margins": 0.0653214231133461, "eval_rewards/rejected": -0.2350851446390152, "eval_runtime": 443.4935, "eval_samples_per_second": 26.528, "eval_steps_per_second": 3.317, "step": 3800 }, { "epoch": 1.11, "learning_rate": 3.966371950180404e-07, "logits/chosen": -2.72824764251709, "logits/rejected": -2.7456448078155518, "logps/chosen": -190.12353515625, "logps/rejected": -185.89035034179688, "loss": 0.9381, "rewards/accuracies": 0.578125, "rewards/chosen": -0.16197335720062256, "rewards/margins": 0.071574866771698, "rewards/rejected": -0.23354823887348175, "step": 3810 }, { "epoch": 1.11, "learning_rate": 3.9594904208013034e-07, "logits/chosen": -2.7469112873077393, "logits/rejected": -2.7380030155181885, "logps/chosen": -205.6768035888672, "logps/rejected": -195.28128051757812, "loss": 0.9039, "rewards/accuracies": 0.6343749761581421, "rewards/chosen": -0.15377888083457947, "rewards/margins": 0.11534550040960312, "rewards/rejected": -0.2691243886947632, "step": 3820 }, { "epoch": 1.12, "learning_rate": 3.952592074452914e-07, "logits/chosen": -2.765177011489868, "logits/rejected": -2.7546281814575195, "logps/chosen": -202.8085479736328, "logps/rejected": -195.9279022216797, "loss": 0.9343, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.15616342425346375, "rewards/margins": 0.08027738332748413, "rewards/rejected": -0.23644080758094788, "step": 3830 }, { "epoch": 1.12, "learning_rate": 3.9456769906213885e-07, "logits/chosen": -2.7568039894104004, "logits/rejected": -2.7418313026428223, "logps/chosen": -215.3795166015625, "logps/rejected": -184.53538513183594, "loss": 0.9448, "rewards/accuracies": 0.546875, "rewards/chosen": -0.16710713505744934, "rewards/margins": 0.04904966801404953, "rewards/rejected": -0.21615679562091827, "step": 3840 }, { "epoch": 1.12, "learning_rate": 3.9387452489857365e-07, "logits/chosen": -2.716653347015381, "logits/rejected": -2.7444241046905518, "logps/chosen": -187.95553588867188, "logps/rejected": -183.2508087158203, "loss": 0.9209, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.15672233700752258, "rewards/margins": 0.10151603072881699, "rewards/rejected": -0.25823837518692017, "step": 3850 }, { "epoch": 1.13, "learning_rate": 3.9317969294169086e-07, "logits/chosen": -2.717005491256714, "logits/rejected": -2.7423160076141357, "logps/chosen": -189.92715454101562, "logps/rejected": -203.66329956054688, "loss": 0.9349, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16257938742637634, "rewards/margins": 0.07710906118154526, "rewards/rejected": -0.239688441157341, "step": 3860 }, { "epoch": 1.13, "learning_rate": 3.9248321119768725e-07, "logits/chosen": -2.756875991821289, "logits/rejected": -2.7580325603485107, "logps/chosen": -193.33822631835938, "logps/rejected": -197.3529052734375, "loss": 0.9374, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.17182958126068115, "rewards/margins": 0.07164481282234192, "rewards/rejected": -0.24347439408302307, "step": 3870 }, { "epoch": 1.13, "learning_rate": 3.9178508769176954e-07, "logits/chosen": -2.746933698654175, "logits/rejected": -2.765470266342163, "logps/chosen": -183.94923400878906, "logps/rejected": -172.761962890625, "loss": 0.9257, "rewards/accuracies": 0.621874988079071, "rewards/chosen": -0.16591930389404297, "rewards/margins": 0.08755029737949371, "rewards/rejected": -0.2534696161746979, "step": 3880 }, { "epoch": 1.13, "learning_rate": 3.9108533046806134e-07, "logits/chosen": -2.714050769805908, "logits/rejected": -2.7366297245025635, "logps/chosen": -199.7872314453125, "logps/rejected": -189.11221313476562, "loss": 0.9329, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.18223315477371216, "rewards/margins": 0.05786599591374397, "rewards/rejected": -0.24009914696216583, "step": 3890 }, { "epoch": 1.14, "learning_rate": 3.903839475895111e-07, "logits/chosen": -2.7495663166046143, "logits/rejected": -2.7438509464263916, "logps/chosen": -196.3547821044922, "logps/rejected": -184.18392944335938, "loss": 0.9296, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.16668249666690826, "rewards/margins": 0.06621585786342621, "rewards/rejected": -0.23289835453033447, "step": 3900 }, { "epoch": 1.14, "eval_logits/chosen": -2.643688678741455, "eval_logits/rejected": -2.638190984725952, "eval_logps/chosen": -197.34109497070312, "eval_logps/rejected": -183.32252502441406, "eval_loss": 0.9405214786529541, "eval_rewards/accuracies": 0.5713800191879272, "eval_rewards/chosen": -0.17357788980007172, "eval_rewards/margins": 0.06654638051986694, "eval_rewards/rejected": -0.24012430012226105, "eval_runtime": 443.4943, "eval_samples_per_second": 26.528, "eval_steps_per_second": 3.317, "step": 3900 }, { "epoch": 1.14, "learning_rate": 3.8968094713779847e-07, "logits/chosen": -2.7617404460906982, "logits/rejected": -2.75152325630188, "logps/chosen": -200.19493103027344, "logps/rejected": -186.37130737304688, "loss": 0.9433, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.2069055140018463, "rewards/margins": 0.04876132681965828, "rewards/rejected": -0.2556668221950531, "step": 3910 }, { "epoch": 1.14, "learning_rate": 3.8897633721324185e-07, "logits/chosen": -2.7647957801818848, "logits/rejected": -2.7173893451690674, "logps/chosen": -201.73190307617188, "logps/rejected": -167.64065551757812, "loss": 0.9488, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18705810606479645, "rewards/margins": 0.06296879798173904, "rewards/rejected": -0.2500268816947937, "step": 3920 }, { "epoch": 1.15, "learning_rate": 3.882701259347047e-07, "logits/chosen": -2.7746589183807373, "logits/rejected": -2.7673373222351074, "logps/chosen": -216.4488983154297, "logps/rejected": -196.27919006347656, "loss": 0.938, "rewards/accuracies": 0.59375, "rewards/chosen": -0.18840670585632324, "rewards/margins": 0.08028068393468857, "rewards/rejected": -0.2686874270439148, "step": 3930 }, { "epoch": 1.15, "learning_rate": 3.8756232143950217e-07, "logits/chosen": -2.764437198638916, "logits/rejected": -2.770145893096924, "logps/chosen": -193.11636352539062, "logps/rejected": -191.73863220214844, "loss": 0.923, "rewards/accuracies": 0.628125011920929, "rewards/chosen": -0.18075844645500183, "rewards/margins": 0.0936131626367569, "rewards/rejected": -0.2743716239929199, "step": 3940 }, { "epoch": 1.15, "learning_rate": 3.86852931883307e-07, "logits/chosen": -2.751063823699951, "logits/rejected": -2.739689588546753, "logps/chosen": -176.7470245361328, "logps/rejected": -161.86376953125, "loss": 0.9265, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.18067319691181183, "rewards/margins": 0.06665468215942383, "rewards/rejected": -0.24732787907123566, "step": 3950 }, { "epoch": 1.16, "learning_rate": 3.8614196544005614e-07, "logits/chosen": -2.758460760116577, "logits/rejected": -2.7591326236724854, "logps/chosen": -198.73178100585938, "logps/rejected": -187.89523315429688, "loss": 0.9229, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.18694129586219788, "rewards/margins": 0.06280256062746048, "rewards/rejected": -0.24974386394023895, "step": 3960 }, { "epoch": 1.16, "learning_rate": 3.854294303018558e-07, "logits/chosen": -2.755293369293213, "logits/rejected": -2.7594475746154785, "logps/chosen": -192.7220916748047, "logps/rejected": -184.11373901367188, "loss": 0.9507, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.18343867361545563, "rewards/margins": 0.06071305274963379, "rewards/rejected": -0.2441517412662506, "step": 3970 }, { "epoch": 1.16, "learning_rate": 3.8471533467888773e-07, "logits/chosen": -2.7248222827911377, "logits/rejected": -2.7303566932678223, "logps/chosen": -194.8540802001953, "logps/rejected": -191.58544921875, "loss": 0.9664, "rewards/accuracies": 0.5625, "rewards/chosen": -0.19519048929214478, "rewards/margins": 0.044502776116132736, "rewards/rejected": -0.23969325423240662, "step": 3980 }, { "epoch": 1.16, "learning_rate": 3.8399968679931436e-07, "logits/chosen": -2.778623342514038, "logits/rejected": -2.7736964225769043, "logps/chosen": -213.4280242919922, "logps/rejected": -194.47512817382812, "loss": 0.9344, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.17752891778945923, "rewards/margins": 0.04777335748076439, "rewards/rejected": -0.2253022938966751, "step": 3990 }, { "epoch": 1.17, "learning_rate": 3.832824949091839e-07, "logits/chosen": -2.7643864154815674, "logits/rejected": -2.729543685913086, "logps/chosen": -191.1431884765625, "logps/rejected": -157.68287658691406, "loss": 0.9338, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.18981656432151794, "rewards/margins": 0.03325197845697403, "rewards/rejected": -0.22306856513023376, "step": 4000 }, { "epoch": 1.17, "eval_logits/chosen": -2.6482629776000977, "eval_logits/rejected": -2.642835855484009, "eval_logps/chosen": -197.35194396972656, "eval_logps/rejected": -183.3475799560547, "eval_loss": 0.9401752948760986, "eval_rewards/accuracies": 0.5771583914756775, "eval_rewards/chosen": -0.17466464638710022, "eval_rewards/margins": 0.06796282529830933, "eval_rewards/rejected": -0.24262748658657074, "eval_runtime": 443.5283, "eval_samples_per_second": 26.526, "eval_steps_per_second": 3.317, "step": 4000 }, { "epoch": 1.17, "learning_rate": 3.825637672723354e-07, "logits/chosen": -2.7617149353027344, "logits/rejected": -2.7304465770721436, "logps/chosen": -219.8583984375, "logps/rejected": -195.1961212158203, "loss": 0.9498, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1955319046974182, "rewards/margins": 0.05322521924972534, "rewards/rejected": -0.24875712394714355, "step": 4010 }, { "epoch": 1.17, "learning_rate": 3.818435121703036e-07, "logits/chosen": -2.721818208694458, "logits/rejected": -2.752952814102173, "logps/chosen": -190.40536499023438, "logps/rejected": -196.10472106933594, "loss": 0.9467, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.1841999590396881, "rewards/margins": 0.058970857411623, "rewards/rejected": -0.24317078292369843, "step": 4020 }, { "epoch": 1.18, "learning_rate": 3.8112173790222356e-07, "logits/chosen": -2.7530345916748047, "logits/rejected": -2.710888385772705, "logps/chosen": -198.40542602539062, "logps/rejected": -170.2858123779297, "loss": 0.9357, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.19204849004745483, "rewards/margins": 0.051998645067214966, "rewards/rejected": -0.2440471202135086, "step": 4030 }, { "epoch": 1.18, "learning_rate": 3.8039845278473467e-07, "logits/chosen": -2.7375307083129883, "logits/rejected": -2.7373642921447754, "logps/chosen": -193.3035888671875, "logps/rejected": -186.16928100585938, "loss": 0.9375, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.17028315365314484, "rewards/margins": 0.05886261910200119, "rewards/rejected": -0.22914576530456543, "step": 4040 }, { "epoch": 1.18, "learning_rate": 3.7967366515188515e-07, "logits/chosen": -2.7566046714782715, "logits/rejected": -2.7663302421569824, "logps/chosen": -203.11294555664062, "logps/rejected": -197.13296508789062, "loss": 0.9227, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.18032750487327576, "rewards/margins": 0.082804836332798, "rewards/rejected": -0.26313233375549316, "step": 4050 }, { "epoch": 1.18, "learning_rate": 3.7894738335503605e-07, "logits/chosen": -2.718829870223999, "logits/rejected": -2.7271435260772705, "logps/chosen": -169.3847198486328, "logps/rejected": -169.4076385498047, "loss": 0.9259, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.19295862317085266, "rewards/margins": 0.03824089840054512, "rewards/rejected": -0.23119953274726868, "step": 4060 }, { "epoch": 1.19, "learning_rate": 3.782196157627649e-07, "logits/chosen": -2.7349534034729004, "logits/rejected": -2.7287216186523438, "logps/chosen": -188.2509765625, "logps/rejected": -180.3507080078125, "loss": 0.9225, "rewards/accuracies": 0.578125, "rewards/chosen": -0.1794901192188263, "rewards/margins": 0.09128087759017944, "rewards/rejected": -0.27077096700668335, "step": 4070 }, { "epoch": 1.19, "learning_rate": 3.7749037076076915e-07, "logits/chosen": -2.734947681427002, "logits/rejected": -2.7134604454040527, "logps/chosen": -188.677490234375, "logps/rejected": -162.92825317382812, "loss": 0.9366, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.17669777572155, "rewards/margins": 0.058089107275009155, "rewards/rejected": -0.23478689789772034, "step": 4080 }, { "epoch": 1.19, "learning_rate": 3.767596567517698e-07, "logits/chosen": -2.747828960418701, "logits/rejected": -2.728135585784912, "logps/chosen": -191.05796813964844, "logps/rejected": -178.67286682128906, "loss": 0.9393, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.1807515174150467, "rewards/margins": 0.07468868046998978, "rewards/rejected": -0.2554401755332947, "step": 4090 }, { "epoch": 1.2, "learning_rate": 3.760274821554146e-07, "logits/chosen": -2.733659029006958, "logits/rejected": -2.734858274459839, "logps/chosen": -205.01156616210938, "logps/rejected": -184.3057403564453, "loss": 0.9257, "rewards/accuracies": 0.609375, "rewards/chosen": -0.1808921992778778, "rewards/margins": 0.07933205366134644, "rewards/rejected": -0.26022425293922424, "step": 4100 }, { "epoch": 1.2, "eval_logits/chosen": -2.6465163230895996, "eval_logits/rejected": -2.6410677433013916, "eval_logps/chosen": -197.38494873046875, "eval_logps/rejected": -183.38291931152344, "eval_loss": 0.9395056962966919, "eval_rewards/accuracies": 0.5766485333442688, "eval_rewards/chosen": -0.17796368896961212, "eval_rewards/margins": 0.06819843500852585, "eval_rewards/rejected": -0.24616213142871857, "eval_runtime": 443.4769, "eval_samples_per_second": 26.529, "eval_steps_per_second": 3.317, "step": 4100 }, { "epoch": 1.2, "learning_rate": 3.752938554081806e-07, "logits/chosen": -2.741283893585205, "logits/rejected": -2.728320360183716, "logps/chosen": -176.6365203857422, "logps/rejected": -167.493896484375, "loss": 0.9401, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.1969231367111206, "rewards/margins": 0.06731526553630829, "rewards/rejected": -0.2642384171485901, "step": 4110 }, { "epoch": 1.2, "learning_rate": 3.7455878496327765e-07, "logits/chosen": -2.7120602130889893, "logits/rejected": -2.741365671157837, "logps/chosen": -197.4280242919922, "logps/rejected": -198.5906219482422, "loss": 0.931, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.1776856780052185, "rewards/margins": 0.07181225717067719, "rewards/rejected": -0.2494979351758957, "step": 4120 }, { "epoch": 1.2, "learning_rate": 3.738222792905501e-07, "logits/chosen": -2.72918963432312, "logits/rejected": -2.709934949874878, "logps/chosen": -193.74960327148438, "logps/rejected": -181.6873779296875, "loss": 0.9355, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16433259844779968, "rewards/margins": 0.09380577504634857, "rewards/rejected": -0.25813838839530945, "step": 4130 }, { "epoch": 1.21, "learning_rate": 3.7308434687638025e-07, "logits/chosen": -2.7442197799682617, "logits/rejected": -2.738088369369507, "logps/chosen": -188.8663330078125, "logps/rejected": -181.5855712890625, "loss": 0.905, "rewards/accuracies": 0.59375, "rewards/chosen": -0.16837748885154724, "rewards/margins": 0.08360324800014496, "rewards/rejected": -0.251980721950531, "step": 4140 }, { "epoch": 1.21, "learning_rate": 3.723449962235896e-07, "logits/chosen": -2.7592225074768066, "logits/rejected": -2.73795747756958, "logps/chosen": -205.52249145507812, "logps/rejected": -195.18670654296875, "loss": 0.9521, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.21364426612854004, "rewards/margins": 0.04619845002889633, "rewards/rejected": -0.2598426938056946, "step": 4150 }, { "epoch": 1.21, "learning_rate": 3.7160423585134146e-07, "logits/chosen": -2.752201557159424, "logits/rejected": -2.752375602722168, "logps/chosen": -194.0412139892578, "logps/rejected": -177.90927124023438, "loss": 0.9319, "rewards/accuracies": 0.528124988079071, "rewards/chosen": -0.17724746465682983, "rewards/margins": 0.04986109584569931, "rewards/rejected": -0.22710859775543213, "step": 4160 }, { "epoch": 1.22, "learning_rate": 3.708620742950426e-07, "logits/chosen": -2.7732951641082764, "logits/rejected": -2.7519004344940186, "logps/chosen": -212.4149627685547, "logps/rejected": -189.8248748779297, "loss": 0.9311, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.17742520570755005, "rewards/margins": 0.06528773903846741, "rewards/rejected": -0.24271297454833984, "step": 4170 }, { "epoch": 1.22, "learning_rate": 3.70118520106245e-07, "logits/chosen": -2.7384650707244873, "logits/rejected": -2.7295033931732178, "logps/chosen": -188.52340698242188, "logps/rejected": -189.1070098876953, "loss": 0.9338, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.16893449425697327, "rewards/margins": 0.0758652538061142, "rewards/rejected": -0.24479976296424866, "step": 4180 }, { "epoch": 1.22, "learning_rate": 3.693735818525471e-07, "logits/chosen": -2.7295570373535156, "logits/rejected": -2.7191290855407715, "logps/chosen": -190.3443145751953, "logps/rejected": -174.7919158935547, "loss": 0.9479, "rewards/accuracies": 0.578125, "rewards/chosen": -0.164515420794487, "rewards/margins": 0.06707581877708435, "rewards/rejected": -0.23159126937389374, "step": 4190 }, { "epoch": 1.23, "learning_rate": 3.686272681174953e-07, "logits/chosen": -2.746267795562744, "logits/rejected": -2.7370259761810303, "logps/chosen": -184.0223388671875, "logps/rejected": -171.82571411132812, "loss": 0.9368, "rewards/accuracies": 0.578125, "rewards/chosen": -0.18105138838291168, "rewards/margins": 0.06937004625797272, "rewards/rejected": -0.2504214644432068, "step": 4200 }, { "epoch": 1.23, "eval_logits/chosen": -2.6547842025756836, "eval_logits/rejected": -2.6495087146759033, "eval_logps/chosen": -197.39144897460938, "eval_logps/rejected": -183.40631103515625, "eval_loss": 0.9386326670646667, "eval_rewards/accuracies": 0.5832766890525818, "eval_rewards/chosen": -0.17861297726631165, "eval_rewards/margins": 0.06988853961229324, "eval_rewards/rejected": -0.2485015094280243, "eval_runtime": 443.4997, "eval_samples_per_second": 26.528, "eval_steps_per_second": 3.317, "step": 4200 }, { "epoch": 1.23, "learning_rate": 3.6787958750048505e-07, "logits/chosen": -2.752012252807617, "logits/rejected": -2.7526049613952637, "logps/chosen": -192.90499877929688, "logps/rejected": -189.8139190673828, "loss": 0.9222, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.1890353262424469, "rewards/margins": 0.0668584331870079, "rewards/rejected": -0.2558937668800354, "step": 4210 }, { "epoch": 1.23, "learning_rate": 3.671305486166615e-07, "logits/chosen": -2.7432100772857666, "logits/rejected": -2.7489893436431885, "logps/chosen": -195.6559295654297, "logps/rejected": -184.36268615722656, "loss": 0.9649, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.22122982144355774, "rewards/margins": 0.031619004905223846, "rewards/rejected": -0.25284886360168457, "step": 4220 }, { "epoch": 1.23, "learning_rate": 3.663801600968206e-07, "logits/chosen": -2.720708131790161, "logits/rejected": -2.7209668159484863, "logps/chosen": -196.4862518310547, "logps/rejected": -171.839111328125, "loss": 0.9449, "rewards/accuracies": 0.546875, "rewards/chosen": -0.1937265694141388, "rewards/margins": 0.04329920932650566, "rewards/rejected": -0.23702581226825714, "step": 4230 }, { "epoch": 1.24, "learning_rate": 3.656284305873093e-07, "logits/chosen": -2.712952136993408, "logits/rejected": -2.742710590362549, "logps/chosen": -195.47030639648438, "logps/rejected": -192.92300415039062, "loss": 0.9188, "rewards/accuracies": 0.59375, "rewards/chosen": -0.1770908236503601, "rewards/margins": 0.08894769847393036, "rewards/rejected": -0.2660385072231293, "step": 4240 }, { "epoch": 1.24, "learning_rate": 3.6487536874992634e-07, "logits/chosen": -2.7471885681152344, "logits/rejected": -2.7262566089630127, "logps/chosen": -197.21139526367188, "logps/rejected": -179.6346893310547, "loss": 0.9263, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1807824820280075, "rewards/margins": 0.06291843205690384, "rewards/rejected": -0.24370090663433075, "step": 4250 }, { "epoch": 1.24, "learning_rate": 3.6412098326182193e-07, "logits/chosen": -2.7384915351867676, "logits/rejected": -2.759437084197998, "logps/chosen": -208.9658203125, "logps/rejected": -195.96910095214844, "loss": 0.9389, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19169051945209503, "rewards/margins": 0.06298931688070297, "rewards/rejected": -0.2546798288822174, "step": 4260 }, { "epoch": 1.25, "learning_rate": 3.633652828153982e-07, "logits/chosen": -2.758999824523926, "logits/rejected": -2.7437479496002197, "logps/chosen": -196.12710571289062, "logps/rejected": -175.85052490234375, "loss": 0.9342, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.20808005332946777, "rewards/margins": 0.05318453162908554, "rewards/rejected": -0.2612645626068115, "step": 4270 }, { "epoch": 1.25, "learning_rate": 3.626082761182089e-07, "logits/chosen": -2.738142490386963, "logits/rejected": -2.7285945415496826, "logps/chosen": -195.19175720214844, "logps/rejected": -186.00698852539062, "loss": 0.9236, "rewards/accuracies": 0.609375, "rewards/chosen": -0.16431716084480286, "rewards/margins": 0.0769704058766365, "rewards/rejected": -0.24128755927085876, "step": 4280 }, { "epoch": 1.25, "learning_rate": 3.6184997189285883e-07, "logits/chosen": -2.7633702754974365, "logits/rejected": -2.77644419670105, "logps/chosen": -221.3899688720703, "logps/rejected": -203.3837432861328, "loss": 0.9114, "rewards/accuracies": 0.621874988079071, "rewards/chosen": -0.17412501573562622, "rewards/margins": 0.11658620834350586, "rewards/rejected": -0.2907112240791321, "step": 4290 }, { "epoch": 1.25, "learning_rate": 3.610903788769039e-07, "logits/chosen": -2.723555088043213, "logits/rejected": -2.7423713207244873, "logps/chosen": -177.48287963867188, "logps/rejected": -188.9573974609375, "loss": 0.916, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.17435404658317566, "rewards/margins": 0.1128971129655838, "rewards/rejected": -0.28725117444992065, "step": 4300 }, { "epoch": 1.25, "eval_logits/chosen": -2.6445152759552, "eval_logits/rejected": -2.6390397548675537, "eval_logps/chosen": -197.41690063476562, "eval_logps/rejected": -183.43453979492188, "eval_loss": 0.9384915828704834, "eval_rewards/accuracies": 0.5763086080551147, "eval_rewards/chosen": -0.18115966022014618, "eval_rewards/margins": 0.07016538083553314, "eval_rewards/rejected": -0.2513250708580017, "eval_runtime": 443.5422, "eval_samples_per_second": 26.525, "eval_steps_per_second": 3.316, "step": 4300 }, { "epoch": 1.26, "learning_rate": 3.603295058227498e-07, "logits/chosen": -2.7326014041900635, "logits/rejected": -2.713841438293457, "logps/chosen": -198.6339874267578, "logps/rejected": -174.13699340820312, "loss": 0.935, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.19719673693180084, "rewards/margins": 0.0652797743678093, "rewards/rejected": -0.26247650384902954, "step": 4310 }, { "epoch": 1.26, "learning_rate": 3.5956736149755165e-07, "logits/chosen": -2.677794933319092, "logits/rejected": -2.6976592540740967, "logps/chosen": -189.23277282714844, "logps/rejected": -194.13796997070312, "loss": 0.9088, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.1718340814113617, "rewards/margins": 0.0862637609243393, "rewards/rejected": -0.2580978274345398, "step": 4320 }, { "epoch": 1.26, "learning_rate": 3.588039546831125e-07, "logits/chosen": -2.7398147583007812, "logits/rejected": -2.780071258544922, "logps/chosen": -204.4764404296875, "logps/rejected": -211.4398956298828, "loss": 0.9095, "rewards/accuracies": 0.621874988079071, "rewards/chosen": -0.19177083671092987, "rewards/margins": 0.09926985204219818, "rewards/rejected": -0.29104071855545044, "step": 4330 }, { "epoch": 1.27, "learning_rate": 3.580392941757828e-07, "logits/chosen": -2.7215991020202637, "logits/rejected": -2.7327351570129395, "logps/chosen": -199.95437622070312, "logps/rejected": -191.2192840576172, "loss": 0.9253, "rewards/accuracies": 0.621874988079071, "rewards/chosen": -0.17956005036830902, "rewards/margins": 0.09569841623306274, "rewards/rejected": -0.27525845170021057, "step": 4340 }, { "epoch": 1.27, "learning_rate": 3.5727338878635837e-07, "logits/chosen": -2.71069598197937, "logits/rejected": -2.736706018447876, "logps/chosen": -192.46652221679688, "logps/rejected": -198.8728790283203, "loss": 0.925, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.17255479097366333, "rewards/margins": 0.05931825190782547, "rewards/rejected": -0.2318730354309082, "step": 4350 }, { "epoch": 1.27, "learning_rate": 3.5650624733997944e-07, "logits/chosen": -2.746736526489258, "logits/rejected": -2.7394838333129883, "logps/chosen": -189.6582794189453, "logps/rejected": -163.8761749267578, "loss": 0.9395, "rewards/accuracies": 0.59375, "rewards/chosen": -0.16428257524967194, "rewards/margins": 0.0878874734044075, "rewards/rejected": -0.25217002630233765, "step": 4360 }, { "epoch": 1.27, "learning_rate": 3.5573787867602834e-07, "logits/chosen": -2.731800079345703, "logits/rejected": -2.7399964332580566, "logps/chosen": -184.02911376953125, "logps/rejected": -175.05958557128906, "loss": 0.9325, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.21719913184642792, "rewards/margins": 0.06476739048957825, "rewards/rejected": -0.28196650743484497, "step": 4370 }, { "epoch": 1.28, "learning_rate": 3.5496829164802844e-07, "logits/chosen": -2.7151689529418945, "logits/rejected": -2.7390894889831543, "logps/chosen": -179.9576873779297, "logps/rejected": -177.39881896972656, "loss": 0.9121, "rewards/accuracies": 0.609375, "rewards/chosen": -0.18142959475517273, "rewards/margins": 0.09553618729114532, "rewards/rejected": -0.27696579694747925, "step": 4380 }, { "epoch": 1.28, "learning_rate": 3.5419749512354134e-07, "logits/chosen": -2.712517738342285, "logits/rejected": -2.7266857624053955, "logps/chosen": -174.7775421142578, "logps/rejected": -166.7398223876953, "loss": 0.9256, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.18136021494865417, "rewards/margins": 0.06892851740121841, "rewards/rejected": -0.2502886950969696, "step": 4390 }, { "epoch": 1.28, "learning_rate": 3.534254979840653e-07, "logits/chosen": -2.7191343307495117, "logits/rejected": -2.7256722450256348, "logps/chosen": -185.9070587158203, "logps/rejected": -176.5060272216797, "loss": 0.9093, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.18107880651950836, "rewards/margins": 0.07841619849205017, "rewards/rejected": -0.25949499011039734, "step": 4400 }, { "epoch": 1.28, "eval_logits/chosen": -2.6502106189727783, "eval_logits/rejected": -2.6448378562927246, "eval_logps/chosen": -197.46881103515625, "eval_logps/rejected": -183.49722290039062, "eval_loss": 0.9375219941139221, "eval_rewards/accuracies": 0.5831067562103271, "eval_rewards/chosen": -0.1863512098789215, "eval_rewards/margins": 0.07124443352222443, "eval_rewards/rejected": -0.25759562849998474, "eval_runtime": 443.5197, "eval_samples_per_second": 26.526, "eval_steps_per_second": 3.317, "step": 4400 }, { "epoch": 1.29, "learning_rate": 3.526523091249324e-07, "logits/chosen": -2.7146754264831543, "logits/rejected": -2.723759889602661, "logps/chosen": -168.30792236328125, "logps/rejected": -174.40383911132812, "loss": 0.9294, "rewards/accuracies": 0.578125, "rewards/chosen": -0.1705663502216339, "rewards/margins": 0.08236994594335556, "rewards/rejected": -0.25293630361557007, "step": 4410 }, { "epoch": 1.29, "learning_rate": 3.518779374552066e-07, "logits/chosen": -2.7407498359680176, "logits/rejected": -2.737842082977295, "logps/chosen": -195.39694213867188, "logps/rejected": -192.8684539794922, "loss": 0.9102, "rewards/accuracies": 0.621874988079071, "rewards/chosen": -0.2054818570613861, "rewards/margins": 0.10714855045080185, "rewards/rejected": -0.31263041496276855, "step": 4420 }, { "epoch": 1.29, "learning_rate": 3.511023918975806e-07, "logits/chosen": -2.738293409347534, "logits/rejected": -2.719567060470581, "logps/chosen": -200.95350646972656, "logps/rejected": -166.63438415527344, "loss": 0.9335, "rewards/accuracies": 0.59375, "rewards/chosen": -0.1738850325345993, "rewards/margins": 0.07271697372198105, "rewards/rejected": -0.24660198390483856, "step": 4430 }, { "epoch": 1.3, "learning_rate": 3.5032568138827317e-07, "logits/chosen": -2.703068256378174, "logits/rejected": -2.6935973167419434, "logps/chosen": -173.39498901367188, "logps/rejected": -169.9501190185547, "loss": 0.9397, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.17545907199382782, "rewards/margins": 0.06307219713926315, "rewards/rejected": -0.23853127658367157, "step": 4440 }, { "epoch": 1.3, "learning_rate": 3.4954781487692645e-07, "logits/chosen": -2.705918550491333, "logits/rejected": -2.718723773956299, "logps/chosen": -175.64553833007812, "logps/rejected": -183.96029663085938, "loss": 0.9333, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.18060100078582764, "rewards/margins": 0.08112530410289764, "rewards/rejected": -0.2617262899875641, "step": 4450 }, { "epoch": 1.3, "learning_rate": 3.487688013265024e-07, "logits/chosen": -2.717538356781006, "logits/rejected": -2.711728572845459, "logps/chosen": -190.5551300048828, "logps/rejected": -174.3909149169922, "loss": 0.9367, "rewards/accuracies": 0.5625, "rewards/chosen": -0.17379295825958252, "rewards/margins": 0.06317581981420517, "rewards/rejected": -0.2369687855243683, "step": 4460 }, { "epoch": 1.3, "learning_rate": 3.479886497131799e-07, "logits/chosen": -2.7508199214935303, "logits/rejected": -2.751966953277588, "logps/chosen": -189.25047302246094, "logps/rejected": -190.21163940429688, "loss": 0.9321, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.20376893877983093, "rewards/margins": 0.07644443213939667, "rewards/rejected": -0.2802133560180664, "step": 4470 }, { "epoch": 1.31, "learning_rate": 3.472073690262509e-07, "logits/chosen": -2.7412967681884766, "logits/rejected": -2.747689723968506, "logps/chosen": -190.6803741455078, "logps/rejected": -186.3988494873047, "loss": 0.9372, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.20845802128314972, "rewards/margins": 0.05214614421129227, "rewards/rejected": -0.2606041729450226, "step": 4480 }, { "epoch": 1.31, "learning_rate": 3.464249682680174e-07, "logits/chosen": -2.7326245307922363, "logits/rejected": -2.7284915447235107, "logps/chosen": -201.30905151367188, "logps/rejected": -185.57102966308594, "loss": 0.9209, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.17261795699596405, "rewards/margins": 0.08153346180915833, "rewards/rejected": -0.2541514039039612, "step": 4490 }, { "epoch": 1.31, "learning_rate": 3.4564145645368726e-07, "logits/chosen": -2.745229721069336, "logits/rejected": -2.7422919273376465, "logps/chosen": -202.03028869628906, "logps/rejected": -189.1793212890625, "loss": 0.9408, "rewards/accuracies": 0.528124988079071, "rewards/chosen": -0.2132134884595871, "rewards/margins": 0.06484152376651764, "rewards/rejected": -0.27805501222610474, "step": 4500 }, { "epoch": 1.31, "eval_logits/chosen": -2.6475512981414795, "eval_logits/rejected": -2.6421573162078857, "eval_logps/chosen": -197.50157165527344, "eval_logps/rejected": -183.53640747070312, "eval_loss": 0.9368076324462891, "eval_rewards/accuracies": 0.579707682132721, "eval_rewards/chosen": -0.18962688744068146, "eval_rewards/margins": 0.0718846246600151, "eval_rewards/rejected": -0.26151153445243835, "eval_runtime": 443.5492, "eval_samples_per_second": 26.525, "eval_steps_per_second": 3.316, "step": 4500 }, { "epoch": 1.32, "learning_rate": 3.448568426112703e-07, "logits/chosen": -2.7658443450927734, "logits/rejected": -2.7624783515930176, "logps/chosen": -219.56533813476562, "logps/rejected": -200.84490966796875, "loss": 0.9374, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.20336861908435822, "rewards/margins": 0.058419667184352875, "rewards/rejected": -0.2617882788181305, "step": 4510 }, { "epoch": 1.32, "learning_rate": 3.4407113578147484e-07, "logits/chosen": -2.710071325302124, "logits/rejected": -2.7460498809814453, "logps/chosen": -192.58006286621094, "logps/rejected": -201.64654541015625, "loss": 0.9309, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.18946990370750427, "rewards/margins": 0.07266337424516678, "rewards/rejected": -0.26213327050209045, "step": 4520 }, { "epoch": 1.32, "learning_rate": 3.4328434501760285e-07, "logits/chosen": -2.7125916481018066, "logits/rejected": -2.717337131500244, "logps/chosen": -200.4038848876953, "logps/rejected": -182.41598510742188, "loss": 0.9249, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21740098297595978, "rewards/margins": 0.08536939322948456, "rewards/rejected": -0.30277037620544434, "step": 4530 }, { "epoch": 1.32, "learning_rate": 3.4249647938544604e-07, "logits/chosen": -2.742849349975586, "logits/rejected": -2.7103047370910645, "logps/chosen": -201.5478057861328, "logps/rejected": -189.70504760742188, "loss": 0.9279, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.1888127475976944, "rewards/margins": 0.0719815269112587, "rewards/rejected": -0.2607942521572113, "step": 4540 }, { "epoch": 1.33, "learning_rate": 3.417075479631812e-07, "logits/chosen": -2.7691056728363037, "logits/rejected": -2.7712225914001465, "logps/chosen": -187.98654174804688, "logps/rejected": -191.6451873779297, "loss": 0.9276, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.2066299170255661, "rewards/margins": 0.08311845362186432, "rewards/rejected": -0.2897483706474304, "step": 4550 }, { "epoch": 1.33, "learning_rate": 3.409175598412658e-07, "logits/chosen": -2.722172737121582, "logits/rejected": -2.7517924308776855, "logps/chosen": -192.55972290039062, "logps/rejected": -193.70285034179688, "loss": 0.9166, "rewards/accuracies": 0.59375, "rewards/chosen": -0.19656968116760254, "rewards/margins": 0.08301197737455368, "rewards/rejected": -0.27958163619041443, "step": 4560 }, { "epoch": 1.33, "learning_rate": 3.40126524122333e-07, "logits/chosen": -2.7423648834228516, "logits/rejected": -2.7352731227874756, "logps/chosen": -193.5330352783203, "logps/rejected": -192.60610961914062, "loss": 0.9136, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.18352532386779785, "rewards/margins": 0.08987872302532196, "rewards/rejected": -0.273404061794281, "step": 4570 }, { "epoch": 1.34, "learning_rate": 3.3933444992108703e-07, "logits/chosen": -2.7353925704956055, "logits/rejected": -2.701998472213745, "logps/chosen": -204.79664611816406, "logps/rejected": -169.64654541015625, "loss": 0.9283, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.18033994734287262, "rewards/margins": 0.06377788633108139, "rewards/rejected": -0.24411781132221222, "step": 4580 }, { "epoch": 1.34, "learning_rate": 3.3854134636419783e-07, "logits/chosen": -2.70586895942688, "logits/rejected": -2.7365779876708984, "logps/chosen": -196.3794708251953, "logps/rejected": -186.25599670410156, "loss": 0.9169, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.19063739478588104, "rewards/margins": 0.07079647481441498, "rewards/rejected": -0.26143383979797363, "step": 4590 }, { "epoch": 1.34, "learning_rate": 3.377472225901963e-07, "logits/chosen": -2.7350027561187744, "logits/rejected": -2.7467072010040283, "logps/chosen": -196.7301788330078, "logps/rejected": -190.94921875, "loss": 0.9245, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.2064705789089203, "rewards/margins": 0.0866440087556839, "rewards/rejected": -0.293114572763443, "step": 4600 }, { "epoch": 1.34, "eval_logits/chosen": -2.661395311355591, "eval_logits/rejected": -2.656251907348633, "eval_logps/chosen": -197.5314483642578, "eval_logps/rejected": -183.58152770996094, "eval_loss": 0.9362921118736267, "eval_rewards/accuracies": 0.5786879658699036, "eval_rewards/chosen": -0.19261135160923004, "eval_rewards/margins": 0.0734131708741188, "eval_rewards/rejected": -0.26602452993392944, "eval_runtime": 443.5217, "eval_samples_per_second": 26.526, "eval_steps_per_second": 3.317, "step": 4600 }, { "epoch": 1.34, "learning_rate": 3.3695208774936863e-07, "logits/chosen": -2.7428646087646484, "logits/rejected": -2.7751758098602295, "logps/chosen": -176.3280029296875, "logps/rejected": -185.43893432617188, "loss": 0.9284, "rewards/accuracies": 0.628125011920929, "rewards/chosen": -0.17020182311534882, "rewards/margins": 0.10501272976398468, "rewards/rejected": -0.2752145528793335, "step": 4610 }, { "epoch": 1.35, "learning_rate": 3.36155951003651e-07, "logits/chosen": -2.7163808345794678, "logits/rejected": -2.720247745513916, "logps/chosen": -197.26736450195312, "logps/rejected": -181.1373291015625, "loss": 0.933, "rewards/accuracies": 0.53125, "rewards/chosen": -0.1962851732969284, "rewards/margins": 0.06428460776805878, "rewards/rejected": -0.2605697810649872, "step": 4620 }, { "epoch": 1.35, "learning_rate": 3.353588215265243e-07, "logits/chosen": -2.741788864135742, "logits/rejected": -2.771073818206787, "logps/chosen": -203.08348083496094, "logps/rejected": -204.08729553222656, "loss": 0.9208, "rewards/accuracies": 0.640625, "rewards/chosen": -0.20287474989891052, "rewards/margins": 0.10334237664937973, "rewards/rejected": -0.30621713399887085, "step": 4630 }, { "epoch": 1.35, "learning_rate": 3.3456070850290773e-07, "logits/chosen": -2.7344655990600586, "logits/rejected": -2.74898624420166, "logps/chosen": -181.22328186035156, "logps/rejected": -167.53872680664062, "loss": 0.9273, "rewards/accuracies": 0.59375, "rewards/chosen": -0.1856803596019745, "rewards/margins": 0.07511058449745178, "rewards/rejected": -0.26079094409942627, "step": 4640 }, { "epoch": 1.36, "learning_rate": 3.337616211290539e-07, "logits/chosen": -2.735715389251709, "logits/rejected": -2.7299177646636963, "logps/chosen": -193.27822875976562, "logps/rejected": -179.62449645996094, "loss": 0.9184, "rewards/accuracies": 0.5625, "rewards/chosen": -0.16103467345237732, "rewards/margins": 0.08624567091464996, "rewards/rejected": -0.2472803294658661, "step": 4650 }, { "epoch": 1.36, "learning_rate": 3.329615686124419e-07, "logits/chosen": -2.7415900230407715, "logits/rejected": -2.7193005084991455, "logps/chosen": -193.71746826171875, "logps/rejected": -173.92623901367188, "loss": 0.9443, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.23755891621112823, "rewards/margins": 0.03554988652467728, "rewards/rejected": -0.2731088101863861, "step": 4660 }, { "epoch": 1.36, "learning_rate": 3.321605601716719e-07, "logits/chosen": -2.7361927032470703, "logits/rejected": -2.7571628093719482, "logps/chosen": -230.0321044921875, "logps/rejected": -213.3992156982422, "loss": 0.919, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.20671303570270538, "rewards/margins": 0.07165684551000595, "rewards/rejected": -0.2783699035644531, "step": 4670 }, { "epoch": 1.37, "learning_rate": 3.313586050363589e-07, "logits/chosen": -2.7228760719299316, "logits/rejected": -2.7285807132720947, "logps/chosen": -178.76773071289062, "logps/rejected": -175.6080322265625, "loss": 0.9404, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.18862402439117432, "rewards/margins": 0.0594087652862072, "rewards/rejected": -0.248032808303833, "step": 4680 }, { "epoch": 1.37, "learning_rate": 3.305557124470256e-07, "logits/chosen": -2.7296719551086426, "logits/rejected": -2.703669786453247, "logps/chosen": -177.8763427734375, "logps/rejected": -169.86013793945312, "loss": 0.9394, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.19478978216648102, "rewards/margins": 0.05060974508523941, "rewards/rejected": -0.24539950489997864, "step": 4690 }, { "epoch": 1.37, "learning_rate": 3.2975189165499705e-07, "logits/chosen": -2.7460432052612305, "logits/rejected": -2.742731809616089, "logps/chosen": -193.16432189941406, "logps/rejected": -172.71707153320312, "loss": 0.9469, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.23428598046302795, "rewards/margins": 0.02087447978556156, "rewards/rejected": -0.2551604211330414, "step": 4700 }, { "epoch": 1.37, "eval_logits/chosen": -2.663195848464966, "eval_logits/rejected": -2.658090591430664, "eval_logps/chosen": -197.5492706298828, "eval_logps/rejected": -183.58750915527344, "eval_loss": 0.9363651275634766, "eval_rewards/accuracies": 0.5774983167648315, "eval_rewards/chosen": -0.19439838826656342, "eval_rewards/margins": 0.0722242221236229, "eval_rewards/rejected": -0.2666226327419281, "eval_runtime": 443.5861, "eval_samples_per_second": 26.522, "eval_steps_per_second": 3.316, "step": 4700 }, { "epoch": 1.37, "learning_rate": 3.2894715192229334e-07, "logits/chosen": -2.7303051948547363, "logits/rejected": -2.715846538543701, "logps/chosen": -185.22119140625, "logps/rejected": -162.5220947265625, "loss": 0.9338, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.19038231670856476, "rewards/margins": 0.06438779830932617, "rewards/rejected": -0.25477010011672974, "step": 4710 }, { "epoch": 1.38, "learning_rate": 3.2814150252152297e-07, "logits/chosen": -2.7478604316711426, "logits/rejected": -2.7180073261260986, "logps/chosen": -212.9084014892578, "logps/rejected": -183.78271484375, "loss": 0.9501, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.22500388324260712, "rewards/margins": 0.048243068158626556, "rewards/rejected": -0.2732469439506531, "step": 4720 }, { "epoch": 1.38, "learning_rate": 3.273349527357761e-07, "logits/chosen": -2.721357822418213, "logits/rejected": -2.749497652053833, "logps/chosen": -183.78855895996094, "logps/rejected": -177.4352569580078, "loss": 0.9265, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.19720439612865448, "rewards/margins": 0.0935719683766365, "rewards/rejected": -0.2907763719558716, "step": 4730 }, { "epoch": 1.38, "learning_rate": 3.265275118585178e-07, "logits/chosen": -2.7387373447418213, "logits/rejected": -2.7455060482025146, "logps/chosen": -188.77359008789062, "logps/rejected": -185.95318603515625, "loss": 0.92, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.21058376133441925, "rewards/margins": 0.10225590318441391, "rewards/rejected": -0.31283968687057495, "step": 4740 }, { "epoch": 1.39, "learning_rate": 3.257191891934804e-07, "logits/chosen": -2.7524802684783936, "logits/rejected": -2.738579034805298, "logps/chosen": -195.68280029296875, "logps/rejected": -172.1834716796875, "loss": 0.9377, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.20516614615917206, "rewards/margins": 0.06340218335390091, "rewards/rejected": -0.26856836676597595, "step": 4750 }, { "epoch": 1.39, "learning_rate": 3.2490999405455675e-07, "logits/chosen": -2.7448105812072754, "logits/rejected": -2.7162296772003174, "logps/chosen": -224.0458526611328, "logps/rejected": -184.15225219726562, "loss": 0.9251, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18894138932228088, "rewards/margins": 0.0902334600687027, "rewards/rejected": -0.2791748642921448, "step": 4760 }, { "epoch": 1.39, "learning_rate": 3.24099935765693e-07, "logits/chosen": -2.7416248321533203, "logits/rejected": -2.7578914165496826, "logps/chosen": -175.80899047851562, "logps/rejected": -186.74563598632812, "loss": 0.9348, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.2133389413356781, "rewards/margins": 0.0710340365767479, "rewards/rejected": -0.2843729853630066, "step": 4770 }, { "epoch": 1.39, "learning_rate": 3.2328902366078055e-07, "logits/chosen": -2.7463059425354004, "logits/rejected": -2.7555289268493652, "logps/chosen": -216.08584594726562, "logps/rejected": -196.11654663085938, "loss": 0.9453, "rewards/accuracies": 0.59375, "rewards/chosen": -0.19555380940437317, "rewards/margins": 0.0949058011174202, "rewards/rejected": -0.29045960307121277, "step": 4780 }, { "epoch": 1.4, "learning_rate": 3.2247726708354916e-07, "logits/chosen": -2.7026150226593018, "logits/rejected": -2.727374315261841, "logps/chosen": -188.6559295654297, "logps/rejected": -184.67588806152344, "loss": 0.9123, "rewards/accuracies": 0.653124988079071, "rewards/chosen": -0.17864663898944855, "rewards/margins": 0.08552898466587067, "rewards/rejected": -0.2641756236553192, "step": 4790 }, { "epoch": 1.4, "learning_rate": 3.216646753874589e-07, "logits/chosen": -2.7416110038757324, "logits/rejected": -2.7252979278564453, "logps/chosen": -200.88369750976562, "logps/rejected": -183.188232421875, "loss": 0.9421, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.21473363041877747, "rewards/margins": 0.048976875841617584, "rewards/rejected": -0.26371049880981445, "step": 4800 }, { "epoch": 1.4, "eval_logits/chosen": -2.669057607650757, "eval_logits/rejected": -2.6640470027923584, "eval_logps/chosen": -197.55172729492188, "eval_logps/rejected": -183.60397338867188, "eval_loss": 0.9358345866203308, "eval_rewards/accuracies": 0.5819170475006104, "eval_rewards/chosen": -0.19464226067066193, "eval_rewards/margins": 0.07362484186887741, "eval_rewards/rejected": -0.26826706528663635, "eval_runtime": 443.5515, "eval_samples_per_second": 26.525, "eval_steps_per_second": 3.316, "step": 4800 }, { "epoch": 1.4, "learning_rate": 3.208512579355925e-07, "logits/chosen": -2.757784366607666, "logits/rejected": -2.7481346130371094, "logps/chosen": -201.49862670898438, "logps/rejected": -203.2624969482422, "loss": 0.9325, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19091400504112244, "rewards/margins": 0.06763036549091339, "rewards/rejected": -0.258544385433197, "step": 4810 }, { "epoch": 1.41, "learning_rate": 3.200370241005476e-07, "logits/chosen": -2.7480101585388184, "logits/rejected": -2.723578929901123, "logps/chosen": -202.5946502685547, "logps/rejected": -169.34739685058594, "loss": 0.9091, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.18458306789398193, "rewards/margins": 0.07845629006624222, "rewards/rejected": -0.26303935050964355, "step": 4820 }, { "epoch": 1.41, "learning_rate": 3.1922198326432835e-07, "logits/chosen": -2.7490594387054443, "logits/rejected": -2.738849639892578, "logps/chosen": -187.2062530517578, "logps/rejected": -175.1187286376953, "loss": 0.9332, "rewards/accuracies": 0.59375, "rewards/chosen": -0.19218167662620544, "rewards/margins": 0.08368309587240219, "rewards/rejected": -0.27586477994918823, "step": 4830 }, { "epoch": 1.41, "learning_rate": 3.184061448182379e-07, "logits/chosen": -2.7155652046203613, "logits/rejected": -2.6949169635772705, "logps/chosen": -202.61073303222656, "logps/rejected": -180.4763946533203, "loss": 0.9499, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.19347918033599854, "rewards/margins": 0.04405388981103897, "rewards/rejected": -0.2375330626964569, "step": 4840 }, { "epoch": 1.41, "learning_rate": 3.175895181627695e-07, "logits/chosen": -2.7712833881378174, "logits/rejected": -2.7551021575927734, "logps/chosen": -192.11685180664062, "logps/rejected": -185.52256774902344, "loss": 0.9448, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.2322724163532257, "rewards/margins": 0.05692540481686592, "rewards/rejected": -0.28919777274131775, "step": 4850 }, { "epoch": 1.42, "learning_rate": 3.167721127074988e-07, "logits/chosen": -2.754729986190796, "logits/rejected": -2.7834737300872803, "logps/chosen": -183.2649383544922, "logps/rejected": -194.07073974609375, "loss": 0.9309, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2212039977312088, "rewards/margins": 0.05648614838719368, "rewards/rejected": -0.2776901423931122, "step": 4860 }, { "epoch": 1.42, "learning_rate": 3.15953937870975e-07, "logits/chosen": -2.7316761016845703, "logits/rejected": -2.7409138679504395, "logps/chosen": -185.97994995117188, "logps/rejected": -187.85910034179688, "loss": 0.9352, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.19891253113746643, "rewards/margins": 0.0585172101855278, "rewards/rejected": -0.25742974877357483, "step": 4870 }, { "epoch": 1.42, "learning_rate": 3.1513500308061264e-07, "logits/chosen": -2.748883008956909, "logits/rejected": -2.7345004081726074, "logps/chosen": -204.35073852539062, "logps/rejected": -189.57794189453125, "loss": 0.9219, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.19797687232494354, "rewards/margins": 0.07122127711772919, "rewards/rejected": -0.26919814944267273, "step": 4880 }, { "epoch": 1.43, "learning_rate": 3.1431531777258265e-07, "logits/chosen": -2.7522618770599365, "logits/rejected": -2.7130308151245117, "logps/chosen": -183.6536102294922, "logps/rejected": -157.83010864257812, "loss": 0.9459, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.19545993208885193, "rewards/margins": 0.053138844668865204, "rewards/rejected": -0.24859876930713654, "step": 4890 }, { "epoch": 1.43, "learning_rate": 3.134948913917039e-07, "logits/chosen": -2.717576026916504, "logits/rejected": -2.730308771133423, "logps/chosen": -185.91043090820312, "logps/rejected": -183.8404998779297, "loss": 0.9076, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.1804662048816681, "rewards/margins": 0.09477277100086212, "rewards/rejected": -0.2752389907836914, "step": 4900 }, { "epoch": 1.43, "eval_logits/chosen": -2.667618751525879, "eval_logits/rejected": -2.662586212158203, "eval_logps/chosen": -197.56797790527344, "eval_logps/rejected": -183.62533569335938, "eval_loss": 0.9355936646461487, "eval_rewards/accuracies": 0.5798776149749756, "eval_rewards/chosen": -0.19626484811306, "eval_rewards/margins": 0.07414159178733826, "eval_rewards/rejected": -0.27040642499923706, "eval_runtime": 443.5025, "eval_samples_per_second": 26.527, "eval_steps_per_second": 3.317, "step": 4900 }, { "epoch": 1.43, "learning_rate": 3.126737333913344e-07, "logits/chosen": -2.769333600997925, "logits/rejected": -2.768833637237549, "logps/chosen": -220.874755859375, "logps/rejected": -203.6051483154297, "loss": 0.9021, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.1960815042257309, "rewards/margins": 0.13010276854038239, "rewards/rejected": -0.32618430256843567, "step": 4910 }, { "epoch": 1.44, "learning_rate": 3.1185185323326194e-07, "logits/chosen": -2.7267374992370605, "logits/rejected": -2.768446445465088, "logps/chosen": -182.239990234375, "logps/rejected": -190.83349609375, "loss": 0.906, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.19862093031406403, "rewards/margins": 0.08212271332740784, "rewards/rejected": -0.28074365854263306, "step": 4920 }, { "epoch": 1.44, "learning_rate": 3.110292603875956e-07, "logits/chosen": -2.7466022968292236, "logits/rejected": -2.7096469402313232, "logps/chosen": -208.9374542236328, "logps/rejected": -182.35670471191406, "loss": 0.948, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.2167479544878006, "rewards/margins": 0.04567436873912811, "rewards/rejected": -0.2624223232269287, "step": 4930 }, { "epoch": 1.44, "learning_rate": 3.1020596433265635e-07, "logits/chosen": -2.7337393760681152, "logits/rejected": -2.732323408126831, "logps/chosen": -184.12216186523438, "logps/rejected": -185.9611053466797, "loss": 0.9425, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.2332111895084381, "rewards/margins": 0.05965740606188774, "rewards/rejected": -0.29286855459213257, "step": 4940 }, { "epoch": 1.44, "learning_rate": 3.0938197455486783e-07, "logits/chosen": -2.6897988319396973, "logits/rejected": -2.7108638286590576, "logps/chosen": -186.6949005126953, "logps/rejected": -181.40200805664062, "loss": 0.9367, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.2100130021572113, "rewards/margins": 0.0862884670495987, "rewards/rejected": -0.2963015139102936, "step": 4950 }, { "epoch": 1.45, "learning_rate": 3.08557300548647e-07, "logits/chosen": -2.7417614459991455, "logits/rejected": -2.7184202671051025, "logps/chosen": -192.87637329101562, "logps/rejected": -162.53128051757812, "loss": 0.9197, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19202032685279846, "rewards/margins": 0.07406075298786163, "rewards/rejected": -0.2660810351371765, "step": 4960 }, { "epoch": 1.45, "learning_rate": 3.077319518162952e-07, "logits/chosen": -2.730668544769287, "logits/rejected": -2.739375352859497, "logps/chosen": -196.22915649414062, "logps/rejected": -202.01319885253906, "loss": 0.9224, "rewards/accuracies": 0.578125, "rewards/chosen": -0.21786180138587952, "rewards/margins": 0.07545635849237442, "rewards/rejected": -0.29331815242767334, "step": 4970 }, { "epoch": 1.45, "learning_rate": 3.069059378678878e-07, "logits/chosen": -2.732133150100708, "logits/rejected": -2.7478561401367188, "logps/chosen": -178.5888671875, "logps/rejected": -178.92031860351562, "loss": 0.9194, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18372566998004913, "rewards/margins": 0.10189918428659439, "rewards/rejected": -0.28562483191490173, "step": 4980 }, { "epoch": 1.46, "learning_rate": 3.0607926822116564e-07, "logits/chosen": -2.73771333694458, "logits/rejected": -2.738654851913452, "logps/chosen": -178.12435913085938, "logps/rejected": -174.51303100585938, "loss": 0.9264, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.19615371525287628, "rewards/margins": 0.07561203837394714, "rewards/rejected": -0.27176573872566223, "step": 4990 }, { "epoch": 1.46, "learning_rate": 3.0525195240142437e-07, "logits/chosen": -2.7430174350738525, "logits/rejected": -2.755254030227661, "logps/chosen": -195.0157470703125, "logps/rejected": -188.9918975830078, "loss": 0.94, "rewards/accuracies": 0.621874988079071, "rewards/chosen": -0.16923925280570984, "rewards/margins": 0.08428038656711578, "rewards/rejected": -0.25351962447166443, "step": 5000 }, { "epoch": 1.46, "eval_logits/chosen": -2.6492247581481934, "eval_logits/rejected": -2.643824577331543, "eval_logps/chosen": -197.6009521484375, "eval_logps/rejected": -183.6591033935547, "eval_loss": 0.9352905750274658, "eval_rewards/accuracies": 0.580047607421875, "eval_rewards/chosen": -0.19956253468990326, "eval_rewards/margins": 0.07421758025884628, "eval_rewards/rejected": -0.27378013730049133, "eval_runtime": 443.4518, "eval_samples_per_second": 26.531, "eval_steps_per_second": 3.317, "step": 5000 }, { "epoch": 1.46, "learning_rate": 3.044239999414055e-07, "logits/chosen": -2.7292063236236572, "logits/rejected": -2.706313133239746, "logps/chosen": -184.35232543945312, "logps/rejected": -179.50625610351562, "loss": 0.9422, "rewards/accuracies": 0.578125, "rewards/chosen": -0.20498375594615936, "rewards/margins": 0.06014307588338852, "rewards/rejected": -0.2651267945766449, "step": 5010 }, { "epoch": 1.46, "learning_rate": 3.0359542038118606e-07, "logits/chosen": -2.742978572845459, "logits/rejected": -2.734860420227051, "logps/chosen": -206.57275390625, "logps/rejected": -190.71548461914062, "loss": 0.9089, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.19854263961315155, "rewards/margins": 0.08930404484272003, "rewards/rejected": -0.2878466844558716, "step": 5020 }, { "epoch": 1.47, "learning_rate": 3.027662232680689e-07, "logits/chosen": -2.72959566116333, "logits/rejected": -2.729051113128662, "logps/chosen": -195.5106964111328, "logps/rejected": -171.5630645751953, "loss": 0.9356, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.18533293902873993, "rewards/margins": 0.09665311127901077, "rewards/rejected": -0.2819860577583313, "step": 5030 }, { "epoch": 1.47, "learning_rate": 3.0193641815647255e-07, "logits/chosen": -2.735405445098877, "logits/rejected": -2.7377450466156006, "logps/chosen": -194.8864288330078, "logps/rejected": -184.33419799804688, "loss": 0.9143, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20550887286663055, "rewards/margins": 0.09748460352420807, "rewards/rejected": -0.3029934763908386, "step": 5040 }, { "epoch": 1.47, "learning_rate": 3.011060146078212e-07, "logits/chosen": -2.740532636642456, "logits/rejected": -2.745987892150879, "logps/chosen": -198.83352661132812, "logps/rejected": -194.37173461914062, "loss": 0.9368, "rewards/accuracies": 0.546875, "rewards/chosen": -0.22939512133598328, "rewards/margins": 0.06054091453552246, "rewards/rejected": -0.28993600606918335, "step": 5050 }, { "epoch": 1.48, "learning_rate": 3.002750221904347e-07, "logits/chosen": -2.7253835201263428, "logits/rejected": -2.7207252979278564, "logps/chosen": -188.45120239257812, "logps/rejected": -186.4692840576172, "loss": 0.9424, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.21971821784973145, "rewards/margins": 0.0432281419634819, "rewards/rejected": -0.26294639706611633, "step": 5060 }, { "epoch": 1.48, "learning_rate": 2.9944345047941785e-07, "logits/chosen": -2.716625928878784, "logits/rejected": -2.7393455505371094, "logps/chosen": -203.8828582763672, "logps/rejected": -200.91465759277344, "loss": 0.9127, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.2097250521183014, "rewards/margins": 0.07229788601398468, "rewards/rejected": -0.2820229232311249, "step": 5070 }, { "epoch": 1.48, "learning_rate": 2.9861130905655065e-07, "logits/chosen": -2.730515956878662, "logits/rejected": -2.6795966625213623, "logps/chosen": -215.6951904296875, "logps/rejected": -176.55252075195312, "loss": 0.9354, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2164064645767212, "rewards/margins": 0.047541793435811996, "rewards/rejected": -0.2639482617378235, "step": 5080 }, { "epoch": 1.48, "learning_rate": 2.977786075101774e-07, "logits/chosen": -2.740309953689575, "logits/rejected": -2.7642016410827637, "logps/chosen": -186.1707305908203, "logps/rejected": -197.3705596923828, "loss": 0.9294, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.23175272345542908, "rewards/margins": 0.0802375078201294, "rewards/rejected": -0.31199023127555847, "step": 5090 }, { "epoch": 1.49, "learning_rate": 2.9694535543509653e-07, "logits/chosen": -2.7505338191986084, "logits/rejected": -2.7634050846099854, "logps/chosen": -180.63919067382812, "logps/rejected": -180.73904418945312, "loss": 0.9288, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.21512751281261444, "rewards/margins": 0.08128011971712112, "rewards/rejected": -0.29640763998031616, "step": 5100 }, { "epoch": 1.49, "eval_logits/chosen": -2.6486804485321045, "eval_logits/rejected": -2.6432888507843018, "eval_logps/chosen": -197.6044921875, "eval_logps/rejected": -183.66253662109375, "eval_loss": 0.9350979924201965, "eval_rewards/accuracies": 0.580897331237793, "eval_rewards/chosen": -0.19991926848888397, "eval_rewards/margins": 0.07420553267002106, "eval_rewards/rejected": -0.27412480115890503, "eval_runtime": 443.5316, "eval_samples_per_second": 26.526, "eval_steps_per_second": 3.317, "step": 5100 }, { "epoch": 1.49, "learning_rate": 2.961115624324499e-07, "logits/chosen": -2.724372148513794, "logits/rejected": -2.7239785194396973, "logps/chosen": -184.83824157714844, "logps/rejected": -166.89259338378906, "loss": 0.9269, "rewards/accuracies": 0.578125, "rewards/chosen": -0.2036965787410736, "rewards/margins": 0.06950495392084122, "rewards/rejected": -0.27320152521133423, "step": 5110 }, { "epoch": 1.49, "learning_rate": 2.9527723810961207e-07, "logits/chosen": -2.735801935195923, "logits/rejected": -2.7238357067108154, "logps/chosen": -196.07200622558594, "logps/rejected": -173.1053009033203, "loss": 0.9487, "rewards/accuracies": 0.528124988079071, "rewards/chosen": -0.20888221263885498, "rewards/margins": 0.05614093691110611, "rewards/rejected": -0.2650231420993805, "step": 5120 }, { "epoch": 1.5, "learning_rate": 2.9444239208008e-07, "logits/chosen": -2.7660791873931885, "logits/rejected": -2.7649636268615723, "logps/chosen": -212.7071990966797, "logps/rejected": -200.44374084472656, "loss": 0.9248, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.21736297011375427, "rewards/margins": 0.06548234075307846, "rewards/rejected": -0.28284531831741333, "step": 5130 }, { "epoch": 1.5, "learning_rate": 2.936070339633618e-07, "logits/chosen": -2.736414909362793, "logits/rejected": -2.727729320526123, "logps/chosen": -184.80453491210938, "logps/rejected": -174.11581420898438, "loss": 0.9251, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.18705031275749207, "rewards/margins": 0.08300456404685974, "rewards/rejected": -0.2700548768043518, "step": 5140 }, { "epoch": 1.5, "learning_rate": 2.9277117338486616e-07, "logits/chosen": -2.738631248474121, "logits/rejected": -2.7362170219421387, "logps/chosen": -184.38180541992188, "logps/rejected": -176.70655822753906, "loss": 0.9248, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.22087529301643372, "rewards/margins": 0.07811127603054047, "rewards/rejected": -0.29898661375045776, "step": 5150 }, { "epoch": 1.51, "learning_rate": 2.9193481997579133e-07, "logits/chosen": -2.7428336143493652, "logits/rejected": -2.723949670791626, "logps/chosen": -219.9194793701172, "logps/rejected": -203.8686981201172, "loss": 0.9388, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.23469701409339905, "rewards/margins": 0.06632021069526672, "rewards/rejected": -0.30101722478866577, "step": 5160 }, { "epoch": 1.51, "learning_rate": 2.910979833730145e-07, "logits/chosen": -2.7712066173553467, "logits/rejected": -2.741729259490967, "logps/chosen": -203.03404235839844, "logps/rejected": -181.98988342285156, "loss": 0.9129, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.20010265707969666, "rewards/margins": 0.08742941915988922, "rewards/rejected": -0.2875320315361023, "step": 5170 }, { "epoch": 1.51, "learning_rate": 2.9026067321897995e-07, "logits/chosen": -2.717153787612915, "logits/rejected": -2.7007384300231934, "logps/chosen": -193.92271423339844, "logps/rejected": -167.20896911621094, "loss": 0.9185, "rewards/accuracies": 0.578125, "rewards/chosen": -0.19192959368228912, "rewards/margins": 0.06471812725067139, "rewards/rejected": -0.2566477060317993, "step": 5180 }, { "epoch": 1.51, "learning_rate": 2.8942289916158883e-07, "logits/chosen": -2.7420923709869385, "logits/rejected": -2.760744571685791, "logps/chosen": -181.37686157226562, "logps/rejected": -184.68063354492188, "loss": 0.9454, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.2095378190279007, "rewards/margins": 0.06707773357629776, "rewards/rejected": -0.27661553025245667, "step": 5190 }, { "epoch": 1.52, "learning_rate": 2.8858467085408763e-07, "logits/chosen": -2.76371431350708, "logits/rejected": -2.763125419616699, "logps/chosen": -198.66419982910156, "logps/rejected": -195.47776794433594, "loss": 0.927, "rewards/accuracies": 0.5625, "rewards/chosen": -0.23758378624916077, "rewards/margins": 0.06860102713108063, "rewards/rejected": -0.3061848282814026, "step": 5200 }, { "epoch": 1.52, "eval_logits/chosen": -2.6552000045776367, "eval_logits/rejected": -2.6499037742614746, "eval_logps/chosen": -197.61444091796875, "eval_logps/rejected": -183.68826293945312, "eval_loss": 0.9342753887176514, "eval_rewards/accuracies": 0.5820870399475098, "eval_rewards/chosen": -0.2009136825799942, "eval_rewards/margins": 0.07578551024198532, "eval_rewards/rejected": -0.2766991853713989, "eval_runtime": 443.3793, "eval_samples_per_second": 26.535, "eval_steps_per_second": 3.318, "step": 5200 }, { "epoch": 1.52, "learning_rate": 2.877459979549566e-07, "logits/chosen": -2.7898855209350586, "logits/rejected": -2.7748923301696777, "logps/chosen": -210.88479614257812, "logps/rejected": -194.19326782226562, "loss": 0.9279, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.19191429018974304, "rewards/margins": 0.08744990825653076, "rewards/rejected": -0.2793641984462738, "step": 5210 }, { "epoch": 1.52, "learning_rate": 2.869068901277991e-07, "logits/chosen": -2.7477736473083496, "logits/rejected": -2.7583398818969727, "logps/chosen": -197.46267700195312, "logps/rejected": -200.65147399902344, "loss": 0.9151, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.18644069135189056, "rewards/margins": 0.09844745695590973, "rewards/rejected": -0.2848881483078003, "step": 5220 }, { "epoch": 1.53, "learning_rate": 2.860673570412297e-07, "logits/chosen": -2.758688449859619, "logits/rejected": -2.746370792388916, "logps/chosen": -198.10745239257812, "logps/rejected": -179.736328125, "loss": 0.9259, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.2171638011932373, "rewards/margins": 0.04720696806907654, "rewards/rejected": -0.26437076926231384, "step": 5230 }, { "epoch": 1.53, "learning_rate": 2.852274083687634e-07, "logits/chosen": -2.754690647125244, "logits/rejected": -2.7557036876678467, "logps/chosen": -189.4248809814453, "logps/rejected": -189.26260375976562, "loss": 0.9368, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.199670672416687, "rewards/margins": 0.05970006436109543, "rewards/rejected": -0.25937071442604065, "step": 5240 }, { "epoch": 1.53, "learning_rate": 2.8438705378870337e-07, "logits/chosen": -2.7454700469970703, "logits/rejected": -2.7141568660736084, "logps/chosen": -206.284423828125, "logps/rejected": -179.8996124267578, "loss": 0.9198, "rewards/accuracies": 0.621874988079071, "rewards/chosen": -0.19247445464134216, "rewards/margins": 0.10617701709270477, "rewards/rejected": -0.29865145683288574, "step": 5250 }, { "epoch": 1.53, "learning_rate": 2.8354630298403015e-07, "logits/chosen": -2.735435962677002, "logits/rejected": -2.7617130279541016, "logps/chosen": -211.7236328125, "logps/rejected": -202.30496215820312, "loss": 0.9375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.23901574313640594, "rewards/margins": 0.07207988202571869, "rewards/rejected": -0.31109562516212463, "step": 5260 }, { "epoch": 1.54, "learning_rate": 2.827051656422895e-07, "logits/chosen": -2.7626614570617676, "logits/rejected": -2.724433422088623, "logps/chosen": -216.4359893798828, "logps/rejected": -187.43603515625, "loss": 0.9277, "rewards/accuracies": 0.578125, "rewards/chosen": -0.21994581818580627, "rewards/margins": 0.08216341584920883, "rewards/rejected": -0.3021092116832733, "step": 5270 }, { "epoch": 1.54, "learning_rate": 2.818636514554814e-07, "logits/chosen": -2.7331411838531494, "logits/rejected": -2.714127779006958, "logps/chosen": -202.60743713378906, "logps/rejected": -178.23214721679688, "loss": 0.9166, "rewards/accuracies": 0.578125, "rewards/chosen": -0.20016559958457947, "rewards/margins": 0.08119861781597137, "rewards/rejected": -0.28136423230171204, "step": 5280 }, { "epoch": 1.54, "learning_rate": 2.810217701199478e-07, "logits/chosen": -2.7301182746887207, "logits/rejected": -2.7275664806365967, "logps/chosen": -196.49765014648438, "logps/rejected": -185.9195556640625, "loss": 0.9229, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.19735606014728546, "rewards/margins": 0.09479983896017075, "rewards/rejected": -0.292155921459198, "step": 5290 }, { "epoch": 1.55, "learning_rate": 2.801795313362609e-07, "logits/chosen": -2.7671756744384766, "logits/rejected": -2.7629024982452393, "logps/chosen": -205.7484893798828, "logps/rejected": -188.64962768554688, "loss": 0.9171, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.20872464776039124, "rewards/margins": 0.07953401654958725, "rewards/rejected": -0.2882586419582367, "step": 5300 }, { "epoch": 1.55, "eval_logits/chosen": -2.6475534439086914, "eval_logits/rejected": -2.642137050628662, "eval_logps/chosen": -197.62921142578125, "eval_logps/rejected": -183.7055206298828, "eval_loss": 0.933870255947113, "eval_rewards/accuracies": 0.5822569727897644, "eval_rewards/chosen": -0.20238925516605377, "eval_rewards/margins": 0.0760333314538002, "eval_rewards/rejected": -0.27842259407043457, "eval_runtime": 443.3851, "eval_samples_per_second": 26.534, "eval_steps_per_second": 3.318, "step": 5300 }, { "epoch": 1.55, "learning_rate": 2.7933694480911217e-07, "logits/chosen": -2.7444474697113037, "logits/rejected": -2.756112575531006, "logps/chosen": -191.1089630126953, "logps/rejected": -194.6819305419922, "loss": 0.9219, "rewards/accuracies": 0.5625, "rewards/chosen": -0.20938988029956818, "rewards/margins": 0.10179195553064346, "rewards/rejected": -0.31118181347846985, "step": 5310 }, { "epoch": 1.55, "learning_rate": 2.7849402024719944e-07, "logits/chosen": -2.747704267501831, "logits/rejected": -2.757530689239502, "logps/chosen": -217.6678466796875, "logps/rejected": -210.233154296875, "loss": 0.9241, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.21528121829032898, "rewards/margins": 0.08019135892391205, "rewards/rejected": -0.29547256231307983, "step": 5320 }, { "epoch": 1.55, "learning_rate": 2.7765076736311575e-07, "logits/chosen": -2.72330641746521, "logits/rejected": -2.718510150909424, "logps/chosen": -178.87374877929688, "logps/rejected": -171.4022979736328, "loss": 0.9264, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.213222935795784, "rewards/margins": 0.059436433017253876, "rewards/rejected": -0.2726593613624573, "step": 5330 }, { "epoch": 1.56, "learning_rate": 2.7680719587323717e-07, "logits/chosen": -2.7657723426818848, "logits/rejected": -2.771605968475342, "logps/chosen": -208.59555053710938, "logps/rejected": -198.11004638671875, "loss": 0.9155, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.20413896441459656, "rewards/margins": 0.11837039142847061, "rewards/rejected": -0.3225093483924866, "step": 5340 }, { "epoch": 1.56, "learning_rate": 2.759633154976111e-07, "logits/chosen": -2.7205915451049805, "logits/rejected": -2.726088285446167, "logps/chosen": -182.267822265625, "logps/rejected": -174.0267791748047, "loss": 0.9174, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.20186123251914978, "rewards/margins": 0.08812803775072098, "rewards/rejected": -0.28998929262161255, "step": 5350 }, { "epoch": 1.56, "learning_rate": 2.7511913595984374e-07, "logits/chosen": -2.7115683555603027, "logits/rejected": -2.707627058029175, "logps/chosen": -190.1064453125, "logps/rejected": -178.34567260742188, "loss": 0.9147, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.18654951453208923, "rewards/margins": 0.10496105998754501, "rewards/rejected": -0.29151061177253723, "step": 5360 }, { "epoch": 1.57, "learning_rate": 2.7427466698698864e-07, "logits/chosen": -2.7274367809295654, "logits/rejected": -2.7309534549713135, "logps/chosen": -203.6020965576172, "logps/rejected": -197.11085510253906, "loss": 0.9108, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.20246362686157227, "rewards/margins": 0.10691912472248077, "rewards/rejected": -0.30938273668289185, "step": 5370 }, { "epoch": 1.57, "learning_rate": 2.7342991830943437e-07, "logits/chosen": -2.766284942626953, "logits/rejected": -2.7335293292999268, "logps/chosen": -203.7217559814453, "logps/rejected": -175.9837188720703, "loss": 0.9585, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.22377462685108185, "rewards/margins": 0.05499381572008133, "rewards/rejected": -0.278768390417099, "step": 5380 }, { "epoch": 1.57, "learning_rate": 2.7258489966079206e-07, "logits/chosen": -2.7298882007598877, "logits/rejected": -2.738774061203003, "logps/chosen": -191.04055786132812, "logps/rejected": -186.08273315429688, "loss": 0.9449, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.2002135068178177, "rewards/margins": 0.08072539418935776, "rewards/rejected": -0.28093892335891724, "step": 5390 }, { "epoch": 1.58, "learning_rate": 2.717396207777841e-07, "logits/chosen": -2.7327914237976074, "logits/rejected": -2.7400810718536377, "logps/chosen": -187.4174041748047, "logps/rejected": -185.2515869140625, "loss": 0.9337, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.204170823097229, "rewards/margins": 0.06763770431280136, "rewards/rejected": -0.27180856466293335, "step": 5400 }, { "epoch": 1.58, "eval_logits/chosen": -2.6513025760650635, "eval_logits/rejected": -2.6459460258483887, "eval_logps/chosen": -197.645263671875, "eval_logps/rejected": -183.72079467773438, "eval_loss": 0.934425950050354, "eval_rewards/accuracies": 0.5786879658699036, "eval_rewards/chosen": -0.20399518311023712, "eval_rewards/margins": 0.07595469802618027, "eval_rewards/rejected": -0.2799498438835144, "eval_runtime": 443.3253, "eval_samples_per_second": 26.538, "eval_steps_per_second": 3.318, "step": 5400 }, { "epoch": 1.58, "learning_rate": 2.7089409140013103e-07, "logits/chosen": -2.7303237915039062, "logits/rejected": -2.7288081645965576, "logps/chosen": -216.2913055419922, "logps/rejected": -185.7354736328125, "loss": 0.9246, "rewards/accuracies": 0.59375, "rewards/chosen": -0.1843048334121704, "rewards/margins": 0.08620087802410126, "rewards/rejected": -0.2705056965351105, "step": 5410 }, { "epoch": 1.58, "learning_rate": 2.700483212704398e-07, "logits/chosen": -2.740948438644409, "logits/rejected": -2.7331557273864746, "logps/chosen": -200.00587463378906, "logps/rejected": -190.20492553710938, "loss": 0.931, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.22048547863960266, "rewards/margins": 0.07095751911401749, "rewards/rejected": -0.29144302010536194, "step": 5420 }, { "epoch": 1.58, "learning_rate": 2.692023201340915e-07, "logits/chosen": -2.7501890659332275, "logits/rejected": -2.738179922103882, "logps/chosen": -197.78414916992188, "logps/rejected": -191.88436889648438, "loss": 0.9055, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.25792795419692993, "rewards/margins": 0.08034543693065643, "rewards/rejected": -0.3382733464241028, "step": 5430 }, { "epoch": 1.59, "learning_rate": 2.6835609773912903e-07, "logits/chosen": -2.728278160095215, "logits/rejected": -2.7290234565734863, "logps/chosen": -184.92025756835938, "logps/rejected": -178.275390625, "loss": 0.9192, "rewards/accuracies": 0.578125, "rewards/chosen": -0.19944298267364502, "rewards/margins": 0.10552481561899185, "rewards/rejected": -0.30496782064437866, "step": 5440 }, { "epoch": 1.59, "learning_rate": 2.675096638361446e-07, "logits/chosen": -2.720247983932495, "logits/rejected": -2.7001519203186035, "logps/chosen": -200.8359832763672, "logps/rejected": -184.19882202148438, "loss": 0.9261, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.22018194198608398, "rewards/margins": 0.06855995953083038, "rewards/rejected": -0.2887418866157532, "step": 5450 }, { "epoch": 1.59, "learning_rate": 2.666630281781676e-07, "logits/chosen": -2.715918779373169, "logits/rejected": -2.7154223918914795, "logps/chosen": -202.9798126220703, "logps/rejected": -189.34768676757812, "loss": 0.9272, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.22410516440868378, "rewards/margins": 0.09275824576616287, "rewards/rejected": -0.31686341762542725, "step": 5460 }, { "epoch": 1.6, "learning_rate": 2.658162005205522e-07, "logits/chosen": -2.7463111877441406, "logits/rejected": -2.7408008575439453, "logps/chosen": -189.31149291992188, "logps/rejected": -174.1709442138672, "loss": 0.9472, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.21325497329235077, "rewards/margins": 0.0632646232843399, "rewards/rejected": -0.2765195965766907, "step": 5470 }, { "epoch": 1.6, "learning_rate": 2.6496919062086466e-07, "logits/chosen": -2.7319397926330566, "logits/rejected": -2.7326912879943848, "logps/chosen": -213.6542205810547, "logps/rejected": -202.09652709960938, "loss": 0.9157, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.20796532928943634, "rewards/margins": 0.08152662217617035, "rewards/rejected": -0.2894919216632843, "step": 5480 }, { "epoch": 1.6, "learning_rate": 2.641220082387714e-07, "logits/chosen": -2.7371435165405273, "logits/rejected": -2.7322709560394287, "logps/chosen": -189.59552001953125, "logps/rejected": -177.81393432617188, "loss": 0.9354, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.21587204933166504, "rewards/margins": 0.09057654440402985, "rewards/rejected": -0.3064486086368561, "step": 5490 }, { "epoch": 1.6, "learning_rate": 2.6327466313592605e-07, "logits/chosen": -2.7272610664367676, "logits/rejected": -2.755542278289795, "logps/chosen": -177.8417510986328, "logps/rejected": -187.9776153564453, "loss": 0.919, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.20907747745513916, "rewards/margins": 0.09078783541917801, "rewards/rejected": -0.2998653054237366, "step": 5500 }, { "epoch": 1.6, "eval_logits/chosen": -2.6445047855377197, "eval_logits/rejected": -2.6390130519866943, "eval_logps/chosen": -197.66366577148438, "eval_logps/rejected": -183.74652099609375, "eval_loss": 0.9334189295768738, "eval_rewards/accuracies": 0.5810673236846924, "eval_rewards/chosen": -0.2058352380990982, "eval_rewards/margins": 0.07668833434581757, "eval_rewards/rejected": -0.28252357244491577, "eval_runtime": 443.355, "eval_samples_per_second": 26.536, "eval_steps_per_second": 3.318, "step": 5500 }, { "epoch": 1.61, "learning_rate": 2.624271650758574e-07, "logits/chosen": -2.6975483894348145, "logits/rejected": -2.7088265419006348, "logps/chosen": -176.8209991455078, "logps/rejected": -166.51121520996094, "loss": 0.9416, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.21087944507598877, "rewards/margins": 0.05349407345056534, "rewards/rejected": -0.2643735110759735, "step": 5510 }, { "epoch": 1.61, "learning_rate": 2.615795238238565e-07, "logits/chosen": -2.7559120655059814, "logits/rejected": -2.741698741912842, "logps/chosen": -211.35818481445312, "logps/rejected": -183.8773956298828, "loss": 0.9254, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.1826242059469223, "rewards/margins": 0.09974977374076843, "rewards/rejected": -0.28237396478652954, "step": 5520 }, { "epoch": 1.61, "learning_rate": 2.607317491468644e-07, "logits/chosen": -2.738673210144043, "logits/rejected": -2.741772174835205, "logps/chosen": -191.3245086669922, "logps/rejected": -174.14743041992188, "loss": 0.9398, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.1919967234134674, "rewards/margins": 0.0601605661213398, "rewards/rejected": -0.2521572709083557, "step": 5530 }, { "epoch": 1.62, "learning_rate": 2.598838508133596e-07, "logits/chosen": -2.717775821685791, "logits/rejected": -2.7444305419921875, "logps/chosen": -176.27365112304688, "logps/rejected": -180.37942504882812, "loss": 0.9109, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.20145054161548615, "rewards/margins": 0.07873423397541046, "rewards/rejected": -0.2801847755908966, "step": 5540 }, { "epoch": 1.62, "learning_rate": 2.590358385932452e-07, "logits/chosen": -2.752577304840088, "logits/rejected": -2.7411880493164062, "logps/chosen": -188.3993682861328, "logps/rejected": -179.72702026367188, "loss": 0.9297, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20420292019844055, "rewards/margins": 0.07471559941768646, "rewards/rejected": -0.27891847491264343, "step": 5550 }, { "epoch": 1.62, "learning_rate": 2.5818772225773704e-07, "logits/chosen": -2.740549087524414, "logits/rejected": -2.7294859886169434, "logps/chosen": -214.0988311767578, "logps/rejected": -194.05979919433594, "loss": 0.9154, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.1923362910747528, "rewards/margins": 0.09265486896038055, "rewards/rejected": -0.28499117493629456, "step": 5560 }, { "epoch": 1.62, "learning_rate": 2.5733951157924997e-07, "logits/chosen": -2.7381107807159424, "logits/rejected": -2.731104612350464, "logps/chosen": -178.1517791748047, "logps/rejected": -169.85348510742188, "loss": 0.935, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.2057117521762848, "rewards/margins": 0.0582122728228569, "rewards/rejected": -0.2639240622520447, "step": 5570 }, { "epoch": 1.63, "learning_rate": 2.5649121633128656e-07, "logits/chosen": -2.7527637481689453, "logits/rejected": -2.7482333183288574, "logps/chosen": -202.63143920898438, "logps/rejected": -194.18113708496094, "loss": 0.9452, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.22132262587547302, "rewards/margins": 0.06279505789279938, "rewards/rejected": -0.2841176986694336, "step": 5580 }, { "epoch": 1.63, "learning_rate": 2.556428462883232e-07, "logits/chosen": -2.724813222885132, "logits/rejected": -2.7437031269073486, "logps/chosen": -188.14248657226562, "logps/rejected": -192.07470703125, "loss": 0.9244, "rewards/accuracies": 0.6343749761581421, "rewards/chosen": -0.22033333778381348, "rewards/margins": 0.08756639808416367, "rewards/rejected": -0.30789971351623535, "step": 5590 }, { "epoch": 1.63, "learning_rate": 2.5479441122569874e-07, "logits/chosen": -2.7545359134674072, "logits/rejected": -2.745806932449341, "logps/chosen": -206.4933319091797, "logps/rejected": -184.64808654785156, "loss": 0.9297, "rewards/accuracies": 0.578125, "rewards/chosen": -0.21928980946540833, "rewards/margins": 0.07845072448253632, "rewards/rejected": -0.29774051904678345, "step": 5600 }, { "epoch": 1.63, "eval_logits/chosen": -2.6471898555755615, "eval_logits/rejected": -2.64178466796875, "eval_logps/chosen": -197.658203125, "eval_logps/rejected": -183.7437286376953, "eval_loss": 0.9340550303459167, "eval_rewards/accuracies": 0.5793677568435669, "eval_rewards/chosen": -0.20529019832611084, "eval_rewards/margins": 0.07695318758487701, "eval_rewards/rejected": -0.28224337100982666, "eval_runtime": 443.3931, "eval_samples_per_second": 26.534, "eval_steps_per_second": 3.318, "step": 5600 }, { "epoch": 1.64, "learning_rate": 2.539459209195007e-07, "logits/chosen": -2.733870267868042, "logits/rejected": -2.7074484825134277, "logps/chosen": -214.43017578125, "logps/rejected": -184.8483428955078, "loss": 0.9295, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.19600990414619446, "rewards/margins": 0.07425862550735474, "rewards/rejected": -0.2702685296535492, "step": 5610 }, { "epoch": 1.64, "learning_rate": 2.530973851464535e-07, "logits/chosen": -2.7479662895202637, "logits/rejected": -2.757333755493164, "logps/chosen": -193.40345764160156, "logps/rejected": -175.7198028564453, "loss": 0.9435, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.21974997222423553, "rewards/margins": 0.0870782732963562, "rewards/rejected": -0.30682826042175293, "step": 5620 }, { "epoch": 1.64, "learning_rate": 2.5224881368380513e-07, "logits/chosen": -2.7397866249084473, "logits/rejected": -2.738558292388916, "logps/chosen": -198.94163513183594, "logps/rejected": -190.87486267089844, "loss": 0.9213, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.18970100581645966, "rewards/margins": 0.09227032959461212, "rewards/rejected": -0.2819713056087494, "step": 5630 }, { "epoch": 1.65, "learning_rate": 2.514002163092152e-07, "logits/chosen": -2.722203016281128, "logits/rejected": -2.7369751930236816, "logps/chosen": -216.5584259033203, "logps/rejected": -201.0596466064453, "loss": 0.9286, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.2221214771270752, "rewards/margins": 0.07766957581043243, "rewards/rejected": -0.29979103803634644, "step": 5640 }, { "epoch": 1.65, "learning_rate": 2.5055160280064145e-07, "logits/chosen": -2.728950023651123, "logits/rejected": -2.7567801475524902, "logps/chosen": -175.72958374023438, "logps/rejected": -188.25039672851562, "loss": 0.9278, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.23196330666542053, "rewards/margins": 0.06436417996883392, "rewards/rejected": -0.2963274419307709, "step": 5650 }, { "epoch": 1.65, "learning_rate": 2.497029829362279e-07, "logits/chosen": -2.701756000518799, "logits/rejected": -2.7560033798217773, "logps/chosen": -176.56668090820312, "logps/rejected": -190.84999084472656, "loss": 0.9211, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.19433894753456116, "rewards/margins": 0.09112901985645294, "rewards/rejected": -0.2854679524898529, "step": 5660 }, { "epoch": 1.65, "learning_rate": 2.488543664941916e-07, "logits/chosen": -2.7353200912475586, "logits/rejected": -2.7525343894958496, "logps/chosen": -192.02662658691406, "logps/rejected": -188.41783142089844, "loss": 0.9235, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.20639260113239288, "rewards/margins": 0.10132446140050888, "rewards/rejected": -0.30771705508232117, "step": 5670 }, { "epoch": 1.66, "learning_rate": 2.480057632527103e-07, "logits/chosen": -2.732024908065796, "logits/rejected": -2.7357800006866455, "logps/chosen": -174.36900329589844, "logps/rejected": -170.44863891601562, "loss": 0.9486, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.22138449549674988, "rewards/margins": 0.05923574045300484, "rewards/rejected": -0.2806202471256256, "step": 5680 }, { "epoch": 1.66, "learning_rate": 2.471571829898095e-07, "logits/chosen": -2.7303781509399414, "logits/rejected": -2.750192880630493, "logps/chosen": -205.6610565185547, "logps/rejected": -206.914794921875, "loss": 0.919, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.21082866191864014, "rewards/margins": 0.08251901715993881, "rewards/rejected": -0.29334768652915955, "step": 5690 }, { "epoch": 1.66, "learning_rate": 2.4630863548325e-07, "logits/chosen": -2.735438585281372, "logits/rejected": -2.7419936656951904, "logps/chosen": -188.0749969482422, "logps/rejected": -184.5003204345703, "loss": 0.9174, "rewards/accuracies": 0.609375, "rewards/chosen": -0.196400448679924, "rewards/margins": 0.1187920793890953, "rewards/rejected": -0.3151925206184387, "step": 5700 }, { "epoch": 1.66, "eval_logits/chosen": -2.654500961303711, "eval_logits/rejected": -2.6492364406585693, "eval_logps/chosen": -197.672607421875, "eval_logps/rejected": -183.75535583496094, "eval_loss": 0.9332903027534485, "eval_rewards/accuracies": 0.580047607421875, "eval_rewards/chosen": -0.20673073828220367, "eval_rewards/margins": 0.07667768001556396, "eval_rewards/rejected": -0.28340843319892883, "eval_runtime": 443.3386, "eval_samples_per_second": 26.537, "eval_steps_per_second": 3.318, "step": 5700 }, { "epoch": 1.67, "learning_rate": 2.4546013051041514e-07, "logits/chosen": -2.7554385662078857, "logits/rejected": -2.755969524383545, "logps/chosen": -189.93936157226562, "logps/rejected": -173.48239135742188, "loss": 0.9304, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.20026805996894836, "rewards/margins": 0.09992248564958572, "rewards/rejected": -0.3001905381679535, "step": 5710 }, { "epoch": 1.67, "learning_rate": 2.4461167784819827e-07, "logits/chosen": -2.728515625, "logits/rejected": -2.7146217823028564, "logps/chosen": -191.40211486816406, "logps/rejected": -182.94676208496094, "loss": 0.9361, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.21724525094032288, "rewards/margins": 0.07030661404132843, "rewards/rejected": -0.2875518202781677, "step": 5720 }, { "epoch": 1.67, "learning_rate": 2.4376328727288974e-07, "logits/chosen": -2.7447657585144043, "logits/rejected": -2.751224994659424, "logps/chosen": -178.4643096923828, "logps/rejected": -178.66090393066406, "loss": 0.9383, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2255851775407791, "rewards/margins": 0.05275057628750801, "rewards/rejected": -0.2783357501029968, "step": 5730 }, { "epoch": 1.67, "learning_rate": 2.429149685600648e-07, "logits/chosen": -2.729738473892212, "logits/rejected": -2.698761224746704, "logps/chosen": -183.81948852539062, "logps/rejected": -174.73214721679688, "loss": 0.9379, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.21086111664772034, "rewards/margins": 0.05557774752378464, "rewards/rejected": -0.26643887162208557, "step": 5740 }, { "epoch": 1.68, "learning_rate": 2.4206673148447066e-07, "logits/chosen": -2.726379871368408, "logits/rejected": -2.706997871398926, "logps/chosen": -181.8163604736328, "logps/rejected": -158.59722900390625, "loss": 0.9467, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.19309577345848083, "rewards/margins": 0.061171580106019974, "rewards/rejected": -0.2542673647403717, "step": 5750 }, { "epoch": 1.68, "learning_rate": 2.4121858581991353e-07, "logits/chosen": -2.734316110610962, "logits/rejected": -2.7303547859191895, "logps/chosen": -194.5187530517578, "logps/rejected": -185.96414184570312, "loss": 0.9143, "rewards/accuracies": 0.578125, "rewards/chosen": -0.20588722825050354, "rewards/margins": 0.0946664959192276, "rewards/rejected": -0.30055373907089233, "step": 5760 }, { "epoch": 1.68, "learning_rate": 2.403705413391467e-07, "logits/chosen": -2.7414212226867676, "logits/rejected": -2.7207446098327637, "logps/chosen": -213.69198608398438, "logps/rejected": -181.99899291992188, "loss": 0.928, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2160993367433548, "rewards/margins": 0.0850725769996643, "rewards/rejected": -0.3011718988418579, "step": 5770 }, { "epoch": 1.69, "learning_rate": 2.3952260781375726e-07, "logits/chosen": -2.7480435371398926, "logits/rejected": -2.7473959922790527, "logps/chosen": -205.23489379882812, "logps/rejected": -193.68539428710938, "loss": 0.9355, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.211337611079216, "rewards/margins": 0.06998740881681442, "rewards/rejected": -0.28132501244544983, "step": 5780 }, { "epoch": 1.69, "learning_rate": 2.386747950140541e-07, "logits/chosen": -2.7476816177368164, "logits/rejected": -2.7682197093963623, "logps/chosen": -200.57284545898438, "logps/rejected": -198.715087890625, "loss": 0.9177, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.20661315321922302, "rewards/margins": 0.1073889508843422, "rewards/rejected": -0.3140020966529846, "step": 5790 }, { "epoch": 1.69, "learning_rate": 2.3782711270895492e-07, "logits/chosen": -2.7584152221679688, "logits/rejected": -2.748316764831543, "logps/chosen": -203.68675231933594, "logps/rejected": -195.41549682617188, "loss": 0.9275, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.2284468412399292, "rewards/margins": 0.08097778260707855, "rewards/rejected": -0.30942457914352417, "step": 5800 }, { "epoch": 1.69, "eval_logits/chosen": -2.6524062156677246, "eval_logits/rejected": -2.647106647491455, "eval_logps/chosen": -197.66415405273438, "eval_logps/rejected": -183.74755859375, "eval_loss": 0.9332142472267151, "eval_rewards/accuracies": 0.5759687423706055, "eval_rewards/chosen": -0.20588359236717224, "eval_rewards/margins": 0.07674256712198257, "eval_rewards/rejected": -0.2826261818408966, "eval_runtime": 443.3985, "eval_samples_per_second": 26.534, "eval_steps_per_second": 3.318, "step": 5800 }, { "epoch": 1.69, "learning_rate": 2.3697957066587383e-07, "logits/chosen": -2.7346596717834473, "logits/rejected": -2.7185769081115723, "logps/chosen": -200.0314178466797, "logps/rejected": -178.08457946777344, "loss": 0.9587, "rewards/accuracies": 0.47187501192092896, "rewards/chosen": -0.2539510726928711, "rewards/margins": 0.03835904970765114, "rewards/rejected": -0.29231011867523193, "step": 5810 }, { "epoch": 1.7, "learning_rate": 2.3613217865060852e-07, "logits/chosen": -2.7300784587860107, "logits/rejected": -2.708784818649292, "logps/chosen": -197.45974731445312, "logps/rejected": -175.445556640625, "loss": 0.9341, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.23136886954307556, "rewards/margins": 0.04439845681190491, "rewards/rejected": -0.27576732635498047, "step": 5820 }, { "epoch": 1.7, "learning_rate": 2.352849464272285e-07, "logits/chosen": -2.74354887008667, "logits/rejected": -2.7220675945281982, "logps/chosen": -223.2447509765625, "logps/rejected": -192.660400390625, "loss": 0.929, "rewards/accuracies": 0.578125, "rewards/chosen": -0.2157532423734665, "rewards/margins": 0.08507034182548523, "rewards/rejected": -0.3008235991001129, "step": 5830 }, { "epoch": 1.7, "learning_rate": 2.3443788375796174e-07, "logits/chosen": -2.7344906330108643, "logits/rejected": -2.7282562255859375, "logps/chosen": -220.3262176513672, "logps/rejected": -205.54330444335938, "loss": 0.9263, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.24231091141700745, "rewards/margins": 0.062385208904743195, "rewards/rejected": -0.30469608306884766, "step": 5840 }, { "epoch": 1.71, "learning_rate": 2.3359100040308243e-07, "logits/chosen": -2.7421112060546875, "logits/rejected": -2.723379135131836, "logps/chosen": -198.9661407470703, "logps/rejected": -178.9770965576172, "loss": 0.9394, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.2452888786792755, "rewards/margins": 0.04774923250079155, "rewards/rejected": -0.29303810000419617, "step": 5850 }, { "epoch": 1.71, "learning_rate": 2.3274430612079892e-07, "logits/chosen": -2.7680325508117676, "logits/rejected": -2.7334728240966797, "logps/chosen": -216.655517578125, "logps/rejected": -190.80474853515625, "loss": 0.9297, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.2257642298936844, "rewards/margins": 0.07767541706562042, "rewards/rejected": -0.3034396171569824, "step": 5860 }, { "epoch": 1.71, "learning_rate": 2.318978106671407e-07, "logits/chosen": -2.740093231201172, "logits/rejected": -2.7320263385772705, "logps/chosen": -188.4129180908203, "logps/rejected": -181.80380249023438, "loss": 0.9246, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.22893789410591125, "rewards/margins": 0.065252386033535, "rewards/rejected": -0.29419028759002686, "step": 5870 }, { "epoch": 1.72, "learning_rate": 2.3105152379584642e-07, "logits/chosen": -2.7359161376953125, "logits/rejected": -2.7144532203674316, "logps/chosen": -198.8520965576172, "logps/rejected": -170.20571899414062, "loss": 0.9385, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.1944771260023117, "rewards/margins": 0.07553229480981827, "rewards/rejected": -0.27000942826271057, "step": 5880 }, { "epoch": 1.72, "learning_rate": 2.3020545525825119e-07, "logits/chosen": -2.7251765727996826, "logits/rejected": -2.7204253673553467, "logps/chosen": -212.84732055664062, "logps/rejected": -192.09767150878906, "loss": 0.9052, "rewards/accuracies": 0.628125011920929, "rewards/chosen": -0.1890733242034912, "rewards/margins": 0.10702107846736908, "rewards/rejected": -0.2960943877696991, "step": 5890 }, { "epoch": 1.72, "learning_rate": 2.2935961480317463e-07, "logits/chosen": -2.71047306060791, "logits/rejected": -2.7011935710906982, "logps/chosen": -199.55006408691406, "logps/rejected": -189.4619140625, "loss": 0.9164, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.21190086007118225, "rewards/margins": 0.0709785670042038, "rewards/rejected": -0.28287941217422485, "step": 5900 }, { "epoch": 1.72, "eval_logits/chosen": -2.644160270690918, "eval_logits/rejected": -2.638662576675415, "eval_logps/chosen": -197.6846923828125, "eval_logps/rejected": -183.78807067871094, "eval_loss": 0.9320964217185974, "eval_rewards/accuracies": 0.580897331237793, "eval_rewards/chosen": -0.20793870091438293, "eval_rewards/margins": 0.07873953133821487, "eval_rewards/rejected": -0.2866782546043396, "eval_runtime": 443.3868, "eval_samples_per_second": 26.534, "eval_steps_per_second": 3.318, "step": 5900 }, { "epoch": 1.72, "learning_rate": 2.2851401217680788e-07, "logits/chosen": -2.7377264499664307, "logits/rejected": -2.739065408706665, "logps/chosen": -185.49278259277344, "logps/rejected": -182.3894500732422, "loss": 0.9262, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.21655301749706268, "rewards/margins": 0.05303264781832695, "rewards/rejected": -0.26958566904067993, "step": 5910 }, { "epoch": 1.73, "learning_rate": 2.2766865712260217e-07, "logits/chosen": -2.7535338401794434, "logits/rejected": -2.753856897354126, "logps/chosen": -199.83055114746094, "logps/rejected": -184.28892517089844, "loss": 0.9271, "rewards/accuracies": 0.578125, "rewards/chosen": -0.21020498871803284, "rewards/margins": 0.10383982956409454, "rewards/rejected": -0.3140447735786438, "step": 5920 }, { "epoch": 1.73, "learning_rate": 2.2682355938115583e-07, "logits/chosen": -2.744236469268799, "logits/rejected": -2.752063035964966, "logps/chosen": -203.16470336914062, "logps/rejected": -185.2848663330078, "loss": 0.9166, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.20815984904766083, "rewards/margins": 0.0902753472328186, "rewards/rejected": -0.2984352111816406, "step": 5930 }, { "epoch": 1.73, "learning_rate": 2.2597872869010218e-07, "logits/chosen": -2.753807544708252, "logits/rejected": -2.7454967498779297, "logps/chosen": -215.274169921875, "logps/rejected": -195.089599609375, "loss": 0.903, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.21169881522655487, "rewards/margins": 0.11044590175151825, "rewards/rejected": -0.3221447467803955, "step": 5940 }, { "epoch": 1.74, "learning_rate": 2.2513417478399777e-07, "logits/chosen": -2.735591411590576, "logits/rejected": -2.7258079051971436, "logps/chosen": -168.94375610351562, "logps/rejected": -160.7762451171875, "loss": 0.924, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.21011146903038025, "rewards/margins": 0.07576417922973633, "rewards/rejected": -0.2858756184577942, "step": 5950 }, { "epoch": 1.74, "learning_rate": 2.2428990739420954e-07, "logits/chosen": -2.7638401985168457, "logits/rejected": -2.732102870941162, "logps/chosen": -205.1365203857422, "logps/rejected": -182.8333740234375, "loss": 0.9109, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.20985054969787598, "rewards/margins": 0.0838356763124466, "rewards/rejected": -0.2936862111091614, "step": 5960 }, { "epoch": 1.74, "learning_rate": 2.2344593624880342e-07, "logits/chosen": -2.708482503890991, "logits/rejected": -2.703840732574463, "logps/chosen": -203.40245056152344, "logps/rejected": -182.5862579345703, "loss": 0.9505, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21254822611808777, "rewards/margins": 0.05914415791630745, "rewards/rejected": -0.2716923952102661, "step": 5970 }, { "epoch": 1.74, "learning_rate": 2.2260227107243154e-07, "logits/chosen": -2.7273995876312256, "logits/rejected": -2.7421963214874268, "logps/chosen": -190.72694396972656, "logps/rejected": -188.5596923828125, "loss": 0.9285, "rewards/accuracies": 0.546875, "rewards/chosen": -0.2123124897480011, "rewards/margins": 0.06590452790260315, "rewards/rejected": -0.27821698784828186, "step": 5980 }, { "epoch": 1.75, "learning_rate": 2.2175892158622075e-07, "logits/chosen": -2.7460885047912598, "logits/rejected": -2.7639546394348145, "logps/chosen": -198.0811767578125, "logps/rejected": -194.3120880126953, "loss": 0.9193, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.20485849678516388, "rewards/margins": 0.12061629444360733, "rewards/rejected": -0.3254747986793518, "step": 5990 }, { "epoch": 1.75, "learning_rate": 2.209158975076601e-07, "logits/chosen": -2.71342134475708, "logits/rejected": -2.717883348464966, "logps/chosen": -199.0811309814453, "logps/rejected": -189.3131561279297, "loss": 0.9218, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.2159605324268341, "rewards/margins": 0.06497268378734589, "rewards/rejected": -0.2809332013130188, "step": 6000 }, { "epoch": 1.75, "eval_logits/chosen": -2.643186092376709, "eval_logits/rejected": -2.6376824378967285, "eval_logps/chosen": -197.700439453125, "eval_logps/rejected": -183.7935028076172, "eval_loss": 0.9321945905685425, "eval_rewards/accuracies": 0.5786879658699036, "eval_rewards/chosen": -0.20951364934444427, "eval_rewards/margins": 0.07770907878875732, "eval_rewards/rejected": -0.287222683429718, "eval_runtime": 443.3821, "eval_samples_per_second": 26.535, "eval_steps_per_second": 3.318, "step": 6000 }, { "epoch": 1.75, "learning_rate": 2.2007320855048941e-07, "logits/chosen": -2.7461142539978027, "logits/rejected": -2.731106996536255, "logps/chosen": -196.85745239257812, "logps/rejected": -179.5032501220703, "loss": 0.9265, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.2143532782793045, "rewards/margins": 0.06113836169242859, "rewards/rejected": -0.2754916250705719, "step": 6010 }, { "epoch": 1.76, "learning_rate": 2.1923086442458701e-07, "logits/chosen": -2.7388505935668945, "logits/rejected": -2.7120919227600098, "logps/chosen": -199.21661376953125, "logps/rejected": -177.04495239257812, "loss": 0.9123, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.18536700308322906, "rewards/margins": 0.08062330633401871, "rewards/rejected": -0.26599031686782837, "step": 6020 }, { "epoch": 1.76, "learning_rate": 2.1838887483585773e-07, "logits/chosen": -2.741877555847168, "logits/rejected": -2.7501578330993652, "logps/chosen": -189.5054931640625, "logps/rejected": -181.48829650878906, "loss": 0.9361, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.22205829620361328, "rewards/margins": 0.054470986127853394, "rewards/rejected": -0.2765292823314667, "step": 6030 }, { "epoch": 1.76, "learning_rate": 2.175472494861214e-07, "logits/chosen": -2.741569757461548, "logits/rejected": -2.7047781944274902, "logps/chosen": -217.58944702148438, "logps/rejected": -188.43344116210938, "loss": 0.9283, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20472857356071472, "rewards/margins": 0.07321996986865997, "rewards/rejected": -0.2779485285282135, "step": 6040 }, { "epoch": 1.76, "learning_rate": 2.1670599807300082e-07, "logits/chosen": -2.7186341285705566, "logits/rejected": -2.7301604747772217, "logps/chosen": -196.19149780273438, "logps/rejected": -194.16751098632812, "loss": 0.9283, "rewards/accuracies": 0.53125, "rewards/chosen": -0.2274094521999359, "rewards/margins": 0.05695997551083565, "rewards/rejected": -0.28436940908432007, "step": 6050 }, { "epoch": 1.77, "learning_rate": 2.158651302898103e-07, "logits/chosen": -2.7257373332977295, "logits/rejected": -2.724644184112549, "logps/chosen": -194.86843872070312, "logps/rejected": -180.59100341796875, "loss": 0.9054, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.18583182990550995, "rewards/margins": 0.10537779331207275, "rewards/rejected": -0.2912096381187439, "step": 6060 }, { "epoch": 1.77, "learning_rate": 2.1502465582544348e-07, "logits/chosen": -2.722172975540161, "logits/rejected": -2.74467134475708, "logps/chosen": -190.9675750732422, "logps/rejected": -196.57125854492188, "loss": 0.9115, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.2208135426044464, "rewards/margins": 0.0937967300415039, "rewards/rejected": -0.3146103024482727, "step": 6070 }, { "epoch": 1.77, "learning_rate": 2.1418458436426227e-07, "logits/chosen": -2.7449307441711426, "logits/rejected": -2.7329206466674805, "logps/chosen": -209.1360626220703, "logps/rejected": -187.81834411621094, "loss": 0.8947, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.20917844772338867, "rewards/margins": 0.10158956050872803, "rewards/rejected": -0.3107679784297943, "step": 6080 }, { "epoch": 1.78, "learning_rate": 2.1334492558598467e-07, "logits/chosen": -2.725964307785034, "logits/rejected": -2.7144277095794678, "logps/chosen": -198.27955627441406, "logps/rejected": -182.8297119140625, "loss": 0.9358, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20703771710395813, "rewards/margins": 0.0838475450873375, "rewards/rejected": -0.2908852696418762, "step": 6090 }, { "epoch": 1.78, "learning_rate": 2.1250568916557394e-07, "logits/chosen": -2.711625337600708, "logits/rejected": -2.675715208053589, "logps/chosen": -194.16815185546875, "logps/rejected": -163.89535522460938, "loss": 0.944, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.23656761646270752, "rewards/margins": 0.017660627141594887, "rewards/rejected": -0.2542282044887543, "step": 6100 }, { "epoch": 1.78, "eval_logits/chosen": -2.660655975341797, "eval_logits/rejected": -2.6554903984069824, "eval_logps/chosen": -197.71176147460938, "eval_logps/rejected": -183.81625366210938, "eval_loss": 0.9318803548812866, "eval_rewards/accuracies": 0.5822569727897644, "eval_rewards/chosen": -0.2106441855430603, "eval_rewards/margins": 0.07885365933179855, "eval_rewards/rejected": -0.28949788212776184, "eval_runtime": 443.3707, "eval_samples_per_second": 26.535, "eval_steps_per_second": 3.318, "step": 6100 }, { "epoch": 1.78, "learning_rate": 2.1166688477312648e-07, "logits/chosen": -2.7371768951416016, "logits/rejected": -2.739062547683716, "logps/chosen": -187.1644744873047, "logps/rejected": -182.38119506835938, "loss": 0.9194, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.23588745296001434, "rewards/margins": 0.09019680321216583, "rewards/rejected": -0.3260842561721802, "step": 6110 }, { "epoch": 1.79, "learning_rate": 2.1082852207376056e-07, "logits/chosen": -2.7473931312561035, "logits/rejected": -2.745567798614502, "logps/chosen": -193.79232788085938, "logps/rejected": -191.493408203125, "loss": 0.9054, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.20973005890846252, "rewards/margins": 0.09154538810253143, "rewards/rejected": -0.30127543210983276, "step": 6120 }, { "epoch": 1.79, "learning_rate": 2.0999061072750527e-07, "logits/chosen": -2.766697645187378, "logits/rejected": -2.7617154121398926, "logps/chosen": -204.7694854736328, "logps/rejected": -187.0171661376953, "loss": 0.9399, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.22126245498657227, "rewards/margins": 0.0732002705335617, "rewards/rejected": -0.2944626808166504, "step": 6130 }, { "epoch": 1.79, "learning_rate": 2.091531603891888e-07, "logits/chosen": -2.7624351978302, "logits/rejected": -2.7651846408843994, "logps/chosen": -225.45278930664062, "logps/rejected": -211.8724822998047, "loss": 0.9139, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.2010863572359085, "rewards/margins": 0.10098360478878021, "rewards/rejected": -0.3020699620246887, "step": 6140 }, { "epoch": 1.79, "learning_rate": 2.0831618070832756e-07, "logits/chosen": -2.7233376502990723, "logits/rejected": -2.733025074005127, "logps/chosen": -178.06814575195312, "logps/rejected": -194.65530395507812, "loss": 0.9346, "rewards/accuracies": 0.5625, "rewards/chosen": -0.23188039660453796, "rewards/margins": 0.05588630586862564, "rewards/rejected": -0.2877667248249054, "step": 6150 }, { "epoch": 1.8, "learning_rate": 2.0747968132901455e-07, "logits/chosen": -2.7171859741210938, "logits/rejected": -2.7284810543060303, "logps/chosen": -191.13034057617188, "logps/rejected": -176.52029418945312, "loss": 0.929, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.2055968940258026, "rewards/margins": 0.1095929741859436, "rewards/rejected": -0.3151898980140686, "step": 6160 }, { "epoch": 1.8, "learning_rate": 2.066436718898089e-07, "logits/chosen": -2.73824405670166, "logits/rejected": -2.745525360107422, "logps/chosen": -183.75559997558594, "logps/rejected": -182.84054565429688, "loss": 0.8989, "rewards/accuracies": 0.609375, "rewards/chosen": -0.1956663727760315, "rewards/margins": 0.09247630089521408, "rewards/rejected": -0.28814268112182617, "step": 6170 }, { "epoch": 1.8, "learning_rate": 2.0580816202362393e-07, "logits/chosen": -2.7328238487243652, "logits/rejected": -2.727752685546875, "logps/chosen": -198.67794799804688, "logps/rejected": -198.55172729492188, "loss": 0.9179, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.21394948661327362, "rewards/margins": 0.07331383973360062, "rewards/rejected": -0.28726333379745483, "step": 6180 }, { "epoch": 1.81, "learning_rate": 2.0497316135761699e-07, "logits/chosen": -2.7127814292907715, "logits/rejected": -2.739947557449341, "logps/chosen": -198.18446350097656, "logps/rejected": -204.63540649414062, "loss": 0.9277, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.20710286498069763, "rewards/margins": 0.11847794055938721, "rewards/rejected": -0.3255808353424072, "step": 6190 }, { "epoch": 1.81, "learning_rate": 2.041386795130781e-07, "logits/chosen": -2.71413516998291, "logits/rejected": -2.7128262519836426, "logps/chosen": -217.21572875976562, "logps/rejected": -203.00741577148438, "loss": 0.9037, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.1752261370420456, "rewards/margins": 0.11408879607915878, "rewards/rejected": -0.28931495547294617, "step": 6200 }, { "epoch": 1.81, "eval_logits/chosen": -2.6512789726257324, "eval_logits/rejected": -2.645932912826538, "eval_logps/chosen": -197.71023559570312, "eval_logps/rejected": -183.81349182128906, "eval_loss": 0.9323169589042664, "eval_rewards/accuracies": 0.5780081748962402, "eval_rewards/chosen": -0.2104932814836502, "eval_rewards/margins": 0.07872689515352249, "eval_rewards/rejected": -0.2892201840877533, "eval_runtime": 443.4189, "eval_samples_per_second": 26.532, "eval_steps_per_second": 3.317, "step": 6200 }, { "epoch": 1.81, "learning_rate": 2.0330472610531904e-07, "logits/chosen": -2.7172884941101074, "logits/rejected": -2.714123249053955, "logps/chosen": -202.11622619628906, "logps/rejected": -190.3927001953125, "loss": 0.9249, "rewards/accuracies": 0.578125, "rewards/chosen": -0.2016797512769699, "rewards/margins": 0.10873542726039886, "rewards/rejected": -0.3104151785373688, "step": 6210 }, { "epoch": 1.81, "learning_rate": 2.0247131074356282e-07, "logits/chosen": -2.719733476638794, "logits/rejected": -2.744614362716675, "logps/chosen": -203.67276000976562, "logps/rejected": -190.30633544921875, "loss": 0.9149, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.24380454421043396, "rewards/margins": 0.08631626516580582, "rewards/rejected": -0.3301208019256592, "step": 6220 }, { "epoch": 1.82, "learning_rate": 2.016384430308327e-07, "logits/chosen": -2.758436918258667, "logits/rejected": -2.7610437870025635, "logps/chosen": -192.81202697753906, "logps/rejected": -178.98318481445312, "loss": 0.9235, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.2031092643737793, "rewards/margins": 0.08105708658695221, "rewards/rejected": -0.2841663360595703, "step": 6230 }, { "epoch": 1.82, "learning_rate": 2.0080613256384176e-07, "logits/chosen": -2.709527015686035, "logits/rejected": -2.7233712673187256, "logps/chosen": -200.44332885742188, "logps/rejected": -185.1295928955078, "loss": 0.939, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.21839790046215057, "rewards/margins": 0.06442956626415253, "rewards/rejected": -0.2828274965286255, "step": 6240 }, { "epoch": 1.82, "learning_rate": 1.9997438893288206e-07, "logits/chosen": -2.7478461265563965, "logits/rejected": -2.7245230674743652, "logps/chosen": -203.77633666992188, "logps/rejected": -185.59506225585938, "loss": 0.9275, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.2087053805589676, "rewards/margins": 0.04750724509358406, "rewards/rejected": -0.25621265172958374, "step": 6250 }, { "epoch": 1.83, "learning_rate": 1.991432217217147e-07, "logits/chosen": -2.74981951713562, "logits/rejected": -2.730909585952759, "logps/chosen": -208.09963989257812, "logps/rejected": -191.15164184570312, "loss": 0.9082, "rewards/accuracies": 0.625, "rewards/chosen": -0.21967999637126923, "rewards/margins": 0.09564986824989319, "rewards/rejected": -0.3153298795223236, "step": 6260 }, { "epoch": 1.83, "learning_rate": 1.9831264050745831e-07, "logits/chosen": -2.753127336502075, "logits/rejected": -2.7233431339263916, "logps/chosen": -206.69058227539062, "logps/rejected": -189.58457946777344, "loss": 0.946, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.24597451090812683, "rewards/margins": 0.040549568831920624, "rewards/rejected": -0.28652408719062805, "step": 6270 }, { "epoch": 1.83, "learning_rate": 1.9748265486048003e-07, "logits/chosen": -2.734738826751709, "logits/rejected": -2.7540624141693115, "logps/chosen": -190.40771484375, "logps/rejected": -191.31874084472656, "loss": 0.8927, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.17388156056404114, "rewards/margins": 0.12894825637340546, "rewards/rejected": -0.3028298020362854, "step": 6280 }, { "epoch": 1.83, "learning_rate": 1.9665327434428424e-07, "logits/chosen": -2.7537999153137207, "logits/rejected": -2.7414052486419678, "logps/chosen": -188.02294921875, "logps/rejected": -189.0082550048828, "loss": 0.928, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.22517235577106476, "rewards/margins": 0.09064620733261108, "rewards/rejected": -0.31581854820251465, "step": 6290 }, { "epoch": 1.84, "learning_rate": 1.9582450851540278e-07, "logits/chosen": -2.729881763458252, "logits/rejected": -2.697326183319092, "logps/chosen": -200.64572143554688, "logps/rejected": -184.64097595214844, "loss": 0.929, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.19603231549263, "rewards/margins": 0.0747486799955368, "rewards/rejected": -0.2707809805870056, "step": 6300 }, { "epoch": 1.84, "eval_logits/chosen": -2.6499528884887695, "eval_logits/rejected": -2.644568681716919, "eval_logps/chosen": -197.71945190429688, "eval_logps/rejected": -183.8264923095703, "eval_loss": 0.9321011304855347, "eval_rewards/accuracies": 0.5773283243179321, "eval_rewards/chosen": -0.21141472458839417, "eval_rewards/margins": 0.07910703122615814, "eval_rewards/rejected": -0.2905217409133911, "eval_runtime": 443.3682, "eval_samples_per_second": 26.536, "eval_steps_per_second": 3.318, "step": 6300 }, { "epoch": 1.84, "learning_rate": 1.9499636692328477e-07, "logits/chosen": -2.729348659515381, "logits/rejected": -2.734265089035034, "logps/chosen": -198.71798706054688, "logps/rejected": -195.2779083251953, "loss": 0.9228, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.24377970397472382, "rewards/margins": 0.06538959592580795, "rewards/rejected": -0.30916929244995117, "step": 6310 }, { "epoch": 1.84, "learning_rate": 1.9416885911018648e-07, "logits/chosen": -2.7466068267822266, "logits/rejected": -2.7390589714050293, "logps/chosen": -212.66635131835938, "logps/rejected": -198.4500274658203, "loss": 0.9558, "rewards/accuracies": 0.528124988079071, "rewards/chosen": -0.2629418969154358, "rewards/margins": 0.041651926934719086, "rewards/rejected": -0.30459386110305786, "step": 6320 }, { "epoch": 1.85, "learning_rate": 1.9334199461106165e-07, "logits/chosen": -2.746856212615967, "logits/rejected": -2.741549253463745, "logps/chosen": -208.10165405273438, "logps/rejected": -191.68942260742188, "loss": 0.9406, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.20201078057289124, "rewards/margins": 0.09430360794067383, "rewards/rejected": -0.29631438851356506, "step": 6330 }, { "epoch": 1.85, "learning_rate": 1.9251578295345113e-07, "logits/chosen": -2.724238634109497, "logits/rejected": -2.7157511711120605, "logps/chosen": -202.83360290527344, "logps/rejected": -182.34056091308594, "loss": 0.9438, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.20443248748779297, "rewards/margins": 0.05801036208868027, "rewards/rejected": -0.26244282722473145, "step": 6340 }, { "epoch": 1.85, "learning_rate": 1.9169023365737392e-07, "logits/chosen": -2.766662120819092, "logits/rejected": -2.779308319091797, "logps/chosen": -196.58349609375, "logps/rejected": -195.30563354492188, "loss": 0.9444, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.23755855858325958, "rewards/margins": 0.06650666147470474, "rewards/rejected": -0.3040652573108673, "step": 6350 }, { "epoch": 1.86, "learning_rate": 1.9086535623521626e-07, "logits/chosen": -2.7374839782714844, "logits/rejected": -2.723292112350464, "logps/chosen": -210.5731964111328, "logps/rejected": -171.655029296875, "loss": 0.9246, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.19213752448558807, "rewards/margins": 0.1119011640548706, "rewards/rejected": -0.3040386736392975, "step": 6360 }, { "epoch": 1.86, "learning_rate": 1.900411601916234e-07, "logits/chosen": -2.716977834701538, "logits/rejected": -2.726975917816162, "logps/chosen": -188.70948791503906, "logps/rejected": -186.00656127929688, "loss": 0.9243, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.22361302375793457, "rewards/margins": 0.07853808254003525, "rewards/rejected": -0.3021511137485504, "step": 6370 }, { "epoch": 1.86, "learning_rate": 1.8921765502338905e-07, "logits/chosen": -2.758565902709961, "logits/rejected": -2.7778704166412354, "logps/chosen": -205.9553680419922, "logps/rejected": -211.300537109375, "loss": 0.9231, "rewards/accuracies": 0.59375, "rewards/chosen": -0.2435857355594635, "rewards/margins": 0.08927027136087418, "rewards/rejected": -0.33285602927207947, "step": 6380 }, { "epoch": 1.86, "learning_rate": 1.8839485021934633e-07, "logits/chosen": -2.7537710666656494, "logits/rejected": -2.7532780170440674, "logps/chosen": -191.62831115722656, "logps/rejected": -187.5370330810547, "loss": 0.938, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.2515445351600647, "rewards/margins": 0.049747172743082047, "rewards/rejected": -0.30129170417785645, "step": 6390 }, { "epoch": 1.87, "learning_rate": 1.8757275526025857e-07, "logits/chosen": -2.729072332382202, "logits/rejected": -2.7408745288848877, "logps/chosen": -200.15261840820312, "logps/rejected": -198.6827392578125, "loss": 0.9091, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.19985443353652954, "rewards/margins": 0.10629860311746597, "rewards/rejected": -0.3061530590057373, "step": 6400 }, { "epoch": 1.87, "eval_logits/chosen": -2.658572196960449, "eval_logits/rejected": -2.653358221054077, "eval_logps/chosen": -197.71670532226562, "eval_logps/rejected": -183.82521057128906, "eval_loss": 0.9324252605438232, "eval_rewards/accuracies": 0.5759687423706055, "eval_rewards/chosen": -0.21114031970500946, "eval_rewards/margins": 0.07925137132406235, "eval_rewards/rejected": -0.2903916537761688, "eval_runtime": 443.281, "eval_samples_per_second": 26.541, "eval_steps_per_second": 3.318, "step": 6400 }, { "epoch": 1.87, "learning_rate": 1.8675137961870969e-07, "logits/chosen": -2.696608781814575, "logits/rejected": -2.721331834793091, "logps/chosen": -169.80068969726562, "logps/rejected": -181.3059539794922, "loss": 0.9307, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.2106015682220459, "rewards/margins": 0.05789356306195259, "rewards/rejected": -0.2684951424598694, "step": 6410 }, { "epoch": 1.87, "learning_rate": 1.8593073275899555e-07, "logits/chosen": -2.7464489936828613, "logits/rejected": -2.7503392696380615, "logps/chosen": -198.9092559814453, "logps/rejected": -187.02224731445312, "loss": 0.9264, "rewards/accuracies": 0.5625, "rewards/chosen": -0.21138563752174377, "rewards/margins": 0.0724065825343132, "rewards/rejected": -0.2837921977043152, "step": 6420 }, { "epoch": 1.88, "learning_rate": 1.851108241370143e-07, "logits/chosen": -2.738217353820801, "logits/rejected": -2.7316102981567383, "logps/chosen": -193.5830841064453, "logps/rejected": -185.67431640625, "loss": 0.9206, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.19683505594730377, "rewards/margins": 0.08236946910619736, "rewards/rejected": -0.27920451760292053, "step": 6430 }, { "epoch": 1.88, "learning_rate": 1.8429166320015816e-07, "logits/chosen": -2.7444231510162354, "logits/rejected": -2.710388660430908, "logps/chosen": -199.27511596679688, "logps/rejected": -178.7822265625, "loss": 0.9369, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.222540020942688, "rewards/margins": 0.03969267010688782, "rewards/rejected": -0.2622326910495758, "step": 6440 }, { "epoch": 1.88, "learning_rate": 1.834732593872037e-07, "logits/chosen": -2.722276210784912, "logits/rejected": -2.7346787452697754, "logps/chosen": -194.71664428710938, "logps/rejected": -189.44082641601562, "loss": 0.9313, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.21700486540794373, "rewards/margins": 0.0775427371263504, "rewards/rejected": -0.2945476174354553, "step": 6450 }, { "epoch": 1.88, "learning_rate": 1.826556221282039e-07, "logits/chosen": -2.7683663368225098, "logits/rejected": -2.77569317817688, "logps/chosen": -194.61509704589844, "logps/rejected": -184.4914093017578, "loss": 0.9441, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.25148120522499084, "rewards/margins": 0.08030495792627335, "rewards/rejected": -0.3317861557006836, "step": 6460 }, { "epoch": 1.89, "learning_rate": 1.81838760844379e-07, "logits/chosen": -2.733006477355957, "logits/rejected": -2.7150261402130127, "logps/chosen": -211.0814666748047, "logps/rejected": -180.62013244628906, "loss": 0.9164, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.20144124329090118, "rewards/margins": 0.08752738684415817, "rewards/rejected": -0.28896862268447876, "step": 6470 }, { "epoch": 1.89, "learning_rate": 1.81022684948008e-07, "logits/chosen": -2.7351765632629395, "logits/rejected": -2.7202677726745605, "logps/chosen": -205.88320922851562, "logps/rejected": -176.65267944335938, "loss": 0.9175, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.19780614972114563, "rewards/margins": 0.078305684030056, "rewards/rejected": -0.27611178159713745, "step": 6480 }, { "epoch": 1.89, "learning_rate": 1.8020740384232037e-07, "logits/chosen": -2.729175329208374, "logits/rejected": -2.7268869876861572, "logps/chosen": -193.4375457763672, "logps/rejected": -173.4163360595703, "loss": 0.9309, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.21653446555137634, "rewards/margins": 0.07103071361780167, "rewards/rejected": -0.2875651717185974, "step": 6490 }, { "epoch": 1.9, "learning_rate": 1.7939292692138753e-07, "logits/chosen": -2.7279040813446045, "logits/rejected": -2.752498149871826, "logps/chosen": -202.0452423095703, "logps/rejected": -195.004150390625, "loss": 0.9094, "rewards/accuracies": 0.609375, "rewards/chosen": -0.2281557023525238, "rewards/margins": 0.10120411962270737, "rewards/rejected": -0.32935982942581177, "step": 6500 }, { "epoch": 1.9, "eval_logits/chosen": -2.6529784202575684, "eval_logits/rejected": -2.6476540565490723, "eval_logps/chosen": -197.72872924804688, "eval_logps/rejected": -183.82424926757812, "eval_loss": 0.9320737719535828, "eval_rewards/accuracies": 0.5769884586334229, "eval_rewards/chosen": -0.21234209835529327, "eval_rewards/margins": 0.0779537484049797, "eval_rewards/rejected": -0.2902958393096924, "eval_runtime": 443.3609, "eval_samples_per_second": 26.536, "eval_steps_per_second": 3.318, "step": 6500 }, { "epoch": 1.9, "learning_rate": 1.785792635700148e-07, "logits/chosen": -2.7295122146606445, "logits/rejected": -2.732819080352783, "logps/chosen": -205.765380859375, "logps/rejected": -192.5811004638672, "loss": 0.9115, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.24779972434043884, "rewards/margins": 0.0829542875289917, "rewards/rejected": -0.33075404167175293, "step": 6510 }, { "epoch": 1.9, "learning_rate": 1.777664231636329e-07, "logits/chosen": -2.7400591373443604, "logits/rejected": -2.7352259159088135, "logps/chosen": -198.49118041992188, "logps/rejected": -179.13111877441406, "loss": 0.9279, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.2171938121318817, "rewards/margins": 0.09275706857442856, "rewards/rejected": -0.30995088815689087, "step": 6520 }, { "epoch": 1.9, "learning_rate": 1.7695441506819058e-07, "logits/chosen": -2.7267587184906006, "logits/rejected": -2.732179641723633, "logps/chosen": -195.60536193847656, "logps/rejected": -188.46739196777344, "loss": 0.9345, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.22522278130054474, "rewards/margins": 0.073407843708992, "rewards/rejected": -0.29863062500953674, "step": 6530 }, { "epoch": 1.91, "learning_rate": 1.7614324864004604e-07, "logits/chosen": -2.740993022918701, "logits/rejected": -2.7591586112976074, "logps/chosen": -207.13516235351562, "logps/rejected": -204.57444763183594, "loss": 0.915, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.2111998349428177, "rewards/margins": 0.0766361653804779, "rewards/rejected": -0.2878360152244568, "step": 6540 }, { "epoch": 1.91, "learning_rate": 1.753329332258593e-07, "logits/chosen": -2.782977342605591, "logits/rejected": -2.7586865425109863, "logps/chosen": -210.08950805664062, "logps/rejected": -190.40237426757812, "loss": 0.9359, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.23711267113685608, "rewards/margins": 0.053105391561985016, "rewards/rejected": -0.2902180552482605, "step": 6550 }, { "epoch": 1.91, "learning_rate": 1.7452347816248476e-07, "logits/chosen": -2.734740972518921, "logits/rejected": -2.7349419593811035, "logps/chosen": -199.7351837158203, "logps/rejected": -184.5938720703125, "loss": 0.9084, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.22013111412525177, "rewards/margins": 0.10203494131565094, "rewards/rejected": -0.3221660256385803, "step": 6560 }, { "epoch": 1.92, "learning_rate": 1.7371489277686325e-07, "logits/chosen": -2.711773157119751, "logits/rejected": -2.713780641555786, "logps/chosen": -191.1211700439453, "logps/rejected": -174.2381591796875, "loss": 0.9119, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.16844011843204498, "rewards/margins": 0.11107321083545685, "rewards/rejected": -0.27951332926750183, "step": 6570 }, { "epoch": 1.92, "learning_rate": 1.7290718638591492e-07, "logits/chosen": -2.739809036254883, "logits/rejected": -2.754399538040161, "logps/chosen": -176.8489532470703, "logps/rejected": -178.35150146484375, "loss": 0.9403, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.23008951544761658, "rewards/margins": 0.05764657258987427, "rewards/rejected": -0.28773611783981323, "step": 6580 }, { "epoch": 1.92, "learning_rate": 1.721003682964316e-07, "logits/chosen": -2.7267158031463623, "logits/rejected": -2.701935291290283, "logps/chosen": -212.7582550048828, "logps/rejected": -187.20980834960938, "loss": 0.9176, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.2046319991350174, "rewards/margins": 0.0942133218050003, "rewards/rejected": -0.2988453209400177, "step": 6590 }, { "epoch": 1.93, "learning_rate": 1.7129444780496972e-07, "logits/chosen": -2.74074649810791, "logits/rejected": -2.746044874191284, "logps/chosen": -202.53713989257812, "logps/rejected": -191.43814086914062, "loss": 0.9449, "rewards/accuracies": 0.546875, "rewards/chosen": -0.22108259797096252, "rewards/margins": 0.07247807830572128, "rewards/rejected": -0.2935606837272644, "step": 6600 }, { "epoch": 1.93, "eval_logits/chosen": -2.6358284950256348, "eval_logits/rejected": -2.6301984786987305, "eval_logps/chosen": -197.7251434326172, "eval_logps/rejected": -183.8246307373047, "eval_loss": 0.932049036026001, "eval_rewards/accuracies": 0.5795377492904663, "eval_rewards/chosen": -0.2119826078414917, "eval_rewards/margins": 0.07835091650485992, "eval_rewards/rejected": -0.29033350944519043, "eval_runtime": 443.2884, "eval_samples_per_second": 26.54, "eval_steps_per_second": 3.318, "step": 6600 }, { "epoch": 1.93, "learning_rate": 1.7048943419774307e-07, "logits/chosen": -2.747640609741211, "logits/rejected": -2.730374813079834, "logps/chosen": -203.86410522460938, "logps/rejected": -194.0001983642578, "loss": 0.9269, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.22307124733924866, "rewards/margins": 0.0955260619521141, "rewards/rejected": -0.31859731674194336, "step": 6610 }, { "epoch": 1.93, "learning_rate": 1.69685336750516e-07, "logits/chosen": -2.760178327560425, "logits/rejected": -2.7309234142303467, "logps/chosen": -221.4056396484375, "logps/rejected": -194.12762451171875, "loss": 0.9376, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2214629203081131, "rewards/margins": 0.06823597103357315, "rewards/rejected": -0.28969889879226685, "step": 6620 }, { "epoch": 1.93, "learning_rate": 1.6888216472849638e-07, "logits/chosen": -2.710864305496216, "logits/rejected": -2.733487129211426, "logps/chosen": -161.94752502441406, "logps/rejected": -181.10289001464844, "loss": 0.9176, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.22365054488182068, "rewards/margins": 0.06910266727209091, "rewards/rejected": -0.2927531898021698, "step": 6630 }, { "epoch": 1.94, "learning_rate": 1.6807992738622871e-07, "logits/chosen": -2.7660770416259766, "logits/rejected": -2.7430419921875, "logps/chosen": -223.51760864257812, "logps/rejected": -193.828857421875, "loss": 0.9266, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21513095498085022, "rewards/margins": 0.09501657634973526, "rewards/rejected": -0.3101475238800049, "step": 6640 }, { "epoch": 1.94, "learning_rate": 1.6727863396748793e-07, "logits/chosen": -2.7407381534576416, "logits/rejected": -2.746407985687256, "logps/chosen": -215.10440063476562, "logps/rejected": -196.6473846435547, "loss": 0.9003, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.19051149487495422, "rewards/margins": 0.14527949690818787, "rewards/rejected": -0.3357909619808197, "step": 6650 }, { "epoch": 1.94, "learning_rate": 1.6647829370517231e-07, "logits/chosen": -2.7251319885253906, "logits/rejected": -2.717182159423828, "logps/chosen": -186.213134765625, "logps/rejected": -174.87966918945312, "loss": 0.9504, "rewards/accuracies": 0.515625, "rewards/chosen": -0.24467504024505615, "rewards/margins": 0.03715928643941879, "rewards/rejected": -0.28183430433273315, "step": 6660 }, { "epoch": 1.95, "learning_rate": 1.6567891582119764e-07, "logits/chosen": -2.729135513305664, "logits/rejected": -2.7299442291259766, "logps/chosen": -185.3548583984375, "logps/rejected": -179.4267578125, "loss": 0.9416, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.2128109484910965, "rewards/margins": 0.05870833247900009, "rewards/rejected": -0.2715193033218384, "step": 6670 }, { "epoch": 1.95, "learning_rate": 1.6488050952639056e-07, "logits/chosen": -2.7212636470794678, "logits/rejected": -2.7113900184631348, "logps/chosen": -210.2383270263672, "logps/rejected": -193.41522216796875, "loss": 0.9143, "rewards/accuracies": 0.59375, "rewards/chosen": -0.226904034614563, "rewards/margins": 0.10393612086772919, "rewards/rejected": -0.33084017038345337, "step": 6680 }, { "epoch": 1.95, "learning_rate": 1.6408308402038267e-07, "logits/chosen": -2.7432644367218018, "logits/rejected": -2.74440598487854, "logps/chosen": -202.8652801513672, "logps/rejected": -188.2684326171875, "loss": 0.9243, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.22147603332996368, "rewards/margins": 0.05875271558761597, "rewards/rejected": -0.28022870421409607, "step": 6690 }, { "epoch": 1.95, "learning_rate": 1.632866484915043e-07, "logits/chosen": -2.768315553665161, "logits/rejected": -2.7549726963043213, "logps/chosen": -210.89108276367188, "logps/rejected": -196.1284637451172, "loss": 0.9404, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.22763648629188538, "rewards/margins": 0.08506520837545395, "rewards/rejected": -0.3127017021179199, "step": 6700 }, { "epoch": 1.95, "eval_logits/chosen": -2.6496846675872803, "eval_logits/rejected": -2.644331455230713, "eval_logps/chosen": -197.72042846679688, "eval_logps/rejected": -183.8302001953125, "eval_loss": 0.9318643808364868, "eval_rewards/accuracies": 0.5802175402641296, "eval_rewards/chosen": -0.21151039004325867, "eval_rewards/margins": 0.07937860488891602, "eval_rewards/rejected": -0.29088902473449707, "eval_runtime": 443.3291, "eval_samples_per_second": 26.538, "eval_steps_per_second": 3.318, "step": 6700 }, { "epoch": 1.96, "learning_rate": 1.62491212116679e-07, "logits/chosen": -2.7260818481445312, "logits/rejected": -2.7126994132995605, "logps/chosen": -191.04051208496094, "logps/rejected": -170.42739868164062, "loss": 0.9243, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.21634069085121155, "rewards/margins": 0.0933542475104332, "rewards/rejected": -0.30969494581222534, "step": 6710 }, { "epoch": 1.96, "learning_rate": 1.616967840613175e-07, "logits/chosen": -2.744236946105957, "logits/rejected": -2.7506120204925537, "logps/chosen": -191.96136474609375, "logps/rejected": -186.02713012695312, "loss": 0.9111, "rewards/accuracies": 0.59375, "rewards/chosen": -0.19615641236305237, "rewards/margins": 0.10042214393615723, "rewards/rejected": -0.2965785562992096, "step": 6720 }, { "epoch": 1.96, "learning_rate": 1.609033734792119e-07, "logits/chosen": -2.7435319423675537, "logits/rejected": -2.7113170623779297, "logps/chosen": -198.1565399169922, "logps/rejected": -166.88198852539062, "loss": 0.9242, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2112916260957718, "rewards/margins": 0.07510940730571747, "rewards/rejected": -0.28640103340148926, "step": 6730 }, { "epoch": 1.97, "learning_rate": 1.6011098951243093e-07, "logits/chosen": -2.7205939292907715, "logits/rejected": -2.7266383171081543, "logps/chosen": -180.89730834960938, "logps/rejected": -172.36257934570312, "loss": 0.923, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21849095821380615, "rewards/margins": 0.07519636303186417, "rewards/rejected": -0.2936873435974121, "step": 6740 }, { "epoch": 1.97, "learning_rate": 1.5931964129121376e-07, "logits/chosen": -2.7276265621185303, "logits/rejected": -2.7258830070495605, "logps/chosen": -189.77908325195312, "logps/rejected": -181.21559143066406, "loss": 0.9416, "rewards/accuracies": 0.578125, "rewards/chosen": -0.22878125309944153, "rewards/margins": 0.06122662499547005, "rewards/rejected": -0.29000788927078247, "step": 6750 }, { "epoch": 1.97, "learning_rate": 1.5852933793386543e-07, "logits/chosen": -2.7102389335632324, "logits/rejected": -2.730618476867676, "logps/chosen": -180.26690673828125, "logps/rejected": -180.7349395751953, "loss": 0.9272, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.22268176078796387, "rewards/margins": 0.07356902211904526, "rewards/rejected": -0.2962507903575897, "step": 6760 }, { "epoch": 1.97, "learning_rate": 1.5774008854665136e-07, "logits/chosen": -2.73473858833313, "logits/rejected": -2.7441117763519287, "logps/chosen": -187.04324340820312, "logps/rejected": -191.31651306152344, "loss": 0.9105, "rewards/accuracies": 0.578125, "rewards/chosen": -0.20239019393920898, "rewards/margins": 0.10861504077911377, "rewards/rejected": -0.31100520491600037, "step": 6770 }, { "epoch": 1.98, "learning_rate": 1.569519022236928e-07, "logits/chosen": -2.732327699661255, "logits/rejected": -2.726734161376953, "logps/chosen": -196.94210815429688, "logps/rejected": -190.38333129882812, "loss": 0.9269, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.21858474612236023, "rewards/margins": 0.06861617416143417, "rewards/rejected": -0.2872008979320526, "step": 6780 }, { "epoch": 1.98, "learning_rate": 1.5616478804686162e-07, "logits/chosen": -2.739928722381592, "logits/rejected": -2.7340385913848877, "logps/chosen": -197.3428955078125, "logps/rejected": -181.82748413085938, "loss": 0.9276, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.23716449737548828, "rewards/margins": 0.06081245467066765, "rewards/rejected": -0.29797694087028503, "step": 6790 }, { "epoch": 1.98, "learning_rate": 1.5537875508567618e-07, "logits/chosen": -2.7353885173797607, "logits/rejected": -2.737687826156616, "logps/chosen": -183.25909423828125, "logps/rejected": -175.84182739257812, "loss": 0.9155, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.1979648917913437, "rewards/margins": 0.08871322125196457, "rewards/rejected": -0.2866780757904053, "step": 6800 }, { "epoch": 1.98, "eval_logits/chosen": -2.6362035274505615, "eval_logits/rejected": -2.6305882930755615, "eval_logps/chosen": -197.72909545898438, "eval_logps/rejected": -183.84056091308594, "eval_loss": 0.9314205050468445, "eval_rewards/accuracies": 0.5825968980789185, "eval_rewards/chosen": -0.21238040924072266, "eval_rewards/margins": 0.07954783737659454, "eval_rewards/rejected": -0.2919282615184784, "eval_runtime": 443.3072, "eval_samples_per_second": 26.539, "eval_steps_per_second": 3.318, "step": 6800 }, { "epoch": 1.99, "learning_rate": 1.5459381239719637e-07, "logits/chosen": -2.7328858375549316, "logits/rejected": -2.7300026416778564, "logps/chosen": -192.85726928710938, "logps/rejected": -182.21939086914062, "loss": 0.9299, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.24222655594348907, "rewards/margins": 0.06386517733335495, "rewards/rejected": -0.3060917258262634, "step": 6810 }, { "epoch": 1.99, "learning_rate": 1.538099690259193e-07, "logits/chosen": -2.7482893466949463, "logits/rejected": -2.774064540863037, "logps/chosen": -198.48825073242188, "logps/rejected": -204.14930725097656, "loss": 0.9155, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.21093496680259705, "rewards/margins": 0.10215537250041962, "rewards/rejected": -0.31309035420417786, "step": 6820 }, { "epoch": 1.99, "learning_rate": 1.5302723400367547e-07, "logits/chosen": -2.7298903465270996, "logits/rejected": -2.720515251159668, "logps/chosen": -204.21322631835938, "logps/rejected": -180.675537109375, "loss": 0.9181, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1895557940006256, "rewards/margins": 0.08064098656177521, "rewards/rejected": -0.270196795463562, "step": 6830 }, { "epoch": 2.0, "learning_rate": 1.522456163495242e-07, "logits/chosen": -2.721670627593994, "logits/rejected": -2.714440107345581, "logps/chosen": -203.53138732910156, "logps/rejected": -188.99951171875, "loss": 0.9137, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.21865499019622803, "rewards/margins": 0.1239236369729042, "rewards/rejected": -0.34257861971855164, "step": 6840 }, { "epoch": 2.0, "learning_rate": 1.5146512506965013e-07, "logits/chosen": -2.751051187515259, "logits/rejected": -2.725895643234253, "logps/chosen": -192.48666381835938, "logps/rejected": -174.069580078125, "loss": 0.9222, "rewards/accuracies": 0.5625, "rewards/chosen": -0.23686334490776062, "rewards/margins": 0.06425967812538147, "rewards/rejected": -0.3011230528354645, "step": 6850 }, { "epoch": 2.0, "learning_rate": 1.5068576915725912e-07, "logits/chosen": -2.752620220184326, "logits/rejected": -2.7401299476623535, "logps/chosen": -217.71902465820312, "logps/rejected": -194.8961181640625, "loss": 0.9327, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.20294050872325897, "rewards/margins": 0.08372064679861069, "rewards/rejected": -0.28666117787361145, "step": 6860 }, { "epoch": 2.0, "learning_rate": 1.4990755759247485e-07, "logits/chosen": -2.7282445430755615, "logits/rejected": -2.7199878692626953, "logps/chosen": -201.61978149414062, "logps/rejected": -178.6517791748047, "loss": 0.9403, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.21178099513053894, "rewards/margins": 0.09103866666555405, "rewards/rejected": -0.3028196692466736, "step": 6870 }, { "epoch": 2.01, "learning_rate": 1.4913049934223516e-07, "logits/chosen": -2.7153429985046387, "logits/rejected": -2.7482893466949463, "logps/chosen": -197.48809814453125, "logps/rejected": -202.34713745117188, "loss": 0.9031, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.20829913020133972, "rewards/margins": 0.09428951889276505, "rewards/rejected": -0.30258864164352417, "step": 6880 }, { "epoch": 2.01, "learning_rate": 1.4835460336018895e-07, "logits/chosen": -2.7747058868408203, "logits/rejected": -2.7874245643615723, "logps/chosen": -212.27536010742188, "logps/rejected": -207.9674072265625, "loss": 0.8989, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.2062179148197174, "rewards/margins": 0.13777431845664978, "rewards/rejected": -0.3439922332763672, "step": 6890 }, { "epoch": 2.01, "learning_rate": 1.4757987858659296e-07, "logits/chosen": -2.7338640689849854, "logits/rejected": -2.7241692543029785, "logps/chosen": -202.30865478515625, "logps/rejected": -183.61050415039062, "loss": 0.9328, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.21838633716106415, "rewards/margins": 0.0648864209651947, "rewards/rejected": -0.28327274322509766, "step": 6900 }, { "epoch": 2.01, "eval_logits/chosen": -2.6352241039276123, "eval_logits/rejected": -2.6295835971832275, "eval_logps/chosen": -197.7320556640625, "eval_logps/rejected": -183.84559631347656, "eval_loss": 0.9312693476676941, "eval_rewards/accuracies": 0.5883752703666687, "eval_rewards/chosen": -0.212674081325531, "eval_rewards/margins": 0.07975810021162033, "eval_rewards/rejected": -0.29243215918540955, "eval_runtime": 443.3007, "eval_samples_per_second": 26.54, "eval_steps_per_second": 3.318, "step": 6900 }, { "epoch": 2.02, "learning_rate": 1.468063339482084e-07, "logits/chosen": -2.70162034034729, "logits/rejected": -2.7118117809295654, "logps/chosen": -176.92330932617188, "logps/rejected": -171.45474243164062, "loss": 0.9329, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.24294695258140564, "rewards/margins": 0.07506359368562698, "rewards/rejected": -0.3180105686187744, "step": 6910 }, { "epoch": 2.02, "learning_rate": 1.4603397835819864e-07, "logits/chosen": -2.748415946960449, "logits/rejected": -2.758174419403076, "logps/chosen": -215.23764038085938, "logps/rejected": -211.4716339111328, "loss": 0.9061, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.21699877083301544, "rewards/margins": 0.11743853241205215, "rewards/rejected": -0.3344372808933258, "step": 6920 }, { "epoch": 2.02, "learning_rate": 1.4526282071602602e-07, "logits/chosen": -2.735396146774292, "logits/rejected": -2.740246295928955, "logps/chosen": -190.8970947265625, "logps/rejected": -185.86720275878906, "loss": 0.9009, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.19201794266700745, "rewards/margins": 0.09687992185354233, "rewards/rejected": -0.288897842168808, "step": 6930 }, { "epoch": 2.02, "learning_rate": 1.4449286990734992e-07, "logits/chosen": -2.7192721366882324, "logits/rejected": -2.7274882793426514, "logps/chosen": -193.3112030029297, "logps/rejected": -187.61007690429688, "loss": 0.9233, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.2464873343706131, "rewards/margins": 0.07503970712423325, "rewards/rejected": -0.32152706384658813, "step": 6940 }, { "epoch": 2.03, "learning_rate": 1.437241348039236e-07, "logits/chosen": -2.7276501655578613, "logits/rejected": -2.774181365966797, "logps/chosen": -197.47341918945312, "logps/rejected": -204.94204711914062, "loss": 0.8835, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.23753318190574646, "rewards/margins": 0.11532554775476456, "rewards/rejected": -0.3528587222099304, "step": 6950 }, { "epoch": 2.03, "learning_rate": 1.4295662426349253e-07, "logits/chosen": -2.7351794242858887, "logits/rejected": -2.7332804203033447, "logps/chosen": -174.01608276367188, "logps/rejected": -174.34007263183594, "loss": 0.9153, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.20454581081867218, "rewards/margins": 0.08692500740289688, "rewards/rejected": -0.29147082567214966, "step": 6960 }, { "epoch": 2.03, "learning_rate": 1.4219034712969196e-07, "logits/chosen": -2.721445083618164, "logits/rejected": -2.7228879928588867, "logps/chosen": -201.0042724609375, "logps/rejected": -187.95651245117188, "loss": 0.9163, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2132536619901657, "rewards/margins": 0.07119356095790863, "rewards/rejected": -0.28444722294807434, "step": 6970 }, { "epoch": 2.04, "learning_rate": 1.414253122319457e-07, "logits/chosen": -2.7350051403045654, "logits/rejected": -2.7252516746520996, "logps/chosen": -204.6690673828125, "logps/rejected": -186.09927368164062, "loss": 0.913, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.20454342663288116, "rewards/margins": 0.09902454912662506, "rewards/rejected": -0.30356794595718384, "step": 6980 }, { "epoch": 2.04, "learning_rate": 1.4066152838536344e-07, "logits/chosen": -2.734398603439331, "logits/rejected": -2.72821307182312, "logps/chosen": -195.73269653320312, "logps/rejected": -177.6675567626953, "loss": 0.9175, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.23298010230064392, "rewards/margins": 0.08028466999530792, "rewards/rejected": -0.31326478719711304, "step": 6990 }, { "epoch": 2.04, "learning_rate": 1.398990043906397e-07, "logits/chosen": -2.723540782928467, "logits/rejected": -2.7197868824005127, "logps/chosen": -187.7908172607422, "logps/rejected": -179.2284393310547, "loss": 0.9012, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.23153071105480194, "rewards/margins": 0.0925595834851265, "rewards/rejected": -0.32409027218818665, "step": 7000 }, { "epoch": 2.04, "eval_logits/chosen": -2.6416141986846924, "eval_logits/rejected": -2.6360816955566406, "eval_logps/chosen": -197.75148010253906, "eval_logps/rejected": -183.8529510498047, "eval_loss": 0.9321034550666809, "eval_rewards/accuracies": 0.5766485333442688, "eval_rewards/chosen": -0.21461807191371918, "eval_rewards/margins": 0.07854754477739334, "eval_rewards/rejected": -0.2931656241416931, "eval_runtime": 443.3078, "eval_samples_per_second": 26.539, "eval_steps_per_second": 3.318, "step": 7000 }, { "epoch": 2.04, "learning_rate": 1.3913774903395268e-07, "logits/chosen": -2.7158100605010986, "logits/rejected": -2.7378973960876465, "logps/chosen": -169.9608612060547, "logps/rejected": -175.0387420654297, "loss": 0.9392, "rewards/accuracies": 0.578125, "rewards/chosen": -0.2130027711391449, "rewards/margins": 0.06254032254219055, "rewards/rejected": -0.27554312348365784, "step": 7010 }, { "epoch": 2.05, "learning_rate": 1.3837777108686226e-07, "logits/chosen": -2.7293105125427246, "logits/rejected": -2.7177674770355225, "logps/chosen": -196.72335815429688, "logps/rejected": -182.2796173095703, "loss": 0.9184, "rewards/accuracies": 0.578125, "rewards/chosen": -0.22791095077991486, "rewards/margins": 0.0950903668999672, "rewards/rejected": -0.32300129532814026, "step": 7020 }, { "epoch": 2.05, "learning_rate": 1.376190793062098e-07, "logits/chosen": -2.7231078147888184, "logits/rejected": -2.741497755050659, "logps/chosen": -177.41726684570312, "logps/rejected": -179.03158569335938, "loss": 0.9427, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.228541761636734, "rewards/margins": 0.04261890798807144, "rewards/rejected": -0.27116066217422485, "step": 7030 }, { "epoch": 2.05, "learning_rate": 1.3686168243401657e-07, "logits/chosen": -2.7197694778442383, "logits/rejected": -2.726256847381592, "logps/chosen": -209.73495483398438, "logps/rejected": -202.67848205566406, "loss": 0.9301, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20739248394966125, "rewards/margins": 0.10595047473907471, "rewards/rejected": -0.31334295868873596, "step": 7040 }, { "epoch": 2.06, "learning_rate": 1.361055891973833e-07, "logits/chosen": -2.7275664806365967, "logits/rejected": -2.7401270866394043, "logps/chosen": -191.7522735595703, "logps/rejected": -185.98114013671875, "loss": 0.9006, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.18320171535015106, "rewards/margins": 0.13302107155323029, "rewards/rejected": -0.31622275710105896, "step": 7050 }, { "epoch": 2.06, "learning_rate": 1.353508083083895e-07, "logits/chosen": -2.7291016578674316, "logits/rejected": -2.7423441410064697, "logps/chosen": -186.4053497314453, "logps/rejected": -173.64749145507812, "loss": 0.9185, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.21921300888061523, "rewards/margins": 0.08114122599363327, "rewards/rejected": -0.3003542423248291, "step": 7060 }, { "epoch": 2.06, "learning_rate": 1.3459734846399341e-07, "logits/chosen": -2.733018398284912, "logits/rejected": -2.741650104522705, "logps/chosen": -202.03048706054688, "logps/rejected": -192.7786865234375, "loss": 0.931, "rewards/accuracies": 0.59375, "rewards/chosen": -0.20718999207019806, "rewards/margins": 0.09407447278499603, "rewards/rejected": -0.3012644648551941, "step": 7070 }, { "epoch": 2.07, "learning_rate": 1.3384521834593137e-07, "logits/chosen": -2.7663657665252686, "logits/rejected": -2.7430965900421143, "logps/chosen": -212.329833984375, "logps/rejected": -184.7091827392578, "loss": 0.9236, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.23409593105316162, "rewards/margins": 0.09491056948900223, "rewards/rejected": -0.32900652289390564, "step": 7080 }, { "epoch": 2.07, "learning_rate": 1.3309442662061786e-07, "logits/chosen": -2.731829881668091, "logits/rejected": -2.7377142906188965, "logps/chosen": -199.4453125, "logps/rejected": -174.1707000732422, "loss": 0.9292, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.20361220836639404, "rewards/margins": 0.08883138000965118, "rewards/rejected": -0.2924436032772064, "step": 7090 }, { "epoch": 2.07, "learning_rate": 1.3234498193904608e-07, "logits/chosen": -2.7473537921905518, "logits/rejected": -2.765782594680786, "logps/chosen": -187.45675659179688, "logps/rejected": -183.682373046875, "loss": 0.9296, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20840564370155334, "rewards/margins": 0.08556272089481354, "rewards/rejected": -0.2939683794975281, "step": 7100 }, { "epoch": 2.07, "eval_logits/chosen": -2.651341199874878, "eval_logits/rejected": -2.6459922790527344, "eval_logps/chosen": -197.7322540283203, "eval_logps/rejected": -183.85069274902344, "eval_loss": 0.9314108490943909, "eval_rewards/accuracies": 0.5780081748962402, "eval_rewards/chosen": -0.21269488334655762, "eval_rewards/margins": 0.08024676889181137, "eval_rewards/rejected": -0.2929416596889496, "eval_runtime": 443.2974, "eval_samples_per_second": 26.54, "eval_steps_per_second": 3.318, "step": 7100 }, { "epoch": 2.07, "learning_rate": 1.3159689293668748e-07, "logits/chosen": -2.751176357269287, "logits/rejected": -2.738022565841675, "logps/chosen": -198.71163940429688, "logps/rejected": -175.29800415039062, "loss": 0.9342, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.2539035975933075, "rewards/margins": 0.04478713870048523, "rewards/rejected": -0.2986907362937927, "step": 7110 }, { "epoch": 2.08, "learning_rate": 1.308501682333931e-07, "logits/chosen": -2.730717897415161, "logits/rejected": -2.731663703918457, "logps/chosen": -189.55604553222656, "logps/rejected": -179.62344360351562, "loss": 0.9256, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.20553156733512878, "rewards/margins": 0.07954994589090347, "rewards/rejected": -0.28508150577545166, "step": 7120 }, { "epoch": 2.08, "learning_rate": 1.3010481643329368e-07, "logits/chosen": -2.7590889930725098, "logits/rejected": -2.7237637042999268, "logps/chosen": -172.84356689453125, "logps/rejected": -149.15286254882812, "loss": 0.9285, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.21032579243183136, "rewards/margins": 0.07369138300418854, "rewards/rejected": -0.2840171456336975, "step": 7130 }, { "epoch": 2.08, "learning_rate": 1.2936084612470065e-07, "logits/chosen": -2.763084888458252, "logits/rejected": -2.7535722255706787, "logps/chosen": -205.8310546875, "logps/rejected": -190.03878784179688, "loss": 0.9197, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2162288874387741, "rewards/margins": 0.08808388561010361, "rewards/rejected": -0.3043127655982971, "step": 7140 }, { "epoch": 2.09, "learning_rate": 1.2861826588000712e-07, "logits/chosen": -2.7227349281311035, "logits/rejected": -2.743065357208252, "logps/chosen": -190.4771270751953, "logps/rejected": -186.38426208496094, "loss": 0.9075, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.22551707923412323, "rewards/margins": 0.08020542562007904, "rewards/rejected": -0.30572250485420227, "step": 7150 }, { "epoch": 2.09, "learning_rate": 1.278770842555896e-07, "logits/chosen": -2.731098175048828, "logits/rejected": -2.719924211502075, "logps/chosen": -188.09976196289062, "logps/rejected": -171.93310546875, "loss": 0.9163, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20485873520374298, "rewards/margins": 0.07870732247829437, "rewards/rejected": -0.28356605768203735, "step": 7160 }, { "epoch": 2.09, "learning_rate": 1.271373097917086e-07, "logits/chosen": -2.7509634494781494, "logits/rejected": -2.7207791805267334, "logps/chosen": -204.354248046875, "logps/rejected": -189.40679931640625, "loss": 0.9383, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.21421301364898682, "rewards/margins": 0.05646609514951706, "rewards/rejected": -0.2706790864467621, "step": 7170 }, { "epoch": 2.09, "learning_rate": 1.2639895101241073e-07, "logits/chosen": -2.770415782928467, "logits/rejected": -2.744168758392334, "logps/chosen": -204.50999450683594, "logps/rejected": -180.71023559570312, "loss": 0.9295, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.20703864097595215, "rewards/margins": 0.07825209945440292, "rewards/rejected": -0.2852907180786133, "step": 7180 }, { "epoch": 2.1, "learning_rate": 1.2566201642543054e-07, "logits/chosen": -2.7546815872192383, "logits/rejected": -2.7181484699249268, "logps/chosen": -205.6988067626953, "logps/rejected": -174.81179809570312, "loss": 0.907, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.19236141443252563, "rewards/margins": 0.09385807067155838, "rewards/rejected": -0.2862195074558258, "step": 7190 }, { "epoch": 2.1, "learning_rate": 1.249265145220921e-07, "logits/chosen": -2.725691318511963, "logits/rejected": -2.7101643085479736, "logps/chosen": -198.37322998046875, "logps/rejected": -186.52853393554688, "loss": 0.9076, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.24150443077087402, "rewards/margins": 0.11702193319797516, "rewards/rejected": -0.358526349067688, "step": 7200 }, { "epoch": 2.1, "eval_logits/chosen": -2.65535831451416, "eval_logits/rejected": -2.6500964164733887, "eval_logps/chosen": -197.7506866455078, "eval_logps/rejected": -183.865966796875, "eval_loss": 0.9315347075462341, "eval_rewards/accuracies": 0.579707682132721, "eval_rewards/chosen": -0.21453608572483063, "eval_rewards/margins": 0.07993295043706894, "eval_rewards/rejected": -0.29446902871131897, "eval_runtime": 443.2574, "eval_samples_per_second": 26.542, "eval_steps_per_second": 3.319, "step": 7200 }, { "epoch": 2.1, "learning_rate": 1.2419245377721166e-07, "logits/chosen": -2.744749069213867, "logits/rejected": -2.7440600395202637, "logps/chosen": -192.15850830078125, "logps/rejected": -178.24766540527344, "loss": 0.9182, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.2116578072309494, "rewards/margins": 0.06727829575538635, "rewards/rejected": -0.27893608808517456, "step": 7210 }, { "epoch": 2.11, "learning_rate": 1.2345984264899953e-07, "logits/chosen": -2.744431257247925, "logits/rejected": -2.7419965267181396, "logps/chosen": -204.25119018554688, "logps/rejected": -192.86651611328125, "loss": 0.9103, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.19741736352443695, "rewards/margins": 0.08304335176944733, "rewards/rejected": -0.2804606854915619, "step": 7220 }, { "epoch": 2.11, "learning_rate": 1.2272868957896285e-07, "logits/chosen": -2.735452890396118, "logits/rejected": -2.7617390155792236, "logps/chosen": -204.0012969970703, "logps/rejected": -200.9781951904297, "loss": 0.9228, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.20529159903526306, "rewards/margins": 0.10176429897546768, "rewards/rejected": -0.30705589056015015, "step": 7230 }, { "epoch": 2.11, "learning_rate": 1.2199900299180823e-07, "logits/chosen": -2.722921848297119, "logits/rejected": -2.7459399700164795, "logps/chosen": -193.969482421875, "logps/rejected": -197.5996856689453, "loss": 0.9122, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.22556057572364807, "rewards/margins": 0.10073423385620117, "rewards/rejected": -0.32629480957984924, "step": 7240 }, { "epoch": 2.11, "learning_rate": 1.2127079129534502e-07, "logits/chosen": -2.7340056896209717, "logits/rejected": -2.739253282546997, "logps/chosen": -192.6339569091797, "logps/rejected": -180.08802795410156, "loss": 0.9046, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.20382554829120636, "rewards/margins": 0.10780832916498184, "rewards/rejected": -0.3116338849067688, "step": 7250 }, { "epoch": 2.12, "learning_rate": 1.2054406288038787e-07, "logits/chosen": -2.751988410949707, "logits/rejected": -2.7704575061798096, "logps/chosen": -205.4809112548828, "logps/rejected": -196.97647094726562, "loss": 0.9161, "rewards/accuracies": 0.5625, "rewards/chosen": -0.19896064698696136, "rewards/margins": 0.11849311739206314, "rewards/rejected": -0.3174537718296051, "step": 7260 }, { "epoch": 2.12, "learning_rate": 1.198188261206603e-07, "logits/chosen": -2.7551045417785645, "logits/rejected": -2.750485897064209, "logps/chosen": -218.8869171142578, "logps/rejected": -208.7373046875, "loss": 0.9106, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.22199001908302307, "rewards/margins": 0.1098296269774437, "rewards/rejected": -0.33181968331336975, "step": 7270 }, { "epoch": 2.12, "learning_rate": 1.1909508937269857e-07, "logits/chosen": -2.7286875247955322, "logits/rejected": -2.7499356269836426, "logps/chosen": -191.07275390625, "logps/rejected": -179.6157684326172, "loss": 0.9193, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.1898176223039627, "rewards/margins": 0.0719531923532486, "rewards/rejected": -0.2617708146572113, "step": 7280 }, { "epoch": 2.13, "learning_rate": 1.1837286097575466e-07, "logits/chosen": -2.7580971717834473, "logits/rejected": -2.7623705863952637, "logps/chosen": -196.2259979248047, "logps/rejected": -181.41830444335938, "loss": 0.9182, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.21018242835998535, "rewards/margins": 0.08354298770427704, "rewards/rejected": -0.2937254309654236, "step": 7290 }, { "epoch": 2.13, "learning_rate": 1.1765214925170097e-07, "logits/chosen": -2.7369384765625, "logits/rejected": -2.742072582244873, "logps/chosen": -178.70603942871094, "logps/rejected": -172.1625213623047, "loss": 0.922, "rewards/accuracies": 0.578125, "rewards/chosen": -0.21116510033607483, "rewards/margins": 0.08774997293949127, "rewards/rejected": -0.2989150881767273, "step": 7300 }, { "epoch": 2.13, "eval_logits/chosen": -2.656193256378174, "eval_logits/rejected": -2.6509604454040527, "eval_logps/chosen": -197.75233459472656, "eval_logps/rejected": -183.85647583007812, "eval_loss": 0.9315423369407654, "eval_rewards/accuracies": 0.5791978240013123, "eval_rewards/chosen": -0.2147025316953659, "eval_rewards/margins": 0.07881684601306915, "eval_rewards/rejected": -0.29351937770843506, "eval_runtime": 443.3236, "eval_samples_per_second": 26.538, "eval_steps_per_second": 3.318, "step": 7300 }, { "epoch": 2.13, "learning_rate": 1.1693296250493376e-07, "logits/chosen": -2.7492427825927734, "logits/rejected": -2.758079767227173, "logps/chosen": -206.41268920898438, "logps/rejected": -205.76123046875, "loss": 0.9047, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.22261814773082733, "rewards/margins": 0.10148187726736069, "rewards/rejected": -0.3241000175476074, "step": 7310 }, { "epoch": 2.14, "learning_rate": 1.1621530902227781e-07, "logits/chosen": -2.745958089828491, "logits/rejected": -2.737013339996338, "logps/chosen": -192.68576049804688, "logps/rejected": -179.25247192382812, "loss": 0.9302, "rewards/accuracies": 0.5625, "rewards/chosen": -0.22553372383117676, "rewards/margins": 0.061601459980010986, "rewards/rejected": -0.28713518381118774, "step": 7320 }, { "epoch": 2.14, "learning_rate": 1.1549919707289088e-07, "logits/chosen": -2.722991466522217, "logits/rejected": -2.731642246246338, "logps/chosen": -164.6597442626953, "logps/rejected": -165.81527709960938, "loss": 0.9152, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.1901760846376419, "rewards/margins": 0.10527213662862778, "rewards/rejected": -0.2954482436180115, "step": 7330 }, { "epoch": 2.14, "learning_rate": 1.1478463490816858e-07, "logits/chosen": -2.7464330196380615, "logits/rejected": -2.751593589782715, "logps/chosen": -188.0258331298828, "logps/rejected": -181.29013061523438, "loss": 0.9107, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.2331833839416504, "rewards/margins": 0.08430353552103043, "rewards/rejected": -0.3174869418144226, "step": 7340 }, { "epoch": 2.14, "learning_rate": 1.1407163076164894e-07, "logits/chosen": -2.7444779872894287, "logits/rejected": -2.7680869102478027, "logps/chosen": -190.80401611328125, "logps/rejected": -192.6204833984375, "loss": 0.912, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.23358038067817688, "rewards/margins": 0.11961646378040314, "rewards/rejected": -0.35319679975509644, "step": 7350 }, { "epoch": 2.15, "learning_rate": 1.1336019284891767e-07, "logits/chosen": -2.722039222717285, "logits/rejected": -2.701793670654297, "logps/chosen": -185.11410522460938, "logps/rejected": -162.76284790039062, "loss": 0.9138, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.19528807699680328, "rewards/margins": 0.0973658487200737, "rewards/rejected": -0.2926539182662964, "step": 7360 }, { "epoch": 2.15, "learning_rate": 1.1265032936751395e-07, "logits/chosen": -2.725271224975586, "logits/rejected": -2.7608556747436523, "logps/chosen": -188.7613067626953, "logps/rejected": -195.23399353027344, "loss": 0.8958, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.22539019584655762, "rewards/margins": 0.10492382943630219, "rewards/rejected": -0.3303140103816986, "step": 7370 }, { "epoch": 2.15, "learning_rate": 1.1194204849683509e-07, "logits/chosen": -2.761343002319336, "logits/rejected": -2.734705686569214, "logps/chosen": -214.1074676513672, "logps/rejected": -187.6863250732422, "loss": 0.9251, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.2229997217655182, "rewards/margins": 0.06879208981990814, "rewards/rejected": -0.29179179668426514, "step": 7380 }, { "epoch": 2.16, "learning_rate": 1.1123535839804318e-07, "logits/chosen": -2.7466766834259033, "logits/rejected": -2.7543885707855225, "logps/chosen": -204.26852416992188, "logps/rejected": -199.51849365234375, "loss": 0.9263, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.22250142693519592, "rewards/margins": 0.08121421933174133, "rewards/rejected": -0.30371564626693726, "step": 7390 }, { "epoch": 2.16, "learning_rate": 1.1053026721397044e-07, "logits/chosen": -2.734654426574707, "logits/rejected": -2.7429187297821045, "logps/chosen": -204.10427856445312, "logps/rejected": -194.6341094970703, "loss": 0.9136, "rewards/accuracies": 0.609375, "rewards/chosen": -0.21057207882404327, "rewards/margins": 0.12202072143554688, "rewards/rejected": -0.33259281516075134, "step": 7400 }, { "epoch": 2.16, "eval_logits/chosen": -2.646413564682007, "eval_logits/rejected": -2.640982151031494, "eval_logps/chosen": -197.75148010253906, "eval_logps/rejected": -183.86248779296875, "eval_loss": 0.9313158392906189, "eval_rewards/accuracies": 0.5819170475006104, "eval_rewards/chosen": -0.21462056040763855, "eval_rewards/margins": 0.07949841767549515, "eval_rewards/rejected": -0.2941189706325531, "eval_runtime": 443.2671, "eval_samples_per_second": 26.542, "eval_steps_per_second": 3.319, "step": 7400 }, { "epoch": 2.16, "learning_rate": 1.0982678306902563e-07, "logits/chosen": -2.750333786010742, "logits/rejected": -2.7447495460510254, "logps/chosen": -216.957275390625, "logps/rejected": -195.1598663330078, "loss": 0.9426, "rewards/accuracies": 0.5625, "rewards/chosen": -0.26006537675857544, "rewards/margins": 0.05888168886303902, "rewards/rejected": -0.3189470171928406, "step": 7410 }, { "epoch": 2.16, "learning_rate": 1.0912491406910032e-07, "logits/chosen": -2.7206127643585205, "logits/rejected": -2.7083652019500732, "logps/chosen": -183.6171112060547, "logps/rejected": -179.92808532714844, "loss": 0.9218, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2206425666809082, "rewards/margins": 0.06548481434583664, "rewards/rejected": -0.28612738847732544, "step": 7420 }, { "epoch": 2.17, "learning_rate": 1.084246683014759e-07, "logits/chosen": -2.776256561279297, "logits/rejected": -2.7593002319335938, "logps/chosen": -201.40565490722656, "logps/rejected": -187.70831298828125, "loss": 0.9257, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21603736281394958, "rewards/margins": 0.10193196684122086, "rewards/rejected": -0.31796932220458984, "step": 7430 }, { "epoch": 2.17, "learning_rate": 1.0772605383472977e-07, "logits/chosen": -2.7164487838745117, "logits/rejected": -2.7195496559143066, "logps/chosen": -195.38851928710938, "logps/rejected": -189.60653686523438, "loss": 0.9409, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.23506739735603333, "rewards/margins": 0.055074017494916916, "rewards/rejected": -0.29014140367507935, "step": 7440 }, { "epoch": 2.17, "learning_rate": 1.0702907871864267e-07, "logits/chosen": -2.731732130050659, "logits/rejected": -2.7372167110443115, "logps/chosen": -195.2466583251953, "logps/rejected": -184.58062744140625, "loss": 0.9204, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.22982437908649445, "rewards/margins": 0.08360082656145096, "rewards/rejected": -0.3134251832962036, "step": 7450 }, { "epoch": 2.18, "learning_rate": 1.0633375098410627e-07, "logits/chosen": -2.71661639213562, "logits/rejected": -2.7214877605438232, "logps/chosen": -181.84878540039062, "logps/rejected": -177.9984130859375, "loss": 0.9279, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.236176535487175, "rewards/margins": 0.08922503143548965, "rewards/rejected": -0.32540157437324524, "step": 7460 }, { "epoch": 2.18, "learning_rate": 1.0564007864302982e-07, "logits/chosen": -2.7508246898651123, "logits/rejected": -2.7447965145111084, "logps/chosen": -208.2515411376953, "logps/rejected": -188.55581665039062, "loss": 0.926, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.24827198684215546, "rewards/margins": 0.07217380404472351, "rewards/rejected": -0.32044583559036255, "step": 7470 }, { "epoch": 2.18, "learning_rate": 1.0494806968824877e-07, "logits/chosen": -2.7589292526245117, "logits/rejected": -2.7438571453094482, "logps/chosen": -210.2908477783203, "logps/rejected": -189.3018341064453, "loss": 0.9249, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2212313860654831, "rewards/margins": 0.08061711490154266, "rewards/rejected": -0.30184850096702576, "step": 7480 }, { "epoch": 2.18, "learning_rate": 1.0425773209343189e-07, "logits/chosen": -2.736321210861206, "logits/rejected": -2.7403640747070312, "logps/chosen": -189.15023803710938, "logps/rejected": -180.63282775878906, "loss": 0.9258, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.2023574411869049, "rewards/margins": 0.11086982488632202, "rewards/rejected": -0.31322723627090454, "step": 7490 }, { "epoch": 2.19, "learning_rate": 1.0356907381298983e-07, "logits/chosen": -2.7436957359313965, "logits/rejected": -2.734558582305908, "logps/chosen": -207.8624725341797, "logps/rejected": -191.48626708984375, "loss": 0.9401, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.22266311943531036, "rewards/margins": 0.07991985976696014, "rewards/rejected": -0.30258291959762573, "step": 7500 }, { "epoch": 2.19, "eval_logits/chosen": -2.6543385982513428, "eval_logits/rejected": -2.6490490436553955, "eval_logps/chosen": -197.7451171875, "eval_logps/rejected": -183.85833740234375, "eval_loss": 0.9313808083534241, "eval_rewards/accuracies": 0.5798776149749756, "eval_rewards/chosen": -0.21397916972637177, "eval_rewards/margins": 0.07972751557826996, "eval_rewards/rejected": -0.2937066853046417, "eval_runtime": 443.3645, "eval_samples_per_second": 26.536, "eval_steps_per_second": 3.318, "step": 7500 }, { "epoch": 2.19, "learning_rate": 1.0288210278198319e-07, "logits/chosen": -2.6996657848358154, "logits/rejected": -2.7122485637664795, "logps/chosen": -177.2788848876953, "logps/rejected": -162.20631408691406, "loss": 0.9112, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20859304070472717, "rewards/margins": 0.1111995205283165, "rewards/rejected": -0.31979256868362427, "step": 7510 }, { "epoch": 2.19, "learning_rate": 1.0219682691603168e-07, "logits/chosen": -2.7130396366119385, "logits/rejected": -2.728550434112549, "logps/chosen": -187.67735290527344, "logps/rejected": -184.43748474121094, "loss": 0.9016, "rewards/accuracies": 0.621874988079071, "rewards/chosen": -0.19813624024391174, "rewards/margins": 0.0911586657166481, "rewards/rejected": -0.28929489850997925, "step": 7520 }, { "epoch": 2.2, "learning_rate": 1.0151325411122206e-07, "logits/chosen": -2.7337875366210938, "logits/rejected": -2.734790325164795, "logps/chosen": -184.33343505859375, "logps/rejected": -181.77609252929688, "loss": 0.9207, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2145807445049286, "rewards/margins": 0.06570100784301758, "rewards/rejected": -0.28028178215026855, "step": 7530 }, { "epoch": 2.2, "learning_rate": 1.0083139224401762e-07, "logits/chosen": -2.7556958198547363, "logits/rejected": -2.754171371459961, "logps/chosen": -203.33828735351562, "logps/rejected": -185.1107177734375, "loss": 0.9272, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.197455495595932, "rewards/margins": 0.0879480391740799, "rewards/rejected": -0.2854035198688507, "step": 7540 }, { "epoch": 2.2, "learning_rate": 1.0015124917116769e-07, "logits/chosen": -2.75793719291687, "logits/rejected": -2.7613115310668945, "logps/chosen": -197.8968505859375, "logps/rejected": -190.76565551757812, "loss": 0.9236, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.2403552234172821, "rewards/margins": 0.08378183841705322, "rewards/rejected": -0.3241370618343353, "step": 7550 }, { "epoch": 2.21, "learning_rate": 9.947283272961637e-08, "logits/chosen": -2.722438335418701, "logits/rejected": -2.7054498195648193, "logps/chosen": -175.672607421875, "logps/rejected": -162.665771484375, "loss": 0.9231, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.1979292929172516, "rewards/margins": 0.07674896717071533, "rewards/rejected": -0.2746782898902893, "step": 7560 }, { "epoch": 2.21, "learning_rate": 9.879615073641309e-08, "logits/chosen": -2.763669729232788, "logits/rejected": -2.7318906784057617, "logps/chosen": -201.5901336669922, "logps/rejected": -178.46498107910156, "loss": 0.9179, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.19735045731067657, "rewards/margins": 0.09358976781368256, "rewards/rejected": -0.29094022512435913, "step": 7570 }, { "epoch": 2.21, "learning_rate": 9.812121098862184e-08, "logits/chosen": -2.7316806316375732, "logits/rejected": -2.743900775909424, "logps/chosen": -194.90371704101562, "logps/rejected": -187.50155639648438, "loss": 0.9325, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.22696837782859802, "rewards/margins": 0.060942865908145905, "rewards/rejected": -0.28791123628616333, "step": 7580 }, { "epoch": 2.21, "learning_rate": 9.744802126323159e-08, "logits/chosen": -2.7149295806884766, "logits/rejected": -2.7186684608459473, "logps/chosen": -214.962158203125, "logps/rejected": -186.78811645507812, "loss": 0.9203, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.23681148886680603, "rewards/margins": 0.04845339059829712, "rewards/rejected": -0.28526487946510315, "step": 7590 }, { "epoch": 2.22, "learning_rate": 9.677658931706676e-08, "logits/chosen": -2.726931095123291, "logits/rejected": -2.715970516204834, "logps/chosen": -189.72854614257812, "logps/rejected": -176.05377197265625, "loss": 0.9295, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.2344752848148346, "rewards/margins": 0.06562696397304535, "rewards/rejected": -0.30010226368904114, "step": 7600 }, { "epoch": 2.22, "eval_logits/chosen": -2.6620023250579834, "eval_logits/rejected": -2.6568603515625, "eval_logps/chosen": -197.7584991455078, "eval_logps/rejected": -183.87474060058594, "eval_loss": 0.9313289523124695, "eval_rewards/accuracies": 0.581237256526947, "eval_rewards/chosen": -0.21531949937343597, "eval_rewards/margins": 0.08002565056085587, "eval_rewards/rejected": -0.29534512758255005, "eval_runtime": 443.2661, "eval_samples_per_second": 26.542, "eval_steps_per_second": 3.319, "step": 7600 }, { "epoch": 2.22, "learning_rate": 9.61069228866979e-08, "logits/chosen": -2.7586958408355713, "logits/rejected": -2.759864330291748, "logps/chosen": -206.00894165039062, "logps/rejected": -192.7720947265625, "loss": 0.9273, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.23331649601459503, "rewards/margins": 0.08103559911251068, "rewards/rejected": -0.3143520951271057, "step": 7610 }, { "epoch": 2.22, "learning_rate": 9.543902968835219e-08, "logits/chosen": -2.7187604904174805, "logits/rejected": -2.7171874046325684, "logps/chosen": -193.1675262451172, "logps/rejected": -175.18508911132812, "loss": 0.9277, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.2174643725156784, "rewards/margins": 0.06399818509817123, "rewards/rejected": -0.28146255016326904, "step": 7620 }, { "epoch": 2.23, "learning_rate": 9.477291741782478e-08, "logits/chosen": -2.7655746936798096, "logits/rejected": -2.743079662322998, "logps/chosen": -214.4647979736328, "logps/rejected": -198.39024353027344, "loss": 0.946, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.25131624937057495, "rewards/margins": 0.06656187027692795, "rewards/rejected": -0.3178780972957611, "step": 7630 }, { "epoch": 2.23, "learning_rate": 9.410859375039034e-08, "logits/chosen": -2.759535312652588, "logits/rejected": -2.746966600418091, "logps/chosen": -194.54786682128906, "logps/rejected": -180.0648193359375, "loss": 0.9223, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.21222415566444397, "rewards/margins": 0.07545609772205353, "rewards/rejected": -0.2876802384853363, "step": 7640 }, { "epoch": 2.23, "learning_rate": 9.3446066340714e-08, "logits/chosen": -2.757150888442993, "logits/rejected": -2.7474305629730225, "logps/chosen": -220.7982635498047, "logps/rejected": -201.95033264160156, "loss": 0.9162, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.1936686933040619, "rewards/margins": 0.11243748664855957, "rewards/rejected": -0.3061061501502991, "step": 7650 }, { "epoch": 2.23, "learning_rate": 9.27853428227639e-08, "logits/chosen": -2.767207622528076, "logits/rejected": -2.7521488666534424, "logps/chosen": -209.8219757080078, "logps/rejected": -188.6089324951172, "loss": 0.9515, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.2451823651790619, "rewards/margins": 0.04614636301994324, "rewards/rejected": -0.2913287281990051, "step": 7660 }, { "epoch": 2.24, "learning_rate": 9.21264308097226e-08, "logits/chosen": -2.719449758529663, "logits/rejected": -2.7556424140930176, "logps/chosen": -165.529052734375, "logps/rejected": -181.20455932617188, "loss": 0.9016, "rewards/accuracies": 0.59375, "rewards/chosen": -0.22411219775676727, "rewards/margins": 0.09404568374156952, "rewards/rejected": -0.31815794110298157, "step": 7670 }, { "epoch": 2.24, "learning_rate": 9.146933789389963e-08, "logits/chosen": -2.7221908569335938, "logits/rejected": -2.710775852203369, "logps/chosen": -193.11004638671875, "logps/rejected": -176.21279907226562, "loss": 0.9268, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.20240387320518494, "rewards/margins": 0.06890231370925903, "rewards/rejected": -0.27130618691444397, "step": 7680 }, { "epoch": 2.24, "learning_rate": 9.081407164664396e-08, "logits/chosen": -2.761277914047241, "logits/rejected": -2.739208459854126, "logps/chosen": -220.0433807373047, "logps/rejected": -195.45925903320312, "loss": 0.939, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.23591816425323486, "rewards/margins": 0.08795022964477539, "rewards/rejected": -0.32386839389801025, "step": 7690 }, { "epoch": 2.25, "learning_rate": 9.016063961825698e-08, "logits/chosen": -2.7358932495117188, "logits/rejected": -2.7563042640686035, "logps/chosen": -190.58761596679688, "logps/rejected": -178.07778930664062, "loss": 0.9128, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.19711415469646454, "rewards/margins": 0.09829168021678925, "rewards/rejected": -0.2954058349132538, "step": 7700 }, { "epoch": 2.25, "eval_logits/chosen": -2.655303716659546, "eval_logits/rejected": -2.65004301071167, "eval_logps/chosen": -197.75904846191406, "eval_logps/rejected": -183.8813934326172, "eval_loss": 0.9308503270149231, "eval_rewards/accuracies": 0.5817471146583557, "eval_rewards/chosen": -0.21537372469902039, "eval_rewards/margins": 0.08063706010580063, "eval_rewards/rejected": -0.2960107922554016, "eval_runtime": 443.7199, "eval_samples_per_second": 26.514, "eval_steps_per_second": 3.315, "step": 7700 }, { "epoch": 2.25, "learning_rate": 8.950904933790512e-08, "logits/chosen": -2.725898265838623, "logits/rejected": -2.7195515632629395, "logps/chosen": -189.76937866210938, "logps/rejected": -179.32357788085938, "loss": 0.9147, "rewards/accuracies": 0.609375, "rewards/chosen": -0.18170380592346191, "rewards/margins": 0.10253002494573593, "rewards/rejected": -0.28423386812210083, "step": 7710 }, { "epoch": 2.25, "learning_rate": 8.885930831353328e-08, "logits/chosen": -2.709311008453369, "logits/rejected": -2.7129642963409424, "logps/chosen": -189.28977966308594, "logps/rejected": -172.34219360351562, "loss": 0.9186, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.21395018696784973, "rewards/margins": 0.06539814174175262, "rewards/rejected": -0.27934831380844116, "step": 7720 }, { "epoch": 2.25, "learning_rate": 8.821142403177845e-08, "logits/chosen": -2.728788137435913, "logits/rejected": -2.7360405921936035, "logps/chosen": -196.939453125, "logps/rejected": -191.22731018066406, "loss": 0.9326, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.21163785457611084, "rewards/margins": 0.06347735226154327, "rewards/rejected": -0.2751151919364929, "step": 7730 }, { "epoch": 2.26, "learning_rate": 8.756540395788311e-08, "logits/chosen": -2.734846591949463, "logits/rejected": -2.7414629459381104, "logps/chosen": -197.96871948242188, "logps/rejected": -188.20376586914062, "loss": 0.9393, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.21204037964344025, "rewards/margins": 0.07741545140743256, "rewards/rejected": -0.2894558310508728, "step": 7740 }, { "epoch": 2.26, "learning_rate": 8.692125553560976e-08, "logits/chosen": -2.717087984085083, "logits/rejected": -2.740471363067627, "logps/chosen": -194.17306518554688, "logps/rejected": -194.03067016601562, "loss": 0.9164, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.25672441720962524, "rewards/margins": 0.08338448405265808, "rewards/rejected": -0.3401089310646057, "step": 7750 }, { "epoch": 2.26, "learning_rate": 8.62789861871544e-08, "logits/chosen": -2.704550266265869, "logits/rejected": -2.7091641426086426, "logps/chosen": -188.353515625, "logps/rejected": -172.57882690429688, "loss": 0.9066, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.20504169166088104, "rewards/margins": 0.09637182950973511, "rewards/rejected": -0.30141353607177734, "step": 7760 }, { "epoch": 2.27, "learning_rate": 8.563860331306158e-08, "logits/chosen": -2.728397846221924, "logits/rejected": -2.7323508262634277, "logps/chosen": -194.7670135498047, "logps/rejected": -179.7714080810547, "loss": 0.928, "rewards/accuracies": 0.625, "rewards/chosen": -0.2068866491317749, "rewards/margins": 0.08743634819984436, "rewards/rejected": -0.29432302713394165, "step": 7770 }, { "epoch": 2.27, "learning_rate": 8.500011429213916e-08, "logits/chosen": -2.733534574508667, "logits/rejected": -2.7395362854003906, "logps/chosen": -196.14666748046875, "logps/rejected": -186.81178283691406, "loss": 0.9252, "rewards/accuracies": 0.59375, "rewards/chosen": -0.2137400358915329, "rewards/margins": 0.09642390161752701, "rewards/rejected": -0.3101639747619629, "step": 7780 }, { "epoch": 2.27, "learning_rate": 8.43635264813728e-08, "logits/chosen": -2.758620262145996, "logits/rejected": -2.750257730484009, "logps/chosen": -211.9737548828125, "logps/rejected": -193.65223693847656, "loss": 0.9105, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.1767754703760147, "rewards/margins": 0.12258114665746689, "rewards/rejected": -0.2993565797805786, "step": 7790 }, { "epoch": 2.28, "learning_rate": 8.372884721584155e-08, "logits/chosen": -2.7193734645843506, "logits/rejected": -2.7123632431030273, "logps/chosen": -184.4526824951172, "logps/rejected": -167.96360778808594, "loss": 0.9074, "rewards/accuracies": 0.578125, "rewards/chosen": -0.20129454135894775, "rewards/margins": 0.10267508029937744, "rewards/rejected": -0.3039696216583252, "step": 7800 }, { "epoch": 2.28, "eval_logits/chosen": -2.6557297706604004, "eval_logits/rejected": -2.6504623889923096, "eval_logps/chosen": -197.76478576660156, "eval_logps/rejected": -183.8850555419922, "eval_loss": 0.9311633706092834, "eval_rewards/accuracies": 0.5836166143417358, "eval_rewards/chosen": -0.2159472107887268, "eval_rewards/margins": 0.08042870461940765, "eval_rewards/rejected": -0.29637593030929565, "eval_runtime": 443.4949, "eval_samples_per_second": 26.528, "eval_steps_per_second": 3.317, "step": 7800 }, { "epoch": 2.28, "learning_rate": 8.309608380863328e-08, "logits/chosen": -2.7333827018737793, "logits/rejected": -2.7506890296936035, "logps/chosen": -175.94215393066406, "logps/rejected": -184.37400817871094, "loss": 0.9142, "rewards/accuracies": 0.609375, "rewards/chosen": -0.2469567507505417, "rewards/margins": 0.07817981392145157, "rewards/rejected": -0.32513657212257385, "step": 7810 }, { "epoch": 2.28, "learning_rate": 8.246524355076057e-08, "logits/chosen": -2.7284388542175293, "logits/rejected": -2.736680030822754, "logps/chosen": -192.95489501953125, "logps/rejected": -193.68785095214844, "loss": 0.9323, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.2360697090625763, "rewards/margins": 0.10624003410339355, "rewards/rejected": -0.34230974316596985, "step": 7820 }, { "epoch": 2.28, "learning_rate": 8.183633371107615e-08, "logits/chosen": -2.7205255031585693, "logits/rejected": -2.7430524826049805, "logps/chosen": -182.37574768066406, "logps/rejected": -185.91189575195312, "loss": 0.9101, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.2163948267698288, "rewards/margins": 0.08373256027698517, "rewards/rejected": -0.3001273572444916, "step": 7830 }, { "epoch": 2.29, "learning_rate": 8.120936153618996e-08, "logits/chosen": -2.704521894454956, "logits/rejected": -2.7069900035858154, "logps/chosen": -181.1570281982422, "logps/rejected": -166.2770233154297, "loss": 0.9296, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.2072068452835083, "rewards/margins": 0.07812671363353729, "rewards/rejected": -0.2853335738182068, "step": 7840 }, { "epoch": 2.29, "learning_rate": 8.058433425038483e-08, "logits/chosen": -2.7473597526550293, "logits/rejected": -2.7511062622070312, "logps/chosen": -181.43115234375, "logps/rejected": -170.19798278808594, "loss": 0.9204, "rewards/accuracies": 0.625, "rewards/chosen": -0.2030690610408783, "rewards/margins": 0.08421279489994049, "rewards/rejected": -0.28728190064430237, "step": 7850 }, { "epoch": 2.29, "learning_rate": 7.996125905553375e-08, "logits/chosen": -2.7208352088928223, "logits/rejected": -2.7141966819763184, "logps/chosen": -184.7169952392578, "logps/rejected": -184.3052520751953, "loss": 0.9415, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.2215987890958786, "rewards/margins": 0.07141195237636566, "rewards/rejected": -0.29301077127456665, "step": 7860 }, { "epoch": 2.3, "learning_rate": 7.934014313101692e-08, "logits/chosen": -2.749361515045166, "logits/rejected": -2.7265572547912598, "logps/chosen": -219.4988555908203, "logps/rejected": -185.86529541015625, "loss": 0.9196, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.21059224009513855, "rewards/margins": 0.10057976096868515, "rewards/rejected": -0.3111719787120819, "step": 7870 }, { "epoch": 2.3, "learning_rate": 7.87209936336386e-08, "logits/chosen": -2.746774911880493, "logits/rejected": -2.7589919567108154, "logps/chosen": -197.74710083007812, "logps/rejected": -199.05357360839844, "loss": 0.9093, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.20615649223327637, "rewards/margins": 0.09410884976387024, "rewards/rejected": -0.3002653121948242, "step": 7880 }, { "epoch": 2.3, "learning_rate": 7.810381769754506e-08, "logits/chosen": -2.752405881881714, "logits/rejected": -2.7523396015167236, "logps/chosen": -205.9868621826172, "logps/rejected": -192.83425903320312, "loss": 0.9399, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.2012767344713211, "rewards/margins": 0.0421002134680748, "rewards/rejected": -0.2433769404888153, "step": 7890 }, { "epoch": 2.3, "learning_rate": 7.748862243414211e-08, "logits/chosen": -2.7154974937438965, "logits/rejected": -2.726799488067627, "logps/chosen": -193.008544921875, "logps/rejected": -182.70101928710938, "loss": 0.9114, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.20253929495811462, "rewards/margins": 0.07466837018728256, "rewards/rejected": -0.2772076725959778, "step": 7900 }, { "epoch": 2.3, "eval_logits/chosen": -2.6478512287139893, "eval_logits/rejected": -2.642455577850342, "eval_logps/chosen": -197.75442504882812, "eval_logps/rejected": -183.87030029296875, "eval_loss": 0.9310198426246643, "eval_rewards/accuracies": 0.5836166143417358, "eval_rewards/chosen": -0.21491007506847382, "eval_rewards/margins": 0.07999106496572495, "eval_rewards/rejected": -0.29490113258361816, "eval_runtime": 443.5742, "eval_samples_per_second": 26.523, "eval_steps_per_second": 3.316, "step": 7900 }, { "epoch": 2.31, "learning_rate": 7.687541493201347e-08, "logits/chosen": -2.7405920028686523, "logits/rejected": -2.7437314987182617, "logps/chosen": -190.9894256591797, "logps/rejected": -188.3881378173828, "loss": 0.9488, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.2341761589050293, "rewards/margins": 0.03301035612821579, "rewards/rejected": -0.2671864926815033, "step": 7910 }, { "epoch": 2.31, "learning_rate": 7.62642022568386e-08, "logits/chosen": -2.7333028316497803, "logits/rejected": -2.7367300987243652, "logps/chosen": -195.2904815673828, "logps/rejected": -180.70547485351562, "loss": 0.8981, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.19127438962459564, "rewards/margins": 0.07776583731174469, "rewards/rejected": -0.26904022693634033, "step": 7920 }, { "epoch": 2.31, "learning_rate": 7.565499145131193e-08, "logits/chosen": -2.7502613067626953, "logits/rejected": -2.708519458770752, "logps/chosen": -222.30517578125, "logps/rejected": -176.55001831054688, "loss": 0.9368, "rewards/accuracies": 0.53125, "rewards/chosen": -0.2366447001695633, "rewards/margins": 0.05661622807383537, "rewards/rejected": -0.29326093196868896, "step": 7930 }, { "epoch": 2.32, "learning_rate": 7.50477895350611e-08, "logits/chosen": -2.740562677383423, "logits/rejected": -2.7238717079162598, "logps/chosen": -211.9696044921875, "logps/rejected": -192.79771423339844, "loss": 0.929, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.21781253814697266, "rewards/margins": 0.11220844089984894, "rewards/rejected": -0.3300209641456604, "step": 7940 }, { "epoch": 2.32, "learning_rate": 7.444260350456632e-08, "logits/chosen": -2.7244935035705566, "logits/rejected": -2.710855007171631, "logps/chosen": -215.6509246826172, "logps/rejected": -184.55307006835938, "loss": 0.9274, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.230869323015213, "rewards/margins": 0.07824772596359253, "rewards/rejected": -0.3091171085834503, "step": 7950 }, { "epoch": 2.32, "learning_rate": 7.383944033308003e-08, "logits/chosen": -2.6875998973846436, "logits/rejected": -2.71150279045105, "logps/chosen": -199.21945190429688, "logps/rejected": -196.64727783203125, "loss": 0.9031, "rewards/accuracies": 0.6468750238418579, "rewards/chosen": -0.17514397203922272, "rewards/margins": 0.11656300723552704, "rewards/rejected": -0.29170694947242737, "step": 7960 }, { "epoch": 2.32, "learning_rate": 7.323830697054592e-08, "logits/chosen": -2.7250888347625732, "logits/rejected": -2.7495064735412598, "logps/chosen": -184.5178985595703, "logps/rejected": -180.9095916748047, "loss": 0.9147, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.21628186106681824, "rewards/margins": 0.11150647699832916, "rewards/rejected": -0.3277883529663086, "step": 7970 }, { "epoch": 2.33, "learning_rate": 7.263921034351972e-08, "logits/chosen": -2.726407527923584, "logits/rejected": -2.7399487495422363, "logps/chosen": -205.93057250976562, "logps/rejected": -196.50315856933594, "loss": 0.9267, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.2090451419353485, "rewards/margins": 0.09366326034069061, "rewards/rejected": -0.3027084171772003, "step": 7980 }, { "epoch": 2.33, "learning_rate": 7.204215735508821e-08, "logits/chosen": -2.7203097343444824, "logits/rejected": -2.7385830879211426, "logps/chosen": -188.94375610351562, "logps/rejected": -196.45272827148438, "loss": 0.9054, "rewards/accuracies": 0.59375, "rewards/chosen": -0.21457454562187195, "rewards/margins": 0.13003358244895935, "rewards/rejected": -0.3446081578731537, "step": 7990 }, { "epoch": 2.33, "learning_rate": 7.1447154884791e-08, "logits/chosen": -2.754638910293579, "logits/rejected": -2.7675697803497314, "logps/chosen": -206.0548553466797, "logps/rejected": -201.75033569335938, "loss": 0.9181, "rewards/accuracies": 0.6781250238418579, "rewards/chosen": -0.24122412502765656, "rewards/margins": 0.09840685874223709, "rewards/rejected": -0.33963099122047424, "step": 8000 }, { "epoch": 2.33, "eval_logits/chosen": -2.6661343574523926, "eval_logits/rejected": -2.6611063480377197, "eval_logps/chosen": -197.75009155273438, "eval_logps/rejected": -183.85853576660156, "eval_loss": 0.9317770004272461, "eval_rewards/accuracies": 0.5771583914756775, "eval_rewards/chosen": -0.21447615325450897, "eval_rewards/margins": 0.07924765348434448, "eval_rewards/rejected": -0.29372379183769226, "eval_runtime": 443.4039, "eval_samples_per_second": 26.533, "eval_steps_per_second": 3.318, "step": 8000 }, { "epoch": 2.34, "learning_rate": 7.08542097885402e-08, "logits/chosen": -2.739370346069336, "logits/rejected": -2.739750862121582, "logps/chosen": -168.8159942626953, "logps/rejected": -170.51507568359375, "loss": 0.93, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2074386328458786, "rewards/margins": 0.06587998569011688, "rewards/rejected": -0.2733186185359955, "step": 8010 }, { "epoch": 2.34, "learning_rate": 7.026332889854212e-08, "logits/chosen": -2.7454347610473633, "logits/rejected": -2.7481212615966797, "logps/chosen": -191.80398559570312, "logps/rejected": -169.96246337890625, "loss": 0.9527, "rewards/accuracies": 0.5625, "rewards/chosen": -0.21735993027687073, "rewards/margins": 0.05763064697384834, "rewards/rejected": -0.2749905586242676, "step": 8020 }, { "epoch": 2.34, "learning_rate": 6.967451902321808e-08, "logits/chosen": -2.7633728981018066, "logits/rejected": -2.762784481048584, "logps/chosen": -207.1423797607422, "logps/rejected": -194.453125, "loss": 0.9206, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2185828685760498, "rewards/margins": 0.08463943004608154, "rewards/rejected": -0.30322226881980896, "step": 8030 }, { "epoch": 2.35, "learning_rate": 6.908778694712611e-08, "logits/chosen": -2.7394168376922607, "logits/rejected": -2.7601752281188965, "logps/chosen": -197.01817321777344, "logps/rejected": -191.726806640625, "loss": 0.9391, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.2251404970884323, "rewards/margins": 0.07005010545253754, "rewards/rejected": -0.29519063234329224, "step": 8040 }, { "epoch": 2.35, "learning_rate": 6.85031394308831e-08, "logits/chosen": -2.749958038330078, "logits/rejected": -2.7379648685455322, "logps/chosen": -193.93630981445312, "logps/rejected": -188.49380493164062, "loss": 0.9394, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.24032609164714813, "rewards/margins": 0.06645093858242035, "rewards/rejected": -0.3067770302295685, "step": 8050 }, { "epoch": 2.35, "learning_rate": 6.792058321108621e-08, "logits/chosen": -2.73541522026062, "logits/rejected": -2.743712902069092, "logps/chosen": -181.5113983154297, "logps/rejected": -181.34979248046875, "loss": 0.9196, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21504303812980652, "rewards/margins": 0.08492358773946762, "rewards/rejected": -0.29996663331985474, "step": 8060 }, { "epoch": 2.35, "learning_rate": 6.734012500023608e-08, "logits/chosen": -2.724332332611084, "logits/rejected": -2.699763298034668, "logps/chosen": -204.10018920898438, "logps/rejected": -174.8278045654297, "loss": 0.9323, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.23785284161567688, "rewards/margins": 0.07769812643527985, "rewards/rejected": -0.3155509829521179, "step": 8070 }, { "epoch": 2.36, "learning_rate": 6.67617714866586e-08, "logits/chosen": -2.7390198707580566, "logits/rejected": -2.7389464378356934, "logps/chosen": -196.2102508544922, "logps/rejected": -187.58624267578125, "loss": 0.9284, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.23619568347930908, "rewards/margins": 0.07881676405668259, "rewards/rejected": -0.31501245498657227, "step": 8080 }, { "epoch": 2.36, "learning_rate": 6.618552933442869e-08, "logits/chosen": -2.71484637260437, "logits/rejected": -2.718121290206909, "logps/chosen": -188.6407012939453, "logps/rejected": -179.3391571044922, "loss": 0.9224, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.20391671359539032, "rewards/margins": 0.10935616493225098, "rewards/rejected": -0.3132728636264801, "step": 8090 }, { "epoch": 2.36, "learning_rate": 6.561140518329287e-08, "logits/chosen": -2.669769525527954, "logits/rejected": -2.695160150527954, "logps/chosen": -166.22305297851562, "logps/rejected": -174.75186157226562, "loss": 0.9009, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.18055148422718048, "rewards/margins": 0.09363645315170288, "rewards/rejected": -0.27418795228004456, "step": 8100 }, { "epoch": 2.36, "eval_logits/chosen": -2.6632046699523926, "eval_logits/rejected": -2.658102512359619, "eval_logps/chosen": -197.7543182373047, "eval_logps/rejected": -183.87362670898438, "eval_loss": 0.9310758709907532, "eval_rewards/accuracies": 0.5798776149749756, "eval_rewards/chosen": -0.21490146219730377, "eval_rewards/margins": 0.08033359050750732, "eval_rewards/rejected": -0.2952350676059723, "eval_runtime": 443.3937, "eval_samples_per_second": 26.534, "eval_steps_per_second": 3.318, "step": 8100 }, { "epoch": 2.37, "learning_rate": 6.503940564859317e-08, "logits/chosen": -2.7465438842773438, "logits/rejected": -2.7310779094696045, "logps/chosen": -203.1905059814453, "logps/rejected": -194.52020263671875, "loss": 0.9262, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.24113135039806366, "rewards/margins": 0.06915220618247986, "rewards/rejected": -0.3102835714817047, "step": 8110 }, { "epoch": 2.37, "learning_rate": 6.446953732119068e-08, "logits/chosen": -2.7239115238189697, "logits/rejected": -2.7100701332092285, "logps/chosen": -189.4662322998047, "logps/rejected": -181.71133422851562, "loss": 0.9064, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.22534950077533722, "rewards/margins": 0.08933541178703308, "rewards/rejected": -0.3146849274635315, "step": 8120 }, { "epoch": 2.37, "learning_rate": 6.390180676738949e-08, "logits/chosen": -2.742435932159424, "logits/rejected": -2.728344678878784, "logps/chosen": -213.70993041992188, "logps/rejected": -189.58584594726562, "loss": 0.9412, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.24831977486610413, "rewards/margins": 0.04875670000910759, "rewards/rejected": -0.2970764935016632, "step": 8130 }, { "epoch": 2.37, "learning_rate": 6.333622052886159e-08, "logits/chosen": -2.709750175476074, "logits/rejected": -2.716714859008789, "logps/chosen": -202.9120635986328, "logps/rejected": -185.79696655273438, "loss": 0.9295, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2223753035068512, "rewards/margins": 0.07038509845733643, "rewards/rejected": -0.29276034235954285, "step": 8140 }, { "epoch": 2.38, "learning_rate": 6.277278512257073e-08, "logits/chosen": -2.7267911434173584, "logits/rejected": -2.747617483139038, "logps/chosen": -220.88833618164062, "logps/rejected": -208.8884735107422, "loss": 0.9105, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.22989530861377716, "rewards/margins": 0.12462420761585236, "rewards/rejected": -0.3545195162296295, "step": 8150 }, { "epoch": 2.38, "learning_rate": 6.221150704069808e-08, "logits/chosen": -2.728583574295044, "logits/rejected": -2.742826461791992, "logps/chosen": -177.401123046875, "logps/rejected": -175.57814025878906, "loss": 0.91, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.22042067348957062, "rewards/margins": 0.07267331331968307, "rewards/rejected": -0.2930939793586731, "step": 8160 }, { "epoch": 2.38, "learning_rate": 6.165239275056658e-08, "logits/chosen": -2.7489070892333984, "logits/rejected": -2.728541135787964, "logps/chosen": -202.7565460205078, "logps/rejected": -191.60960388183594, "loss": 0.9296, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.21797093749046326, "rewards/margins": 0.07035072892904282, "rewards/rejected": -0.28832167387008667, "step": 8170 }, { "epoch": 2.39, "learning_rate": 6.109544869456734e-08, "logits/chosen": -2.7465271949768066, "logits/rejected": -2.7321600914001465, "logps/chosen": -217.69332885742188, "logps/rejected": -183.03245544433594, "loss": 0.9104, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.20488587021827698, "rewards/margins": 0.1237928494811058, "rewards/rejected": -0.32867875695228577, "step": 8180 }, { "epoch": 2.39, "learning_rate": 6.054068129008464e-08, "logits/chosen": -2.7363193035125732, "logits/rejected": -2.724510669708252, "logps/chosen": -197.43472290039062, "logps/rejected": -179.55078125, "loss": 0.9333, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.22554786503314972, "rewards/margins": 0.07664482295513153, "rewards/rejected": -0.30219265818595886, "step": 8190 }, { "epoch": 2.39, "learning_rate": 5.998809692942261e-08, "logits/chosen": -2.7568459510803223, "logits/rejected": -2.726026773452759, "logps/chosen": -224.9655303955078, "logps/rejected": -181.2517852783203, "loss": 0.9091, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.20677609741687775, "rewards/margins": 0.0898161232471466, "rewards/rejected": -0.29659220576286316, "step": 8200 }, { "epoch": 2.39, "eval_logits/chosen": -2.643325090408325, "eval_logits/rejected": -2.6378369331359863, "eval_logps/chosen": -197.7702178955078, "eval_logps/rejected": -183.88160705566406, "eval_loss": 0.9310687184333801, "eval_rewards/accuracies": 0.5829367637634277, "eval_rewards/chosen": -0.21649178862571716, "eval_rewards/margins": 0.0795392096042633, "eval_rewards/rejected": -0.29603099822998047, "eval_runtime": 443.4095, "eval_samples_per_second": 26.533, "eval_steps_per_second": 3.317, "step": 8200 }, { "epoch": 2.39, "learning_rate": 5.9437701979730995e-08, "logits/chosen": -2.7097978591918945, "logits/rejected": -2.7331063747406006, "logps/chosen": -183.22218322753906, "logps/rejected": -181.6038360595703, "loss": 0.9293, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.2365005761384964, "rewards/margins": 0.07266677170991898, "rewards/rejected": -0.3091673254966736, "step": 8210 }, { "epoch": 2.4, "learning_rate": 5.8889502782932174e-08, "logits/chosen": -2.7132773399353027, "logits/rejected": -2.741889238357544, "logps/chosen": -187.0943145751953, "logps/rejected": -183.83511352539062, "loss": 0.9098, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.2061687409877777, "rewards/margins": 0.11216280609369278, "rewards/rejected": -0.3183315396308899, "step": 8220 }, { "epoch": 2.4, "learning_rate": 5.834350565564805e-08, "logits/chosen": -2.7249484062194824, "logits/rejected": -2.7134346961975098, "logps/chosen": -182.797119140625, "logps/rejected": -170.59536743164062, "loss": 0.9177, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.19757375121116638, "rewards/margins": 0.07276350259780884, "rewards/rejected": -0.27033722400665283, "step": 8230 }, { "epoch": 2.4, "learning_rate": 5.779971688912702e-08, "logits/chosen": -2.735774517059326, "logits/rejected": -2.743027448654175, "logps/chosen": -181.68629455566406, "logps/rejected": -188.88807678222656, "loss": 0.9373, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.2310686856508255, "rewards/margins": 0.07609544694423676, "rewards/rejected": -0.30716413259506226, "step": 8240 }, { "epoch": 2.41, "learning_rate": 5.725814274917198e-08, "logits/chosen": -2.7728989124298096, "logits/rejected": -2.724799394607544, "logps/chosen": -223.89620971679688, "logps/rejected": -192.3927001953125, "loss": 0.9281, "rewards/accuracies": 0.578125, "rewards/chosen": -0.2514110803604126, "rewards/margins": 0.06689505279064178, "rewards/rejected": -0.3183061480522156, "step": 8250 }, { "epoch": 2.41, "learning_rate": 5.6718789476067287e-08, "logits/chosen": -2.734097957611084, "logits/rejected": -2.746814489364624, "logps/chosen": -203.88644409179688, "logps/rejected": -201.47889709472656, "loss": 0.9254, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.21979546546936035, "rewards/margins": 0.08329310268163681, "rewards/rejected": -0.30308857560157776, "step": 8260 }, { "epoch": 2.41, "learning_rate": 5.618166328450788e-08, "logits/chosen": -2.7586183547973633, "logits/rejected": -2.7315571308135986, "logps/chosen": -208.2580108642578, "logps/rejected": -175.76177978515625, "loss": 0.9377, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2211875170469284, "rewards/margins": 0.069062739610672, "rewards/rejected": -0.2902502417564392, "step": 8270 }, { "epoch": 2.42, "learning_rate": 5.564677036352683e-08, "logits/chosen": -2.7465243339538574, "logits/rejected": -2.7541754245758057, "logps/chosen": -196.60305786132812, "logps/rejected": -199.50289916992188, "loss": 0.9082, "rewards/accuracies": 0.628125011920929, "rewards/chosen": -0.2293652594089508, "rewards/margins": 0.10975439846515656, "rewards/rejected": -0.33911964297294617, "step": 8280 }, { "epoch": 2.42, "learning_rate": 5.5114116876424643e-08, "logits/chosen": -2.75584077835083, "logits/rejected": -2.746237277984619, "logps/chosen": -217.4754180908203, "logps/rejected": -197.7372283935547, "loss": 0.9273, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.21786010265350342, "rewards/margins": 0.09159267693758011, "rewards/rejected": -0.3094528019428253, "step": 8290 }, { "epoch": 2.42, "learning_rate": 5.4583708960697695e-08, "logits/chosen": -2.737339735031128, "logits/rejected": -2.761617660522461, "logps/chosen": -214.1763916015625, "logps/rejected": -195.55984497070312, "loss": 0.9091, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.2552254796028137, "rewards/margins": 0.09345632046461105, "rewards/rejected": -0.348681777715683, "step": 8300 }, { "epoch": 2.42, "eval_logits/chosen": -2.652771234512329, "eval_logits/rejected": -2.6474828720092773, "eval_logps/chosen": -197.7509765625, "eval_logps/rejected": -183.8717041015625, "eval_loss": 0.9311734437942505, "eval_rewards/accuracies": 0.5832766890525818, "eval_rewards/chosen": -0.21456702053546906, "eval_rewards/margins": 0.08047395944595337, "eval_rewards/rejected": -0.29504096508026123, "eval_runtime": 443.4337, "eval_samples_per_second": 26.532, "eval_steps_per_second": 3.317, "step": 8300 }, { "epoch": 2.42, "learning_rate": 5.405555272796788e-08, "logits/chosen": -2.7279231548309326, "logits/rejected": -2.6993558406829834, "logps/chosen": -218.52407836914062, "logps/rejected": -187.58221435546875, "loss": 0.9191, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.2137344628572464, "rewards/margins": 0.08393244445323944, "rewards/rejected": -0.2976669371128082, "step": 8310 }, { "epoch": 2.43, "learning_rate": 5.3529654263912205e-08, "logits/chosen": -2.746372938156128, "logits/rejected": -2.7346367835998535, "logps/chosen": -228.1139678955078, "logps/rejected": -198.20999145507812, "loss": 0.9182, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.2145860195159912, "rewards/margins": 0.09652328491210938, "rewards/rejected": -0.3111093044281006, "step": 8320 }, { "epoch": 2.43, "learning_rate": 5.300601962819229e-08, "logits/chosen": -2.6986913681030273, "logits/rejected": -2.711686372756958, "logps/chosen": -177.49481201171875, "logps/rejected": -178.4385986328125, "loss": 0.9027, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.22211650013923645, "rewards/margins": 0.12292201817035675, "rewards/rejected": -0.3450385332107544, "step": 8330 }, { "epoch": 2.43, "learning_rate": 5.248465485438522e-08, "logits/chosen": -2.7135062217712402, "logits/rejected": -2.7118725776672363, "logps/chosen": -171.53228759765625, "logps/rejected": -163.58480834960938, "loss": 0.9388, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.23733428120613098, "rewards/margins": 0.06133853644132614, "rewards/rejected": -0.2986728250980377, "step": 8340 }, { "epoch": 2.44, "learning_rate": 5.196556594991308e-08, "logits/chosen": -2.708112955093384, "logits/rejected": -2.728519916534424, "logps/chosen": -201.09585571289062, "logps/rejected": -194.08924865722656, "loss": 0.9024, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.23435361683368683, "rewards/margins": 0.12458226829767227, "rewards/rejected": -0.3589359223842621, "step": 8350 }, { "epoch": 2.44, "learning_rate": 5.144875889597469e-08, "logits/chosen": -2.7321176528930664, "logits/rejected": -2.7355644702911377, "logps/chosen": -176.49795532226562, "logps/rejected": -187.25527954101562, "loss": 0.9326, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.21027982234954834, "rewards/margins": 0.05963939428329468, "rewards/rejected": -0.2699192464351654, "step": 8360 }, { "epoch": 2.44, "learning_rate": 5.093423964747603e-08, "logits/chosen": -2.7487478256225586, "logits/rejected": -2.724884033203125, "logps/chosen": -194.27857971191406, "logps/rejected": -172.05706787109375, "loss": 0.9393, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.2161370813846588, "rewards/margins": 0.07543951272964478, "rewards/rejected": -0.2915765643119812, "step": 8370 }, { "epoch": 2.44, "learning_rate": 5.042201413296207e-08, "logits/chosen": -2.7153565883636475, "logits/rejected": -2.732156276702881, "logps/chosen": -196.8876953125, "logps/rejected": -196.85394287109375, "loss": 0.9173, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.17807409167289734, "rewards/margins": 0.09584574401378632, "rewards/rejected": -0.27391982078552246, "step": 8380 }, { "epoch": 2.45, "learning_rate": 4.991208825454804e-08, "logits/chosen": -2.717339038848877, "logits/rejected": -2.7379343509674072, "logps/chosen": -186.6918182373047, "logps/rejected": -189.26284790039062, "loss": 0.9068, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.2063685953617096, "rewards/margins": 0.09851086884737015, "rewards/rejected": -0.30487945675849915, "step": 8390 }, { "epoch": 2.45, "learning_rate": 4.94044678878516e-08, "logits/chosen": -2.741137742996216, "logits/rejected": -2.757124423980713, "logps/chosen": -203.63336181640625, "logps/rejected": -199.83615112304688, "loss": 0.9419, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.22201666235923767, "rewards/margins": 0.07703028619289398, "rewards/rejected": -0.29904693365097046, "step": 8400 }, { "epoch": 2.45, "eval_logits/chosen": -2.6419103145599365, "eval_logits/rejected": -2.636409044265747, "eval_logps/chosen": -197.74330139160156, "eval_logps/rejected": -183.8677520751953, "eval_loss": 0.930666446685791, "eval_rewards/accuracies": 0.5776682496070862, "eval_rewards/chosen": -0.21379975974559784, "eval_rewards/margins": 0.08084730058908463, "eval_rewards/rejected": -0.29464706778526306, "eval_runtime": 443.3983, "eval_samples_per_second": 26.534, "eval_steps_per_second": 3.318, "step": 8400 }, { "epoch": 2.45, "learning_rate": 4.889915888192545e-08, "logits/chosen": -2.7031803131103516, "logits/rejected": -2.6964354515075684, "logps/chosen": -194.42892456054688, "logps/rejected": -179.48733520507812, "loss": 0.9192, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.21093186736106873, "rewards/margins": 0.10730777680873871, "rewards/rejected": -0.31823965907096863, "step": 8410 }, { "epoch": 2.46, "learning_rate": 4.839616705918928e-08, "logits/chosen": -2.7463109493255615, "logits/rejected": -2.7112486362457275, "logps/chosen": -203.19883728027344, "logps/rejected": -176.5725860595703, "loss": 0.9298, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.2048642933368683, "rewards/margins": 0.09186164289712906, "rewards/rejected": -0.29672592878341675, "step": 8420 }, { "epoch": 2.46, "learning_rate": 4.7895498215363487e-08, "logits/chosen": -2.7438952922821045, "logits/rejected": -2.730785369873047, "logps/chosen": -208.5506134033203, "logps/rejected": -184.9493865966797, "loss": 0.9244, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.18712742626667023, "rewards/margins": 0.09391121566295624, "rewards/rejected": -0.28103864192962646, "step": 8430 }, { "epoch": 2.46, "learning_rate": 4.73971581194014e-08, "logits/chosen": -2.7060279846191406, "logits/rejected": -2.718268871307373, "logps/chosen": -183.0648193359375, "logps/rejected": -178.09503173828125, "loss": 0.9307, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.21532253921031952, "rewards/margins": 0.06491868197917938, "rewards/rejected": -0.2802412211894989, "step": 8440 }, { "epoch": 2.46, "learning_rate": 4.6901152513423886e-08, "logits/chosen": -2.74855637550354, "logits/rejected": -2.7635855674743652, "logps/chosen": -182.1060791015625, "logps/rejected": -195.69158935546875, "loss": 0.8908, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.18026016652584076, "rewards/margins": 0.11594252288341522, "rewards/rejected": -0.29620271921157837, "step": 8450 }, { "epoch": 2.47, "learning_rate": 4.640748711265236e-08, "logits/chosen": -2.7476954460144043, "logits/rejected": -2.7142105102539062, "logps/chosen": -209.7215576171875, "logps/rejected": -177.82785034179688, "loss": 0.9138, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.24884888529777527, "rewards/margins": 0.07080671936273575, "rewards/rejected": -0.3196555972099304, "step": 8460 }, { "epoch": 2.47, "learning_rate": 4.5916167605343394e-08, "logits/chosen": -2.7221813201904297, "logits/rejected": -2.6793854236602783, "logps/chosen": -197.37985229492188, "logps/rejected": -169.0908966064453, "loss": 0.9334, "rewards/accuracies": 0.528124988079071, "rewards/chosen": -0.22837810218334198, "rewards/margins": 0.06554639339447021, "rewards/rejected": -0.2939245104789734, "step": 8470 }, { "epoch": 2.47, "learning_rate": 4.542719965272293e-08, "logits/chosen": -2.727269172668457, "logits/rejected": -2.7378478050231934, "logps/chosen": -209.7412872314453, "logps/rejected": -187.45875549316406, "loss": 0.9353, "rewards/accuracies": 0.628125011920929, "rewards/chosen": -0.23735317587852478, "rewards/margins": 0.11680205911397934, "rewards/rejected": -0.35415521264076233, "step": 8480 }, { "epoch": 2.48, "learning_rate": 4.4940588888921075e-08, "logits/chosen": -2.7760841846466064, "logits/rejected": -2.7486133575439453, "logps/chosen": -206.3143310546875, "logps/rejected": -184.89443969726562, "loss": 0.9335, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.22854101657867432, "rewards/margins": 0.070357546210289, "rewards/rejected": -0.2988985776901245, "step": 8490 }, { "epoch": 2.48, "learning_rate": 4.445634092090747e-08, "logits/chosen": -2.712855577468872, "logits/rejected": -2.6964898109436035, "logps/chosen": -201.7066192626953, "logps/rejected": -184.13980102539062, "loss": 0.9203, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.18783876299858093, "rewards/margins": 0.0779946893453598, "rewards/rejected": -0.26583343744277954, "step": 8500 }, { "epoch": 2.48, "eval_logits/chosen": -2.652738332748413, "eval_logits/rejected": -2.6474361419677734, "eval_logps/chosen": -197.7529296875, "eval_logps/rejected": -183.8688201904297, "eval_loss": 0.9313034415245056, "eval_rewards/accuracies": 0.5834466218948364, "eval_rewards/chosen": -0.21476346254348755, "eval_rewards/margins": 0.07999155670404434, "eval_rewards/rejected": -0.2947549819946289, "eval_runtime": 443.3861, "eval_samples_per_second": 26.534, "eval_steps_per_second": 3.318, "step": 8500 }, { "epoch": 2.48, "learning_rate": 4.397446132842619e-08, "logits/chosen": -2.721277952194214, "logits/rejected": -2.7163240909576416, "logps/chosen": -190.77566528320312, "logps/rejected": -181.6962890625, "loss": 0.9341, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2155207097530365, "rewards/margins": 0.08565304428339005, "rewards/rejected": -0.30117374658584595, "step": 8510 }, { "epoch": 2.49, "learning_rate": 4.349495566393202e-08, "logits/chosen": -2.753206253051758, "logits/rejected": -2.7568376064300537, "logps/chosen": -175.25245666503906, "logps/rejected": -165.4966583251953, "loss": 0.9363, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.22295379638671875, "rewards/margins": 0.06077491119503975, "rewards/rejected": -0.2837287187576294, "step": 8520 }, { "epoch": 2.49, "learning_rate": 4.3017829452525714e-08, "logits/chosen": -2.7324330806732178, "logits/rejected": -2.7552661895751953, "logps/chosen": -197.71481323242188, "logps/rejected": -198.50564575195312, "loss": 0.9214, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.21423344314098358, "rewards/margins": 0.12743337452411652, "rewards/rejected": -0.3416668176651001, "step": 8530 }, { "epoch": 2.49, "learning_rate": 4.254308819189131e-08, "logits/chosen": -2.7188706398010254, "logits/rejected": -2.7103164196014404, "logps/chosen": -187.2228546142578, "logps/rejected": -171.23370361328125, "loss": 0.9343, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.22129873931407928, "rewards/margins": 0.0748787671327591, "rewards/rejected": -0.29617753624916077, "step": 8540 }, { "epoch": 2.49, "learning_rate": 4.207073735223188e-08, "logits/chosen": -2.74505615234375, "logits/rejected": -2.7541544437408447, "logps/chosen": -200.2747039794922, "logps/rejected": -189.50448608398438, "loss": 0.9122, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.19934865832328796, "rewards/margins": 0.10086574405431747, "rewards/rejected": -0.30021440982818604, "step": 8550 }, { "epoch": 2.5, "learning_rate": 4.160078237620715e-08, "logits/chosen": -2.744319438934326, "logits/rejected": -2.755634307861328, "logps/chosen": -188.6450653076172, "logps/rejected": -186.31484985351562, "loss": 0.9167, "rewards/accuracies": 0.59375, "rewards/chosen": -0.2137790024280548, "rewards/margins": 0.09879012405872345, "rewards/rejected": -0.31256914138793945, "step": 8560 }, { "epoch": 2.5, "learning_rate": 4.113322867887034e-08, "logits/chosen": -2.745191812515259, "logits/rejected": -2.736252546310425, "logps/chosen": -192.93218994140625, "logps/rejected": -176.01040649414062, "loss": 0.9286, "rewards/accuracies": 0.578125, "rewards/chosen": -0.23361460864543915, "rewards/margins": 0.0659651830792427, "rewards/rejected": -0.29957979917526245, "step": 8570 }, { "epoch": 2.5, "learning_rate": 4.066808164760599e-08, "logits/chosen": -2.7102911472320557, "logits/rejected": -2.7131974697113037, "logps/chosen": -200.6427001953125, "logps/rejected": -179.17141723632812, "loss": 0.9359, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.20251154899597168, "rewards/margins": 0.06852830946445465, "rewards/rejected": -0.2710398733615875, "step": 8580 }, { "epoch": 2.51, "learning_rate": 4.020534664206798e-08, "logits/chosen": -2.7358498573303223, "logits/rejected": -2.7214462757110596, "logps/chosen": -195.99107360839844, "logps/rejected": -192.43423461914062, "loss": 0.9251, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.22206735610961914, "rewards/margins": 0.10203574597835541, "rewards/rejected": -0.32410308718681335, "step": 8590 }, { "epoch": 2.51, "learning_rate": 3.974502899411741e-08, "logits/chosen": -2.744194507598877, "logits/rejected": -2.735948324203491, "logps/chosen": -219.98733520507812, "logps/rejected": -201.3661651611328, "loss": 0.9102, "rewards/accuracies": 0.625, "rewards/chosen": -0.2364121973514557, "rewards/margins": 0.10045131295919418, "rewards/rejected": -0.3368634879589081, "step": 8600 }, { "epoch": 2.51, "eval_logits/chosen": -2.648935079574585, "eval_logits/rejected": -2.6435654163360596, "eval_logps/chosen": -197.7634735107422, "eval_logps/rejected": -183.8791046142578, "eval_loss": 0.9314857125282288, "eval_rewards/accuracies": 0.5820870399475098, "eval_rewards/chosen": -0.21581508219242096, "eval_rewards/margins": 0.0799664780497551, "eval_rewards/rejected": -0.29578158259391785, "eval_runtime": 443.3659, "eval_samples_per_second": 26.536, "eval_steps_per_second": 3.318, "step": 8600 }, { "epoch": 2.51, "learning_rate": 3.9287134007761677e-08, "logits/chosen": -2.763599157333374, "logits/rejected": -2.750394821166992, "logps/chosen": -215.42379760742188, "logps/rejected": -189.61410522460938, "loss": 0.9236, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.20579537749290466, "rewards/margins": 0.09698157757520676, "rewards/rejected": -0.3027769923210144, "step": 8610 }, { "epoch": 2.51, "learning_rate": 3.883166695909268e-08, "logits/chosen": -2.75989031791687, "logits/rejected": -2.7441859245300293, "logps/chosen": -209.43435668945312, "logps/rejected": -182.25961303710938, "loss": 0.9205, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.2286672294139862, "rewards/margins": 0.07702066004276276, "rewards/rejected": -0.30568793416023254, "step": 8620 }, { "epoch": 2.52, "learning_rate": 3.837863309622683e-08, "logits/chosen": -2.7383246421813965, "logits/rejected": -2.728440046310425, "logps/chosen": -210.39956665039062, "logps/rejected": -188.38473510742188, "loss": 0.9276, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.21961596608161926, "rewards/margins": 0.07722331583499908, "rewards/rejected": -0.29683929681777954, "step": 8630 }, { "epoch": 2.52, "learning_rate": 3.79280376392439e-08, "logits/chosen": -2.7541236877441406, "logits/rejected": -2.752408504486084, "logps/chosen": -225.40658569335938, "logps/rejected": -200.19308471679688, "loss": 0.9193, "rewards/accuracies": 0.578125, "rewards/chosen": -0.19588294625282288, "rewards/margins": 0.09243004769086838, "rewards/rejected": -0.28831297159194946, "step": 8640 }, { "epoch": 2.52, "learning_rate": 3.747988578012731e-08, "logits/chosen": -2.731980323791504, "logits/rejected": -2.744915723800659, "logps/chosen": -200.18807983398438, "logps/rejected": -192.57229614257812, "loss": 0.9226, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.20432892441749573, "rewards/margins": 0.10759834200143814, "rewards/rejected": -0.31192725896835327, "step": 8650 }, { "epoch": 2.53, "learning_rate": 3.703418268270406e-08, "logits/chosen": -2.7322587966918945, "logits/rejected": -2.740971326828003, "logps/chosen": -217.7086181640625, "logps/rejected": -195.62628173828125, "loss": 0.9276, "rewards/accuracies": 0.578125, "rewards/chosen": -0.23085355758666992, "rewards/margins": 0.11779451370239258, "rewards/rejected": -0.3486481010913849, "step": 8660 }, { "epoch": 2.53, "learning_rate": 3.659093348258521e-08, "logits/chosen": -2.749173879623413, "logits/rejected": -2.7466702461242676, "logps/chosen": -196.7553253173828, "logps/rejected": -182.85800170898438, "loss": 0.9168, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.2209496796131134, "rewards/margins": 0.07574382424354553, "rewards/rejected": -0.29669347405433655, "step": 8670 }, { "epoch": 2.53, "learning_rate": 3.615014328710706e-08, "logits/chosen": -2.7865371704101562, "logits/rejected": -2.7517011165618896, "logps/chosen": -191.7059783935547, "logps/rejected": -178.4337615966797, "loss": 0.9019, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.2355107069015503, "rewards/margins": 0.08629553020000458, "rewards/rejected": -0.32180625200271606, "step": 8680 }, { "epoch": 2.53, "learning_rate": 3.5711817175271744e-08, "logits/chosen": -2.7685344219207764, "logits/rejected": -2.7406980991363525, "logps/chosen": -220.50802612304688, "logps/rejected": -182.41189575195312, "loss": 0.9186, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.2188243418931961, "rewards/margins": 0.0704570859670639, "rewards/rejected": -0.28928142786026, "step": 8690 }, { "epoch": 2.54, "learning_rate": 3.5275960197689355e-08, "logits/chosen": -2.738590717315674, "logits/rejected": -2.7316675186157227, "logps/chosen": -196.73020935058594, "logps/rejected": -180.64486694335938, "loss": 0.9327, "rewards/accuracies": 0.5625, "rewards/chosen": -0.24094291031360626, "rewards/margins": 0.05457784980535507, "rewards/rejected": -0.2955207824707031, "step": 8700 }, { "epoch": 2.54, "eval_logits/chosen": -2.655764102935791, "eval_logits/rejected": -2.6505346298217773, "eval_logps/chosen": -197.75112915039062, "eval_logps/rejected": -183.8668670654297, "eval_loss": 0.9315599203109741, "eval_rewards/accuracies": 0.582426905632019, "eval_rewards/chosen": -0.21458037197589874, "eval_rewards/margins": 0.07997720688581467, "eval_rewards/rejected": -0.2945576012134552, "eval_runtime": 443.3832, "eval_samples_per_second": 26.535, "eval_steps_per_second": 3.318, "step": 8700 }, { "epoch": 2.54, "learning_rate": 3.484257737651897e-08, "logits/chosen": -2.7013044357299805, "logits/rejected": -2.7178680896759033, "logps/chosen": -172.18899536132812, "logps/rejected": -177.84359741210938, "loss": 0.9021, "rewards/accuracies": 0.621874988079071, "rewards/chosen": -0.21273639798164368, "rewards/margins": 0.10589251667261124, "rewards/rejected": -0.31862884759902954, "step": 8710 }, { "epoch": 2.54, "learning_rate": 3.441167370541162e-08, "logits/chosen": -2.7266368865966797, "logits/rejected": -2.745483875274658, "logps/chosen": -196.69894409179688, "logps/rejected": -201.3649444580078, "loss": 0.913, "rewards/accuracies": 0.625, "rewards/chosen": -0.19837717711925507, "rewards/margins": 0.117425337433815, "rewards/rejected": -0.31580251455307007, "step": 8720 }, { "epoch": 2.55, "learning_rate": 3.398325414945208e-08, "logits/chosen": -2.7272849082946777, "logits/rejected": -2.738806962966919, "logps/chosen": -187.09805297851562, "logps/rejected": -176.56629943847656, "loss": 0.9094, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.24703994393348694, "rewards/margins": 0.08300556987524033, "rewards/rejected": -0.33004552125930786, "step": 8730 }, { "epoch": 2.55, "learning_rate": 3.355732364510208e-08, "logits/chosen": -2.716055393218994, "logits/rejected": -2.7345597743988037, "logps/chosen": -167.56236267089844, "logps/rejected": -176.9795684814453, "loss": 0.9222, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.21685859560966492, "rewards/margins": 0.08585675805807114, "rewards/rejected": -0.30271536111831665, "step": 8740 }, { "epoch": 2.55, "learning_rate": 3.31338871001432e-08, "logits/chosen": -2.7347230911254883, "logits/rejected": -2.741093635559082, "logps/chosen": -210.4318389892578, "logps/rejected": -184.84768676757812, "loss": 0.9008, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.20497319102287292, "rewards/margins": 0.12005607783794403, "rewards/rejected": -0.32502925395965576, "step": 8750 }, { "epoch": 2.56, "learning_rate": 3.2712949393620323e-08, "logits/chosen": -2.756697177886963, "logits/rejected": -2.735779285430908, "logps/chosen": -201.85545349121094, "logps/rejected": -180.489013671875, "loss": 0.9274, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.21515175700187683, "rewards/margins": 0.07304902374744415, "rewards/rejected": -0.2882007956504822, "step": 8760 }, { "epoch": 2.56, "learning_rate": 3.2294515375785644e-08, "logits/chosen": -2.707594394683838, "logits/rejected": -2.7309513092041016, "logps/chosen": -163.45175170898438, "logps/rejected": -170.62564086914062, "loss": 0.929, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.21401043236255646, "rewards/margins": 0.06886029243469238, "rewards/rejected": -0.28287073969841003, "step": 8770 }, { "epoch": 2.56, "learning_rate": 3.187858986804243e-08, "logits/chosen": -2.7427127361297607, "logits/rejected": -2.7254652976989746, "logps/chosen": -209.6241912841797, "logps/rejected": -183.1163330078125, "loss": 0.9239, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.21841943264007568, "rewards/margins": 0.08116400986909866, "rewards/rejected": -0.29958346486091614, "step": 8780 }, { "epoch": 2.56, "learning_rate": 3.146517766288992e-08, "logits/chosen": -2.7649950981140137, "logits/rejected": -2.734921455383301, "logps/chosen": -218.80978393554688, "logps/rejected": -192.93006896972656, "loss": 0.9264, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.23358440399169922, "rewards/margins": 0.0533999502658844, "rewards/rejected": -0.286984384059906, "step": 8790 }, { "epoch": 2.57, "learning_rate": 3.105428352386747e-08, "logits/chosen": -2.7432548999786377, "logits/rejected": -2.7458105087280273, "logps/chosen": -202.32501220703125, "logps/rejected": -191.49530029296875, "loss": 0.9221, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2086220532655716, "rewards/margins": 0.14341671764850616, "rewards/rejected": -0.35203877091407776, "step": 8800 }, { "epoch": 2.57, "eval_logits/chosen": -2.670869827270508, "eval_logits/rejected": -2.665914297103882, "eval_logps/chosen": -197.7539520263672, "eval_logps/rejected": -183.87417602539062, "eval_loss": 0.9304640889167786, "eval_rewards/accuracies": 0.5827668309211731, "eval_rewards/chosen": -0.21486559510231018, "eval_rewards/margins": 0.08042251318693161, "eval_rewards/rejected": -0.2952880859375, "eval_runtime": 443.4695, "eval_samples_per_second": 26.529, "eval_steps_per_second": 3.317, "step": 8800 }, { "epoch": 2.57, "learning_rate": 3.064591218550036e-08, "logits/chosen": -2.744394540786743, "logits/rejected": -2.749730348587036, "logps/chosen": -180.59036254882812, "logps/rejected": -174.34088134765625, "loss": 0.9156, "rewards/accuracies": 0.578125, "rewards/chosen": -0.233584925532341, "rewards/margins": 0.08018484711647034, "rewards/rejected": -0.3137698173522949, "step": 8810 }, { "epoch": 2.57, "learning_rate": 3.024006835324475e-08, "logits/chosen": -2.6970248222351074, "logits/rejected": -2.721404790878296, "logps/chosen": -180.15753173828125, "logps/rejected": -171.57437133789062, "loss": 0.9174, "rewards/accuracies": 0.59375, "rewards/chosen": -0.20657749474048615, "rewards/margins": 0.09772597253322601, "rewards/rejected": -0.30430346727371216, "step": 8820 }, { "epoch": 2.58, "learning_rate": 2.983675670343372e-08, "logits/chosen": -2.7651171684265137, "logits/rejected": -2.737391948699951, "logps/chosen": -200.2530975341797, "logps/rejected": -179.17733764648438, "loss": 0.9196, "rewards/accuracies": 0.546875, "rewards/chosen": -0.21371407806873322, "rewards/margins": 0.07342345267534256, "rewards/rejected": -0.28713753819465637, "step": 8830 }, { "epoch": 2.58, "learning_rate": 2.9435981883223244e-08, "logits/chosen": -2.759326219558716, "logits/rejected": -2.7474710941314697, "logps/chosen": -200.50869750976562, "logps/rejected": -190.08184814453125, "loss": 0.9159, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2350468635559082, "rewards/margins": 0.08074475079774857, "rewards/rejected": -0.31579163670539856, "step": 8840 }, { "epoch": 2.58, "learning_rate": 2.9037748510538585e-08, "logits/chosen": -2.74891996383667, "logits/rejected": -2.754908800125122, "logps/chosen": -193.0035400390625, "logps/rejected": -192.65682983398438, "loss": 0.9152, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.20041045546531677, "rewards/margins": 0.10050807148218155, "rewards/rejected": -0.3009185492992401, "step": 8850 }, { "epoch": 2.58, "learning_rate": 2.864206117402146e-08, "logits/chosen": -2.7324039936065674, "logits/rejected": -2.7093453407287598, "logps/chosen": -205.21630859375, "logps/rejected": -179.09365844726562, "loss": 0.9247, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2105673849582672, "rewards/margins": 0.04506702348589897, "rewards/rejected": -0.2556344270706177, "step": 8860 }, { "epoch": 2.59, "learning_rate": 2.8248924432976577e-08, "logits/chosen": -2.7505710124969482, "logits/rejected": -2.723975896835327, "logps/chosen": -229.4464874267578, "logps/rejected": -199.12144470214844, "loss": 0.9504, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.22895434498786926, "rewards/margins": 0.0746496245265007, "rewards/rejected": -0.3036039471626282, "step": 8870 }, { "epoch": 2.59, "learning_rate": 2.7858342817319803e-08, "logits/chosen": -2.740084409713745, "logits/rejected": -2.7120959758758545, "logps/chosen": -204.06201171875, "logps/rejected": -177.44509887695312, "loss": 0.9258, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.19777357578277588, "rewards/margins": 0.06542792916297913, "rewards/rejected": -0.263201504945755, "step": 8880 }, { "epoch": 2.59, "learning_rate": 2.7470320827525123e-08, "logits/chosen": -2.7446672916412354, "logits/rejected": -2.7116057872772217, "logps/chosen": -212.20352172851562, "logps/rejected": -192.9834442138672, "loss": 0.9102, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.22548964619636536, "rewards/margins": 0.09153325110673904, "rewards/rejected": -0.3170229196548462, "step": 8890 }, { "epoch": 2.6, "learning_rate": 2.7084862934573683e-08, "logits/chosen": -2.7336907386779785, "logits/rejected": -2.747609853744507, "logps/chosen": -206.77383422851562, "logps/rejected": -205.8780517578125, "loss": 0.8851, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.17482876777648926, "rewards/margins": 0.12865395843982697, "rewards/rejected": -0.3034827709197998, "step": 8900 }, { "epoch": 2.6, "eval_logits/chosen": -2.6622135639190674, "eval_logits/rejected": -2.65708589553833, "eval_logps/chosen": -197.75082397460938, "eval_logps/rejected": -183.87017822265625, "eval_loss": 0.931524932384491, "eval_rewards/accuracies": 0.5815771818161011, "eval_rewards/chosen": -0.2145525962114334, "eval_rewards/margins": 0.08033449202775955, "eval_rewards/rejected": -0.29488709568977356, "eval_runtime": 443.3905, "eval_samples_per_second": 26.534, "eval_steps_per_second": 3.318, "step": 8900 }, { "epoch": 2.6, "learning_rate": 2.670197357990156e-08, "logits/chosen": -2.7496495246887207, "logits/rejected": -2.767632007598877, "logps/chosen": -188.72943115234375, "logps/rejected": -182.79318237304688, "loss": 0.9185, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.20827095210552216, "rewards/margins": 0.09271486103534698, "rewards/rejected": -0.30098584294319153, "step": 8910 }, { "epoch": 2.6, "learning_rate": 2.632165717534901e-08, "logits/chosen": -2.739619493484497, "logits/rejected": -2.69582462310791, "logps/chosen": -201.65185546875, "logps/rejected": -174.01602172851562, "loss": 0.9334, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.20441868901252747, "rewards/margins": 0.06253104656934738, "rewards/rejected": -0.26694971323013306, "step": 8920 }, { "epoch": 2.6, "learning_rate": 2.5943918103109447e-08, "logits/chosen": -2.7390568256378174, "logits/rejected": -2.733745813369751, "logps/chosen": -194.0807647705078, "logps/rejected": -195.40028381347656, "loss": 0.9154, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.2364007532596588, "rewards/margins": 0.07870364189147949, "rewards/rejected": -0.3151043951511383, "step": 8930 }, { "epoch": 2.61, "learning_rate": 2.556876071567887e-08, "logits/chosen": -2.729707956314087, "logits/rejected": -2.7373297214508057, "logps/chosen": -195.1029510498047, "logps/rejected": -180.72091674804688, "loss": 0.9018, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.19179031252861023, "rewards/margins": 0.10696914047002792, "rewards/rejected": -0.29875949025154114, "step": 8940 }, { "epoch": 2.61, "learning_rate": 2.5196189335806083e-08, "logits/chosen": -2.74918794631958, "logits/rejected": -2.7424709796905518, "logps/chosen": -210.4421844482422, "logps/rejected": -199.2183380126953, "loss": 0.9257, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.21995720267295837, "rewards/margins": 0.06796015053987503, "rewards/rejected": -0.2879173159599304, "step": 8950 }, { "epoch": 2.61, "learning_rate": 2.482620825644241e-08, "logits/chosen": -2.7196192741394043, "logits/rejected": -2.7085094451904297, "logps/chosen": -177.93862915039062, "logps/rejected": -169.6486053466797, "loss": 0.921, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.2068100869655609, "rewards/margins": 0.0747736245393753, "rewards/rejected": -0.2815837264060974, "step": 8960 }, { "epoch": 2.62, "learning_rate": 2.4458821740692636e-08, "logits/chosen": -2.7299575805664062, "logits/rejected": -2.7204432487487793, "logps/chosen": -221.1160430908203, "logps/rejected": -197.26625061035156, "loss": 0.9359, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.19956819713115692, "rewards/margins": 0.10138002783060074, "rewards/rejected": -0.30094823241233826, "step": 8970 }, { "epoch": 2.62, "learning_rate": 2.409403402176541e-08, "logits/chosen": -2.755445718765259, "logits/rejected": -2.74418568611145, "logps/chosen": -213.45803833007812, "logps/rejected": -208.23281860351562, "loss": 0.9075, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.20812085270881653, "rewards/margins": 0.10565093904733658, "rewards/rejected": -0.3137717843055725, "step": 8980 }, { "epoch": 2.62, "learning_rate": 2.3731849302925126e-08, "logits/chosen": -2.7552380561828613, "logits/rejected": -2.740424633026123, "logps/chosen": -213.5357666015625, "logps/rejected": -198.0575408935547, "loss": 0.9244, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.22672665119171143, "rewards/margins": 0.07111769914627075, "rewards/rejected": -0.2978443503379822, "step": 8990 }, { "epoch": 2.63, "learning_rate": 2.3372271757442858e-08, "logits/chosen": -2.7197983264923096, "logits/rejected": -2.715674877166748, "logps/chosen": -191.84591674804688, "logps/rejected": -183.59007263183594, "loss": 0.924, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.22121064364910126, "rewards/margins": 0.08909296989440918, "rewards/rejected": -0.31030359864234924, "step": 9000 }, { "epoch": 2.63, "eval_logits/chosen": -2.6502623558044434, "eval_logits/rejected": -2.6449220180511475, "eval_logps/chosen": -197.74917602539062, "eval_logps/rejected": -183.87181091308594, "eval_loss": 0.9304214119911194, "eval_rewards/accuracies": 0.5803874731063843, "eval_rewards/chosen": -0.21438626945018768, "eval_rewards/margins": 0.08066567778587341, "eval_rewards/rejected": -0.2950519621372223, "eval_runtime": 443.376, "eval_samples_per_second": 26.535, "eval_steps_per_second": 3.318, "step": 9000 }, { "epoch": 2.63, "learning_rate": 2.301530552854869e-08, "logits/chosen": -2.7371134757995605, "logits/rejected": -2.7749149799346924, "logps/chosen": -177.41712951660156, "logps/rejected": -190.35281372070312, "loss": 0.908, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.22068405151367188, "rewards/margins": 0.09638369828462601, "rewards/rejected": -0.3170677423477173, "step": 9010 }, { "epoch": 2.63, "learning_rate": 2.266095472938376e-08, "logits/chosen": -2.7563886642456055, "logits/rejected": -2.778109073638916, "logps/chosen": -199.50352478027344, "logps/rejected": -190.46853637695312, "loss": 0.9135, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.19542989134788513, "rewards/margins": 0.11176440864801407, "rewards/rejected": -0.307194322347641, "step": 9020 }, { "epoch": 2.63, "learning_rate": 2.2309223442952885e-08, "logits/chosen": -2.733668565750122, "logits/rejected": -2.7238266468048096, "logps/chosen": -200.59725952148438, "logps/rejected": -185.13900756835938, "loss": 0.9339, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.19557544589042664, "rewards/margins": 0.06873549520969391, "rewards/rejected": -0.26431095600128174, "step": 9030 }, { "epoch": 2.64, "learning_rate": 2.1960115722077733e-08, "logits/chosen": -2.767874002456665, "logits/rejected": -2.7725155353546143, "logps/chosen": -234.43310546875, "logps/rejected": -232.8823699951172, "loss": 0.9095, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.24168884754180908, "rewards/margins": 0.10853960365056992, "rewards/rejected": -0.350228488445282, "step": 9040 }, { "epoch": 2.64, "learning_rate": 2.1613635589349756e-08, "logits/chosen": -2.73789381980896, "logits/rejected": -2.7311458587646484, "logps/chosen": -185.69686889648438, "logps/rejected": -173.32257080078125, "loss": 0.9227, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.25270745158195496, "rewards/margins": 0.06281878799200058, "rewards/rejected": -0.3155262768268585, "step": 9050 }, { "epoch": 2.64, "learning_rate": 2.1269787037084237e-08, "logits/chosen": -2.7241010665893555, "logits/rejected": -2.7217373847961426, "logps/chosen": -201.6121368408203, "logps/rejected": -185.01535034179688, "loss": 0.9272, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.21401450037956238, "rewards/margins": 0.08743976056575775, "rewards/rejected": -0.30145424604415894, "step": 9060 }, { "epoch": 2.65, "learning_rate": 2.0928574027273797e-08, "logits/chosen": -2.737255811691284, "logits/rejected": -2.6996257305145264, "logps/chosen": -216.8815155029297, "logps/rejected": -174.23831176757812, "loss": 0.941, "rewards/accuracies": 0.5625, "rewards/chosen": -0.25054770708084106, "rewards/margins": 0.06389566510915756, "rewards/rejected": -0.31444334983825684, "step": 9070 }, { "epoch": 2.65, "learning_rate": 2.0590000491543385e-08, "logits/chosen": -2.7226431369781494, "logits/rejected": -2.7095208168029785, "logps/chosen": -228.7338104248047, "logps/rejected": -202.36485290527344, "loss": 0.9055, "rewards/accuracies": 0.628125011920929, "rewards/chosen": -0.21068677306175232, "rewards/margins": 0.10356806218624115, "rewards/rejected": -0.31425485014915466, "step": 9080 }, { "epoch": 2.65, "learning_rate": 2.025407033110435e-08, "logits/chosen": -2.7291300296783447, "logits/rejected": -2.7181403636932373, "logps/chosen": -221.032470703125, "logps/rejected": -185.6308135986328, "loss": 0.9268, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.22104379534721375, "rewards/margins": 0.07961587607860565, "rewards/rejected": -0.3006596863269806, "step": 9090 }, { "epoch": 2.65, "learning_rate": 1.9920787416709967e-08, "logits/chosen": -2.725501298904419, "logits/rejected": -2.7245450019836426, "logps/chosen": -204.4935302734375, "logps/rejected": -187.82420349121094, "loss": 0.9025, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.19262897968292236, "rewards/margins": 0.11257578432559967, "rewards/rejected": -0.30520474910736084, "step": 9100 }, { "epoch": 2.65, "eval_logits/chosen": -2.6463818550109863, "eval_logits/rejected": -2.64095401763916, "eval_logps/chosen": -197.75514221191406, "eval_logps/rejected": -183.8715362548828, "eval_loss": 0.9314547181129456, "eval_rewards/accuracies": 0.5790278911590576, "eval_rewards/chosen": -0.2149849534034729, "eval_rewards/margins": 0.0800388753414154, "eval_rewards/rejected": -0.2950238287448883, "eval_runtime": 443.3623, "eval_samples_per_second": 26.536, "eval_steps_per_second": 3.318, "step": 9100 }, { "epoch": 2.66, "learning_rate": 1.959015558861049e-08, "logits/chosen": -2.7422285079956055, "logits/rejected": -2.713832378387451, "logps/chosen": -191.2167205810547, "logps/rejected": -172.79429626464844, "loss": 0.9232, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.2038939744234085, "rewards/margins": 0.08725843578577042, "rewards/rejected": -0.29115238785743713, "step": 9110 }, { "epoch": 2.66, "learning_rate": 1.926217865650906e-08, "logits/chosen": -2.732168674468994, "logits/rejected": -2.72629976272583, "logps/chosen": -201.04891967773438, "logps/rejected": -180.03712463378906, "loss": 0.9082, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.21944966912269592, "rewards/margins": 0.10418324172496796, "rewards/rejected": -0.3236328959465027, "step": 9120 }, { "epoch": 2.66, "learning_rate": 1.8936860399517947e-08, "logits/chosen": -2.73362398147583, "logits/rejected": -2.71791410446167, "logps/chosen": -184.18553161621094, "logps/rejected": -170.3592071533203, "loss": 0.9322, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.2177029401063919, "rewards/margins": 0.06241176277399063, "rewards/rejected": -0.28011471033096313, "step": 9130 }, { "epoch": 2.67, "learning_rate": 1.8614204566114622e-08, "logits/chosen": -2.7433769702911377, "logits/rejected": -2.768494129180908, "logps/chosen": -184.1487274169922, "logps/rejected": -171.6521759033203, "loss": 0.9207, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2070891559123993, "rewards/margins": 0.06774391233921051, "rewards/rejected": -0.2748330533504486, "step": 9140 }, { "epoch": 2.67, "learning_rate": 1.8294214874099045e-08, "logits/chosen": -2.7814841270446777, "logits/rejected": -2.751478672027588, "logps/chosen": -218.31857299804688, "logps/rejected": -192.98916625976562, "loss": 0.9206, "rewards/accuracies": 0.578125, "rewards/chosen": -0.25284838676452637, "rewards/margins": 0.07783571630716324, "rewards/rejected": -0.33068403601646423, "step": 9150 }, { "epoch": 2.67, "learning_rate": 1.7976895010550304e-08, "logits/chosen": -2.7278478145599365, "logits/rejected": -2.712846517562866, "logps/chosen": -214.8646697998047, "logps/rejected": -182.18746948242188, "loss": 0.9261, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.1966797411441803, "rewards/margins": 0.07067761570215225, "rewards/rejected": -0.26735737919807434, "step": 9160 }, { "epoch": 2.67, "learning_rate": 1.766224863178467e-08, "logits/chosen": -2.701188087463379, "logits/rejected": -2.710338830947876, "logps/chosen": -200.46682739257812, "logps/rejected": -189.87393188476562, "loss": 0.9239, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.20917856693267822, "rewards/margins": 0.08873340487480164, "rewards/rejected": -0.29791194200515747, "step": 9170 }, { "epoch": 2.68, "learning_rate": 1.7350279363312988e-08, "logits/chosen": -2.704545259475708, "logits/rejected": -2.7386326789855957, "logps/chosen": -210.10971069335938, "logps/rejected": -196.05384826660156, "loss": 0.913, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.2131265103816986, "rewards/margins": 0.09158849716186523, "rewards/rejected": -0.3047150671482086, "step": 9180 }, { "epoch": 2.68, "learning_rate": 1.7040990799799317e-08, "logits/chosen": -2.7536277770996094, "logits/rejected": -2.7470672130584717, "logps/chosen": -183.75985717773438, "logps/rejected": -184.50489807128906, "loss": 0.9058, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20402200520038605, "rewards/margins": 0.10172203928232193, "rewards/rejected": -0.3057440221309662, "step": 9190 }, { "epoch": 2.68, "learning_rate": 1.6734386505019155e-08, "logits/chosen": -2.7178406715393066, "logits/rejected": -2.720010280609131, "logps/chosen": -207.04568481445312, "logps/rejected": -187.1564483642578, "loss": 0.9348, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21785137057304382, "rewards/margins": 0.08800145983695984, "rewards/rejected": -0.30585283041000366, "step": 9200 }, { "epoch": 2.68, "eval_logits/chosen": -2.640465021133423, "eval_logits/rejected": -2.63492488861084, "eval_logps/chosen": -197.74913024902344, "eval_logps/rejected": -183.866943359375, "eval_loss": 0.930846095085144, "eval_rewards/accuracies": 0.5802175402641296, "eval_rewards/chosen": -0.21438243985176086, "eval_rewards/margins": 0.08018327504396439, "eval_rewards/rejected": -0.29456573724746704, "eval_runtime": 443.5095, "eval_samples_per_second": 26.527, "eval_steps_per_second": 3.317, "step": 9200 }, { "epoch": 2.69, "learning_rate": 1.643047001181852e-08, "logits/chosen": -2.707907199859619, "logits/rejected": -2.722409248352051, "logps/chosen": -178.1352996826172, "logps/rejected": -183.51571655273438, "loss": 0.9095, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.22640416026115417, "rewards/margins": 0.11551691591739655, "rewards/rejected": -0.3419210910797119, "step": 9210 }, { "epoch": 2.69, "learning_rate": 1.6129244822073424e-08, "logits/chosen": -2.707960844039917, "logits/rejected": -2.696136951446533, "logps/chosen": -206.0924530029297, "logps/rejected": -188.82069396972656, "loss": 0.9137, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.24374035000801086, "rewards/margins": 0.07161261886358261, "rewards/rejected": -0.3153529763221741, "step": 9220 }, { "epoch": 2.69, "learning_rate": 1.5830714406649155e-08, "logits/chosen": -2.6887340545654297, "logits/rejected": -2.696369171142578, "logps/chosen": -205.6720733642578, "logps/rejected": -201.45242309570312, "loss": 0.8953, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.18328827619552612, "rewards/margins": 0.1293097734451294, "rewards/rejected": -0.3125980794429779, "step": 9230 }, { "epoch": 2.7, "learning_rate": 1.5534882205360645e-08, "logits/chosen": -2.766343355178833, "logits/rejected": -2.7414097785949707, "logps/chosen": -202.77731323242188, "logps/rejected": -172.8382110595703, "loss": 0.9419, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.22233736515045166, "rewards/margins": 0.08321109414100647, "rewards/rejected": -0.30554842948913574, "step": 9240 }, { "epoch": 2.7, "learning_rate": 1.5241751626932503e-08, "logits/chosen": -2.729731559753418, "logits/rejected": -2.709348678588867, "logps/chosen": -196.87371826171875, "logps/rejected": -176.45614624023438, "loss": 0.9371, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20812328159809113, "rewards/margins": 0.07200122624635696, "rewards/rejected": -0.2801244854927063, "step": 9250 }, { "epoch": 2.7, "learning_rate": 1.4951326048960073e-08, "logits/chosen": -2.7518060207366943, "logits/rejected": -2.7505202293395996, "logps/chosen": -209.6000518798828, "logps/rejected": -194.99954223632812, "loss": 0.9341, "rewards/accuracies": 0.578125, "rewards/chosen": -0.22044873237609863, "rewards/margins": 0.07619608938694, "rewards/rejected": -0.29664483666419983, "step": 9260 }, { "epoch": 2.7, "learning_rate": 1.4663608817870182e-08, "logits/chosen": -2.724161148071289, "logits/rejected": -2.7372405529022217, "logps/chosen": -186.85226440429688, "logps/rejected": -192.1614532470703, "loss": 0.9183, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1896825134754181, "rewards/margins": 0.08983322232961655, "rewards/rejected": -0.27951571345329285, "step": 9270 }, { "epoch": 2.71, "learning_rate": 1.4378603248882932e-08, "logits/chosen": -2.7284820079803467, "logits/rejected": -2.745692729949951, "logps/chosen": -189.42337036132812, "logps/rejected": -192.413818359375, "loss": 0.932, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.26627618074417114, "rewards/margins": 0.08315873891115189, "rewards/rejected": -0.34943491220474243, "step": 9280 }, { "epoch": 2.71, "learning_rate": 1.4096312625973162e-08, "logits/chosen": -2.7349767684936523, "logits/rejected": -2.7317709922790527, "logps/chosen": -185.63861083984375, "logps/rejected": -187.82980346679688, "loss": 0.9297, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2128647118806839, "rewards/margins": 0.06893138587474823, "rewards/rejected": -0.2817961275577545, "step": 9290 }, { "epoch": 2.71, "learning_rate": 1.3816740201832794e-08, "logits/chosen": -2.7093145847320557, "logits/rejected": -2.722029209136963, "logps/chosen": -192.80197143554688, "logps/rejected": -192.15017700195312, "loss": 0.9067, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.20303645730018616, "rewards/margins": 0.1047695130109787, "rewards/rejected": -0.30780598521232605, "step": 9300 }, { "epoch": 2.71, "eval_logits/chosen": -2.646462917327881, "eval_logits/rejected": -2.641040086746216, "eval_logps/chosen": -197.75985717773438, "eval_logps/rejected": -183.88047790527344, "eval_loss": 0.9311975240707397, "eval_rewards/accuracies": 0.5856559872627258, "eval_rewards/chosen": -0.21545521914958954, "eval_rewards/margins": 0.0804641842842102, "eval_rewards/rejected": -0.29591938853263855, "eval_runtime": 443.4347, "eval_samples_per_second": 26.532, "eval_steps_per_second": 3.317, "step": 9300 }, { "epoch": 2.72, "learning_rate": 1.3539889197833416e-08, "logits/chosen": -2.7400126457214355, "logits/rejected": -2.731029510498047, "logps/chosen": -205.12112426757812, "logps/rejected": -187.09153747558594, "loss": 0.9299, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.22620835900306702, "rewards/margins": 0.06900273263454437, "rewards/rejected": -0.2952111065387726, "step": 9310 }, { "epoch": 2.72, "learning_rate": 1.3265762803988944e-08, "logits/chosen": -2.740659236907959, "logits/rejected": -2.766366720199585, "logps/chosen": -185.3249969482422, "logps/rejected": -191.77540588378906, "loss": 0.9077, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.20000505447387695, "rewards/margins": 0.10223666578531265, "rewards/rejected": -0.302241712808609, "step": 9320 }, { "epoch": 2.72, "learning_rate": 1.299436417891911e-08, "logits/chosen": -2.743675947189331, "logits/rejected": -2.7326626777648926, "logps/chosen": -195.64309692382812, "logps/rejected": -174.15208435058594, "loss": 0.922, "rewards/accuracies": 0.640625, "rewards/chosen": -0.19279178977012634, "rewards/margins": 0.1185562014579773, "rewards/rejected": -0.31134796142578125, "step": 9330 }, { "epoch": 2.72, "learning_rate": 1.2725696449812779e-08, "logits/chosen": -2.7424731254577637, "logits/rejected": -2.733281135559082, "logps/chosen": -202.0879364013672, "logps/rejected": -185.03854370117188, "loss": 0.9254, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.19211646914482117, "rewards/margins": 0.09572719037532806, "rewards/rejected": -0.28784364461898804, "step": 9340 }, { "epoch": 2.73, "learning_rate": 1.2459762712392268e-08, "logits/chosen": -2.722184419631958, "logits/rejected": -2.7161264419555664, "logps/chosen": -187.12063598632812, "logps/rejected": -177.51832580566406, "loss": 0.9, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.2033185213804245, "rewards/margins": 0.09994320571422577, "rewards/rejected": -0.30326172709465027, "step": 9350 }, { "epoch": 2.73, "learning_rate": 1.2196566030877348e-08, "logits/chosen": -2.7002055644989014, "logits/rejected": -2.729663372039795, "logps/chosen": -167.5603790283203, "logps/rejected": -175.6070556640625, "loss": 0.9287, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21544072031974792, "rewards/margins": 0.08538533747196198, "rewards/rejected": -0.3008260726928711, "step": 9360 }, { "epoch": 2.73, "learning_rate": 1.1936109437950231e-08, "logits/chosen": -2.7116270065307617, "logits/rejected": -2.7060136795043945, "logps/chosen": -189.29359436035156, "logps/rejected": -184.63731384277344, "loss": 0.9068, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.2106679379940033, "rewards/margins": 0.1083311215043068, "rewards/rejected": -0.3189990818500519, "step": 9370 }, { "epoch": 2.74, "learning_rate": 1.1678395934720308e-08, "logits/chosen": -2.720411539077759, "logits/rejected": -2.7151317596435547, "logps/chosen": -210.22427368164062, "logps/rejected": -194.78689575195312, "loss": 0.9022, "rewards/accuracies": 0.59375, "rewards/chosen": -0.245496466755867, "rewards/margins": 0.09903912246227264, "rewards/rejected": -0.34453555941581726, "step": 9380 }, { "epoch": 2.74, "learning_rate": 1.1423428490689834e-08, "logits/chosen": -2.7146785259246826, "logits/rejected": -2.7142322063446045, "logps/chosen": -186.73641967773438, "logps/rejected": -177.76785278320312, "loss": 0.9128, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.2251826971769333, "rewards/margins": 0.09867312014102936, "rewards/rejected": -0.32385581731796265, "step": 9390 }, { "epoch": 2.74, "learning_rate": 1.1171210043719626e-08, "logits/chosen": -2.7494256496429443, "logits/rejected": -2.7391581535339355, "logps/chosen": -223.8762664794922, "logps/rejected": -191.97633361816406, "loss": 0.9263, "rewards/accuracies": 0.628125011920929, "rewards/chosen": -0.22717516124248505, "rewards/margins": 0.09901183843612671, "rewards/rejected": -0.32618698477745056, "step": 9400 }, { "epoch": 2.74, "eval_logits/chosen": -2.648622751235962, "eval_logits/rejected": -2.643249750137329, "eval_logps/chosen": -197.7535858154297, "eval_logps/rejected": -183.8784637451172, "eval_loss": 0.9307305216789246, "eval_rewards/accuracies": 0.5829367637634277, "eval_rewards/chosen": -0.2148279845714569, "eval_rewards/margins": 0.08088845759630203, "eval_rewards/rejected": -0.29571646451950073, "eval_runtime": 443.3903, "eval_samples_per_second": 26.534, "eval_steps_per_second": 3.318, "step": 9400 }, { "epoch": 2.74, "learning_rate": 1.0921743499995139e-08, "logits/chosen": -2.749537467956543, "logits/rejected": -2.7279648780822754, "logps/chosen": -220.28036499023438, "logps/rejected": -194.126708984375, "loss": 0.9278, "rewards/accuracies": 0.59375, "rewards/chosen": -0.23800167441368103, "rewards/margins": 0.05967860668897629, "rewards/rejected": -0.2976802885532379, "step": 9410 }, { "epoch": 2.75, "learning_rate": 1.0675031733993144e-08, "logits/chosen": -2.765080690383911, "logits/rejected": -2.728818893432617, "logps/chosen": -207.89999389648438, "logps/rejected": -188.54592895507812, "loss": 0.9229, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2473161667585373, "rewards/margins": 0.0724390298128128, "rewards/rejected": -0.3197552263736725, "step": 9420 }, { "epoch": 2.75, "learning_rate": 1.0431077588448301e-08, "logits/chosen": -2.7497363090515137, "logits/rejected": -2.762665271759033, "logps/chosen": -187.5045623779297, "logps/rejected": -177.8409881591797, "loss": 0.9288, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.2124253511428833, "rewards/margins": 0.09188083559274673, "rewards/rejected": -0.3043062090873718, "step": 9430 }, { "epoch": 2.75, "learning_rate": 1.018988387432082e-08, "logits/chosen": -2.740898847579956, "logits/rejected": -2.7467055320739746, "logps/chosen": -193.87625122070312, "logps/rejected": -191.6377410888672, "loss": 0.9209, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.2214924544095993, "rewards/margins": 0.08042500913143158, "rewards/rejected": -0.30191749334335327, "step": 9440 }, { "epoch": 2.76, "learning_rate": 9.951453370763725e-09, "logits/chosen": -2.716641426086426, "logits/rejected": -2.7378287315368652, "logps/chosen": -182.84176635742188, "logps/rejected": -182.30215454101562, "loss": 0.9041, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.22670023143291473, "rewards/margins": 0.0805177167057991, "rewards/rejected": -0.30721792578697205, "step": 9450 }, { "epoch": 2.76, "learning_rate": 9.715788825091053e-09, "logits/chosen": -2.7388954162597656, "logits/rejected": -2.7575676441192627, "logps/chosen": -190.92247009277344, "logps/rejected": -194.45704650878906, "loss": 0.9242, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.2476111650466919, "rewards/margins": 0.07143766433000565, "rewards/rejected": -0.31904882192611694, "step": 9460 }, { "epoch": 2.76, "learning_rate": 9.482892952746069e-09, "logits/chosen": -2.7590038776397705, "logits/rejected": -2.734876871109009, "logps/chosen": -212.1120147705078, "logps/rejected": -186.57237243652344, "loss": 0.9293, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.21230432391166687, "rewards/margins": 0.08338922262191772, "rewards/rejected": -0.2956935465335846, "step": 9470 }, { "epoch": 2.77, "learning_rate": 9.252768437269965e-09, "logits/chosen": -2.738917827606201, "logits/rejected": -2.7594659328460693, "logps/chosen": -179.0370330810547, "logps/rejected": -184.1522216796875, "loss": 0.9011, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.22402581572532654, "rewards/margins": 0.12652428448200226, "rewards/rejected": -0.35055011510849, "step": 9480 }, { "epoch": 2.77, "learning_rate": 9.025417930271062e-09, "logits/chosen": -2.714035749435425, "logits/rejected": -2.717360019683838, "logps/chosen": -175.53945922851562, "logps/rejected": -170.43405151367188, "loss": 0.9164, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.2230546921491623, "rewards/margins": 0.10123791545629501, "rewards/rejected": -0.3242926597595215, "step": 9490 }, { "epoch": 2.77, "learning_rate": 8.800844051394168e-09, "logits/chosen": -2.7311530113220215, "logits/rejected": -2.7537662982940674, "logps/chosen": -188.2486572265625, "logps/rejected": -193.36257934570312, "loss": 0.912, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.2432141751050949, "rewards/margins": 0.11424192041158676, "rewards/rejected": -0.35745611786842346, "step": 9500 }, { "epoch": 2.77, "eval_logits/chosen": -2.6494829654693604, "eval_logits/rejected": -2.644101142883301, "eval_logps/chosen": -197.75807189941406, "eval_logps/rejected": -183.8787841796875, "eval_loss": 0.9305983781814575, "eval_rewards/accuracies": 0.5822569727897644, "eval_rewards/chosen": -0.21527476608753204, "eval_rewards/margins": 0.08047327399253845, "eval_rewards/rejected": -0.2957480549812317, "eval_runtime": 443.3997, "eval_samples_per_second": 26.534, "eval_steps_per_second": 3.318, "step": 9500 }, { "epoch": 2.77, "learning_rate": 8.579049388290432e-09, "logits/chosen": -2.753450393676758, "logits/rejected": -2.7338128089904785, "logps/chosen": -208.67062377929688, "logps/rejected": -187.44216918945312, "loss": 0.9423, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.21917076408863068, "rewards/margins": 0.09175173938274384, "rewards/rejected": -0.3109225034713745, "step": 9510 }, { "epoch": 2.78, "learning_rate": 8.360036496587397e-09, "logits/chosen": -2.716602087020874, "logits/rejected": -2.725924015045166, "logps/chosen": -194.48460388183594, "logps/rejected": -184.2608642578125, "loss": 0.9136, "rewards/accuracies": 0.5625, "rewards/chosen": -0.22760502994060516, "rewards/margins": 0.08745179325342178, "rewards/rejected": -0.31505683064460754, "step": 9520 }, { "epoch": 2.78, "learning_rate": 8.143807899859828e-09, "logits/chosen": -2.737185001373291, "logits/rejected": -2.7138681411743164, "logps/chosen": -226.47079467773438, "logps/rejected": -194.29702758789062, "loss": 0.931, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.21239247918128967, "rewards/margins": 0.07970704883337021, "rewards/rejected": -0.29209956526756287, "step": 9530 }, { "epoch": 2.78, "learning_rate": 7.930366089600321e-09, "logits/chosen": -2.7296338081359863, "logits/rejected": -2.7105979919433594, "logps/chosen": -194.48092651367188, "logps/rejected": -175.62625122070312, "loss": 0.9307, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.23873183131217957, "rewards/margins": 0.07228745520114899, "rewards/rejected": -0.31101930141448975, "step": 9540 }, { "epoch": 2.79, "learning_rate": 7.719713525190851e-09, "logits/chosen": -2.7413105964660645, "logits/rejected": -2.740273952484131, "logps/chosen": -199.0068359375, "logps/rejected": -181.02638244628906, "loss": 0.9055, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.2191927433013916, "rewards/margins": 0.1066521629691124, "rewards/rejected": -0.3258448541164398, "step": 9550 }, { "epoch": 2.79, "learning_rate": 7.511852633874299e-09, "logits/chosen": -2.757634162902832, "logits/rejected": -2.7396769523620605, "logps/chosen": -212.4727783203125, "logps/rejected": -192.638427734375, "loss": 0.9276, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.23950496315956116, "rewards/margins": 0.06648717075586319, "rewards/rejected": -0.305992066860199, "step": 9560 }, { "epoch": 2.79, "learning_rate": 7.3067858107264125e-09, "logits/chosen": -2.7161240577697754, "logits/rejected": -2.7146925926208496, "logps/chosen": -202.38369750976562, "logps/rejected": -185.82192993164062, "loss": 0.9349, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.22953566908836365, "rewards/margins": 0.08001131564378738, "rewards/rejected": -0.3095470070838928, "step": 9570 }, { "epoch": 2.79, "learning_rate": 7.104515418628443e-09, "logits/chosen": -2.74570894241333, "logits/rejected": -2.7499148845672607, "logps/chosen": -203.444091796875, "logps/rejected": -184.25234985351562, "loss": 0.9122, "rewards/accuracies": 0.578125, "rewards/chosen": -0.21463234722614288, "rewards/margins": 0.0944782942533493, "rewards/rejected": -0.3091106116771698, "step": 9580 }, { "epoch": 2.8, "learning_rate": 6.905043788239695e-09, "logits/chosen": -2.733797311782837, "logits/rejected": -2.738307476043701, "logps/chosen": -205.6097869873047, "logps/rejected": -186.2257080078125, "loss": 0.949, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.24760353565216064, "rewards/margins": 0.06620237976312637, "rewards/rejected": -0.3138059377670288, "step": 9590 }, { "epoch": 2.8, "learning_rate": 6.708373217970853e-09, "logits/chosen": -2.7177987098693848, "logits/rejected": -2.7118101119995117, "logps/chosen": -204.19784545898438, "logps/rejected": -184.19650268554688, "loss": 0.9157, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.19713929295539856, "rewards/margins": 0.09181550145149231, "rewards/rejected": -0.28895479440689087, "step": 9600 }, { "epoch": 2.8, "eval_logits/chosen": -2.649334669113159, "eval_logits/rejected": -2.6439483165740967, "eval_logps/chosen": -197.7744903564453, "eval_logps/rejected": -183.8859405517578, "eval_loss": 0.931446373462677, "eval_rewards/accuracies": 0.5785180330276489, "eval_rewards/chosen": -0.2169175148010254, "eval_rewards/margins": 0.07954936474561691, "eval_rewards/rejected": -0.2964669167995453, "eval_runtime": 443.4093, "eval_samples_per_second": 26.533, "eval_steps_per_second": 3.317, "step": 9600 }, { "epoch": 2.8, "learning_rate": 6.514505973957252e-09, "logits/chosen": -2.7404611110687256, "logits/rejected": -2.7086660861968994, "logps/chosen": -198.59323120117188, "logps/rejected": -163.24148559570312, "loss": 0.9186, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.2203093320131302, "rewards/margins": 0.06923945248126984, "rewards/rejected": -0.2895487844944, "step": 9610 }, { "epoch": 2.81, "learning_rate": 6.323444290033064e-09, "logits/chosen": -2.7444145679473877, "logits/rejected": -2.7581942081451416, "logps/chosen": -183.66000366210938, "logps/rejected": -187.50717163085938, "loss": 0.921, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.19854137301445007, "rewards/margins": 0.07382546365261078, "rewards/rejected": -0.27236682176589966, "step": 9620 }, { "epoch": 2.81, "learning_rate": 6.135190367705295e-09, "logits/chosen": -2.729832172393799, "logits/rejected": -2.721663236618042, "logps/chosen": -196.37266540527344, "logps/rejected": -175.19287109375, "loss": 0.9094, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.19448015093803406, "rewards/margins": 0.09932075440883636, "rewards/rejected": -0.29380089044570923, "step": 9630 }, { "epoch": 2.81, "learning_rate": 5.949746376128662e-09, "logits/chosen": -2.7646279335021973, "logits/rejected": -2.739750385284424, "logps/chosen": -203.86634826660156, "logps/rejected": -182.5919952392578, "loss": 0.9157, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.20224671065807343, "rewards/margins": 0.08542544394731522, "rewards/rejected": -0.28767213225364685, "step": 9640 }, { "epoch": 2.81, "learning_rate": 5.767114452080363e-09, "logits/chosen": -2.7331650257110596, "logits/rejected": -2.707552433013916, "logps/chosen": -195.32037353515625, "logps/rejected": -173.34744262695312, "loss": 0.9152, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.20608988404273987, "rewards/margins": 0.07609757035970688, "rewards/rejected": -0.28218746185302734, "step": 9650 }, { "epoch": 2.82, "learning_rate": 5.587296699935629e-09, "logits/chosen": -2.733536958694458, "logits/rejected": -2.727839231491089, "logps/chosen": -209.40878295898438, "logps/rejected": -185.87734985351562, "loss": 0.9242, "rewards/accuracies": 0.578125, "rewards/chosen": -0.25389450788497925, "rewards/margins": 0.058593083173036575, "rewards/rejected": -0.3124876022338867, "step": 9660 }, { "epoch": 2.82, "learning_rate": 5.410295191643349e-09, "logits/chosen": -2.7379884719848633, "logits/rejected": -2.724625587463379, "logps/chosen": -188.17678833007812, "logps/rejected": -170.9092559814453, "loss": 0.9321, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.21372437477111816, "rewards/margins": 0.07617217302322388, "rewards/rejected": -0.28989654779434204, "step": 9670 }, { "epoch": 2.82, "learning_rate": 5.236111966702345e-09, "logits/chosen": -2.724616765975952, "logits/rejected": -2.751457691192627, "logps/chosen": -181.1899871826172, "logps/rejected": -180.98452758789062, "loss": 0.9117, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.21435685455799103, "rewards/margins": 0.09840370714664459, "rewards/rejected": -0.31276053190231323, "step": 9680 }, { "epoch": 2.83, "learning_rate": 5.064749032137744e-09, "logits/chosen": -2.7458510398864746, "logits/rejected": -2.7312254905700684, "logps/chosen": -204.89797973632812, "logps/rejected": -188.15606689453125, "loss": 0.9283, "rewards/accuracies": 0.5625, "rewards/chosen": -0.18471845984458923, "rewards/margins": 0.10320683568716049, "rewards/rejected": -0.2879253029823303, "step": 9690 }, { "epoch": 2.83, "learning_rate": 4.896208362477838e-09, "logits/chosen": -2.7029900550842285, "logits/rejected": -2.713294267654419, "logps/chosen": -195.61605834960938, "logps/rejected": -188.32113647460938, "loss": 0.9094, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.18786336481571198, "rewards/margins": 0.10033879429101944, "rewards/rejected": -0.28820210695266724, "step": 9700 }, { "epoch": 2.83, "eval_logits/chosen": -2.649444580078125, "eval_logits/rejected": -2.6440680027008057, "eval_logps/chosen": -197.7625274658203, "eval_logps/rejected": -183.88259887695312, "eval_loss": 0.9309257864952087, "eval_rewards/accuracies": 0.5831067562103271, "eval_rewards/chosen": -0.21572300791740417, "eval_rewards/margins": 0.08040815591812134, "eval_rewards/rejected": -0.2961311936378479, "eval_runtime": 443.3908, "eval_samples_per_second": 26.534, "eval_steps_per_second": 3.318, "step": 9700 }, { "epoch": 2.83, "learning_rate": 4.730491899731487e-09, "logits/chosen": -2.734896183013916, "logits/rejected": -2.7162668704986572, "logps/chosen": -209.939453125, "logps/rejected": -182.87033081054688, "loss": 0.9165, "rewards/accuracies": 0.578125, "rewards/chosen": -0.22276946902275085, "rewards/margins": 0.08350035548210144, "rewards/rejected": -0.3062697947025299, "step": 9710 }, { "epoch": 2.84, "learning_rate": 4.567601553365608e-09, "logits/chosen": -2.740227460861206, "logits/rejected": -2.737812042236328, "logps/chosen": -206.8555908203125, "logps/rejected": -187.4201202392578, "loss": 0.922, "rewards/accuracies": 0.59375, "rewards/chosen": -0.224287748336792, "rewards/margins": 0.08504447340965271, "rewards/rejected": -0.3093322217464447, "step": 9720 }, { "epoch": 2.84, "learning_rate": 4.407539200283167e-09, "logits/chosen": -2.7353711128234863, "logits/rejected": -2.7307136058807373, "logps/chosen": -200.0714569091797, "logps/rejected": -183.77415466308594, "loss": 0.923, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.19830277562141418, "rewards/margins": 0.08112906664609909, "rewards/rejected": -0.27943187952041626, "step": 9730 }, { "epoch": 2.84, "learning_rate": 4.250306684801696e-09, "logits/chosen": -2.771733045578003, "logits/rejected": -2.7563509941101074, "logps/chosen": -206.34060668945312, "logps/rejected": -193.19229125976562, "loss": 0.9129, "rewards/accuracies": 0.59375, "rewards/chosen": -0.23825040459632874, "rewards/margins": 0.09162227809429169, "rewards/rejected": -0.32987266778945923, "step": 9740 }, { "epoch": 2.84, "learning_rate": 4.095905818631784e-09, "logits/chosen": -2.7209630012512207, "logits/rejected": -2.75238037109375, "logps/chosen": -176.484130859375, "logps/rejected": -178.44244384765625, "loss": 0.9192, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.22507627308368683, "rewards/margins": 0.07493752241134644, "rewards/rejected": -0.30001378059387207, "step": 9750 }, { "epoch": 2.85, "learning_rate": 3.9443383808565326e-09, "logits/chosen": -2.765580177307129, "logits/rejected": -2.7446515560150146, "logps/chosen": -209.17422485351562, "logps/rejected": -183.21139526367188, "loss": 0.9416, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.2306930124759674, "rewards/margins": 0.06615431606769562, "rewards/rejected": -0.29684728384017944, "step": 9760 }, { "epoch": 2.85, "learning_rate": 3.795606117910743e-09, "logits/chosen": -2.7446208000183105, "logits/rejected": -2.7233622074127197, "logps/chosen": -183.44345092773438, "logps/rejected": -163.6812744140625, "loss": 0.9275, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.231636643409729, "rewards/margins": 0.06933009624481201, "rewards/rejected": -0.30096670985221863, "step": 9770 }, { "epoch": 2.85, "learning_rate": 3.6497107435610462e-09, "logits/chosen": -2.709591865539551, "logits/rejected": -2.7183585166931152, "logps/chosen": -194.71524047851562, "logps/rejected": -182.2717742919922, "loss": 0.9439, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.22313237190246582, "rewards/margins": 0.057538557797670364, "rewards/rejected": -0.2806709110736847, "step": 9780 }, { "epoch": 2.86, "learning_rate": 3.5066539388859116e-09, "logits/chosen": -2.747555732727051, "logits/rejected": -2.7734694480895996, "logps/chosen": -205.0553741455078, "logps/rejected": -202.21759033203125, "loss": 0.9144, "rewards/accuracies": 0.59375, "rewards/chosen": -0.2210444211959839, "rewards/margins": 0.11726301908493042, "rewards/rejected": -0.3383074402809143, "step": 9790 }, { "epoch": 2.86, "learning_rate": 3.366437352256557e-09, "logits/chosen": -2.7042324542999268, "logits/rejected": -2.7263922691345215, "logps/chosen": -183.5642547607422, "logps/rejected": -185.1039276123047, "loss": 0.9256, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.21685466170310974, "rewards/margins": 0.10197044909000397, "rewards/rejected": -0.3188251256942749, "step": 9800 }, { "epoch": 2.86, "eval_logits/chosen": -2.649299144744873, "eval_logits/rejected": -2.6439149379730225, "eval_logps/chosen": -197.76528930664062, "eval_logps/rejected": -183.88673400878906, "eval_loss": 0.9303866028785706, "eval_rewards/accuracies": 0.5837865471839905, "eval_rewards/chosen": -0.21599791944026947, "eval_rewards/margins": 0.08054690808057785, "eval_rewards/rejected": -0.2965448200702667, "eval_runtime": 443.4139, "eval_samples_per_second": 26.533, "eval_steps_per_second": 3.317, "step": 9800 }, { "epoch": 2.86, "learning_rate": 3.229062599317656e-09, "logits/chosen": -2.721593141555786, "logits/rejected": -2.737192153930664, "logps/chosen": -176.07333374023438, "logps/rejected": -174.5138702392578, "loss": 0.9269, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2209792584180832, "rewards/margins": 0.06416002660989761, "rewards/rejected": -0.285139262676239, "step": 9810 }, { "epoch": 2.86, "learning_rate": 3.094531262969019e-09, "logits/chosen": -2.754240036010742, "logits/rejected": -2.7274465560913086, "logps/chosen": -206.8941650390625, "logps/rejected": -177.24270629882812, "loss": 0.9209, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.188827782869339, "rewards/margins": 0.09714638441801071, "rewards/rejected": -0.2859741747379303, "step": 9820 }, { "epoch": 2.87, "learning_rate": 2.9628448933470827e-09, "logits/chosen": -2.7514803409576416, "logits/rejected": -2.7271556854248047, "logps/chosen": -219.15768432617188, "logps/rejected": -199.83834838867188, "loss": 0.9092, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.2370646893978119, "rewards/margins": 0.10409317165613174, "rewards/rejected": -0.34115785360336304, "step": 9830 }, { "epoch": 2.87, "learning_rate": 2.8340050078072275e-09, "logits/chosen": -2.7178354263305664, "logits/rejected": -2.728059768676758, "logps/chosen": -180.62649536132812, "logps/rejected": -178.77957153320312, "loss": 0.9101, "rewards/accuracies": 0.628125011920929, "rewards/chosen": -0.1962624341249466, "rewards/margins": 0.09259771555662155, "rewards/rejected": -0.28886014223098755, "step": 9840 }, { "epoch": 2.87, "learning_rate": 2.708013090906236e-09, "logits/chosen": -2.7312958240509033, "logits/rejected": -2.723904609680176, "logps/chosen": -200.50428771972656, "logps/rejected": -188.86557006835938, "loss": 0.9225, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.2286476194858551, "rewards/margins": 0.06230293586850166, "rewards/rejected": -0.2909505367279053, "step": 9850 }, { "epoch": 2.88, "learning_rate": 2.5848705943851966e-09, "logits/chosen": -2.7521772384643555, "logits/rejected": -2.7468342781066895, "logps/chosen": -208.4209747314453, "logps/rejected": -195.09420776367188, "loss": 0.8946, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.20935706794261932, "rewards/margins": 0.11841585487127304, "rewards/rejected": -0.32777291536331177, "step": 9860 }, { "epoch": 2.88, "learning_rate": 2.464578937152767e-09, "logits/chosen": -2.707209825515747, "logits/rejected": -2.711197853088379, "logps/chosen": -182.0824737548828, "logps/rejected": -170.37973022460938, "loss": 0.9154, "rewards/accuracies": 0.59375, "rewards/chosen": -0.2167019098997116, "rewards/margins": 0.10509312152862549, "rewards/rejected": -0.3217950463294983, "step": 9870 }, { "epoch": 2.88, "learning_rate": 2.347139505268769e-09, "logits/chosen": -2.7392303943634033, "logits/rejected": -2.7479453086853027, "logps/chosen": -175.31564331054688, "logps/rejected": -187.35633850097656, "loss": 0.9075, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.197030708193779, "rewards/margins": 0.1033674031496048, "rewards/rejected": -0.3003981113433838, "step": 9880 }, { "epoch": 2.88, "learning_rate": 2.2325536519283983e-09, "logits/chosen": -2.7220139503479004, "logits/rejected": -2.730076789855957, "logps/chosen": -203.31546020507812, "logps/rejected": -197.3459014892578, "loss": 0.8875, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.207614466547966, "rewards/margins": 0.10640215873718262, "rewards/rejected": -0.3140166401863098, "step": 9890 }, { "epoch": 2.89, "learning_rate": 2.120822697446345e-09, "logits/chosen": -2.755768299102783, "logits/rejected": -2.7644848823547363, "logps/chosen": -191.22451782226562, "logps/rejected": -191.1256866455078, "loss": 0.9287, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.21304388344287872, "rewards/margins": 0.08656670153141022, "rewards/rejected": -0.29961055517196655, "step": 9900 }, { "epoch": 2.89, "eval_logits/chosen": -2.6493735313415527, "eval_logits/rejected": -2.643993616104126, "eval_logps/chosen": -197.75450134277344, "eval_logps/rejected": -183.87619018554688, "eval_loss": 0.9304879307746887, "eval_rewards/accuracies": 0.5832766890525818, "eval_rewards/chosen": -0.21492087841033936, "eval_rewards/margins": 0.0805683583021164, "eval_rewards/rejected": -0.29548925161361694, "eval_runtime": 443.2716, "eval_samples_per_second": 26.541, "eval_steps_per_second": 3.319, "step": 9900 }, { "epoch": 2.89, "learning_rate": 2.0119479292419472e-09, "logits/chosen": -2.707951784133911, "logits/rejected": -2.7130141258239746, "logps/chosen": -199.61636352539062, "logps/rejected": -174.99508666992188, "loss": 0.9208, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.18702030181884766, "rewards/margins": 0.08625749498605728, "rewards/rejected": -0.27327775955200195, "step": 9910 }, { "epoch": 2.89, "learning_rate": 1.905930601823952e-09, "logits/chosen": -2.758725643157959, "logits/rejected": -2.730515956878662, "logps/chosen": -210.56680297851562, "logps/rejected": -177.89271545410156, "loss": 0.9393, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19237715005874634, "rewards/margins": 0.09237701445817947, "rewards/rejected": -0.2847541868686676, "step": 9920 }, { "epoch": 2.9, "learning_rate": 1.8027719367763871e-09, "logits/chosen": -2.745802402496338, "logits/rejected": -2.7457292079925537, "logps/chosen": -208.337646484375, "logps/rejected": -192.1747283935547, "loss": 0.9263, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.23373830318450928, "rewards/margins": 0.0824032798409462, "rewards/rejected": -0.3161415457725525, "step": 9930 }, { "epoch": 2.9, "learning_rate": 1.7024731227443523e-09, "logits/chosen": -2.714531183242798, "logits/rejected": -2.7135071754455566, "logps/chosen": -175.10997009277344, "logps/rejected": -184.83316040039062, "loss": 0.9294, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.20745953917503357, "rewards/margins": 0.07404839992523193, "rewards/rejected": -0.2815079391002655, "step": 9940 }, { "epoch": 2.9, "learning_rate": 1.6050353154202778e-09, "logits/chosen": -2.759021043777466, "logits/rejected": -2.7469115257263184, "logps/chosen": -211.49734497070312, "logps/rejected": -189.22146606445312, "loss": 0.9401, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.2346908301115036, "rewards/margins": 0.057391781359910965, "rewards/rejected": -0.29208260774612427, "step": 9950 }, { "epoch": 2.91, "learning_rate": 1.5104596375307143e-09, "logits/chosen": -2.7347006797790527, "logits/rejected": -2.7453768253326416, "logps/chosen": -200.760009765625, "logps/rejected": -187.3798828125, "loss": 0.9056, "rewards/accuracies": 0.609375, "rewards/chosen": -0.22261445224285126, "rewards/margins": 0.08408321440219879, "rewards/rejected": -0.30669766664505005, "step": 9960 }, { "epoch": 2.91, "learning_rate": 1.4187471788232873e-09, "logits/chosen": -2.7443184852600098, "logits/rejected": -2.7420456409454346, "logps/chosen": -195.6764373779297, "logps/rejected": -179.39016723632812, "loss": 0.9024, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.20467519760131836, "rewards/margins": 0.10200591385364532, "rewards/rejected": -0.3066811263561249, "step": 9970 }, { "epoch": 2.91, "learning_rate": 1.329898996054235e-09, "logits/chosen": -2.72369122505188, "logits/rejected": -2.7290115356445312, "logps/chosen": -206.4029998779297, "logps/rejected": -194.41281127929688, "loss": 0.9374, "rewards/accuracies": 0.578125, "rewards/chosen": -0.20877929031848907, "rewards/margins": 0.09340817481279373, "rewards/rejected": -0.302187442779541, "step": 9980 }, { "epoch": 2.91, "learning_rate": 1.2439161129762232e-09, "logits/chosen": -2.751157760620117, "logits/rejected": -2.7274537086486816, "logps/chosen": -197.70578002929688, "logps/rejected": -176.07757568359375, "loss": 0.9335, "rewards/accuracies": 0.578125, "rewards/chosen": -0.22376060485839844, "rewards/margins": 0.0929488092660904, "rewards/rejected": -0.31670939922332764, "step": 9990 }, { "epoch": 2.92, "learning_rate": 1.1607995203264943e-09, "logits/chosen": -2.704770088195801, "logits/rejected": -2.7196052074432373, "logps/chosen": -191.6173095703125, "logps/rejected": -188.82199096679688, "loss": 0.9296, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20596985518932343, "rewards/margins": 0.07100576907396317, "rewards/rejected": -0.2769756019115448, "step": 10000 }, { "epoch": 2.92, "eval_logits/chosen": -2.64931583404541, "eval_logits/rejected": -2.6439385414123535, "eval_logps/chosen": -197.76206970214844, "eval_logps/rejected": -183.87411499023438, "eval_loss": 0.9310234189033508, "eval_rewards/accuracies": 0.5795377492904663, "eval_rewards/chosen": -0.215674489736557, "eval_rewards/margins": 0.07960996776819229, "eval_rewards/rejected": -0.2952844798564911, "eval_runtime": 443.2852, "eval_samples_per_second": 26.54, "eval_steps_per_second": 3.318, "step": 10000 }, { "epoch": 2.92, "learning_rate": 1.0805501758154311e-09, "logits/chosen": -2.756335496902466, "logits/rejected": -2.7353618144989014, "logps/chosen": -209.03677368164062, "logps/rejected": -181.5239715576172, "loss": 0.9188, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.2341901808977127, "rewards/margins": 0.07234900444746017, "rewards/rejected": -0.3065391480922699, "step": 10010 }, { "epoch": 2.92, "learning_rate": 1.003169004115595e-09, "logits/chosen": -2.767810344696045, "logits/rejected": -2.7451963424682617, "logps/chosen": -209.06796264648438, "logps/rejected": -184.99880981445312, "loss": 0.8944, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.20936338603496552, "rewards/margins": 0.13027939200401306, "rewards/rejected": -0.33964279294013977, "step": 10020 }, { "epoch": 2.93, "learning_rate": 9.28656896851121e-10, "logits/chosen": -2.74072003364563, "logits/rejected": -2.7295937538146973, "logps/chosen": -202.04502868652344, "logps/rejected": -175.9278106689453, "loss": 0.9223, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.22666554152965546, "rewards/margins": 0.08890683203935623, "rewards/rejected": -0.3155723512172699, "step": 10030 }, { "epoch": 2.93, "learning_rate": 8.570147125872284e-10, "logits/chosen": -2.743962049484253, "logits/rejected": -2.744354248046875, "logps/chosen": -185.90194702148438, "logps/rejected": -182.25119018554688, "loss": 0.9162, "rewards/accuracies": 0.609375, "rewards/chosen": -0.22167591750621796, "rewards/margins": 0.09675368666648865, "rewards/rejected": -0.3184296488761902, "step": 10040 }, { "epoch": 2.93, "learning_rate": 7.88243276820616e-10, "logits/chosen": -2.752854108810425, "logits/rejected": -2.7184722423553467, "logps/chosen": -198.3429412841797, "logps/rejected": -167.09361267089844, "loss": 0.9386, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.24509263038635254, "rewards/margins": 0.05374573543667793, "rewards/rejected": -0.2988383173942566, "step": 10050 }, { "epoch": 2.93, "learning_rate": 7.223433819696645e-10, "logits/chosen": -2.701383590698242, "logits/rejected": -2.7218756675720215, "logps/chosen": -175.10391235351562, "logps/rejected": -178.0584259033203, "loss": 0.9282, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.23060958087444305, "rewards/margins": 0.07636566460132599, "rewards/rejected": -0.30697527527809143, "step": 10060 }, { "epoch": 2.94, "learning_rate": 6.593157873654998e-10, "logits/chosen": -2.7408900260925293, "logits/rejected": -2.7404873371124268, "logps/chosen": -189.15286254882812, "logps/rejected": -178.94862365722656, "loss": 0.9359, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.2220519483089447, "rewards/margins": 0.058012705296278, "rewards/rejected": -0.2800647020339966, "step": 10070 }, { "epoch": 2.94, "learning_rate": 5.991612192432216e-10, "logits/chosen": -2.7326502799987793, "logits/rejected": -2.7282063961029053, "logps/chosen": -215.2332763671875, "logps/rejected": -203.8656463623047, "loss": 0.9242, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.22559790313243866, "rewards/margins": 0.08096420019865036, "rewards/rejected": -0.3065621256828308, "step": 10080 }, { "epoch": 2.94, "learning_rate": 5.418803707334385e-10, "logits/chosen": -2.7055397033691406, "logits/rejected": -2.7279880046844482, "logps/chosen": -189.23861694335938, "logps/rejected": -194.57003784179688, "loss": 0.9041, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.21456527709960938, "rewards/margins": 0.09282436221837997, "rewards/rejected": -0.30738964676856995, "step": 10090 }, { "epoch": 2.95, "learning_rate": 4.874739018544128e-10, "logits/chosen": -2.7319326400756836, "logits/rejected": -2.7255971431732178, "logps/chosen": -198.3050079345703, "logps/rejected": -183.6264190673828, "loss": 0.9335, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.2186700403690338, "rewards/margins": 0.09482128173112869, "rewards/rejected": -0.3134912848472595, "step": 10100 }, { "epoch": 2.95, "eval_logits/chosen": -2.649319648742676, "eval_logits/rejected": -2.643939733505249, "eval_logps/chosen": -197.7578125, "eval_logps/rejected": -183.87391662597656, "eval_loss": 0.9310972690582275, "eval_rewards/accuracies": 0.581237256526947, "eval_rewards/chosen": -0.21525147557258606, "eval_rewards/margins": 0.08001116663217545, "eval_rewards/rejected": -0.2952626645565033, "eval_runtime": 443.3564, "eval_samples_per_second": 26.536, "eval_steps_per_second": 3.318, "step": 10100 }, { "epoch": 2.95, "learning_rate": 4.3594243950428876e-10, "logits/chosen": -2.7047019004821777, "logits/rejected": -2.7111079692840576, "logps/chosen": -176.3616943359375, "logps/rejected": -184.66732788085938, "loss": 0.9232, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21429534256458282, "rewards/margins": 0.07300708442926407, "rewards/rejected": -0.2873024344444275, "step": 10110 }, { "epoch": 2.95, "learning_rate": 3.8728657745407123e-10, "logits/chosen": -2.7484915256500244, "logits/rejected": -2.733851194381714, "logps/chosen": -202.58200073242188, "logps/rejected": -190.02320861816406, "loss": 0.9252, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2536359429359436, "rewards/margins": 0.07397626340389252, "rewards/rejected": -0.3276122212409973, "step": 10120 }, { "epoch": 2.95, "learning_rate": 3.4150687634057484e-10, "logits/chosen": -2.721923589706421, "logits/rejected": -2.744096040725708, "logps/chosen": -189.35020446777344, "logps/rejected": -189.91268920898438, "loss": 0.9022, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.20822253823280334, "rewards/margins": 0.09155464917421341, "rewards/rejected": -0.29977720975875854, "step": 10130 }, { "epoch": 2.96, "learning_rate": 2.986038636601795e-10, "logits/chosen": -2.7409708499908447, "logits/rejected": -2.7214558124542236, "logps/chosen": -202.751953125, "logps/rejected": -177.0704803466797, "loss": 0.93, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.23496882617473602, "rewards/margins": 0.04633781313896179, "rewards/rejected": -0.281306654214859, "step": 10140 }, { "epoch": 2.96, "learning_rate": 2.585780337625576e-10, "logits/chosen": -2.738527774810791, "logits/rejected": -2.751260757446289, "logps/chosen": -183.45188903808594, "logps/rejected": -166.71090698242188, "loss": 0.9162, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.189891055226326, "rewards/margins": 0.09421588480472565, "rewards/rejected": -0.28410694003105164, "step": 10150 }, { "epoch": 2.96, "learning_rate": 2.2142984784506713e-10, "logits/chosen": -2.7730815410614014, "logits/rejected": -2.747037887573242, "logps/chosen": -215.1787109375, "logps/rejected": -189.70123291015625, "loss": 0.9302, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.23184502124786377, "rewards/margins": 0.081792913377285, "rewards/rejected": -0.313637912273407, "step": 10160 }, { "epoch": 2.97, "learning_rate": 1.8715973394745065e-10, "logits/chosen": -2.740107774734497, "logits/rejected": -2.7407820224761963, "logps/chosen": -201.0320587158203, "logps/rejected": -187.3585968017578, "loss": 0.9204, "rewards/accuracies": 0.578125, "rewards/chosen": -0.19122305512428284, "rewards/margins": 0.08361539244651794, "rewards/rejected": -0.2748384475708008, "step": 10170 }, { "epoch": 2.97, "learning_rate": 1.557680869468947e-10, "logits/chosen": -2.7282848358154297, "logits/rejected": -2.7195122241973877, "logps/chosen": -199.21316528320312, "logps/rejected": -178.7677001953125, "loss": 0.9343, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.2206597775220871, "rewards/margins": 0.0789443626999855, "rewards/rejected": -0.2996041476726532, "step": 10180 }, { "epoch": 2.97, "learning_rate": 1.2725526855347778e-10, "logits/chosen": -2.7190909385681152, "logits/rejected": -2.721194267272949, "logps/chosen": -170.80845642089844, "logps/rejected": -161.698486328125, "loss": 0.9148, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.2027992457151413, "rewards/margins": 0.08219794183969498, "rewards/rejected": -0.28499719500541687, "step": 10190 }, { "epoch": 2.98, "learning_rate": 1.0162160730592395e-10, "logits/chosen": -2.729407787322998, "logits/rejected": -2.72074294090271, "logps/chosen": -211.5586700439453, "logps/rejected": -196.6857147216797, "loss": 0.9321, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.21750383079051971, "rewards/margins": 0.06289499253034592, "rewards/rejected": -0.28039878606796265, "step": 10200 }, { "epoch": 2.98, "eval_logits/chosen": -2.6493217945098877, "eval_logits/rejected": -2.64394211769104, "eval_logps/chosen": -197.75448608398438, "eval_logps/rejected": -183.87586975097656, "eval_loss": 0.9304989576339722, "eval_rewards/accuracies": 0.582426905632019, "eval_rewards/chosen": -0.21491758525371552, "eval_rewards/margins": 0.08054331690073013, "eval_rewards/rejected": -0.29546090960502625, "eval_runtime": 443.4554, "eval_samples_per_second": 26.53, "eval_steps_per_second": 3.317, "step": 10200 }, { "epoch": 2.98, "learning_rate": 7.886739856796664e-11, "logits/chosen": -2.7204930782318115, "logits/rejected": -2.7524588108062744, "logps/chosen": -195.971923828125, "logps/rejected": -206.78628540039062, "loss": 0.9028, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.2521055042743683, "rewards/margins": 0.09555664658546448, "rewards/rejected": -0.347662091255188, "step": 10210 }, { "epoch": 2.98, "learning_rate": 5.899290452485162e-11, "logits/chosen": -2.7260775566101074, "logits/rejected": -2.7510170936584473, "logps/chosen": -175.15951538085938, "logps/rejected": -181.86631774902344, "loss": 0.9093, "rewards/accuracies": 0.628125011920929, "rewards/chosen": -0.20711970329284668, "rewards/margins": 0.1187393069267273, "rewards/rejected": -0.325859010219574, "step": 10220 }, { "epoch": 2.98, "learning_rate": 4.199835418025599e-11, "logits/chosen": -2.714921474456787, "logits/rejected": -2.738410472869873, "logps/chosen": -189.37632751464844, "logps/rejected": -192.016845703125, "loss": 0.9309, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.23209278285503387, "rewards/margins": 0.10059485584497452, "rewards/rejected": -0.332687646150589, "step": 10230 }, { "epoch": 2.99, "learning_rate": 2.7883943353845807e-11, "logits/chosen": -2.7257180213928223, "logits/rejected": -2.709995746612549, "logps/chosen": -182.7290802001953, "logps/rejected": -177.0494384765625, "loss": 0.9311, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.25530582666397095, "rewards/margins": 0.062161706387996674, "rewards/rejected": -0.31746751070022583, "step": 10240 }, { "epoch": 2.99, "learning_rate": 1.6649834678778006e-11, "logits/chosen": -2.72110915184021, "logits/rejected": -2.7213776111602783, "logps/chosen": -215.32852172851562, "logps/rejected": -185.47840881347656, "loss": 0.9248, "rewards/accuracies": 0.578125, "rewards/chosen": -0.22247782349586487, "rewards/margins": 0.09786356985569, "rewards/rejected": -0.3203413486480713, "step": 10250 }, { "epoch": 2.99, "learning_rate": 8.296157600035103e-12, "logits/chosen": -2.7592031955718994, "logits/rejected": -2.7389073371887207, "logps/chosen": -226.4311981201172, "logps/rejected": -197.40982055664062, "loss": 0.9225, "rewards/accuracies": 0.578125, "rewards/chosen": -0.1961437165737152, "rewards/margins": 0.10170602798461914, "rewards/rejected": -0.29784974455833435, "step": 10260 }, { "epoch": 3.0, "learning_rate": 2.8230083727875944e-12, "logits/chosen": -2.728482246398926, "logits/rejected": -2.7387022972106934, "logps/chosen": -210.0655059814453, "logps/rejected": -200.79238891601562, "loss": 0.9069, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.2141614854335785, "rewards/margins": 0.12801960110664368, "rewards/rejected": -0.34218111634254456, "step": 10270 }, { "epoch": 3.0, "learning_rate": 2.304500613947713e-13, "logits/chosen": -2.7390356063842773, "logits/rejected": -2.744161605834961, "logps/chosen": -211.9443817138672, "logps/rejected": -208.78115844726562, "loss": 0.9324, "rewards/accuracies": 0.59375, "rewards/chosen": -0.19223544001579285, "rewards/margins": 0.08548939228057861, "rewards/rejected": -0.27772483229637146, "step": 10280 }, { "epoch": 3.0, "step": 10284, "total_flos": 0.0, "train_loss": 0.9410081987063832, "train_runtime": 92137.4772, "train_samples_per_second": 7.144, "train_steps_per_second": 0.112 } ], "logging_steps": 10, "max_steps": 10284, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }