{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 3750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.3333333333333334e-08, "logits/chosen": -1.381319522857666, "logits/rejected": -0.9757366180419922, "logps/chosen": -223.25863647460938, "logps/rejected": -830.5400390625, "loss": 0.2593, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.3333333333333336e-07, "logits/chosen": -1.736572504043579, "logits/rejected": -1.0549728870391846, "logps/chosen": -406.9079284667969, "logps/rejected": -761.596435546875, "loss": 0.1822, "rewards/accuracies": 0.5555555820465088, "rewards/chosen": 0.00039627417572773993, "rewards/margins": 0.000484730233438313, "rewards/rejected": -8.845605771057308e-05, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.666666666666667e-07, "logits/chosen": -1.6399459838867188, "logits/rejected": -1.0379071235656738, "logps/chosen": -483.6226501464844, "logps/rejected": -819.0009765625, "loss": 0.1801, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 3.848170308629051e-05, "rewards/margins": 0.00036858199746347964, "rewards/rejected": -0.00033010030165314674, "step": 20 }, { "epoch": 0.01, "learning_rate": 4.0000000000000003e-07, "logits/chosen": -1.7753417491912842, "logits/rejected": -1.3355859518051147, "logps/chosen": -443.94390869140625, "logps/rejected": -788.3363647460938, "loss": 0.2323, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0017691084649413824, "rewards/margins": 0.0024432786740362644, "rewards/rejected": -0.0006741699180565774, "step": 30 }, { "epoch": 0.01, "learning_rate": 5.333333333333335e-07, "logits/chosen": -1.5635123252868652, "logits/rejected": -0.9124569892883301, "logps/chosen": -458.33428955078125, "logps/rejected": -747.6420288085938, "loss": 0.2195, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.004033350385725498, "rewards/margins": 0.006722611375153065, "rewards/rejected": -0.0026892595924437046, "step": 40 }, { "epoch": 0.01, "learning_rate": 6.666666666666667e-07, "logits/chosen": -1.631588339805603, "logits/rejected": -0.8681947588920593, "logps/chosen": -465.05731201171875, "logps/rejected": -838.4075927734375, "loss": 0.2014, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.008273603394627571, "rewards/margins": 0.015597726218402386, "rewards/rejected": -0.0073241242207586765, "step": 50 }, { "epoch": 0.02, "learning_rate": 8.000000000000001e-07, "logits/chosen": -1.4628058671951294, "logits/rejected": -1.2347371578216553, "logps/chosen": -343.9599304199219, "logps/rejected": -739.0056762695312, "loss": 0.1761, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.0036266068927943707, "rewards/margins": 0.022512439638376236, "rewards/rejected": -0.01888582855463028, "step": 60 }, { "epoch": 0.02, "learning_rate": 9.333333333333334e-07, "logits/chosen": -1.8094412088394165, "logits/rejected": -0.9877569079399109, "logps/chosen": -497.0489807128906, "logps/rejected": -864.1619262695312, "loss": 0.1809, "rewards/accuracies": 0.875, "rewards/chosen": 0.003260440658777952, "rewards/margins": 0.046338800340890884, "rewards/rejected": -0.04307835176587105, "step": 70 }, { "epoch": 0.02, "learning_rate": 1.066666666666667e-06, "logits/chosen": -1.6897491216659546, "logits/rejected": -1.0848586559295654, "logps/chosen": -560.68017578125, "logps/rejected": -1089.6458740234375, "loss": 0.1443, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.006865059025585651, "rewards/margins": 0.08565281331539154, "rewards/rejected": -0.09251787513494492, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.2000000000000002e-06, "logits/chosen": -1.7690013647079468, "logits/rejected": -0.9375957250595093, "logps/chosen": -427.4967346191406, "logps/rejected": -953.2610473632812, "loss": 0.1582, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.010322836227715015, "rewards/margins": 0.07507754862308502, "rewards/rejected": -0.08540038764476776, "step": 90 }, { "epoch": 0.03, "learning_rate": 1.3333333333333334e-06, "logits/chosen": -1.4934628009796143, "logits/rejected": -0.9881563186645508, "logps/chosen": -397.26727294921875, "logps/rejected": -905.0123901367188, "loss": 0.1339, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.01196499913930893, "rewards/margins": 0.11481380462646484, "rewards/rejected": -0.12677881121635437, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.4666666666666669e-06, "logits/chosen": -1.559560775756836, "logits/rejected": -0.9702051877975464, "logps/chosen": -446.76849365234375, "logps/rejected": -964.1668090820312, "loss": 0.1009, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.029861677438020706, "rewards/margins": 0.14626939594745636, "rewards/rejected": -0.17613105475902557, "step": 110 }, { "epoch": 0.03, "learning_rate": 1.6000000000000001e-06, "logits/chosen": -1.7105035781860352, "logits/rejected": -0.9925721287727356, "logps/chosen": -542.6316528320312, "logps/rejected": -977.3997192382812, "loss": 0.1034, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.06414582580327988, "rewards/margins": 0.1390438973903656, "rewards/rejected": -0.2031897008419037, "step": 120 }, { "epoch": 0.03, "learning_rate": 1.7333333333333336e-06, "logits/chosen": -1.7129449844360352, "logits/rejected": -0.9808734655380249, "logps/chosen": -639.7268676757812, "logps/rejected": -1264.408203125, "loss": 0.0778, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1564100980758667, "rewards/margins": 0.22008244693279266, "rewards/rejected": -0.37649255990982056, "step": 130 }, { "epoch": 0.04, "learning_rate": 1.8666666666666669e-06, "logits/chosen": -1.4957599639892578, "logits/rejected": -0.9900957345962524, "logps/chosen": -606.5774536132812, "logps/rejected": -1158.996826171875, "loss": 0.1186, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1333167403936386, "rewards/margins": 0.18605293333530426, "rewards/rejected": -0.31936967372894287, "step": 140 }, { "epoch": 0.04, "learning_rate": 2.0000000000000003e-06, "logits/chosen": -1.7749484777450562, "logits/rejected": -1.1498210430145264, "logps/chosen": -588.8472900390625, "logps/rejected": -1247.8353271484375, "loss": 0.0621, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15223875641822815, "rewards/margins": 0.2504242956638336, "rewards/rejected": -0.40266305208206177, "step": 150 }, { "epoch": 0.04, "learning_rate": 2.133333333333334e-06, "logits/chosen": -1.4668447971343994, "logits/rejected": -0.9629266858100891, "logps/chosen": -740.5608520507812, "logps/rejected": -1320.8753662109375, "loss": 0.074, "rewards/accuracies": 0.875, "rewards/chosen": -0.22708892822265625, "rewards/margins": 0.23111894726753235, "rewards/rejected": -0.458207905292511, "step": 160 }, { "epoch": 0.05, "learning_rate": 2.266666666666667e-06, "logits/chosen": -1.5278871059417725, "logits/rejected": -1.1116211414337158, "logps/chosen": -571.50390625, "logps/rejected": -1168.722412109375, "loss": 0.1131, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1509171426296234, "rewards/margins": 0.27128082513809204, "rewards/rejected": -0.4221979081630707, "step": 170 }, { "epoch": 0.05, "learning_rate": 2.4000000000000003e-06, "logits/chosen": -1.6129930019378662, "logits/rejected": -1.0707186460494995, "logps/chosen": -591.6637573242188, "logps/rejected": -1284.7354736328125, "loss": 0.0784, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12449514865875244, "rewards/margins": 0.2544993758201599, "rewards/rejected": -0.37899452447891235, "step": 180 }, { "epoch": 0.05, "learning_rate": 2.5333333333333338e-06, "logits/chosen": -1.6850178241729736, "logits/rejected": -1.1943457126617432, "logps/chosen": -514.7299194335938, "logps/rejected": -1000.5671997070312, "loss": 0.1249, "rewards/accuracies": 0.75, "rewards/chosen": -0.09646569192409515, "rewards/margins": 0.17868806421756744, "rewards/rejected": -0.2751538157463074, "step": 190 }, { "epoch": 0.05, "learning_rate": 2.666666666666667e-06, "logits/chosen": -1.5830456018447876, "logits/rejected": -1.097068428993225, "logps/chosen": -658.3897705078125, "logps/rejected": -1211.879150390625, "loss": 0.0931, "rewards/accuracies": 0.875, "rewards/chosen": -0.16921699047088623, "rewards/margins": 0.20347478985786438, "rewards/rejected": -0.3726917505264282, "step": 200 }, { "epoch": 0.06, "learning_rate": 2.8000000000000003e-06, "logits/chosen": -1.765363097190857, "logits/rejected": -0.8959721326828003, "logps/chosen": -716.1063842773438, "logps/rejected": -1217.0675048828125, "loss": 0.1018, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21076449751853943, "rewards/margins": 0.23035843670368195, "rewards/rejected": -0.4411229193210602, "step": 210 }, { "epoch": 0.06, "learning_rate": 2.9333333333333338e-06, "logits/chosen": -1.4971026182174683, "logits/rejected": -1.0308849811553955, "logps/chosen": -635.874267578125, "logps/rejected": -1254.032470703125, "loss": 0.0944, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1963960826396942, "rewards/margins": 0.2612842321395874, "rewards/rejected": -0.4576803147792816, "step": 220 }, { "epoch": 0.06, "learning_rate": 3.066666666666667e-06, "logits/chosen": -1.3114674091339111, "logits/rejected": -1.1226143836975098, "logps/chosen": -669.163818359375, "logps/rejected": -1447.716064453125, "loss": 0.083, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2657012939453125, "rewards/margins": 0.3253920078277588, "rewards/rejected": -0.5910933613777161, "step": 230 }, { "epoch": 0.06, "learning_rate": 3.2000000000000003e-06, "logits/chosen": -1.5960338115692139, "logits/rejected": -0.8448678255081177, "logps/chosen": -783.5925903320312, "logps/rejected": -1367.287841796875, "loss": 0.0798, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2770916819572449, "rewards/margins": 0.2540797293186188, "rewards/rejected": -0.531171441078186, "step": 240 }, { "epoch": 0.07, "learning_rate": 3.3333333333333333e-06, "logits/chosen": -1.6775119304656982, "logits/rejected": -1.2753263711929321, "logps/chosen": -694.5948486328125, "logps/rejected": -1378.860107421875, "loss": 0.0801, "rewards/accuracies": 0.875, "rewards/chosen": -0.2328498661518097, "rewards/margins": 0.26127415895462036, "rewards/rejected": -0.49412399530410767, "step": 250 }, { "epoch": 0.07, "learning_rate": 3.4666666666666672e-06, "logits/chosen": -1.4416381120681763, "logits/rejected": -0.9755349159240723, "logps/chosen": -681.4529418945312, "logps/rejected": -1261.5875244140625, "loss": 0.1169, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18144458532333374, "rewards/margins": 0.2220906764268875, "rewards/rejected": -0.40353527665138245, "step": 260 }, { "epoch": 0.07, "learning_rate": 3.6000000000000003e-06, "logits/chosen": -1.776125192642212, "logits/rejected": -1.1443500518798828, "logps/chosen": -665.0440673828125, "logps/rejected": -1173.3468017578125, "loss": 0.1028, "rewards/accuracies": 0.875, "rewards/chosen": -0.19433431327342987, "rewards/margins": 0.21727688610553741, "rewards/rejected": -0.4116111695766449, "step": 270 }, { "epoch": 0.07, "learning_rate": 3.7333333333333337e-06, "logits/chosen": -1.6408843994140625, "logits/rejected": -1.2362545728683472, "logps/chosen": -652.2579956054688, "logps/rejected": -1271.698974609375, "loss": 0.0917, "rewards/accuracies": 0.875, "rewards/chosen": -0.1795671433210373, "rewards/margins": 0.24397559463977814, "rewards/rejected": -0.4235427975654602, "step": 280 }, { "epoch": 0.08, "learning_rate": 3.866666666666667e-06, "logits/chosen": -1.8528718948364258, "logits/rejected": -1.1004583835601807, "logps/chosen": -762.6512451171875, "logps/rejected": -1343.5460205078125, "loss": 0.0868, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.226405531167984, "rewards/margins": 0.2510288953781128, "rewards/rejected": -0.4774344861507416, "step": 290 }, { "epoch": 0.08, "learning_rate": 4.000000000000001e-06, "logits/chosen": -1.7013801336288452, "logits/rejected": -1.2125957012176514, "logps/chosen": -613.29345703125, "logps/rejected": -1406.8970947265625, "loss": 0.0761, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.17005790770053864, "rewards/margins": 0.32780343294143677, "rewards/rejected": -0.4978613257408142, "step": 300 }, { "epoch": 0.08, "learning_rate": 4.133333333333333e-06, "logits/chosen": -1.415290117263794, "logits/rejected": -0.9908515810966492, "logps/chosen": -712.7332763671875, "logps/rejected": -1258.039306640625, "loss": 0.1258, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.25694146752357483, "rewards/margins": 0.21659043431282043, "rewards/rejected": -0.4735318720340729, "step": 310 }, { "epoch": 0.09, "learning_rate": 4.266666666666668e-06, "logits/chosen": -1.5041309595108032, "logits/rejected": -1.0038108825683594, "logps/chosen": -661.9385986328125, "logps/rejected": -1160.186767578125, "loss": 0.0992, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21888642013072968, "rewards/margins": 0.21484248340129852, "rewards/rejected": -0.4337288737297058, "step": 320 }, { "epoch": 0.09, "learning_rate": 4.4e-06, "logits/chosen": -1.6438214778900146, "logits/rejected": -1.0989625453948975, "logps/chosen": -537.155517578125, "logps/rejected": -1078.1251220703125, "loss": 0.0751, "rewards/accuracies": 0.75, "rewards/chosen": -0.11467760801315308, "rewards/margins": 0.25429314374923706, "rewards/rejected": -0.36897072196006775, "step": 330 }, { "epoch": 0.09, "learning_rate": 4.533333333333334e-06, "logits/chosen": -1.7438217401504517, "logits/rejected": -1.0444936752319336, "logps/chosen": -644.1298828125, "logps/rejected": -1348.093505859375, "loss": 0.0711, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19465377926826477, "rewards/margins": 0.32639193534851074, "rewards/rejected": -0.5210457444190979, "step": 340 }, { "epoch": 0.09, "learning_rate": 4.666666666666667e-06, "logits/chosen": -1.7205537557601929, "logits/rejected": -1.1176466941833496, "logps/chosen": -653.7063598632812, "logps/rejected": -1317.8218994140625, "loss": 0.0812, "rewards/accuracies": 0.875, "rewards/chosen": -0.15995605289936066, "rewards/margins": 0.2710942327976227, "rewards/rejected": -0.43105024099349976, "step": 350 }, { "epoch": 0.1, "learning_rate": 4.800000000000001e-06, "logits/chosen": -1.8885447978973389, "logits/rejected": -1.4283367395401, "logps/chosen": -435.2276916503906, "logps/rejected": -1041.008056640625, "loss": 0.0918, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.05559268593788147, "rewards/margins": 0.2262849360704422, "rewards/rejected": -0.2818776071071625, "step": 360 }, { "epoch": 0.1, "learning_rate": 4.933333333333334e-06, "logits/chosen": -1.7745654582977295, "logits/rejected": -1.2009865045547485, "logps/chosen": -606.5958251953125, "logps/rejected": -1145.1015625, "loss": 0.0968, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.11594484001398087, "rewards/margins": 0.24505428969860077, "rewards/rejected": -0.36099910736083984, "step": 370 }, { "epoch": 0.1, "learning_rate": 4.999972922944898e-06, "logits/chosen": -1.6557433605194092, "logits/rejected": -1.1534380912780762, "logps/chosen": -643.7879028320312, "logps/rejected": -1236.194091796875, "loss": 0.091, "rewards/accuracies": 0.875, "rewards/chosen": -0.16532504558563232, "rewards/margins": 0.24116845428943634, "rewards/rejected": -0.40649348497390747, "step": 380 }, { "epoch": 0.1, "learning_rate": 4.999756310023261e-06, "logits/chosen": -1.6974895000457764, "logits/rejected": -1.2435563802719116, "logps/chosen": -619.0020751953125, "logps/rejected": -1253.426513671875, "loss": 0.0496, "rewards/accuracies": 0.875, "rewards/chosen": -0.11514924466609955, "rewards/margins": 0.26920756697654724, "rewards/rejected": -0.3843567967414856, "step": 390 }, { "epoch": 0.11, "learning_rate": 4.999323102948655e-06, "logits/chosen": -1.6725631952285767, "logits/rejected": -0.9952858686447144, "logps/chosen": -683.99072265625, "logps/rejected": -1263.6319580078125, "loss": 0.0985, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22003349661827087, "rewards/margins": 0.21788537502288818, "rewards/rejected": -0.4379189610481262, "step": 400 }, { "epoch": 0.11, "learning_rate": 4.998673339256785e-06, "logits/chosen": -1.6918662786483765, "logits/rejected": -0.9807602167129517, "logps/chosen": -646.4641723632812, "logps/rejected": -1105.5599365234375, "loss": 0.1276, "rewards/accuracies": 0.75, "rewards/chosen": -0.13202176988124847, "rewards/margins": 0.22680577635765076, "rewards/rejected": -0.35882753133773804, "step": 410 }, { "epoch": 0.11, "learning_rate": 4.997807075247147e-06, "logits/chosen": -1.4633402824401855, "logits/rejected": -0.8964066505432129, "logps/chosen": -623.4297485351562, "logps/rejected": -1171.818359375, "loss": 0.0798, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17750394344329834, "rewards/margins": 0.23941746354103088, "rewards/rejected": -0.4169214367866516, "step": 420 }, { "epoch": 0.11, "learning_rate": 4.996724385978142e-06, "logits/chosen": -1.7313741445541382, "logits/rejected": -1.088205099105835, "logps/chosen": -618.5508422851562, "logps/rejected": -1335.5269775390625, "loss": 0.0565, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15107488632202148, "rewards/margins": 0.30174392461776733, "rewards/rejected": -0.4528188109397888, "step": 430 }, { "epoch": 0.12, "learning_rate": 4.995425365260585e-06, "logits/chosen": -1.6465423107147217, "logits/rejected": -1.1094882488250732, "logps/chosen": -621.9539794921875, "logps/rejected": -1221.4598388671875, "loss": 0.0909, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15349408984184265, "rewards/margins": 0.25984710454940796, "rewards/rejected": -0.4133411943912506, "step": 440 }, { "epoch": 0.12, "learning_rate": 4.993910125649561e-06, "logits/chosen": -1.8044379949569702, "logits/rejected": -1.1311860084533691, "logps/chosen": -721.5858764648438, "logps/rejected": -1256.863037109375, "loss": 0.0915, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21946442127227783, "rewards/margins": 0.22845225036144257, "rewards/rejected": -0.4479166567325592, "step": 450 }, { "epoch": 0.12, "learning_rate": 4.992178798434684e-06, "logits/chosen": -1.76088547706604, "logits/rejected": -1.2385786771774292, "logps/chosen": -657.9778442382812, "logps/rejected": -1414.336669921875, "loss": 0.0575, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.16792455315589905, "rewards/margins": 0.3237282633781433, "rewards/rejected": -0.49165281653404236, "step": 460 }, { "epoch": 0.13, "learning_rate": 4.990231533628719e-06, "logits/chosen": -1.5809530019760132, "logits/rejected": -1.1684823036193848, "logps/chosen": -623.2067260742188, "logps/rejected": -1329.0098876953125, "loss": 0.067, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15883824229240417, "rewards/margins": 0.3003080189228058, "rewards/rejected": -0.45914632081985474, "step": 470 }, { "epoch": 0.13, "learning_rate": 4.988068499954578e-06, "logits/chosen": -1.5603920221328735, "logits/rejected": -1.0103719234466553, "logps/chosen": -745.253173828125, "logps/rejected": -1386.312744140625, "loss": 0.0639, "rewards/accuracies": 0.875, "rewards/chosen": -0.2308214157819748, "rewards/margins": 0.3027498126029968, "rewards/rejected": -0.5335712432861328, "step": 480 }, { "epoch": 0.13, "learning_rate": 4.985689884830711e-06, "logits/chosen": -1.7204630374908447, "logits/rejected": -1.0981186628341675, "logps/chosen": -663.6007080078125, "logps/rejected": -1271.954833984375, "loss": 0.0546, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23924314975738525, "rewards/margins": 0.3018389344215393, "rewards/rejected": -0.5410820841789246, "step": 490 }, { "epoch": 0.13, "learning_rate": 4.983095894354858e-06, "logits/chosen": -1.6816179752349854, "logits/rejected": -1.2458436489105225, "logps/chosen": -812.2794189453125, "logps/rejected": -1452.4508056640625, "loss": 0.0876, "rewards/accuracies": 0.875, "rewards/chosen": -0.3606022000312805, "rewards/margins": 0.2518552541732788, "rewards/rejected": -0.6124575138092041, "step": 500 }, { "epoch": 0.14, "learning_rate": 4.980286753286196e-06, "logits/chosen": -1.675920844078064, "logits/rejected": -1.2157505750656128, "logps/chosen": -622.9227294921875, "logps/rejected": -1268.478271484375, "loss": 0.0997, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23038606345653534, "rewards/margins": 0.280417799949646, "rewards/rejected": -0.5108038783073425, "step": 510 }, { "epoch": 0.14, "learning_rate": 4.97726270502586e-06, "logits/chosen": -1.6523020267486572, "logits/rejected": -1.1194841861724854, "logps/chosen": -536.9530029296875, "logps/rejected": -1284.30517578125, "loss": 0.0562, "rewards/accuracies": 0.875, "rewards/chosen": -0.15182599425315857, "rewards/margins": 0.308131605386734, "rewards/rejected": -0.4599575400352478, "step": 520 }, { "epoch": 0.14, "learning_rate": 4.974024011595864e-06, "logits/chosen": -1.5846920013427734, "logits/rejected": -1.2534643411636353, "logps/chosen": -688.9484252929688, "logps/rejected": -1183.958984375, "loss": 0.1024, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21223464608192444, "rewards/margins": 0.19630616903305054, "rewards/rejected": -0.408540815114975, "step": 530 }, { "epoch": 0.14, "learning_rate": 4.970570953616383e-06, "logits/chosen": -1.7657943964004517, "logits/rejected": -1.2593281269073486, "logps/chosen": -591.6409912109375, "logps/rejected": -1163.0576171875, "loss": 0.1017, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17924083769321442, "rewards/margins": 0.2396513670682907, "rewards/rejected": -0.4188922345638275, "step": 540 }, { "epoch": 0.15, "learning_rate": 4.966903830281449e-06, "logits/chosen": -1.7614797353744507, "logits/rejected": -1.300492286682129, "logps/chosen": -588.6466064453125, "logps/rejected": -1244.7601318359375, "loss": 0.0925, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16072975099086761, "rewards/margins": 0.29418593645095825, "rewards/rejected": -0.4549156725406647, "step": 550 }, { "epoch": 0.15, "learning_rate": 4.9630229593330226e-06, "logits/chosen": -1.5666195154190063, "logits/rejected": -0.924557089805603, "logps/chosen": -734.8566284179688, "logps/rejected": -1351.8369140625, "loss": 0.0874, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.236628919839859, "rewards/margins": 0.24980910122394562, "rewards/rejected": -0.48643797636032104, "step": 560 }, { "epoch": 0.15, "learning_rate": 4.958928677033465e-06, "logits/chosen": -1.5746439695358276, "logits/rejected": -1.0514501333236694, "logps/chosen": -673.4780883789062, "logps/rejected": -1367.333740234375, "loss": 0.0705, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.23988430202007294, "rewards/margins": 0.26904696226119995, "rewards/rejected": -0.5089312791824341, "step": 570 }, { "epoch": 0.15, "learning_rate": 4.954621338136399e-06, "logits/chosen": -1.5549921989440918, "logits/rejected": -0.825292706489563, "logps/chosen": -724.3428955078125, "logps/rejected": -1314.2396240234375, "loss": 0.1148, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2795710563659668, "rewards/margins": 0.24751707911491394, "rewards/rejected": -0.5270881652832031, "step": 580 }, { "epoch": 0.16, "learning_rate": 4.95010131585597e-06, "logits/chosen": -1.4858559370040894, "logits/rejected": -1.164233922958374, "logps/chosen": -720.483642578125, "logps/rejected": -1470.8189697265625, "loss": 0.0644, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.28550735116004944, "rewards/margins": 0.27395910024642944, "rewards/rejected": -0.5594664812088013, "step": 590 }, { "epoch": 0.16, "learning_rate": 4.9453690018345144e-06, "logits/chosen": -1.7503721714019775, "logits/rejected": -1.0189541578292847, "logps/chosen": -725.6884765625, "logps/rejected": -1330.4554443359375, "loss": 0.0731, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19357889890670776, "rewards/margins": 0.26617223024368286, "rewards/rejected": -0.4597511887550354, "step": 600 }, { "epoch": 0.16, "learning_rate": 4.940424806108619e-06, "logits/chosen": -1.7605764865875244, "logits/rejected": -0.9754387140274048, "logps/chosen": -736.2041625976562, "logps/rejected": -1220.272216796875, "loss": 0.0966, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16418889164924622, "rewards/margins": 0.2352021038532257, "rewards/rejected": -0.3993909955024719, "step": 610 }, { "epoch": 0.17, "learning_rate": 4.935269157073597e-06, "logits/chosen": -1.6696975231170654, "logits/rejected": -1.1732470989227295, "logps/chosen": -546.5994262695312, "logps/rejected": -1076.2138671875, "loss": 0.1142, "rewards/accuracies": 0.75, "rewards/chosen": -0.17854368686676025, "rewards/margins": 0.22630243003368378, "rewards/rejected": -0.40484610199928284, "step": 620 }, { "epoch": 0.17, "learning_rate": 4.9299025014463665e-06, "logits/chosen": -1.6130393743515015, "logits/rejected": -0.9944950342178345, "logps/chosen": -607.0363159179688, "logps/rejected": -1301.4781494140625, "loss": 0.0704, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19457684457302094, "rewards/margins": 0.2787570357322693, "rewards/rejected": -0.47333383560180664, "step": 630 }, { "epoch": 0.17, "learning_rate": 4.924325304226745e-06, "logits/chosen": -1.6456743478775024, "logits/rejected": -1.2997629642486572, "logps/chosen": -683.3946533203125, "logps/rejected": -1356.107177734375, "loss": 0.0818, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2882430851459503, "rewards/margins": 0.2655082941055298, "rewards/rejected": -0.5537513494491577, "step": 640 }, { "epoch": 0.17, "learning_rate": 4.91853804865716e-06, "logits/chosen": -1.406205654144287, "logits/rejected": -1.0480941534042358, "logps/chosen": -837.6101684570312, "logps/rejected": -1426.9271240234375, "loss": 0.0759, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3547836244106293, "rewards/margins": 0.24995502829551697, "rewards/rejected": -0.6047386527061462, "step": 650 }, { "epoch": 0.18, "learning_rate": 4.912541236180779e-06, "logits/chosen": -1.6352602243423462, "logits/rejected": -1.0264801979064941, "logps/chosen": -687.4603271484375, "logps/rejected": -1298.3892822265625, "loss": 0.0835, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.25935202836990356, "rewards/margins": 0.28063350915908813, "rewards/rejected": -0.5399855375289917, "step": 660 }, { "epoch": 0.18, "learning_rate": 4.9063353863980565e-06, "logits/chosen": -1.652361512184143, "logits/rejected": -1.3324909210205078, "logps/chosen": -703.0999755859375, "logps/rejected": -1310.7120361328125, "loss": 0.0822, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2579403519630432, "rewards/margins": 0.26742976903915405, "rewards/rejected": -0.525370180606842, "step": 670 }, { "epoch": 0.18, "learning_rate": 4.899921037021719e-06, "logits/chosen": -1.8630950450897217, "logits/rejected": -1.117851734161377, "logps/chosen": -572.1090087890625, "logps/rejected": -1142.517822265625, "loss": 0.0669, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13034145534038544, "rewards/margins": 0.2702890932559967, "rewards/rejected": -0.40063056349754333, "step": 680 }, { "epoch": 0.18, "learning_rate": 4.893298743830168e-06, "logits/chosen": -1.5307347774505615, "logits/rejected": -1.1395881175994873, "logps/chosen": -570.9049072265625, "logps/rejected": -1325.3486328125, "loss": 0.0776, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1899944543838501, "rewards/margins": 0.31518790125846863, "rewards/rejected": -0.5051823854446411, "step": 690 }, { "epoch": 0.19, "learning_rate": 4.88646908061933e-06, "logits/chosen": -1.6429624557495117, "logits/rejected": -0.952468991279602, "logps/chosen": -504.73321533203125, "logps/rejected": -1041.1776123046875, "loss": 0.1034, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08391048014163971, "rewards/margins": 0.24313923716545105, "rewards/rejected": -0.3270496726036072, "step": 700 }, { "epoch": 0.19, "learning_rate": 4.879432639152935e-06, "logits/chosen": -1.8941549062728882, "logits/rejected": -1.1734158992767334, "logps/chosen": -530.647216796875, "logps/rejected": -1243.784912109375, "loss": 0.0827, "rewards/accuracies": 0.875, "rewards/chosen": -0.03668345510959625, "rewards/margins": 0.30672526359558105, "rewards/rejected": -0.3434087336063385, "step": 710 }, { "epoch": 0.19, "learning_rate": 4.8721900291112415e-06, "logits/chosen": -1.6430208683013916, "logits/rejected": -1.1618727445602417, "logps/chosen": -664.0516357421875, "logps/rejected": -1358.1705322265625, "loss": 0.064, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1712174415588379, "rewards/margins": 0.27873173356056213, "rewards/rejected": -0.44994911551475525, "step": 720 }, { "epoch": 0.19, "learning_rate": 4.864741878038218e-06, "logits/chosen": -1.5911110639572144, "logits/rejected": -0.9319060444831848, "logps/chosen": -551.3786010742188, "logps/rejected": -1062.9332275390625, "loss": 0.0848, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13672541081905365, "rewards/margins": 0.23177051544189453, "rewards/rejected": -0.368495911359787, "step": 730 }, { "epoch": 0.2, "learning_rate": 4.857088831287158e-06, "logits/chosen": -1.3883923292160034, "logits/rejected": -0.8421472311019897, "logps/chosen": -622.4197387695312, "logps/rejected": -1147.806884765625, "loss": 0.0948, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19367149472236633, "rewards/margins": 0.24927671253681183, "rewards/rejected": -0.44294825196266174, "step": 740 }, { "epoch": 0.2, "learning_rate": 4.849231551964771e-06, "logits/chosen": -1.6589374542236328, "logits/rejected": -1.0507800579071045, "logps/chosen": -580.9486083984375, "logps/rejected": -1298.055908203125, "loss": 0.078, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1833840161561966, "rewards/margins": 0.29099351167678833, "rewards/rejected": -0.4743775427341461, "step": 750 }, { "epoch": 0.2, "learning_rate": 4.841170720873723e-06, "logits/chosen": -1.9193570613861084, "logits/rejected": -1.29689359664917, "logps/chosen": -593.7999267578125, "logps/rejected": -1074.827880859375, "loss": 0.1027, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13971452414989471, "rewards/margins": 0.2339230477809906, "rewards/rejected": -0.3736375570297241, "step": 760 }, { "epoch": 0.21, "learning_rate": 4.832907036453647e-06, "logits/chosen": -1.7203441858291626, "logits/rejected": -1.0238596200942993, "logps/chosen": -575.1504516601562, "logps/rejected": -1223.0185546875, "loss": 0.0779, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1228102445602417, "rewards/margins": 0.28532546758651733, "rewards/rejected": -0.4081357419490814, "step": 770 }, { "epoch": 0.21, "learning_rate": 4.824441214720629e-06, "logits/chosen": -1.5191177129745483, "logits/rejected": -1.0029339790344238, "logps/chosen": -579.9059448242188, "logps/rejected": -1316.46142578125, "loss": 0.0519, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1901233047246933, "rewards/margins": 0.3058861494064331, "rewards/rejected": -0.4960094392299652, "step": 780 }, { "epoch": 0.21, "learning_rate": 4.815773989205165e-06, "logits/chosen": -1.5836925506591797, "logits/rejected": -0.9203447103500366, "logps/chosen": -776.8148803710938, "logps/rejected": -1455.188720703125, "loss": 0.0819, "rewards/accuracies": 0.875, "rewards/chosen": -0.23546621203422546, "rewards/margins": 0.2814113199710846, "rewards/rejected": -0.5168775916099548, "step": 790 }, { "epoch": 0.21, "learning_rate": 4.806906110888606e-06, "logits/chosen": -1.6255607604980469, "logits/rejected": -1.1398379802703857, "logps/chosen": -536.3740234375, "logps/rejected": -1251.212646484375, "loss": 0.0639, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11835892498493195, "rewards/margins": 0.3064490854740143, "rewards/rejected": -0.4248080849647522, "step": 800 }, { "epoch": 0.22, "learning_rate": 4.7978383481380865e-06, "logits/chosen": -1.5587496757507324, "logits/rejected": -1.0958257913589478, "logps/chosen": -577.5291748046875, "logps/rejected": -1253.976806640625, "loss": 0.0724, "rewards/accuracies": 0.875, "rewards/chosen": -0.15895147621631622, "rewards/margins": 0.29505571722984314, "rewards/rejected": -0.45400720834732056, "step": 810 }, { "epoch": 0.22, "learning_rate": 4.788571486639948e-06, "logits/chosen": -1.5678253173828125, "logits/rejected": -1.0112650394439697, "logps/chosen": -582.6275634765625, "logps/rejected": -1258.8802490234375, "loss": 0.0726, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16134771704673767, "rewards/margins": 0.3210769593715668, "rewards/rejected": -0.48242464661598206, "step": 820 }, { "epoch": 0.22, "learning_rate": 4.779106329331665e-06, "logits/chosen": -1.5958459377288818, "logits/rejected": -1.0422935485839844, "logps/chosen": -604.3923950195312, "logps/rejected": -1268.2470703125, "loss": 0.0656, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17159458994865417, "rewards/margins": 0.29530078172683716, "rewards/rejected": -0.4668954014778137, "step": 830 }, { "epoch": 0.22, "learning_rate": 4.769443696332272e-06, "logits/chosen": -1.6635258197784424, "logits/rejected": -1.1757241487503052, "logps/chosen": -576.9772338867188, "logps/rejected": -1184.485595703125, "loss": 0.0987, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.10074315965175629, "rewards/margins": 0.25036129355430603, "rewards/rejected": -0.3511044681072235, "step": 840 }, { "epoch": 0.23, "learning_rate": 4.759584424871302e-06, "logits/chosen": -1.4811336994171143, "logits/rejected": -0.9249873161315918, "logps/chosen": -593.8721313476562, "logps/rejected": -1355.8583984375, "loss": 0.06, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1529252678155899, "rewards/margins": 0.3431011736392975, "rewards/rejected": -0.4960264265537262, "step": 850 }, { "epoch": 0.23, "learning_rate": 4.749529369216246e-06, "logits/chosen": -1.5341050624847412, "logits/rejected": -0.9583051800727844, "logps/chosen": -680.0247802734375, "logps/rejected": -1330.1800537109375, "loss": 0.0771, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18310308456420898, "rewards/margins": 0.31003543734550476, "rewards/rejected": -0.49313855171203613, "step": 860 }, { "epoch": 0.23, "learning_rate": 4.7392794005985324e-06, "logits/chosen": -1.3542709350585938, "logits/rejected": -0.9462020993232727, "logps/chosen": -521.2957763671875, "logps/rejected": -1280.841552734375, "loss": 0.0739, "rewards/accuracies": 0.875, "rewards/chosen": -0.12296704202890396, "rewards/margins": 0.3202964663505554, "rewards/rejected": -0.44326353073120117, "step": 870 }, { "epoch": 0.23, "learning_rate": 4.7288354071380415e-06, "logits/chosen": -1.4632813930511475, "logits/rejected": -1.0232326984405518, "logps/chosen": -572.9237060546875, "logps/rejected": -1308.82666015625, "loss": 0.0532, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.11455889046192169, "rewards/margins": 0.3093631863594055, "rewards/rejected": -0.423922061920166, "step": 880 }, { "epoch": 0.24, "learning_rate": 4.7181982937661485e-06, "logits/chosen": -1.8567373752593994, "logits/rejected": -0.8586881756782532, "logps/chosen": -683.0592651367188, "logps/rejected": -1194.775634765625, "loss": 0.0782, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15534023940563202, "rewards/margins": 0.24916231632232666, "rewards/rejected": -0.4045025706291199, "step": 890 }, { "epoch": 0.24, "learning_rate": 4.707368982147318e-06, "logits/chosen": -1.5189939737319946, "logits/rejected": -1.0900559425354004, "logps/chosen": -617.9810791015625, "logps/rejected": -1228.6695556640625, "loss": 0.0811, "rewards/accuracies": 0.875, "rewards/chosen": -0.16101650893688202, "rewards/margins": 0.2594471573829651, "rewards/rejected": -0.4204636514186859, "step": 900 }, { "epoch": 0.24, "learning_rate": 4.696348410599244e-06, "logits/chosen": -1.609279990196228, "logits/rejected": -0.9329544901847839, "logps/chosen": -649.111328125, "logps/rejected": -1244.6878662109375, "loss": 0.0947, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1598513424396515, "rewards/margins": 0.2521916627883911, "rewards/rejected": -0.4120430052280426, "step": 910 }, { "epoch": 0.25, "learning_rate": 4.685137534011549e-06, "logits/chosen": -1.5942234992980957, "logits/rejected": -0.9433167576789856, "logps/chosen": -600.16796875, "logps/rejected": -1137.0755615234375, "loss": 0.0973, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1486530601978302, "rewards/margins": 0.2408868372440338, "rewards/rejected": -0.389539897441864, "step": 920 }, { "epoch": 0.25, "learning_rate": 4.673737323763048e-06, "logits/chosen": -1.757784128189087, "logits/rejected": -0.9788764715194702, "logps/chosen": -526.8590087890625, "logps/rejected": -1164.993408203125, "loss": 0.0518, "rewards/accuracies": 0.875, "rewards/chosen": -0.06887698173522949, "rewards/margins": 0.3250022530555725, "rewards/rejected": -0.3938792049884796, "step": 930 }, { "epoch": 0.25, "learning_rate": 4.662148767637578e-06, "logits/chosen": -1.695051908493042, "logits/rejected": -1.0422875881195068, "logps/chosen": -673.8726806640625, "logps/rejected": -1251.0634765625, "loss": 0.0824, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1298268437385559, "rewards/margins": 0.2638325095176697, "rewards/rejected": -0.3936593532562256, "step": 940 }, { "epoch": 0.25, "learning_rate": 4.650372869738415e-06, "logits/chosen": -1.817731261253357, "logits/rejected": -1.1714346408843994, "logps/chosen": -632.3103637695312, "logps/rejected": -1204.101806640625, "loss": 0.0757, "rewards/accuracies": 0.875, "rewards/chosen": -0.08371297270059586, "rewards/margins": 0.2807037830352783, "rewards/rejected": -0.36441677808761597, "step": 950 }, { "epoch": 0.26, "learning_rate": 4.638410650401267e-06, "logits/chosen": -1.6917314529418945, "logits/rejected": -1.2975494861602783, "logps/chosen": -501.3326110839844, "logps/rejected": -1110.889404296875, "loss": 0.1007, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.010560419410467148, "rewards/margins": 0.2687085270881653, "rewards/rejected": -0.27926892042160034, "step": 960 }, { "epoch": 0.26, "learning_rate": 4.626263146105875e-06, "logits/chosen": -1.681670904159546, "logits/rejected": -1.0678811073303223, "logps/chosen": -548.2903442382812, "logps/rejected": -1203.9267578125, "loss": 0.0685, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.08060415089130402, "rewards/margins": 0.297444224357605, "rewards/rejected": -0.3780483603477478, "step": 970 }, { "epoch": 0.26, "learning_rate": 4.613931409386196e-06, "logits/chosen": -1.469982385635376, "logits/rejected": -1.1716678142547607, "logps/chosen": -675.4676513671875, "logps/rejected": -1349.0924072265625, "loss": 0.087, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18839700520038605, "rewards/margins": 0.28368327021598816, "rewards/rejected": -0.4720802903175354, "step": 980 }, { "epoch": 0.26, "learning_rate": 4.601416508739211e-06, "logits/chosen": -1.589691162109375, "logits/rejected": -1.0157541036605835, "logps/chosen": -608.4013061523438, "logps/rejected": -1338.4168701171875, "loss": 0.0375, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.15417249500751495, "rewards/margins": 0.30883660912513733, "rewards/rejected": -0.4630090594291687, "step": 990 }, { "epoch": 0.27, "learning_rate": 4.588719528532342e-06, "logits/chosen": -1.6860065460205078, "logits/rejected": -1.0934185981750488, "logps/chosen": -706.9453735351562, "logps/rejected": -1332.388916015625, "loss": 0.0972, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2328319251537323, "rewards/margins": 0.26258260011672974, "rewards/rejected": -0.49541449546813965, "step": 1000 }, { "epoch": 0.27, "learning_rate": 4.575841568909494e-06, "logits/chosen": -1.433650016784668, "logits/rejected": -1.1477049589157104, "logps/chosen": -688.2756958007812, "logps/rejected": -1245.33447265625, "loss": 0.0894, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22389158606529236, "rewards/margins": 0.24267525970935822, "rewards/rejected": -0.46656686067581177, "step": 1010 }, { "epoch": 0.27, "learning_rate": 4.562783745695738e-06, "logits/chosen": -1.5687224864959717, "logits/rejected": -0.853603720664978, "logps/chosen": -791.3260498046875, "logps/rejected": -1367.469482421875, "loss": 0.0939, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21331918239593506, "rewards/margins": 0.2616121768951416, "rewards/rejected": -0.4749313294887543, "step": 1020 }, { "epoch": 0.27, "learning_rate": 4.549547190300622e-06, "logits/chosen": -1.7121455669403076, "logits/rejected": -0.8818023800849915, "logps/chosen": -657.982177734375, "logps/rejected": -1258.4931640625, "loss": 0.0877, "rewards/accuracies": 0.875, "rewards/chosen": -0.12182845175266266, "rewards/margins": 0.31102484464645386, "rewards/rejected": -0.4328532814979553, "step": 1030 }, { "epoch": 0.28, "learning_rate": 4.536133049620143e-06, "logits/chosen": -1.4799646139144897, "logits/rejected": -1.1651620864868164, "logps/chosen": -479.6021423339844, "logps/rejected": -1181.6322021484375, "loss": 0.0789, "rewards/accuracies": 0.875, "rewards/chosen": -0.09868054836988449, "rewards/margins": 0.26966673135757446, "rewards/rejected": -0.36834731698036194, "step": 1040 }, { "epoch": 0.28, "learning_rate": 4.522542485937369e-06, "logits/chosen": -1.827845573425293, "logits/rejected": -1.0856597423553467, "logps/chosen": -625.0631103515625, "logps/rejected": -1238.838134765625, "loss": 0.0703, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1401088982820511, "rewards/margins": 0.30787450075149536, "rewards/rejected": -0.44798341393470764, "step": 1050 }, { "epoch": 0.28, "learning_rate": 4.508776676821739e-06, "logits/chosen": -1.5721492767333984, "logits/rejected": -0.8746267557144165, "logps/chosen": -652.6207275390625, "logps/rejected": -1226.645751953125, "loss": 0.0661, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17216768860816956, "rewards/margins": 0.27679505944252014, "rewards/rejected": -0.4489627778530121, "step": 1060 }, { "epoch": 0.29, "learning_rate": 4.494836815027022e-06, "logits/chosen": -1.6152639389038086, "logits/rejected": -1.128306269645691, "logps/chosen": -588.0072021484375, "logps/rejected": -1203.587890625, "loss": 0.0879, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15817685425281525, "rewards/margins": 0.2696138024330139, "rewards/rejected": -0.42779064178466797, "step": 1070 }, { "epoch": 0.29, "learning_rate": 4.4807241083879774e-06, "logits/chosen": -1.3238633871078491, "logits/rejected": -0.7138497233390808, "logps/chosen": -601.8871459960938, "logps/rejected": -1312.838623046875, "loss": 0.0552, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15420369803905487, "rewards/margins": 0.3349114656448364, "rewards/rejected": -0.4891151785850525, "step": 1080 }, { "epoch": 0.29, "learning_rate": 4.466439779715696e-06, "logits/chosen": -1.2504911422729492, "logits/rejected": -0.7397804856300354, "logps/chosen": -631.8212890625, "logps/rejected": -1243.5828857421875, "loss": 0.0868, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19839642941951752, "rewards/margins": 0.29437923431396484, "rewards/rejected": -0.49277567863464355, "step": 1090 }, { "epoch": 0.29, "learning_rate": 4.451985066691649e-06, "logits/chosen": -1.809372901916504, "logits/rejected": -0.8999295234680176, "logps/chosen": -633.7000122070312, "logps/rejected": -1235.184814453125, "loss": 0.0684, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1565568596124649, "rewards/margins": 0.30287352204322815, "rewards/rejected": -0.45943036675453186, "step": 1100 }, { "epoch": 0.3, "learning_rate": 4.437361221760449e-06, "logits/chosen": -1.5753552913665771, "logits/rejected": -0.8743413090705872, "logps/chosen": -684.4049072265625, "logps/rejected": -1305.688232421875, "loss": 0.0796, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17457641661167145, "rewards/margins": 0.2981758415699005, "rewards/rejected": -0.47275224328041077, "step": 1110 }, { "epoch": 0.3, "learning_rate": 4.422569512021332e-06, "logits/chosen": -1.5101526975631714, "logits/rejected": -0.977883517742157, "logps/chosen": -588.9329223632812, "logps/rejected": -1159.4969482421875, "loss": 0.0763, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1433287113904953, "rewards/margins": 0.25360313057899475, "rewards/rejected": -0.39693182706832886, "step": 1120 }, { "epoch": 0.3, "learning_rate": 4.407611219118363e-06, "logits/chosen": -1.4618273973464966, "logits/rejected": -1.0363489389419556, "logps/chosen": -448.98089599609375, "logps/rejected": -1272.61865234375, "loss": 0.0453, "rewards/accuracies": 0.875, "rewards/chosen": -0.09960681945085526, "rewards/margins": 0.31163084506988525, "rewards/rejected": -0.4112376570701599, "step": 1130 }, { "epoch": 0.3, "learning_rate": 4.3924876391293915e-06, "logits/chosen": -1.6461549997329712, "logits/rejected": -0.9912912249565125, "logps/chosen": -611.0677490234375, "logps/rejected": -1183.2215576171875, "loss": 0.0846, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1306958794593811, "rewards/margins": 0.28848105669021606, "rewards/rejected": -0.41917696595191956, "step": 1140 }, { "epoch": 0.31, "learning_rate": 4.377200082453748e-06, "logits/chosen": -1.8383325338363647, "logits/rejected": -0.9174288511276245, "logps/chosen": -618.29638671875, "logps/rejected": -1216.172119140625, "loss": 0.0708, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.11510708183050156, "rewards/margins": 0.29687556624412537, "rewards/rejected": -0.41198262572288513, "step": 1150 }, { "epoch": 0.31, "learning_rate": 4.361749873698707e-06, "logits/chosen": -1.3221898078918457, "logits/rejected": -0.9603347778320312, "logps/chosen": -517.3427124023438, "logps/rejected": -1317.27099609375, "loss": 0.0551, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.09739838540554047, "rewards/margins": 0.3246908485889435, "rewards/rejected": -0.42208918929100037, "step": 1160 }, { "epoch": 0.31, "learning_rate": 4.346138351564711e-06, "logits/chosen": -1.7003717422485352, "logits/rejected": -0.7912822961807251, "logps/chosen": -598.9830932617188, "logps/rejected": -1176.973388671875, "loss": 0.0841, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18871501088142395, "rewards/margins": 0.29334282875061035, "rewards/rejected": -0.4820578098297119, "step": 1170 }, { "epoch": 0.31, "learning_rate": 4.330366868729376e-06, "logits/chosen": -1.4258317947387695, "logits/rejected": -1.199805498123169, "logps/chosen": -769.9146728515625, "logps/rejected": -1417.9521484375, "loss": 0.0696, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.25572648644447327, "rewards/margins": 0.25885215401649475, "rewards/rejected": -0.5145785808563232, "step": 1180 }, { "epoch": 0.32, "learning_rate": 4.3144367917302964e-06, "logits/chosen": -1.580244779586792, "logits/rejected": -0.9348461031913757, "logps/chosen": -604.8896484375, "logps/rejected": -1246.4227294921875, "loss": 0.0574, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14286699891090393, "rewards/margins": 0.30815887451171875, "rewards/rejected": -0.4510258734226227, "step": 1190 }, { "epoch": 0.32, "learning_rate": 4.2983495008466285e-06, "logits/chosen": -1.449998140335083, "logits/rejected": -1.0776797533035278, "logps/chosen": -581.5048217773438, "logps/rejected": -1115.7093505859375, "loss": 0.1337, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.0915089100599289, "rewards/margins": 0.22939009964466095, "rewards/rejected": -0.32089897990226746, "step": 1200 }, { "epoch": 0.32, "learning_rate": 4.2821063899795015e-06, "logits/chosen": -1.1581242084503174, "logits/rejected": -0.6956531405448914, "logps/chosen": -486.95257568359375, "logps/rejected": -1211.6070556640625, "loss": 0.0813, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.02962355688214302, "rewards/margins": 0.285904198884964, "rewards/rejected": -0.3155277669429779, "step": 1210 }, { "epoch": 0.33, "learning_rate": 4.265708866531238e-06, "logits/chosen": -1.6472032070159912, "logits/rejected": -1.1526950597763062, "logps/chosen": -458.576171875, "logps/rejected": -1106.2225341796875, "loss": 0.0867, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08721883594989777, "rewards/margins": 0.26627764105796814, "rewards/rejected": -0.3534964919090271, "step": 1220 }, { "epoch": 0.33, "learning_rate": 4.249158351283414e-06, "logits/chosen": -1.5008890628814697, "logits/rejected": -0.9523450136184692, "logps/chosen": -564.3670654296875, "logps/rejected": -1191.93798828125, "loss": 0.0889, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12786266207695007, "rewards/margins": 0.2664950489997864, "rewards/rejected": -0.39435768127441406, "step": 1230 }, { "epoch": 0.33, "learning_rate": 4.232456278273743e-06, "logits/chosen": -1.5925250053405762, "logits/rejected": -0.787007749080658, "logps/chosen": -634.4956665039062, "logps/rejected": -1199.644287109375, "loss": 0.0832, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1421954333782196, "rewards/margins": 0.27115732431411743, "rewards/rejected": -0.4133527874946594, "step": 1240 }, { "epoch": 0.33, "learning_rate": 4.215604094671835e-06, "logits/chosen": -1.5216588973999023, "logits/rejected": -0.8067516088485718, "logps/chosen": -736.2872314453125, "logps/rejected": -1336.3583984375, "loss": 0.0645, "rewards/accuracies": 0.875, "rewards/chosen": -0.2195323407649994, "rewards/margins": 0.2807646691799164, "rewards/rejected": -0.500296950340271, "step": 1250 }, { "epoch": 0.34, "learning_rate": 4.198603260653792e-06, "logits/chosen": -1.6377366781234741, "logits/rejected": -1.1248198747634888, "logps/chosen": -589.9413452148438, "logps/rejected": -1174.857666015625, "loss": 0.0898, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1545807123184204, "rewards/margins": 0.25077009201049805, "rewards/rejected": -0.40535083413124084, "step": 1260 }, { "epoch": 0.34, "learning_rate": 4.181455249275701e-06, "logits/chosen": -1.359490990638733, "logits/rejected": -0.7116638422012329, "logps/chosen": -482.0816345214844, "logps/rejected": -1281.95068359375, "loss": 0.0926, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06251558661460876, "rewards/margins": 0.3599362075328827, "rewards/rejected": -0.42245182394981384, "step": 1270 }, { "epoch": 0.34, "learning_rate": 4.1641615463459926e-06, "logits/chosen": -1.4103469848632812, "logits/rejected": -0.9721433520317078, "logps/chosen": -495.11639404296875, "logps/rejected": -1221.6544189453125, "loss": 0.0611, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.09844540059566498, "rewards/margins": 0.32037508487701416, "rewards/rejected": -0.41882047057151794, "step": 1280 }, { "epoch": 0.34, "learning_rate": 4.146723650296701e-06, "logits/chosen": -1.5504339933395386, "logits/rejected": -0.8985152244567871, "logps/chosen": -516.6334228515625, "logps/rejected": -1175.6920166015625, "loss": 0.0714, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15037675201892853, "rewards/margins": 0.27001953125, "rewards/rejected": -0.42039623856544495, "step": 1290 }, { "epoch": 0.35, "learning_rate": 4.129143072053639e-06, "logits/chosen": -1.5418593883514404, "logits/rejected": -0.9249173402786255, "logps/chosen": -479.78521728515625, "logps/rejected": -1107.382568359375, "loss": 0.0675, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1212129220366478, "rewards/margins": 0.27643007040023804, "rewards/rejected": -0.3976430296897888, "step": 1300 }, { "epoch": 0.35, "learning_rate": 4.111421334905468e-06, "logits/chosen": -1.461808443069458, "logits/rejected": -0.8558281660079956, "logps/chosen": -667.7941284179688, "logps/rejected": -1245.40234375, "loss": 0.0659, "rewards/accuracies": 0.875, "rewards/chosen": -0.1607908010482788, "rewards/margins": 0.2721284031867981, "rewards/rejected": -0.4329192638397217, "step": 1310 }, { "epoch": 0.35, "learning_rate": 4.093559974371725e-06, "logits/chosen": -1.3010141849517822, "logits/rejected": -0.9021800756454468, "logps/chosen": -656.9991455078125, "logps/rejected": -1318.10205078125, "loss": 0.0846, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20327822864055634, "rewards/margins": 0.2836567759513855, "rewards/rejected": -0.48693498969078064, "step": 1320 }, { "epoch": 0.35, "learning_rate": 4.075560538069767e-06, "logits/chosen": -1.2061702013015747, "logits/rejected": -0.8291865587234497, "logps/chosen": -608.1866455078125, "logps/rejected": -1327.5355224609375, "loss": 0.0806, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16998444497585297, "rewards/margins": 0.32111790776252747, "rewards/rejected": -0.49110230803489685, "step": 1330 }, { "epoch": 0.36, "learning_rate": 4.05742458558068e-06, "logits/chosen": -1.6475965976715088, "logits/rejected": -0.8810272216796875, "logps/chosen": -616.7311401367188, "logps/rejected": -1327.117431640625, "loss": 0.062, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1568661332130432, "rewards/margins": 0.3473976254463196, "rewards/rejected": -0.5042637586593628, "step": 1340 }, { "epoch": 0.36, "learning_rate": 4.039153688314146e-06, "logits/chosen": -1.3039356470108032, "logits/rejected": -0.9500153660774231, "logps/chosen": -611.8599853515625, "logps/rejected": -1257.874267578125, "loss": 0.0689, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1369098573923111, "rewards/margins": 0.29038089513778687, "rewards/rejected": -0.42729073762893677, "step": 1350 }, { "epoch": 0.36, "learning_rate": 4.020749429372286e-06, "logits/chosen": -1.4324769973754883, "logits/rejected": -0.807245135307312, "logps/chosen": -625.339111328125, "logps/rejected": -1257.253173828125, "loss": 0.089, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14000260829925537, "rewards/margins": 0.28776806592941284, "rewards/rejected": -0.4277706742286682, "step": 1360 }, { "epoch": 0.37, "learning_rate": 4.002213403412492e-06, "logits/chosen": -1.445261001586914, "logits/rejected": -0.9508928060531616, "logps/chosen": -594.2431030273438, "logps/rejected": -1150.235595703125, "loss": 0.073, "rewards/accuracies": 0.75, "rewards/chosen": -0.15963387489318848, "rewards/margins": 0.2395774871110916, "rewards/rejected": -0.3992113471031189, "step": 1370 }, { "epoch": 0.37, "learning_rate": 3.983547216509254e-06, "logits/chosen": -1.33284592628479, "logits/rejected": -0.7798209190368652, "logps/chosen": -605.1170654296875, "logps/rejected": -1115.391845703125, "loss": 0.0769, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17733311653137207, "rewards/margins": 0.23692724108695984, "rewards/rejected": -0.4142603278160095, "step": 1380 }, { "epoch": 0.37, "learning_rate": 3.964752486015001e-06, "logits/chosen": -1.353686809539795, "logits/rejected": -0.9458833932876587, "logps/chosen": -541.7138671875, "logps/rejected": -1133.408935546875, "loss": 0.0832, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1459619700908661, "rewards/margins": 0.25547298789024353, "rewards/rejected": -0.4014349579811096, "step": 1390 }, { "epoch": 0.37, "learning_rate": 3.945830840419966e-06, "logits/chosen": -1.3489134311676025, "logits/rejected": -1.114639163017273, "logps/chosen": -650.7469482421875, "logps/rejected": -1327.571044921875, "loss": 0.1023, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19746045768260956, "rewards/margins": 0.3029775023460388, "rewards/rejected": -0.5004379749298096, "step": 1400 }, { "epoch": 0.38, "learning_rate": 3.92678391921108e-06, "logits/chosen": -1.3552124500274658, "logits/rejected": -0.8964862823486328, "logps/chosen": -505.401123046875, "logps/rejected": -1367.124267578125, "loss": 0.0499, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.12019245326519012, "rewards/margins": 0.3561457395553589, "rewards/rejected": -0.4763382375240326, "step": 1410 }, { "epoch": 0.38, "learning_rate": 3.907613372729916e-06, "logits/chosen": -1.5101354122161865, "logits/rejected": -1.0879840850830078, "logps/chosen": -619.8988037109375, "logps/rejected": -1368.367919921875, "loss": 0.0501, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.15163154900074005, "rewards/margins": 0.34400704503059387, "rewards/rejected": -0.49563854932785034, "step": 1420 }, { "epoch": 0.38, "learning_rate": 3.888320862029699e-06, "logits/chosen": -1.5360214710235596, "logits/rejected": -0.9855524897575378, "logps/chosen": -748.1507568359375, "logps/rejected": -1296.6070556640625, "loss": 0.0988, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1940506398677826, "rewards/margins": 0.2233462780714035, "rewards/rejected": -0.4173968732357025, "step": 1430 }, { "epoch": 0.38, "learning_rate": 3.868908058731376e-06, "logits/chosen": -1.5095126628875732, "logits/rejected": -0.8970023989677429, "logps/chosen": -497.6952209472656, "logps/rejected": -1121.1767578125, "loss": 0.07, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.10547561943531036, "rewards/margins": 0.27518388628959656, "rewards/rejected": -0.3806595206260681, "step": 1440 }, { "epoch": 0.39, "learning_rate": 3.849376644878783e-06, "logits/chosen": -1.4751381874084473, "logits/rejected": -0.9001661539077759, "logps/chosen": -588.8636474609375, "logps/rejected": -1271.7371826171875, "loss": 0.0558, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1327972114086151, "rewards/margins": 0.305908739566803, "rewards/rejected": -0.4387059211730957, "step": 1450 }, { "epoch": 0.39, "learning_rate": 3.829728312792895e-06, "logits/chosen": -1.616092324256897, "logits/rejected": -1.0537126064300537, "logps/chosen": -540.6871337890625, "logps/rejected": -1179.548828125, "loss": 0.0725, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09595973044633865, "rewards/margins": 0.3011319935321808, "rewards/rejected": -0.39709168672561646, "step": 1460 }, { "epoch": 0.39, "learning_rate": 3.8099647649251984e-06, "logits/chosen": -1.423906683921814, "logits/rejected": -0.7600029706954956, "logps/chosen": -594.6055908203125, "logps/rejected": -1255.1416015625, "loss": 0.0918, "rewards/accuracies": 0.875, "rewards/chosen": -0.12480314821004868, "rewards/margins": 0.3190528154373169, "rewards/rejected": -0.44385600090026855, "step": 1470 }, { "epoch": 0.39, "learning_rate": 3.790087713710179e-06, "logits/chosen": -1.5575999021530151, "logits/rejected": -1.1269023418426514, "logps/chosen": -627.612548828125, "logps/rejected": -1399.1263427734375, "loss": 0.0576, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1461525857448578, "rewards/margins": 0.3743034303188324, "rewards/rejected": -0.5204560160636902, "step": 1480 }, { "epoch": 0.4, "learning_rate": 3.770098881416945e-06, "logits/chosen": -1.4309592247009277, "logits/rejected": -0.8114882707595825, "logps/chosen": -639.8936767578125, "logps/rejected": -1324.5836181640625, "loss": 0.0548, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.15769222378730774, "rewards/margins": 0.3088419735431671, "rewards/rejected": -0.46653419733047485, "step": 1490 }, { "epoch": 0.4, "learning_rate": 3.7500000000000005e-06, "logits/chosen": -1.3537265062332153, "logits/rejected": -0.9575905799865723, "logps/chosen": -644.993408203125, "logps/rejected": -1161.0167236328125, "loss": 0.0985, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13782911002635956, "rewards/margins": 0.23069393634796143, "rewards/rejected": -0.3685230612754822, "step": 1500 }, { "epoch": 0.4, "learning_rate": 3.7297928109491765e-06, "logits/chosen": -1.7066447734832764, "logits/rejected": -0.8981353044509888, "logps/chosen": -499.3677673339844, "logps/rejected": -1228.71533203125, "loss": 0.0597, "rewards/accuracies": 0.875, "rewards/chosen": -0.10399389266967773, "rewards/margins": 0.3211382031440735, "rewards/rejected": -0.42513203620910645, "step": 1510 }, { "epoch": 0.41, "learning_rate": 3.7094790651387414e-06, "logits/chosen": -1.5993268489837646, "logits/rejected": -0.943587601184845, "logps/chosen": -549.6283569335938, "logps/rejected": -1147.561279296875, "loss": 0.0736, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.09537569433450699, "rewards/margins": 0.2917521297931671, "rewards/rejected": -0.3871277868747711, "step": 1520 }, { "epoch": 0.41, "learning_rate": 3.689060522675689e-06, "logits/chosen": -1.4540773630142212, "logits/rejected": -0.9544679522514343, "logps/chosen": -567.1152954101562, "logps/rejected": -1310.57080078125, "loss": 0.067, "rewards/accuracies": 0.875, "rewards/chosen": -0.13753007352352142, "rewards/margins": 0.3150814175605774, "rewards/rejected": -0.45261150598526, "step": 1530 }, { "epoch": 0.41, "learning_rate": 3.668538952747236e-06, "logits/chosen": -1.5060861110687256, "logits/rejected": -1.0383261442184448, "logps/chosen": -541.1341552734375, "logps/rejected": -1345.200927734375, "loss": 0.0495, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.08431238681077957, "rewards/margins": 0.34140679240226746, "rewards/rejected": -0.42571917176246643, "step": 1540 }, { "epoch": 0.41, "learning_rate": 3.6479161334675294e-06, "logits/chosen": -1.6825485229492188, "logits/rejected": -0.8775063753128052, "logps/chosen": -636.63427734375, "logps/rejected": -1235.6170654296875, "loss": 0.0867, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.09943681955337524, "rewards/margins": 0.2878590226173401, "rewards/rejected": -0.38729584217071533, "step": 1550 }, { "epoch": 0.42, "learning_rate": 3.627193851723577e-06, "logits/chosen": -1.686977744102478, "logits/rejected": -1.0781195163726807, "logps/chosen": -594.205078125, "logps/rejected": -1142.4056396484375, "loss": 0.084, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11142469942569733, "rewards/margins": 0.26075294613838196, "rewards/rejected": -0.3721776604652405, "step": 1560 }, { "epoch": 0.42, "learning_rate": 3.6063739030204226e-06, "logits/chosen": -1.571839451789856, "logits/rejected": -1.1530930995941162, "logps/chosen": -549.628173828125, "logps/rejected": -1172.0718994140625, "loss": 0.0807, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.10474500805139542, "rewards/margins": 0.269944429397583, "rewards/rejected": -0.37468940019607544, "step": 1570 }, { "epoch": 0.42, "learning_rate": 3.5854580913255706e-06, "logits/chosen": -1.61894953250885, "logits/rejected": -0.9551402926445007, "logps/chosen": -607.7701416015625, "logps/rejected": -1296.424072265625, "loss": 0.053, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.13554790616035461, "rewards/margins": 0.3056618571281433, "rewards/rejected": -0.4412097930908203, "step": 1580 }, { "epoch": 0.42, "learning_rate": 3.564448228912682e-06, "logits/chosen": -1.6686357259750366, "logits/rejected": -1.0033533573150635, "logps/chosen": -650.9888916015625, "logps/rejected": -1215.917724609375, "loss": 0.0889, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1597142517566681, "rewards/margins": 0.26373302936553955, "rewards/rejected": -0.42344728112220764, "step": 1590 }, { "epoch": 0.43, "learning_rate": 3.543346136204545e-06, "logits/chosen": -1.3500854969024658, "logits/rejected": -0.8943287134170532, "logps/chosen": -594.9763793945312, "logps/rejected": -1269.090576171875, "loss": 0.0873, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14664295315742493, "rewards/margins": 0.29260388016700745, "rewards/rejected": -0.4392468333244324, "step": 1600 }, { "epoch": 0.43, "learning_rate": 3.522153641615345e-06, "logits/chosen": -1.5656368732452393, "logits/rejected": -0.917197048664093, "logps/chosen": -623.9747924804688, "logps/rejected": -1312.198486328125, "loss": 0.0674, "rewards/accuracies": 0.875, "rewards/chosen": -0.1256953328847885, "rewards/margins": 0.3076288104057312, "rewards/rejected": -0.4333241581916809, "step": 1610 }, { "epoch": 0.43, "learning_rate": 3.5008725813922383e-06, "logits/chosen": -1.4567426443099976, "logits/rejected": -1.0509058237075806, "logps/chosen": -514.6878051757812, "logps/rejected": -1177.8046875, "loss": 0.0862, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1288285255432129, "rewards/margins": 0.2865757346153259, "rewards/rejected": -0.4154042601585388, "step": 1620 }, { "epoch": 0.43, "learning_rate": 3.4795047994562463e-06, "logits/chosen": -1.6101102828979492, "logits/rejected": -1.2119176387786865, "logps/chosen": -549.9823608398438, "logps/rejected": -1237.9873046875, "loss": 0.0912, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1458604633808136, "rewards/margins": 0.2935812771320343, "rewards/rejected": -0.4394417405128479, "step": 1630 }, { "epoch": 0.44, "learning_rate": 3.458052147242494e-06, "logits/chosen": -1.6634056568145752, "logits/rejected": -0.9373501539230347, "logps/chosen": -565.6293334960938, "logps/rejected": -1192.64208984375, "loss": 0.065, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15129828453063965, "rewards/margins": 0.2895079553127289, "rewards/rejected": -0.44080623984336853, "step": 1640 }, { "epoch": 0.44, "learning_rate": 3.436516483539781e-06, "logits/chosen": -1.4864572286605835, "logits/rejected": -1.038694143295288, "logps/chosen": -491.3564453125, "logps/rejected": -1235.2786865234375, "loss": 0.0685, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.09666456282138824, "rewards/margins": 0.3150130808353424, "rewards/rejected": -0.41167759895324707, "step": 1650 }, { "epoch": 0.44, "learning_rate": 3.4148996743295305e-06, "logits/chosen": -1.561033010482788, "logits/rejected": -0.8384987711906433, "logps/chosen": -697.5499877929688, "logps/rejected": -1347.208251953125, "loss": 0.0675, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.17049038410186768, "rewards/margins": 0.30814796686172485, "rewards/rejected": -0.47863835096359253, "step": 1660 }, { "epoch": 0.45, "learning_rate": 3.3932035926241103e-06, "logits/chosen": -1.4081476926803589, "logits/rejected": -1.0466349124908447, "logps/chosen": -630.5411376953125, "logps/rejected": -1300.04541015625, "loss": 0.0695, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12465560436248779, "rewards/margins": 0.29317888617515564, "rewards/rejected": -0.41783446073532104, "step": 1670 }, { "epoch": 0.45, "learning_rate": 3.3714301183045382e-06, "logits/chosen": -1.6413304805755615, "logits/rejected": -0.9996837377548218, "logps/chosen": -584.4578857421875, "logps/rejected": -1310.6175537109375, "loss": 0.0728, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16229207813739777, "rewards/margins": 0.3190585970878601, "rewards/rejected": -0.4813506603240967, "step": 1680 }, { "epoch": 0.45, "learning_rate": 3.349581137957604e-06, "logits/chosen": -1.5459994077682495, "logits/rejected": -0.8717397451400757, "logps/chosen": -689.1311645507812, "logps/rejected": -1347.6954345703125, "loss": 0.0896, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18276944756507874, "rewards/margins": 0.30370309948921204, "rewards/rejected": -0.48647254705429077, "step": 1690 }, { "epoch": 0.45, "learning_rate": 3.3276585447123957e-06, "logits/chosen": -1.5712751150131226, "logits/rejected": -0.9994010925292969, "logps/chosen": -587.7183227539062, "logps/rejected": -1291.80322265625, "loss": 0.0678, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13140062987804413, "rewards/margins": 0.2967793345451355, "rewards/rejected": -0.4281799793243408, "step": 1700 }, { "epoch": 0.46, "learning_rate": 3.3056642380762783e-06, "logits/chosen": -1.45646071434021, "logits/rejected": -0.9559444189071655, "logps/chosen": -743.5758056640625, "logps/rejected": -1357.35693359375, "loss": 0.0537, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1634978950023651, "rewards/margins": 0.26821058988571167, "rewards/rejected": -0.4317084848880768, "step": 1710 }, { "epoch": 0.46, "learning_rate": 3.2836001237702993e-06, "logits/chosen": -1.2642626762390137, "logits/rejected": -0.8956004977226257, "logps/chosen": -672.7369995117188, "logps/rejected": -1317.1905517578125, "loss": 0.0843, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19635051488876343, "rewards/margins": 0.26600882411003113, "rewards/rejected": -0.46235933899879456, "step": 1720 }, { "epoch": 0.46, "learning_rate": 3.2614681135640696e-06, "logits/chosen": -1.538914442062378, "logits/rejected": -0.9134441614151001, "logps/chosen": -672.8499755859375, "logps/rejected": -1275.136474609375, "loss": 0.0585, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17318633198738098, "rewards/margins": 0.288135290145874, "rewards/rejected": -0.4613215923309326, "step": 1730 }, { "epoch": 0.46, "learning_rate": 3.2392701251101172e-06, "logits/chosen": -1.4427398443222046, "logits/rejected": -0.8872078061103821, "logps/chosen": -686.1043701171875, "logps/rejected": -1366.374755859375, "loss": 0.0687, "rewards/accuracies": 0.875, "rewards/chosen": -0.19739912450313568, "rewards/margins": 0.28218263387680054, "rewards/rejected": -0.479581743478775, "step": 1740 }, { "epoch": 0.47, "learning_rate": 3.217008081777726e-06, "logits/chosen": -1.3497573137283325, "logits/rejected": -0.872177004814148, "logps/chosen": -589.72412109375, "logps/rejected": -1277.909423828125, "loss": 0.0563, "rewards/accuracies": 0.875, "rewards/chosen": -0.13301566243171692, "rewards/margins": 0.2993132770061493, "rewards/rejected": -0.4323289394378662, "step": 1750 }, { "epoch": 0.47, "learning_rate": 3.1946839124862873e-06, "logits/chosen": -1.3833153247833252, "logits/rejected": -1.0205485820770264, "logps/chosen": -539.8531494140625, "logps/rejected": -1183.308837890625, "loss": 0.0925, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1366974115371704, "rewards/margins": 0.2687934637069702, "rewards/rejected": -0.4054908752441406, "step": 1760 }, { "epoch": 0.47, "learning_rate": 3.1722995515381644e-06, "logits/chosen": -1.4850012063980103, "logits/rejected": -0.8387139439582825, "logps/chosen": -636.983642578125, "logps/rejected": -1304.3221435546875, "loss": 0.0795, "rewards/accuracies": 0.875, "rewards/chosen": -0.1878032237291336, "rewards/margins": 0.2711120843887329, "rewards/rejected": -0.4589153230190277, "step": 1770 }, { "epoch": 0.47, "learning_rate": 3.149856938451094e-06, "logits/chosen": -1.0989512205123901, "logits/rejected": -0.8349654078483582, "logps/chosen": -627.0206298828125, "logps/rejected": -1307.218505859375, "loss": 0.0903, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1822880506515503, "rewards/margins": 0.3011923134326935, "rewards/rejected": -0.4834803640842438, "step": 1780 }, { "epoch": 0.48, "learning_rate": 3.127358017790132e-06, "logits/chosen": -1.485824704170227, "logits/rejected": -0.8337934613227844, "logps/chosen": -623.2086791992188, "logps/rejected": -1302.7957763671875, "loss": 0.0511, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15663902461528778, "rewards/margins": 0.3056802451610565, "rewards/rejected": -0.4623193144798279, "step": 1790 }, { "epoch": 0.48, "learning_rate": 3.1048047389991693e-06, "logits/chosen": -1.7094755172729492, "logits/rejected": -1.041133165359497, "logps/chosen": -671.2548217773438, "logps/rejected": -1311.30419921875, "loss": 0.1018, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.167900949716568, "rewards/margins": 0.27187058329582214, "rewards/rejected": -0.43977150321006775, "step": 1800 }, { "epoch": 0.48, "learning_rate": 3.082199056232015e-06, "logits/chosen": -1.6966993808746338, "logits/rejected": -1.220529556274414, "logps/chosen": -596.4708251953125, "logps/rejected": -1238.605224609375, "loss": 0.0583, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.12312579154968262, "rewards/margins": 0.272055447101593, "rewards/rejected": -0.39518123865127563, "step": 1810 }, { "epoch": 0.49, "learning_rate": 3.059542928183079e-06, "logits/chosen": -1.661625623703003, "logits/rejected": -1.1181296110153198, "logps/chosen": -575.0912475585938, "logps/rejected": -1266.91064453125, "loss": 0.0801, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.11585699021816254, "rewards/margins": 0.2694869041442871, "rewards/rejected": -0.38534384965896606, "step": 1820 }, { "epoch": 0.49, "learning_rate": 3.0368383179176584e-06, "logits/chosen": -1.606603980064392, "logits/rejected": -1.0587247610092163, "logps/chosen": -512.89013671875, "logps/rejected": -1128.80859375, "loss": 0.0617, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.10761525481939316, "rewards/margins": 0.2635376751422882, "rewards/rejected": -0.37115293741226196, "step": 1830 }, { "epoch": 0.49, "learning_rate": 3.0140871927018466e-06, "logits/chosen": -1.5754063129425049, "logits/rejected": -0.8801782727241516, "logps/chosen": -655.8692626953125, "logps/rejected": -1180.616943359375, "loss": 0.07, "rewards/accuracies": 0.75, "rewards/chosen": -0.153736412525177, "rewards/margins": 0.2592002749443054, "rewards/rejected": -0.41293662786483765, "step": 1840 }, { "epoch": 0.49, "learning_rate": 2.9912915238320755e-06, "logits/chosen": -1.427549123764038, "logits/rejected": -1.0166289806365967, "logps/chosen": -586.8533325195312, "logps/rejected": -1178.782958984375, "loss": 0.0907, "rewards/accuracies": 0.75, "rewards/chosen": -0.15467192232608795, "rewards/margins": 0.2657596468925476, "rewards/rejected": -0.420431524515152, "step": 1850 }, { "epoch": 0.5, "learning_rate": 2.9684532864643123e-06, "logits/chosen": -1.580718755722046, "logits/rejected": -1.1227762699127197, "logps/chosen": -620.2764892578125, "logps/rejected": -1323.167236328125, "loss": 0.0675, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14635826647281647, "rewards/margins": 0.3277556300163269, "rewards/rejected": -0.4741138815879822, "step": 1860 }, { "epoch": 0.5, "learning_rate": 2.945574459442917e-06, "logits/chosen": -1.291585922241211, "logits/rejected": -0.7484699487686157, "logps/chosen": -530.9237060546875, "logps/rejected": -1149.4744873046875, "loss": 0.0707, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1255730241537094, "rewards/margins": 0.28315025568008423, "rewards/rejected": -0.40872329473495483, "step": 1870 }, { "epoch": 0.5, "learning_rate": 2.922657025129185e-06, "logits/chosen": -1.3349201679229736, "logits/rejected": -0.9772024154663086, "logps/chosen": -620.5244140625, "logps/rejected": -1263.1776123046875, "loss": 0.0585, "rewards/accuracies": 0.875, "rewards/chosen": -0.16309909522533417, "rewards/margins": 0.273107647895813, "rewards/rejected": -0.43620675802230835, "step": 1880 }, { "epoch": 0.5, "learning_rate": 2.8997029692295875e-06, "logits/chosen": -1.4110755920410156, "logits/rejected": -0.9908379316329956, "logps/chosen": -516.9703369140625, "logps/rejected": -1351.016357421875, "loss": 0.0628, "rewards/accuracies": 0.875, "rewards/chosen": -0.14381906390190125, "rewards/margins": 0.3800903558731079, "rewards/rejected": -0.5239094495773315, "step": 1890 }, { "epoch": 0.51, "learning_rate": 2.876714280623708e-06, "logits/chosen": -1.4153320789337158, "logits/rejected": -0.820611298084259, "logps/chosen": -487.5252990722656, "logps/rejected": -1135.842529296875, "loss": 0.0915, "rewards/accuracies": 0.75, "rewards/chosen": -0.10841517150402069, "rewards/margins": 0.29948437213897705, "rewards/rejected": -0.40789952874183655, "step": 1900 }, { "epoch": 0.51, "learning_rate": 2.8536929511919227e-06, "logits/chosen": -1.4119293689727783, "logits/rejected": -0.8228232264518738, "logps/chosen": -627.8701171875, "logps/rejected": -1294.2296142578125, "loss": 0.0469, "rewards/accuracies": 0.875, "rewards/chosen": -0.12193576991558075, "rewards/margins": 0.32117849588394165, "rewards/rejected": -0.4431142807006836, "step": 1910 }, { "epoch": 0.51, "learning_rate": 2.8306409756428067e-06, "logits/chosen": -1.527777075767517, "logits/rejected": -0.8934208154678345, "logps/chosen": -581.7477416992188, "logps/rejected": -1238.830810546875, "loss": 0.0816, "rewards/accuracies": 0.875, "rewards/chosen": -0.1308506429195404, "rewards/margins": 0.29394230246543884, "rewards/rejected": -0.42479294538497925, "step": 1920 }, { "epoch": 0.51, "learning_rate": 2.807560351340302e-06, "logits/chosen": -1.3228596448898315, "logits/rejected": -0.7611247897148132, "logps/chosen": -601.160400390625, "logps/rejected": -1213.851806640625, "loss": 0.0707, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1343916952610016, "rewards/margins": 0.28090834617614746, "rewards/rejected": -0.4152999818325043, "step": 1930 }, { "epoch": 0.52, "learning_rate": 2.7844530781306544e-06, "logits/chosen": -1.4402358531951904, "logits/rejected": -0.8715489506721497, "logps/chosen": -518.7476806640625, "logps/rejected": -1256.0328369140625, "loss": 0.0528, "rewards/accuracies": 0.875, "rewards/chosen": -0.11650246381759644, "rewards/margins": 0.3238303065299988, "rewards/rejected": -0.4403327405452728, "step": 1940 }, { "epoch": 0.52, "learning_rate": 2.761321158169134e-06, "logits/chosen": -1.5251938104629517, "logits/rejected": -1.1043987274169922, "logps/chosen": -656.2462158203125, "logps/rejected": -1206.697021484375, "loss": 0.0894, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17089466750621796, "rewards/margins": 0.22701752185821533, "rewards/rejected": -0.3979122042655945, "step": 1950 }, { "epoch": 0.52, "learning_rate": 2.738166595746554e-06, "logits/chosen": -1.3905115127563477, "logits/rejected": -0.9935697317123413, "logps/chosen": -628.3776245117188, "logps/rejected": -1096.552734375, "loss": 0.0795, "rewards/accuracies": 0.75, "rewards/chosen": -0.15120725333690643, "rewards/margins": 0.24288305640220642, "rewards/rejected": -0.39409032464027405, "step": 1960 }, { "epoch": 0.53, "learning_rate": 2.7149913971156105e-06, "logits/chosen": -1.722516655921936, "logits/rejected": -1.0052350759506226, "logps/chosen": -496.67657470703125, "logps/rejected": -1112.5921630859375, "loss": 0.0733, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.10522119700908661, "rewards/margins": 0.28908994793891907, "rewards/rejected": -0.3943111300468445, "step": 1970 }, { "epoch": 0.53, "learning_rate": 2.6917975703170466e-06, "logits/chosen": -1.4045814275741577, "logits/rejected": -0.9305141568183899, "logps/chosen": -512.0284423828125, "logps/rejected": -1209.204345703125, "loss": 0.0672, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1334155797958374, "rewards/margins": 0.2813461720943451, "rewards/rejected": -0.4147617220878601, "step": 1980 }, { "epoch": 0.53, "learning_rate": 2.668587125005663e-06, "logits/chosen": -1.343185305595398, "logits/rejected": -1.0273711681365967, "logps/chosen": -549.5206298828125, "logps/rejected": -1240.9766845703125, "loss": 0.0707, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1306164562702179, "rewards/margins": 0.29476845264434814, "rewards/rejected": -0.4253849387168884, "step": 1990 }, { "epoch": 0.53, "learning_rate": 2.6453620722761897e-06, "logits/chosen": -1.6594680547714233, "logits/rejected": -0.850638210773468, "logps/chosen": -591.6373291015625, "logps/rejected": -1358.3460693359375, "loss": 0.0625, "rewards/accuracies": 0.875, "rewards/chosen": -0.10016246140003204, "rewards/margins": 0.33138564229011536, "rewards/rejected": -0.43154802918434143, "step": 2000 }, { "epoch": 0.54, "learning_rate": 2.6221244244890336e-06, "logits/chosen": -1.5611286163330078, "logits/rejected": -0.799461841583252, "logps/chosen": -587.806640625, "logps/rejected": -1161.3482666015625, "loss": 0.0633, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12262831628322601, "rewards/margins": 0.2799530327320099, "rewards/rejected": -0.4025813639163971, "step": 2010 }, { "epoch": 0.54, "learning_rate": 2.5988761950959133e-06, "logits/chosen": -1.6644928455352783, "logits/rejected": -0.9483749270439148, "logps/chosen": -535.2498168945312, "logps/rejected": -1164.773681640625, "loss": 0.0748, "rewards/accuracies": 0.875, "rewards/chosen": -0.1117793545126915, "rewards/margins": 0.281157910823822, "rewards/rejected": -0.3929373323917389, "step": 2020 }, { "epoch": 0.54, "learning_rate": 2.575619398465402e-06, "logits/chosen": -1.5217034816741943, "logits/rejected": -0.715064287185669, "logps/chosen": -589.2874755859375, "logps/rejected": -1261.570556640625, "loss": 0.0622, "rewards/accuracies": 0.875, "rewards/chosen": -0.1293199360370636, "rewards/margins": 0.3084322214126587, "rewards/rejected": -0.4377521574497223, "step": 2030 }, { "epoch": 0.54, "learning_rate": 2.5523560497083927e-06, "logits/chosen": -1.6095244884490967, "logits/rejected": -0.9723415374755859, "logps/chosen": -610.6204223632812, "logps/rejected": -1346.6697998046875, "loss": 0.0527, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.18110117316246033, "rewards/margins": 0.3312448561191559, "rewards/rejected": -0.5123459696769714, "step": 2040 }, { "epoch": 0.55, "learning_rate": 2.5290881645034932e-06, "logits/chosen": -1.4780033826828003, "logits/rejected": -0.983650803565979, "logps/chosen": -654.4927978515625, "logps/rejected": -1217.2103271484375, "loss": 0.0931, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1973283588886261, "rewards/margins": 0.2564861476421356, "rewards/rejected": -0.4538145065307617, "step": 2050 }, { "epoch": 0.55, "learning_rate": 2.5058177589223766e-06, "logits/chosen": -1.6766622066497803, "logits/rejected": -0.907199501991272, "logps/chosen": -659.3814086914062, "logps/rejected": -1295.118896484375, "loss": 0.0769, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14497703313827515, "rewards/margins": 0.3324897885322571, "rewards/rejected": -0.477466881275177, "step": 2060 }, { "epoch": 0.55, "learning_rate": 2.482546849255096e-06, "logits/chosen": -1.5557941198349, "logits/rejected": -0.8595023155212402, "logps/chosen": -599.567626953125, "logps/rejected": -1293.6220703125, "loss": 0.038, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.14183931052684784, "rewards/margins": 0.32466521859169006, "rewards/rejected": -0.4665044844150543, "step": 2070 }, { "epoch": 0.55, "learning_rate": 2.4592774518353858e-06, "logits/chosen": -1.3870598077774048, "logits/rejected": -0.7746745944023132, "logps/chosen": -578.613525390625, "logps/rejected": -1236.041748046875, "loss": 0.0582, "rewards/accuracies": 0.875, "rewards/chosen": -0.1255788505077362, "rewards/margins": 0.2901487350463867, "rewards/rejected": -0.41572752594947815, "step": 2080 }, { "epoch": 0.56, "learning_rate": 2.436011582865945e-06, "logits/chosen": -1.539902925491333, "logits/rejected": -0.8010295629501343, "logps/chosen": -680.379638671875, "logps/rejected": -1223.6986083984375, "loss": 0.0678, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15240542590618134, "rewards/margins": 0.2671021819114685, "rewards/rejected": -0.41950759291648865, "step": 2090 }, { "epoch": 0.56, "learning_rate": 2.4127512582437486e-06, "logits/chosen": -1.5265008211135864, "logits/rejected": -1.3408691883087158, "logps/chosen": -541.078125, "logps/rejected": -1169.788818359375, "loss": 0.0992, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15979784727096558, "rewards/margins": 0.2517135739326477, "rewards/rejected": -0.4115114212036133, "step": 2100 }, { "epoch": 0.56, "learning_rate": 2.3894984933853734e-06, "logits/chosen": -1.4429771900177002, "logits/rejected": -0.9880257844924927, "logps/chosen": -528.2786254882812, "logps/rejected": -1244.5059814453125, "loss": 0.0763, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14250726997852325, "rewards/margins": 0.30752262473106384, "rewards/rejected": -0.4500298500061035, "step": 2110 }, { "epoch": 0.57, "learning_rate": 2.366255303052377e-06, "logits/chosen": -1.4017646312713623, "logits/rejected": -0.8113569021224976, "logps/chosen": -604.7733154296875, "logps/rejected": -1221.2369384765625, "loss": 0.094, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15813498198986053, "rewards/margins": 0.2982472777366638, "rewards/rejected": -0.45638221502304077, "step": 2120 }, { "epoch": 0.57, "learning_rate": 2.3430237011767166e-06, "logits/chosen": -1.5092931985855103, "logits/rejected": -0.8236274719238281, "logps/chosen": -533.9609985351562, "logps/rejected": -1168.628173828125, "loss": 0.0722, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13640852272510529, "rewards/margins": 0.28761720657348633, "rewards/rejected": -0.4240257740020752, "step": 2130 }, { "epoch": 0.57, "learning_rate": 2.319805700686257e-06, "logits/chosen": -1.5162551403045654, "logits/rejected": -0.786509096622467, "logps/chosen": -623.4132080078125, "logps/rejected": -1179.5946044921875, "loss": 0.0579, "rewards/accuracies": 0.875, "rewards/chosen": -0.1392001360654831, "rewards/margins": 0.26958781480789185, "rewards/rejected": -0.40878796577453613, "step": 2140 }, { "epoch": 0.57, "learning_rate": 2.296603313330355e-06, "logits/chosen": -1.1993951797485352, "logits/rejected": -0.6298279166221619, "logps/chosen": -582.5997314453125, "logps/rejected": -1334.009765625, "loss": 0.0706, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.17245307564735413, "rewards/margins": 0.29515841603279114, "rewards/rejected": -0.46761149168014526, "step": 2150 }, { "epoch": 0.58, "learning_rate": 2.2734185495055503e-06, "logits/chosen": -1.7049709558486938, "logits/rejected": -0.9636220932006836, "logps/chosen": -705.0452270507812, "logps/rejected": -1329.1993408203125, "loss": 0.0595, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.15186749398708344, "rewards/margins": 0.3114302158355713, "rewards/rejected": -0.46329769492149353, "step": 2160 }, { "epoch": 0.58, "learning_rate": 2.250253418081373e-06, "logits/chosen": -1.4931375980377197, "logits/rejected": -1.0522197484970093, "logps/chosen": -568.1475830078125, "logps/rejected": -1231.364013671875, "loss": 0.0732, "rewards/accuracies": 0.875, "rewards/chosen": -0.13682815432548523, "rewards/margins": 0.29910990595817566, "rewards/rejected": -0.4359380602836609, "step": 2170 }, { "epoch": 0.58, "learning_rate": 2.22710992622628e-06, "logits/chosen": -1.4385493993759155, "logits/rejected": -1.0058460235595703, "logps/chosen": -473.60498046875, "logps/rejected": -1201.9755859375, "loss": 0.0674, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.10265137255191803, "rewards/margins": 0.32189419865608215, "rewards/rejected": -0.4245455265045166, "step": 2180 }, { "epoch": 0.58, "learning_rate": 2.2039900792337477e-06, "logits/chosen": -1.3410050868988037, "logits/rejected": -1.1295228004455566, "logps/chosen": -553.0806274414062, "logps/rejected": -1231.395263671875, "loss": 0.0821, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16361066699028015, "rewards/margins": 0.2729097008705139, "rewards/rejected": -0.43652039766311646, "step": 2190 }, { "epoch": 0.59, "learning_rate": 2.1808958803485134e-06, "logits/chosen": -1.3688017129898071, "logits/rejected": -0.9007024765014648, "logps/chosen": -651.8567504882812, "logps/rejected": -1406.11572265625, "loss": 0.0535, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.18758761882781982, "rewards/margins": 0.3379947543144226, "rewards/rejected": -0.5255824327468872, "step": 2200 }, { "epoch": 0.59, "learning_rate": 2.157829330593008e-06, "logits/chosen": -1.6441303491592407, "logits/rejected": -1.0162547826766968, "logps/chosen": -711.4053955078125, "logps/rejected": -1393.6256103515625, "loss": 0.0708, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2044825553894043, "rewards/margins": 0.30536073446273804, "rewards/rejected": -0.5098432302474976, "step": 2210 }, { "epoch": 0.59, "learning_rate": 2.134792428593971e-06, "logits/chosen": -1.6138668060302734, "logits/rejected": -0.9830889701843262, "logps/chosen": -678.9330444335938, "logps/rejected": -1337.688720703125, "loss": 0.0712, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16158784925937653, "rewards/margins": 0.30953675508499146, "rewards/rejected": -0.4711245596408844, "step": 2220 }, { "epoch": 0.59, "learning_rate": 2.1117871704092818e-06, "logits/chosen": -1.5652110576629639, "logits/rejected": -0.7780826687812805, "logps/chosen": -496.6578674316406, "logps/rejected": -1144.76171875, "loss": 0.0663, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11985839903354645, "rewards/margins": 0.3066459596157074, "rewards/rejected": -0.42650431394577026, "step": 2230 }, { "epoch": 0.6, "learning_rate": 2.0888155493550027e-06, "logits/chosen": -1.516342282295227, "logits/rejected": -1.1366102695465088, "logps/chosen": -601.8604125976562, "logps/rejected": -1439.399658203125, "loss": 0.0535, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.15253353118896484, "rewards/margins": 0.3515278697013855, "rewards/rejected": -0.5040613412857056, "step": 2240 }, { "epoch": 0.6, "learning_rate": 2.0658795558326745e-06, "logits/chosen": -1.4972165822982788, "logits/rejected": -1.1287825107574463, "logps/chosen": -551.3253784179688, "logps/rejected": -1201.3675537109375, "loss": 0.0887, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14231975376605988, "rewards/margins": 0.2838347554206848, "rewards/rejected": -0.4261545240879059, "step": 2250 }, { "epoch": 0.6, "learning_rate": 2.0429811771568468e-06, "logits/chosen": -1.283483862876892, "logits/rejected": -0.8047486543655396, "logps/chosen": -674.7145385742188, "logps/rejected": -1273.704345703125, "loss": 0.0626, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19594672322273254, "rewards/margins": 0.2617323696613312, "rewards/rejected": -0.4576791226863861, "step": 2260 }, { "epoch": 0.61, "learning_rate": 2.0201223973828917e-06, "logits/chosen": -1.3640494346618652, "logits/rejected": -0.9720889329910278, "logps/chosen": -654.1539306640625, "logps/rejected": -1382.0146484375, "loss": 0.0744, "rewards/accuracies": 0.875, "rewards/chosen": -0.17939691245555878, "rewards/margins": 0.31711509823799133, "rewards/rejected": -0.4965119957923889, "step": 2270 }, { "epoch": 0.61, "learning_rate": 1.997305197135089e-06, "logits/chosen": -1.557839274406433, "logits/rejected": -0.9444772601127625, "logps/chosen": -631.0563354492188, "logps/rejected": -1341.0086669921875, "loss": 0.0733, "rewards/accuracies": 0.875, "rewards/chosen": -0.16753628849983215, "rewards/margins": 0.30379122495651245, "rewards/rejected": -0.4713274836540222, "step": 2280 }, { "epoch": 0.61, "learning_rate": 1.9745315534350157e-06, "logits/chosen": -1.2147436141967773, "logits/rejected": -0.681081235408783, "logps/chosen": -712.3760986328125, "logps/rejected": -1318.22265625, "loss": 0.0959, "rewards/accuracies": 0.875, "rewards/chosen": -0.23314671218395233, "rewards/margins": 0.2763899266719818, "rewards/rejected": -0.509536623954773, "step": 2290 }, { "epoch": 0.61, "learning_rate": 1.9518034395302413e-06, "logits/chosen": -1.6583919525146484, "logits/rejected": -1.0387184619903564, "logps/chosen": -607.9590454101562, "logps/rejected": -1096.9937744140625, "loss": 0.1013, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1655300408601761, "rewards/margins": 0.24450743198394775, "rewards/rejected": -0.41003745794296265, "step": 2300 }, { "epoch": 0.62, "learning_rate": 1.9291228247233607e-06, "logits/chosen": -1.4778130054473877, "logits/rejected": -0.7460058331489563, "logps/chosen": -651.7698974609375, "logps/rejected": -1243.669921875, "loss": 0.0769, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1902499794960022, "rewards/margins": 0.2825292944908142, "rewards/rejected": -0.4727793335914612, "step": 2310 }, { "epoch": 0.62, "learning_rate": 1.9064916742013515e-06, "logits/chosen": -1.3330655097961426, "logits/rejected": -0.9275220036506653, "logps/chosen": -523.3305053710938, "logps/rejected": -1225.2471923828125, "loss": 0.0641, "rewards/accuracies": 0.875, "rewards/chosen": -0.15531578660011292, "rewards/margins": 0.31203147768974304, "rewards/rejected": -0.46734723448753357, "step": 2320 }, { "epoch": 0.62, "learning_rate": 1.883911948865306e-06, "logits/chosen": -1.3421717882156372, "logits/rejected": -1.1515899896621704, "logps/chosen": -492.34246826171875, "logps/rejected": -1202.8974609375, "loss": 0.0831, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15640634298324585, "rewards/margins": 0.2934826612472534, "rewards/rejected": -0.44988900423049927, "step": 2330 }, { "epoch": 0.62, "learning_rate": 1.8613856051605242e-06, "logits/chosen": -1.4181009531021118, "logits/rejected": -0.8086174130439758, "logps/chosen": -602.2988891601562, "logps/rejected": -1168.888916015625, "loss": 0.07, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15534570813179016, "rewards/margins": 0.2883809208869934, "rewards/rejected": -0.44372662901878357, "step": 2340 }, { "epoch": 0.63, "learning_rate": 1.8389145949069953e-06, "logits/chosen": -1.6121231317520142, "logits/rejected": -0.8654192090034485, "logps/chosen": -598.3814697265625, "logps/rejected": -1284.2470703125, "loss": 0.0595, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14926204085350037, "rewards/margins": 0.3212565779685974, "rewards/rejected": -0.47051864862442017, "step": 2350 }, { "epoch": 0.63, "learning_rate": 1.816500865130279e-06, "logits/chosen": -1.4523346424102783, "logits/rejected": -0.9201906323432922, "logps/chosen": -600.6221923828125, "logps/rejected": -1303.9447021484375, "loss": 0.0641, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17756803333759308, "rewards/margins": 0.3046211898326874, "rewards/rejected": -0.48218923807144165, "step": 2360 }, { "epoch": 0.63, "learning_rate": 1.7941463578928088e-06, "logits/chosen": -1.5082757472991943, "logits/rejected": -0.9013730883598328, "logps/chosen": -610.1536254882812, "logps/rejected": -1315.129638671875, "loss": 0.0639, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.176839679479599, "rewards/margins": 0.3339093327522278, "rewards/rejected": -0.5107490420341492, "step": 2370 }, { "epoch": 0.63, "learning_rate": 1.7718530101256115e-06, "logits/chosen": -1.6840633153915405, "logits/rejected": -0.9501806497573853, "logps/chosen": -662.0902709960938, "logps/rejected": -1296.606689453125, "loss": 0.0698, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17285804450511932, "rewards/margins": 0.3210682272911072, "rewards/rejected": -0.4939262866973877, "step": 2380 }, { "epoch": 0.64, "learning_rate": 1.7496227534604859e-06, "logits/chosen": -1.4562785625457764, "logits/rejected": -1.0628981590270996, "logps/chosen": -594.0131225585938, "logps/rejected": -1322.223876953125, "loss": 0.0512, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.185628280043602, "rewards/margins": 0.31836962699890137, "rewards/rejected": -0.5039979219436646, "step": 2390 }, { "epoch": 0.64, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -1.622641921043396, "logits/rejected": -0.7408018112182617, "logps/chosen": -671.2892456054688, "logps/rejected": -1338.8228759765625, "loss": 0.0585, "rewards/accuracies": 0.875, "rewards/chosen": -0.19210806488990784, "rewards/margins": 0.3319090008735657, "rewards/rejected": -0.5240170359611511, "step": 2400 }, { "epoch": 0.64, "learning_rate": 1.7053592124637557e-06, "logits/chosen": -1.6081740856170654, "logits/rejected": -0.7799841165542603, "logps/chosen": -656.5260009765625, "logps/rejected": -1301.16650390625, "loss": 0.0568, "rewards/accuracies": 0.875, "rewards/chosen": -0.21345119178295135, "rewards/margins": 0.30246636271476746, "rewards/rejected": -0.5159175992012024, "step": 2410 }, { "epoch": 0.65, "learning_rate": 1.6833297633956647e-06, "logits/chosen": -1.5897592306137085, "logits/rejected": -0.830175518989563, "logps/chosen": -643.302734375, "logps/rejected": -1318.0699462890625, "loss": 0.0593, "rewards/accuracies": 0.875, "rewards/chosen": -0.17795221507549286, "rewards/margins": 0.332580029964447, "rewards/rejected": -0.5105322599411011, "step": 2420 }, { "epoch": 0.65, "learning_rate": 1.661371075624363e-06, "logits/chosen": -1.5620427131652832, "logits/rejected": -1.090867519378662, "logps/chosen": -677.5968627929688, "logps/rejected": -1375.831787109375, "loss": 0.0626, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.185464009642601, "rewards/margins": 0.3168545365333557, "rewards/rejected": -0.5023185014724731, "step": 2430 }, { "epoch": 0.65, "learning_rate": 1.6394850517846621e-06, "logits/chosen": -1.4541980028152466, "logits/rejected": -0.972217857837677, "logps/chosen": -705.5538940429688, "logps/rejected": -1215.7884521484375, "loss": 0.1066, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19816702604293823, "rewards/margins": 0.24639299511909485, "rewards/rejected": -0.4445599615573883, "step": 2440 }, { "epoch": 0.65, "learning_rate": 1.6176735882153284e-06, "logits/chosen": -1.5021053552627563, "logits/rejected": -0.8954359292984009, "logps/chosen": -642.7943115234375, "logps/rejected": -1351.9677734375, "loss": 0.0746, "rewards/accuracies": 0.875, "rewards/chosen": -0.18909773230552673, "rewards/margins": 0.3134486675262451, "rewards/rejected": -0.5025463104248047, "step": 2450 }, { "epoch": 0.66, "learning_rate": 1.5959385747947697e-06, "logits/chosen": -1.4700965881347656, "logits/rejected": -0.7698783874511719, "logps/chosen": -592.5593872070312, "logps/rejected": -1228.227783203125, "loss": 0.055, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16944539546966553, "rewards/margins": 0.2940993309020996, "rewards/rejected": -0.46354469656944275, "step": 2460 }, { "epoch": 0.66, "learning_rate": 1.5742818947772875e-06, "logits/chosen": -1.6665054559707642, "logits/rejected": -0.948663592338562, "logps/chosen": -769.6544799804688, "logps/rejected": -1263.137451171875, "loss": 0.1006, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.24728581309318542, "rewards/margins": 0.25588518381118774, "rewards/rejected": -0.5031709671020508, "step": 2470 }, { "epoch": 0.66, "learning_rate": 1.552705424629898e-06, "logits/chosen": -1.4320557117462158, "logits/rejected": -0.8846480250358582, "logps/chosen": -672.8453369140625, "logps/rejected": -1423.1922607421875, "loss": 0.0535, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1967121660709381, "rewards/margins": 0.31136855483055115, "rewards/rejected": -0.508080780506134, "step": 2480 }, { "epoch": 0.66, "learning_rate": 1.5312110338697427e-06, "logits/chosen": -1.596573829650879, "logits/rejected": -0.9393990635871887, "logps/chosen": -719.8648681640625, "logps/rejected": -1458.17919921875, "loss": 0.0633, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19690574705600739, "rewards/margins": 0.33237752318382263, "rewards/rejected": -0.5292832851409912, "step": 2490 }, { "epoch": 0.67, "learning_rate": 1.509800584902108e-06, "logits/chosen": -1.6579780578613281, "logits/rejected": -1.0232713222503662, "logps/chosen": -629.9151611328125, "logps/rejected": -1246.3726806640625, "loss": 0.0641, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1578701287508011, "rewards/margins": 0.3020227253437042, "rewards/rejected": -0.45989280939102173, "step": 2500 }, { "epoch": 0.67, "learning_rate": 1.4884759328590476e-06, "logits/chosen": -1.6255989074707031, "logits/rejected": -0.9438311457633972, "logps/chosen": -571.7470703125, "logps/rejected": -1234.297119140625, "loss": 0.0744, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15460793673992157, "rewards/margins": 0.3083663582801819, "rewards/rejected": -0.4629742503166199, "step": 2510 }, { "epoch": 0.67, "learning_rate": 1.467238925438646e-06, "logits/chosen": -1.4770749807357788, "logits/rejected": -0.936165452003479, "logps/chosen": -617.3800048828125, "logps/rejected": -1241.4713134765625, "loss": 0.0783, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15939846634864807, "rewards/margins": 0.28489136695861816, "rewards/rejected": -0.44428983330726624, "step": 2520 }, { "epoch": 0.67, "learning_rate": 1.446091402744923e-06, "logits/chosen": -1.6321996450424194, "logits/rejected": -1.2910716533660889, "logps/chosen": -621.7635498046875, "logps/rejected": -1340.450439453125, "loss": 0.0587, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16068853437900543, "rewards/margins": 0.31573906540870667, "rewards/rejected": -0.4764275550842285, "step": 2530 }, { "epoch": 0.68, "learning_rate": 1.4250351971283937e-06, "logits/chosen": -1.790464162826538, "logits/rejected": -0.8707693219184875, "logps/chosen": -630.2117919921875, "logps/rejected": -1443.947509765625, "loss": 0.0503, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.12107650935649872, "rewards/margins": 0.3619995713233948, "rewards/rejected": -0.4830760955810547, "step": 2540 }, { "epoch": 0.68, "learning_rate": 1.4040721330273063e-06, "logits/chosen": -1.4005483388900757, "logits/rejected": -0.8707137107849121, "logps/chosen": -572.6295166015625, "logps/rejected": -1228.290283203125, "loss": 0.0709, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13189435005187988, "rewards/margins": 0.30900174379348755, "rewards/rejected": -0.44089609384536743, "step": 2550 }, { "epoch": 0.68, "learning_rate": 1.3832040268095589e-06, "logits/chosen": -1.5691678524017334, "logits/rejected": -0.9897274971008301, "logps/chosen": -598.7499389648438, "logps/rejected": -1142.5048828125, "loss": 0.0866, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1460307091474533, "rewards/margins": 0.24934545159339905, "rewards/rejected": -0.39537614583969116, "step": 2560 }, { "epoch": 0.69, "learning_rate": 1.362432686615316e-06, "logits/chosen": -1.69058358669281, "logits/rejected": -1.319437861442566, "logps/chosen": -554.8807373046875, "logps/rejected": -1080.8026123046875, "loss": 0.0886, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13216087222099304, "rewards/margins": 0.22610945999622345, "rewards/rejected": -0.3582703471183777, "step": 2570 }, { "epoch": 0.69, "learning_rate": 1.3417599122003464e-06, "logits/chosen": -1.4841969013214111, "logits/rejected": -0.8709270358085632, "logps/chosen": -590.7588500976562, "logps/rejected": -1269.95556640625, "loss": 0.0861, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.156617671251297, "rewards/margins": 0.2556094229221344, "rewards/rejected": -0.4122270941734314, "step": 2580 }, { "epoch": 0.69, "learning_rate": 1.3211874947800747e-06, "logits/chosen": -1.7067668437957764, "logits/rejected": -0.8970105051994324, "logps/chosen": -637.1722412109375, "logps/rejected": -1236.7254638671875, "loss": 0.0759, "rewards/accuracies": 0.875, "rewards/chosen": -0.15255072712898254, "rewards/margins": 0.2996431291103363, "rewards/rejected": -0.45219388604164124, "step": 2590 }, { "epoch": 0.69, "learning_rate": 1.3007172168743854e-06, "logits/chosen": -1.722249984741211, "logits/rejected": -0.9102805256843567, "logps/chosen": -568.446533203125, "logps/rejected": -1270.4002685546875, "loss": 0.0711, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13264772295951843, "rewards/margins": 0.33025822043418884, "rewards/rejected": -0.4629059433937073, "step": 2600 }, { "epoch": 0.7, "learning_rate": 1.280350852153168e-06, "logits/chosen": -1.4858075380325317, "logits/rejected": -1.0102977752685547, "logps/chosen": -668.56201171875, "logps/rejected": -1335.7791748046875, "loss": 0.0851, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20782490074634552, "rewards/margins": 0.29307177662849426, "rewards/rejected": -0.5008966326713562, "step": 2610 }, { "epoch": 0.7, "learning_rate": 1.260090165282645e-06, "logits/chosen": -1.3048267364501953, "logits/rejected": -0.79096519947052, "logps/chosen": -672.3717041015625, "logps/rejected": -1276.05224609375, "loss": 0.091, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2136264592409134, "rewards/margins": 0.26162099838256836, "rewards/rejected": -0.47524747252464294, "step": 2620 }, { "epoch": 0.7, "learning_rate": 1.2399369117724582e-06, "logits/chosen": -1.5877363681793213, "logits/rejected": -0.9525176882743835, "logps/chosen": -706.8630981445312, "logps/rejected": -1383.3653564453125, "loss": 0.0601, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2066899538040161, "rewards/margins": 0.30500850081443787, "rewards/rejected": -0.5116984248161316, "step": 2630 }, { "epoch": 0.7, "learning_rate": 1.2198928378235717e-06, "logits/chosen": -1.7394685745239258, "logits/rejected": -1.1289197206497192, "logps/chosen": -770.9951171875, "logps/rejected": -1358.626220703125, "loss": 0.0663, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2261141985654831, "rewards/margins": 0.2788589596748352, "rewards/rejected": -0.5049731135368347, "step": 2640 }, { "epoch": 0.71, "learning_rate": 1.1999596801769617e-06, "logits/chosen": -1.5435702800750732, "logits/rejected": -1.0047051906585693, "logps/chosen": -634.2347412109375, "logps/rejected": -1342.857177734375, "loss": 0.0383, "rewards/accuracies": 0.875, "rewards/chosen": -0.18071284890174866, "rewards/margins": 0.3370632231235504, "rewards/rejected": -0.5177761316299438, "step": 2650 }, { "epoch": 0.71, "learning_rate": 1.1801391659631423e-06, "logits/chosen": -1.4378631114959717, "logits/rejected": -1.1336383819580078, "logps/chosen": -650.482177734375, "logps/rejected": -1228.133056640625, "loss": 0.1003, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21179573237895966, "rewards/margins": 0.25491851568222046, "rewards/rejected": -0.46671420335769653, "step": 2660 }, { "epoch": 0.71, "learning_rate": 1.160433012552508e-06, "logits/chosen": -1.5026006698608398, "logits/rejected": -1.0078703165054321, "logps/chosen": -657.7984619140625, "logps/rejected": -1241.7266845703125, "loss": 0.0907, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18873688578605652, "rewards/margins": 0.2727685272693634, "rewards/rejected": -0.4615054130554199, "step": 2670 }, { "epoch": 0.71, "learning_rate": 1.1408429274065418e-06, "logits/chosen": -1.5700013637542725, "logits/rejected": -1.132730484008789, "logps/chosen": -637.8956298828125, "logps/rejected": -1278.509033203125, "loss": 0.0772, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19238412380218506, "rewards/margins": 0.2825482189655304, "rewards/rejected": -0.47493234276771545, "step": 2680 }, { "epoch": 0.72, "learning_rate": 1.1213706079298566e-06, "logits/chosen": -1.2392457723617554, "logits/rejected": -0.6231773495674133, "logps/chosen": -654.0189208984375, "logps/rejected": -1259.2901611328125, "loss": 0.0774, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2084679901599884, "rewards/margins": 0.2823956310749054, "rewards/rejected": -0.4908636212348938, "step": 2690 }, { "epoch": 0.72, "learning_rate": 1.1020177413231334e-06, "logits/chosen": -1.4837418794631958, "logits/rejected": -1.0266939401626587, "logps/chosen": -575.24169921875, "logps/rejected": -1292.1416015625, "loss": 0.0814, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17716486752033234, "rewards/margins": 0.29848557710647583, "rewards/rejected": -0.4756503999233246, "step": 2700 }, { "epoch": 0.72, "learning_rate": 1.0827860044369226e-06, "logits/chosen": -1.7130857706069946, "logits/rejected": -1.1947839260101318, "logps/chosen": -707.26171875, "logps/rejected": -1313.96484375, "loss": 0.0685, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20284132659435272, "rewards/margins": 0.28745827078819275, "rewards/rejected": -0.49029961228370667, "step": 2710 }, { "epoch": 0.73, "learning_rate": 1.06367706362636e-06, "logits/chosen": -1.60333251953125, "logits/rejected": -1.095365047454834, "logps/chosen": -590.465576171875, "logps/rejected": -1228.563720703125, "loss": 0.0736, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1857512891292572, "rewards/margins": 0.2781633734703064, "rewards/rejected": -0.463914692401886, "step": 2720 }, { "epoch": 0.73, "learning_rate": 1.0446925746067768e-06, "logits/chosen": -1.579377293586731, "logits/rejected": -0.8978742361068726, "logps/chosen": -583.1848754882812, "logps/rejected": -1341.265380859375, "loss": 0.0439, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.1685558259487152, "rewards/margins": 0.35025161504745483, "rewards/rejected": -0.5188074111938477, "step": 2730 }, { "epoch": 0.73, "learning_rate": 1.0258341823102418e-06, "logits/chosen": -1.5291283130645752, "logits/rejected": -1.0524482727050781, "logps/chosen": -632.1546020507812, "logps/rejected": -1308.994384765625, "loss": 0.0785, "rewards/accuracies": 0.875, "rewards/chosen": -0.19686022400856018, "rewards/margins": 0.2875005602836609, "rewards/rejected": -0.48436084389686584, "step": 2740 }, { "epoch": 0.73, "learning_rate": 1.0071035207430352e-06, "logits/chosen": -1.6773452758789062, "logits/rejected": -0.8837090730667114, "logps/chosen": -650.5315551757812, "logps/rejected": -1348.764404296875, "loss": 0.0673, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.16877111792564392, "rewards/margins": 0.34151607751846313, "rewards/rejected": -0.5102871656417847, "step": 2750 }, { "epoch": 0.74, "learning_rate": 9.88502212844063e-07, "logits/chosen": -1.2815361022949219, "logits/rejected": -0.6873558163642883, "logps/chosen": -566.9096069335938, "logps/rejected": -1250.682373046875, "loss": 0.0637, "rewards/accuracies": 0.875, "rewards/chosen": -0.18210506439208984, "rewards/margins": 0.3151671588420868, "rewards/rejected": -0.497272253036499, "step": 2760 }, { "epoch": 0.74, "learning_rate": 9.700318703442437e-07, "logits/chosen": -1.480957269668579, "logits/rejected": -1.109178066253662, "logps/chosen": -599.2630615234375, "logps/rejected": -1315.3450927734375, "loss": 0.0725, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16557563841342926, "rewards/margins": 0.32593974471092224, "rewards/rejected": -0.49151545763015747, "step": 2770 }, { "epoch": 0.74, "learning_rate": 9.516940936268504e-07, "logits/chosen": -1.5238648653030396, "logits/rejected": -0.9741169214248657, "logps/chosen": -644.1940307617188, "logps/rejected": -1286.3187255859375, "loss": 0.0661, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1868639439344406, "rewards/margins": 0.286797434091568, "rewards/rejected": -0.4736614227294922, "step": 2780 }, { "epoch": 0.74, "learning_rate": 9.334904715888496e-07, "logits/chosen": -1.5936267375946045, "logits/rejected": -1.060530424118042, "logps/chosen": -593.8538208007812, "logps/rejected": -1255.338623046875, "loss": 0.0778, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17814789712429047, "rewards/margins": 0.27660074830055237, "rewards/rejected": -0.45474863052368164, "step": 2790 }, { "epoch": 0.75, "learning_rate": 9.154225815032242e-07, "logits/chosen": -1.590341329574585, "logits/rejected": -0.7449840903282166, "logps/chosen": -610.4566650390625, "logps/rejected": -1260.689208984375, "loss": 0.0591, "rewards/accuracies": 0.875, "rewards/chosen": -0.1576831042766571, "rewards/margins": 0.33318689465522766, "rewards/rejected": -0.49086999893188477, "step": 2800 }, { "epoch": 0.75, "learning_rate": 8.974919888823164e-07, "logits/chosen": -1.3408777713775635, "logits/rejected": -0.7836991548538208, "logps/chosen": -589.0643920898438, "logps/rejected": -1239.4324951171875, "loss": 0.0914, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15606389939785004, "rewards/margins": 0.29545897245407104, "rewards/rejected": -0.45152291655540466, "step": 2810 }, { "epoch": 0.75, "learning_rate": 8.797002473421729e-07, "logits/chosen": -1.6109354496002197, "logits/rejected": -1.0465881824493408, "logps/chosen": -709.0189208984375, "logps/rejected": -1308.754638671875, "loss": 0.0682, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20454378426074982, "rewards/margins": 0.263343870639801, "rewards/rejected": -0.4678876996040344, "step": 2820 }, { "epoch": 0.75, "learning_rate": 8.620488984679378e-07, "logits/chosen": -1.621872901916504, "logits/rejected": -0.963768482208252, "logps/chosen": -608.9706420898438, "logps/rejected": -1198.4373779296875, "loss": 0.0642, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16753293573856354, "rewards/margins": 0.2994327247142792, "rewards/rejected": -0.4669656753540039, "step": 2830 }, { "epoch": 0.76, "learning_rate": 8.445394716802754e-07, "logits/chosen": -1.4504549503326416, "logits/rejected": -0.7841233015060425, "logps/chosen": -669.5154418945312, "logps/rejected": -1335.7557373046875, "loss": 0.0643, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19036135077476501, "rewards/margins": 0.3035343289375305, "rewards/rejected": -0.49389567971229553, "step": 2840 }, { "epoch": 0.76, "learning_rate": 8.271734841028553e-07, "logits/chosen": -1.371618390083313, "logits/rejected": -0.9690683484077454, "logps/chosen": -578.9434814453125, "logps/rejected": -1268.2574462890625, "loss": 0.0852, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17845505475997925, "rewards/margins": 0.30898019671440125, "rewards/rejected": -0.4874352812767029, "step": 2850 }, { "epoch": 0.76, "learning_rate": 8.099524404308948e-07, "logits/chosen": -1.3634693622589111, "logits/rejected": -1.2364251613616943, "logps/chosen": -655.4860229492188, "logps/rejected": -1382.656005859375, "loss": 0.0849, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21539123356342316, "rewards/margins": 0.2801755666732788, "rewards/rejected": -0.49556678533554077, "step": 2860 }, { "epoch": 0.77, "learning_rate": 7.928778328007918e-07, "logits/chosen": -1.6657747030258179, "logits/rejected": -1.1524592638015747, "logps/chosen": -609.6968994140625, "logps/rejected": -1228.130859375, "loss": 0.1019, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18815730512142181, "rewards/margins": 0.2728883922100067, "rewards/rejected": -0.46104568243026733, "step": 2870 }, { "epoch": 0.77, "learning_rate": 7.759511406608255e-07, "logits/chosen": -1.521481990814209, "logits/rejected": -1.0144175291061401, "logps/chosen": -666.2223510742188, "logps/rejected": -1294.4681396484375, "loss": 0.0567, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1811828464269638, "rewards/margins": 0.2964962124824524, "rewards/rejected": -0.4776790142059326, "step": 2880 }, { "epoch": 0.77, "learning_rate": 7.591738306429769e-07, "logits/chosen": -1.5954043865203857, "logits/rejected": -0.9687323570251465, "logps/chosen": -608.4232177734375, "logps/rejected": -1266.69482421875, "loss": 0.0699, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1722910851240158, "rewards/margins": 0.308040052652359, "rewards/rejected": -0.4803311228752136, "step": 2890 }, { "epoch": 0.77, "learning_rate": 7.425473564358457e-07, "logits/chosen": -1.4824254512786865, "logits/rejected": -0.898513913154602, "logps/chosen": -605.569091796875, "logps/rejected": -1327.9998779296875, "loss": 0.0638, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16081413626670837, "rewards/margins": 0.3248310089111328, "rewards/rejected": -0.4856451451778412, "step": 2900 }, { "epoch": 0.78, "learning_rate": 7.260731586586983e-07, "logits/chosen": -1.2875173091888428, "logits/rejected": -0.9592302441596985, "logps/chosen": -512.1587524414062, "logps/rejected": -1235.3233642578125, "loss": 0.0696, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13009069859981537, "rewards/margins": 0.3230075538158417, "rewards/rejected": -0.45309823751449585, "step": 2910 }, { "epoch": 0.78, "learning_rate": 7.097526647366379e-07, "logits/chosen": -1.526106357574463, "logits/rejected": -0.9157294034957886, "logps/chosen": -611.5623779296875, "logps/rejected": -1331.778076171875, "loss": 0.0441, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1644577980041504, "rewards/margins": 0.3466406762599945, "rewards/rejected": -0.5110985040664673, "step": 2920 }, { "epoch": 0.78, "learning_rate": 6.935872887769299e-07, "logits/chosen": -1.4740724563598633, "logits/rejected": -1.2198327779769897, "logps/chosen": -515.4468994140625, "logps/rejected": -1197.4677734375, "loss": 0.0739, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.13031090795993805, "rewards/margins": 0.29615822434425354, "rewards/rejected": -0.42646917700767517, "step": 2930 }, { "epoch": 0.78, "learning_rate": 6.775784314464717e-07, "logits/chosen": -1.691954255104065, "logits/rejected": -1.2317649126052856, "logps/chosen": -551.2989501953125, "logps/rejected": -1318.2874755859375, "loss": 0.0574, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15544219315052032, "rewards/margins": 0.31455904245376587, "rewards/rejected": -0.4700012803077698, "step": 2940 }, { "epoch": 0.79, "learning_rate": 6.617274798504286e-07, "logits/chosen": -1.5783944129943848, "logits/rejected": -0.9322364926338196, "logps/chosen": -619.6995849609375, "logps/rejected": -1310.8206787109375, "loss": 0.0682, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.13858993351459503, "rewards/margins": 0.35597696900367737, "rewards/rejected": -0.4945669174194336, "step": 2950 }, { "epoch": 0.79, "learning_rate": 6.460358074120518e-07, "logits/chosen": -1.6260960102081299, "logits/rejected": -1.0405943393707275, "logps/chosen": -588.0743408203125, "logps/rejected": -1362.0108642578125, "loss": 0.037, "rewards/accuracies": 0.875, "rewards/chosen": -0.14083652198314667, "rewards/margins": 0.32858163118362427, "rewards/rejected": -0.46941813826560974, "step": 2960 }, { "epoch": 0.79, "learning_rate": 6.305047737536707e-07, "logits/chosen": -1.509161353111267, "logits/rejected": -1.0090100765228271, "logps/chosen": -575.589599609375, "logps/rejected": -1260.2518310546875, "loss": 0.0557, "rewards/accuracies": 0.875, "rewards/chosen": -0.16428951919078827, "rewards/margins": 0.31566599011421204, "rewards/rejected": -0.4799554944038391, "step": 2970 }, { "epoch": 0.79, "learning_rate": 6.151357245788917e-07, "logits/chosen": -1.3317458629608154, "logits/rejected": -0.9225482940673828, "logps/chosen": -795.397705078125, "logps/rejected": -1355.7197265625, "loss": 0.0627, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2432664930820465, "rewards/margins": 0.2584363520145416, "rewards/rejected": -0.5017029047012329, "step": 2980 }, { "epoch": 0.8, "learning_rate": 5.999299915559956e-07, "logits/chosen": -1.482460379600525, "logits/rejected": -1.0576423406600952, "logps/chosen": -619.4547119140625, "logps/rejected": -1274.428955078125, "loss": 0.0574, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16743937134742737, "rewards/margins": 0.3212641477584839, "rewards/rejected": -0.48870354890823364, "step": 2990 }, { "epoch": 0.8, "learning_rate": 5.848888922025553e-07, "logits/chosen": -1.5611276626586914, "logits/rejected": -1.225208044052124, "logps/chosen": -633.8873901367188, "logps/rejected": -1392.9622802734375, "loss": 0.0583, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15855436027050018, "rewards/margins": 0.31555289030075073, "rewards/rejected": -0.4741072654724121, "step": 3000 }, { "epoch": 0.8, "learning_rate": 5.700137297712749e-07, "logits/chosen": -1.5436227321624756, "logits/rejected": -0.7761337161064148, "logps/chosen": -619.49609375, "logps/rejected": -1352.654052734375, "loss": 0.0673, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.16255274415016174, "rewards/margins": 0.3418361246585846, "rewards/rejected": -0.5043889284133911, "step": 3010 }, { "epoch": 0.81, "learning_rate": 5.553057931370729e-07, "logits/chosen": -1.5770825147628784, "logits/rejected": -0.8190025091171265, "logps/chosen": -633.076416015625, "logps/rejected": -1185.306396484375, "loss": 0.0764, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14611086249351501, "rewards/margins": 0.2796880602836609, "rewards/rejected": -0.4257989525794983, "step": 3020 }, { "epoch": 0.81, "learning_rate": 5.407663566854008e-07, "logits/chosen": -1.6848455667495728, "logits/rejected": -0.7886224985122681, "logps/chosen": -664.6588745117188, "logps/rejected": -1323.16064453125, "loss": 0.0494, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15056583285331726, "rewards/margins": 0.34144124388694763, "rewards/rejected": -0.4920070767402649, "step": 3030 }, { "epoch": 0.81, "learning_rate": 5.263966802018275e-07, "logits/chosen": -1.736999750137329, "logits/rejected": -0.8889113664627075, "logps/chosen": -563.3355712890625, "logps/rejected": -1182.220947265625, "loss": 0.0538, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.10529695451259613, "rewards/margins": 0.3400992751121521, "rewards/rejected": -0.4453962445259094, "step": 3040 }, { "epoch": 0.81, "learning_rate": 5.121980087628802e-07, "logits/chosen": -1.3477979898452759, "logits/rejected": -0.9392274618148804, "logps/chosen": -666.6458740234375, "logps/rejected": -1304.849853515625, "loss": 0.0825, "rewards/accuracies": 0.875, "rewards/chosen": -0.18823906779289246, "rewards/margins": 0.28339654207229614, "rewards/rejected": -0.471635639667511, "step": 3050 }, { "epoch": 0.82, "learning_rate": 4.981715726281666e-07, "logits/chosen": -1.468207597732544, "logits/rejected": -0.750462532043457, "logps/chosen": -549.2374877929688, "logps/rejected": -1129.191650390625, "loss": 0.0837, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1359584927558899, "rewards/margins": 0.2849787175655365, "rewards/rejected": -0.4209372103214264, "step": 3060 }, { "epoch": 0.82, "learning_rate": 4.843185871337722e-07, "logits/chosen": -1.5334383249282837, "logits/rejected": -1.020437240600586, "logps/chosen": -542.3077392578125, "logps/rejected": -1162.928955078125, "loss": 0.0704, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14567705988883972, "rewards/margins": 0.2968258261680603, "rewards/rejected": -0.4425028860569, "step": 3070 }, { "epoch": 0.82, "learning_rate": 4.706402525869633e-07, "logits/chosen": -1.614105463027954, "logits/rejected": -0.9975967407226562, "logps/chosen": -592.6704711914062, "logps/rejected": -1195.4998779296875, "loss": 0.0767, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14713343977928162, "rewards/margins": 0.293200820684433, "rewards/rejected": -0.4403342306613922, "step": 3080 }, { "epoch": 0.82, "learning_rate": 4.5713775416217884e-07, "logits/chosen": -1.5890741348266602, "logits/rejected": -1.1050251722335815, "logps/chosen": -633.64990234375, "logps/rejected": -1316.699462890625, "loss": 0.0565, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15724198520183563, "rewards/margins": 0.3219819962978363, "rewards/rejected": -0.47922396659851074, "step": 3090 }, { "epoch": 0.83, "learning_rate": 4.438122617983442e-07, "logits/chosen": -1.4880859851837158, "logits/rejected": -1.0814130306243896, "logps/chosen": -556.4613647460938, "logps/rejected": -1175.9158935546875, "loss": 0.0847, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1583164930343628, "rewards/margins": 0.27797532081604004, "rewards/rejected": -0.43629178404808044, "step": 3100 }, { "epoch": 0.83, "learning_rate": 4.3066493009749853e-07, "logits/chosen": -1.5819056034088135, "logits/rejected": -0.8545023798942566, "logps/chosen": -625.5091552734375, "logps/rejected": -1260.5357666015625, "loss": 0.056, "rewards/accuracies": 0.875, "rewards/chosen": -0.14933553338050842, "rewards/margins": 0.3165150284767151, "rewards/rejected": -0.4658505916595459, "step": 3110 }, { "epoch": 0.83, "learning_rate": 4.1769689822475147e-07, "logits/chosen": -1.4295012950897217, "logits/rejected": -0.9826697111129761, "logps/chosen": -595.7457885742188, "logps/rejected": -1152.125, "loss": 0.0926, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1720040887594223, "rewards/margins": 0.25839370489120483, "rewards/rejected": -0.4303978383541107, "step": 3120 }, { "epoch": 0.83, "learning_rate": 4.049092898095816e-07, "logits/chosen": -1.69967782497406, "logits/rejected": -1.0549393892288208, "logps/chosen": -673.6993408203125, "logps/rejected": -1231.6971435546875, "loss": 0.0544, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1993969976902008, "rewards/margins": 0.26704469323158264, "rewards/rejected": -0.46644172072410583, "step": 3130 }, { "epoch": 0.84, "learning_rate": 3.9230321284847856e-07, "logits/chosen": -1.347398042678833, "logits/rejected": -0.7797093391418457, "logps/chosen": -616.3978271484375, "logps/rejected": -1361.00732421875, "loss": 0.0547, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1867767572402954, "rewards/margins": 0.33997079730033875, "rewards/rejected": -0.5267475247383118, "step": 3140 }, { "epoch": 0.84, "learning_rate": 3.798797596089351e-07, "logits/chosen": -1.627018928527832, "logits/rejected": -0.8644296526908875, "logps/chosen": -735.9021606445312, "logps/rejected": -1377.241455078125, "loss": 0.0526, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19497773051261902, "rewards/margins": 0.33351653814315796, "rewards/rejected": -0.5284942388534546, "step": 3150 }, { "epoch": 0.84, "learning_rate": 3.6764000653481263e-07, "logits/chosen": -1.6354873180389404, "logits/rejected": -0.8436982035636902, "logps/chosen": -638.96728515625, "logps/rejected": -1231.9659423828125, "loss": 0.0799, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19029106199741364, "rewards/margins": 0.27815455198287964, "rewards/rejected": -0.4684455990791321, "step": 3160 }, { "epoch": 0.85, "learning_rate": 3.555850141530659e-07, "logits/chosen": -1.9651466608047485, "logits/rejected": -1.0768983364105225, "logps/chosen": -751.0382690429688, "logps/rejected": -1339.02783203125, "loss": 0.0791, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1946159154176712, "rewards/margins": 0.29727649688720703, "rewards/rejected": -0.49189239740371704, "step": 3170 }, { "epoch": 0.85, "learning_rate": 3.4371582698185636e-07, "logits/chosen": -1.511791467666626, "logits/rejected": -1.1664907932281494, "logps/chosen": -508.4298400878906, "logps/rejected": -1147.3773193359375, "loss": 0.0805, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.149129718542099, "rewards/margins": 0.2729035019874573, "rewards/rejected": -0.4220332205295563, "step": 3180 }, { "epoch": 0.85, "learning_rate": 3.3203347344004737e-07, "logits/chosen": -1.4468176364898682, "logits/rejected": -1.2824242115020752, "logps/chosen": -515.1135864257812, "logps/rejected": -1145.444580078125, "loss": 0.1051, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1785714328289032, "rewards/margins": 0.23533880710601807, "rewards/rejected": -0.41391023993492126, "step": 3190 }, { "epoch": 0.85, "learning_rate": 3.2053896575809426e-07, "logits/chosen": -1.5166642665863037, "logits/rejected": -0.9769018292427063, "logps/chosen": -721.0452880859375, "logps/rejected": -1224.28125, "loss": 0.0988, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19355639815330505, "rewards/margins": 0.24941392242908478, "rewards/rejected": -0.442970335483551, "step": 3200 }, { "epoch": 0.86, "learning_rate": 3.092332998903416e-07, "logits/chosen": -1.5955547094345093, "logits/rejected": -0.8334843516349792, "logps/chosen": -624.3068237304688, "logps/rejected": -1299.734130859375, "loss": 0.0705, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1941680610179901, "rewards/margins": 0.31897804141044617, "rewards/rejected": -0.5131461024284363, "step": 3210 }, { "epoch": 0.86, "learning_rate": 2.981174554287239e-07, "logits/chosen": -1.361081600189209, "logits/rejected": -0.7655187845230103, "logps/chosen": -657.2144775390625, "logps/rejected": -1261.987548828125, "loss": 0.0805, "rewards/accuracies": 0.875, "rewards/chosen": -0.19083121418952942, "rewards/margins": 0.29023870825767517, "rewards/rejected": -0.481069952249527, "step": 3220 }, { "epoch": 0.86, "learning_rate": 2.871923955178918e-07, "logits/chosen": -1.5999078750610352, "logits/rejected": -0.7431113719940186, "logps/chosen": -730.16455078125, "logps/rejected": -1300.1298828125, "loss": 0.0675, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21212446689605713, "rewards/margins": 0.29901638627052307, "rewards/rejected": -0.5111408829689026, "step": 3230 }, { "epoch": 0.86, "learning_rate": 2.764590667717562e-07, "logits/chosen": -1.7554610967636108, "logits/rejected": -1.0210330486297607, "logps/chosen": -691.9470825195312, "logps/rejected": -1321.851806640625, "loss": 0.0668, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17403197288513184, "rewards/margins": 0.3116183876991272, "rewards/rejected": -0.4856503903865814, "step": 3240 }, { "epoch": 0.87, "learning_rate": 2.6591839919146963e-07, "logits/chosen": -1.366562843322754, "logits/rejected": -0.8948714137077332, "logps/chosen": -622.9371948242188, "logps/rejected": -1320.292724609375, "loss": 0.068, "rewards/accuracies": 0.875, "rewards/chosen": -0.1862497329711914, "rewards/margins": 0.32431843876838684, "rewards/rejected": -0.5105680823326111, "step": 3250 }, { "epoch": 0.87, "learning_rate": 2.555713060848433e-07, "logits/chosen": -1.5601285696029663, "logits/rejected": -0.8055307269096375, "logps/chosen": -620.6419067382812, "logps/rejected": -1285.7567138671875, "loss": 0.0621, "rewards/accuracies": 0.875, "rewards/chosen": -0.187259703874588, "rewards/margins": 0.3192376494407654, "rewards/rejected": -0.506497323513031, "step": 3260 }, { "epoch": 0.87, "learning_rate": 2.454186839872158e-07, "logits/chosen": -1.2374000549316406, "logits/rejected": -0.9511027336120605, "logps/chosen": -584.9683837890625, "logps/rejected": -1257.975341796875, "loss": 0.086, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20605416595935822, "rewards/margins": 0.264712929725647, "rewards/rejected": -0.4707671105861664, "step": 3270 }, { "epoch": 0.87, "learning_rate": 2.3546141258376786e-07, "logits/chosen": -1.4687381982803345, "logits/rejected": -1.0029032230377197, "logps/chosen": -693.2366943359375, "logps/rejected": -1394.9901123046875, "loss": 0.0572, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20397081971168518, "rewards/margins": 0.31658852100372314, "rewards/rejected": -0.5205592513084412, "step": 3280 }, { "epoch": 0.88, "learning_rate": 2.257003546333042e-07, "logits/chosen": -1.8167024850845337, "logits/rejected": -0.9430249929428101, "logps/chosen": -649.8558959960938, "logps/rejected": -1447.1058349609375, "loss": 0.0456, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18286794424057007, "rewards/margins": 0.35680800676345825, "rewards/rejected": -0.5396759510040283, "step": 3290 }, { "epoch": 0.88, "learning_rate": 2.1613635589349756e-07, "logits/chosen": -1.5668996572494507, "logits/rejected": -0.9328106045722961, "logps/chosen": -549.9508056640625, "logps/rejected": -1223.6793212890625, "loss": 0.0725, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.156307652592659, "rewards/margins": 0.3133383095264435, "rewards/rejected": -0.46964597702026367, "step": 3300 }, { "epoch": 0.88, "learning_rate": 2.0677024504760752e-07, "logits/chosen": -1.8043934106826782, "logits/rejected": -1.106671929359436, "logps/chosen": -603.4810791015625, "logps/rejected": -1286.2061767578125, "loss": 0.0659, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16138550639152527, "rewards/margins": 0.31447383761405945, "rewards/rejected": -0.4758593440055847, "step": 3310 }, { "epoch": 0.89, "learning_rate": 1.9760283363267684e-07, "logits/chosen": -1.6394857168197632, "logits/rejected": -1.1434743404388428, "logps/chosen": -560.7457885742188, "logps/rejected": -1270.900390625, "loss": 0.0668, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18648633360862732, "rewards/margins": 0.3022904694080353, "rewards/rejected": -0.4887767732143402, "step": 3320 }, { "epoch": 0.89, "learning_rate": 1.8863491596921745e-07, "logits/chosen": -1.4652307033538818, "logits/rejected": -0.8056305646896362, "logps/chosen": -588.2001342773438, "logps/rejected": -1267.324951171875, "loss": 0.0493, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17455193400382996, "rewards/margins": 0.3139300048351288, "rewards/rejected": -0.48848190903663635, "step": 3330 }, { "epoch": 0.89, "learning_rate": 1.798672690923828e-07, "logits/chosen": -1.417265772819519, "logits/rejected": -0.8639974594116211, "logps/chosen": -566.65234375, "logps/rejected": -1115.6005859375, "loss": 0.0645, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1619497835636139, "rewards/margins": 0.26331502199172974, "rewards/rejected": -0.42526477575302124, "step": 3340 }, { "epoch": 0.89, "learning_rate": 1.713006526846439e-07, "logits/chosen": -1.53346848487854, "logits/rejected": -0.9233430027961731, "logps/chosen": -593.0252075195312, "logps/rejected": -1353.1142578125, "loss": 0.0667, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17712654173374176, "rewards/margins": 0.3622104525566101, "rewards/rejected": -0.5393369793891907, "step": 3350 }, { "epoch": 0.9, "learning_rate": 1.629358090099639e-07, "logits/chosen": -1.4553884267807007, "logits/rejected": -0.867672324180603, "logps/chosen": -564.8086547851562, "logps/rejected": -1151.0306396484375, "loss": 0.0901, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18404200673103333, "rewards/margins": 0.25011754035949707, "rewards/rejected": -0.4341595768928528, "step": 3360 }, { "epoch": 0.9, "learning_rate": 1.5477346284948292e-07, "logits/chosen": -1.5238468647003174, "logits/rejected": -1.0101631879806519, "logps/chosen": -620.8792724609375, "logps/rejected": -1464.6549072265625, "loss": 0.0397, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1736445277929306, "rewards/margins": 0.37397870421409607, "rewards/rejected": -0.5476232171058655, "step": 3370 }, { "epoch": 0.9, "learning_rate": 1.4681432143872133e-07, "logits/chosen": -1.4745080471038818, "logits/rejected": -1.0695136785507202, "logps/chosen": -757.6289672851562, "logps/rejected": -1409.22802734375, "loss": 0.0673, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22448639571666718, "rewards/margins": 0.2988077998161316, "rewards/rejected": -0.5232942700386047, "step": 3380 }, { "epoch": 0.9, "learning_rate": 1.3905907440629752e-07, "logits/chosen": -1.6097753047943115, "logits/rejected": -1.02309250831604, "logps/chosen": -702.1261596679688, "logps/rejected": -1313.4244384765625, "loss": 0.0943, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2056199014186859, "rewards/margins": 0.2749633491039276, "rewards/rejected": -0.4805833399295807, "step": 3390 }, { "epoch": 0.91, "learning_rate": 1.31508393714177e-07, "logits/chosen": -1.528530478477478, "logits/rejected": -1.0698211193084717, "logps/chosen": -605.3930053710938, "logps/rejected": -1313.823486328125, "loss": 0.0457, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1993333399295807, "rewards/margins": 0.3147328794002533, "rewards/rejected": -0.514066219329834, "step": 3400 }, { "epoch": 0.91, "learning_rate": 1.241629335994471e-07, "logits/chosen": -1.638108253479004, "logits/rejected": -0.8232443928718567, "logps/chosen": -782.85986328125, "logps/rejected": -1360.0474853515625, "loss": 0.0739, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2265842854976654, "rewards/margins": 0.2933647036552429, "rewards/rejected": -0.5199490785598755, "step": 3410 }, { "epoch": 0.91, "learning_rate": 1.1702333051763271e-07, "logits/chosen": -1.5153647661209106, "logits/rejected": -0.7482441663742065, "logps/chosen": -806.5430908203125, "logps/rejected": -1379.1104736328125, "loss": 0.1037, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21187452971935272, "rewards/margins": 0.2870264947414398, "rewards/rejected": -0.4989010691642761, "step": 3420 }, { "epoch": 0.91, "learning_rate": 1.1009020308754587e-07, "logits/chosen": -1.422628402709961, "logits/rejected": -1.1864253282546997, "logps/chosen": -631.9682006835938, "logps/rejected": -1306.8543701171875, "loss": 0.1056, "rewards/accuracies": 0.75, "rewards/chosen": -0.20772810280323029, "rewards/margins": 0.2739812731742859, "rewards/rejected": -0.48170939087867737, "step": 3430 }, { "epoch": 0.92, "learning_rate": 1.0336415203768962e-07, "logits/chosen": -1.5100951194763184, "logits/rejected": -0.9938896894454956, "logps/chosen": -725.32373046875, "logps/rejected": -1294.2386474609375, "loss": 0.0828, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20762935280799866, "rewards/margins": 0.26330476999282837, "rewards/rejected": -0.47093409299850464, "step": 3440 }, { "epoch": 0.92, "learning_rate": 9.684576015420277e-08, "logits/chosen": -1.4899585247039795, "logits/rejected": -0.9078477025032043, "logps/chosen": -672.1417846679688, "logps/rejected": -1225.94140625, "loss": 0.0873, "rewards/accuracies": 0.75, "rewards/chosen": -0.17601068317890167, "rewards/margins": 0.2746294140815735, "rewards/rejected": -0.4506400525569916, "step": 3450 }, { "epoch": 0.92, "learning_rate": 9.053559223036746e-08, "logits/chosen": -1.6049926280975342, "logits/rejected": -0.8928203582763672, "logps/chosen": -690.5973510742188, "logps/rejected": -1238.628173828125, "loss": 0.0812, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20545156300067902, "rewards/margins": 0.27740758657455444, "rewards/rejected": -0.48285919427871704, "step": 3460 }, { "epoch": 0.93, "learning_rate": 8.44341950176683e-08, "logits/chosen": -1.3215292692184448, "logits/rejected": -0.9712142944335938, "logps/chosen": -696.58349609375, "logps/rejected": -1317.867431640625, "loss": 0.0799, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18639104068279266, "rewards/margins": 0.28709009289741516, "rewards/rejected": -0.47348111867904663, "step": 3470 }, { "epoch": 0.93, "learning_rate": 7.854209717842231e-08, "logits/chosen": -1.5110366344451904, "logits/rejected": -0.9558350443840027, "logps/chosen": -647.8221435546875, "logps/rejected": -1398.7818603515625, "loss": 0.0389, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1730073243379593, "rewards/margins": 0.34914129972457886, "rewards/rejected": -0.522148609161377, "step": 3480 }, { "epoch": 0.93, "learning_rate": 7.285980923996989e-08, "logits/chosen": -1.4418294429779053, "logits/rejected": -0.9980441331863403, "logps/chosen": -586.1068115234375, "logps/rejected": -1378.889892578125, "loss": 0.0612, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17401185631752014, "rewards/margins": 0.3377479314804077, "rewards/rejected": -0.5117597579956055, "step": 3490 }, { "epoch": 0.93, "learning_rate": 6.738782355044048e-08, "logits/chosen": -1.775757074356079, "logits/rejected": -1.016351342201233, "logps/chosen": -715.5010986328125, "logps/rejected": -1282.7650146484375, "loss": 0.0603, "rewards/accuracies": 0.875, "rewards/chosen": -0.18584254384040833, "rewards/margins": 0.28814181685447693, "rewards/rejected": -0.47398439049720764, "step": 3500 }, { "epoch": 0.94, "learning_rate": 6.212661423609184e-08, "logits/chosen": -1.5122634172439575, "logits/rejected": -0.9780920743942261, "logps/chosen": -668.7765502929688, "logps/rejected": -1279.484619140625, "loss": 0.0737, "rewards/accuracies": 0.875, "rewards/chosen": -0.18126599490642548, "rewards/margins": 0.28931209444999695, "rewards/rejected": -0.47057804465293884, "step": 3510 }, { "epoch": 0.94, "learning_rate": 5.707663716023021e-08, "logits/chosen": -1.7019774913787842, "logits/rejected": -0.9764993786811829, "logps/chosen": -598.6218872070312, "logps/rejected": -1202.2509765625, "loss": 0.0736, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16811831295490265, "rewards/margins": 0.292553186416626, "rewards/rejected": -0.4606715142726898, "step": 3520 }, { "epoch": 0.94, "learning_rate": 5.22383298837098e-08, "logits/chosen": -1.589091420173645, "logits/rejected": -1.0338377952575684, "logps/chosen": -595.810302734375, "logps/rejected": -1203.7371826171875, "loss": 0.08, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1645013988018036, "rewards/margins": 0.29109999537467957, "rewards/rejected": -0.45560139417648315, "step": 3530 }, { "epoch": 0.94, "learning_rate": 4.761211162702117e-08, "logits/chosen": -1.7127647399902344, "logits/rejected": -0.8404116630554199, "logps/chosen": -608.0611572265625, "logps/rejected": -1101.7435302734375, "loss": 0.0955, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16957123577594757, "rewards/margins": 0.2522638440132141, "rewards/rejected": -0.4218350946903229, "step": 3540 }, { "epoch": 0.95, "learning_rate": 4.319838323396691e-08, "logits/chosen": -1.3308387994766235, "logits/rejected": -0.711550772190094, "logps/chosen": -609.5896606445312, "logps/rejected": -1312.685302734375, "loss": 0.0725, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17916107177734375, "rewards/margins": 0.2821735739707947, "rewards/rejected": -0.4613346457481384, "step": 3550 }, { "epoch": 0.95, "learning_rate": 3.8997527136930004e-08, "logits/chosen": -1.469405174255371, "logits/rejected": -0.9255521893501282, "logps/chosen": -650.4967041015625, "logps/rejected": -1233.0570068359375, "loss": 0.0673, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19022206962108612, "rewards/margins": 0.2924087345600128, "rewards/rejected": -0.48263078927993774, "step": 3560 }, { "epoch": 0.95, "learning_rate": 3.5009907323737826e-08, "logits/chosen": -1.5496537685394287, "logits/rejected": -1.0329219102859497, "logps/chosen": -653.6101684570312, "logps/rejected": -1362.90478515625, "loss": 0.0495, "rewards/accuracies": 0.875, "rewards/chosen": -0.19138206541538239, "rewards/margins": 0.3122704029083252, "rewards/rejected": -0.5036525130271912, "step": 3570 }, { "epoch": 0.95, "learning_rate": 3.1235869306123766e-08, "logits/chosen": -1.4933403730392456, "logits/rejected": -0.8053463101387024, "logps/chosen": -725.1935424804688, "logps/rejected": -1382.274169921875, "loss": 0.0651, "rewards/accuracies": 0.875, "rewards/chosen": -0.21708261966705322, "rewards/margins": 0.3156852126121521, "rewards/rejected": -0.5327678322792053, "step": 3580 }, { "epoch": 0.96, "learning_rate": 2.767574008979007e-08, "logits/chosen": -1.3949609994888306, "logits/rejected": -0.9586105346679688, "logps/chosen": -533.8550415039062, "logps/rejected": -1206.543212890625, "loss": 0.0737, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15743830800056458, "rewards/margins": 0.2968447208404541, "rewards/rejected": -0.4542829990386963, "step": 3590 }, { "epoch": 0.96, "learning_rate": 2.4329828146074096e-08, "logits/chosen": -1.6258302927017212, "logits/rejected": -1.0097240209579468, "logps/chosen": -639.5706176757812, "logps/rejected": -1331.8486328125, "loss": 0.0453, "rewards/accuracies": 0.875, "rewards/chosen": -0.17417296767234802, "rewards/margins": 0.3265232443809509, "rewards/rejected": -0.5006962418556213, "step": 3600 }, { "epoch": 0.96, "learning_rate": 2.1198423385220822e-08, "logits/chosen": -1.5256226062774658, "logits/rejected": -0.8268339037895203, "logps/chosen": -662.9381103515625, "logps/rejected": -1204.4327392578125, "loss": 0.0715, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19520241022109985, "rewards/margins": 0.27870461344718933, "rewards/rejected": -0.47390708327293396, "step": 3610 }, { "epoch": 0.97, "learning_rate": 1.82817971312621e-08, "logits/chosen": -1.7607667446136475, "logits/rejected": -1.1123876571655273, "logps/chosen": -596.0337524414062, "logps/rejected": -1304.044921875, "loss": 0.0639, "rewards/accuracies": 0.875, "rewards/chosen": -0.14941272139549255, "rewards/margins": 0.33007779717445374, "rewards/rejected": -0.47949057817459106, "step": 3620 }, { "epoch": 0.97, "learning_rate": 1.5580202098509078e-08, "logits/chosen": -1.6048189401626587, "logits/rejected": -0.984302818775177, "logps/chosen": -657.6131591796875, "logps/rejected": -1466.190185546875, "loss": 0.0442, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.18197950720787048, "rewards/margins": 0.34665971994400024, "rewards/rejected": -0.5286391973495483, "step": 3630 }, { "epoch": 0.97, "learning_rate": 1.3093872369654148e-08, "logits/chosen": -1.5646374225616455, "logits/rejected": -0.8982056379318237, "logps/chosen": -580.00146484375, "logps/rejected": -1101.258544921875, "loss": 0.1123, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1632545292377472, "rewards/margins": 0.24513199925422668, "rewards/rejected": -0.4083865284919739, "step": 3640 }, { "epoch": 0.97, "learning_rate": 1.0823023375489128e-08, "logits/chosen": -1.5190411806106567, "logits/rejected": -1.027479887008667, "logps/chosen": -595.8035888671875, "logps/rejected": -1325.04443359375, "loss": 0.0629, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17017218470573425, "rewards/margins": 0.305239737033844, "rewards/rejected": -0.47541195154190063, "step": 3650 }, { "epoch": 0.98, "learning_rate": 8.767851876239075e-09, "logits/chosen": -1.6967909336090088, "logits/rejected": -0.9378561973571777, "logps/chosen": -561.2920532226562, "logps/rejected": -1203.197021484375, "loss": 0.0622, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14759351313114166, "rewards/margins": 0.31226032972335815, "rewards/rejected": -0.4598538279533386, "step": 3660 }, { "epoch": 0.98, "learning_rate": 6.9285359445145366e-09, "logits/chosen": -1.6613948345184326, "logits/rejected": -1.1440684795379639, "logps/chosen": -600.6172485351562, "logps/rejected": -1154.182861328125, "loss": 0.0852, "rewards/accuracies": 0.75, "rewards/chosen": -0.17384423315525055, "rewards/margins": 0.26200392842292786, "rewards/rejected": -0.4358481466770172, "step": 3670 }, { "epoch": 0.98, "learning_rate": 5.305234949880001e-09, "logits/chosen": -1.5972778797149658, "logits/rejected": -0.801485538482666, "logps/chosen": -688.7052001953125, "logps/rejected": -1276.092041015625, "loss": 0.0613, "rewards/accuracies": 0.875, "rewards/chosen": -0.18506471812725067, "rewards/margins": 0.29814431071281433, "rewards/rejected": -0.4832090437412262, "step": 3680 }, { "epoch": 0.98, "learning_rate": 3.8980895450474455e-09, "logits/chosen": -1.405221700668335, "logits/rejected": -0.8206748962402344, "logps/chosen": -693.5679931640625, "logps/rejected": -1329.2283935546875, "loss": 0.0479, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19921013712882996, "rewards/margins": 0.30057448148727417, "rewards/rejected": -0.4997846186161041, "step": 3690 }, { "epoch": 0.99, "learning_rate": 2.7072216536885855e-09, "logits/chosen": -1.4513555765151978, "logits/rejected": -0.7634484767913818, "logps/chosen": -613.1361083984375, "logps/rejected": -1215.9375, "loss": 0.0642, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18391281366348267, "rewards/margins": 0.2894715964794159, "rewards/rejected": -0.47338438034057617, "step": 3700 }, { "epoch": 0.99, "learning_rate": 1.7327344598702667e-09, "logits/chosen": -1.5481555461883545, "logits/rejected": -0.75025874376297, "logps/chosen": -655.292236328125, "logps/rejected": -1392.68115234375, "loss": 0.0376, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.18728521466255188, "rewards/margins": 0.3374475836753845, "rewards/rejected": -0.524732768535614, "step": 3710 }, { "epoch": 0.99, "learning_rate": 9.747123991141193e-10, "logits/chosen": -1.4917545318603516, "logits/rejected": -1.0273144245147705, "logps/chosen": -579.1278686523438, "logps/rejected": -1233.640869140625, "loss": 0.0868, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17262960970401764, "rewards/margins": 0.2730409801006317, "rewards/rejected": -0.44567054510116577, "step": 3720 }, { "epoch": 0.99, "learning_rate": 4.332211510807427e-10, "logits/chosen": -1.4177316427230835, "logits/rejected": -0.9999169111251831, "logps/chosen": -677.0986328125, "logps/rejected": -1409.077880859375, "loss": 0.0463, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19204583764076233, "rewards/margins": 0.3291808068752289, "rewards/rejected": -0.5212266445159912, "step": 3730 }, { "epoch": 1.0, "learning_rate": 1.0830763387897902e-10, "logits/chosen": -1.4876052141189575, "logits/rejected": -0.8847878575325012, "logps/chosen": -651.7437744140625, "logps/rejected": -1331.395751953125, "loss": 0.0532, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.15324924886226654, "rewards/margins": 0.33165210485458374, "rewards/rejected": -0.4849013388156891, "step": 3740 }, { "epoch": 1.0, "learning_rate": 0.0, "logits/chosen": -1.6436065435409546, "logits/rejected": -0.8282138705253601, "logps/chosen": -606.6455688476562, "logps/rejected": -1475.3424072265625, "loss": 0.0392, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.18834789097309113, "rewards/margins": 0.36257123947143555, "rewards/rejected": -0.5509191751480103, "step": 3750 }, { "epoch": 1.0, "step": 3750, "total_flos": 0.0, "train_loss": 0.07734704875151316, "train_runtime": 15655.3296, "train_samples_per_second": 0.958, "train_steps_per_second": 0.24 } ], "logging_steps": 10, "max_steps": 3750, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }