zephyr-7b-gpo-v3-i2 / trainer_state.json
lole25's picture
Model save
ad7b878 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 3750,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.3333333333333334e-08,
"logits/chosen": -1.381319522857666,
"logits/rejected": -0.9757366180419922,
"logps/chosen": -223.25863647460938,
"logps/rejected": -830.5400390625,
"loss": 0.2593,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 1.3333333333333336e-07,
"logits/chosen": -1.736572504043579,
"logits/rejected": -1.0549728870391846,
"logps/chosen": -406.9079284667969,
"logps/rejected": -761.596435546875,
"loss": 0.1822,
"rewards/accuracies": 0.5555555820465088,
"rewards/chosen": 0.00039627417572773993,
"rewards/margins": 0.000484730233438313,
"rewards/rejected": -8.845605771057308e-05,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 2.666666666666667e-07,
"logits/chosen": -1.6399459838867188,
"logits/rejected": -1.0379071235656738,
"logps/chosen": -483.6226501464844,
"logps/rejected": -819.0009765625,
"loss": 0.1801,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 3.848170308629051e-05,
"rewards/margins": 0.00036858199746347964,
"rewards/rejected": -0.00033010030165314674,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 4.0000000000000003e-07,
"logits/chosen": -1.7753417491912842,
"logits/rejected": -1.3355859518051147,
"logps/chosen": -443.94390869140625,
"logps/rejected": -788.3363647460938,
"loss": 0.2323,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.0017691084649413824,
"rewards/margins": 0.0024432786740362644,
"rewards/rejected": -0.0006741699180565774,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 5.333333333333335e-07,
"logits/chosen": -1.5635123252868652,
"logits/rejected": -0.9124569892883301,
"logps/chosen": -458.33428955078125,
"logps/rejected": -747.6420288085938,
"loss": 0.2195,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.004033350385725498,
"rewards/margins": 0.006722611375153065,
"rewards/rejected": -0.0026892595924437046,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 6.666666666666667e-07,
"logits/chosen": -1.631588339805603,
"logits/rejected": -0.8681947588920593,
"logps/chosen": -465.05731201171875,
"logps/rejected": -838.4075927734375,
"loss": 0.2014,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 0.008273603394627571,
"rewards/margins": 0.015597726218402386,
"rewards/rejected": -0.0073241242207586765,
"step": 50
},
{
"epoch": 0.02,
"learning_rate": 8.000000000000001e-07,
"logits/chosen": -1.4628058671951294,
"logits/rejected": -1.2347371578216553,
"logps/chosen": -343.9599304199219,
"logps/rejected": -739.0056762695312,
"loss": 0.1761,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.0036266068927943707,
"rewards/margins": 0.022512439638376236,
"rewards/rejected": -0.01888582855463028,
"step": 60
},
{
"epoch": 0.02,
"learning_rate": 9.333333333333334e-07,
"logits/chosen": -1.8094412088394165,
"logits/rejected": -0.9877569079399109,
"logps/chosen": -497.0489807128906,
"logps/rejected": -864.1619262695312,
"loss": 0.1809,
"rewards/accuracies": 0.875,
"rewards/chosen": 0.003260440658777952,
"rewards/margins": 0.046338800340890884,
"rewards/rejected": -0.04307835176587105,
"step": 70
},
{
"epoch": 0.02,
"learning_rate": 1.066666666666667e-06,
"logits/chosen": -1.6897491216659546,
"logits/rejected": -1.0848586559295654,
"logps/chosen": -560.68017578125,
"logps/rejected": -1089.6458740234375,
"loss": 0.1443,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.006865059025585651,
"rewards/margins": 0.08565281331539154,
"rewards/rejected": -0.09251787513494492,
"step": 80
},
{
"epoch": 0.02,
"learning_rate": 1.2000000000000002e-06,
"logits/chosen": -1.7690013647079468,
"logits/rejected": -0.9375957250595093,
"logps/chosen": -427.4967346191406,
"logps/rejected": -953.2610473632812,
"loss": 0.1582,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.010322836227715015,
"rewards/margins": 0.07507754862308502,
"rewards/rejected": -0.08540038764476776,
"step": 90
},
{
"epoch": 0.03,
"learning_rate": 1.3333333333333334e-06,
"logits/chosen": -1.4934628009796143,
"logits/rejected": -0.9881563186645508,
"logps/chosen": -397.26727294921875,
"logps/rejected": -905.0123901367188,
"loss": 0.1339,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.01196499913930893,
"rewards/margins": 0.11481380462646484,
"rewards/rejected": -0.12677881121635437,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 1.4666666666666669e-06,
"logits/chosen": -1.559560775756836,
"logits/rejected": -0.9702051877975464,
"logps/chosen": -446.76849365234375,
"logps/rejected": -964.1668090820312,
"loss": 0.1009,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.029861677438020706,
"rewards/margins": 0.14626939594745636,
"rewards/rejected": -0.17613105475902557,
"step": 110
},
{
"epoch": 0.03,
"learning_rate": 1.6000000000000001e-06,
"logits/chosen": -1.7105035781860352,
"logits/rejected": -0.9925721287727356,
"logps/chosen": -542.6316528320312,
"logps/rejected": -977.3997192382812,
"loss": 0.1034,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.06414582580327988,
"rewards/margins": 0.1390438973903656,
"rewards/rejected": -0.2031897008419037,
"step": 120
},
{
"epoch": 0.03,
"learning_rate": 1.7333333333333336e-06,
"logits/chosen": -1.7129449844360352,
"logits/rejected": -0.9808734655380249,
"logps/chosen": -639.7268676757812,
"logps/rejected": -1264.408203125,
"loss": 0.0778,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1564100980758667,
"rewards/margins": 0.22008244693279266,
"rewards/rejected": -0.37649255990982056,
"step": 130
},
{
"epoch": 0.04,
"learning_rate": 1.8666666666666669e-06,
"logits/chosen": -1.4957599639892578,
"logits/rejected": -0.9900957345962524,
"logps/chosen": -606.5774536132812,
"logps/rejected": -1158.996826171875,
"loss": 0.1186,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.1333167403936386,
"rewards/margins": 0.18605293333530426,
"rewards/rejected": -0.31936967372894287,
"step": 140
},
{
"epoch": 0.04,
"learning_rate": 2.0000000000000003e-06,
"logits/chosen": -1.7749484777450562,
"logits/rejected": -1.1498210430145264,
"logps/chosen": -588.8472900390625,
"logps/rejected": -1247.8353271484375,
"loss": 0.0621,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.15223875641822815,
"rewards/margins": 0.2504242956638336,
"rewards/rejected": -0.40266305208206177,
"step": 150
},
{
"epoch": 0.04,
"learning_rate": 2.133333333333334e-06,
"logits/chosen": -1.4668447971343994,
"logits/rejected": -0.9629266858100891,
"logps/chosen": -740.5608520507812,
"logps/rejected": -1320.8753662109375,
"loss": 0.074,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.22708892822265625,
"rewards/margins": 0.23111894726753235,
"rewards/rejected": -0.458207905292511,
"step": 160
},
{
"epoch": 0.05,
"learning_rate": 2.266666666666667e-06,
"logits/chosen": -1.5278871059417725,
"logits/rejected": -1.1116211414337158,
"logps/chosen": -571.50390625,
"logps/rejected": -1168.722412109375,
"loss": 0.1131,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.1509171426296234,
"rewards/margins": 0.27128082513809204,
"rewards/rejected": -0.4221979081630707,
"step": 170
},
{
"epoch": 0.05,
"learning_rate": 2.4000000000000003e-06,
"logits/chosen": -1.6129930019378662,
"logits/rejected": -1.0707186460494995,
"logps/chosen": -591.6637573242188,
"logps/rejected": -1284.7354736328125,
"loss": 0.0784,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.12449514865875244,
"rewards/margins": 0.2544993758201599,
"rewards/rejected": -0.37899452447891235,
"step": 180
},
{
"epoch": 0.05,
"learning_rate": 2.5333333333333338e-06,
"logits/chosen": -1.6850178241729736,
"logits/rejected": -1.1943457126617432,
"logps/chosen": -514.7299194335938,
"logps/rejected": -1000.5671997070312,
"loss": 0.1249,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.09646569192409515,
"rewards/margins": 0.17868806421756744,
"rewards/rejected": -0.2751538157463074,
"step": 190
},
{
"epoch": 0.05,
"learning_rate": 2.666666666666667e-06,
"logits/chosen": -1.5830456018447876,
"logits/rejected": -1.097068428993225,
"logps/chosen": -658.3897705078125,
"logps/rejected": -1211.879150390625,
"loss": 0.0931,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.16921699047088623,
"rewards/margins": 0.20347478985786438,
"rewards/rejected": -0.3726917505264282,
"step": 200
},
{
"epoch": 0.06,
"learning_rate": 2.8000000000000003e-06,
"logits/chosen": -1.765363097190857,
"logits/rejected": -0.8959721326828003,
"logps/chosen": -716.1063842773438,
"logps/rejected": -1217.0675048828125,
"loss": 0.1018,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.21076449751853943,
"rewards/margins": 0.23035843670368195,
"rewards/rejected": -0.4411229193210602,
"step": 210
},
{
"epoch": 0.06,
"learning_rate": 2.9333333333333338e-06,
"logits/chosen": -1.4971026182174683,
"logits/rejected": -1.0308849811553955,
"logps/chosen": -635.874267578125,
"logps/rejected": -1254.032470703125,
"loss": 0.0944,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1963960826396942,
"rewards/margins": 0.2612842321395874,
"rewards/rejected": -0.4576803147792816,
"step": 220
},
{
"epoch": 0.06,
"learning_rate": 3.066666666666667e-06,
"logits/chosen": -1.3114674091339111,
"logits/rejected": -1.1226143836975098,
"logps/chosen": -669.163818359375,
"logps/rejected": -1447.716064453125,
"loss": 0.083,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.2657012939453125,
"rewards/margins": 0.3253920078277588,
"rewards/rejected": -0.5910933613777161,
"step": 230
},
{
"epoch": 0.06,
"learning_rate": 3.2000000000000003e-06,
"logits/chosen": -1.5960338115692139,
"logits/rejected": -0.8448678255081177,
"logps/chosen": -783.5925903320312,
"logps/rejected": -1367.287841796875,
"loss": 0.0798,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.2770916819572449,
"rewards/margins": 0.2540797293186188,
"rewards/rejected": -0.531171441078186,
"step": 240
},
{
"epoch": 0.07,
"learning_rate": 3.3333333333333333e-06,
"logits/chosen": -1.6775119304656982,
"logits/rejected": -1.2753263711929321,
"logps/chosen": -694.5948486328125,
"logps/rejected": -1378.860107421875,
"loss": 0.0801,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.2328498661518097,
"rewards/margins": 0.26127415895462036,
"rewards/rejected": -0.49412399530410767,
"step": 250
},
{
"epoch": 0.07,
"learning_rate": 3.4666666666666672e-06,
"logits/chosen": -1.4416381120681763,
"logits/rejected": -0.9755349159240723,
"logps/chosen": -681.4529418945312,
"logps/rejected": -1261.5875244140625,
"loss": 0.1169,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.18144458532333374,
"rewards/margins": 0.2220906764268875,
"rewards/rejected": -0.40353527665138245,
"step": 260
},
{
"epoch": 0.07,
"learning_rate": 3.6000000000000003e-06,
"logits/chosen": -1.776125192642212,
"logits/rejected": -1.1443500518798828,
"logps/chosen": -665.0440673828125,
"logps/rejected": -1173.3468017578125,
"loss": 0.1028,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.19433431327342987,
"rewards/margins": 0.21727688610553741,
"rewards/rejected": -0.4116111695766449,
"step": 270
},
{
"epoch": 0.07,
"learning_rate": 3.7333333333333337e-06,
"logits/chosen": -1.6408843994140625,
"logits/rejected": -1.2362545728683472,
"logps/chosen": -652.2579956054688,
"logps/rejected": -1271.698974609375,
"loss": 0.0917,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1795671433210373,
"rewards/margins": 0.24397559463977814,
"rewards/rejected": -0.4235427975654602,
"step": 280
},
{
"epoch": 0.08,
"learning_rate": 3.866666666666667e-06,
"logits/chosen": -1.8528718948364258,
"logits/rejected": -1.1004583835601807,
"logps/chosen": -762.6512451171875,
"logps/rejected": -1343.5460205078125,
"loss": 0.0868,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.226405531167984,
"rewards/margins": 0.2510288953781128,
"rewards/rejected": -0.4774344861507416,
"step": 290
},
{
"epoch": 0.08,
"learning_rate": 4.000000000000001e-06,
"logits/chosen": -1.7013801336288452,
"logits/rejected": -1.2125957012176514,
"logps/chosen": -613.29345703125,
"logps/rejected": -1406.8970947265625,
"loss": 0.0761,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.17005790770053864,
"rewards/margins": 0.32780343294143677,
"rewards/rejected": -0.4978613257408142,
"step": 300
},
{
"epoch": 0.08,
"learning_rate": 4.133333333333333e-06,
"logits/chosen": -1.415290117263794,
"logits/rejected": -0.9908515810966492,
"logps/chosen": -712.7332763671875,
"logps/rejected": -1258.039306640625,
"loss": 0.1258,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.25694146752357483,
"rewards/margins": 0.21659043431282043,
"rewards/rejected": -0.4735318720340729,
"step": 310
},
{
"epoch": 0.09,
"learning_rate": 4.266666666666668e-06,
"logits/chosen": -1.5041309595108032,
"logits/rejected": -1.0038108825683594,
"logps/chosen": -661.9385986328125,
"logps/rejected": -1160.186767578125,
"loss": 0.0992,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.21888642013072968,
"rewards/margins": 0.21484248340129852,
"rewards/rejected": -0.4337288737297058,
"step": 320
},
{
"epoch": 0.09,
"learning_rate": 4.4e-06,
"logits/chosen": -1.6438214778900146,
"logits/rejected": -1.0989625453948975,
"logps/chosen": -537.155517578125,
"logps/rejected": -1078.1251220703125,
"loss": 0.0751,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.11467760801315308,
"rewards/margins": 0.25429314374923706,
"rewards/rejected": -0.36897072196006775,
"step": 330
},
{
"epoch": 0.09,
"learning_rate": 4.533333333333334e-06,
"logits/chosen": -1.7438217401504517,
"logits/rejected": -1.0444936752319336,
"logps/chosen": -644.1298828125,
"logps/rejected": -1348.093505859375,
"loss": 0.0711,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.19465377926826477,
"rewards/margins": 0.32639193534851074,
"rewards/rejected": -0.5210457444190979,
"step": 340
},
{
"epoch": 0.09,
"learning_rate": 4.666666666666667e-06,
"logits/chosen": -1.7205537557601929,
"logits/rejected": -1.1176466941833496,
"logps/chosen": -653.7063598632812,
"logps/rejected": -1317.8218994140625,
"loss": 0.0812,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.15995605289936066,
"rewards/margins": 0.2710942327976227,
"rewards/rejected": -0.43105024099349976,
"step": 350
},
{
"epoch": 0.1,
"learning_rate": 4.800000000000001e-06,
"logits/chosen": -1.8885447978973389,
"logits/rejected": -1.4283367395401,
"logps/chosen": -435.2276916503906,
"logps/rejected": -1041.008056640625,
"loss": 0.0918,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.05559268593788147,
"rewards/margins": 0.2262849360704422,
"rewards/rejected": -0.2818776071071625,
"step": 360
},
{
"epoch": 0.1,
"learning_rate": 4.933333333333334e-06,
"logits/chosen": -1.7745654582977295,
"logits/rejected": -1.2009865045547485,
"logps/chosen": -606.5958251953125,
"logps/rejected": -1145.1015625,
"loss": 0.0968,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.11594484001398087,
"rewards/margins": 0.24505428969860077,
"rewards/rejected": -0.36099910736083984,
"step": 370
},
{
"epoch": 0.1,
"learning_rate": 4.999972922944898e-06,
"logits/chosen": -1.6557433605194092,
"logits/rejected": -1.1534380912780762,
"logps/chosen": -643.7879028320312,
"logps/rejected": -1236.194091796875,
"loss": 0.091,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.16532504558563232,
"rewards/margins": 0.24116845428943634,
"rewards/rejected": -0.40649348497390747,
"step": 380
},
{
"epoch": 0.1,
"learning_rate": 4.999756310023261e-06,
"logits/chosen": -1.6974895000457764,
"logits/rejected": -1.2435563802719116,
"logps/chosen": -619.0020751953125,
"logps/rejected": -1253.426513671875,
"loss": 0.0496,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.11514924466609955,
"rewards/margins": 0.26920756697654724,
"rewards/rejected": -0.3843567967414856,
"step": 390
},
{
"epoch": 0.11,
"learning_rate": 4.999323102948655e-06,
"logits/chosen": -1.6725631952285767,
"logits/rejected": -0.9952858686447144,
"logps/chosen": -683.99072265625,
"logps/rejected": -1263.6319580078125,
"loss": 0.0985,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.22003349661827087,
"rewards/margins": 0.21788537502288818,
"rewards/rejected": -0.4379189610481262,
"step": 400
},
{
"epoch": 0.11,
"learning_rate": 4.998673339256785e-06,
"logits/chosen": -1.6918662786483765,
"logits/rejected": -0.9807602167129517,
"logps/chosen": -646.4641723632812,
"logps/rejected": -1105.5599365234375,
"loss": 0.1276,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.13202176988124847,
"rewards/margins": 0.22680577635765076,
"rewards/rejected": -0.35882753133773804,
"step": 410
},
{
"epoch": 0.11,
"learning_rate": 4.997807075247147e-06,
"logits/chosen": -1.4633402824401855,
"logits/rejected": -0.8964066505432129,
"logps/chosen": -623.4297485351562,
"logps/rejected": -1171.818359375,
"loss": 0.0798,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.17750394344329834,
"rewards/margins": 0.23941746354103088,
"rewards/rejected": -0.4169214367866516,
"step": 420
},
{
"epoch": 0.11,
"learning_rate": 4.996724385978142e-06,
"logits/chosen": -1.7313741445541382,
"logits/rejected": -1.088205099105835,
"logps/chosen": -618.5508422851562,
"logps/rejected": -1335.5269775390625,
"loss": 0.0565,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.15107488632202148,
"rewards/margins": 0.30174392461776733,
"rewards/rejected": -0.4528188109397888,
"step": 430
},
{
"epoch": 0.12,
"learning_rate": 4.995425365260585e-06,
"logits/chosen": -1.6465423107147217,
"logits/rejected": -1.1094882488250732,
"logps/chosen": -621.9539794921875,
"logps/rejected": -1221.4598388671875,
"loss": 0.0909,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.15349408984184265,
"rewards/margins": 0.25984710454940796,
"rewards/rejected": -0.4133411943912506,
"step": 440
},
{
"epoch": 0.12,
"learning_rate": 4.993910125649561e-06,
"logits/chosen": -1.8044379949569702,
"logits/rejected": -1.1311860084533691,
"logps/chosen": -721.5858764648438,
"logps/rejected": -1256.863037109375,
"loss": 0.0915,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.21946442127227783,
"rewards/margins": 0.22845225036144257,
"rewards/rejected": -0.4479166567325592,
"step": 450
},
{
"epoch": 0.12,
"learning_rate": 4.992178798434684e-06,
"logits/chosen": -1.76088547706604,
"logits/rejected": -1.2385786771774292,
"logps/chosen": -657.9778442382812,
"logps/rejected": -1414.336669921875,
"loss": 0.0575,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.16792455315589905,
"rewards/margins": 0.3237282633781433,
"rewards/rejected": -0.49165281653404236,
"step": 460
},
{
"epoch": 0.13,
"learning_rate": 4.990231533628719e-06,
"logits/chosen": -1.5809530019760132,
"logits/rejected": -1.1684823036193848,
"logps/chosen": -623.2067260742188,
"logps/rejected": -1329.0098876953125,
"loss": 0.067,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.15883824229240417,
"rewards/margins": 0.3003080189228058,
"rewards/rejected": -0.45914632081985474,
"step": 470
},
{
"epoch": 0.13,
"learning_rate": 4.988068499954578e-06,
"logits/chosen": -1.5603920221328735,
"logits/rejected": -1.0103719234466553,
"logps/chosen": -745.253173828125,
"logps/rejected": -1386.312744140625,
"loss": 0.0639,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.2308214157819748,
"rewards/margins": 0.3027498126029968,
"rewards/rejected": -0.5335712432861328,
"step": 480
},
{
"epoch": 0.13,
"learning_rate": 4.985689884830711e-06,
"logits/chosen": -1.7204630374908447,
"logits/rejected": -1.0981186628341675,
"logps/chosen": -663.6007080078125,
"logps/rejected": -1271.954833984375,
"loss": 0.0546,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.23924314975738525,
"rewards/margins": 0.3018389344215393,
"rewards/rejected": -0.5410820841789246,
"step": 490
},
{
"epoch": 0.13,
"learning_rate": 4.983095894354858e-06,
"logits/chosen": -1.6816179752349854,
"logits/rejected": -1.2458436489105225,
"logps/chosen": -812.2794189453125,
"logps/rejected": -1452.4508056640625,
"loss": 0.0876,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.3606022000312805,
"rewards/margins": 0.2518552541732788,
"rewards/rejected": -0.6124575138092041,
"step": 500
},
{
"epoch": 0.14,
"learning_rate": 4.980286753286196e-06,
"logits/chosen": -1.675920844078064,
"logits/rejected": -1.2157505750656128,
"logps/chosen": -622.9227294921875,
"logps/rejected": -1268.478271484375,
"loss": 0.0997,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.23038606345653534,
"rewards/margins": 0.280417799949646,
"rewards/rejected": -0.5108038783073425,
"step": 510
},
{
"epoch": 0.14,
"learning_rate": 4.97726270502586e-06,
"logits/chosen": -1.6523020267486572,
"logits/rejected": -1.1194841861724854,
"logps/chosen": -536.9530029296875,
"logps/rejected": -1284.30517578125,
"loss": 0.0562,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.15182599425315857,
"rewards/margins": 0.308131605386734,
"rewards/rejected": -0.4599575400352478,
"step": 520
},
{
"epoch": 0.14,
"learning_rate": 4.974024011595864e-06,
"logits/chosen": -1.5846920013427734,
"logits/rejected": -1.2534643411636353,
"logps/chosen": -688.9484252929688,
"logps/rejected": -1183.958984375,
"loss": 0.1024,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.21223464608192444,
"rewards/margins": 0.19630616903305054,
"rewards/rejected": -0.408540815114975,
"step": 530
},
{
"epoch": 0.14,
"learning_rate": 4.970570953616383e-06,
"logits/chosen": -1.7657943964004517,
"logits/rejected": -1.2593281269073486,
"logps/chosen": -591.6409912109375,
"logps/rejected": -1163.0576171875,
"loss": 0.1017,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.17924083769321442,
"rewards/margins": 0.2396513670682907,
"rewards/rejected": -0.4188922345638275,
"step": 540
},
{
"epoch": 0.15,
"learning_rate": 4.966903830281449e-06,
"logits/chosen": -1.7614797353744507,
"logits/rejected": -1.300492286682129,
"logps/chosen": -588.6466064453125,
"logps/rejected": -1244.7601318359375,
"loss": 0.0925,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.16072975099086761,
"rewards/margins": 0.29418593645095825,
"rewards/rejected": -0.4549156725406647,
"step": 550
},
{
"epoch": 0.15,
"learning_rate": 4.9630229593330226e-06,
"logits/chosen": -1.5666195154190063,
"logits/rejected": -0.924557089805603,
"logps/chosen": -734.8566284179688,
"logps/rejected": -1351.8369140625,
"loss": 0.0874,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.236628919839859,
"rewards/margins": 0.24980910122394562,
"rewards/rejected": -0.48643797636032104,
"step": 560
},
{
"epoch": 0.15,
"learning_rate": 4.958928677033465e-06,
"logits/chosen": -1.5746439695358276,
"logits/rejected": -1.0514501333236694,
"logps/chosen": -673.4780883789062,
"logps/rejected": -1367.333740234375,
"loss": 0.0705,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.23988430202007294,
"rewards/margins": 0.26904696226119995,
"rewards/rejected": -0.5089312791824341,
"step": 570
},
{
"epoch": 0.15,
"learning_rate": 4.954621338136399e-06,
"logits/chosen": -1.5549921989440918,
"logits/rejected": -0.825292706489563,
"logps/chosen": -724.3428955078125,
"logps/rejected": -1314.2396240234375,
"loss": 0.1148,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.2795710563659668,
"rewards/margins": 0.24751707911491394,
"rewards/rejected": -0.5270881652832031,
"step": 580
},
{
"epoch": 0.16,
"learning_rate": 4.95010131585597e-06,
"logits/chosen": -1.4858559370040894,
"logits/rejected": -1.164233922958374,
"logps/chosen": -720.483642578125,
"logps/rejected": -1470.8189697265625,
"loss": 0.0644,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.28550735116004944,
"rewards/margins": 0.27395910024642944,
"rewards/rejected": -0.5594664812088013,
"step": 590
},
{
"epoch": 0.16,
"learning_rate": 4.9453690018345144e-06,
"logits/chosen": -1.7503721714019775,
"logits/rejected": -1.0189541578292847,
"logps/chosen": -725.6884765625,
"logps/rejected": -1330.4554443359375,
"loss": 0.0731,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.19357889890670776,
"rewards/margins": 0.26617223024368286,
"rewards/rejected": -0.4597511887550354,
"step": 600
},
{
"epoch": 0.16,
"learning_rate": 4.940424806108619e-06,
"logits/chosen": -1.7605764865875244,
"logits/rejected": -0.9754387140274048,
"logps/chosen": -736.2041625976562,
"logps/rejected": -1220.272216796875,
"loss": 0.0966,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.16418889164924622,
"rewards/margins": 0.2352021038532257,
"rewards/rejected": -0.3993909955024719,
"step": 610
},
{
"epoch": 0.17,
"learning_rate": 4.935269157073597e-06,
"logits/chosen": -1.6696975231170654,
"logits/rejected": -1.1732470989227295,
"logps/chosen": -546.5994262695312,
"logps/rejected": -1076.2138671875,
"loss": 0.1142,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.17854368686676025,
"rewards/margins": 0.22630243003368378,
"rewards/rejected": -0.40484610199928284,
"step": 620
},
{
"epoch": 0.17,
"learning_rate": 4.9299025014463665e-06,
"logits/chosen": -1.6130393743515015,
"logits/rejected": -0.9944950342178345,
"logps/chosen": -607.0363159179688,
"logps/rejected": -1301.4781494140625,
"loss": 0.0704,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.19457684457302094,
"rewards/margins": 0.2787570357322693,
"rewards/rejected": -0.47333383560180664,
"step": 630
},
{
"epoch": 0.17,
"learning_rate": 4.924325304226745e-06,
"logits/chosen": -1.6456743478775024,
"logits/rejected": -1.2997629642486572,
"logps/chosen": -683.3946533203125,
"logps/rejected": -1356.107177734375,
"loss": 0.0818,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.2882430851459503,
"rewards/margins": 0.2655082941055298,
"rewards/rejected": -0.5537513494491577,
"step": 640
},
{
"epoch": 0.17,
"learning_rate": 4.91853804865716e-06,
"logits/chosen": -1.406205654144287,
"logits/rejected": -1.0480941534042358,
"logps/chosen": -837.6101684570312,
"logps/rejected": -1426.9271240234375,
"loss": 0.0759,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.3547836244106293,
"rewards/margins": 0.24995502829551697,
"rewards/rejected": -0.6047386527061462,
"step": 650
},
{
"epoch": 0.18,
"learning_rate": 4.912541236180779e-06,
"logits/chosen": -1.6352602243423462,
"logits/rejected": -1.0264801979064941,
"logps/chosen": -687.4603271484375,
"logps/rejected": -1298.3892822265625,
"loss": 0.0835,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.25935202836990356,
"rewards/margins": 0.28063350915908813,
"rewards/rejected": -0.5399855375289917,
"step": 660
},
{
"epoch": 0.18,
"learning_rate": 4.9063353863980565e-06,
"logits/chosen": -1.652361512184143,
"logits/rejected": -1.3324909210205078,
"logps/chosen": -703.0999755859375,
"logps/rejected": -1310.7120361328125,
"loss": 0.0822,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.2579403519630432,
"rewards/margins": 0.26742976903915405,
"rewards/rejected": -0.525370180606842,
"step": 670
},
{
"epoch": 0.18,
"learning_rate": 4.899921037021719e-06,
"logits/chosen": -1.8630950450897217,
"logits/rejected": -1.117851734161377,
"logps/chosen": -572.1090087890625,
"logps/rejected": -1142.517822265625,
"loss": 0.0669,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.13034145534038544,
"rewards/margins": 0.2702890932559967,
"rewards/rejected": -0.40063056349754333,
"step": 680
},
{
"epoch": 0.18,
"learning_rate": 4.893298743830168e-06,
"logits/chosen": -1.5307347774505615,
"logits/rejected": -1.1395881175994873,
"logps/chosen": -570.9049072265625,
"logps/rejected": -1325.3486328125,
"loss": 0.0776,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1899944543838501,
"rewards/margins": 0.31518790125846863,
"rewards/rejected": -0.5051823854446411,
"step": 690
},
{
"epoch": 0.19,
"learning_rate": 4.88646908061933e-06,
"logits/chosen": -1.6429624557495117,
"logits/rejected": -0.952468991279602,
"logps/chosen": -504.73321533203125,
"logps/rejected": -1041.1776123046875,
"loss": 0.1034,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.08391048014163971,
"rewards/margins": 0.24313923716545105,
"rewards/rejected": -0.3270496726036072,
"step": 700
},
{
"epoch": 0.19,
"learning_rate": 4.879432639152935e-06,
"logits/chosen": -1.8941549062728882,
"logits/rejected": -1.1734158992767334,
"logps/chosen": -530.647216796875,
"logps/rejected": -1243.784912109375,
"loss": 0.0827,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.03668345510959625,
"rewards/margins": 0.30672526359558105,
"rewards/rejected": -0.3434087336063385,
"step": 710
},
{
"epoch": 0.19,
"learning_rate": 4.8721900291112415e-06,
"logits/chosen": -1.6430208683013916,
"logits/rejected": -1.1618727445602417,
"logps/chosen": -664.0516357421875,
"logps/rejected": -1358.1705322265625,
"loss": 0.064,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.1712174415588379,
"rewards/margins": 0.27873173356056213,
"rewards/rejected": -0.44994911551475525,
"step": 720
},
{
"epoch": 0.19,
"learning_rate": 4.864741878038218e-06,
"logits/chosen": -1.5911110639572144,
"logits/rejected": -0.9319060444831848,
"logps/chosen": -551.3786010742188,
"logps/rejected": -1062.9332275390625,
"loss": 0.0848,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.13672541081905365,
"rewards/margins": 0.23177051544189453,
"rewards/rejected": -0.368495911359787,
"step": 730
},
{
"epoch": 0.2,
"learning_rate": 4.857088831287158e-06,
"logits/chosen": -1.3883923292160034,
"logits/rejected": -0.8421472311019897,
"logps/chosen": -622.4197387695312,
"logps/rejected": -1147.806884765625,
"loss": 0.0948,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.19367149472236633,
"rewards/margins": 0.24927671253681183,
"rewards/rejected": -0.44294825196266174,
"step": 740
},
{
"epoch": 0.2,
"learning_rate": 4.849231551964771e-06,
"logits/chosen": -1.6589374542236328,
"logits/rejected": -1.0507800579071045,
"logps/chosen": -580.9486083984375,
"logps/rejected": -1298.055908203125,
"loss": 0.078,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1833840161561966,
"rewards/margins": 0.29099351167678833,
"rewards/rejected": -0.4743775427341461,
"step": 750
},
{
"epoch": 0.2,
"learning_rate": 4.841170720873723e-06,
"logits/chosen": -1.9193570613861084,
"logits/rejected": -1.29689359664917,
"logps/chosen": -593.7999267578125,
"logps/rejected": -1074.827880859375,
"loss": 0.1027,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.13971452414989471,
"rewards/margins": 0.2339230477809906,
"rewards/rejected": -0.3736375570297241,
"step": 760
},
{
"epoch": 0.21,
"learning_rate": 4.832907036453647e-06,
"logits/chosen": -1.7203441858291626,
"logits/rejected": -1.0238596200942993,
"logps/chosen": -575.1504516601562,
"logps/rejected": -1223.0185546875,
"loss": 0.0779,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1228102445602417,
"rewards/margins": 0.28532546758651733,
"rewards/rejected": -0.4081357419490814,
"step": 770
},
{
"epoch": 0.21,
"learning_rate": 4.824441214720629e-06,
"logits/chosen": -1.5191177129745483,
"logits/rejected": -1.0029339790344238,
"logps/chosen": -579.9059448242188,
"logps/rejected": -1316.46142578125,
"loss": 0.0519,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.1901233047246933,
"rewards/margins": 0.3058861494064331,
"rewards/rejected": -0.4960094392299652,
"step": 780
},
{
"epoch": 0.21,
"learning_rate": 4.815773989205165e-06,
"logits/chosen": -1.5836925506591797,
"logits/rejected": -0.9203447103500366,
"logps/chosen": -776.8148803710938,
"logps/rejected": -1455.188720703125,
"loss": 0.0819,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.23546621203422546,
"rewards/margins": 0.2814113199710846,
"rewards/rejected": -0.5168775916099548,
"step": 790
},
{
"epoch": 0.21,
"learning_rate": 4.806906110888606e-06,
"logits/chosen": -1.6255607604980469,
"logits/rejected": -1.1398379802703857,
"logps/chosen": -536.3740234375,
"logps/rejected": -1251.212646484375,
"loss": 0.0639,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.11835892498493195,
"rewards/margins": 0.3064490854740143,
"rewards/rejected": -0.4248080849647522,
"step": 800
},
{
"epoch": 0.22,
"learning_rate": 4.7978383481380865e-06,
"logits/chosen": -1.5587496757507324,
"logits/rejected": -1.0958257913589478,
"logps/chosen": -577.5291748046875,
"logps/rejected": -1253.976806640625,
"loss": 0.0724,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.15895147621631622,
"rewards/margins": 0.29505571722984314,
"rewards/rejected": -0.45400720834732056,
"step": 810
},
{
"epoch": 0.22,
"learning_rate": 4.788571486639948e-06,
"logits/chosen": -1.5678253173828125,
"logits/rejected": -1.0112650394439697,
"logps/chosen": -582.6275634765625,
"logps/rejected": -1258.8802490234375,
"loss": 0.0726,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.16134771704673767,
"rewards/margins": 0.3210769593715668,
"rewards/rejected": -0.48242464661598206,
"step": 820
},
{
"epoch": 0.22,
"learning_rate": 4.779106329331665e-06,
"logits/chosen": -1.5958459377288818,
"logits/rejected": -1.0422935485839844,
"logps/chosen": -604.3923950195312,
"logps/rejected": -1268.2470703125,
"loss": 0.0656,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.17159458994865417,
"rewards/margins": 0.29530078172683716,
"rewards/rejected": -0.4668954014778137,
"step": 830
},
{
"epoch": 0.22,
"learning_rate": 4.769443696332272e-06,
"logits/chosen": -1.6635258197784424,
"logits/rejected": -1.1757241487503052,
"logps/chosen": -576.9772338867188,
"logps/rejected": -1184.485595703125,
"loss": 0.0987,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.10074315965175629,
"rewards/margins": 0.25036129355430603,
"rewards/rejected": -0.3511044681072235,
"step": 840
},
{
"epoch": 0.23,
"learning_rate": 4.759584424871302e-06,
"logits/chosen": -1.4811336994171143,
"logits/rejected": -0.9249873161315918,
"logps/chosen": -593.8721313476562,
"logps/rejected": -1355.8583984375,
"loss": 0.06,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.1529252678155899,
"rewards/margins": 0.3431011736392975,
"rewards/rejected": -0.4960264265537262,
"step": 850
},
{
"epoch": 0.23,
"learning_rate": 4.749529369216246e-06,
"logits/chosen": -1.5341050624847412,
"logits/rejected": -0.9583051800727844,
"logps/chosen": -680.0247802734375,
"logps/rejected": -1330.1800537109375,
"loss": 0.0771,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.18310308456420898,
"rewards/margins": 0.31003543734550476,
"rewards/rejected": -0.49313855171203613,
"step": 860
},
{
"epoch": 0.23,
"learning_rate": 4.7392794005985324e-06,
"logits/chosen": -1.3542709350585938,
"logits/rejected": -0.9462020993232727,
"logps/chosen": -521.2957763671875,
"logps/rejected": -1280.841552734375,
"loss": 0.0739,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.12296704202890396,
"rewards/margins": 0.3202964663505554,
"rewards/rejected": -0.44326353073120117,
"step": 870
},
{
"epoch": 0.23,
"learning_rate": 4.7288354071380415e-06,
"logits/chosen": -1.4632813930511475,
"logits/rejected": -1.0232326984405518,
"logps/chosen": -572.9237060546875,
"logps/rejected": -1308.82666015625,
"loss": 0.0532,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.11455889046192169,
"rewards/margins": 0.3093631863594055,
"rewards/rejected": -0.423922061920166,
"step": 880
},
{
"epoch": 0.24,
"learning_rate": 4.7181982937661485e-06,
"logits/chosen": -1.8567373752593994,
"logits/rejected": -0.8586881756782532,
"logps/chosen": -683.0592651367188,
"logps/rejected": -1194.775634765625,
"loss": 0.0782,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.15534023940563202,
"rewards/margins": 0.24916231632232666,
"rewards/rejected": -0.4045025706291199,
"step": 890
},
{
"epoch": 0.24,
"learning_rate": 4.707368982147318e-06,
"logits/chosen": -1.5189939737319946,
"logits/rejected": -1.0900559425354004,
"logps/chosen": -617.9810791015625,
"logps/rejected": -1228.6695556640625,
"loss": 0.0811,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.16101650893688202,
"rewards/margins": 0.2594471573829651,
"rewards/rejected": -0.4204636514186859,
"step": 900
},
{
"epoch": 0.24,
"learning_rate": 4.696348410599244e-06,
"logits/chosen": -1.609279990196228,
"logits/rejected": -0.9329544901847839,
"logps/chosen": -649.111328125,
"logps/rejected": -1244.6878662109375,
"loss": 0.0947,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1598513424396515,
"rewards/margins": 0.2521916627883911,
"rewards/rejected": -0.4120430052280426,
"step": 910
},
{
"epoch": 0.25,
"learning_rate": 4.685137534011549e-06,
"logits/chosen": -1.5942234992980957,
"logits/rejected": -0.9433167576789856,
"logps/chosen": -600.16796875,
"logps/rejected": -1137.0755615234375,
"loss": 0.0973,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1486530601978302,
"rewards/margins": 0.2408868372440338,
"rewards/rejected": -0.389539897441864,
"step": 920
},
{
"epoch": 0.25,
"learning_rate": 4.673737323763048e-06,
"logits/chosen": -1.757784128189087,
"logits/rejected": -0.9788764715194702,
"logps/chosen": -526.8590087890625,
"logps/rejected": -1164.993408203125,
"loss": 0.0518,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.06887698173522949,
"rewards/margins": 0.3250022530555725,
"rewards/rejected": -0.3938792049884796,
"step": 930
},
{
"epoch": 0.25,
"learning_rate": 4.662148767637578e-06,
"logits/chosen": -1.695051908493042,
"logits/rejected": -1.0422875881195068,
"logps/chosen": -673.8726806640625,
"logps/rejected": -1251.0634765625,
"loss": 0.0824,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.1298268437385559,
"rewards/margins": 0.2638325095176697,
"rewards/rejected": -0.3936593532562256,
"step": 940
},
{
"epoch": 0.25,
"learning_rate": 4.650372869738415e-06,
"logits/chosen": -1.817731261253357,
"logits/rejected": -1.1714346408843994,
"logps/chosen": -632.3103637695312,
"logps/rejected": -1204.101806640625,
"loss": 0.0757,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.08371297270059586,
"rewards/margins": 0.2807037830352783,
"rewards/rejected": -0.36441677808761597,
"step": 950
},
{
"epoch": 0.26,
"learning_rate": 4.638410650401267e-06,
"logits/chosen": -1.6917314529418945,
"logits/rejected": -1.2975494861602783,
"logps/chosen": -501.3326110839844,
"logps/rejected": -1110.889404296875,
"loss": 0.1007,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.010560419410467148,
"rewards/margins": 0.2687085270881653,
"rewards/rejected": -0.27926892042160034,
"step": 960
},
{
"epoch": 0.26,
"learning_rate": 4.626263146105875e-06,
"logits/chosen": -1.681670904159546,
"logits/rejected": -1.0678811073303223,
"logps/chosen": -548.2903442382812,
"logps/rejected": -1203.9267578125,
"loss": 0.0685,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.08060415089130402,
"rewards/margins": 0.297444224357605,
"rewards/rejected": -0.3780483603477478,
"step": 970
},
{
"epoch": 0.26,
"learning_rate": 4.613931409386196e-06,
"logits/chosen": -1.469982385635376,
"logits/rejected": -1.1716678142547607,
"logps/chosen": -675.4676513671875,
"logps/rejected": -1349.0924072265625,
"loss": 0.087,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.18839700520038605,
"rewards/margins": 0.28368327021598816,
"rewards/rejected": -0.4720802903175354,
"step": 980
},
{
"epoch": 0.26,
"learning_rate": 4.601416508739211e-06,
"logits/chosen": -1.589691162109375,
"logits/rejected": -1.0157541036605835,
"logps/chosen": -608.4013061523438,
"logps/rejected": -1338.4168701171875,
"loss": 0.0375,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.15417249500751495,
"rewards/margins": 0.30883660912513733,
"rewards/rejected": -0.4630090594291687,
"step": 990
},
{
"epoch": 0.27,
"learning_rate": 4.588719528532342e-06,
"logits/chosen": -1.6860065460205078,
"logits/rejected": -1.0934185981750488,
"logps/chosen": -706.9453735351562,
"logps/rejected": -1332.388916015625,
"loss": 0.0972,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.2328319251537323,
"rewards/margins": 0.26258260011672974,
"rewards/rejected": -0.49541449546813965,
"step": 1000
},
{
"epoch": 0.27,
"learning_rate": 4.575841568909494e-06,
"logits/chosen": -1.433650016784668,
"logits/rejected": -1.1477049589157104,
"logps/chosen": -688.2756958007812,
"logps/rejected": -1245.33447265625,
"loss": 0.0894,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.22389158606529236,
"rewards/margins": 0.24267525970935822,
"rewards/rejected": -0.46656686067581177,
"step": 1010
},
{
"epoch": 0.27,
"learning_rate": 4.562783745695738e-06,
"logits/chosen": -1.5687224864959717,
"logits/rejected": -0.853603720664978,
"logps/chosen": -791.3260498046875,
"logps/rejected": -1367.469482421875,
"loss": 0.0939,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.21331918239593506,
"rewards/margins": 0.2616121768951416,
"rewards/rejected": -0.4749313294887543,
"step": 1020
},
{
"epoch": 0.27,
"learning_rate": 4.549547190300622e-06,
"logits/chosen": -1.7121455669403076,
"logits/rejected": -0.8818023800849915,
"logps/chosen": -657.982177734375,
"logps/rejected": -1258.4931640625,
"loss": 0.0877,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.12182845175266266,
"rewards/margins": 0.31102484464645386,
"rewards/rejected": -0.4328532814979553,
"step": 1030
},
{
"epoch": 0.28,
"learning_rate": 4.536133049620143e-06,
"logits/chosen": -1.4799646139144897,
"logits/rejected": -1.1651620864868164,
"logps/chosen": -479.6021423339844,
"logps/rejected": -1181.6322021484375,
"loss": 0.0789,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.09868054836988449,
"rewards/margins": 0.26966673135757446,
"rewards/rejected": -0.36834731698036194,
"step": 1040
},
{
"epoch": 0.28,
"learning_rate": 4.522542485937369e-06,
"logits/chosen": -1.827845573425293,
"logits/rejected": -1.0856597423553467,
"logps/chosen": -625.0631103515625,
"logps/rejected": -1238.838134765625,
"loss": 0.0703,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1401088982820511,
"rewards/margins": 0.30787450075149536,
"rewards/rejected": -0.44798341393470764,
"step": 1050
},
{
"epoch": 0.28,
"learning_rate": 4.508776676821739e-06,
"logits/chosen": -1.5721492767333984,
"logits/rejected": -0.8746267557144165,
"logps/chosen": -652.6207275390625,
"logps/rejected": -1226.645751953125,
"loss": 0.0661,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.17216768860816956,
"rewards/margins": 0.27679505944252014,
"rewards/rejected": -0.4489627778530121,
"step": 1060
},
{
"epoch": 0.29,
"learning_rate": 4.494836815027022e-06,
"logits/chosen": -1.6152639389038086,
"logits/rejected": -1.128306269645691,
"logps/chosen": -588.0072021484375,
"logps/rejected": -1203.587890625,
"loss": 0.0879,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.15817685425281525,
"rewards/margins": 0.2696138024330139,
"rewards/rejected": -0.42779064178466797,
"step": 1070
},
{
"epoch": 0.29,
"learning_rate": 4.4807241083879774e-06,
"logits/chosen": -1.3238633871078491,
"logits/rejected": -0.7138497233390808,
"logps/chosen": -601.8871459960938,
"logps/rejected": -1312.838623046875,
"loss": 0.0552,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.15420369803905487,
"rewards/margins": 0.3349114656448364,
"rewards/rejected": -0.4891151785850525,
"step": 1080
},
{
"epoch": 0.29,
"learning_rate": 4.466439779715696e-06,
"logits/chosen": -1.2504911422729492,
"logits/rejected": -0.7397804856300354,
"logps/chosen": -631.8212890625,
"logps/rejected": -1243.5828857421875,
"loss": 0.0868,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.19839642941951752,
"rewards/margins": 0.29437923431396484,
"rewards/rejected": -0.49277567863464355,
"step": 1090
},
{
"epoch": 0.29,
"learning_rate": 4.451985066691649e-06,
"logits/chosen": -1.809372901916504,
"logits/rejected": -0.8999295234680176,
"logps/chosen": -633.7000122070312,
"logps/rejected": -1235.184814453125,
"loss": 0.0684,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1565568596124649,
"rewards/margins": 0.30287352204322815,
"rewards/rejected": -0.45943036675453186,
"step": 1100
},
{
"epoch": 0.3,
"learning_rate": 4.437361221760449e-06,
"logits/chosen": -1.5753552913665771,
"logits/rejected": -0.8743413090705872,
"logps/chosen": -684.4049072265625,
"logps/rejected": -1305.688232421875,
"loss": 0.0796,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.17457641661167145,
"rewards/margins": 0.2981758415699005,
"rewards/rejected": -0.47275224328041077,
"step": 1110
},
{
"epoch": 0.3,
"learning_rate": 4.422569512021332e-06,
"logits/chosen": -1.5101526975631714,
"logits/rejected": -0.977883517742157,
"logps/chosen": -588.9329223632812,
"logps/rejected": -1159.4969482421875,
"loss": 0.0763,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1433287113904953,
"rewards/margins": 0.25360313057899475,
"rewards/rejected": -0.39693182706832886,
"step": 1120
},
{
"epoch": 0.3,
"learning_rate": 4.407611219118363e-06,
"logits/chosen": -1.4618273973464966,
"logits/rejected": -1.0363489389419556,
"logps/chosen": -448.98089599609375,
"logps/rejected": -1272.61865234375,
"loss": 0.0453,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.09960681945085526,
"rewards/margins": 0.31163084506988525,
"rewards/rejected": -0.4112376570701599,
"step": 1130
},
{
"epoch": 0.3,
"learning_rate": 4.3924876391293915e-06,
"logits/chosen": -1.6461549997329712,
"logits/rejected": -0.9912912249565125,
"logps/chosen": -611.0677490234375,
"logps/rejected": -1183.2215576171875,
"loss": 0.0846,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1306958794593811,
"rewards/margins": 0.28848105669021606,
"rewards/rejected": -0.41917696595191956,
"step": 1140
},
{
"epoch": 0.31,
"learning_rate": 4.377200082453748e-06,
"logits/chosen": -1.8383325338363647,
"logits/rejected": -0.9174288511276245,
"logps/chosen": -618.29638671875,
"logps/rejected": -1216.172119140625,
"loss": 0.0708,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.11510708183050156,
"rewards/margins": 0.29687556624412537,
"rewards/rejected": -0.41198262572288513,
"step": 1150
},
{
"epoch": 0.31,
"learning_rate": 4.361749873698707e-06,
"logits/chosen": -1.3221898078918457,
"logits/rejected": -0.9603347778320312,
"logps/chosen": -517.3427124023438,
"logps/rejected": -1317.27099609375,
"loss": 0.0551,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.09739838540554047,
"rewards/margins": 0.3246908485889435,
"rewards/rejected": -0.42208918929100037,
"step": 1160
},
{
"epoch": 0.31,
"learning_rate": 4.346138351564711e-06,
"logits/chosen": -1.7003717422485352,
"logits/rejected": -0.7912822961807251,
"logps/chosen": -598.9830932617188,
"logps/rejected": -1176.973388671875,
"loss": 0.0841,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.18871501088142395,
"rewards/margins": 0.29334282875061035,
"rewards/rejected": -0.4820578098297119,
"step": 1170
},
{
"epoch": 0.31,
"learning_rate": 4.330366868729376e-06,
"logits/chosen": -1.4258317947387695,
"logits/rejected": -1.199805498123169,
"logps/chosen": -769.9146728515625,
"logps/rejected": -1417.9521484375,
"loss": 0.0696,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.25572648644447327,
"rewards/margins": 0.25885215401649475,
"rewards/rejected": -0.5145785808563232,
"step": 1180
},
{
"epoch": 0.32,
"learning_rate": 4.3144367917302964e-06,
"logits/chosen": -1.580244779586792,
"logits/rejected": -0.9348461031913757,
"logps/chosen": -604.8896484375,
"logps/rejected": -1246.4227294921875,
"loss": 0.0574,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.14286699891090393,
"rewards/margins": 0.30815887451171875,
"rewards/rejected": -0.4510258734226227,
"step": 1190
},
{
"epoch": 0.32,
"learning_rate": 4.2983495008466285e-06,
"logits/chosen": -1.449998140335083,
"logits/rejected": -1.0776797533035278,
"logps/chosen": -581.5048217773438,
"logps/rejected": -1115.7093505859375,
"loss": 0.1337,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.0915089100599289,
"rewards/margins": 0.22939009964466095,
"rewards/rejected": -0.32089897990226746,
"step": 1200
},
{
"epoch": 0.32,
"learning_rate": 4.2821063899795015e-06,
"logits/chosen": -1.1581242084503174,
"logits/rejected": -0.6956531405448914,
"logps/chosen": -486.95257568359375,
"logps/rejected": -1211.6070556640625,
"loss": 0.0813,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.02962355688214302,
"rewards/margins": 0.285904198884964,
"rewards/rejected": -0.3155277669429779,
"step": 1210
},
{
"epoch": 0.33,
"learning_rate": 4.265708866531238e-06,
"logits/chosen": -1.6472032070159912,
"logits/rejected": -1.1526950597763062,
"logps/chosen": -458.576171875,
"logps/rejected": -1106.2225341796875,
"loss": 0.0867,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.08721883594989777,
"rewards/margins": 0.26627764105796814,
"rewards/rejected": -0.3534964919090271,
"step": 1220
},
{
"epoch": 0.33,
"learning_rate": 4.249158351283414e-06,
"logits/chosen": -1.5008890628814697,
"logits/rejected": -0.9523450136184692,
"logps/chosen": -564.3670654296875,
"logps/rejected": -1191.93798828125,
"loss": 0.0889,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.12786266207695007,
"rewards/margins": 0.2664950489997864,
"rewards/rejected": -0.39435768127441406,
"step": 1230
},
{
"epoch": 0.33,
"learning_rate": 4.232456278273743e-06,
"logits/chosen": -1.5925250053405762,
"logits/rejected": -0.787007749080658,
"logps/chosen": -634.4956665039062,
"logps/rejected": -1199.644287109375,
"loss": 0.0832,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1421954333782196,
"rewards/margins": 0.27115732431411743,
"rewards/rejected": -0.4133527874946594,
"step": 1240
},
{
"epoch": 0.33,
"learning_rate": 4.215604094671835e-06,
"logits/chosen": -1.5216588973999023,
"logits/rejected": -0.8067516088485718,
"logps/chosen": -736.2872314453125,
"logps/rejected": -1336.3583984375,
"loss": 0.0645,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.2195323407649994,
"rewards/margins": 0.2807646691799164,
"rewards/rejected": -0.500296950340271,
"step": 1250
},
{
"epoch": 0.34,
"learning_rate": 4.198603260653792e-06,
"logits/chosen": -1.6377366781234741,
"logits/rejected": -1.1248198747634888,
"logps/chosen": -589.9413452148438,
"logps/rejected": -1174.857666015625,
"loss": 0.0898,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1545807123184204,
"rewards/margins": 0.25077009201049805,
"rewards/rejected": -0.40535083413124084,
"step": 1260
},
{
"epoch": 0.34,
"learning_rate": 4.181455249275701e-06,
"logits/chosen": -1.359490990638733,
"logits/rejected": -0.7116638422012329,
"logps/chosen": -482.0816345214844,
"logps/rejected": -1281.95068359375,
"loss": 0.0926,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.06251558661460876,
"rewards/margins": 0.3599362075328827,
"rewards/rejected": -0.42245182394981384,
"step": 1270
},
{
"epoch": 0.34,
"learning_rate": 4.1641615463459926e-06,
"logits/chosen": -1.4103469848632812,
"logits/rejected": -0.9721433520317078,
"logps/chosen": -495.11639404296875,
"logps/rejected": -1221.6544189453125,
"loss": 0.0611,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.09844540059566498,
"rewards/margins": 0.32037508487701416,
"rewards/rejected": -0.41882047057151794,
"step": 1280
},
{
"epoch": 0.34,
"learning_rate": 4.146723650296701e-06,
"logits/chosen": -1.5504339933395386,
"logits/rejected": -0.8985152244567871,
"logps/chosen": -516.6334228515625,
"logps/rejected": -1175.6920166015625,
"loss": 0.0714,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.15037675201892853,
"rewards/margins": 0.27001953125,
"rewards/rejected": -0.42039623856544495,
"step": 1290
},
{
"epoch": 0.35,
"learning_rate": 4.129143072053639e-06,
"logits/chosen": -1.5418593883514404,
"logits/rejected": -0.9249173402786255,
"logps/chosen": -479.78521728515625,
"logps/rejected": -1107.382568359375,
"loss": 0.0675,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1212129220366478,
"rewards/margins": 0.27643007040023804,
"rewards/rejected": -0.3976430296897888,
"step": 1300
},
{
"epoch": 0.35,
"learning_rate": 4.111421334905468e-06,
"logits/chosen": -1.461808443069458,
"logits/rejected": -0.8558281660079956,
"logps/chosen": -667.7941284179688,
"logps/rejected": -1245.40234375,
"loss": 0.0659,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1607908010482788,
"rewards/margins": 0.2721284031867981,
"rewards/rejected": -0.4329192638397217,
"step": 1310
},
{
"epoch": 0.35,
"learning_rate": 4.093559974371725e-06,
"logits/chosen": -1.3010141849517822,
"logits/rejected": -0.9021800756454468,
"logps/chosen": -656.9991455078125,
"logps/rejected": -1318.10205078125,
"loss": 0.0846,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.20327822864055634,
"rewards/margins": 0.2836567759513855,
"rewards/rejected": -0.48693498969078064,
"step": 1320
},
{
"epoch": 0.35,
"learning_rate": 4.075560538069767e-06,
"logits/chosen": -1.2061702013015747,
"logits/rejected": -0.8291865587234497,
"logps/chosen": -608.1866455078125,
"logps/rejected": -1327.5355224609375,
"loss": 0.0806,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.16998444497585297,
"rewards/margins": 0.32111790776252747,
"rewards/rejected": -0.49110230803489685,
"step": 1330
},
{
"epoch": 0.36,
"learning_rate": 4.05742458558068e-06,
"logits/chosen": -1.6475965976715088,
"logits/rejected": -0.8810272216796875,
"logps/chosen": -616.7311401367188,
"logps/rejected": -1327.117431640625,
"loss": 0.062,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.1568661332130432,
"rewards/margins": 0.3473976254463196,
"rewards/rejected": -0.5042637586593628,
"step": 1340
},
{
"epoch": 0.36,
"learning_rate": 4.039153688314146e-06,
"logits/chosen": -1.3039356470108032,
"logits/rejected": -0.9500153660774231,
"logps/chosen": -611.8599853515625,
"logps/rejected": -1257.874267578125,
"loss": 0.0689,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1369098573923111,
"rewards/margins": 0.29038089513778687,
"rewards/rejected": -0.42729073762893677,
"step": 1350
},
{
"epoch": 0.36,
"learning_rate": 4.020749429372286e-06,
"logits/chosen": -1.4324769973754883,
"logits/rejected": -0.807245135307312,
"logps/chosen": -625.339111328125,
"logps/rejected": -1257.253173828125,
"loss": 0.089,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.14000260829925537,
"rewards/margins": 0.28776806592941284,
"rewards/rejected": -0.4277706742286682,
"step": 1360
},
{
"epoch": 0.37,
"learning_rate": 4.002213403412492e-06,
"logits/chosen": -1.445261001586914,
"logits/rejected": -0.9508928060531616,
"logps/chosen": -594.2431030273438,
"logps/rejected": -1150.235595703125,
"loss": 0.073,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.15963387489318848,
"rewards/margins": 0.2395774871110916,
"rewards/rejected": -0.3992113471031189,
"step": 1370
},
{
"epoch": 0.37,
"learning_rate": 3.983547216509254e-06,
"logits/chosen": -1.33284592628479,
"logits/rejected": -0.7798209190368652,
"logps/chosen": -605.1170654296875,
"logps/rejected": -1115.391845703125,
"loss": 0.0769,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.17733311653137207,
"rewards/margins": 0.23692724108695984,
"rewards/rejected": -0.4142603278160095,
"step": 1380
},
{
"epoch": 0.37,
"learning_rate": 3.964752486015001e-06,
"logits/chosen": -1.353686809539795,
"logits/rejected": -0.9458833932876587,
"logps/chosen": -541.7138671875,
"logps/rejected": -1133.408935546875,
"loss": 0.0832,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1459619700908661,
"rewards/margins": 0.25547298789024353,
"rewards/rejected": -0.4014349579811096,
"step": 1390
},
{
"epoch": 0.37,
"learning_rate": 3.945830840419966e-06,
"logits/chosen": -1.3489134311676025,
"logits/rejected": -1.114639163017273,
"logps/chosen": -650.7469482421875,
"logps/rejected": -1327.571044921875,
"loss": 0.1023,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.19746045768260956,
"rewards/margins": 0.3029775023460388,
"rewards/rejected": -0.5004379749298096,
"step": 1400
},
{
"epoch": 0.38,
"learning_rate": 3.92678391921108e-06,
"logits/chosen": -1.3552124500274658,
"logits/rejected": -0.8964862823486328,
"logps/chosen": -505.401123046875,
"logps/rejected": -1367.124267578125,
"loss": 0.0499,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.12019245326519012,
"rewards/margins": 0.3561457395553589,
"rewards/rejected": -0.4763382375240326,
"step": 1410
},
{
"epoch": 0.38,
"learning_rate": 3.907613372729916e-06,
"logits/chosen": -1.5101354122161865,
"logits/rejected": -1.0879840850830078,
"logps/chosen": -619.8988037109375,
"logps/rejected": -1368.367919921875,
"loss": 0.0501,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.15163154900074005,
"rewards/margins": 0.34400704503059387,
"rewards/rejected": -0.49563854932785034,
"step": 1420
},
{
"epoch": 0.38,
"learning_rate": 3.888320862029699e-06,
"logits/chosen": -1.5360214710235596,
"logits/rejected": -0.9855524897575378,
"logps/chosen": -748.1507568359375,
"logps/rejected": -1296.6070556640625,
"loss": 0.0988,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1940506398677826,
"rewards/margins": 0.2233462780714035,
"rewards/rejected": -0.4173968732357025,
"step": 1430
},
{
"epoch": 0.38,
"learning_rate": 3.868908058731376e-06,
"logits/chosen": -1.5095126628875732,
"logits/rejected": -0.8970023989677429,
"logps/chosen": -497.6952209472656,
"logps/rejected": -1121.1767578125,
"loss": 0.07,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.10547561943531036,
"rewards/margins": 0.27518388628959656,
"rewards/rejected": -0.3806595206260681,
"step": 1440
},
{
"epoch": 0.39,
"learning_rate": 3.849376644878783e-06,
"logits/chosen": -1.4751381874084473,
"logits/rejected": -0.9001661539077759,
"logps/chosen": -588.8636474609375,
"logps/rejected": -1271.7371826171875,
"loss": 0.0558,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.1327972114086151,
"rewards/margins": 0.305908739566803,
"rewards/rejected": -0.4387059211730957,
"step": 1450
},
{
"epoch": 0.39,
"learning_rate": 3.829728312792895e-06,
"logits/chosen": -1.616092324256897,
"logits/rejected": -1.0537126064300537,
"logps/chosen": -540.6871337890625,
"logps/rejected": -1179.548828125,
"loss": 0.0725,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.09595973044633865,
"rewards/margins": 0.3011319935321808,
"rewards/rejected": -0.39709168672561646,
"step": 1460
},
{
"epoch": 0.39,
"learning_rate": 3.8099647649251984e-06,
"logits/chosen": -1.423906683921814,
"logits/rejected": -0.7600029706954956,
"logps/chosen": -594.6055908203125,
"logps/rejected": -1255.1416015625,
"loss": 0.0918,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.12480314821004868,
"rewards/margins": 0.3190528154373169,
"rewards/rejected": -0.44385600090026855,
"step": 1470
},
{
"epoch": 0.39,
"learning_rate": 3.790087713710179e-06,
"logits/chosen": -1.5575999021530151,
"logits/rejected": -1.1269023418426514,
"logps/chosen": -627.612548828125,
"logps/rejected": -1399.1263427734375,
"loss": 0.0576,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.1461525857448578,
"rewards/margins": 0.3743034303188324,
"rewards/rejected": -0.5204560160636902,
"step": 1480
},
{
"epoch": 0.4,
"learning_rate": 3.770098881416945e-06,
"logits/chosen": -1.4309592247009277,
"logits/rejected": -0.8114882707595825,
"logps/chosen": -639.8936767578125,
"logps/rejected": -1324.5836181640625,
"loss": 0.0548,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.15769222378730774,
"rewards/margins": 0.3088419735431671,
"rewards/rejected": -0.46653419733047485,
"step": 1490
},
{
"epoch": 0.4,
"learning_rate": 3.7500000000000005e-06,
"logits/chosen": -1.3537265062332153,
"logits/rejected": -0.9575905799865723,
"logps/chosen": -644.993408203125,
"logps/rejected": -1161.0167236328125,
"loss": 0.0985,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.13782911002635956,
"rewards/margins": 0.23069393634796143,
"rewards/rejected": -0.3685230612754822,
"step": 1500
},
{
"epoch": 0.4,
"learning_rate": 3.7297928109491765e-06,
"logits/chosen": -1.7066447734832764,
"logits/rejected": -0.8981353044509888,
"logps/chosen": -499.3677673339844,
"logps/rejected": -1228.71533203125,
"loss": 0.0597,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.10399389266967773,
"rewards/margins": 0.3211382031440735,
"rewards/rejected": -0.42513203620910645,
"step": 1510
},
{
"epoch": 0.41,
"learning_rate": 3.7094790651387414e-06,
"logits/chosen": -1.5993268489837646,
"logits/rejected": -0.943587601184845,
"logps/chosen": -549.6283569335938,
"logps/rejected": -1147.561279296875,
"loss": 0.0736,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.09537569433450699,
"rewards/margins": 0.2917521297931671,
"rewards/rejected": -0.3871277868747711,
"step": 1520
},
{
"epoch": 0.41,
"learning_rate": 3.689060522675689e-06,
"logits/chosen": -1.4540773630142212,
"logits/rejected": -0.9544679522514343,
"logps/chosen": -567.1152954101562,
"logps/rejected": -1310.57080078125,
"loss": 0.067,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.13753007352352142,
"rewards/margins": 0.3150814175605774,
"rewards/rejected": -0.45261150598526,
"step": 1530
},
{
"epoch": 0.41,
"learning_rate": 3.668538952747236e-06,
"logits/chosen": -1.5060861110687256,
"logits/rejected": -1.0383261442184448,
"logps/chosen": -541.1341552734375,
"logps/rejected": -1345.200927734375,
"loss": 0.0495,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.08431238681077957,
"rewards/margins": 0.34140679240226746,
"rewards/rejected": -0.42571917176246643,
"step": 1540
},
{
"epoch": 0.41,
"learning_rate": 3.6479161334675294e-06,
"logits/chosen": -1.6825485229492188,
"logits/rejected": -0.8775063753128052,
"logps/chosen": -636.63427734375,
"logps/rejected": -1235.6170654296875,
"loss": 0.0867,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.09943681955337524,
"rewards/margins": 0.2878590226173401,
"rewards/rejected": -0.38729584217071533,
"step": 1550
},
{
"epoch": 0.42,
"learning_rate": 3.627193851723577e-06,
"logits/chosen": -1.686977744102478,
"logits/rejected": -1.0781195163726807,
"logps/chosen": -594.205078125,
"logps/rejected": -1142.4056396484375,
"loss": 0.084,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.11142469942569733,
"rewards/margins": 0.26075294613838196,
"rewards/rejected": -0.3721776604652405,
"step": 1560
},
{
"epoch": 0.42,
"learning_rate": 3.6063739030204226e-06,
"logits/chosen": -1.571839451789856,
"logits/rejected": -1.1530930995941162,
"logps/chosen": -549.628173828125,
"logps/rejected": -1172.0718994140625,
"loss": 0.0807,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.10474500805139542,
"rewards/margins": 0.269944429397583,
"rewards/rejected": -0.37468940019607544,
"step": 1570
},
{
"epoch": 0.42,
"learning_rate": 3.5854580913255706e-06,
"logits/chosen": -1.61894953250885,
"logits/rejected": -0.9551402926445007,
"logps/chosen": -607.7701416015625,
"logps/rejected": -1296.424072265625,
"loss": 0.053,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.13554790616035461,
"rewards/margins": 0.3056618571281433,
"rewards/rejected": -0.4412097930908203,
"step": 1580
},
{
"epoch": 0.42,
"learning_rate": 3.564448228912682e-06,
"logits/chosen": -1.6686357259750366,
"logits/rejected": -1.0033533573150635,
"logps/chosen": -650.9888916015625,
"logps/rejected": -1215.917724609375,
"loss": 0.0889,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1597142517566681,
"rewards/margins": 0.26373302936553955,
"rewards/rejected": -0.42344728112220764,
"step": 1590
},
{
"epoch": 0.43,
"learning_rate": 3.543346136204545e-06,
"logits/chosen": -1.3500854969024658,
"logits/rejected": -0.8943287134170532,
"logps/chosen": -594.9763793945312,
"logps/rejected": -1269.090576171875,
"loss": 0.0873,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.14664295315742493,
"rewards/margins": 0.29260388016700745,
"rewards/rejected": -0.4392468333244324,
"step": 1600
},
{
"epoch": 0.43,
"learning_rate": 3.522153641615345e-06,
"logits/chosen": -1.5656368732452393,
"logits/rejected": -0.917197048664093,
"logps/chosen": -623.9747924804688,
"logps/rejected": -1312.198486328125,
"loss": 0.0674,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1256953328847885,
"rewards/margins": 0.3076288104057312,
"rewards/rejected": -0.4333241581916809,
"step": 1610
},
{
"epoch": 0.43,
"learning_rate": 3.5008725813922383e-06,
"logits/chosen": -1.4567426443099976,
"logits/rejected": -1.0509058237075806,
"logps/chosen": -514.6878051757812,
"logps/rejected": -1177.8046875,
"loss": 0.0862,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1288285255432129,
"rewards/margins": 0.2865757346153259,
"rewards/rejected": -0.4154042601585388,
"step": 1620
},
{
"epoch": 0.43,
"learning_rate": 3.4795047994562463e-06,
"logits/chosen": -1.6101102828979492,
"logits/rejected": -1.2119176387786865,
"logps/chosen": -549.9823608398438,
"logps/rejected": -1237.9873046875,
"loss": 0.0912,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1458604633808136,
"rewards/margins": 0.2935812771320343,
"rewards/rejected": -0.4394417405128479,
"step": 1630
},
{
"epoch": 0.44,
"learning_rate": 3.458052147242494e-06,
"logits/chosen": -1.6634056568145752,
"logits/rejected": -0.9373501539230347,
"logps/chosen": -565.6293334960938,
"logps/rejected": -1192.64208984375,
"loss": 0.065,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.15129828453063965,
"rewards/margins": 0.2895079553127289,
"rewards/rejected": -0.44080623984336853,
"step": 1640
},
{
"epoch": 0.44,
"learning_rate": 3.436516483539781e-06,
"logits/chosen": -1.4864572286605835,
"logits/rejected": -1.038694143295288,
"logps/chosen": -491.3564453125,
"logps/rejected": -1235.2786865234375,
"loss": 0.0685,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.09666456282138824,
"rewards/margins": 0.3150130808353424,
"rewards/rejected": -0.41167759895324707,
"step": 1650
},
{
"epoch": 0.44,
"learning_rate": 3.4148996743295305e-06,
"logits/chosen": -1.561033010482788,
"logits/rejected": -0.8384987711906433,
"logps/chosen": -697.5499877929688,
"logps/rejected": -1347.208251953125,
"loss": 0.0675,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.17049038410186768,
"rewards/margins": 0.30814796686172485,
"rewards/rejected": -0.47863835096359253,
"step": 1660
},
{
"epoch": 0.45,
"learning_rate": 3.3932035926241103e-06,
"logits/chosen": -1.4081476926803589,
"logits/rejected": -1.0466349124908447,
"logps/chosen": -630.5411376953125,
"logps/rejected": -1300.04541015625,
"loss": 0.0695,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.12465560436248779,
"rewards/margins": 0.29317888617515564,
"rewards/rejected": -0.41783446073532104,
"step": 1670
},
{
"epoch": 0.45,
"learning_rate": 3.3714301183045382e-06,
"logits/chosen": -1.6413304805755615,
"logits/rejected": -0.9996837377548218,
"logps/chosen": -584.4578857421875,
"logps/rejected": -1310.6175537109375,
"loss": 0.0728,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.16229207813739777,
"rewards/margins": 0.3190585970878601,
"rewards/rejected": -0.4813506603240967,
"step": 1680
},
{
"epoch": 0.45,
"learning_rate": 3.349581137957604e-06,
"logits/chosen": -1.5459994077682495,
"logits/rejected": -0.8717397451400757,
"logps/chosen": -689.1311645507812,
"logps/rejected": -1347.6954345703125,
"loss": 0.0896,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.18276944756507874,
"rewards/margins": 0.30370309948921204,
"rewards/rejected": -0.48647254705429077,
"step": 1690
},
{
"epoch": 0.45,
"learning_rate": 3.3276585447123957e-06,
"logits/chosen": -1.5712751150131226,
"logits/rejected": -0.9994010925292969,
"logps/chosen": -587.7183227539062,
"logps/rejected": -1291.80322265625,
"loss": 0.0678,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.13140062987804413,
"rewards/margins": 0.2967793345451355,
"rewards/rejected": -0.4281799793243408,
"step": 1700
},
{
"epoch": 0.46,
"learning_rate": 3.3056642380762783e-06,
"logits/chosen": -1.45646071434021,
"logits/rejected": -0.9559444189071655,
"logps/chosen": -743.5758056640625,
"logps/rejected": -1357.35693359375,
"loss": 0.0537,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.1634978950023651,
"rewards/margins": 0.26821058988571167,
"rewards/rejected": -0.4317084848880768,
"step": 1710
},
{
"epoch": 0.46,
"learning_rate": 3.2836001237702993e-06,
"logits/chosen": -1.2642626762390137,
"logits/rejected": -0.8956004977226257,
"logps/chosen": -672.7369995117188,
"logps/rejected": -1317.1905517578125,
"loss": 0.0843,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.19635051488876343,
"rewards/margins": 0.26600882411003113,
"rewards/rejected": -0.46235933899879456,
"step": 1720
},
{
"epoch": 0.46,
"learning_rate": 3.2614681135640696e-06,
"logits/chosen": -1.538914442062378,
"logits/rejected": -0.9134441614151001,
"logps/chosen": -672.8499755859375,
"logps/rejected": -1275.136474609375,
"loss": 0.0585,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.17318633198738098,
"rewards/margins": 0.288135290145874,
"rewards/rejected": -0.4613215923309326,
"step": 1730
},
{
"epoch": 0.46,
"learning_rate": 3.2392701251101172e-06,
"logits/chosen": -1.4427398443222046,
"logits/rejected": -0.8872078061103821,
"logps/chosen": -686.1043701171875,
"logps/rejected": -1366.374755859375,
"loss": 0.0687,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.19739912450313568,
"rewards/margins": 0.28218263387680054,
"rewards/rejected": -0.479581743478775,
"step": 1740
},
{
"epoch": 0.47,
"learning_rate": 3.217008081777726e-06,
"logits/chosen": -1.3497573137283325,
"logits/rejected": -0.872177004814148,
"logps/chosen": -589.72412109375,
"logps/rejected": -1277.909423828125,
"loss": 0.0563,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.13301566243171692,
"rewards/margins": 0.2993132770061493,
"rewards/rejected": -0.4323289394378662,
"step": 1750
},
{
"epoch": 0.47,
"learning_rate": 3.1946839124862873e-06,
"logits/chosen": -1.3833153247833252,
"logits/rejected": -1.0205485820770264,
"logps/chosen": -539.8531494140625,
"logps/rejected": -1183.308837890625,
"loss": 0.0925,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1366974115371704,
"rewards/margins": 0.2687934637069702,
"rewards/rejected": -0.4054908752441406,
"step": 1760
},
{
"epoch": 0.47,
"learning_rate": 3.1722995515381644e-06,
"logits/chosen": -1.4850012063980103,
"logits/rejected": -0.8387139439582825,
"logps/chosen": -636.983642578125,
"logps/rejected": -1304.3221435546875,
"loss": 0.0795,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1878032237291336,
"rewards/margins": 0.2711120843887329,
"rewards/rejected": -0.4589153230190277,
"step": 1770
},
{
"epoch": 0.47,
"learning_rate": 3.149856938451094e-06,
"logits/chosen": -1.0989512205123901,
"logits/rejected": -0.8349654078483582,
"logps/chosen": -627.0206298828125,
"logps/rejected": -1307.218505859375,
"loss": 0.0903,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1822880506515503,
"rewards/margins": 0.3011923134326935,
"rewards/rejected": -0.4834803640842438,
"step": 1780
},
{
"epoch": 0.48,
"learning_rate": 3.127358017790132e-06,
"logits/chosen": -1.485824704170227,
"logits/rejected": -0.8337934613227844,
"logps/chosen": -623.2086791992188,
"logps/rejected": -1302.7957763671875,
"loss": 0.0511,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.15663902461528778,
"rewards/margins": 0.3056802451610565,
"rewards/rejected": -0.4623193144798279,
"step": 1790
},
{
"epoch": 0.48,
"learning_rate": 3.1048047389991693e-06,
"logits/chosen": -1.7094755172729492,
"logits/rejected": -1.041133165359497,
"logps/chosen": -671.2548217773438,
"logps/rejected": -1311.30419921875,
"loss": 0.1018,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.167900949716568,
"rewards/margins": 0.27187058329582214,
"rewards/rejected": -0.43977150321006775,
"step": 1800
},
{
"epoch": 0.48,
"learning_rate": 3.082199056232015e-06,
"logits/chosen": -1.6966993808746338,
"logits/rejected": -1.220529556274414,
"logps/chosen": -596.4708251953125,
"logps/rejected": -1238.605224609375,
"loss": 0.0583,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.12312579154968262,
"rewards/margins": 0.272055447101593,
"rewards/rejected": -0.39518123865127563,
"step": 1810
},
{
"epoch": 0.49,
"learning_rate": 3.059542928183079e-06,
"logits/chosen": -1.661625623703003,
"logits/rejected": -1.1181296110153198,
"logps/chosen": -575.0912475585938,
"logps/rejected": -1266.91064453125,
"loss": 0.0801,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.11585699021816254,
"rewards/margins": 0.2694869041442871,
"rewards/rejected": -0.38534384965896606,
"step": 1820
},
{
"epoch": 0.49,
"learning_rate": 3.0368383179176584e-06,
"logits/chosen": -1.606603980064392,
"logits/rejected": -1.0587247610092163,
"logps/chosen": -512.89013671875,
"logps/rejected": -1128.80859375,
"loss": 0.0617,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.10761525481939316,
"rewards/margins": 0.2635376751422882,
"rewards/rejected": -0.37115293741226196,
"step": 1830
},
{
"epoch": 0.49,
"learning_rate": 3.0140871927018466e-06,
"logits/chosen": -1.5754063129425049,
"logits/rejected": -0.8801782727241516,
"logps/chosen": -655.8692626953125,
"logps/rejected": -1180.616943359375,
"loss": 0.07,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.153736412525177,
"rewards/margins": 0.2592002749443054,
"rewards/rejected": -0.41293662786483765,
"step": 1840
},
{
"epoch": 0.49,
"learning_rate": 2.9912915238320755e-06,
"logits/chosen": -1.427549123764038,
"logits/rejected": -1.0166289806365967,
"logps/chosen": -586.8533325195312,
"logps/rejected": -1178.782958984375,
"loss": 0.0907,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.15467192232608795,
"rewards/margins": 0.2657596468925476,
"rewards/rejected": -0.420431524515152,
"step": 1850
},
{
"epoch": 0.5,
"learning_rate": 2.9684532864643123e-06,
"logits/chosen": -1.580718755722046,
"logits/rejected": -1.1227762699127197,
"logps/chosen": -620.2764892578125,
"logps/rejected": -1323.167236328125,
"loss": 0.0675,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.14635826647281647,
"rewards/margins": 0.3277556300163269,
"rewards/rejected": -0.4741138815879822,
"step": 1860
},
{
"epoch": 0.5,
"learning_rate": 2.945574459442917e-06,
"logits/chosen": -1.291585922241211,
"logits/rejected": -0.7484699487686157,
"logps/chosen": -530.9237060546875,
"logps/rejected": -1149.4744873046875,
"loss": 0.0707,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1255730241537094,
"rewards/margins": 0.28315025568008423,
"rewards/rejected": -0.40872329473495483,
"step": 1870
},
{
"epoch": 0.5,
"learning_rate": 2.922657025129185e-06,
"logits/chosen": -1.3349201679229736,
"logits/rejected": -0.9772024154663086,
"logps/chosen": -620.5244140625,
"logps/rejected": -1263.1776123046875,
"loss": 0.0585,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.16309909522533417,
"rewards/margins": 0.273107647895813,
"rewards/rejected": -0.43620675802230835,
"step": 1880
},
{
"epoch": 0.5,
"learning_rate": 2.8997029692295875e-06,
"logits/chosen": -1.4110755920410156,
"logits/rejected": -0.9908379316329956,
"logps/chosen": -516.9703369140625,
"logps/rejected": -1351.016357421875,
"loss": 0.0628,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.14381906390190125,
"rewards/margins": 0.3800903558731079,
"rewards/rejected": -0.5239094495773315,
"step": 1890
},
{
"epoch": 0.51,
"learning_rate": 2.876714280623708e-06,
"logits/chosen": -1.4153320789337158,
"logits/rejected": -0.820611298084259,
"logps/chosen": -487.5252990722656,
"logps/rejected": -1135.842529296875,
"loss": 0.0915,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.10841517150402069,
"rewards/margins": 0.29948437213897705,
"rewards/rejected": -0.40789952874183655,
"step": 1900
},
{
"epoch": 0.51,
"learning_rate": 2.8536929511919227e-06,
"logits/chosen": -1.4119293689727783,
"logits/rejected": -0.8228232264518738,
"logps/chosen": -627.8701171875,
"logps/rejected": -1294.2296142578125,
"loss": 0.0469,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.12193576991558075,
"rewards/margins": 0.32117849588394165,
"rewards/rejected": -0.4431142807006836,
"step": 1910
},
{
"epoch": 0.51,
"learning_rate": 2.8306409756428067e-06,
"logits/chosen": -1.527777075767517,
"logits/rejected": -0.8934208154678345,
"logps/chosen": -581.7477416992188,
"logps/rejected": -1238.830810546875,
"loss": 0.0816,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1308506429195404,
"rewards/margins": 0.29394230246543884,
"rewards/rejected": -0.42479294538497925,
"step": 1920
},
{
"epoch": 0.51,
"learning_rate": 2.807560351340302e-06,
"logits/chosen": -1.3228596448898315,
"logits/rejected": -0.7611247897148132,
"logps/chosen": -601.160400390625,
"logps/rejected": -1213.851806640625,
"loss": 0.0707,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1343916952610016,
"rewards/margins": 0.28090834617614746,
"rewards/rejected": -0.4152999818325043,
"step": 1930
},
{
"epoch": 0.52,
"learning_rate": 2.7844530781306544e-06,
"logits/chosen": -1.4402358531951904,
"logits/rejected": -0.8715489506721497,
"logps/chosen": -518.7476806640625,
"logps/rejected": -1256.0328369140625,
"loss": 0.0528,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.11650246381759644,
"rewards/margins": 0.3238303065299988,
"rewards/rejected": -0.4403327405452728,
"step": 1940
},
{
"epoch": 0.52,
"learning_rate": 2.761321158169134e-06,
"logits/chosen": -1.5251938104629517,
"logits/rejected": -1.1043987274169922,
"logps/chosen": -656.2462158203125,
"logps/rejected": -1206.697021484375,
"loss": 0.0894,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.17089466750621796,
"rewards/margins": 0.22701752185821533,
"rewards/rejected": -0.3979122042655945,
"step": 1950
},
{
"epoch": 0.52,
"learning_rate": 2.738166595746554e-06,
"logits/chosen": -1.3905115127563477,
"logits/rejected": -0.9935697317123413,
"logps/chosen": -628.3776245117188,
"logps/rejected": -1096.552734375,
"loss": 0.0795,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.15120725333690643,
"rewards/margins": 0.24288305640220642,
"rewards/rejected": -0.39409032464027405,
"step": 1960
},
{
"epoch": 0.53,
"learning_rate": 2.7149913971156105e-06,
"logits/chosen": -1.722516655921936,
"logits/rejected": -1.0052350759506226,
"logps/chosen": -496.67657470703125,
"logps/rejected": -1112.5921630859375,
"loss": 0.0733,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.10522119700908661,
"rewards/margins": 0.28908994793891907,
"rewards/rejected": -0.3943111300468445,
"step": 1970
},
{
"epoch": 0.53,
"learning_rate": 2.6917975703170466e-06,
"logits/chosen": -1.4045814275741577,
"logits/rejected": -0.9305141568183899,
"logps/chosen": -512.0284423828125,
"logps/rejected": -1209.204345703125,
"loss": 0.0672,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1334155797958374,
"rewards/margins": 0.2813461720943451,
"rewards/rejected": -0.4147617220878601,
"step": 1980
},
{
"epoch": 0.53,
"learning_rate": 2.668587125005663e-06,
"logits/chosen": -1.343185305595398,
"logits/rejected": -1.0273711681365967,
"logps/chosen": -549.5206298828125,
"logps/rejected": -1240.9766845703125,
"loss": 0.0707,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1306164562702179,
"rewards/margins": 0.29476845264434814,
"rewards/rejected": -0.4253849387168884,
"step": 1990
},
{
"epoch": 0.53,
"learning_rate": 2.6453620722761897e-06,
"logits/chosen": -1.6594680547714233,
"logits/rejected": -0.850638210773468,
"logps/chosen": -591.6373291015625,
"logps/rejected": -1358.3460693359375,
"loss": 0.0625,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.10016246140003204,
"rewards/margins": 0.33138564229011536,
"rewards/rejected": -0.43154802918434143,
"step": 2000
},
{
"epoch": 0.54,
"learning_rate": 2.6221244244890336e-06,
"logits/chosen": -1.5611286163330078,
"logits/rejected": -0.799461841583252,
"logps/chosen": -587.806640625,
"logps/rejected": -1161.3482666015625,
"loss": 0.0633,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.12262831628322601,
"rewards/margins": 0.2799530327320099,
"rewards/rejected": -0.4025813639163971,
"step": 2010
},
{
"epoch": 0.54,
"learning_rate": 2.5988761950959133e-06,
"logits/chosen": -1.6644928455352783,
"logits/rejected": -0.9483749270439148,
"logps/chosen": -535.2498168945312,
"logps/rejected": -1164.773681640625,
"loss": 0.0748,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1117793545126915,
"rewards/margins": 0.281157910823822,
"rewards/rejected": -0.3929373323917389,
"step": 2020
},
{
"epoch": 0.54,
"learning_rate": 2.575619398465402e-06,
"logits/chosen": -1.5217034816741943,
"logits/rejected": -0.715064287185669,
"logps/chosen": -589.2874755859375,
"logps/rejected": -1261.570556640625,
"loss": 0.0622,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1293199360370636,
"rewards/margins": 0.3084322214126587,
"rewards/rejected": -0.4377521574497223,
"step": 2030
},
{
"epoch": 0.54,
"learning_rate": 2.5523560497083927e-06,
"logits/chosen": -1.6095244884490967,
"logits/rejected": -0.9723415374755859,
"logps/chosen": -610.6204223632812,
"logps/rejected": -1346.6697998046875,
"loss": 0.0527,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.18110117316246033,
"rewards/margins": 0.3312448561191559,
"rewards/rejected": -0.5123459696769714,
"step": 2040
},
{
"epoch": 0.55,
"learning_rate": 2.5290881645034932e-06,
"logits/chosen": -1.4780033826828003,
"logits/rejected": -0.983650803565979,
"logps/chosen": -654.4927978515625,
"logps/rejected": -1217.2103271484375,
"loss": 0.0931,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1973283588886261,
"rewards/margins": 0.2564861476421356,
"rewards/rejected": -0.4538145065307617,
"step": 2050
},
{
"epoch": 0.55,
"learning_rate": 2.5058177589223766e-06,
"logits/chosen": -1.6766622066497803,
"logits/rejected": -0.907199501991272,
"logps/chosen": -659.3814086914062,
"logps/rejected": -1295.118896484375,
"loss": 0.0769,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.14497703313827515,
"rewards/margins": 0.3324897885322571,
"rewards/rejected": -0.477466881275177,
"step": 2060
},
{
"epoch": 0.55,
"learning_rate": 2.482546849255096e-06,
"logits/chosen": -1.5557941198349,
"logits/rejected": -0.8595023155212402,
"logps/chosen": -599.567626953125,
"logps/rejected": -1293.6220703125,
"loss": 0.038,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.14183931052684784,
"rewards/margins": 0.32466521859169006,
"rewards/rejected": -0.4665044844150543,
"step": 2070
},
{
"epoch": 0.55,
"learning_rate": 2.4592774518353858e-06,
"logits/chosen": -1.3870598077774048,
"logits/rejected": -0.7746745944023132,
"logps/chosen": -578.613525390625,
"logps/rejected": -1236.041748046875,
"loss": 0.0582,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1255788505077362,
"rewards/margins": 0.2901487350463867,
"rewards/rejected": -0.41572752594947815,
"step": 2080
},
{
"epoch": 0.56,
"learning_rate": 2.436011582865945e-06,
"logits/chosen": -1.539902925491333,
"logits/rejected": -0.8010295629501343,
"logps/chosen": -680.379638671875,
"logps/rejected": -1223.6986083984375,
"loss": 0.0678,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.15240542590618134,
"rewards/margins": 0.2671021819114685,
"rewards/rejected": -0.41950759291648865,
"step": 2090
},
{
"epoch": 0.56,
"learning_rate": 2.4127512582437486e-06,
"logits/chosen": -1.5265008211135864,
"logits/rejected": -1.3408691883087158,
"logps/chosen": -541.078125,
"logps/rejected": -1169.788818359375,
"loss": 0.0992,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.15979784727096558,
"rewards/margins": 0.2517135739326477,
"rewards/rejected": -0.4115114212036133,
"step": 2100
},
{
"epoch": 0.56,
"learning_rate": 2.3894984933853734e-06,
"logits/chosen": -1.4429771900177002,
"logits/rejected": -0.9880257844924927,
"logps/chosen": -528.2786254882812,
"logps/rejected": -1244.5059814453125,
"loss": 0.0763,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.14250726997852325,
"rewards/margins": 0.30752262473106384,
"rewards/rejected": -0.4500298500061035,
"step": 2110
},
{
"epoch": 0.57,
"learning_rate": 2.366255303052377e-06,
"logits/chosen": -1.4017646312713623,
"logits/rejected": -0.8113569021224976,
"logps/chosen": -604.7733154296875,
"logps/rejected": -1221.2369384765625,
"loss": 0.094,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.15813498198986053,
"rewards/margins": 0.2982472777366638,
"rewards/rejected": -0.45638221502304077,
"step": 2120
},
{
"epoch": 0.57,
"learning_rate": 2.3430237011767166e-06,
"logits/chosen": -1.5092931985855103,
"logits/rejected": -0.8236274719238281,
"logps/chosen": -533.9609985351562,
"logps/rejected": -1168.628173828125,
"loss": 0.0722,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.13640852272510529,
"rewards/margins": 0.28761720657348633,
"rewards/rejected": -0.4240257740020752,
"step": 2130
},
{
"epoch": 0.57,
"learning_rate": 2.319805700686257e-06,
"logits/chosen": -1.5162551403045654,
"logits/rejected": -0.786509096622467,
"logps/chosen": -623.4132080078125,
"logps/rejected": -1179.5946044921875,
"loss": 0.0579,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1392001360654831,
"rewards/margins": 0.26958781480789185,
"rewards/rejected": -0.40878796577453613,
"step": 2140
},
{
"epoch": 0.57,
"learning_rate": 2.296603313330355e-06,
"logits/chosen": -1.1993951797485352,
"logits/rejected": -0.6298279166221619,
"logps/chosen": -582.5997314453125,
"logps/rejected": -1334.009765625,
"loss": 0.0706,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.17245307564735413,
"rewards/margins": 0.29515841603279114,
"rewards/rejected": -0.46761149168014526,
"step": 2150
},
{
"epoch": 0.58,
"learning_rate": 2.2734185495055503e-06,
"logits/chosen": -1.7049709558486938,
"logits/rejected": -0.9636220932006836,
"logps/chosen": -705.0452270507812,
"logps/rejected": -1329.1993408203125,
"loss": 0.0595,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.15186749398708344,
"rewards/margins": 0.3114302158355713,
"rewards/rejected": -0.46329769492149353,
"step": 2160
},
{
"epoch": 0.58,
"learning_rate": 2.250253418081373e-06,
"logits/chosen": -1.4931375980377197,
"logits/rejected": -1.0522197484970093,
"logps/chosen": -568.1475830078125,
"logps/rejected": -1231.364013671875,
"loss": 0.0732,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.13682815432548523,
"rewards/margins": 0.29910990595817566,
"rewards/rejected": -0.4359380602836609,
"step": 2170
},
{
"epoch": 0.58,
"learning_rate": 2.22710992622628e-06,
"logits/chosen": -1.4385493993759155,
"logits/rejected": -1.0058460235595703,
"logps/chosen": -473.60498046875,
"logps/rejected": -1201.9755859375,
"loss": 0.0674,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.10265137255191803,
"rewards/margins": 0.32189419865608215,
"rewards/rejected": -0.4245455265045166,
"step": 2180
},
{
"epoch": 0.58,
"learning_rate": 2.2039900792337477e-06,
"logits/chosen": -1.3410050868988037,
"logits/rejected": -1.1295228004455566,
"logps/chosen": -553.0806274414062,
"logps/rejected": -1231.395263671875,
"loss": 0.0821,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.16361066699028015,
"rewards/margins": 0.2729097008705139,
"rewards/rejected": -0.43652039766311646,
"step": 2190
},
{
"epoch": 0.59,
"learning_rate": 2.1808958803485134e-06,
"logits/chosen": -1.3688017129898071,
"logits/rejected": -0.9007024765014648,
"logps/chosen": -651.8567504882812,
"logps/rejected": -1406.11572265625,
"loss": 0.0535,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.18758761882781982,
"rewards/margins": 0.3379947543144226,
"rewards/rejected": -0.5255824327468872,
"step": 2200
},
{
"epoch": 0.59,
"learning_rate": 2.157829330593008e-06,
"logits/chosen": -1.6441303491592407,
"logits/rejected": -1.0162547826766968,
"logps/chosen": -711.4053955078125,
"logps/rejected": -1393.6256103515625,
"loss": 0.0708,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.2044825553894043,
"rewards/margins": 0.30536073446273804,
"rewards/rejected": -0.5098432302474976,
"step": 2210
},
{
"epoch": 0.59,
"learning_rate": 2.134792428593971e-06,
"logits/chosen": -1.6138668060302734,
"logits/rejected": -0.9830889701843262,
"logps/chosen": -678.9330444335938,
"logps/rejected": -1337.688720703125,
"loss": 0.0712,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.16158784925937653,
"rewards/margins": 0.30953675508499146,
"rewards/rejected": -0.4711245596408844,
"step": 2220
},
{
"epoch": 0.59,
"learning_rate": 2.1117871704092818e-06,
"logits/chosen": -1.5652110576629639,
"logits/rejected": -0.7780826687812805,
"logps/chosen": -496.6578674316406,
"logps/rejected": -1144.76171875,
"loss": 0.0663,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.11985839903354645,
"rewards/margins": 0.3066459596157074,
"rewards/rejected": -0.42650431394577026,
"step": 2230
},
{
"epoch": 0.6,
"learning_rate": 2.0888155493550027e-06,
"logits/chosen": -1.516342282295227,
"logits/rejected": -1.1366102695465088,
"logps/chosen": -601.8604125976562,
"logps/rejected": -1439.399658203125,
"loss": 0.0535,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.15253353118896484,
"rewards/margins": 0.3515278697013855,
"rewards/rejected": -0.5040613412857056,
"step": 2240
},
{
"epoch": 0.6,
"learning_rate": 2.0658795558326745e-06,
"logits/chosen": -1.4972165822982788,
"logits/rejected": -1.1287825107574463,
"logps/chosen": -551.3253784179688,
"logps/rejected": -1201.3675537109375,
"loss": 0.0887,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.14231975376605988,
"rewards/margins": 0.2838347554206848,
"rewards/rejected": -0.4261545240879059,
"step": 2250
},
{
"epoch": 0.6,
"learning_rate": 2.0429811771568468e-06,
"logits/chosen": -1.283483862876892,
"logits/rejected": -0.8047486543655396,
"logps/chosen": -674.7145385742188,
"logps/rejected": -1273.704345703125,
"loss": 0.0626,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.19594672322273254,
"rewards/margins": 0.2617323696613312,
"rewards/rejected": -0.4576791226863861,
"step": 2260
},
{
"epoch": 0.61,
"learning_rate": 2.0201223973828917e-06,
"logits/chosen": -1.3640494346618652,
"logits/rejected": -0.9720889329910278,
"logps/chosen": -654.1539306640625,
"logps/rejected": -1382.0146484375,
"loss": 0.0744,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17939691245555878,
"rewards/margins": 0.31711509823799133,
"rewards/rejected": -0.4965119957923889,
"step": 2270
},
{
"epoch": 0.61,
"learning_rate": 1.997305197135089e-06,
"logits/chosen": -1.557839274406433,
"logits/rejected": -0.9444772601127625,
"logps/chosen": -631.0563354492188,
"logps/rejected": -1341.0086669921875,
"loss": 0.0733,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.16753628849983215,
"rewards/margins": 0.30379122495651245,
"rewards/rejected": -0.4713274836540222,
"step": 2280
},
{
"epoch": 0.61,
"learning_rate": 1.9745315534350157e-06,
"logits/chosen": -1.2147436141967773,
"logits/rejected": -0.681081235408783,
"logps/chosen": -712.3760986328125,
"logps/rejected": -1318.22265625,
"loss": 0.0959,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.23314671218395233,
"rewards/margins": 0.2763899266719818,
"rewards/rejected": -0.509536623954773,
"step": 2290
},
{
"epoch": 0.61,
"learning_rate": 1.9518034395302413e-06,
"logits/chosen": -1.6583919525146484,
"logits/rejected": -1.0387184619903564,
"logps/chosen": -607.9590454101562,
"logps/rejected": -1096.9937744140625,
"loss": 0.1013,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.1655300408601761,
"rewards/margins": 0.24450743198394775,
"rewards/rejected": -0.41003745794296265,
"step": 2300
},
{
"epoch": 0.62,
"learning_rate": 1.9291228247233607e-06,
"logits/chosen": -1.4778130054473877,
"logits/rejected": -0.7460058331489563,
"logps/chosen": -651.7698974609375,
"logps/rejected": -1243.669921875,
"loss": 0.0769,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1902499794960022,
"rewards/margins": 0.2825292944908142,
"rewards/rejected": -0.4727793335914612,
"step": 2310
},
{
"epoch": 0.62,
"learning_rate": 1.9064916742013515e-06,
"logits/chosen": -1.3330655097961426,
"logits/rejected": -0.9275220036506653,
"logps/chosen": -523.3305053710938,
"logps/rejected": -1225.2471923828125,
"loss": 0.0641,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.15531578660011292,
"rewards/margins": 0.31203147768974304,
"rewards/rejected": -0.46734723448753357,
"step": 2320
},
{
"epoch": 0.62,
"learning_rate": 1.883911948865306e-06,
"logits/chosen": -1.3421717882156372,
"logits/rejected": -1.1515899896621704,
"logps/chosen": -492.34246826171875,
"logps/rejected": -1202.8974609375,
"loss": 0.0831,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.15640634298324585,
"rewards/margins": 0.2934826612472534,
"rewards/rejected": -0.44988900423049927,
"step": 2330
},
{
"epoch": 0.62,
"learning_rate": 1.8613856051605242e-06,
"logits/chosen": -1.4181009531021118,
"logits/rejected": -0.8086174130439758,
"logps/chosen": -602.2988891601562,
"logps/rejected": -1168.888916015625,
"loss": 0.07,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.15534570813179016,
"rewards/margins": 0.2883809208869934,
"rewards/rejected": -0.44372662901878357,
"step": 2340
},
{
"epoch": 0.63,
"learning_rate": 1.8389145949069953e-06,
"logits/chosen": -1.6121231317520142,
"logits/rejected": -0.8654192090034485,
"logps/chosen": -598.3814697265625,
"logps/rejected": -1284.2470703125,
"loss": 0.0595,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.14926204085350037,
"rewards/margins": 0.3212565779685974,
"rewards/rejected": -0.47051864862442017,
"step": 2350
},
{
"epoch": 0.63,
"learning_rate": 1.816500865130279e-06,
"logits/chosen": -1.4523346424102783,
"logits/rejected": -0.9201906323432922,
"logps/chosen": -600.6221923828125,
"logps/rejected": -1303.9447021484375,
"loss": 0.0641,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.17756803333759308,
"rewards/margins": 0.3046211898326874,
"rewards/rejected": -0.48218923807144165,
"step": 2360
},
{
"epoch": 0.63,
"learning_rate": 1.7941463578928088e-06,
"logits/chosen": -1.5082757472991943,
"logits/rejected": -0.9013730883598328,
"logps/chosen": -610.1536254882812,
"logps/rejected": -1315.129638671875,
"loss": 0.0639,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.176839679479599,
"rewards/margins": 0.3339093327522278,
"rewards/rejected": -0.5107490420341492,
"step": 2370
},
{
"epoch": 0.63,
"learning_rate": 1.7718530101256115e-06,
"logits/chosen": -1.6840633153915405,
"logits/rejected": -0.9501806497573853,
"logps/chosen": -662.0902709960938,
"logps/rejected": -1296.606689453125,
"loss": 0.0698,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.17285804450511932,
"rewards/margins": 0.3210682272911072,
"rewards/rejected": -0.4939262866973877,
"step": 2380
},
{
"epoch": 0.64,
"learning_rate": 1.7496227534604859e-06,
"logits/chosen": -1.4562785625457764,
"logits/rejected": -1.0628981590270996,
"logps/chosen": -594.0131225585938,
"logps/rejected": -1322.223876953125,
"loss": 0.0512,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.185628280043602,
"rewards/margins": 0.31836962699890137,
"rewards/rejected": -0.5039979219436646,
"step": 2390
},
{
"epoch": 0.64,
"learning_rate": 1.7274575140626318e-06,
"logits/chosen": -1.622641921043396,
"logits/rejected": -0.7408018112182617,
"logps/chosen": -671.2892456054688,
"logps/rejected": -1338.8228759765625,
"loss": 0.0585,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.19210806488990784,
"rewards/margins": 0.3319090008735657,
"rewards/rejected": -0.5240170359611511,
"step": 2400
},
{
"epoch": 0.64,
"learning_rate": 1.7053592124637557e-06,
"logits/chosen": -1.6081740856170654,
"logits/rejected": -0.7799841165542603,
"logps/chosen": -656.5260009765625,
"logps/rejected": -1301.16650390625,
"loss": 0.0568,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.21345119178295135,
"rewards/margins": 0.30246636271476746,
"rewards/rejected": -0.5159175992012024,
"step": 2410
},
{
"epoch": 0.65,
"learning_rate": 1.6833297633956647e-06,
"logits/chosen": -1.5897592306137085,
"logits/rejected": -0.830175518989563,
"logps/chosen": -643.302734375,
"logps/rejected": -1318.0699462890625,
"loss": 0.0593,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17795221507549286,
"rewards/margins": 0.332580029964447,
"rewards/rejected": -0.5105322599411011,
"step": 2420
},
{
"epoch": 0.65,
"learning_rate": 1.661371075624363e-06,
"logits/chosen": -1.5620427131652832,
"logits/rejected": -1.090867519378662,
"logps/chosen": -677.5968627929688,
"logps/rejected": -1375.831787109375,
"loss": 0.0626,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.185464009642601,
"rewards/margins": 0.3168545365333557,
"rewards/rejected": -0.5023185014724731,
"step": 2430
},
{
"epoch": 0.65,
"learning_rate": 1.6394850517846621e-06,
"logits/chosen": -1.4541980028152466,
"logits/rejected": -0.972217857837677,
"logps/chosen": -705.5538940429688,
"logps/rejected": -1215.7884521484375,
"loss": 0.1066,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.19816702604293823,
"rewards/margins": 0.24639299511909485,
"rewards/rejected": -0.4445599615573883,
"step": 2440
},
{
"epoch": 0.65,
"learning_rate": 1.6176735882153284e-06,
"logits/chosen": -1.5021053552627563,
"logits/rejected": -0.8954359292984009,
"logps/chosen": -642.7943115234375,
"logps/rejected": -1351.9677734375,
"loss": 0.0746,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18909773230552673,
"rewards/margins": 0.3134486675262451,
"rewards/rejected": -0.5025463104248047,
"step": 2450
},
{
"epoch": 0.66,
"learning_rate": 1.5959385747947697e-06,
"logits/chosen": -1.4700965881347656,
"logits/rejected": -0.7698783874511719,
"logps/chosen": -592.5593872070312,
"logps/rejected": -1228.227783203125,
"loss": 0.055,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.16944539546966553,
"rewards/margins": 0.2940993309020996,
"rewards/rejected": -0.46354469656944275,
"step": 2460
},
{
"epoch": 0.66,
"learning_rate": 1.5742818947772875e-06,
"logits/chosen": -1.6665054559707642,
"logits/rejected": -0.948663592338562,
"logps/chosen": -769.6544799804688,
"logps/rejected": -1263.137451171875,
"loss": 0.1006,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.24728581309318542,
"rewards/margins": 0.25588518381118774,
"rewards/rejected": -0.5031709671020508,
"step": 2470
},
{
"epoch": 0.66,
"learning_rate": 1.552705424629898e-06,
"logits/chosen": -1.4320557117462158,
"logits/rejected": -0.8846480250358582,
"logps/chosen": -672.8453369140625,
"logps/rejected": -1423.1922607421875,
"loss": 0.0535,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.1967121660709381,
"rewards/margins": 0.31136855483055115,
"rewards/rejected": -0.508080780506134,
"step": 2480
},
{
"epoch": 0.66,
"learning_rate": 1.5312110338697427e-06,
"logits/chosen": -1.596573829650879,
"logits/rejected": -0.9393990635871887,
"logps/chosen": -719.8648681640625,
"logps/rejected": -1458.17919921875,
"loss": 0.0633,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.19690574705600739,
"rewards/margins": 0.33237752318382263,
"rewards/rejected": -0.5292832851409912,
"step": 2490
},
{
"epoch": 0.67,
"learning_rate": 1.509800584902108e-06,
"logits/chosen": -1.6579780578613281,
"logits/rejected": -1.0232713222503662,
"logps/chosen": -629.9151611328125,
"logps/rejected": -1246.3726806640625,
"loss": 0.0641,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1578701287508011,
"rewards/margins": 0.3020227253437042,
"rewards/rejected": -0.45989280939102173,
"step": 2500
},
{
"epoch": 0.67,
"learning_rate": 1.4884759328590476e-06,
"logits/chosen": -1.6255989074707031,
"logits/rejected": -0.9438311457633972,
"logps/chosen": -571.7470703125,
"logps/rejected": -1234.297119140625,
"loss": 0.0744,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.15460793673992157,
"rewards/margins": 0.3083663582801819,
"rewards/rejected": -0.4629742503166199,
"step": 2510
},
{
"epoch": 0.67,
"learning_rate": 1.467238925438646e-06,
"logits/chosen": -1.4770749807357788,
"logits/rejected": -0.936165452003479,
"logps/chosen": -617.3800048828125,
"logps/rejected": -1241.4713134765625,
"loss": 0.0783,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.15939846634864807,
"rewards/margins": 0.28489136695861816,
"rewards/rejected": -0.44428983330726624,
"step": 2520
},
{
"epoch": 0.67,
"learning_rate": 1.446091402744923e-06,
"logits/chosen": -1.6321996450424194,
"logits/rejected": -1.2910716533660889,
"logps/chosen": -621.7635498046875,
"logps/rejected": -1340.450439453125,
"loss": 0.0587,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.16068853437900543,
"rewards/margins": 0.31573906540870667,
"rewards/rejected": -0.4764275550842285,
"step": 2530
},
{
"epoch": 0.68,
"learning_rate": 1.4250351971283937e-06,
"logits/chosen": -1.790464162826538,
"logits/rejected": -0.8707693219184875,
"logps/chosen": -630.2117919921875,
"logps/rejected": -1443.947509765625,
"loss": 0.0503,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.12107650935649872,
"rewards/margins": 0.3619995713233948,
"rewards/rejected": -0.4830760955810547,
"step": 2540
},
{
"epoch": 0.68,
"learning_rate": 1.4040721330273063e-06,
"logits/chosen": -1.4005483388900757,
"logits/rejected": -0.8707137107849121,
"logps/chosen": -572.6295166015625,
"logps/rejected": -1228.290283203125,
"loss": 0.0709,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.13189435005187988,
"rewards/margins": 0.30900174379348755,
"rewards/rejected": -0.44089609384536743,
"step": 2550
},
{
"epoch": 0.68,
"learning_rate": 1.3832040268095589e-06,
"logits/chosen": -1.5691678524017334,
"logits/rejected": -0.9897274971008301,
"logps/chosen": -598.7499389648438,
"logps/rejected": -1142.5048828125,
"loss": 0.0866,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1460307091474533,
"rewards/margins": 0.24934545159339905,
"rewards/rejected": -0.39537614583969116,
"step": 2560
},
{
"epoch": 0.69,
"learning_rate": 1.362432686615316e-06,
"logits/chosen": -1.69058358669281,
"logits/rejected": -1.319437861442566,
"logps/chosen": -554.8807373046875,
"logps/rejected": -1080.8026123046875,
"loss": 0.0886,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.13216087222099304,
"rewards/margins": 0.22610945999622345,
"rewards/rejected": -0.3582703471183777,
"step": 2570
},
{
"epoch": 0.69,
"learning_rate": 1.3417599122003464e-06,
"logits/chosen": -1.4841969013214111,
"logits/rejected": -0.8709270358085632,
"logps/chosen": -590.7588500976562,
"logps/rejected": -1269.95556640625,
"loss": 0.0861,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.156617671251297,
"rewards/margins": 0.2556094229221344,
"rewards/rejected": -0.4122270941734314,
"step": 2580
},
{
"epoch": 0.69,
"learning_rate": 1.3211874947800747e-06,
"logits/chosen": -1.7067668437957764,
"logits/rejected": -0.8970105051994324,
"logps/chosen": -637.1722412109375,
"logps/rejected": -1236.7254638671875,
"loss": 0.0759,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.15255072712898254,
"rewards/margins": 0.2996431291103363,
"rewards/rejected": -0.45219388604164124,
"step": 2590
},
{
"epoch": 0.69,
"learning_rate": 1.3007172168743854e-06,
"logits/chosen": -1.722249984741211,
"logits/rejected": -0.9102805256843567,
"logps/chosen": -568.446533203125,
"logps/rejected": -1270.4002685546875,
"loss": 0.0711,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.13264772295951843,
"rewards/margins": 0.33025822043418884,
"rewards/rejected": -0.4629059433937073,
"step": 2600
},
{
"epoch": 0.7,
"learning_rate": 1.280350852153168e-06,
"logits/chosen": -1.4858075380325317,
"logits/rejected": -1.0102977752685547,
"logps/chosen": -668.56201171875,
"logps/rejected": -1335.7791748046875,
"loss": 0.0851,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.20782490074634552,
"rewards/margins": 0.29307177662849426,
"rewards/rejected": -0.5008966326713562,
"step": 2610
},
{
"epoch": 0.7,
"learning_rate": 1.260090165282645e-06,
"logits/chosen": -1.3048267364501953,
"logits/rejected": -0.79096519947052,
"logps/chosen": -672.3717041015625,
"logps/rejected": -1276.05224609375,
"loss": 0.091,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.2136264592409134,
"rewards/margins": 0.26162099838256836,
"rewards/rejected": -0.47524747252464294,
"step": 2620
},
{
"epoch": 0.7,
"learning_rate": 1.2399369117724582e-06,
"logits/chosen": -1.5877363681793213,
"logits/rejected": -0.9525176882743835,
"logps/chosen": -706.8630981445312,
"logps/rejected": -1383.3653564453125,
"loss": 0.0601,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.2066899538040161,
"rewards/margins": 0.30500850081443787,
"rewards/rejected": -0.5116984248161316,
"step": 2630
},
{
"epoch": 0.7,
"learning_rate": 1.2198928378235717e-06,
"logits/chosen": -1.7394685745239258,
"logits/rejected": -1.1289197206497192,
"logps/chosen": -770.9951171875,
"logps/rejected": -1358.626220703125,
"loss": 0.0663,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.2261141985654831,
"rewards/margins": 0.2788589596748352,
"rewards/rejected": -0.5049731135368347,
"step": 2640
},
{
"epoch": 0.71,
"learning_rate": 1.1999596801769617e-06,
"logits/chosen": -1.5435702800750732,
"logits/rejected": -1.0047051906585693,
"logps/chosen": -634.2347412109375,
"logps/rejected": -1342.857177734375,
"loss": 0.0383,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18071284890174866,
"rewards/margins": 0.3370632231235504,
"rewards/rejected": -0.5177761316299438,
"step": 2650
},
{
"epoch": 0.71,
"learning_rate": 1.1801391659631423e-06,
"logits/chosen": -1.4378631114959717,
"logits/rejected": -1.1336383819580078,
"logps/chosen": -650.482177734375,
"logps/rejected": -1228.133056640625,
"loss": 0.1003,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.21179573237895966,
"rewards/margins": 0.25491851568222046,
"rewards/rejected": -0.46671420335769653,
"step": 2660
},
{
"epoch": 0.71,
"learning_rate": 1.160433012552508e-06,
"logits/chosen": -1.5026006698608398,
"logits/rejected": -1.0078703165054321,
"logps/chosen": -657.7984619140625,
"logps/rejected": -1241.7266845703125,
"loss": 0.0907,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.18873688578605652,
"rewards/margins": 0.2727685272693634,
"rewards/rejected": -0.4615054130554199,
"step": 2670
},
{
"epoch": 0.71,
"learning_rate": 1.1408429274065418e-06,
"logits/chosen": -1.5700013637542725,
"logits/rejected": -1.132730484008789,
"logps/chosen": -637.8956298828125,
"logps/rejected": -1278.509033203125,
"loss": 0.0772,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.19238412380218506,
"rewards/margins": 0.2825482189655304,
"rewards/rejected": -0.47493234276771545,
"step": 2680
},
{
"epoch": 0.72,
"learning_rate": 1.1213706079298566e-06,
"logits/chosen": -1.2392457723617554,
"logits/rejected": -0.6231773495674133,
"logps/chosen": -654.0189208984375,
"logps/rejected": -1259.2901611328125,
"loss": 0.0774,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.2084679901599884,
"rewards/margins": 0.2823956310749054,
"rewards/rejected": -0.4908636212348938,
"step": 2690
},
{
"epoch": 0.72,
"learning_rate": 1.1020177413231334e-06,
"logits/chosen": -1.4837418794631958,
"logits/rejected": -1.0266939401626587,
"logps/chosen": -575.24169921875,
"logps/rejected": -1292.1416015625,
"loss": 0.0814,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.17716486752033234,
"rewards/margins": 0.29848557710647583,
"rewards/rejected": -0.4756503999233246,
"step": 2700
},
{
"epoch": 0.72,
"learning_rate": 1.0827860044369226e-06,
"logits/chosen": -1.7130857706069946,
"logits/rejected": -1.1947839260101318,
"logps/chosen": -707.26171875,
"logps/rejected": -1313.96484375,
"loss": 0.0685,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.20284132659435272,
"rewards/margins": 0.28745827078819275,
"rewards/rejected": -0.49029961228370667,
"step": 2710
},
{
"epoch": 0.73,
"learning_rate": 1.06367706362636e-06,
"logits/chosen": -1.60333251953125,
"logits/rejected": -1.095365047454834,
"logps/chosen": -590.465576171875,
"logps/rejected": -1228.563720703125,
"loss": 0.0736,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1857512891292572,
"rewards/margins": 0.2781633734703064,
"rewards/rejected": -0.463914692401886,
"step": 2720
},
{
"epoch": 0.73,
"learning_rate": 1.0446925746067768e-06,
"logits/chosen": -1.579377293586731,
"logits/rejected": -0.8978742361068726,
"logps/chosen": -583.1848754882812,
"logps/rejected": -1341.265380859375,
"loss": 0.0439,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.1685558259487152,
"rewards/margins": 0.35025161504745483,
"rewards/rejected": -0.5188074111938477,
"step": 2730
},
{
"epoch": 0.73,
"learning_rate": 1.0258341823102418e-06,
"logits/chosen": -1.5291283130645752,
"logits/rejected": -1.0524482727050781,
"logps/chosen": -632.1546020507812,
"logps/rejected": -1308.994384765625,
"loss": 0.0785,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.19686022400856018,
"rewards/margins": 0.2875005602836609,
"rewards/rejected": -0.48436084389686584,
"step": 2740
},
{
"epoch": 0.73,
"learning_rate": 1.0071035207430352e-06,
"logits/chosen": -1.6773452758789062,
"logits/rejected": -0.8837090730667114,
"logps/chosen": -650.5315551757812,
"logps/rejected": -1348.764404296875,
"loss": 0.0673,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.16877111792564392,
"rewards/margins": 0.34151607751846313,
"rewards/rejected": -0.5102871656417847,
"step": 2750
},
{
"epoch": 0.74,
"learning_rate": 9.88502212844063e-07,
"logits/chosen": -1.2815361022949219,
"logits/rejected": -0.6873558163642883,
"logps/chosen": -566.9096069335938,
"logps/rejected": -1250.682373046875,
"loss": 0.0637,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18210506439208984,
"rewards/margins": 0.3151671588420868,
"rewards/rejected": -0.497272253036499,
"step": 2760
},
{
"epoch": 0.74,
"learning_rate": 9.700318703442437e-07,
"logits/chosen": -1.480957269668579,
"logits/rejected": -1.109178066253662,
"logps/chosen": -599.2630615234375,
"logps/rejected": -1315.3450927734375,
"loss": 0.0725,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.16557563841342926,
"rewards/margins": 0.32593974471092224,
"rewards/rejected": -0.49151545763015747,
"step": 2770
},
{
"epoch": 0.74,
"learning_rate": 9.516940936268504e-07,
"logits/chosen": -1.5238648653030396,
"logits/rejected": -0.9741169214248657,
"logps/chosen": -644.1940307617188,
"logps/rejected": -1286.3187255859375,
"loss": 0.0661,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1868639439344406,
"rewards/margins": 0.286797434091568,
"rewards/rejected": -0.4736614227294922,
"step": 2780
},
{
"epoch": 0.74,
"learning_rate": 9.334904715888496e-07,
"logits/chosen": -1.5936267375946045,
"logits/rejected": -1.060530424118042,
"logps/chosen": -593.8538208007812,
"logps/rejected": -1255.338623046875,
"loss": 0.0778,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.17814789712429047,
"rewards/margins": 0.27660074830055237,
"rewards/rejected": -0.45474863052368164,
"step": 2790
},
{
"epoch": 0.75,
"learning_rate": 9.154225815032242e-07,
"logits/chosen": -1.590341329574585,
"logits/rejected": -0.7449840903282166,
"logps/chosen": -610.4566650390625,
"logps/rejected": -1260.689208984375,
"loss": 0.0591,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1576831042766571,
"rewards/margins": 0.33318689465522766,
"rewards/rejected": -0.49086999893188477,
"step": 2800
},
{
"epoch": 0.75,
"learning_rate": 8.974919888823164e-07,
"logits/chosen": -1.3408777713775635,
"logits/rejected": -0.7836991548538208,
"logps/chosen": -589.0643920898438,
"logps/rejected": -1239.4324951171875,
"loss": 0.0914,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.15606389939785004,
"rewards/margins": 0.29545897245407104,
"rewards/rejected": -0.45152291655540466,
"step": 2810
},
{
"epoch": 0.75,
"learning_rate": 8.797002473421729e-07,
"logits/chosen": -1.6109354496002197,
"logits/rejected": -1.0465881824493408,
"logps/chosen": -709.0189208984375,
"logps/rejected": -1308.754638671875,
"loss": 0.0682,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.20454378426074982,
"rewards/margins": 0.263343870639801,
"rewards/rejected": -0.4678876996040344,
"step": 2820
},
{
"epoch": 0.75,
"learning_rate": 8.620488984679378e-07,
"logits/chosen": -1.621872901916504,
"logits/rejected": -0.963768482208252,
"logps/chosen": -608.9706420898438,
"logps/rejected": -1198.4373779296875,
"loss": 0.0642,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.16753293573856354,
"rewards/margins": 0.2994327247142792,
"rewards/rejected": -0.4669656753540039,
"step": 2830
},
{
"epoch": 0.76,
"learning_rate": 8.445394716802754e-07,
"logits/chosen": -1.4504549503326416,
"logits/rejected": -0.7841233015060425,
"logps/chosen": -669.5154418945312,
"logps/rejected": -1335.7557373046875,
"loss": 0.0643,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.19036135077476501,
"rewards/margins": 0.3035343289375305,
"rewards/rejected": -0.49389567971229553,
"step": 2840
},
{
"epoch": 0.76,
"learning_rate": 8.271734841028553e-07,
"logits/chosen": -1.371618390083313,
"logits/rejected": -0.9690683484077454,
"logps/chosen": -578.9434814453125,
"logps/rejected": -1268.2574462890625,
"loss": 0.0852,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.17845505475997925,
"rewards/margins": 0.30898019671440125,
"rewards/rejected": -0.4874352812767029,
"step": 2850
},
{
"epoch": 0.76,
"learning_rate": 8.099524404308948e-07,
"logits/chosen": -1.3634693622589111,
"logits/rejected": -1.2364251613616943,
"logps/chosen": -655.4860229492188,
"logps/rejected": -1382.656005859375,
"loss": 0.0849,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.21539123356342316,
"rewards/margins": 0.2801755666732788,
"rewards/rejected": -0.49556678533554077,
"step": 2860
},
{
"epoch": 0.77,
"learning_rate": 7.928778328007918e-07,
"logits/chosen": -1.6657747030258179,
"logits/rejected": -1.1524592638015747,
"logps/chosen": -609.6968994140625,
"logps/rejected": -1228.130859375,
"loss": 0.1019,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.18815730512142181,
"rewards/margins": 0.2728883922100067,
"rewards/rejected": -0.46104568243026733,
"step": 2870
},
{
"epoch": 0.77,
"learning_rate": 7.759511406608255e-07,
"logits/chosen": -1.521481990814209,
"logits/rejected": -1.0144175291061401,
"logps/chosen": -666.2223510742188,
"logps/rejected": -1294.4681396484375,
"loss": 0.0567,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.1811828464269638,
"rewards/margins": 0.2964962124824524,
"rewards/rejected": -0.4776790142059326,
"step": 2880
},
{
"epoch": 0.77,
"learning_rate": 7.591738306429769e-07,
"logits/chosen": -1.5954043865203857,
"logits/rejected": -0.9687323570251465,
"logps/chosen": -608.4232177734375,
"logps/rejected": -1266.69482421875,
"loss": 0.0699,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1722910851240158,
"rewards/margins": 0.308040052652359,
"rewards/rejected": -0.4803311228752136,
"step": 2890
},
{
"epoch": 0.77,
"learning_rate": 7.425473564358457e-07,
"logits/chosen": -1.4824254512786865,
"logits/rejected": -0.898513913154602,
"logps/chosen": -605.569091796875,
"logps/rejected": -1327.9998779296875,
"loss": 0.0638,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.16081413626670837,
"rewards/margins": 0.3248310089111328,
"rewards/rejected": -0.4856451451778412,
"step": 2900
},
{
"epoch": 0.78,
"learning_rate": 7.260731586586983e-07,
"logits/chosen": -1.2875173091888428,
"logits/rejected": -0.9592302441596985,
"logps/chosen": -512.1587524414062,
"logps/rejected": -1235.3233642578125,
"loss": 0.0696,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.13009069859981537,
"rewards/margins": 0.3230075538158417,
"rewards/rejected": -0.45309823751449585,
"step": 2910
},
{
"epoch": 0.78,
"learning_rate": 7.097526647366379e-07,
"logits/chosen": -1.526106357574463,
"logits/rejected": -0.9157294034957886,
"logps/chosen": -611.5623779296875,
"logps/rejected": -1331.778076171875,
"loss": 0.0441,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.1644577980041504,
"rewards/margins": 0.3466406762599945,
"rewards/rejected": -0.5110985040664673,
"step": 2920
},
{
"epoch": 0.78,
"learning_rate": 6.935872887769299e-07,
"logits/chosen": -1.4740724563598633,
"logits/rejected": -1.2198327779769897,
"logps/chosen": -515.4468994140625,
"logps/rejected": -1197.4677734375,
"loss": 0.0739,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.13031090795993805,
"rewards/margins": 0.29615822434425354,
"rewards/rejected": -0.42646917700767517,
"step": 2930
},
{
"epoch": 0.78,
"learning_rate": 6.775784314464717e-07,
"logits/chosen": -1.691954255104065,
"logits/rejected": -1.2317649126052856,
"logps/chosen": -551.2989501953125,
"logps/rejected": -1318.2874755859375,
"loss": 0.0574,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.15544219315052032,
"rewards/margins": 0.31455904245376587,
"rewards/rejected": -0.4700012803077698,
"step": 2940
},
{
"epoch": 0.79,
"learning_rate": 6.617274798504286e-07,
"logits/chosen": -1.5783944129943848,
"logits/rejected": -0.9322364926338196,
"logps/chosen": -619.6995849609375,
"logps/rejected": -1310.8206787109375,
"loss": 0.0682,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.13858993351459503,
"rewards/margins": 0.35597696900367737,
"rewards/rejected": -0.4945669174194336,
"step": 2950
},
{
"epoch": 0.79,
"learning_rate": 6.460358074120518e-07,
"logits/chosen": -1.6260960102081299,
"logits/rejected": -1.0405943393707275,
"logps/chosen": -588.0743408203125,
"logps/rejected": -1362.0108642578125,
"loss": 0.037,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.14083652198314667,
"rewards/margins": 0.32858163118362427,
"rewards/rejected": -0.46941813826560974,
"step": 2960
},
{
"epoch": 0.79,
"learning_rate": 6.305047737536707e-07,
"logits/chosen": -1.509161353111267,
"logits/rejected": -1.0090100765228271,
"logps/chosen": -575.589599609375,
"logps/rejected": -1260.2518310546875,
"loss": 0.0557,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.16428951919078827,
"rewards/margins": 0.31566599011421204,
"rewards/rejected": -0.4799554944038391,
"step": 2970
},
{
"epoch": 0.79,
"learning_rate": 6.151357245788917e-07,
"logits/chosen": -1.3317458629608154,
"logits/rejected": -0.9225482940673828,
"logps/chosen": -795.397705078125,
"logps/rejected": -1355.7197265625,
"loss": 0.0627,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.2432664930820465,
"rewards/margins": 0.2584363520145416,
"rewards/rejected": -0.5017029047012329,
"step": 2980
},
{
"epoch": 0.8,
"learning_rate": 5.999299915559956e-07,
"logits/chosen": -1.482460379600525,
"logits/rejected": -1.0576423406600952,
"logps/chosen": -619.4547119140625,
"logps/rejected": -1274.428955078125,
"loss": 0.0574,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.16743937134742737,
"rewards/margins": 0.3212641477584839,
"rewards/rejected": -0.48870354890823364,
"step": 2990
},
{
"epoch": 0.8,
"learning_rate": 5.848888922025553e-07,
"logits/chosen": -1.5611276626586914,
"logits/rejected": -1.225208044052124,
"logps/chosen": -633.8873901367188,
"logps/rejected": -1392.9622802734375,
"loss": 0.0583,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.15855436027050018,
"rewards/margins": 0.31555289030075073,
"rewards/rejected": -0.4741072654724121,
"step": 3000
},
{
"epoch": 0.8,
"learning_rate": 5.700137297712749e-07,
"logits/chosen": -1.5436227321624756,
"logits/rejected": -0.7761337161064148,
"logps/chosen": -619.49609375,
"logps/rejected": -1352.654052734375,
"loss": 0.0673,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.16255274415016174,
"rewards/margins": 0.3418361246585846,
"rewards/rejected": -0.5043889284133911,
"step": 3010
},
{
"epoch": 0.81,
"learning_rate": 5.553057931370729e-07,
"logits/chosen": -1.5770825147628784,
"logits/rejected": -0.8190025091171265,
"logps/chosen": -633.076416015625,
"logps/rejected": -1185.306396484375,
"loss": 0.0764,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.14611086249351501,
"rewards/margins": 0.2796880602836609,
"rewards/rejected": -0.4257989525794983,
"step": 3020
},
{
"epoch": 0.81,
"learning_rate": 5.407663566854008e-07,
"logits/chosen": -1.6848455667495728,
"logits/rejected": -0.7886224985122681,
"logps/chosen": -664.6588745117188,
"logps/rejected": -1323.16064453125,
"loss": 0.0494,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.15056583285331726,
"rewards/margins": 0.34144124388694763,
"rewards/rejected": -0.4920070767402649,
"step": 3030
},
{
"epoch": 0.81,
"learning_rate": 5.263966802018275e-07,
"logits/chosen": -1.736999750137329,
"logits/rejected": -0.8889113664627075,
"logps/chosen": -563.3355712890625,
"logps/rejected": -1182.220947265625,
"loss": 0.0538,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.10529695451259613,
"rewards/margins": 0.3400992751121521,
"rewards/rejected": -0.4453962445259094,
"step": 3040
},
{
"epoch": 0.81,
"learning_rate": 5.121980087628802e-07,
"logits/chosen": -1.3477979898452759,
"logits/rejected": -0.9392274618148804,
"logps/chosen": -666.6458740234375,
"logps/rejected": -1304.849853515625,
"loss": 0.0825,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18823906779289246,
"rewards/margins": 0.28339654207229614,
"rewards/rejected": -0.471635639667511,
"step": 3050
},
{
"epoch": 0.82,
"learning_rate": 4.981715726281666e-07,
"logits/chosen": -1.468207597732544,
"logits/rejected": -0.750462532043457,
"logps/chosen": -549.2374877929688,
"logps/rejected": -1129.191650390625,
"loss": 0.0837,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1359584927558899,
"rewards/margins": 0.2849787175655365,
"rewards/rejected": -0.4209372103214264,
"step": 3060
},
{
"epoch": 0.82,
"learning_rate": 4.843185871337722e-07,
"logits/chosen": -1.5334383249282837,
"logits/rejected": -1.020437240600586,
"logps/chosen": -542.3077392578125,
"logps/rejected": -1162.928955078125,
"loss": 0.0704,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.14567705988883972,
"rewards/margins": 0.2968258261680603,
"rewards/rejected": -0.4425028860569,
"step": 3070
},
{
"epoch": 0.82,
"learning_rate": 4.706402525869633e-07,
"logits/chosen": -1.614105463027954,
"logits/rejected": -0.9975967407226562,
"logps/chosen": -592.6704711914062,
"logps/rejected": -1195.4998779296875,
"loss": 0.0767,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.14713343977928162,
"rewards/margins": 0.293200820684433,
"rewards/rejected": -0.4403342306613922,
"step": 3080
},
{
"epoch": 0.82,
"learning_rate": 4.5713775416217884e-07,
"logits/chosen": -1.5890741348266602,
"logits/rejected": -1.1050251722335815,
"logps/chosen": -633.64990234375,
"logps/rejected": -1316.699462890625,
"loss": 0.0565,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.15724198520183563,
"rewards/margins": 0.3219819962978363,
"rewards/rejected": -0.47922396659851074,
"step": 3090
},
{
"epoch": 0.83,
"learning_rate": 4.438122617983442e-07,
"logits/chosen": -1.4880859851837158,
"logits/rejected": -1.0814130306243896,
"logps/chosen": -556.4613647460938,
"logps/rejected": -1175.9158935546875,
"loss": 0.0847,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1583164930343628,
"rewards/margins": 0.27797532081604004,
"rewards/rejected": -0.43629178404808044,
"step": 3100
},
{
"epoch": 0.83,
"learning_rate": 4.3066493009749853e-07,
"logits/chosen": -1.5819056034088135,
"logits/rejected": -0.8545023798942566,
"logps/chosen": -625.5091552734375,
"logps/rejected": -1260.5357666015625,
"loss": 0.056,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.14933553338050842,
"rewards/margins": 0.3165150284767151,
"rewards/rejected": -0.4658505916595459,
"step": 3110
},
{
"epoch": 0.83,
"learning_rate": 4.1769689822475147e-07,
"logits/chosen": -1.4295012950897217,
"logits/rejected": -0.9826697111129761,
"logps/chosen": -595.7457885742188,
"logps/rejected": -1152.125,
"loss": 0.0926,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1720040887594223,
"rewards/margins": 0.25839370489120483,
"rewards/rejected": -0.4303978383541107,
"step": 3120
},
{
"epoch": 0.83,
"learning_rate": 4.049092898095816e-07,
"logits/chosen": -1.69967782497406,
"logits/rejected": -1.0549393892288208,
"logps/chosen": -673.6993408203125,
"logps/rejected": -1231.6971435546875,
"loss": 0.0544,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.1993969976902008,
"rewards/margins": 0.26704469323158264,
"rewards/rejected": -0.46644172072410583,
"step": 3130
},
{
"epoch": 0.84,
"learning_rate": 3.9230321284847856e-07,
"logits/chosen": -1.347398042678833,
"logits/rejected": -0.7797093391418457,
"logps/chosen": -616.3978271484375,
"logps/rejected": -1361.00732421875,
"loss": 0.0547,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.1867767572402954,
"rewards/margins": 0.33997079730033875,
"rewards/rejected": -0.5267475247383118,
"step": 3140
},
{
"epoch": 0.84,
"learning_rate": 3.798797596089351e-07,
"logits/chosen": -1.627018928527832,
"logits/rejected": -0.8644296526908875,
"logps/chosen": -735.9021606445312,
"logps/rejected": -1377.241455078125,
"loss": 0.0526,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.19497773051261902,
"rewards/margins": 0.33351653814315796,
"rewards/rejected": -0.5284942388534546,
"step": 3150
},
{
"epoch": 0.84,
"learning_rate": 3.6764000653481263e-07,
"logits/chosen": -1.6354873180389404,
"logits/rejected": -0.8436982035636902,
"logps/chosen": -638.96728515625,
"logps/rejected": -1231.9659423828125,
"loss": 0.0799,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.19029106199741364,
"rewards/margins": 0.27815455198287964,
"rewards/rejected": -0.4684455990791321,
"step": 3160
},
{
"epoch": 0.85,
"learning_rate": 3.555850141530659e-07,
"logits/chosen": -1.9651466608047485,
"logits/rejected": -1.0768983364105225,
"logps/chosen": -751.0382690429688,
"logps/rejected": -1339.02783203125,
"loss": 0.0791,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1946159154176712,
"rewards/margins": 0.29727649688720703,
"rewards/rejected": -0.49189239740371704,
"step": 3170
},
{
"epoch": 0.85,
"learning_rate": 3.4371582698185636e-07,
"logits/chosen": -1.511791467666626,
"logits/rejected": -1.1664907932281494,
"logps/chosen": -508.4298400878906,
"logps/rejected": -1147.3773193359375,
"loss": 0.0805,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.149129718542099,
"rewards/margins": 0.2729035019874573,
"rewards/rejected": -0.4220332205295563,
"step": 3180
},
{
"epoch": 0.85,
"learning_rate": 3.3203347344004737e-07,
"logits/chosen": -1.4468176364898682,
"logits/rejected": -1.2824242115020752,
"logps/chosen": -515.1135864257812,
"logps/rejected": -1145.444580078125,
"loss": 0.1051,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.1785714328289032,
"rewards/margins": 0.23533880710601807,
"rewards/rejected": -0.41391023993492126,
"step": 3190
},
{
"epoch": 0.85,
"learning_rate": 3.2053896575809426e-07,
"logits/chosen": -1.5166642665863037,
"logits/rejected": -0.9769018292427063,
"logps/chosen": -721.0452880859375,
"logps/rejected": -1224.28125,
"loss": 0.0988,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.19355639815330505,
"rewards/margins": 0.24941392242908478,
"rewards/rejected": -0.442970335483551,
"step": 3200
},
{
"epoch": 0.86,
"learning_rate": 3.092332998903416e-07,
"logits/chosen": -1.5955547094345093,
"logits/rejected": -0.8334843516349792,
"logps/chosen": -624.3068237304688,
"logps/rejected": -1299.734130859375,
"loss": 0.0705,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1941680610179901,
"rewards/margins": 0.31897804141044617,
"rewards/rejected": -0.5131461024284363,
"step": 3210
},
{
"epoch": 0.86,
"learning_rate": 2.981174554287239e-07,
"logits/chosen": -1.361081600189209,
"logits/rejected": -0.7655187845230103,
"logps/chosen": -657.2144775390625,
"logps/rejected": -1261.987548828125,
"loss": 0.0805,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.19083121418952942,
"rewards/margins": 0.29023870825767517,
"rewards/rejected": -0.481069952249527,
"step": 3220
},
{
"epoch": 0.86,
"learning_rate": 2.871923955178918e-07,
"logits/chosen": -1.5999078750610352,
"logits/rejected": -0.7431113719940186,
"logps/chosen": -730.16455078125,
"logps/rejected": -1300.1298828125,
"loss": 0.0675,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.21212446689605713,
"rewards/margins": 0.29901638627052307,
"rewards/rejected": -0.5111408829689026,
"step": 3230
},
{
"epoch": 0.86,
"learning_rate": 2.764590667717562e-07,
"logits/chosen": -1.7554610967636108,
"logits/rejected": -1.0210330486297607,
"logps/chosen": -691.9470825195312,
"logps/rejected": -1321.851806640625,
"loss": 0.0668,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.17403197288513184,
"rewards/margins": 0.3116183876991272,
"rewards/rejected": -0.4856503903865814,
"step": 3240
},
{
"epoch": 0.87,
"learning_rate": 2.6591839919146963e-07,
"logits/chosen": -1.366562843322754,
"logits/rejected": -0.8948714137077332,
"logps/chosen": -622.9371948242188,
"logps/rejected": -1320.292724609375,
"loss": 0.068,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1862497329711914,
"rewards/margins": 0.32431843876838684,
"rewards/rejected": -0.5105680823326111,
"step": 3250
},
{
"epoch": 0.87,
"learning_rate": 2.555713060848433e-07,
"logits/chosen": -1.5601285696029663,
"logits/rejected": -0.8055307269096375,
"logps/chosen": -620.6419067382812,
"logps/rejected": -1285.7567138671875,
"loss": 0.0621,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.187259703874588,
"rewards/margins": 0.3192376494407654,
"rewards/rejected": -0.506497323513031,
"step": 3260
},
{
"epoch": 0.87,
"learning_rate": 2.454186839872158e-07,
"logits/chosen": -1.2374000549316406,
"logits/rejected": -0.9511027336120605,
"logps/chosen": -584.9683837890625,
"logps/rejected": -1257.975341796875,
"loss": 0.086,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.20605416595935822,
"rewards/margins": 0.264712929725647,
"rewards/rejected": -0.4707671105861664,
"step": 3270
},
{
"epoch": 0.87,
"learning_rate": 2.3546141258376786e-07,
"logits/chosen": -1.4687381982803345,
"logits/rejected": -1.0029032230377197,
"logps/chosen": -693.2366943359375,
"logps/rejected": -1394.9901123046875,
"loss": 0.0572,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.20397081971168518,
"rewards/margins": 0.31658852100372314,
"rewards/rejected": -0.5205592513084412,
"step": 3280
},
{
"epoch": 0.88,
"learning_rate": 2.257003546333042e-07,
"logits/chosen": -1.8167024850845337,
"logits/rejected": -0.9430249929428101,
"logps/chosen": -649.8558959960938,
"logps/rejected": -1447.1058349609375,
"loss": 0.0456,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.18286794424057007,
"rewards/margins": 0.35680800676345825,
"rewards/rejected": -0.5396759510040283,
"step": 3290
},
{
"epoch": 0.88,
"learning_rate": 2.1613635589349756e-07,
"logits/chosen": -1.5668996572494507,
"logits/rejected": -0.9328106045722961,
"logps/chosen": -549.9508056640625,
"logps/rejected": -1223.6793212890625,
"loss": 0.0725,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.156307652592659,
"rewards/margins": 0.3133383095264435,
"rewards/rejected": -0.46964597702026367,
"step": 3300
},
{
"epoch": 0.88,
"learning_rate": 2.0677024504760752e-07,
"logits/chosen": -1.8043934106826782,
"logits/rejected": -1.106671929359436,
"logps/chosen": -603.4810791015625,
"logps/rejected": -1286.2061767578125,
"loss": 0.0659,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.16138550639152527,
"rewards/margins": 0.31447383761405945,
"rewards/rejected": -0.4758593440055847,
"step": 3310
},
{
"epoch": 0.89,
"learning_rate": 1.9760283363267684e-07,
"logits/chosen": -1.6394857168197632,
"logits/rejected": -1.1434743404388428,
"logps/chosen": -560.7457885742188,
"logps/rejected": -1270.900390625,
"loss": 0.0668,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.18648633360862732,
"rewards/margins": 0.3022904694080353,
"rewards/rejected": -0.4887767732143402,
"step": 3320
},
{
"epoch": 0.89,
"learning_rate": 1.8863491596921745e-07,
"logits/chosen": -1.4652307033538818,
"logits/rejected": -0.8056305646896362,
"logps/chosen": -588.2001342773438,
"logps/rejected": -1267.324951171875,
"loss": 0.0493,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.17455193400382996,
"rewards/margins": 0.3139300048351288,
"rewards/rejected": -0.48848190903663635,
"step": 3330
},
{
"epoch": 0.89,
"learning_rate": 1.798672690923828e-07,
"logits/chosen": -1.417265772819519,
"logits/rejected": -0.8639974594116211,
"logps/chosen": -566.65234375,
"logps/rejected": -1115.6005859375,
"loss": 0.0645,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1619497835636139,
"rewards/margins": 0.26331502199172974,
"rewards/rejected": -0.42526477575302124,
"step": 3340
},
{
"epoch": 0.89,
"learning_rate": 1.713006526846439e-07,
"logits/chosen": -1.53346848487854,
"logits/rejected": -0.9233430027961731,
"logps/chosen": -593.0252075195312,
"logps/rejected": -1353.1142578125,
"loss": 0.0667,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.17712654173374176,
"rewards/margins": 0.3622104525566101,
"rewards/rejected": -0.5393369793891907,
"step": 3350
},
{
"epoch": 0.9,
"learning_rate": 1.629358090099639e-07,
"logits/chosen": -1.4553884267807007,
"logits/rejected": -0.867672324180603,
"logps/chosen": -564.8086547851562,
"logps/rejected": -1151.0306396484375,
"loss": 0.0901,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.18404200673103333,
"rewards/margins": 0.25011754035949707,
"rewards/rejected": -0.4341595768928528,
"step": 3360
},
{
"epoch": 0.9,
"learning_rate": 1.5477346284948292e-07,
"logits/chosen": -1.5238468647003174,
"logits/rejected": -1.0101631879806519,
"logps/chosen": -620.8792724609375,
"logps/rejected": -1464.6549072265625,
"loss": 0.0397,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.1736445277929306,
"rewards/margins": 0.37397870421409607,
"rewards/rejected": -0.5476232171058655,
"step": 3370
},
{
"epoch": 0.9,
"learning_rate": 1.4681432143872133e-07,
"logits/chosen": -1.4745080471038818,
"logits/rejected": -1.0695136785507202,
"logps/chosen": -757.6289672851562,
"logps/rejected": -1409.22802734375,
"loss": 0.0673,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.22448639571666718,
"rewards/margins": 0.2988077998161316,
"rewards/rejected": -0.5232942700386047,
"step": 3380
},
{
"epoch": 0.9,
"learning_rate": 1.3905907440629752e-07,
"logits/chosen": -1.6097753047943115,
"logits/rejected": -1.02309250831604,
"logps/chosen": -702.1261596679688,
"logps/rejected": -1313.4244384765625,
"loss": 0.0943,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.2056199014186859,
"rewards/margins": 0.2749633491039276,
"rewards/rejected": -0.4805833399295807,
"step": 3390
},
{
"epoch": 0.91,
"learning_rate": 1.31508393714177e-07,
"logits/chosen": -1.528530478477478,
"logits/rejected": -1.0698211193084717,
"logps/chosen": -605.3930053710938,
"logps/rejected": -1313.823486328125,
"loss": 0.0457,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.1993333399295807,
"rewards/margins": 0.3147328794002533,
"rewards/rejected": -0.514066219329834,
"step": 3400
},
{
"epoch": 0.91,
"learning_rate": 1.241629335994471e-07,
"logits/chosen": -1.638108253479004,
"logits/rejected": -0.8232443928718567,
"logps/chosen": -782.85986328125,
"logps/rejected": -1360.0474853515625,
"loss": 0.0739,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.2265842854976654,
"rewards/margins": 0.2933647036552429,
"rewards/rejected": -0.5199490785598755,
"step": 3410
},
{
"epoch": 0.91,
"learning_rate": 1.1702333051763271e-07,
"logits/chosen": -1.5153647661209106,
"logits/rejected": -0.7482441663742065,
"logps/chosen": -806.5430908203125,
"logps/rejected": -1379.1104736328125,
"loss": 0.1037,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.21187452971935272,
"rewards/margins": 0.2870264947414398,
"rewards/rejected": -0.4989010691642761,
"step": 3420
},
{
"epoch": 0.91,
"learning_rate": 1.1009020308754587e-07,
"logits/chosen": -1.422628402709961,
"logits/rejected": -1.1864253282546997,
"logps/chosen": -631.9682006835938,
"logps/rejected": -1306.8543701171875,
"loss": 0.1056,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.20772810280323029,
"rewards/margins": 0.2739812731742859,
"rewards/rejected": -0.48170939087867737,
"step": 3430
},
{
"epoch": 0.92,
"learning_rate": 1.0336415203768962e-07,
"logits/chosen": -1.5100951194763184,
"logits/rejected": -0.9938896894454956,
"logps/chosen": -725.32373046875,
"logps/rejected": -1294.2386474609375,
"loss": 0.0828,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.20762935280799866,
"rewards/margins": 0.26330476999282837,
"rewards/rejected": -0.47093409299850464,
"step": 3440
},
{
"epoch": 0.92,
"learning_rate": 9.684576015420277e-08,
"logits/chosen": -1.4899585247039795,
"logits/rejected": -0.9078477025032043,
"logps/chosen": -672.1417846679688,
"logps/rejected": -1225.94140625,
"loss": 0.0873,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.17601068317890167,
"rewards/margins": 0.2746294140815735,
"rewards/rejected": -0.4506400525569916,
"step": 3450
},
{
"epoch": 0.92,
"learning_rate": 9.053559223036746e-08,
"logits/chosen": -1.6049926280975342,
"logits/rejected": -0.8928203582763672,
"logps/chosen": -690.5973510742188,
"logps/rejected": -1238.628173828125,
"loss": 0.0812,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.20545156300067902,
"rewards/margins": 0.27740758657455444,
"rewards/rejected": -0.48285919427871704,
"step": 3460
},
{
"epoch": 0.93,
"learning_rate": 8.44341950176683e-08,
"logits/chosen": -1.3215292692184448,
"logits/rejected": -0.9712142944335938,
"logps/chosen": -696.58349609375,
"logps/rejected": -1317.867431640625,
"loss": 0.0799,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.18639104068279266,
"rewards/margins": 0.28709009289741516,
"rewards/rejected": -0.47348111867904663,
"step": 3470
},
{
"epoch": 0.93,
"learning_rate": 7.854209717842231e-08,
"logits/chosen": -1.5110366344451904,
"logits/rejected": -0.9558350443840027,
"logps/chosen": -647.8221435546875,
"logps/rejected": -1398.7818603515625,
"loss": 0.0389,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.1730073243379593,
"rewards/margins": 0.34914129972457886,
"rewards/rejected": -0.522148609161377,
"step": 3480
},
{
"epoch": 0.93,
"learning_rate": 7.285980923996989e-08,
"logits/chosen": -1.4418294429779053,
"logits/rejected": -0.9980441331863403,
"logps/chosen": -586.1068115234375,
"logps/rejected": -1378.889892578125,
"loss": 0.0612,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.17401185631752014,
"rewards/margins": 0.3377479314804077,
"rewards/rejected": -0.5117597579956055,
"step": 3490
},
{
"epoch": 0.93,
"learning_rate": 6.738782355044048e-08,
"logits/chosen": -1.775757074356079,
"logits/rejected": -1.016351342201233,
"logps/chosen": -715.5010986328125,
"logps/rejected": -1282.7650146484375,
"loss": 0.0603,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18584254384040833,
"rewards/margins": 0.28814181685447693,
"rewards/rejected": -0.47398439049720764,
"step": 3500
},
{
"epoch": 0.94,
"learning_rate": 6.212661423609184e-08,
"logits/chosen": -1.5122634172439575,
"logits/rejected": -0.9780920743942261,
"logps/chosen": -668.7765502929688,
"logps/rejected": -1279.484619140625,
"loss": 0.0737,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18126599490642548,
"rewards/margins": 0.28931209444999695,
"rewards/rejected": -0.47057804465293884,
"step": 3510
},
{
"epoch": 0.94,
"learning_rate": 5.707663716023021e-08,
"logits/chosen": -1.7019774913787842,
"logits/rejected": -0.9764993786811829,
"logps/chosen": -598.6218872070312,
"logps/rejected": -1202.2509765625,
"loss": 0.0736,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.16811831295490265,
"rewards/margins": 0.292553186416626,
"rewards/rejected": -0.4606715142726898,
"step": 3520
},
{
"epoch": 0.94,
"learning_rate": 5.22383298837098e-08,
"logits/chosen": -1.589091420173645,
"logits/rejected": -1.0338377952575684,
"logps/chosen": -595.810302734375,
"logps/rejected": -1203.7371826171875,
"loss": 0.08,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.1645013988018036,
"rewards/margins": 0.29109999537467957,
"rewards/rejected": -0.45560139417648315,
"step": 3530
},
{
"epoch": 0.94,
"learning_rate": 4.761211162702117e-08,
"logits/chosen": -1.7127647399902344,
"logits/rejected": -0.8404116630554199,
"logps/chosen": -608.0611572265625,
"logps/rejected": -1101.7435302734375,
"loss": 0.0955,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.16957123577594757,
"rewards/margins": 0.2522638440132141,
"rewards/rejected": -0.4218350946903229,
"step": 3540
},
{
"epoch": 0.95,
"learning_rate": 4.319838323396691e-08,
"logits/chosen": -1.3308387994766235,
"logits/rejected": -0.711550772190094,
"logps/chosen": -609.5896606445312,
"logps/rejected": -1312.685302734375,
"loss": 0.0725,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.17916107177734375,
"rewards/margins": 0.2821735739707947,
"rewards/rejected": -0.4613346457481384,
"step": 3550
},
{
"epoch": 0.95,
"learning_rate": 3.8997527136930004e-08,
"logits/chosen": -1.469405174255371,
"logits/rejected": -0.9255521893501282,
"logps/chosen": -650.4967041015625,
"logps/rejected": -1233.0570068359375,
"loss": 0.0673,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.19022206962108612,
"rewards/margins": 0.2924087345600128,
"rewards/rejected": -0.48263078927993774,
"step": 3560
},
{
"epoch": 0.95,
"learning_rate": 3.5009907323737826e-08,
"logits/chosen": -1.5496537685394287,
"logits/rejected": -1.0329219102859497,
"logps/chosen": -653.6101684570312,
"logps/rejected": -1362.90478515625,
"loss": 0.0495,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.19138206541538239,
"rewards/margins": 0.3122704029083252,
"rewards/rejected": -0.5036525130271912,
"step": 3570
},
{
"epoch": 0.95,
"learning_rate": 3.1235869306123766e-08,
"logits/chosen": -1.4933403730392456,
"logits/rejected": -0.8053463101387024,
"logps/chosen": -725.1935424804688,
"logps/rejected": -1382.274169921875,
"loss": 0.0651,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.21708261966705322,
"rewards/margins": 0.3156852126121521,
"rewards/rejected": -0.5327678322792053,
"step": 3580
},
{
"epoch": 0.96,
"learning_rate": 2.767574008979007e-08,
"logits/chosen": -1.3949609994888306,
"logits/rejected": -0.9586105346679688,
"logps/chosen": -533.8550415039062,
"logps/rejected": -1206.543212890625,
"loss": 0.0737,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.15743830800056458,
"rewards/margins": 0.2968447208404541,
"rewards/rejected": -0.4542829990386963,
"step": 3590
},
{
"epoch": 0.96,
"learning_rate": 2.4329828146074096e-08,
"logits/chosen": -1.6258302927017212,
"logits/rejected": -1.0097240209579468,
"logps/chosen": -639.5706176757812,
"logps/rejected": -1331.8486328125,
"loss": 0.0453,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17417296767234802,
"rewards/margins": 0.3265232443809509,
"rewards/rejected": -0.5006962418556213,
"step": 3600
},
{
"epoch": 0.96,
"learning_rate": 2.1198423385220822e-08,
"logits/chosen": -1.5256226062774658,
"logits/rejected": -0.8268339037895203,
"logps/chosen": -662.9381103515625,
"logps/rejected": -1204.4327392578125,
"loss": 0.0715,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.19520241022109985,
"rewards/margins": 0.27870461344718933,
"rewards/rejected": -0.47390708327293396,
"step": 3610
},
{
"epoch": 0.97,
"learning_rate": 1.82817971312621e-08,
"logits/chosen": -1.7607667446136475,
"logits/rejected": -1.1123876571655273,
"logps/chosen": -596.0337524414062,
"logps/rejected": -1304.044921875,
"loss": 0.0639,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.14941272139549255,
"rewards/margins": 0.33007779717445374,
"rewards/rejected": -0.47949057817459106,
"step": 3620
},
{
"epoch": 0.97,
"learning_rate": 1.5580202098509078e-08,
"logits/chosen": -1.6048189401626587,
"logits/rejected": -0.984302818775177,
"logps/chosen": -657.6131591796875,
"logps/rejected": -1466.190185546875,
"loss": 0.0442,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.18197950720787048,
"rewards/margins": 0.34665971994400024,
"rewards/rejected": -0.5286391973495483,
"step": 3630
},
{
"epoch": 0.97,
"learning_rate": 1.3093872369654148e-08,
"logits/chosen": -1.5646374225616455,
"logits/rejected": -0.8982056379318237,
"logps/chosen": -580.00146484375,
"logps/rejected": -1101.258544921875,
"loss": 0.1123,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.1632545292377472,
"rewards/margins": 0.24513199925422668,
"rewards/rejected": -0.4083865284919739,
"step": 3640
},
{
"epoch": 0.97,
"learning_rate": 1.0823023375489128e-08,
"logits/chosen": -1.5190411806106567,
"logits/rejected": -1.027479887008667,
"logps/chosen": -595.8035888671875,
"logps/rejected": -1325.04443359375,
"loss": 0.0629,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.17017218470573425,
"rewards/margins": 0.305239737033844,
"rewards/rejected": -0.47541195154190063,
"step": 3650
},
{
"epoch": 0.98,
"learning_rate": 8.767851876239075e-09,
"logits/chosen": -1.6967909336090088,
"logits/rejected": -0.9378561973571777,
"logps/chosen": -561.2920532226562,
"logps/rejected": -1203.197021484375,
"loss": 0.0622,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.14759351313114166,
"rewards/margins": 0.31226032972335815,
"rewards/rejected": -0.4598538279533386,
"step": 3660
},
{
"epoch": 0.98,
"learning_rate": 6.9285359445145366e-09,
"logits/chosen": -1.6613948345184326,
"logits/rejected": -1.1440684795379639,
"logps/chosen": -600.6172485351562,
"logps/rejected": -1154.182861328125,
"loss": 0.0852,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.17384423315525055,
"rewards/margins": 0.26200392842292786,
"rewards/rejected": -0.4358481466770172,
"step": 3670
},
{
"epoch": 0.98,
"learning_rate": 5.305234949880001e-09,
"logits/chosen": -1.5972778797149658,
"logits/rejected": -0.801485538482666,
"logps/chosen": -688.7052001953125,
"logps/rejected": -1276.092041015625,
"loss": 0.0613,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18506471812725067,
"rewards/margins": 0.29814431071281433,
"rewards/rejected": -0.4832090437412262,
"step": 3680
},
{
"epoch": 0.98,
"learning_rate": 3.8980895450474455e-09,
"logits/chosen": -1.405221700668335,
"logits/rejected": -0.8206748962402344,
"logps/chosen": -693.5679931640625,
"logps/rejected": -1329.2283935546875,
"loss": 0.0479,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.19921013712882996,
"rewards/margins": 0.30057448148727417,
"rewards/rejected": -0.4997846186161041,
"step": 3690
},
{
"epoch": 0.99,
"learning_rate": 2.7072216536885855e-09,
"logits/chosen": -1.4513555765151978,
"logits/rejected": -0.7634484767913818,
"logps/chosen": -613.1361083984375,
"logps/rejected": -1215.9375,
"loss": 0.0642,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.18391281366348267,
"rewards/margins": 0.2894715964794159,
"rewards/rejected": -0.47338438034057617,
"step": 3700
},
{
"epoch": 0.99,
"learning_rate": 1.7327344598702667e-09,
"logits/chosen": -1.5481555461883545,
"logits/rejected": -0.75025874376297,
"logps/chosen": -655.292236328125,
"logps/rejected": -1392.68115234375,
"loss": 0.0376,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.18728521466255188,
"rewards/margins": 0.3374475836753845,
"rewards/rejected": -0.524732768535614,
"step": 3710
},
{
"epoch": 0.99,
"learning_rate": 9.747123991141193e-10,
"logits/chosen": -1.4917545318603516,
"logits/rejected": -1.0273144245147705,
"logps/chosen": -579.1278686523438,
"logps/rejected": -1233.640869140625,
"loss": 0.0868,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.17262960970401764,
"rewards/margins": 0.2730409801006317,
"rewards/rejected": -0.44567054510116577,
"step": 3720
},
{
"epoch": 0.99,
"learning_rate": 4.332211510807427e-10,
"logits/chosen": -1.4177316427230835,
"logits/rejected": -0.9999169111251831,
"logps/chosen": -677.0986328125,
"logps/rejected": -1409.077880859375,
"loss": 0.0463,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.19204583764076233,
"rewards/margins": 0.3291808068752289,
"rewards/rejected": -0.5212266445159912,
"step": 3730
},
{
"epoch": 1.0,
"learning_rate": 1.0830763387897902e-10,
"logits/chosen": -1.4876052141189575,
"logits/rejected": -0.8847878575325012,
"logps/chosen": -651.7437744140625,
"logps/rejected": -1331.395751953125,
"loss": 0.0532,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.15324924886226654,
"rewards/margins": 0.33165210485458374,
"rewards/rejected": -0.4849013388156891,
"step": 3740
},
{
"epoch": 1.0,
"learning_rate": 0.0,
"logits/chosen": -1.6436065435409546,
"logits/rejected": -0.8282138705253601,
"logps/chosen": -606.6455688476562,
"logps/rejected": -1475.3424072265625,
"loss": 0.0392,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.18834789097309113,
"rewards/margins": 0.36257123947143555,
"rewards/rejected": -0.5509191751480103,
"step": 3750
},
{
"epoch": 1.0,
"step": 3750,
"total_flos": 0.0,
"train_loss": 0.07734704875151316,
"train_runtime": 15655.3296,
"train_samples_per_second": 0.958,
"train_steps_per_second": 0.24
}
],
"logging_steps": 10,
"max_steps": 3750,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}