zephyr-7b-gpo-v6-i3 / trainer_state.json
lole25's picture
Model save
69eba08 verified
raw
history blame contribute delete
No virus
123 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 2550,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.9607843137254902e-08,
"logits/chosen": -0.505158543586731,
"logits/rejected": 1.1344256401062012,
"logps/chosen": -534.2272338867188,
"logps/rejected": -995.0223388671875,
"loss": 0.21,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 1.9607843137254904e-07,
"logits/chosen": -1.4771511554718018,
"logits/rejected": -0.7203052043914795,
"logps/chosen": -653.9701538085938,
"logps/rejected": -1290.11083984375,
"loss": 0.2983,
"rewards/accuracies": 0.3055555522441864,
"rewards/chosen": -0.00023890436568763107,
"rewards/margins": -0.0006189702544361353,
"rewards/rejected": 0.00038006596150808036,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 3.921568627450981e-07,
"logits/chosen": -1.5881028175354004,
"logits/rejected": -0.847257137298584,
"logps/chosen": -677.5276489257812,
"logps/rejected": -1343.302978515625,
"loss": 0.34,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.0005764259840361774,
"rewards/margins": 0.0008251671679317951,
"rewards/rejected": -0.0002487411838956177,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 5.882352941176471e-07,
"logits/chosen": -1.5565259456634521,
"logits/rejected": -0.9040892720222473,
"logps/chosen": -587.6061401367188,
"logps/rejected": -1259.46630859375,
"loss": 0.3992,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.0015199712943285704,
"rewards/margins": 0.002795459469780326,
"rewards/rejected": -0.004315430298447609,
"step": 30
},
{
"epoch": 0.02,
"learning_rate": 7.843137254901962e-07,
"logits/chosen": -1.3543564081192017,
"logits/rejected": -0.5594847798347473,
"logps/chosen": -660.8809814453125,
"logps/rejected": -1349.8839111328125,
"loss": 0.3377,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.007950540632009506,
"rewards/margins": 0.009673960506916046,
"rewards/rejected": -0.017624501138925552,
"step": 40
},
{
"epoch": 0.02,
"learning_rate": 9.80392156862745e-07,
"logits/chosen": -1.4439340829849243,
"logits/rejected": -0.9004542231559753,
"logps/chosen": -625.8778076171875,
"logps/rejected": -1303.6329345703125,
"loss": 0.3665,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.01639598235487938,
"rewards/margins": 0.029322799295186996,
"rewards/rejected": -0.04571877792477608,
"step": 50
},
{
"epoch": 0.02,
"learning_rate": 1.1764705882352942e-06,
"logits/chosen": -1.5793389081954956,
"logits/rejected": -0.6903096437454224,
"logps/chosen": -691.1597290039062,
"logps/rejected": -1354.8695068359375,
"loss": 0.3259,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.04962822049856186,
"rewards/margins": 0.04500482603907585,
"rewards/rejected": -0.0946330577135086,
"step": 60
},
{
"epoch": 0.03,
"learning_rate": 1.3725490196078434e-06,
"logits/chosen": -1.2960580587387085,
"logits/rejected": -0.5226901173591614,
"logps/chosen": -677.5730590820312,
"logps/rejected": -1611.273681640625,
"loss": 0.2328,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.09385339170694351,
"rewards/margins": 0.11672432720661163,
"rewards/rejected": -0.21057769656181335,
"step": 70
},
{
"epoch": 0.03,
"learning_rate": 1.5686274509803923e-06,
"logits/chosen": -1.0945719480514526,
"logits/rejected": -0.5267337560653687,
"logps/chosen": -776.620849609375,
"logps/rejected": -1658.595703125,
"loss": 0.1974,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.19967763125896454,
"rewards/margins": 0.2444140613079071,
"rewards/rejected": -0.44409170746803284,
"step": 80
},
{
"epoch": 0.04,
"learning_rate": 1.7647058823529414e-06,
"logits/chosen": -1.4510087966918945,
"logits/rejected": -0.023749172687530518,
"logps/chosen": -911.9953002929688,
"logps/rejected": -1725.72265625,
"loss": 0.2263,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.2247885912656784,
"rewards/margins": 0.1656641662120819,
"rewards/rejected": -0.3904527723789215,
"step": 90
},
{
"epoch": 0.04,
"learning_rate": 1.96078431372549e-06,
"logits/chosen": -1.3619906902313232,
"logits/rejected": -0.15897789597511292,
"logps/chosen": -822.1832275390625,
"logps/rejected": -1571.0025634765625,
"loss": 0.2765,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.16276445984840393,
"rewards/margins": 0.13095514476299286,
"rewards/rejected": -0.2937195897102356,
"step": 100
},
{
"epoch": 0.04,
"learning_rate": 2.1568627450980393e-06,
"logits/chosen": -1.1530998945236206,
"logits/rejected": -0.40491175651550293,
"logps/chosen": -854.7205200195312,
"logps/rejected": -1822.2965087890625,
"loss": 0.2099,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.15811415016651154,
"rewards/margins": 0.23178577423095703,
"rewards/rejected": -0.3898999094963074,
"step": 110
},
{
"epoch": 0.05,
"learning_rate": 2.3529411764705885e-06,
"logits/chosen": -1.5039284229278564,
"logits/rejected": -0.5590807199478149,
"logps/chosen": -726.2496337890625,
"logps/rejected": -1728.7135009765625,
"loss": 0.2438,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.13852688670158386,
"rewards/margins": 0.27724406123161316,
"rewards/rejected": -0.415770947933197,
"step": 120
},
{
"epoch": 0.05,
"learning_rate": 2.549019607843137e-06,
"logits/chosen": -1.47978937625885,
"logits/rejected": -0.7583194971084595,
"logps/chosen": -777.576416015625,
"logps/rejected": -1722.5013427734375,
"loss": 0.169,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.19047263264656067,
"rewards/margins": 0.221228688955307,
"rewards/rejected": -0.41170138120651245,
"step": 130
},
{
"epoch": 0.05,
"learning_rate": 2.7450980392156867e-06,
"logits/chosen": -1.3619725704193115,
"logits/rejected": -0.45514482259750366,
"logps/chosen": -859.7752685546875,
"logps/rejected": -1761.045654296875,
"loss": 0.2,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.2766234278678894,
"rewards/margins": 0.1959143877029419,
"rewards/rejected": -0.4725378155708313,
"step": 140
},
{
"epoch": 0.06,
"learning_rate": 2.9411764705882355e-06,
"logits/chosen": -1.2937225103378296,
"logits/rejected": -0.18269118666648865,
"logps/chosen": -1000.1593627929688,
"logps/rejected": -1983.649169921875,
"loss": 0.2365,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.3481788635253906,
"rewards/margins": 0.32042697072029114,
"rewards/rejected": -0.6686058640480042,
"step": 150
},
{
"epoch": 0.06,
"learning_rate": 3.1372549019607846e-06,
"logits/chosen": -1.3573691844940186,
"logits/rejected": -0.8902850151062012,
"logps/chosen": -908.5567626953125,
"logps/rejected": -1647.058349609375,
"loss": 0.2157,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.2951143980026245,
"rewards/margins": 0.15847407281398773,
"rewards/rejected": -0.45358848571777344,
"step": 160
},
{
"epoch": 0.07,
"learning_rate": 3.3333333333333333e-06,
"logits/chosen": -1.657151222229004,
"logits/rejected": -0.9709945917129517,
"logps/chosen": -817.062744140625,
"logps/rejected": -1781.638671875,
"loss": 0.1573,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.21228685975074768,
"rewards/margins": 0.23603840172290802,
"rewards/rejected": -0.4483252465724945,
"step": 170
},
{
"epoch": 0.07,
"learning_rate": 3.529411764705883e-06,
"logits/chosen": -1.4512741565704346,
"logits/rejected": -0.1740313172340393,
"logps/chosen": -889.5105590820312,
"logps/rejected": -2051.538818359375,
"loss": 0.2172,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.25935059785842896,
"rewards/margins": 0.3617965579032898,
"rewards/rejected": -0.6211471557617188,
"step": 180
},
{
"epoch": 0.07,
"learning_rate": 3.7254901960784316e-06,
"logits/chosen": -1.1388862133026123,
"logits/rejected": -0.18607623875141144,
"logps/chosen": -821.3484497070312,
"logps/rejected": -1919.426513671875,
"loss": 0.1605,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.21933992207050323,
"rewards/margins": 0.3276643455028534,
"rewards/rejected": -0.547004222869873,
"step": 190
},
{
"epoch": 0.08,
"learning_rate": 3.92156862745098e-06,
"logits/chosen": -1.348503828048706,
"logits/rejected": -0.5367448925971985,
"logps/chosen": -625.4483032226562,
"logps/rejected": -1496.484130859375,
"loss": 0.2089,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.10934285074472427,
"rewards/margins": 0.23809942603111267,
"rewards/rejected": -0.34744226932525635,
"step": 200
},
{
"epoch": 0.08,
"learning_rate": 4.11764705882353e-06,
"logits/chosen": -1.3590004444122314,
"logits/rejected": -0.8172636032104492,
"logps/chosen": -815.4562377929688,
"logps/rejected": -1780.2926025390625,
"loss": 0.2143,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1335686445236206,
"rewards/margins": 0.2463621348142624,
"rewards/rejected": -0.3799307644367218,
"step": 210
},
{
"epoch": 0.09,
"learning_rate": 4.313725490196079e-06,
"logits/chosen": -1.5793583393096924,
"logits/rejected": -0.32534486055374146,
"logps/chosen": -928.5148315429688,
"logps/rejected": -1738.1536865234375,
"loss": 0.2501,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.20052528381347656,
"rewards/margins": 0.21043157577514648,
"rewards/rejected": -0.41095685958862305,
"step": 220
},
{
"epoch": 0.09,
"learning_rate": 4.509803921568628e-06,
"logits/chosen": -0.9972221255302429,
"logits/rejected": -0.37468206882476807,
"logps/chosen": -708.7088623046875,
"logps/rejected": -1586.076416015625,
"loss": 0.205,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.14892444014549255,
"rewards/margins": 0.24091584980487823,
"rewards/rejected": -0.3898402750492096,
"step": 230
},
{
"epoch": 0.09,
"learning_rate": 4.705882352941177e-06,
"logits/chosen": -1.3743550777435303,
"logits/rejected": -0.13277244567871094,
"logps/chosen": -719.425537109375,
"logps/rejected": -1675.90234375,
"loss": 0.2009,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1083696037530899,
"rewards/margins": 0.3259289562702179,
"rewards/rejected": -0.4342985153198242,
"step": 240
},
{
"epoch": 0.1,
"learning_rate": 4.901960784313726e-06,
"logits/chosen": -1.1646835803985596,
"logits/rejected": -0.5943381786346436,
"logps/chosen": -621.46630859375,
"logps/rejected": -1612.871826171875,
"loss": 0.1643,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.17562244832515717,
"rewards/margins": 0.2645077705383301,
"rewards/rejected": -0.44013017416000366,
"step": 250
},
{
"epoch": 0.1,
"learning_rate": 4.999941442477777e-06,
"logits/chosen": -1.2978475093841553,
"logits/rejected": -0.576497495174408,
"logps/chosen": -937.4520263671875,
"logps/rejected": -1737.780029296875,
"loss": 0.2432,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.25836849212646484,
"rewards/margins": 0.241961270570755,
"rewards/rejected": -0.5003297924995422,
"step": 260
},
{
"epoch": 0.11,
"learning_rate": 4.999472998758979e-06,
"logits/chosen": -1.4330791234970093,
"logits/rejected": -0.8838942646980286,
"logps/chosen": -877.1728515625,
"logps/rejected": -1793.1947021484375,
"loss": 0.1393,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.25408753752708435,
"rewards/margins": 0.2761463522911072,
"rewards/rejected": -0.5302339792251587,
"step": 270
},
{
"epoch": 0.11,
"learning_rate": 4.998536199099246e-06,
"logits/chosen": -1.3899977207183838,
"logits/rejected": 0.03836112096905708,
"logps/chosen": -923.8590087890625,
"logps/rejected": -1724.1558837890625,
"loss": 0.1851,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1945658177137375,
"rewards/margins": 0.2358773946762085,
"rewards/rejected": -0.4304431974887848,
"step": 280
},
{
"epoch": 0.11,
"learning_rate": 4.997131219037856e-06,
"logits/chosen": -1.186488389968872,
"logits/rejected": -0.389091819524765,
"logps/chosen": -757.4147338867188,
"logps/rejected": -1886.984130859375,
"loss": 0.1841,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.21733467280864716,
"rewards/margins": 0.3353338837623596,
"rewards/rejected": -0.5526684522628784,
"step": 290
},
{
"epoch": 0.12,
"learning_rate": 4.995258321842611e-06,
"logits/chosen": -1.1964404582977295,
"logits/rejected": -0.06750938296318054,
"logps/chosen": -907.2109375,
"logps/rejected": -1809.0191650390625,
"loss": 0.1834,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.2906198799610138,
"rewards/margins": 0.279925137758255,
"rewards/rejected": -0.5705450177192688,
"step": 300
},
{
"epoch": 0.12,
"learning_rate": 4.9929178584605e-06,
"logits/chosen": -1.649431586265564,
"logits/rejected": -0.20804986357688904,
"logps/chosen": -891.9801635742188,
"logps/rejected": -1733.181884765625,
"loss": 0.1278,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.22755351662635803,
"rewards/margins": 0.2664097547531128,
"rewards/rejected": -0.49396324157714844,
"step": 310
},
{
"epoch": 0.13,
"learning_rate": 4.9901102674519446e-06,
"logits/chosen": -1.4958832263946533,
"logits/rejected": -0.3006078004837036,
"logps/chosen": -951.6578369140625,
"logps/rejected": -1706.25390625,
"loss": 0.2295,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.2715206742286682,
"rewards/margins": 0.25203150510787964,
"rewards/rejected": -0.5235521793365479,
"step": 320
},
{
"epoch": 0.13,
"learning_rate": 4.986836074908616e-06,
"logits/chosen": -1.3995481729507446,
"logits/rejected": 0.009560632519423962,
"logps/chosen": -718.5650634765625,
"logps/rejected": -1350.846923828125,
"loss": 0.2471,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.20702452957630157,
"rewards/margins": 0.14036989212036133,
"rewards/rejected": -0.3473944067955017,
"step": 330
},
{
"epoch": 0.13,
"learning_rate": 4.983095894354858e-06,
"logits/chosen": -1.5904731750488281,
"logits/rejected": -0.14893893897533417,
"logps/chosen": -855.9501953125,
"logps/rejected": -1916.079345703125,
"loss": 0.2131,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.24469470977783203,
"rewards/margins": 0.22492530941963196,
"rewards/rejected": -0.4696199893951416,
"step": 340
},
{
"epoch": 0.14,
"learning_rate": 4.9788904266327206e-06,
"logits/chosen": -1.6823375225067139,
"logits/rejected": -0.4657576084136963,
"logps/chosen": -784.65234375,
"logps/rejected": -1751.244873046875,
"loss": 0.1888,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1359667181968689,
"rewards/margins": 0.29217660427093506,
"rewards/rejected": -0.42814335227012634,
"step": 350
},
{
"epoch": 0.14,
"learning_rate": 4.9742204597706386e-06,
"logits/chosen": -1.5003750324249268,
"logits/rejected": -0.001354557229205966,
"logps/chosen": -755.9137573242188,
"logps/rejected": -1653.0166015625,
"loss": 0.1933,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.12234246730804443,
"rewards/margins": 0.2765265107154846,
"rewards/rejected": -0.39886897802352905,
"step": 360
},
{
"epoch": 0.15,
"learning_rate": 4.9690868688357655e-06,
"logits/chosen": -1.3799958229064941,
"logits/rejected": -0.4311766028404236,
"logps/chosen": -724.7586059570312,
"logps/rejected": -1667.642822265625,
"loss": 0.1828,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.1624334752559662,
"rewards/margins": 0.2481921911239624,
"rewards/rejected": -0.4106256365776062,
"step": 370
},
{
"epoch": 0.15,
"learning_rate": 4.963490615770003e-06,
"logits/chosen": -1.295836329460144,
"logits/rejected": -0.5849100947380066,
"logps/chosen": -835.3861083984375,
"logps/rejected": -1846.414794921875,
"loss": 0.2211,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.23319277167320251,
"rewards/margins": 0.3248142898082733,
"rewards/rejected": -0.5580071210861206,
"step": 380
},
{
"epoch": 0.15,
"learning_rate": 4.957432749209755e-06,
"logits/chosen": -1.4312934875488281,
"logits/rejected": 0.31627362966537476,
"logps/chosen": -939.7803955078125,
"logps/rejected": -1674.4808349609375,
"loss": 0.2533,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.2971717119216919,
"rewards/margins": 0.19919905066490173,
"rewards/rejected": -0.496370792388916,
"step": 390
},
{
"epoch": 0.16,
"learning_rate": 4.950914404289423e-06,
"logits/chosen": -1.3529198169708252,
"logits/rejected": -0.19551090896129608,
"logps/chosen": -940.6759643554688,
"logps/rejected": -1822.1956787109375,
"loss": 0.2262,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.3031768500804901,
"rewards/margins": 0.22445103526115417,
"rewards/rejected": -0.5276279449462891,
"step": 400
},
{
"epoch": 0.16,
"learning_rate": 4.943936802428712e-06,
"logits/chosen": -1.1721961498260498,
"logits/rejected": 0.37075644731521606,
"logps/chosen": -702.531005859375,
"logps/rejected": -1698.3720703125,
"loss": 0.1711,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.18371441960334778,
"rewards/margins": 0.269645094871521,
"rewards/rejected": -0.4533595144748688,
"step": 410
},
{
"epoch": 0.16,
"learning_rate": 4.936501251103751e-06,
"logits/chosen": -1.1501245498657227,
"logits/rejected": -0.04669635370373726,
"logps/chosen": -934.7687377929688,
"logps/rejected": -1762.8375244140625,
"loss": 0.2049,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.27317720651626587,
"rewards/margins": 0.24144259095191956,
"rewards/rejected": -0.5146198272705078,
"step": 420
},
{
"epoch": 0.17,
"learning_rate": 4.928609143602102e-06,
"logits/chosen": -1.3455841541290283,
"logits/rejected": -0.689312219619751,
"logps/chosen": -953.3030395507812,
"logps/rejected": -2143.519775390625,
"loss": 0.1132,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.3812543451786041,
"rewards/margins": 0.47345447540283203,
"rewards/rejected": -0.8547086715698242,
"step": 430
},
{
"epoch": 0.17,
"learning_rate": 4.920261958761677e-06,
"logits/chosen": -1.1954295635223389,
"logits/rejected": 0.1524878442287445,
"logps/chosen": -988.5673828125,
"logps/rejected": -1907.625,
"loss": 0.2181,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.3235063850879669,
"rewards/margins": 0.30497947335243225,
"rewards/rejected": -0.6284858584403992,
"step": 440
},
{
"epoch": 0.18,
"learning_rate": 4.911461260693639e-06,
"logits/chosen": -1.384975552558899,
"logits/rejected": -0.3957231938838959,
"logps/chosen": -864.88623046875,
"logps/rejected": -1796.1107177734375,
"loss": 0.1692,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.24604813754558563,
"rewards/margins": 0.25146228075027466,
"rewards/rejected": -0.4975104331970215,
"step": 450
},
{
"epoch": 0.18,
"learning_rate": 4.902208698489302e-06,
"logits/chosen": -1.0432078838348389,
"logits/rejected": -0.16131794452667236,
"logps/chosen": -885.232421875,
"logps/rejected": -1651.9114990234375,
"loss": 0.2494,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.22546634078025818,
"rewards/margins": 0.1930330991744995,
"rewards/rejected": -0.4184994697570801,
"step": 460
},
{
"epoch": 0.18,
"learning_rate": 4.89250600591114e-06,
"logits/chosen": -1.3176567554473877,
"logits/rejected": -0.0033722042571753263,
"logps/chosen": -723.5933837890625,
"logps/rejected": -1598.0091552734375,
"loss": 0.2398,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.15554097294807434,
"rewards/margins": 0.26405271887779236,
"rewards/rejected": -0.4195936620235443,
"step": 470
},
{
"epoch": 0.19,
"learning_rate": 4.882355001067892e-06,
"logits/chosen": -1.188307523727417,
"logits/rejected": 0.14929169416427612,
"logps/chosen": -815.7213134765625,
"logps/rejected": -1634.1407470703125,
"loss": 0.2558,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.16603827476501465,
"rewards/margins": 0.21612891554832458,
"rewards/rejected": -0.38216716051101685,
"step": 480
},
{
"epoch": 0.19,
"learning_rate": 4.871757586073897e-06,
"logits/chosen": -1.3035974502563477,
"logits/rejected": 0.26524829864501953,
"logps/chosen": -763.2244262695312,
"logps/rejected": -1522.682861328125,
"loss": 0.2258,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.1318461149930954,
"rewards/margins": 0.23059546947479248,
"rewards/rejected": -0.3624415993690491,
"step": 490
},
{
"epoch": 0.2,
"learning_rate": 4.860715746692661e-06,
"logits/chosen": -1.1487717628479004,
"logits/rejected": 0.05942107364535332,
"logps/chosen": -886.2254638671875,
"logps/rejected": -1841.0814208984375,
"loss": 0.1885,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.18671779334545135,
"rewards/margins": 0.27246180176734924,
"rewards/rejected": -0.4591795802116394,
"step": 500
},
{
"epoch": 0.2,
"learning_rate": 4.849231551964771e-06,
"logits/chosen": -1.2474385499954224,
"logits/rejected": -0.14498676359653473,
"logps/chosen": -778.3880615234375,
"logps/rejected": -1752.142333984375,
"loss": 0.1754,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.18546968698501587,
"rewards/margins": 0.3222576379776001,
"rewards/rejected": -0.5077272653579712,
"step": 510
},
{
"epoch": 0.2,
"learning_rate": 4.837307153820184e-06,
"logits/chosen": -1.1251775026321411,
"logits/rejected": 0.15637345612049103,
"logps/chosen": -924.3635864257812,
"logps/rejected": -2070.327392578125,
"loss": 0.1343,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.3112488090991974,
"rewards/margins": 0.38895484805107117,
"rewards/rejected": -0.7002035975456238,
"step": 520
},
{
"epoch": 0.21,
"learning_rate": 4.824944786675003e-06,
"logits/chosen": -1.3947086334228516,
"logits/rejected": 0.045419882982969284,
"logps/chosen": -856.5111083984375,
"logps/rejected": -1587.355712890625,
"loss": 0.1704,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.2732272148132324,
"rewards/margins": 0.24021320044994354,
"rewards/rejected": -0.5134404301643372,
"step": 530
},
{
"epoch": 0.21,
"learning_rate": 4.81214676701278e-06,
"logits/chosen": -1.2445639371871948,
"logits/rejected": 0.1435929536819458,
"logps/chosen": -935.2590942382812,
"logps/rejected": -1872.558349609375,
"loss": 0.1603,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.2728428244590759,
"rewards/margins": 0.303517609834671,
"rewards/rejected": -0.5763604044914246,
"step": 540
},
{
"epoch": 0.22,
"learning_rate": 4.798915492950456e-06,
"logits/chosen": -1.3926843404769897,
"logits/rejected": -0.8224552273750305,
"logps/chosen": -930.3948364257812,
"logps/rejected": -1831.987060546875,
"loss": 0.2094,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.22408756613731384,
"rewards/margins": 0.306917279958725,
"rewards/rejected": -0.5310048460960388,
"step": 550
},
{
"epoch": 0.22,
"learning_rate": 4.785253443788997e-06,
"logits/chosen": -1.452789306640625,
"logits/rejected": -0.08553876727819443,
"logps/chosen": -834.9271240234375,
"logps/rejected": -1715.3486328125,
"loss": 0.2066,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1931687295436859,
"rewards/margins": 0.24109697341918945,
"rewards/rejected": -0.43426570296287537,
"step": 560
},
{
"epoch": 0.22,
"learning_rate": 4.771163179548809e-06,
"logits/chosen": -1.2075916528701782,
"logits/rejected": -0.4084923267364502,
"logps/chosen": -895.1989135742188,
"logps/rejected": -1892.2545166015625,
"loss": 0.1562,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.27637767791748047,
"rewards/margins": 0.3559107184410095,
"rewards/rejected": -0.6322883367538452,
"step": 570
},
{
"epoch": 0.23,
"learning_rate": 4.75664734049005e-06,
"logits/chosen": -1.4612247943878174,
"logits/rejected": -0.4000505805015564,
"logps/chosen": -838.0030517578125,
"logps/rejected": -1836.1865234375,
"loss": 0.1866,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.27399009466171265,
"rewards/margins": 0.3403601050376892,
"rewards/rejected": -0.6143501996994019,
"step": 580
},
{
"epoch": 0.23,
"learning_rate": 4.741708646617879e-06,
"logits/chosen": -1.4533047676086426,
"logits/rejected": -0.44210928678512573,
"logps/chosen": -826.44921875,
"logps/rejected": -1627.9482421875,
"loss": 0.1568,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.18495787680149078,
"rewards/margins": 0.24494799971580505,
"rewards/rejected": -0.42990580201148987,
"step": 590
},
{
"epoch": 0.24,
"learning_rate": 4.726349897172791e-06,
"logits/chosen": -1.2161755561828613,
"logits/rejected": -0.4458787441253662,
"logps/chosen": -677.1725463867188,
"logps/rejected": -1372.3172607421875,
"loss": 0.2348,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.11152330785989761,
"rewards/margins": 0.17953188717365265,
"rewards/rejected": -0.29105520248413086,
"step": 600
},
{
"epoch": 0.24,
"learning_rate": 4.710573970106076e-06,
"logits/chosen": -1.2787022590637207,
"logits/rejected": -0.5003519654273987,
"logps/chosen": -937.7862548828125,
"logps/rejected": -1879.7720947265625,
"loss": 0.2216,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.27286097407341003,
"rewards/margins": 0.2761802077293396,
"rewards/rejected": -0.5490411520004272,
"step": 610
},
{
"epoch": 0.24,
"learning_rate": 4.694383821540554e-06,
"logits/chosen": -1.4234240055084229,
"logits/rejected": -0.529420793056488,
"logps/chosen": -879.75830078125,
"logps/rejected": -1886.7099609375,
"loss": 0.1449,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.23544082045555115,
"rewards/margins": 0.3297392725944519,
"rewards/rejected": -0.5651801824569702,
"step": 620
},
{
"epoch": 0.25,
"learning_rate": 4.677782485216644e-06,
"logits/chosen": -1.5074328184127808,
"logits/rejected": 0.13324348628520966,
"logps/chosen": -894.3519287109375,
"logps/rejected": -1656.986328125,
"loss": 0.2306,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.2585051953792572,
"rewards/margins": 0.17639592289924622,
"rewards/rejected": -0.4349011480808258,
"step": 630
},
{
"epoch": 0.25,
"learning_rate": 4.660773071923901e-06,
"logits/chosen": -1.254246473312378,
"logits/rejected": -0.4503572881221771,
"logps/chosen": -743.8980712890625,
"logps/rejected": -1586.318603515625,
"loss": 0.2306,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.20321564376354218,
"rewards/margins": 0.28816673159599304,
"rewards/rejected": -0.49138230085372925,
"step": 640
},
{
"epoch": 0.25,
"learning_rate": 4.643358768918106e-06,
"logits/chosen": -1.2100741863250732,
"logits/rejected": -0.6602537631988525,
"logps/chosen": -866.4385986328125,
"logps/rejected": -1698.565185546875,
"loss": 0.2341,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.24518051743507385,
"rewards/margins": 0.21809275448322296,
"rewards/rejected": -0.46327322721481323,
"step": 650
},
{
"epoch": 0.26,
"learning_rate": 4.625542839324036e-06,
"logits/chosen": -1.2801318168640137,
"logits/rejected": -0.20570655167102814,
"logps/chosen": -696.7703857421875,
"logps/rejected": -1810.203857421875,
"loss": 0.1443,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1753673553466797,
"rewards/margins": 0.33784395456314087,
"rewards/rejected": -0.5132113099098206,
"step": 660
},
{
"epoch": 0.26,
"learning_rate": 4.6073286215240105e-06,
"logits/chosen": -1.573704719543457,
"logits/rejected": -0.5480459928512573,
"logps/chosen": -698.1392211914062,
"logps/rejected": -1760.8834228515625,
"loss": 3.1142,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.11380796134471893,
"rewards/margins": 0.48678064346313477,
"rewards/rejected": -0.6005885601043701,
"step": 670
},
{
"epoch": 0.27,
"learning_rate": 4.588719528532342e-06,
"logits/chosen": -1.5839512348175049,
"logits/rejected": -0.7513319253921509,
"logps/chosen": -620.78955078125,
"logps/rejected": -1401.8199462890625,
"loss": 0.2422,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.02382536605000496,
"rewards/margins": 0.09586119651794434,
"rewards/rejected": -0.119686558842659,
"step": 680
},
{
"epoch": 0.27,
"learning_rate": 4.569719047355795e-06,
"logits/chosen": -1.5924733877182007,
"logits/rejected": -0.816574215888977,
"logps/chosen": -557.929931640625,
"logps/rejected": -1159.7681884765625,
"loss": 0.292,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.010283837094902992,
"rewards/margins": 0.04927302524447441,
"rewards/rejected": -0.059556860476732254,
"step": 690
},
{
"epoch": 0.27,
"learning_rate": 4.550330738340189e-06,
"logits/chosen": -1.4926470518112183,
"logits/rejected": -0.8066496849060059,
"logps/chosen": -669.9822387695312,
"logps/rejected": -1387.30419921875,
"loss": 0.2635,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.06443636119365692,
"rewards/margins": 0.10144983232021332,
"rewards/rejected": -0.16588619351387024,
"step": 700
},
{
"epoch": 0.28,
"learning_rate": 4.530558234503252e-06,
"logits/chosen": -1.504148244857788,
"logits/rejected": -0.710750937461853,
"logps/chosen": -563.5753173828125,
"logps/rejected": -1385.9373779296875,
"loss": 0.1933,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.02186558023095131,
"rewards/margins": 0.15772321820259094,
"rewards/rejected": -0.17958880960941315,
"step": 710
},
{
"epoch": 0.28,
"learning_rate": 4.5104052408538545e-06,
"logits/chosen": -1.3532848358154297,
"logits/rejected": -0.17277280986309052,
"logps/chosen": -665.1290893554688,
"logps/rejected": -1473.070068359375,
"loss": 0.2188,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.0683923214673996,
"rewards/margins": 0.23080816864967346,
"rewards/rejected": -0.29920047521591187,
"step": 720
},
{
"epoch": 0.29,
"learning_rate": 4.489875533697767e-06,
"logits/chosen": -1.2411041259765625,
"logits/rejected": -0.6769916415214539,
"logps/chosen": -796.1107177734375,
"logps/rejected": -1779.8375244140625,
"loss": 0.2014,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.11370061337947845,
"rewards/margins": 0.2886132001876831,
"rewards/rejected": -0.40231385827064514,
"step": 730
},
{
"epoch": 0.29,
"learning_rate": 4.468972959930043e-06,
"logits/chosen": -1.4062107801437378,
"logits/rejected": -0.11251994222402573,
"logps/chosen": -810.1907958984375,
"logps/rejected": -1755.5439453125,
"loss": 0.207,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.138728067278862,
"rewards/margins": 0.25112494826316833,
"rewards/rejected": -0.38985303044319153,
"step": 740
},
{
"epoch": 0.29,
"learning_rate": 4.447701436314176e-06,
"logits/chosen": -1.1295002698898315,
"logits/rejected": -0.491716206073761,
"logps/chosen": -665.5704345703125,
"logps/rejected": -1605.226318359375,
"loss": 0.2432,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.09421003609895706,
"rewards/margins": 0.22589227557182312,
"rewards/rejected": -0.3201023042201996,
"step": 750
},
{
"epoch": 0.3,
"learning_rate": 4.4260649487481835e-06,
"logits/chosen": -1.3528281450271606,
"logits/rejected": -0.8653984069824219,
"logps/chosen": -560.7476806640625,
"logps/rejected": -1564.6998291015625,
"loss": 0.1747,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.06951048225164413,
"rewards/margins": 0.29366621375083923,
"rewards/rejected": -0.36317676305770874,
"step": 760
},
{
"epoch": 0.3,
"learning_rate": 4.404067551517704e-06,
"logits/chosen": -1.496765375137329,
"logits/rejected": -0.7339566349983215,
"logps/chosen": -559.6861572265625,
"logps/rejected": -1561.775634765625,
"loss": 0.1495,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.0916418582201004,
"rewards/margins": 0.28826963901519775,
"rewards/rejected": -0.37991148233413696,
"step": 770
},
{
"epoch": 0.31,
"learning_rate": 4.381713366536312e-06,
"logits/chosen": -1.2229559421539307,
"logits/rejected": -0.3822958469390869,
"logps/chosen": -795.2717895507812,
"logps/rejected": -1646.298583984375,
"loss": 0.2385,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.16480056941509247,
"rewards/margins": 0.25636088848114014,
"rewards/rejected": -0.4211614727973938,
"step": 780
},
{
"epoch": 0.31,
"learning_rate": 4.359006582573138e-06,
"logits/chosen": -1.3127458095550537,
"logits/rejected": -0.6002156138420105,
"logps/chosen": -731.8434448242188,
"logps/rejected": -1655.339599609375,
"loss": 0.2386,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.1232454776763916,
"rewards/margins": 0.260085791349411,
"rewards/rejected": -0.3833312392234802,
"step": 790
},
{
"epoch": 0.31,
"learning_rate": 4.335951454467971e-06,
"logits/chosen": -1.4491212368011475,
"logits/rejected": -0.4968988299369812,
"logps/chosen": -708.8034057617188,
"logps/rejected": -1689.820068359375,
"loss": 0.1514,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.11892116069793701,
"rewards/margins": 0.311443567276001,
"rewards/rejected": -0.4303646981716156,
"step": 800
},
{
"epoch": 0.32,
"learning_rate": 4.3125523023339825e-06,
"logits/chosen": -1.532845377922058,
"logits/rejected": -0.5454439520835876,
"logps/chosen": -708.6060791015625,
"logps/rejected": -1473.7392578125,
"loss": 0.2365,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.0950343906879425,
"rewards/margins": 0.2247859686613083,
"rewards/rejected": -0.3198204040527344,
"step": 810
},
{
"epoch": 0.32,
"learning_rate": 4.288813510748207e-06,
"logits/chosen": -1.3746122121810913,
"logits/rejected": -0.3929213881492615,
"logps/chosen": -709.5933837890625,
"logps/rejected": -1493.141357421875,
"loss": 0.1891,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.05677127093076706,
"rewards/margins": 0.22434012591838837,
"rewards/rejected": -0.28111138939857483,
"step": 820
},
{
"epoch": 0.33,
"learning_rate": 4.264739527929959e-06,
"logits/chosen": -1.6062724590301514,
"logits/rejected": -0.8062151074409485,
"logps/chosen": -672.033447265625,
"logps/rejected": -1605.8253173828125,
"loss": 0.2076,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.04862620308995247,
"rewards/margins": 0.277982234954834,
"rewards/rejected": -0.32660841941833496,
"step": 830
},
{
"epoch": 0.33,
"learning_rate": 4.240334864907317e-06,
"logits/chosen": -1.429529070854187,
"logits/rejected": -0.1541730761528015,
"logps/chosen": -751.5721435546875,
"logps/rejected": -1614.796875,
"loss": 0.1689,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.07653092592954636,
"rewards/margins": 0.23011043667793274,
"rewards/rejected": -0.3066413402557373,
"step": 840
},
{
"epoch": 0.33,
"learning_rate": 4.215604094671835e-06,
"logits/chosen": -1.4942229986190796,
"logits/rejected": -0.5664646029472351,
"logps/chosen": -683.9749755859375,
"logps/rejected": -1751.165771484375,
"loss": 0.1305,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.07548153400421143,
"rewards/margins": 0.3088182806968689,
"rewards/rejected": -0.3842998147010803,
"step": 850
},
{
"epoch": 0.34,
"learning_rate": 4.190551851321647e-06,
"logits/chosen": -1.5068459510803223,
"logits/rejected": -0.3654994070529938,
"logps/chosen": -753.12060546875,
"logps/rejected": -1841.876220703125,
"loss": 0.1256,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.10451909154653549,
"rewards/margins": 0.35891246795654297,
"rewards/rejected": -0.46343153715133667,
"step": 860
},
{
"epoch": 0.34,
"learning_rate": 4.165182829193126e-06,
"logits/chosen": -1.4504587650299072,
"logits/rejected": 0.0904449075460434,
"logps/chosen": -773.3833618164062,
"logps/rejected": -1582.4493408203125,
"loss": 0.2156,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.06579799205064774,
"rewards/margins": 0.241295725107193,
"rewards/rejected": -0.3070937395095825,
"step": 870
},
{
"epoch": 0.35,
"learning_rate": 4.139501781981245e-06,
"logits/chosen": -1.5094424486160278,
"logits/rejected": -0.5480602383613586,
"logps/chosen": -672.755126953125,
"logps/rejected": -1651.116943359375,
"loss": 0.1111,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.07722672820091248,
"rewards/margins": 0.26155346632003784,
"rewards/rejected": -0.33878016471862793,
"step": 880
},
{
"epoch": 0.35,
"learning_rate": 4.113513521848821e-06,
"logits/chosen": -1.594499111175537,
"logits/rejected": -0.5706368684768677,
"logps/chosen": -772.4927978515625,
"logps/rejected": -1745.507080078125,
"loss": 0.1475,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.10902180522680283,
"rewards/margins": 0.3192656934261322,
"rewards/rejected": -0.4282875061035156,
"step": 890
},
{
"epoch": 0.35,
"learning_rate": 4.087222918524807e-06,
"logits/chosen": -1.297629952430725,
"logits/rejected": -0.6775213479995728,
"logps/chosen": -705.9368896484375,
"logps/rejected": -1540.4775390625,
"loss": 0.2268,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.1348380744457245,
"rewards/margins": 0.2274044305086136,
"rewards/rejected": -0.36224251985549927,
"step": 900
},
{
"epoch": 0.36,
"learning_rate": 4.0606348983917924e-06,
"logits/chosen": -1.3503175973892212,
"logits/rejected": -0.9185010194778442,
"logps/chosen": -610.7164306640625,
"logps/rejected": -1734.915771484375,
"loss": 0.1352,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.10167312622070312,
"rewards/margins": 0.36634570360183716,
"rewards/rejected": -0.4680188298225403,
"step": 910
},
{
"epoch": 0.36,
"learning_rate": 4.03375444356288e-06,
"logits/chosen": -1.4071118831634521,
"logits/rejected": -0.8690752983093262,
"logps/chosen": -836.24169921875,
"logps/rejected": -1863.6539306640625,
"loss": 0.2307,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1224076971411705,
"rewards/margins": 0.3151033818721771,
"rewards/rejected": -0.43751105666160583,
"step": 920
},
{
"epoch": 0.36,
"learning_rate": 4.006586590948141e-06,
"logits/chosen": -1.3949382305145264,
"logits/rejected": -0.680055558681488,
"logps/chosen": -666.8121948242188,
"logps/rejected": -1796.386474609375,
"loss": 0.199,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.07509482651948929,
"rewards/margins": 0.31507402658462524,
"rewards/rejected": -0.39016884565353394,
"step": 930
},
{
"epoch": 0.37,
"learning_rate": 3.979136431310781e-06,
"logits/chosen": -1.4007041454315186,
"logits/rejected": -0.44923824071884155,
"logps/chosen": -629.3880615234375,
"logps/rejected": -1281.4881591796875,
"loss": 0.27,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.06840632110834122,
"rewards/margins": 0.14729034900665283,
"rewards/rejected": -0.21569669246673584,
"step": 940
},
{
"epoch": 0.37,
"learning_rate": 3.951409108313223e-06,
"logits/chosen": -1.3141412734985352,
"logits/rejected": -0.3359532654285431,
"logps/chosen": -682.4598999023438,
"logps/rejected": -1479.57763671875,
"loss": 0.2002,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.06678664684295654,
"rewards/margins": 0.18387752771377563,
"rewards/rejected": -0.2506641745567322,
"step": 950
},
{
"epoch": 0.38,
"learning_rate": 3.923409817553284e-06,
"logits/chosen": -1.26377534866333,
"logits/rejected": -0.5578689575195312,
"logps/chosen": -753.383056640625,
"logps/rejected": -1470.470458984375,
"loss": 0.1909,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.08720171451568604,
"rewards/margins": 0.24566006660461426,
"rewards/rejected": -0.3328618109226227,
"step": 960
},
{
"epoch": 0.38,
"learning_rate": 3.895143805590609e-06,
"logits/chosen": -1.5301742553710938,
"logits/rejected": -0.33912280201911926,
"logps/chosen": -788.5135498046875,
"logps/rejected": -1906.780029296875,
"loss": 0.2098,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.10219583660364151,
"rewards/margins": 0.3592928946018219,
"rewards/rejected": -0.4614887833595276,
"step": 970
},
{
"epoch": 0.38,
"learning_rate": 3.8666163689635614e-06,
"logits/chosen": -1.4293967485427856,
"logits/rejected": -0.766064465045929,
"logps/chosen": -697.79443359375,
"logps/rejected": -1692.085693359375,
"loss": 0.2074,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.10171394050121307,
"rewards/margins": 0.3115464448928833,
"rewards/rejected": -0.41326045989990234,
"step": 980
},
{
"epoch": 0.39,
"learning_rate": 3.837832853196751e-06,
"logits/chosen": -1.4031484127044678,
"logits/rejected": -0.46277111768722534,
"logps/chosen": -741.0556030273438,
"logps/rejected": -1712.839111328125,
"loss": 0.1786,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.10311299562454224,
"rewards/margins": 0.2798925042152405,
"rewards/rejected": -0.3830054700374603,
"step": 990
},
{
"epoch": 0.39,
"learning_rate": 3.808798651799377e-06,
"logits/chosen": -1.4064973592758179,
"logits/rejected": -0.5826825499534607,
"logps/chosen": -687.228271484375,
"logps/rejected": -1728.9072265625,
"loss": 0.1515,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.10456766188144684,
"rewards/margins": 0.31050539016723633,
"rewards/rejected": -0.4150730073451996,
"step": 1000
},
{
"epoch": 0.4,
"learning_rate": 3.7795192052545805e-06,
"logits/chosen": -1.3606574535369873,
"logits/rejected": -0.26507607102394104,
"logps/chosen": -657.6034545898438,
"logps/rejected": -1711.599365234375,
"loss": 0.1027,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.11261602491140366,
"rewards/margins": 0.3513622283935547,
"rewards/rejected": -0.46397823095321655,
"step": 1010
},
{
"epoch": 0.4,
"learning_rate": 3.7500000000000005e-06,
"logits/chosen": -1.3821978569030762,
"logits/rejected": -0.846422016620636,
"logps/chosen": -700.0028076171875,
"logps/rejected": -1700.7777099609375,
"loss": 0.188,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1201629489660263,
"rewards/margins": 0.2851884663105011,
"rewards/rejected": -0.4053514003753662,
"step": 1020
},
{
"epoch": 0.4,
"learning_rate": 3.7202465673997123e-06,
"logits/chosen": -1.327423334121704,
"logits/rejected": -0.4249703884124756,
"logps/chosen": -733.533935546875,
"logps/rejected": -1811.9857177734375,
"loss": 0.2335,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.15838567912578583,
"rewards/margins": 0.3160237669944763,
"rewards/rejected": -0.47440940141677856,
"step": 1030
},
{
"epoch": 0.41,
"learning_rate": 3.6902644827077504e-06,
"logits/chosen": -1.163883924484253,
"logits/rejected": -0.564578652381897,
"logps/chosen": -714.31591796875,
"logps/rejected": -1658.974609375,
"loss": 0.204,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1494341641664505,
"rewards/margins": 0.2751534581184387,
"rewards/rejected": -0.4245876669883728,
"step": 1040
},
{
"epoch": 0.41,
"learning_rate": 3.660059364023409e-06,
"logits/chosen": -1.1056033372879028,
"logits/rejected": -0.6749047040939331,
"logps/chosen": -836.0635986328125,
"logps/rejected": -1795.9320068359375,
"loss": 0.1381,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.13029779493808746,
"rewards/margins": 0.3451148271560669,
"rewards/rejected": -0.47541260719299316,
"step": 1050
},
{
"epoch": 0.42,
"learning_rate": 3.6296368712385084e-06,
"logits/chosen": -1.2282450199127197,
"logits/rejected": 0.033928144723176956,
"logps/chosen": -668.1098022460938,
"logps/rejected": -1750.6011962890625,
"loss": 0.187,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.12075567245483398,
"rewards/margins": 0.3676701486110687,
"rewards/rejected": -0.4884257912635803,
"step": 1060
},
{
"epoch": 0.42,
"learning_rate": 3.599002704976835e-06,
"logits/chosen": -1.513203501701355,
"logits/rejected": -0.3770269453525543,
"logps/chosen": -774.125244140625,
"logps/rejected": -1470.924072265625,
"loss": 0.2331,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.0825313851237297,
"rewards/margins": 0.2151786983013153,
"rewards/rejected": -0.297710120677948,
"step": 1070
},
{
"epoch": 0.42,
"learning_rate": 3.5681626055259526e-06,
"logits/chosen": -1.351539134979248,
"logits/rejected": 0.01821332611143589,
"logps/chosen": -615.5689086914062,
"logps/rejected": -1394.30859375,
"loss": 0.1882,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.04903438687324524,
"rewards/margins": 0.17228753864765167,
"rewards/rejected": -0.22132191061973572,
"step": 1080
},
{
"epoch": 0.43,
"learning_rate": 3.5371223517615684e-06,
"logits/chosen": -1.1955583095550537,
"logits/rejected": -0.7964296340942383,
"logps/chosen": -650.0599365234375,
"logps/rejected": -1640.6591796875,
"loss": 0.1666,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.061278946697711945,
"rewards/margins": 0.23827362060546875,
"rewards/rejected": -0.2995525896549225,
"step": 1090
},
{
"epoch": 0.43,
"learning_rate": 3.5058877600646814e-06,
"logits/chosen": -1.5846580266952515,
"logits/rejected": -0.4390091896057129,
"logps/chosen": -774.6456298828125,
"logps/rejected": -1672.4420166015625,
"loss": 0.1899,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.09766945987939835,
"rewards/margins": 0.26369303464889526,
"rewards/rejected": -0.3613625466823578,
"step": 1100
},
{
"epoch": 0.44,
"learning_rate": 3.4744646832316985e-06,
"logits/chosen": -1.1662776470184326,
"logits/rejected": -0.2102310210466385,
"logps/chosen": -793.6665649414062,
"logps/rejected": -1921.721923828125,
"loss": 0.1516,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.128018319606781,
"rewards/margins": 0.35574427247047424,
"rewards/rejected": -0.48376256227493286,
"step": 1110
},
{
"epoch": 0.44,
"learning_rate": 3.442859009377724e-06,
"logits/chosen": -1.2999095916748047,
"logits/rejected": -0.5450000762939453,
"logps/chosen": -756.6891479492188,
"logps/rejected": -1727.3140869140625,
"loss": 0.2095,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.12410111725330353,
"rewards/margins": 0.2954896092414856,
"rewards/rejected": -0.41959071159362793,
"step": 1120
},
{
"epoch": 0.44,
"learning_rate": 3.4110766608332347e-06,
"logits/chosen": -1.3748492002487183,
"logits/rejected": -0.4282529950141907,
"logps/chosen": -715.91064453125,
"logps/rejected": -1581.970703125,
"loss": 0.2029,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.10205087810754776,
"rewards/margins": 0.2136324942111969,
"rewards/rejected": -0.31568339467048645,
"step": 1130
},
{
"epoch": 0.45,
"learning_rate": 3.379123593034342e-06,
"logits/chosen": -1.4860260486602783,
"logits/rejected": -0.33013448119163513,
"logps/chosen": -715.021240234375,
"logps/rejected": -1671.137939453125,
"loss": 0.1657,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.0988413542509079,
"rewards/margins": 0.2489662617444992,
"rewards/rejected": -0.3478075861930847,
"step": 1140
},
{
"epoch": 0.45,
"learning_rate": 3.3470057934068533e-06,
"logits/chosen": -1.4496772289276123,
"logits/rejected": -0.6596914529800415,
"logps/chosen": -673.6126098632812,
"logps/rejected": -1665.568603515625,
"loss": 0.1832,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.08717682957649231,
"rewards/margins": 0.2865816652774811,
"rewards/rejected": -0.3737585246562958,
"step": 1150
},
{
"epoch": 0.45,
"learning_rate": 3.314729280244332e-06,
"logits/chosen": -1.5033951997756958,
"logits/rejected": -0.4424918591976166,
"logps/chosen": -715.0887451171875,
"logps/rejected": -1384.922119140625,
"loss": 0.2064,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.12197653949260712,
"rewards/margins": 0.2576510012149811,
"rewards/rejected": -0.3796275556087494,
"step": 1160
},
{
"epoch": 0.46,
"learning_rate": 3.2823001015803863e-06,
"logits/chosen": -1.3551867008209229,
"logits/rejected": -0.6100107431411743,
"logps/chosen": -750.599853515625,
"logps/rejected": -1853.4273681640625,
"loss": 0.1589,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.09711600840091705,
"rewards/margins": 0.3520352840423584,
"rewards/rejected": -0.44915127754211426,
"step": 1170
},
{
"epoch": 0.46,
"learning_rate": 3.2497243340553675e-06,
"logits/chosen": -1.0115400552749634,
"logits/rejected": -0.17798957228660583,
"logps/chosen": -745.58984375,
"logps/rejected": -1906.7685546875,
"loss": 0.2539,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.14974217116832733,
"rewards/margins": 0.3422713875770569,
"rewards/rejected": -0.4920136332511902,
"step": 1180
},
{
"epoch": 0.47,
"learning_rate": 3.217008081777726e-06,
"logits/chosen": -1.1727737188339233,
"logits/rejected": -0.37460917234420776,
"logps/chosen": -709.9483642578125,
"logps/rejected": -1686.753173828125,
"loss": 0.1683,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.10941555351018906,
"rewards/margins": 0.2822516858577728,
"rewards/rejected": -0.3916672468185425,
"step": 1190
},
{
"epoch": 0.47,
"learning_rate": 3.184157475180208e-06,
"logits/chosen": -1.3031466007232666,
"logits/rejected": -0.5970622301101685,
"logps/chosen": -697.8651123046875,
"logps/rejected": -1595.6754150390625,
"loss": 0.2328,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.10812550783157349,
"rewards/margins": 0.23647412657737732,
"rewards/rejected": -0.3445996046066284,
"step": 1200
},
{
"epoch": 0.47,
"learning_rate": 3.1511786698711226e-06,
"logits/chosen": -1.3314238786697388,
"logits/rejected": 0.48418712615966797,
"logps/chosen": -731.9833984375,
"logps/rejected": -1517.853271484375,
"loss": 0.2287,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.12708911299705505,
"rewards/margins": 0.23969343304634094,
"rewards/rejected": -0.3667825162410736,
"step": 1210
},
{
"epoch": 0.48,
"learning_rate": 3.1180778454808973e-06,
"logits/chosen": -1.289541244506836,
"logits/rejected": -0.4609376788139343,
"logps/chosen": -746.2857666015625,
"logps/rejected": -1523.1092529296875,
"loss": 0.1886,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.09962339699268341,
"rewards/margins": 0.28612619638442993,
"rewards/rejected": -0.38574957847595215,
"step": 1220
},
{
"epoch": 0.48,
"learning_rate": 3.084861204504122e-06,
"logits/chosen": -1.0148189067840576,
"logits/rejected": -0.48453038930892944,
"logps/chosen": -778.4666748046875,
"logps/rejected": -1931.9429931640625,
"loss": 0.1131,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.08722411096096039,
"rewards/margins": 0.36971360445022583,
"rewards/rejected": -0.4569377303123474,
"step": 1230
},
{
"epoch": 0.49,
"learning_rate": 3.051534971137315e-06,
"logits/chosen": -1.2210582494735718,
"logits/rejected": -0.43022990226745605,
"logps/chosen": -752.8408813476562,
"logps/rejected": -1476.504638671875,
"loss": 0.2269,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.09663649648427963,
"rewards/margins": 0.20909292995929718,
"rewards/rejected": -0.3057294487953186,
"step": 1240
},
{
"epoch": 0.49,
"learning_rate": 3.0181053901126243e-06,
"logits/chosen": -1.1169403791427612,
"logits/rejected": 0.2767347991466522,
"logps/chosen": -749.15673828125,
"logps/rejected": -1505.369140625,
"loss": 0.1992,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.09570419043302536,
"rewards/margins": 0.19496819376945496,
"rewards/rejected": -0.2906723916530609,
"step": 1250
},
{
"epoch": 0.49,
"learning_rate": 2.9845787255276753e-06,
"logits/chosen": -1.5088775157928467,
"logits/rejected": -0.9695127606391907,
"logps/chosen": -588.0244750976562,
"logps/rejected": -1467.4212646484375,
"loss": 0.1487,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.04604783654212952,
"rewards/margins": 0.28641366958618164,
"rewards/rejected": -0.33246147632598877,
"step": 1260
},
{
"epoch": 0.5,
"learning_rate": 2.950961259671793e-06,
"logits/chosen": -1.50933837890625,
"logits/rejected": -0.6869689226150513,
"logps/chosen": -710.8389892578125,
"logps/rejected": -1601.9041748046875,
"loss": 0.2032,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.0675523430109024,
"rewards/margins": 0.27207452058792114,
"rewards/rejected": -0.33962687849998474,
"step": 1270
},
{
"epoch": 0.5,
"learning_rate": 2.917259291848814e-06,
"logits/chosen": -1.4775984287261963,
"logits/rejected": -0.3601114749908447,
"logps/chosen": -680.5808715820312,
"logps/rejected": -1640.981689453125,
"loss": 0.2072,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.06015778332948685,
"rewards/margins": 0.2646317481994629,
"rewards/rejected": -0.32478955388069153,
"step": 1280
},
{
"epoch": 0.51,
"learning_rate": 2.883479137196714e-06,
"logits/chosen": -1.826909065246582,
"logits/rejected": -0.6638845801353455,
"logps/chosen": -696.27734375,
"logps/rejected": -1482.72119140625,
"loss": 0.184,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.05563250929117203,
"rewards/margins": 0.2452922761440277,
"rewards/rejected": -0.30092480778694153,
"step": 1290
},
{
"epoch": 0.51,
"learning_rate": 2.849627125504262e-06,
"logits/chosen": -1.374955415725708,
"logits/rejected": -0.20216119289398193,
"logps/chosen": -578.1390380859375,
"logps/rejected": -1516.6820068359375,
"loss": 0.1879,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.0654110461473465,
"rewards/margins": 0.27813297510147095,
"rewards/rejected": -0.34354403614997864,
"step": 1300
},
{
"epoch": 0.51,
"learning_rate": 2.8157096000249334e-06,
"logits/chosen": -1.5065643787384033,
"logits/rejected": -0.7829849123954773,
"logps/chosen": -630.3825073242188,
"logps/rejected": -1537.6138916015625,
"loss": 0.2042,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.061698682606220245,
"rewards/margins": 0.2721042037010193,
"rewards/rejected": -0.33380287885665894,
"step": 1310
},
{
"epoch": 0.52,
"learning_rate": 2.7817329162883033e-06,
"logits/chosen": -1.471840500831604,
"logits/rejected": -0.21121864020824432,
"logps/chosen": -743.4503173828125,
"logps/rejected": -1590.5904541015625,
"loss": 0.1418,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.055032916367053986,
"rewards/margins": 0.24513819813728333,
"rewards/rejected": -0.3001710772514343,
"step": 1320
},
{
"epoch": 0.52,
"learning_rate": 2.747703440909128e-06,
"logits/chosen": -1.6148380041122437,
"logits/rejected": -0.6764585375785828,
"logps/chosen": -709.3273315429688,
"logps/rejected": -1805.447021484375,
"loss": 0.1334,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.030007129535079002,
"rewards/margins": 0.361659973859787,
"rewards/rejected": -0.39166706800460815,
"step": 1330
},
{
"epoch": 0.53,
"learning_rate": 2.713627550394363e-06,
"logits/chosen": -1.3852078914642334,
"logits/rejected": -0.6749362945556641,
"logps/chosen": -686.713623046875,
"logps/rejected": -1515.2998046875,
"loss": 0.1806,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.05257093161344528,
"rewards/margins": 0.26767003536224365,
"rewards/rejected": -0.3202410042285919,
"step": 1340
},
{
"epoch": 0.53,
"learning_rate": 2.679511629948319e-06,
"logits/chosen": -1.352468729019165,
"logits/rejected": -0.6524327993392944,
"logps/chosen": -796.8145751953125,
"logps/rejected": -1669.538330078125,
"loss": 0.2095,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.10906052589416504,
"rewards/margins": 0.23847489058971405,
"rewards/rejected": -0.3475354313850403,
"step": 1350
},
{
"epoch": 0.53,
"learning_rate": 2.6453620722761897e-06,
"logits/chosen": -1.554595708847046,
"logits/rejected": 0.08318161964416504,
"logps/chosen": -606.7230224609375,
"logps/rejected": -1430.286865234375,
"loss": 0.2034,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.04838673770427704,
"rewards/margins": 0.2375943958759308,
"rewards/rejected": -0.28598111867904663,
"step": 1360
},
{
"epoch": 0.54,
"learning_rate": 2.6111852763861763e-06,
"logits/chosen": -1.3457515239715576,
"logits/rejected": -0.39270055294036865,
"logps/chosen": -752.8702392578125,
"logps/rejected": -1860.8333740234375,
"loss": 0.1234,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.04639287292957306,
"rewards/margins": 0.38252198696136475,
"rewards/rejected": -0.4289148449897766,
"step": 1370
},
{
"epoch": 0.54,
"learning_rate": 2.576987646390426e-06,
"logits/chosen": -1.5459932088851929,
"logits/rejected": -0.5794991254806519,
"logps/chosen": -691.588134765625,
"logps/rejected": -1757.112548828125,
"loss": 0.1192,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.04728538915514946,
"rewards/margins": 0.32668638229370117,
"rewards/rejected": -0.3739717900753021,
"step": 1380
},
{
"epoch": 0.55,
"learning_rate": 2.542775590305023e-06,
"logits/chosen": -1.304917573928833,
"logits/rejected": -0.4121823310852051,
"logps/chosen": -630.0661010742188,
"logps/rejected": -1441.1773681640625,
"loss": 0.2289,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.03756406903266907,
"rewards/margins": 0.20802605152130127,
"rewards/rejected": -0.24559013545513153,
"step": 1390
},
{
"epoch": 0.55,
"learning_rate": 2.5085555188492384e-06,
"logits/chosen": -1.2159336805343628,
"logits/rejected": -0.3775702118873596,
"logps/chosen": -709.61376953125,
"logps/rejected": -1723.700927734375,
"loss": 0.1568,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.10398067533969879,
"rewards/margins": 0.2780481278896332,
"rewards/rejected": -0.38202884793281555,
"step": 1400
},
{
"epoch": 0.55,
"learning_rate": 2.474333844244276e-06,
"logits/chosen": -1.2202876806259155,
"logits/rejected": -0.35152697563171387,
"logps/chosen": -818.2611083984375,
"logps/rejected": -1743.5579833984375,
"loss": 0.1788,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.09500784426927567,
"rewards/margins": 0.300513356924057,
"rewards/rejected": -0.39552122354507446,
"step": 1410
},
{
"epoch": 0.56,
"learning_rate": 2.440116979011743e-06,
"logits/chosen": -1.4342302083969116,
"logits/rejected": -0.45796999335289,
"logps/chosen": -718.6922607421875,
"logps/rejected": -1725.5560302734375,
"loss": 0.197,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.05935473367571831,
"rewards/margins": 0.324557363986969,
"rewards/rejected": -0.383912056684494,
"step": 1420
},
{
"epoch": 0.56,
"learning_rate": 2.4059113347720573e-06,
"logits/chosen": -1.5391137599945068,
"logits/rejected": -0.13381418585777283,
"logps/chosen": -690.8306884765625,
"logps/rejected": -1534.461181640625,
"loss": 0.1946,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.10408179461956024,
"rewards/margins": 0.27237391471862793,
"rewards/rejected": -0.376455694437027,
"step": 1430
},
{
"epoch": 0.56,
"learning_rate": 2.3717233210430258e-06,
"logits/chosen": -1.308176875114441,
"logits/rejected": -0.5252507925033569,
"logps/chosen": -736.7550659179688,
"logps/rejected": -1814.572021484375,
"loss": 0.1699,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.1053951233625412,
"rewards/margins": 0.3335246741771698,
"rewards/rejected": -0.4389197826385498,
"step": 1440
},
{
"epoch": 0.57,
"learning_rate": 2.337559344038817e-06,
"logits/chosen": -1.2826203107833862,
"logits/rejected": 0.2594057321548462,
"logps/chosen": -654.9820556640625,
"logps/rejected": -1548.369384765625,
"loss": 0.1628,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.10001038014888763,
"rewards/margins": 0.2573556900024414,
"rewards/rejected": -0.35736608505249023,
"step": 1450
},
{
"epoch": 0.57,
"learning_rate": 2.303425805469554e-06,
"logits/chosen": -1.2893702983856201,
"logits/rejected": -0.615670382976532,
"logps/chosen": -686.9696044921875,
"logps/rejected": -1765.0921630859375,
"loss": 0.1341,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.06719444692134857,
"rewards/margins": 0.3570956885814667,
"rewards/rejected": -0.42429018020629883,
"step": 1460
},
{
"epoch": 0.58,
"learning_rate": 2.269329101341745e-06,
"logits/chosen": -1.5257200002670288,
"logits/rejected": -0.8465067744255066,
"logps/chosen": -722.9954833984375,
"logps/rejected": -1763.6884765625,
"loss": 0.1296,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.0741962268948555,
"rewards/margins": 0.36958009004592896,
"rewards/rejected": -0.44377630949020386,
"step": 1470
},
{
"epoch": 0.58,
"learning_rate": 2.235275620759797e-06,
"logits/chosen": -1.3611409664154053,
"logits/rejected": 0.612551748752594,
"logps/chosen": -703.1578979492188,
"logps/rejected": -1591.042236328125,
"loss": 0.1764,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.10201771557331085,
"rewards/margins": 0.2424260824918747,
"rewards/rejected": -0.34444376826286316,
"step": 1480
},
{
"epoch": 0.58,
"learning_rate": 2.2012717447288037e-06,
"logits/chosen": -1.3054463863372803,
"logits/rejected": -0.7033378481864929,
"logps/chosen": -731.6030883789062,
"logps/rejected": -1814.713134765625,
"loss": 0.1576,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.0751316249370575,
"rewards/margins": 0.3522658348083496,
"rewards/rejected": -0.42739754915237427,
"step": 1490
},
{
"epoch": 0.59,
"learning_rate": 2.167323844958867e-06,
"logits/chosen": -1.524957299232483,
"logits/rejected": -0.6119885444641113,
"logps/chosen": -701.2098388671875,
"logps/rejected": -1545.368896484375,
"loss": 0.14,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.10369672626256943,
"rewards/margins": 0.28280869126319885,
"rewards/rejected": -0.3865054249763489,
"step": 1500
},
{
"epoch": 0.59,
"learning_rate": 2.133438282671149e-06,
"logits/chosen": -1.2132611274719238,
"logits/rejected": -0.7082799673080444,
"logps/chosen": -762.6727294921875,
"logps/rejected": -1658.924072265625,
"loss": 0.1803,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1439850628376007,
"rewards/margins": 0.27077409625053406,
"rewards/rejected": -0.41475915908813477,
"step": 1510
},
{
"epoch": 0.6,
"learning_rate": 2.0996214074059033e-06,
"logits/chosen": -1.6239715814590454,
"logits/rejected": -0.5037415623664856,
"logps/chosen": -786.1912841796875,
"logps/rejected": -1638.0843505859375,
"loss": 0.2179,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.07825516164302826,
"rewards/margins": 0.2860822379589081,
"rewards/rejected": -0.36433738470077515,
"step": 1520
},
{
"epoch": 0.6,
"learning_rate": 2.0658795558326745e-06,
"logits/chosen": -1.3029212951660156,
"logits/rejected": -0.10125327110290527,
"logps/chosen": -724.8988647460938,
"logps/rejected": -1521.581787109375,
"loss": 0.1988,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.05552230030298233,
"rewards/margins": 0.29605624079704285,
"rewards/rejected": -0.3515785336494446,
"step": 1530
},
{
"epoch": 0.6,
"learning_rate": 2.0322190505629297e-06,
"logits/chosen": -1.1891577243804932,
"logits/rejected": -0.263233482837677,
"logps/chosen": -726.5543212890625,
"logps/rejected": -1851.503662109375,
"loss": 0.1454,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.10788372904062271,
"rewards/margins": 0.327489972114563,
"rewards/rejected": -0.4353737235069275,
"step": 1540
},
{
"epoch": 0.61,
"learning_rate": 1.998646198965312e-06,
"logits/chosen": -1.376450777053833,
"logits/rejected": -0.22948014736175537,
"logps/chosen": -596.0374755859375,
"logps/rejected": -1520.2818603515625,
"loss": 0.2496,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.062327928841114044,
"rewards/margins": 0.3048885762691498,
"rewards/rejected": -0.3672165274620056,
"step": 1550
},
{
"epoch": 0.61,
"learning_rate": 1.965167291983757e-06,
"logits/chosen": -1.6274656057357788,
"logits/rejected": -0.2617906928062439,
"logps/chosen": -786.1827392578125,
"logps/rejected": -1801.614990234375,
"loss": 0.1203,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.10416553169488907,
"rewards/margins": 0.327767550945282,
"rewards/rejected": -0.43193307518959045,
"step": 1560
},
{
"epoch": 0.62,
"learning_rate": 1.931788602958678e-06,
"logits/chosen": -0.9874919652938843,
"logits/rejected": 0.055336445569992065,
"logps/chosen": -801.8827514648438,
"logps/rejected": -1887.7252197265625,
"loss": 0.1647,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1263236552476883,
"rewards/margins": 0.3318944573402405,
"rewards/rejected": -0.4582180976867676,
"step": 1570
},
{
"epoch": 0.62,
"learning_rate": 1.8985163864514644e-06,
"logits/chosen": -1.4952738285064697,
"logits/rejected": -0.03670965135097504,
"logps/chosen": -776.7321166992188,
"logps/rejected": -1846.3646240234375,
"loss": 0.1433,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.11145295947790146,
"rewards/margins": 0.3160027265548706,
"rewards/rejected": -0.42745572328567505,
"step": 1580
},
{
"epoch": 0.62,
"learning_rate": 1.8653568770724805e-06,
"logits/chosen": -1.352738618850708,
"logits/rejected": -0.2683241367340088,
"logps/chosen": -648.5192260742188,
"logps/rejected": -1464.099365234375,
"loss": 0.185,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.08493933826684952,
"rewards/margins": 0.2524186968803406,
"rewards/rejected": -0.3373579978942871,
"step": 1590
},
{
"epoch": 0.63,
"learning_rate": 1.8323162883128211e-06,
"logits/chosen": -1.419662356376648,
"logits/rejected": -0.4111382067203522,
"logps/chosen": -699.5247802734375,
"logps/rejected": -1743.6064453125,
"loss": 0.1541,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.08453786373138428,
"rewards/margins": 0.296464741230011,
"rewards/rejected": -0.38100260496139526,
"step": 1600
},
{
"epoch": 0.63,
"learning_rate": 1.7994008113800105e-06,
"logits/chosen": -1.5189629793167114,
"logits/rejected": -0.9077790975570679,
"logps/chosen": -701.3331298828125,
"logps/rejected": -1603.5174560546875,
"loss": 0.1429,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.08178045600652695,
"rewards/margins": 0.3028547167778015,
"rewards/rejected": -0.38463518023490906,
"step": 1610
},
{
"epoch": 0.64,
"learning_rate": 1.7666166140378853e-06,
"logits/chosen": -1.169510841369629,
"logits/rejected": 0.19725301861763,
"logps/chosen": -734.7293090820312,
"logps/rejected": -1564.332763671875,
"loss": 0.1528,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.09660240262746811,
"rewards/margins": 0.28595981001853943,
"rewards/rejected": -0.38256219029426575,
"step": 1620
},
{
"epoch": 0.64,
"learning_rate": 1.7339698394508632e-06,
"logits/chosen": -1.266775369644165,
"logits/rejected": -0.6185767650604248,
"logps/chosen": -627.6648559570312,
"logps/rejected": -1780.268310546875,
"loss": 0.1694,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.0766761377453804,
"rewards/margins": 0.3623715043067932,
"rewards/rejected": -0.43904757499694824,
"step": 1630
},
{
"epoch": 0.64,
"learning_rate": 1.7014666050328325e-06,
"logits/chosen": -1.5317351818084717,
"logits/rejected": -0.46623557806015015,
"logps/chosen": -639.0328369140625,
"logps/rejected": -1635.7354736328125,
"loss": 0.126,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.07408356666564941,
"rewards/margins": 0.34189721941947937,
"rewards/rejected": -0.41598081588745117,
"step": 1640
},
{
"epoch": 0.65,
"learning_rate": 1.6691130013008514e-06,
"logits/chosen": -1.421917200088501,
"logits/rejected": -0.19839780032634735,
"logps/chosen": -837.2825317382812,
"logps/rejected": -1678.8179931640625,
"loss": 0.1956,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.08556106686592102,
"rewards/margins": 0.2512792646884918,
"rewards/rejected": -0.33684033155441284,
"step": 1650
},
{
"epoch": 0.65,
"learning_rate": 1.6369150907339007e-06,
"logits/chosen": -1.195821762084961,
"logits/rejected": -0.20372645556926727,
"logps/chosen": -709.2095336914062,
"logps/rejected": -1652.1871337890625,
"loss": 0.1906,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.07075698673725128,
"rewards/margins": 0.289537250995636,
"rewards/rejected": -0.36029425263404846,
"step": 1660
},
{
"epoch": 0.65,
"learning_rate": 1.6048789066368858e-06,
"logits/chosen": -1.354961633682251,
"logits/rejected": -0.20124280452728271,
"logps/chosen": -728.2799072265625,
"logps/rejected": -1569.3551025390625,
"loss": 0.1916,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.08767645061016083,
"rewards/margins": 0.2705709636211395,
"rewards/rejected": -0.35824739933013916,
"step": 1670
},
{
"epoch": 0.66,
"learning_rate": 1.5730104520100984e-06,
"logits/chosen": -1.496524453163147,
"logits/rejected": -0.8575867414474487,
"logps/chosen": -612.16650390625,
"logps/rejected": -1632.1365966796875,
"loss": 0.1279,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.06737435609102249,
"rewards/margins": 0.3229941725730896,
"rewards/rejected": -0.3903685212135315,
"step": 1680
},
{
"epoch": 0.66,
"learning_rate": 1.5413156984243715e-06,
"logits/chosen": -1.3209052085876465,
"logits/rejected": -0.12577922642230988,
"logps/chosen": -759.8672485351562,
"logps/rejected": -1498.656494140625,
"loss": 0.1552,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.10295001417398453,
"rewards/margins": 0.21431489288806915,
"rewards/rejected": -0.3172649145126343,
"step": 1690
},
{
"epoch": 0.67,
"learning_rate": 1.509800584902108e-06,
"logits/chosen": -1.1863139867782593,
"logits/rejected": -0.08450505882501602,
"logps/chosen": -838.8494873046875,
"logps/rejected": -1536.5277099609375,
"loss": 0.167,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.12472305446863174,
"rewards/margins": 0.24718424677848816,
"rewards/rejected": -0.3719072937965393,
"step": 1700
},
{
"epoch": 0.67,
"learning_rate": 1.4784710168044215e-06,
"logits/chosen": -1.369985818862915,
"logits/rejected": -0.5248149037361145,
"logps/chosen": -883.6121826171875,
"logps/rejected": -1617.128662109375,
"loss": 0.1984,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.1338253617286682,
"rewards/margins": 0.2511526942253113,
"rewards/rejected": -0.3849780857563019,
"step": 1710
},
{
"epoch": 0.67,
"learning_rate": 1.4473328647245726e-06,
"logits/chosen": -1.624087929725647,
"logits/rejected": -0.42871198058128357,
"logps/chosen": -694.0233764648438,
"logps/rejected": -1572.922119140625,
"loss": 0.2198,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.1324300318956375,
"rewards/margins": 0.2687898874282837,
"rewards/rejected": -0.4012199342250824,
"step": 1720
},
{
"epoch": 0.68,
"learning_rate": 1.4163919633879325e-06,
"logits/chosen": -1.4249976873397827,
"logits/rejected": -0.46216440200805664,
"logps/chosen": -831.1329956054688,
"logps/rejected": -1623.590087890625,
"loss": 0.2073,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.09118635952472687,
"rewards/margins": 0.26622968912124634,
"rewards/rejected": -0.357416033744812,
"step": 1730
},
{
"epoch": 0.68,
"learning_rate": 1.3856541105586545e-06,
"logits/chosen": -1.5596380233764648,
"logits/rejected": -0.4608355462551117,
"logps/chosen": -826.0984497070312,
"logps/rejected": -1898.5416259765625,
"loss": 0.1421,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1307828575372696,
"rewards/margins": 0.34290483593940735,
"rewards/rejected": -0.47368764877319336,
"step": 1740
},
{
"epoch": 0.69,
"learning_rate": 1.3551250659532853e-06,
"logits/chosen": -1.492356300354004,
"logits/rejected": -0.7112780809402466,
"logps/chosen": -699.1672973632812,
"logps/rejected": -1537.228271484375,
"loss": 0.1776,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.06596329063177109,
"rewards/margins": 0.2567977011203766,
"rewards/rejected": -0.32276099920272827,
"step": 1750
},
{
"epoch": 0.69,
"learning_rate": 1.3248105501614897e-06,
"logits/chosen": -1.2990128993988037,
"logits/rejected": -0.7208808660507202,
"logps/chosen": -714.08544921875,
"logps/rejected": -1732.0875244140625,
"loss": 0.2147,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.05500803142786026,
"rewards/margins": 0.26819437742233276,
"rewards/rejected": -0.32320234179496765,
"step": 1760
},
{
"epoch": 0.69,
"learning_rate": 1.2947162435741278e-06,
"logits/chosen": -1.1586157083511353,
"logits/rejected": 0.03688998147845268,
"logps/chosen": -734.365966796875,
"logps/rejected": -1622.6265869140625,
"loss": 0.2471,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.1007305383682251,
"rewards/margins": 0.197604700922966,
"rewards/rejected": -0.2983352243900299,
"step": 1770
},
{
"epoch": 0.7,
"learning_rate": 1.2648477853188395e-06,
"logits/chosen": -1.412379503250122,
"logits/rejected": -0.5264952778816223,
"logps/chosen": -698.6842651367188,
"logps/rejected": -1511.8642578125,
"loss": 0.1865,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.042982958257198334,
"rewards/margins": 0.26162266731262207,
"rewards/rejected": -0.304605633020401,
"step": 1780
},
{
"epoch": 0.7,
"learning_rate": 1.2352107722033842e-06,
"logits/chosen": -1.2586696147918701,
"logits/rejected": -0.15170638263225555,
"logps/chosen": -653.3026123046875,
"logps/rejected": -1529.8104248046875,
"loss": 0.1549,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.0475340336561203,
"rewards/margins": 0.275061696767807,
"rewards/rejected": -0.3225957453250885,
"step": 1790
},
{
"epoch": 0.71,
"learning_rate": 1.205810757666894e-06,
"logits/chosen": -1.3673145771026611,
"logits/rejected": -0.4642263948917389,
"logps/chosen": -588.0513916015625,
"logps/rejected": -1447.431396484375,
"loss": 0.1613,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.07587876915931702,
"rewards/margins": 0.24527780711650848,
"rewards/rejected": -0.3211565613746643,
"step": 1800
},
{
"epoch": 0.71,
"learning_rate": 1.176653250739265e-06,
"logits/chosen": -1.4524450302124023,
"logits/rejected": -0.21896734833717346,
"logps/chosen": -831.2824096679688,
"logps/rejected": -1819.2064208984375,
"loss": 0.1362,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.09457580000162125,
"rewards/margins": 0.29631510376930237,
"rewards/rejected": -0.390890896320343,
"step": 1810
},
{
"epoch": 0.71,
"learning_rate": 1.1477437150088599e-06,
"logits/chosen": -1.112823247909546,
"logits/rejected": -0.731514573097229,
"logps/chosen": -659.6626586914062,
"logps/rejected": -1812.48828125,
"loss": 0.1304,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.05299247428774834,
"rewards/margins": 0.3928179442882538,
"rewards/rejected": -0.4458104074001312,
"step": 1820
},
{
"epoch": 0.72,
"learning_rate": 1.1190875675987355e-06,
"logits/chosen": -1.3094470500946045,
"logits/rejected": -0.5637291073799133,
"logps/chosen": -753.520263671875,
"logps/rejected": -1700.703857421875,
"loss": 0.1502,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.09490607678890228,
"rewards/margins": 0.31419411301612854,
"rewards/rejected": -0.409100204706192,
"step": 1830
},
{
"epoch": 0.72,
"learning_rate": 1.0906901781515695e-06,
"logits/chosen": -1.550244927406311,
"logits/rejected": -0.08849823474884033,
"logps/chosen": -724.5099487304688,
"logps/rejected": -1681.033447265625,
"loss": 0.1606,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.08625416457653046,
"rewards/margins": 0.31124037504196167,
"rewards/rejected": -0.39749449491500854,
"step": 1840
},
{
"epoch": 0.73,
"learning_rate": 1.0625568678234839e-06,
"logits/chosen": -1.0879476070404053,
"logits/rejected": -0.13099336624145508,
"logps/chosen": -671.8837280273438,
"logps/rejected": -1590.70068359375,
"loss": 0.1721,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.06376481801271439,
"rewards/margins": 0.2916422486305237,
"rewards/rejected": -0.35540705919265747,
"step": 1850
},
{
"epoch": 0.73,
"learning_rate": 1.034692908286964e-06,
"logits/chosen": -1.3455946445465088,
"logits/rejected": -0.2840282917022705,
"logps/chosen": -611.4814453125,
"logps/rejected": -1663.345703125,
"loss": 0.2039,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.07838527858257294,
"rewards/margins": 0.31080400943756104,
"rewards/rejected": -0.3891892731189728,
"step": 1860
},
{
"epoch": 0.73,
"learning_rate": 1.0071035207430352e-06,
"logits/chosen": -1.2556473016738892,
"logits/rejected": -0.011271673254668713,
"logps/chosen": -753.8445434570312,
"logps/rejected": -1636.611083984375,
"loss": 0.2112,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.11006224155426025,
"rewards/margins": 0.24791212379932404,
"rewards/rejected": -0.3579743504524231,
"step": 1870
},
{
"epoch": 0.74,
"learning_rate": 9.797938749429088e-07,
"logits/chosen": -1.2267249822616577,
"logits/rejected": -0.35565489530563354,
"logps/chosen": -690.4405517578125,
"logps/rejected": -1600.0665283203125,
"loss": 0.1862,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.11180742084980011,
"rewards/margins": 0.24852195382118225,
"rewards/rejected": -0.36032935976982117,
"step": 1880
},
{
"epoch": 0.74,
"learning_rate": 9.527690882192636e-07,
"logits/chosen": -1.2072794437408447,
"logits/rejected": 0.457929790019989,
"logps/chosen": -697.0407104492188,
"logps/rejected": -1490.8367919921875,
"loss": 0.1672,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.08405301719903946,
"rewards/margins": 0.30805063247680664,
"rewards/rejected": -0.3921036422252655,
"step": 1890
},
{
"epoch": 0.75,
"learning_rate": 9.260342245273507e-07,
"logits/chosen": -1.3990890979766846,
"logits/rejected": -0.6794065237045288,
"logps/chosen": -618.4937744140625,
"logps/rejected": -1800.4622802734375,
"loss": 0.1376,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.07932893931865692,
"rewards/margins": 0.37783947587013245,
"rewards/rejected": -0.4571684002876282,
"step": 1900
},
{
"epoch": 0.75,
"learning_rate": 8.995942934960964e-07,
"logits/chosen": -1.4945213794708252,
"logits/rejected": -0.18756787478923798,
"logps/chosen": -803.509521484375,
"logps/rejected": -1819.3349609375,
"loss": 0.1544,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.09666319191455841,
"rewards/margins": 0.36414963006973267,
"rewards/rejected": -0.4608128070831299,
"step": 1910
},
{
"epoch": 0.75,
"learning_rate": 8.734542494893955e-07,
"logits/chosen": -1.431398868560791,
"logits/rejected": -0.4752410352230072,
"logps/chosen": -792.5185546875,
"logps/rejected": -1632.630126953125,
"loss": 0.2053,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.09532758593559265,
"rewards/margins": 0.30739787220954895,
"rewards/rejected": -0.4027254581451416,
"step": 1920
},
{
"epoch": 0.76,
"learning_rate": 8.476189906777457e-07,
"logits/chosen": -1.3982821702957153,
"logits/rejected": -0.08427709341049194,
"logps/chosen": -703.8153076171875,
"logps/rejected": -1600.6046142578125,
"loss": 0.1632,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.08243191242218018,
"rewards/margins": 0.2651790678501129,
"rewards/rejected": -0.3476109802722931,
"step": 1930
},
{
"epoch": 0.76,
"learning_rate": 8.220933581204257e-07,
"logits/chosen": -1.2576748132705688,
"logits/rejected": 0.40268439054489136,
"logps/chosen": -528.5084228515625,
"logps/rejected": -1385.6802978515625,
"loss": 0.1183,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.042114533483982086,
"rewards/margins": 0.2899821698665619,
"rewards/rejected": -0.3320966958999634,
"step": 1940
},
{
"epoch": 0.76,
"learning_rate": 7.968821348583644e-07,
"logits/chosen": -1.3039714097976685,
"logits/rejected": -0.34471797943115234,
"logps/chosen": -695.2639770507812,
"logps/rejected": -1490.329345703125,
"loss": 0.1969,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.08580182492733002,
"rewards/margins": 0.252492755651474,
"rewards/rejected": -0.3382945656776428,
"step": 1950
},
{
"epoch": 0.77,
"learning_rate": 7.719900450178882e-07,
"logits/chosen": -1.2936707735061646,
"logits/rejected": 0.12274640798568726,
"logps/chosen": -856.3453979492188,
"logps/rejected": -1843.365966796875,
"loss": 0.1424,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.11615820974111557,
"rewards/margins": 0.33368679881095886,
"rewards/rejected": -0.44984501600265503,
"step": 1960
},
{
"epoch": 0.77,
"learning_rate": 7.474217529255018e-07,
"logits/chosen": -1.611425757408142,
"logits/rejected": -0.11960859596729279,
"logps/chosen": -636.3781127929688,
"logps/rejected": -1416.641357421875,
"loss": 0.1836,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.03691656142473221,
"rewards/margins": 0.26324373483657837,
"rewards/rejected": -0.30016031861305237,
"step": 1970
},
{
"epoch": 0.78,
"learning_rate": 7.231818622338824e-07,
"logits/chosen": -1.616742730140686,
"logits/rejected": -0.024957846850156784,
"logps/chosen": -676.9722900390625,
"logps/rejected": -1823.395751953125,
"loss": 0.1225,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.06905169785022736,
"rewards/margins": 0.3410964906215668,
"rewards/rejected": -0.4101482033729553,
"step": 1980
},
{
"epoch": 0.78,
"learning_rate": 6.992749150592343e-07,
"logits/chosen": -1.2690980434417725,
"logits/rejected": -0.1918954849243164,
"logps/chosen": -866.05029296875,
"logps/rejected": -1606.396240234375,
"loss": 0.1865,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.102320097386837,
"rewards/margins": 0.27621665596961975,
"rewards/rejected": -0.37853676080703735,
"step": 1990
},
{
"epoch": 0.78,
"learning_rate": 6.75705391130183e-07,
"logits/chosen": -1.2711069583892822,
"logits/rejected": -0.00027151108952239156,
"logps/chosen": -804.7188720703125,
"logps/rejected": -1668.5374755859375,
"loss": 0.166,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.07086384296417236,
"rewards/margins": 0.2957269251346588,
"rewards/rejected": -0.3665907680988312,
"step": 2000
},
{
"epoch": 0.79,
"learning_rate": 6.524777069483526e-07,
"logits/chosen": -1.225556492805481,
"logits/rejected": 0.41769227385520935,
"logps/chosen": -634.6071166992188,
"logps/rejected": -1566.803466796875,
"loss": 0.1916,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.020447371527552605,
"rewards/margins": 0.31025153398513794,
"rewards/rejected": -0.3306989073753357,
"step": 2010
},
{
"epoch": 0.79,
"learning_rate": 6.29596214960792e-07,
"logits/chosen": -1.3543643951416016,
"logits/rejected": -0.1612066775560379,
"logps/chosen": -731.138671875,
"logps/rejected": -1658.898193359375,
"loss": 0.1954,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.078438401222229,
"rewards/margins": 0.2861797511577606,
"rewards/rejected": -0.36461812257766724,
"step": 2020
},
{
"epoch": 0.8,
"learning_rate": 6.070652027444102e-07,
"logits/chosen": -1.5058627128601074,
"logits/rejected": -0.940344512462616,
"logps/chosen": -629.819580078125,
"logps/rejected": -1781.6654052734375,
"loss": 0.1992,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.061135418713092804,
"rewards/margins": 0.3336263597011566,
"rewards/rejected": -0.39476174116134644,
"step": 2030
},
{
"epoch": 0.8,
"learning_rate": 5.848888922025553e-07,
"logits/chosen": -1.524287462234497,
"logits/rejected": -0.8633726239204407,
"logps/chosen": -602.5631103515625,
"logps/rejected": -1604.6434326171875,
"loss": 0.1721,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.05612843483686447,
"rewards/margins": 0.33471354842185974,
"rewards/rejected": -0.3908420205116272,
"step": 2040
},
{
"epoch": 0.8,
"learning_rate": 5.63071438773913e-07,
"logits/chosen": -1.4894258975982666,
"logits/rejected": -0.14880971610546112,
"logps/chosen": -642.1497802734375,
"logps/rejected": -1459.4459228515625,
"loss": 0.2064,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.0682179257273674,
"rewards/margins": 0.21762903034687042,
"rewards/rejected": -0.2858469486236572,
"step": 2050
},
{
"epoch": 0.81,
"learning_rate": 5.416169306538485e-07,
"logits/chosen": -1.3140041828155518,
"logits/rejected": 0.3596586287021637,
"logps/chosen": -820.9474487304688,
"logps/rejected": -1682.409912109375,
"loss": 0.2355,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.09827397763729095,
"rewards/margins": 0.281690388917923,
"rewards/rejected": -0.3799643814563751,
"step": 2060
},
{
"epoch": 0.81,
"learning_rate": 5.205293880283552e-07,
"logits/chosen": -1.5573115348815918,
"logits/rejected": -0.13623039424419403,
"logps/chosen": -671.4677124023438,
"logps/rejected": -1707.608642578125,
"loss": 0.1752,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.05831771343946457,
"rewards/margins": 0.3186204433441162,
"rewards/rejected": -0.37693825364112854,
"step": 2070
},
{
"epoch": 0.82,
"learning_rate": 4.998127623207404e-07,
"logits/chosen": -1.2270171642303467,
"logits/rejected": -0.16427640616893768,
"logps/chosen": -636.1573486328125,
"logps/rejected": -1320.9652099609375,
"loss": 0.1501,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.04943504184484482,
"rewards/margins": 0.23794174194335938,
"rewards/rejected": -0.2873767912387848,
"step": 2080
},
{
"epoch": 0.82,
"learning_rate": 4.794709354512073e-07,
"logits/chosen": -1.4142221212387085,
"logits/rejected": -0.6630762219429016,
"logps/chosen": -694.4979858398438,
"logps/rejected": -1861.2236328125,
"loss": 0.1027,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.0681043490767479,
"rewards/margins": 0.33745378255844116,
"rewards/rejected": -0.40555816888809204,
"step": 2090
},
{
"epoch": 0.82,
"learning_rate": 4.5950771910944603e-07,
"logits/chosen": -1.386041522026062,
"logits/rejected": -0.4771800637245178,
"logps/chosen": -552.6729736328125,
"logps/rejected": -1493.256103515625,
"loss": 0.1758,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.06818665564060211,
"rewards/margins": 0.2598266899585724,
"rewards/rejected": -0.3280133306980133,
"step": 2100
},
{
"epoch": 0.83,
"learning_rate": 4.399268540403975e-07,
"logits/chosen": -1.6429307460784912,
"logits/rejected": -0.7215126752853394,
"logps/chosen": -692.6094970703125,
"logps/rejected": -1617.4793701171875,
"loss": 0.1561,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.04670856520533562,
"rewards/margins": 0.3174007534980774,
"rewards/rejected": -0.3641093373298645,
"step": 2110
},
{
"epoch": 0.83,
"learning_rate": 4.2073200934330316e-07,
"logits/chosen": -1.318565011024475,
"logits/rejected": 0.31595462560653687,
"logps/chosen": -688.9269409179688,
"logps/rejected": -1576.12939453125,
"loss": 0.1494,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.05915086343884468,
"rewards/margins": 0.2843713164329529,
"rewards/rejected": -0.34352222084999084,
"step": 2120
},
{
"epoch": 0.84,
"learning_rate": 4.019267817841835e-07,
"logits/chosen": -1.4014190435409546,
"logits/rejected": 0.06803856045007706,
"logps/chosen": -661.041015625,
"logps/rejected": -1782.5843505859375,
"loss": 0.1339,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.060147546231746674,
"rewards/margins": 0.367543488740921,
"rewards/rejected": -0.4276910424232483,
"step": 2130
},
{
"epoch": 0.84,
"learning_rate": 3.8351469512186656e-07,
"logits/chosen": -1.293666124343872,
"logits/rejected": 0.01516579370945692,
"logps/chosen": -703.8981323242188,
"logps/rejected": -1585.350830078125,
"loss": 0.2612,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.07257186621427536,
"rewards/margins": 0.24214370548725128,
"rewards/rejected": -0.31471556425094604,
"step": 2140
},
{
"epoch": 0.84,
"learning_rate": 3.654991994477039e-07,
"logits/chosen": -1.4482967853546143,
"logits/rejected": -0.5136088132858276,
"logps/chosen": -739.101318359375,
"logps/rejected": -1636.627685546875,
"loss": 0.2446,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.08362185955047607,
"rewards/margins": 0.2510630488395691,
"rewards/rejected": -0.33468490839004517,
"step": 2150
},
{
"epoch": 0.85,
"learning_rate": 3.4788367053908087e-07,
"logits/chosen": -1.464727520942688,
"logits/rejected": -0.6895856261253357,
"logps/chosen": -649.2825927734375,
"logps/rejected": -1706.675048828125,
"loss": 0.1222,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.0619744174182415,
"rewards/margins": 0.3182070851325989,
"rewards/rejected": -0.3801814913749695,
"step": 2160
},
{
"epoch": 0.85,
"learning_rate": 3.3067140922686175e-07,
"logits/chosen": -1.2893580198287964,
"logits/rejected": -0.02746570110321045,
"logps/chosen": -637.2613525390625,
"logps/rejected": -1635.1025390625,
"loss": 0.1475,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.06607834994792938,
"rewards/margins": 0.306417852640152,
"rewards/rejected": -0.37249621748924255,
"step": 2170
},
{
"epoch": 0.85,
"learning_rate": 3.1386564077687115e-07,
"logits/chosen": -1.2429146766662598,
"logits/rejected": -0.5083945989608765,
"logps/chosen": -689.4899291992188,
"logps/rejected": -1385.622802734375,
"loss": 0.2019,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.08055596053600311,
"rewards/margins": 0.19417758285999298,
"rewards/rejected": -0.2747335135936737,
"step": 2180
},
{
"epoch": 0.86,
"learning_rate": 2.9746951428553884e-07,
"logits/chosen": -1.2200576066970825,
"logits/rejected": 0.4126719534397125,
"logps/chosen": -697.4050903320312,
"logps/rejected": -1761.609375,
"loss": 0.1621,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.057256706058979034,
"rewards/margins": 0.3544352650642395,
"rewards/rejected": -0.41169196367263794,
"step": 2190
},
{
"epoch": 0.86,
"learning_rate": 2.814861020898146e-07,
"logits/chosen": -1.5707600116729736,
"logits/rejected": -0.5523526668548584,
"logps/chosen": -807.9168090820312,
"logps/rejected": -1893.062255859375,
"loss": 0.12,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.036843474954366684,
"rewards/margins": 0.38658010959625244,
"rewards/rejected": -0.4234235882759094,
"step": 2200
},
{
"epoch": 0.87,
"learning_rate": 2.6591839919146963e-07,
"logits/chosen": -1.3549106121063232,
"logits/rejected": -0.0543874129652977,
"logps/chosen": -659.9330444335938,
"logps/rejected": -1517.608642578125,
"loss": 0.1865,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.08612764626741409,
"rewards/margins": 0.2554120123386383,
"rewards/rejected": -0.3415396809577942,
"step": 2210
},
{
"epoch": 0.87,
"learning_rate": 2.507693226958871e-07,
"logits/chosen": -1.5055897235870361,
"logits/rejected": -0.7960633635520935,
"logps/chosen": -594.6507568359375,
"logps/rejected": -1544.100341796875,
"loss": 0.1835,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.05378856509923935,
"rewards/margins": 0.26992538571357727,
"rewards/rejected": -0.3237139582633972,
"step": 2220
},
{
"epoch": 0.87,
"learning_rate": 2.360417112654481e-07,
"logits/chosen": -1.3403241634368896,
"logits/rejected": -0.036334630101919174,
"logps/chosen": -747.6497802734375,
"logps/rejected": -1497.166748046875,
"loss": 0.2369,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.09375442564487457,
"rewards/margins": 0.2080894410610199,
"rewards/rejected": -0.30184388160705566,
"step": 2230
},
{
"epoch": 0.88,
"learning_rate": 2.2173832458762146e-07,
"logits/chosen": -1.3305310010910034,
"logits/rejected": 0.5647405385971069,
"logps/chosen": -708.2887573242188,
"logps/rejected": -1672.27734375,
"loss": 0.1525,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.0794595405459404,
"rewards/margins": 0.27278995513916016,
"rewards/rejected": -0.35224950313568115,
"step": 2240
},
{
"epoch": 0.88,
"learning_rate": 2.07861842857843e-07,
"logits/chosen": -1.3758533000946045,
"logits/rejected": -0.3346394896507263,
"logps/chosen": -641.7481689453125,
"logps/rejected": -1659.6363525390625,
"loss": 0.1304,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.05183352157473564,
"rewards/margins": 0.3061942458152771,
"rewards/rejected": -0.35802772641181946,
"step": 2250
},
{
"epoch": 0.89,
"learning_rate": 1.9441486627729987e-07,
"logits/chosen": -1.2939542531967163,
"logits/rejected": -0.2226782590150833,
"logps/chosen": -574.517822265625,
"logps/rejected": -1345.4554443359375,
"loss": 0.2427,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.044371530413627625,
"rewards/margins": 0.25636622309684753,
"rewards/rejected": -0.30073776841163635,
"step": 2260
},
{
"epoch": 0.89,
"learning_rate": 1.8139991456569694e-07,
"logits/chosen": -1.5377174615859985,
"logits/rejected": -0.4445236623287201,
"logps/chosen": -666.4637451171875,
"logps/rejected": -1826.706298828125,
"loss": 0.1407,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.04511731117963791,
"rewards/margins": 0.3437032103538513,
"rewards/rejected": -0.3888205587863922,
"step": 2270
},
{
"epoch": 0.89,
"learning_rate": 1.6881942648911077e-07,
"logits/chosen": -1.1588428020477295,
"logits/rejected": -0.33162426948547363,
"logps/chosen": -692.0267333984375,
"logps/rejected": -1666.332763671875,
"loss": 0.1417,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.08260687440633774,
"rewards/margins": 0.27366960048675537,
"rewards/rejected": -0.3562764525413513,
"step": 2280
},
{
"epoch": 0.9,
"learning_rate": 1.5667575940300384e-07,
"logits/chosen": -1.2564775943756104,
"logits/rejected": 0.01690312661230564,
"logps/chosen": -673.8323974609375,
"logps/rejected": -1666.777099609375,
"loss": 0.1716,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.0681915134191513,
"rewards/margins": 0.31038275361061096,
"rewards/rejected": -0.3785742521286011,
"step": 2290
},
{
"epoch": 0.9,
"learning_rate": 1.449711888105046e-07,
"logits/chosen": -1.518640160560608,
"logits/rejected": -0.6442452669143677,
"logps/chosen": -570.983154296875,
"logps/rejected": -1282.307373046875,
"loss": 0.2478,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.0646091103553772,
"rewards/margins": 0.1985008716583252,
"rewards/rejected": -0.2631099820137024,
"step": 2300
},
{
"epoch": 0.91,
"learning_rate": 1.3370790793601373e-07,
"logits/chosen": -1.3143935203552246,
"logits/rejected": -0.862291157245636,
"logps/chosen": -554.1536254882812,
"logps/rejected": -1571.299560546875,
"loss": 0.1953,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.04557369276881218,
"rewards/margins": 0.30963796377182007,
"rewards/rejected": -0.35521167516708374,
"step": 2310
},
{
"epoch": 0.91,
"learning_rate": 1.2288802731423882e-07,
"logits/chosen": -1.0400464534759521,
"logits/rejected": -0.2425573766231537,
"logps/chosen": -563.1722412109375,
"logps/rejected": -1669.356201171875,
"loss": 0.1733,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.032249629497528076,
"rewards/margins": 0.35338449478149414,
"rewards/rejected": -0.38563409447669983,
"step": 2320
},
{
"epoch": 0.91,
"learning_rate": 1.125135743947145e-07,
"logits/chosen": -1.392665982246399,
"logits/rejected": -0.2731800079345703,
"logps/chosen": -636.9364013671875,
"logps/rejected": -1649.939453125,
"loss": 0.1732,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.05704592913389206,
"rewards/margins": 0.2943916916847229,
"rewards/rejected": -0.35143759846687317,
"step": 2330
},
{
"epoch": 0.92,
"learning_rate": 1.0258649316189722e-07,
"logits/chosen": -1.448233723640442,
"logits/rejected": -0.09116245806217194,
"logps/chosen": -595.1334838867188,
"logps/rejected": -1466.4798583984375,
"loss": 0.1563,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.029435504227876663,
"rewards/margins": 0.2879069745540619,
"rewards/rejected": -0.31734246015548706,
"step": 2340
},
{
"epoch": 0.92,
"learning_rate": 9.310864377089696e-08,
"logits/chosen": -1.298662543296814,
"logits/rejected": 0.7022291421890259,
"logps/chosen": -692.7830810546875,
"logps/rejected": -1592.398193359375,
"loss": 0.1976,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.06722499430179596,
"rewards/margins": 0.26029545068740845,
"rewards/rejected": -0.327520489692688,
"step": 2350
},
{
"epoch": 0.93,
"learning_rate": 8.408180219891899e-08,
"logits/chosen": -1.0684707164764404,
"logits/rejected": -0.727800726890564,
"logps/chosen": -588.0787353515625,
"logps/rejected": -1701.8707275390625,
"loss": 0.1205,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.026762153953313828,
"rewards/margins": 0.3723045885562897,
"rewards/rejected": -0.3990667462348938,
"step": 2360
},
{
"epoch": 0.93,
"learning_rate": 7.550765991247655e-08,
"logits/chosen": -1.3413885831832886,
"logits/rejected": -0.5428125262260437,
"logps/chosen": -576.0411376953125,
"logps/rejected": -1852.245361328125,
"loss": 0.1253,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.046153269708156586,
"rewards/margins": 0.3955245614051819,
"rewards/rejected": -0.44167786836624146,
"step": 2370
},
{
"epoch": 0.93,
"learning_rate": 6.738782355044048e-08,
"logits/chosen": -1.3843357563018799,
"logits/rejected": -0.3473323881626129,
"logps/chosen": -653.3553466796875,
"logps/rejected": -1551.5245361328125,
"loss": 0.2134,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.07392759621143341,
"rewards/margins": 0.27120834589004517,
"rewards/rejected": -0.3451359272003174,
"step": 2380
},
{
"epoch": 0.94,
"learning_rate": 5.972381462298643e-08,
"logits/chosen": -1.446597695350647,
"logits/rejected": -0.7707004547119141,
"logps/chosen": -587.423095703125,
"logps/rejected": -1533.5751953125,
"loss": 0.1378,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.048751670867204666,
"rewards/margins": 0.2718280255794525,
"rewards/rejected": -0.3205797076225281,
"step": 2390
},
{
"epoch": 0.94,
"learning_rate": 5.2517069226488694e-08,
"logits/chosen": -1.3285058736801147,
"logits/rejected": 0.6017956733703613,
"logps/chosen": -635.4934692382812,
"logps/rejected": -1658.809326171875,
"loss": 0.1218,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.054670076817274094,
"rewards/margins": 0.33853715658187866,
"rewards/rejected": -0.39320722222328186,
"step": 2400
},
{
"epoch": 0.95,
"learning_rate": 4.576893777442415e-08,
"logits/chosen": -1.455540418624878,
"logits/rejected": -0.42580240964889526,
"logps/chosen": -567.0203857421875,
"logps/rejected": -1439.871826171875,
"loss": 0.1791,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.04314614459872246,
"rewards/margins": 0.26481324434280396,
"rewards/rejected": -0.3079594075679779,
"step": 2410
},
{
"epoch": 0.95,
"learning_rate": 3.9480684744327145e-08,
"logits/chosen": -0.8030007481575012,
"logits/rejected": -0.6547081470489502,
"logps/chosen": -714.3633422851562,
"logps/rejected": -1776.2916259765625,
"loss": 0.135,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.07487257570028305,
"rewards/margins": 0.3637450039386749,
"rewards/rejected": -0.4386175274848938,
"step": 2420
},
{
"epoch": 0.95,
"learning_rate": 3.3653488440851255e-08,
"logits/chosen": -1.4338642358779907,
"logits/rejected": -0.2775370478630066,
"logps/chosen": -522.8925170898438,
"logps/rejected": -1403.964599609375,
"loss": 0.1429,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.028084104880690575,
"rewards/margins": 0.2945864796638489,
"rewards/rejected": -0.322670578956604,
"step": 2430
},
{
"epoch": 0.96,
"learning_rate": 2.82884407749745e-08,
"logits/chosen": -1.548905611038208,
"logits/rejected": -0.1580895483493805,
"logps/chosen": -721.1185913085938,
"logps/rejected": -1815.273681640625,
"loss": 0.168,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.058462172746658325,
"rewards/margins": 0.334224134683609,
"rewards/rejected": -0.39268630743026733,
"step": 2440
},
{
"epoch": 0.96,
"learning_rate": 2.3386547059396634e-08,
"logits/chosen": -1.3936518430709839,
"logits/rejected": -0.42037662863731384,
"logps/chosen": -727.2462768554688,
"logps/rejected": -1849.8060302734375,
"loss": 0.1504,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.053724952042102814,
"rewards/margins": 0.34565088152885437,
"rewards/rejected": -0.3993757963180542,
"step": 2450
},
{
"epoch": 0.96,
"learning_rate": 1.8948725820160663e-08,
"logits/chosen": -1.5373561382293701,
"logits/rejected": -0.5124548673629761,
"logps/chosen": -707.9256591796875,
"logps/rejected": -1602.8634033203125,
"loss": 0.152,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.06985752284526825,
"rewards/margins": 0.3106308579444885,
"rewards/rejected": -0.3804883658885956,
"step": 2460
},
{
"epoch": 0.97,
"learning_rate": 1.497580862453829e-08,
"logits/chosen": -1.3587336540222168,
"logits/rejected": 0.12183968722820282,
"logps/chosen": -682.3876342773438,
"logps/rejected": -1501.242431640625,
"loss": 0.179,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.07504203170537949,
"rewards/margins": 0.25899559259414673,
"rewards/rejected": -0.3340376317501068,
"step": 2470
},
{
"epoch": 0.97,
"learning_rate": 1.14685399252093e-08,
"logits/chosen": -1.2843676805496216,
"logits/rejected": -0.37819939851760864,
"logps/chosen": -639.9012451171875,
"logps/rejected": -1667.867431640625,
"loss": 0.1334,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.05129992961883545,
"rewards/margins": 0.30902066826820374,
"rewards/rejected": -0.3603206276893616,
"step": 2480
},
{
"epoch": 0.98,
"learning_rate": 8.427576920763957e-09,
"logits/chosen": -1.2090356349945068,
"logits/rejected": -0.08205322176218033,
"logps/chosen": -759.0253295898438,
"logps/rejected": -1694.987060546875,
"loss": 0.2816,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.13451895117759705,
"rewards/margins": 0.25275808572769165,
"rewards/rejected": -0.3872770071029663,
"step": 2490
},
{
"epoch": 0.98,
"learning_rate": 5.853489432556536e-09,
"logits/chosen": -1.5106983184814453,
"logits/rejected": -0.8639631271362305,
"logps/chosen": -654.6641845703125,
"logps/rejected": -1713.3929443359375,
"loss": 0.1831,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.06343318521976471,
"rewards/margins": 0.31247463822364807,
"rewards/rejected": -0.3759078085422516,
"step": 2500
},
{
"epoch": 0.98,
"learning_rate": 3.746759797931265e-09,
"logits/chosen": -1.4619848728179932,
"logits/rejected": 0.3511095643043518,
"logps/chosen": -736.3690795898438,
"logps/rejected": -1626.9466552734375,
"loss": 0.1604,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.06985476613044739,
"rewards/margins": 0.2860848307609558,
"rewards/rejected": -0.3559395968914032,
"step": 2510
},
{
"epoch": 0.99,
"learning_rate": 2.1077827798404728e-09,
"logits/chosen": -1.3730641603469849,
"logits/rejected": -0.6747050881385803,
"logps/chosen": -546.4849853515625,
"logps/rejected": -1590.315673828125,
"loss": 0.1671,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.036424748599529266,
"rewards/margins": 0.34394755959510803,
"rewards/rejected": -0.3803723454475403,
"step": 2520
},
{
"epoch": 0.99,
"learning_rate": 9.368654928731958e-10,
"logits/chosen": -1.3955776691436768,
"logits/rejected": -0.6365998983383179,
"logps/chosen": -608.5187377929688,
"logps/rejected": -1592.2816162109375,
"loss": 0.1843,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.08722618967294693,
"rewards/margins": 0.30947092175483704,
"rewards/rejected": -0.39669710397720337,
"step": 2530
},
{
"epoch": 1.0,
"learning_rate": 2.3422734570816006e-10,
"logits/chosen": -1.4981211423873901,
"logits/rejected": -0.8375118374824524,
"logps/chosen": -656.091796875,
"logps/rejected": -1573.434814453125,
"loss": 0.1836,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.05932006239891052,
"rewards/margins": 0.28673693537712097,
"rewards/rejected": -0.3460569679737091,
"step": 2540
},
{
"epoch": 1.0,
"learning_rate": 0.0,
"logits/chosen": -1.4274616241455078,
"logits/rejected": 0.46426883339881897,
"logps/chosen": -776.08154296875,
"logps/rejected": -1525.145751953125,
"loss": 0.1924,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.08221141993999481,
"rewards/margins": 0.24572968482971191,
"rewards/rejected": -0.32794108986854553,
"step": 2550
},
{
"epoch": 1.0,
"step": 2550,
"total_flos": 0.0,
"train_loss": 0.19951762257837782,
"train_runtime": 10798.5669,
"train_samples_per_second": 0.945,
"train_steps_per_second": 0.236
}
],
"logging_steps": 10,
"max_steps": 2550,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}