zephyr-ds / trainer_state.json
jikaixuan's picture
Model save
bbea48f verified
raw
history blame
53.4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997382884061764,
"eval_steps": 100,
"global_step": 955,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5.208333333333334e-07,
"logits/chosen": -2.980285167694092,
"logits/rejected": -2.87275767326355,
"logps/chosen": -313.4390563964844,
"logps/rejected": -236.1754150390625,
"loss": 0.6931,
"pred_label": 0.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1,
"use_label": 10.0
},
{
"epoch": 0.01,
"learning_rate": 5.208333333333334e-06,
"logits/chosen": -2.861464262008667,
"logits/rejected": -2.907951593399048,
"logps/chosen": -323.6517333984375,
"logps/rejected": -284.9451904296875,
"loss": 0.6921,
"pred_label": 0.0,
"rewards/accuracies": 0.4375,
"rewards/chosen": 0.0027037172112613916,
"rewards/margins": 0.001292458618991077,
"rewards/rejected": 0.0014112575445324183,
"step": 10,
"use_label": 90.0
},
{
"epoch": 0.02,
"learning_rate": 1.0416666666666668e-05,
"logits/chosen": -2.7527613639831543,
"logits/rejected": -2.796025037765503,
"logps/chosen": -236.6191864013672,
"logps/rejected": -242.22232055664062,
"loss": 0.6831,
"pred_label": 0.0,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.007748906500637531,
"rewards/margins": 0.018490687012672424,
"rewards/rejected": -0.010741781443357468,
"step": 20,
"use_label": 242.0
},
{
"epoch": 0.03,
"learning_rate": 1.5625e-05,
"logits/chosen": -2.8575313091278076,
"logits/rejected": -2.829209804534912,
"logps/chosen": -278.3554992675781,
"logps/rejected": -252.61123657226562,
"loss": 0.6574,
"pred_label": 0.0,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.026403894647955894,
"rewards/margins": 0.09085250645875931,
"rewards/rejected": -0.06444860994815826,
"step": 30,
"use_label": 402.0
},
{
"epoch": 0.04,
"learning_rate": 2.0833333333333336e-05,
"logits/chosen": -2.840946674346924,
"logits/rejected": -2.8493659496307373,
"logps/chosen": -281.32928466796875,
"logps/rejected": -277.8607482910156,
"loss": 0.6339,
"pred_label": 0.4749999940395355,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.02641097828745842,
"rewards/margins": 0.2079576551914215,
"rewards/rejected": -0.1815466731786728,
"step": 40,
"use_label": 561.5250244140625
},
{
"epoch": 0.05,
"learning_rate": 2.604166666666667e-05,
"logits/chosen": -2.8537254333496094,
"logits/rejected": -2.8391127586364746,
"logps/chosen": -266.79296875,
"logps/rejected": -262.0001220703125,
"loss": 0.5836,
"pred_label": 5.775000095367432,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.06846104562282562,
"rewards/margins": 0.33990827202796936,
"rewards/rejected": -0.4083693027496338,
"step": 50,
"use_label": 716.2249755859375
},
{
"epoch": 0.06,
"learning_rate": 3.125e-05,
"logits/chosen": -2.8152918815612793,
"logits/rejected": -2.804291009902954,
"logps/chosen": -301.41326904296875,
"logps/rejected": -291.53997802734375,
"loss": 0.5613,
"pred_label": 28.600000381469727,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.09980294108390808,
"rewards/margins": 0.4436502456665039,
"rewards/rejected": -0.5434532165527344,
"step": 60,
"use_label": 853.4000244140625
},
{
"epoch": 0.07,
"learning_rate": 3.6458333333333336e-05,
"logits/chosen": -2.8159656524658203,
"logits/rejected": -2.807382345199585,
"logps/chosen": -295.85113525390625,
"logps/rejected": -281.4297180175781,
"loss": 0.4736,
"pred_label": 72.82499694824219,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.15376296639442444,
"rewards/margins": 0.6926594972610474,
"rewards/rejected": -0.8464224934577942,
"step": 70,
"use_label": 969.1749877929688
},
{
"epoch": 0.08,
"learning_rate": 4.166666666666667e-05,
"logits/chosen": -2.760671377182007,
"logits/rejected": -2.745089292526245,
"logps/chosen": -309.682861328125,
"logps/rejected": -294.1726989746094,
"loss": 0.3682,
"pred_label": 129.4499969482422,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.4377492070198059,
"rewards/margins": 1.0782606601715088,
"rewards/rejected": -1.516010046005249,
"step": 80,
"use_label": 1072.550048828125
},
{
"epoch": 0.09,
"learning_rate": 4.6875e-05,
"logits/chosen": -2.689037799835205,
"logits/rejected": -2.7456631660461426,
"logps/chosen": -298.6680603027344,
"logps/rejected": -281.3171081542969,
"loss": 0.3626,
"pred_label": 213.02499389648438,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.28849169611930847,
"rewards/margins": 1.2157728672027588,
"rewards/rejected": -1.5042643547058105,
"step": 90,
"use_label": 1148.9749755859375
},
{
"epoch": 0.1,
"learning_rate": 4.976717112922003e-05,
"logits/chosen": -2.722339153289795,
"logits/rejected": -2.718428611755371,
"logps/chosen": -287.2553405761719,
"logps/rejected": -325.00335693359375,
"loss": 0.3168,
"pred_label": 303.125,
"rewards/accuracies": 0.71875,
"rewards/chosen": -1.3123645782470703,
"rewards/margins": 1.8343286514282227,
"rewards/rejected": -3.146693468093872,
"step": 100,
"use_label": 1218.875
},
{
"epoch": 0.12,
"learning_rate": 4.918509895227008e-05,
"logits/chosen": -2.694249391555786,
"logits/rejected": -2.633723497390747,
"logps/chosen": -288.16387939453125,
"logps/rejected": -293.7809143066406,
"loss": 0.2607,
"pred_label": 402.625,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -2.3489272594451904,
"rewards/margins": 2.1118221282958984,
"rewards/rejected": -4.46074914932251,
"step": 110,
"use_label": 1279.375
},
{
"epoch": 0.13,
"learning_rate": 4.860302677532014e-05,
"logits/chosen": -2.718721866607666,
"logits/rejected": -2.699587345123291,
"logps/chosen": -292.71112060546875,
"logps/rejected": -279.4311218261719,
"loss": 0.2879,
"pred_label": 507.5,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -1.3198258876800537,
"rewards/margins": 1.854914903640747,
"rewards/rejected": -3.1747405529022217,
"step": 120,
"use_label": 1334.5
},
{
"epoch": 0.14,
"learning_rate": 4.80209545983702e-05,
"logits/chosen": -2.7755086421966553,
"logits/rejected": -2.7087435722351074,
"logps/chosen": -329.43267822265625,
"logps/rejected": -308.2383728027344,
"loss": 0.2811,
"pred_label": 611.7249755859375,
"rewards/accuracies": 0.65625,
"rewards/chosen": -2.216526508331299,
"rewards/margins": 1.2961757183074951,
"rewards/rejected": -3.512702226638794,
"step": 130,
"use_label": 1390.2750244140625
},
{
"epoch": 0.15,
"learning_rate": 4.743888242142026e-05,
"logits/chosen": -2.6767191886901855,
"logits/rejected": -2.643078327178955,
"logps/chosen": -318.53924560546875,
"logps/rejected": -322.80078125,
"loss": 0.1985,
"pred_label": 719.9749755859375,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -6.043245792388916,
"rewards/margins": 2.734940767288208,
"rewards/rejected": -8.778186798095703,
"step": 140,
"use_label": 1442.0250244140625
},
{
"epoch": 0.16,
"learning_rate": 4.685681024447032e-05,
"logits/chosen": -2.008868932723999,
"logits/rejected": -2.024056911468506,
"logps/chosen": -2590.871337890625,
"logps/rejected": -2381.74951171875,
"loss": 0.037,
"pred_label": 853.2249755859375,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -229.59854125976562,
"rewards/margins": -19.598337173461914,
"rewards/rejected": -210.0001983642578,
"step": 150,
"use_label": 1468.7750244140625
},
{
"epoch": 0.17,
"learning_rate": 4.6274738067520374e-05,
"logits/chosen": -3.4510104656219482,
"logits/rejected": -3.4814345836639404,
"logps/chosen": -5424.06201171875,
"logps/rejected": -4965.0986328125,
"loss": 0.0229,
"pred_label": 1008.5750122070312,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": -516.1680297851562,
"rewards/margins": -46.562461853027344,
"rewards/rejected": -469.6055603027344,
"step": 160,
"use_label": 1473.425048828125
},
{
"epoch": 0.18,
"learning_rate": 4.5692665890570435e-05,
"logits/chosen": -3.6305947303771973,
"logits/rejected": -3.6412110328674316,
"logps/chosen": -5863.26220703125,
"logps/rejected": -4459.16650390625,
"loss": 0.0239,
"pred_label": 1161.25,
"rewards/accuracies": 0.40625,
"rewards/chosen": -556.6785888671875,
"rewards/margins": -135.7264862060547,
"rewards/rejected": -420.95208740234375,
"step": 170,
"use_label": 1480.75
},
{
"epoch": 0.19,
"learning_rate": 4.511059371362049e-05,
"logits/chosen": -3.826639175415039,
"logits/rejected": -3.826951503753662,
"logps/chosen": -5895.041015625,
"logps/rejected": -5085.115234375,
"loss": 0.021,
"pred_label": 1315.800048828125,
"rewards/accuracies": 0.4375,
"rewards/chosen": -563.9112548828125,
"rewards/margins": -82.46792602539062,
"rewards/rejected": -481.443359375,
"step": 180,
"use_label": 1486.199951171875
},
{
"epoch": 0.2,
"learning_rate": 4.452852153667055e-05,
"logits/chosen": -3.8287880420684814,
"logits/rejected": -3.829810619354248,
"logps/chosen": -6264.6552734375,
"logps/rejected": -4964.57666015625,
"loss": 0.0083,
"pred_label": 1472.0,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -595.7398681640625,
"rewards/margins": -125.386474609375,
"rewards/rejected": -470.3534240722656,
"step": 190,
"use_label": 1490.0
},
{
"epoch": 0.21,
"learning_rate": 4.394644935972061e-05,
"logits/chosen": -3.800830364227295,
"logits/rejected": -3.8033287525177,
"logps/chosen": -5603.17431640625,
"logps/rejected": -5157.21826171875,
"loss": 0.0242,
"pred_label": 1629.2249755859375,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -534.1845092773438,
"rewards/margins": -45.28679656982422,
"rewards/rejected": -488.897705078125,
"step": 200,
"use_label": 1492.7750244140625
},
{
"epoch": 0.22,
"learning_rate": 4.336437718277067e-05,
"logits/chosen": -3.7285819053649902,
"logits/rejected": -3.7191810607910156,
"logps/chosen": -6087.337890625,
"logps/rejected": -5075.240234375,
"loss": 0.0165,
"pred_label": 1786.125,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": -578.6089477539062,
"rewards/margins": -96.50392150878906,
"rewards/rejected": -482.1050720214844,
"step": 210,
"use_label": 1495.875
},
{
"epoch": 0.23,
"learning_rate": 4.278230500582072e-05,
"logits/chosen": -3.7653274536132812,
"logits/rejected": -3.7663722038269043,
"logps/chosen": -5865.328125,
"logps/rejected": -5630.29248046875,
"loss": 0.0263,
"pred_label": 1942.125,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -559.0337524414062,
"rewards/margins": -23.070148468017578,
"rewards/rejected": -535.9635620117188,
"step": 220,
"use_label": 1499.875
},
{
"epoch": 0.24,
"learning_rate": 4.220023282887078e-05,
"logits/chosen": -3.8049216270446777,
"logits/rejected": -3.8088595867156982,
"logps/chosen": -6366.97509765625,
"logps/rejected": -5381.87548828125,
"loss": 0.0175,
"pred_label": 2098.27490234375,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -605.8801879882812,
"rewards/margins": -94.10356140136719,
"rewards/rejected": -511.776611328125,
"step": 230,
"use_label": 1503.7249755859375
},
{
"epoch": 0.25,
"learning_rate": 4.161816065192084e-05,
"logits/chosen": -3.80168080329895,
"logits/rejected": -3.802356243133545,
"logps/chosen": -5398.353515625,
"logps/rejected": -4512.5625,
"loss": 0.0201,
"pred_label": 2253.375,
"rewards/accuracies": 0.3812499940395355,
"rewards/chosen": -512.3775634765625,
"rewards/margins": -83.42332458496094,
"rewards/rejected": -428.95428466796875,
"step": 240,
"use_label": 1508.625
},
{
"epoch": 0.26,
"learning_rate": 4.10360884749709e-05,
"logits/chosen": -3.815431594848633,
"logits/rejected": -3.8156495094299316,
"logps/chosen": -6113.8330078125,
"logps/rejected": -5319.52783203125,
"loss": 0.0204,
"pred_label": 2408.97509765625,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -582.3192138671875,
"rewards/margins": -77.30831146240234,
"rewards/rejected": -505.01092529296875,
"step": 250,
"use_label": 1513.0250244140625
},
{
"epoch": 0.27,
"learning_rate": 4.045401629802096e-05,
"logits/chosen": -3.8084158897399902,
"logits/rejected": -3.8078300952911377,
"logps/chosen": -5415.3056640625,
"logps/rejected": -4981.9599609375,
"loss": 0.0144,
"pred_label": 2563.925048828125,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -516.6696166992188,
"rewards/margins": -43.502445220947266,
"rewards/rejected": -473.16717529296875,
"step": 260,
"use_label": 1518.074951171875
},
{
"epoch": 0.28,
"learning_rate": 3.9871944121071014e-05,
"logits/chosen": -3.8132598400115967,
"logits/rejected": -3.8127427101135254,
"logps/chosen": -5882.3447265625,
"logps/rejected": -5165.20703125,
"loss": 0.0155,
"pred_label": 2719.97509765625,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -559.0473022460938,
"rewards/margins": -70.0018310546875,
"rewards/rejected": -489.0455017089844,
"step": 270,
"use_label": 1522.0250244140625
},
{
"epoch": 0.29,
"learning_rate": 3.928987194412107e-05,
"logits/chosen": -3.8188316822052,
"logits/rejected": -3.818444013595581,
"logps/chosen": -5914.48486328125,
"logps/rejected": -5317.22021484375,
"loss": 0.0222,
"pred_label": 2876.02490234375,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": -562.0521240234375,
"rewards/margins": -56.552947998046875,
"rewards/rejected": -505.4991760253906,
"step": 280,
"use_label": 1525.9749755859375
},
{
"epoch": 0.3,
"learning_rate": 3.870779976717113e-05,
"logits/chosen": -3.819366931915283,
"logits/rejected": -3.82012939453125,
"logps/chosen": -5673.76416015625,
"logps/rejected": -4572.4462890625,
"loss": 0.0131,
"pred_label": 3034.27490234375,
"rewards/accuracies": 0.40625,
"rewards/chosen": -538.0841674804688,
"rewards/margins": -103.86119079589844,
"rewards/rejected": -434.2230529785156,
"step": 290,
"use_label": 1527.7249755859375
},
{
"epoch": 0.31,
"learning_rate": 3.812572759022119e-05,
"logits/chosen": -3.801610231399536,
"logits/rejected": -3.802950382232666,
"logps/chosen": -5732.44921875,
"logps/rejected": -4702.1435546875,
"loss": 0.0155,
"pred_label": 3192.824951171875,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -546.8770751953125,
"rewards/margins": -100.64713287353516,
"rewards/rejected": -446.2298889160156,
"step": 300,
"use_label": 1529.175048828125
},
{
"epoch": 0.32,
"learning_rate": 3.7543655413271246e-05,
"logits/chosen": -3.7929720878601074,
"logits/rejected": -3.7945361137390137,
"logps/chosen": -5449.23046875,
"logps/rejected": -5404.5537109375,
"loss": 0.0162,
"pred_label": 3350.675048828125,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -518.22998046875,
"rewards/margins": -6.628878593444824,
"rewards/rejected": -511.60107421875,
"step": 310,
"use_label": 1531.324951171875
},
{
"epoch": 0.33,
"learning_rate": 3.696158323632131e-05,
"logits/chosen": -3.804478883743286,
"logits/rejected": -3.808168411254883,
"logps/chosen": -6255.1689453125,
"logps/rejected": -5367.044921875,
"loss": 0.0127,
"pred_label": 3508.02490234375,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -595.9188842773438,
"rewards/margins": -87.29522705078125,
"rewards/rejected": -508.6236267089844,
"step": 320,
"use_label": 1533.9749755859375
},
{
"epoch": 0.35,
"learning_rate": 3.637951105937136e-05,
"logits/chosen": -3.806224822998047,
"logits/rejected": -3.809751510620117,
"logps/chosen": -5673.6767578125,
"logps/rejected": -4599.72119140625,
"loss": 0.0221,
"pred_label": 3666.10009765625,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -540.2728881835938,
"rewards/margins": -103.72891998291016,
"rewards/rejected": -436.5439453125,
"step": 330,
"use_label": 1535.9000244140625
},
{
"epoch": 0.36,
"learning_rate": 3.579743888242142e-05,
"logits/chosen": -3.807875871658325,
"logits/rejected": -3.8099751472473145,
"logps/chosen": -5879.23486328125,
"logps/rejected": -4872.8642578125,
"loss": 0.0213,
"pred_label": 3819.85009765625,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": -559.0055541992188,
"rewards/margins": -95.80415344238281,
"rewards/rejected": -463.20135498046875,
"step": 340,
"use_label": 1542.1500244140625
},
{
"epoch": 0.37,
"learning_rate": 3.5215366705471484e-05,
"logits/chosen": -3.8283824920654297,
"logits/rejected": -3.8290863037109375,
"logps/chosen": -6421.64453125,
"logps/rejected": -5712.4833984375,
"loss": 0.0197,
"pred_label": 3974.35009765625,
"rewards/accuracies": 0.375,
"rewards/chosen": -612.4450073242188,
"rewards/margins": -68.9627685546875,
"rewards/rejected": -543.482177734375,
"step": 350,
"use_label": 1547.6500244140625
},
{
"epoch": 0.38,
"learning_rate": 3.463329452852154e-05,
"logits/chosen": -3.8224472999572754,
"logits/rejected": -3.822279691696167,
"logps/chosen": -5800.58251953125,
"logps/rejected": -5399.095703125,
"loss": 0.0133,
"pred_label": 4133.25,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -552.7788696289062,
"rewards/margins": -39.389373779296875,
"rewards/rejected": -513.3894653320312,
"step": 360,
"use_label": 1548.75
},
{
"epoch": 0.39,
"learning_rate": 3.40512223515716e-05,
"logits/chosen": -3.8213393688201904,
"logits/rejected": -3.8208725452423096,
"logps/chosen": -5875.4296875,
"logps/rejected": -5105.2080078125,
"loss": 0.0144,
"pred_label": 4289.4501953125,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -559.922607421875,
"rewards/margins": -76.97410583496094,
"rewards/rejected": -482.9485778808594,
"step": 370,
"use_label": 1552.550048828125
},
{
"epoch": 0.4,
"learning_rate": 3.3469150174621654e-05,
"logits/chosen": -3.786717176437378,
"logits/rejected": -3.7882437705993652,
"logps/chosen": -6002.546875,
"logps/rejected": -5331.99560546875,
"loss": 0.0231,
"pred_label": 4444.1748046875,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -571.731689453125,
"rewards/margins": -64.58997344970703,
"rewards/rejected": -507.1416931152344,
"step": 380,
"use_label": 1557.824951171875
},
{
"epoch": 0.41,
"learning_rate": 3.288707799767171e-05,
"logits/chosen": -3.6485819816589355,
"logits/rejected": -3.6548709869384766,
"logps/chosen": -5633.61083984375,
"logps/rejected": -4738.9384765625,
"loss": 0.0232,
"pred_label": 4600.875,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": -535.0819091796875,
"rewards/margins": -85.61624145507812,
"rewards/rejected": -449.46563720703125,
"step": 390,
"use_label": 1561.125
},
{
"epoch": 0.42,
"learning_rate": 3.2305005820721776e-05,
"logits/chosen": -3.767920732498169,
"logits/rejected": -3.767390489578247,
"logps/chosen": -6094.14697265625,
"logps/rejected": -5175.2177734375,
"loss": 0.0231,
"pred_label": 4752.77490234375,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -578.7096557617188,
"rewards/margins": -87.12177276611328,
"rewards/rejected": -491.58782958984375,
"step": 400,
"use_label": 1569.2249755859375
},
{
"epoch": 0.43,
"learning_rate": 3.172293364377183e-05,
"logits/chosen": -3.7445671558380127,
"logits/rejected": -3.754565715789795,
"logps/chosen": -6168.5,
"logps/rejected": -5233.85009765625,
"loss": 0.0123,
"pred_label": 4906.02490234375,
"rewards/accuracies": 0.46875,
"rewards/chosen": -587.0402221679688,
"rewards/margins": -89.51008605957031,
"rewards/rejected": -497.5301818847656,
"step": 410,
"use_label": 1575.9749755859375
},
{
"epoch": 0.44,
"learning_rate": 3.1140861466821885e-05,
"logits/chosen": -3.766185760498047,
"logits/rejected": -3.764925003051758,
"logps/chosen": -4928.68701171875,
"logps/rejected": -4211.3857421875,
"loss": 0.0286,
"pred_label": 5062.2001953125,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -468.8106994628906,
"rewards/margins": -72.5836410522461,
"rewards/rejected": -396.22705078125,
"step": 420,
"use_label": 1579.800048828125
},
{
"epoch": 0.45,
"learning_rate": 3.055878928987195e-05,
"logits/chosen": -3.76971173286438,
"logits/rejected": -3.766024351119995,
"logps/chosen": -5624.13330078125,
"logps/rejected": -5330.14599609375,
"loss": 0.0128,
"pred_label": 5218.125,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -534.6233520507812,
"rewards/margins": -27.560443878173828,
"rewards/rejected": -507.06292724609375,
"step": 430,
"use_label": 1583.875
},
{
"epoch": 0.46,
"learning_rate": 2.9976717112922005e-05,
"logits/chosen": -3.8054771423339844,
"logits/rejected": -3.8054962158203125,
"logps/chosen": -5717.0419921875,
"logps/rejected": -4923.8671875,
"loss": 0.0159,
"pred_label": 5373.875,
"rewards/accuracies": 0.4375,
"rewards/chosen": -543.377197265625,
"rewards/margins": -76.2901382446289,
"rewards/rejected": -467.0870666503906,
"step": 440,
"use_label": 1588.125
},
{
"epoch": 0.47,
"learning_rate": 2.939464493597206e-05,
"logits/chosen": -3.7968783378601074,
"logits/rejected": -3.7904553413391113,
"logps/chosen": -4891.21484375,
"logps/rejected": -4621.8271484375,
"loss": 0.0209,
"pred_label": 5531.77490234375,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -466.149658203125,
"rewards/margins": -26.93206787109375,
"rewards/rejected": -439.21759033203125,
"step": 450,
"use_label": 1590.2249755859375
},
{
"epoch": 0.48,
"learning_rate": 2.881257275902212e-05,
"logits/chosen": -3.8137125968933105,
"logits/rejected": -3.8143749237060547,
"logps/chosen": -6517.14404296875,
"logps/rejected": -5308.48095703125,
"loss": 0.0172,
"pred_label": 5688.375,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": -621.5343017578125,
"rewards/margins": -117.5860595703125,
"rewards/rejected": -503.9481506347656,
"step": 460,
"use_label": 1593.625
},
{
"epoch": 0.49,
"learning_rate": 2.8230500582072178e-05,
"logits/chosen": -3.7992587089538574,
"logits/rejected": -3.799516201019287,
"logps/chosen": -5745.47314453125,
"logps/rejected": -5189.96923828125,
"loss": 0.0155,
"pred_label": 5845.52490234375,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -547.8372802734375,
"rewards/margins": -54.52460861206055,
"rewards/rejected": -493.3126525878906,
"step": 470,
"use_label": 1596.4749755859375
},
{
"epoch": 0.5,
"learning_rate": 2.7648428405122233e-05,
"logits/chosen": -3.761199951171875,
"logits/rejected": -3.7633252143859863,
"logps/chosen": -5170.09765625,
"logps/rejected": -5077.68310546875,
"loss": 0.0168,
"pred_label": 6002.375,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -492.452392578125,
"rewards/margins": -9.535995483398438,
"rewards/rejected": -482.9164123535156,
"step": 480,
"use_label": 1599.625
},
{
"epoch": 0.51,
"learning_rate": 2.7066356228172297e-05,
"logits/chosen": -3.7587084770202637,
"logits/rejected": -3.758279323577881,
"logps/chosen": -5773.9345703125,
"logps/rejected": -4788.09765625,
"loss": 0.0171,
"pred_label": 6158.6748046875,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -550.6905517578125,
"rewards/margins": -95.62019348144531,
"rewards/rejected": -455.0704040527344,
"step": 490,
"use_label": 1603.324951171875
},
{
"epoch": 0.52,
"learning_rate": 2.6484284051222352e-05,
"logits/chosen": -3.767758846282959,
"logits/rejected": -3.7685482501983643,
"logps/chosen": -6388.5419921875,
"logps/rejected": -5069.38916015625,
"loss": 0.0222,
"pred_label": 6314.52490234375,
"rewards/accuracies": 0.375,
"rewards/chosen": -609.9085693359375,
"rewards/margins": -128.79580688476562,
"rewards/rejected": -481.1127014160156,
"step": 500,
"use_label": 1607.4749755859375
},
{
"epoch": 0.53,
"learning_rate": 2.590221187427241e-05,
"logits/chosen": -3.7820258140563965,
"logits/rejected": -3.784348964691162,
"logps/chosen": -5971.962890625,
"logps/rejected": -4760.34912109375,
"loss": 0.0301,
"pred_label": 6469.9501953125,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -569.66259765625,
"rewards/margins": -116.97715759277344,
"rewards/rejected": -452.6853942871094,
"step": 510,
"use_label": 1612.050048828125
},
{
"epoch": 0.54,
"learning_rate": 2.532013969732247e-05,
"logits/chosen": -3.718219041824341,
"logits/rejected": -3.72932767868042,
"logps/chosen": -6069.69580078125,
"logps/rejected": -5217.16015625,
"loss": 0.0223,
"pred_label": 6623.9248046875,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -577.7742309570312,
"rewards/margins": -83.02960205078125,
"rewards/rejected": -494.74456787109375,
"step": 520,
"use_label": 1618.074951171875
},
{
"epoch": 0.55,
"learning_rate": 2.4738067520372525e-05,
"logits/chosen": -3.7202675342559814,
"logits/rejected": -3.7229580879211426,
"logps/chosen": -6532.5537109375,
"logps/rejected": -5770.68359375,
"loss": 0.0095,
"pred_label": 6780.8251953125,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -623.2237548828125,
"rewards/margins": -75.55280303955078,
"rewards/rejected": -547.6709594726562,
"step": 530,
"use_label": 1621.175048828125
},
{
"epoch": 0.57,
"learning_rate": 2.4155995343422587e-05,
"logits/chosen": -3.759662628173828,
"logits/rejected": -3.7599411010742188,
"logps/chosen": -6315.06787109375,
"logps/rejected": -5507.916015625,
"loss": 0.01,
"pred_label": 6938.4501953125,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -602.653076171875,
"rewards/margins": -78.45845031738281,
"rewards/rejected": -524.1947021484375,
"step": 540,
"use_label": 1623.550048828125
},
{
"epoch": 0.58,
"learning_rate": 2.3573923166472644e-05,
"logits/chosen": -3.738492488861084,
"logits/rejected": -3.7378597259521484,
"logps/chosen": -5971.4853515625,
"logps/rejected": -5198.08935546875,
"loss": 0.0129,
"pred_label": 7093.9501953125,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -568.2282104492188,
"rewards/margins": -74.6135025024414,
"rewards/rejected": -493.61468505859375,
"step": 550,
"use_label": 1628.050048828125
},
{
"epoch": 0.59,
"learning_rate": 2.2991850989522702e-05,
"logits/chosen": -3.794232130050659,
"logits/rejected": -3.793727397918701,
"logps/chosen": -5239.75048828125,
"logps/rejected": -4281.4697265625,
"loss": 0.0136,
"pred_label": 7250.25,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -496.53460693359375,
"rewards/margins": -90.13624572753906,
"rewards/rejected": -406.3983459472656,
"step": 560,
"use_label": 1631.75
},
{
"epoch": 0.6,
"learning_rate": 2.240977881257276e-05,
"logits/chosen": -3.7495296001434326,
"logits/rejected": -3.7504706382751465,
"logps/chosen": -6018.4404296875,
"logps/rejected": -5286.20751953125,
"loss": 0.0237,
"pred_label": 7407.02490234375,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -574.8952026367188,
"rewards/margins": -72.47772979736328,
"rewards/rejected": -502.41748046875,
"step": 570,
"use_label": 1634.9749755859375
},
{
"epoch": 0.61,
"learning_rate": 2.1827706635622818e-05,
"logits/chosen": -3.7940216064453125,
"logits/rejected": -3.794236421585083,
"logps/chosen": -5965.88134765625,
"logps/rejected": -4998.4501953125,
"loss": 0.0206,
"pred_label": 7561.5498046875,
"rewards/accuracies": 0.3812499940395355,
"rewards/chosen": -568.77734375,
"rewards/margins": -95.3790054321289,
"rewards/rejected": -473.3983459472656,
"step": 580,
"use_label": 1640.449951171875
},
{
"epoch": 0.62,
"learning_rate": 2.124563445867288e-05,
"logits/chosen": -3.725088119506836,
"logits/rejected": -3.7297370433807373,
"logps/chosen": -5610.734375,
"logps/rejected": -5206.3388671875,
"loss": 0.0217,
"pred_label": 7717.9248046875,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": -533.9136962890625,
"rewards/margins": -41.3577766418457,
"rewards/rejected": -492.555908203125,
"step": 590,
"use_label": 1644.074951171875
},
{
"epoch": 0.63,
"learning_rate": 2.0663562281722934e-05,
"logits/chosen": -2.4542346000671387,
"logits/rejected": -2.457996129989624,
"logps/chosen": -5316.2861328125,
"logps/rejected": -4824.51171875,
"loss": 0.0176,
"pred_label": 7873.5498046875,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -500.2923889160156,
"rewards/margins": -48.11725616455078,
"rewards/rejected": -452.1751403808594,
"step": 600,
"use_label": 1648.449951171875
},
{
"epoch": 0.64,
"learning_rate": 2.0081490104772992e-05,
"logits/chosen": 1.6535043716430664,
"logits/rejected": 1.6919664144515991,
"logps/chosen": -4125.20458984375,
"logps/rejected": -3309.930419921875,
"loss": 0.019,
"pred_label": 8029.0,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -381.9295349121094,
"rewards/margins": -74.89913177490234,
"rewards/rejected": -307.0304260253906,
"step": 610,
"use_label": 1653.0
},
{
"epoch": 0.65,
"learning_rate": 1.9499417927823053e-05,
"logits/chosen": 3.7263665199279785,
"logits/rejected": 3.714616298675537,
"logps/chosen": -5211.14453125,
"logps/rejected": -4633.3828125,
"loss": 0.0148,
"pred_label": 8184.77490234375,
"rewards/accuracies": 0.4375,
"rewards/chosen": -493.1163024902344,
"rewards/margins": -54.99699783325195,
"rewards/rejected": -438.11932373046875,
"step": 620,
"use_label": 1657.2249755859375
},
{
"epoch": 0.66,
"learning_rate": 1.8917345750873107e-05,
"logits/chosen": 7.3053741455078125,
"logits/rejected": 7.303783416748047,
"logps/chosen": -7381.1630859375,
"logps/rejected": -6444.02734375,
"loss": 0.009,
"pred_label": 8343.1748046875,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -709.6043701171875,
"rewards/margins": -91.3189697265625,
"rewards/rejected": -618.2854614257812,
"step": 630,
"use_label": 1658.824951171875
},
{
"epoch": 0.67,
"learning_rate": 1.833527357392317e-05,
"logits/chosen": 8.230302810668945,
"logits/rejected": 8.22825813293457,
"logps/chosen": -7595.42724609375,
"logps/rejected": -7036.8515625,
"loss": 0.0111,
"pred_label": 8500.5,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -734.2236328125,
"rewards/margins": -55.39581298828125,
"rewards/rejected": -678.8277587890625,
"step": 640,
"use_label": 1661.5
},
{
"epoch": 0.68,
"learning_rate": 1.7753201396973227e-05,
"logits/chosen": 8.20081901550293,
"logits/rejected": 8.195457458496094,
"logps/chosen": -9194.9013671875,
"logps/rejected": -7898.6552734375,
"loss": 0.0088,
"pred_label": 8658.349609375,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -889.2952880859375,
"rewards/margins": -127.2280044555664,
"rewards/rejected": -762.0673217773438,
"step": 650,
"use_label": 1663.6500244140625
},
{
"epoch": 0.69,
"learning_rate": 1.717112922002328e-05,
"logits/chosen": 9.882159233093262,
"logits/rejected": 9.892133712768555,
"logps/chosen": -10026.548828125,
"logps/rejected": -8868.25,
"loss": 0.0147,
"pred_label": 8817.3251953125,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": -973.1184692382812,
"rewards/margins": -111.28104400634766,
"rewards/rejected": -861.83740234375,
"step": 660,
"use_label": 1664.675048828125
},
{
"epoch": 0.7,
"learning_rate": 1.6589057043073342e-05,
"logits/chosen": 11.399931907653809,
"logits/rejected": 11.406278610229492,
"logps/chosen": -11008.333984375,
"logps/rejected": -9124.1875,
"loss": 0.0161,
"pred_label": 8974.8251953125,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -1072.7261962890625,
"rewards/margins": -184.44720458984375,
"rewards/rejected": -888.2789916992188,
"step": 670,
"use_label": 1667.175048828125
},
{
"epoch": 0.71,
"learning_rate": 1.60069848661234e-05,
"logits/chosen": 9.982951164245605,
"logits/rejected": 9.928037643432617,
"logps/chosen": -10043.669921875,
"logps/rejected": -9005.763671875,
"loss": 0.0146,
"pred_label": 9133.150390625,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -975.6735229492188,
"rewards/margins": -100.92012023925781,
"rewards/rejected": -874.75341796875,
"step": 680,
"use_label": 1668.8499755859375
},
{
"epoch": 0.72,
"learning_rate": 1.5424912689173458e-05,
"logits/chosen": 3.900209903717041,
"logits/rejected": 3.7533345222473145,
"logps/chosen": -5247.2783203125,
"logps/rejected": -4165.42138671875,
"loss": 0.015,
"pred_label": 9290.625,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": -499.68951416015625,
"rewards/margins": -104.0757827758789,
"rewards/rejected": -395.61370849609375,
"step": 690,
"use_label": 1671.375
},
{
"epoch": 0.73,
"learning_rate": 1.4842840512223516e-05,
"logits/chosen": 3.9459800720214844,
"logits/rejected": 3.741647243499756,
"logps/chosen": -6615.76708984375,
"logps/rejected": -5040.81982421875,
"loss": 0.0158,
"pred_label": 9445.275390625,
"rewards/accuracies": 0.40625,
"rewards/chosen": -634.0076904296875,
"rewards/margins": -154.7536163330078,
"rewards/rejected": -479.25408935546875,
"step": 700,
"use_label": 1676.7249755859375
},
{
"epoch": 0.74,
"learning_rate": 1.4260768335273575e-05,
"logits/chosen": 5.425192832946777,
"logits/rejected": 5.073692321777344,
"logps/chosen": -8362.833984375,
"logps/rejected": -6741.9013671875,
"loss": 0.0127,
"pred_label": 9602.0498046875,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -807.3259887695312,
"rewards/margins": -157.26266479492188,
"rewards/rejected": -650.0633544921875,
"step": 710,
"use_label": 1679.949951171875
},
{
"epoch": 0.75,
"learning_rate": 1.3678696158323633e-05,
"logits/chosen": 10.10822582244873,
"logits/rejected": 10.002889633178711,
"logps/chosen": -10245.421875,
"logps/rejected": -9104.876953125,
"loss": 0.023,
"pred_label": 9759.0751953125,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -997.3968505859375,
"rewards/margins": -112.24949645996094,
"rewards/rejected": -885.1474609375,
"step": 720,
"use_label": 1682.925048828125
},
{
"epoch": 0.76,
"learning_rate": 1.309662398137369e-05,
"logits/chosen": 10.97143840789795,
"logits/rejected": 10.992796897888184,
"logps/chosen": -10079.0634765625,
"logps/rejected": -8320.7255859375,
"loss": 0.0134,
"pred_label": 9917.2001953125,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -978.1613159179688,
"rewards/margins": -170.25567626953125,
"rewards/rejected": -807.9056396484375,
"step": 730,
"use_label": 1684.800048828125
},
{
"epoch": 0.77,
"learning_rate": 1.2514551804423749e-05,
"logits/chosen": 12.233144760131836,
"logits/rejected": 12.248846054077148,
"logps/chosen": -12818.298828125,
"logps/rejected": -11287.875,
"loss": 0.0048,
"pred_label": 10076.650390625,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -1251.0927734375,
"rewards/margins": -150.9541473388672,
"rewards/rejected": -1100.138671875,
"step": 740,
"use_label": 1685.3499755859375
},
{
"epoch": 0.79,
"learning_rate": 1.1932479627473807e-05,
"logits/chosen": 12.499679565429688,
"logits/rejected": 12.485097885131836,
"logps/chosen": -11923.8232421875,
"logps/rejected": -10479.5771484375,
"loss": 0.0074,
"pred_label": 10235.875,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": -1166.3336181640625,
"rewards/margins": -144.428466796875,
"rewards/rejected": -1021.9050903320312,
"step": 750,
"use_label": 1686.125
},
{
"epoch": 0.8,
"learning_rate": 1.1350407450523866e-05,
"logits/chosen": 7.414717197418213,
"logits/rejected": 7.40515661239624,
"logps/chosen": -9329.333984375,
"logps/rejected": -8092.07177734375,
"loss": 0.0077,
"pred_label": 10393.625,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -903.3870849609375,
"rewards/margins": -120.39128112792969,
"rewards/rejected": -782.995849609375,
"step": 760,
"use_label": 1688.375
},
{
"epoch": 0.81,
"learning_rate": 1.0768335273573923e-05,
"logits/chosen": 3.0171780586242676,
"logits/rejected": 2.9968318939208984,
"logps/chosen": -6287.14453125,
"logps/rejected": -5580.78515625,
"loss": 0.0133,
"pred_label": 10549.275390625,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -598.4849243164062,
"rewards/margins": -68.8787841796875,
"rewards/rejected": -529.6060791015625,
"step": 770,
"use_label": 1692.7249755859375
},
{
"epoch": 0.82,
"learning_rate": 1.0186263096623982e-05,
"logits/chosen": -1.7731034755706787,
"logits/rejected": -1.784906029701233,
"logps/chosen": -4869.2548828125,
"logps/rejected": -4165.048828125,
"loss": 0.0135,
"pred_label": 10705.75,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -458.80615234375,
"rewards/margins": -67.01484680175781,
"rewards/rejected": -391.7913513183594,
"step": 780,
"use_label": 1696.25
},
{
"epoch": 0.83,
"learning_rate": 9.60419091967404e-06,
"logits/chosen": -0.7930339574813843,
"logits/rejected": -0.8520814180374146,
"logps/chosen": -4772.41162109375,
"logps/rejected": -4426.7998046875,
"loss": 0.0181,
"pred_label": 10861.849609375,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -450.46026611328125,
"rewards/margins": -33.853233337402344,
"rewards/rejected": -416.60699462890625,
"step": 790,
"use_label": 1700.1500244140625
},
{
"epoch": 0.84,
"learning_rate": 9.022118742724098e-06,
"logits/chosen": -2.1026828289031982,
"logits/rejected": -2.1392974853515625,
"logps/chosen": -5048.7392578125,
"logps/rejected": -4407.5849609375,
"loss": 0.0168,
"pred_label": 11020.3251953125,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -473.4267578125,
"rewards/margins": -60.274497985839844,
"rewards/rejected": -413.1521911621094,
"step": 800,
"use_label": 1701.675048828125
},
{
"epoch": 0.85,
"learning_rate": 8.440046565774158e-06,
"logits/chosen": -1.4834654331207275,
"logits/rejected": -1.5466824769973755,
"logps/chosen": -3907.274169921875,
"logps/rejected": -3107.385986328125,
"loss": 0.014,
"pred_label": 11177.2001953125,
"rewards/accuracies": 0.3812499940395355,
"rewards/chosen": -362.62506103515625,
"rewards/margins": -74.76776885986328,
"rewards/rejected": -287.8572692871094,
"step": 810,
"use_label": 1704.800048828125
},
{
"epoch": 0.86,
"learning_rate": 7.857974388824214e-06,
"logits/chosen": -0.9667215347290039,
"logits/rejected": -1.0632926225662231,
"logps/chosen": -3960.51708984375,
"logps/rejected": -3172.82275390625,
"loss": 0.0275,
"pred_label": 11334.25,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -366.71990966796875,
"rewards/margins": -73.59380340576172,
"rewards/rejected": -293.1261291503906,
"step": 820,
"use_label": 1707.75
},
{
"epoch": 0.87,
"learning_rate": 7.275902211874273e-06,
"logits/chosen": 3.487344264984131,
"logits/rejected": 3.3718509674072266,
"logps/chosen": -5937.5302734375,
"logps/rejected": -6147.76416015625,
"loss": 0.014,
"pred_label": 11490.599609375,
"rewards/accuracies": 0.5625,
"rewards/chosen": -567.4717407226562,
"rewards/margins": 19.284542083740234,
"rewards/rejected": -586.7562255859375,
"step": 830,
"use_label": 1711.4000244140625
},
{
"epoch": 0.88,
"learning_rate": 6.693830034924331e-06,
"logits/chosen": 11.813470840454102,
"logits/rejected": 11.792594909667969,
"logps/chosen": -11349.767578125,
"logps/rejected": -10712.0576171875,
"loss": 0.01,
"pred_label": 11647.599609375,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -1108.175048828125,
"rewards/margins": -63.417640686035156,
"rewards/rejected": -1044.7574462890625,
"step": 840,
"use_label": 1714.4000244140625
},
{
"epoch": 0.89,
"learning_rate": 6.111757857974389e-06,
"logits/chosen": 12.407671928405762,
"logits/rejected": 12.412581443786621,
"logps/chosen": -12044.3359375,
"logps/rejected": -10440.21875,
"loss": 0.0137,
"pred_label": 11806.0498046875,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -1174.930908203125,
"rewards/margins": -155.24510192871094,
"rewards/rejected": -1019.6856689453125,
"step": 850,
"use_label": 1715.949951171875
},
{
"epoch": 0.9,
"learning_rate": 5.529685681024447e-06,
"logits/chosen": 13.048141479492188,
"logits/rejected": 13.045463562011719,
"logps/chosen": -12283.849609375,
"logps/rejected": -11249.0595703125,
"loss": 0.0112,
"pred_label": 11965.0,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -1199.896728515625,
"rewards/margins": -102.23991394042969,
"rewards/rejected": -1097.6568603515625,
"step": 860,
"use_label": 1717.0
},
{
"epoch": 0.91,
"learning_rate": 4.947613504074506e-06,
"logits/chosen": 13.306634902954102,
"logits/rejected": 13.326390266418457,
"logps/chosen": -10968.7822265625,
"logps/rejected": -10457.6435546875,
"loss": 0.0108,
"pred_label": 12123.5751953125,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -1072.5794677734375,
"rewards/margins": -50.84003448486328,
"rewards/rejected": -1021.7394409179688,
"step": 870,
"use_label": 1718.425048828125
},
{
"epoch": 0.92,
"learning_rate": 4.3655413271245635e-06,
"logits/chosen": 13.300872802734375,
"logits/rejected": 13.316276550292969,
"logps/chosen": -13030.8095703125,
"logps/rejected": -11216.4794921875,
"loss": 0.0078,
"pred_label": 12279.9501953125,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -1274.422119140625,
"rewards/margins": -178.9391632080078,
"rewards/rejected": -1095.4830322265625,
"step": 880,
"use_label": 1722.050048828125
},
{
"epoch": 0.93,
"learning_rate": 3.7834691501746217e-06,
"logits/chosen": 13.323100090026855,
"logits/rejected": 13.341893196105957,
"logps/chosen": -13646.083984375,
"logps/rejected": -12056.134765625,
"loss": 0.0108,
"pred_label": 12438.625,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -1336.4798583984375,
"rewards/margins": -158.06932067871094,
"rewards/rejected": -1178.41064453125,
"step": 890,
"use_label": 1723.375
},
{
"epoch": 0.94,
"learning_rate": 3.2013969732246805e-06,
"logits/chosen": 13.762173652648926,
"logits/rejected": 13.755559921264648,
"logps/chosen": -13121.990234375,
"logps/rejected": -10966.107421875,
"loss": 0.0193,
"pred_label": 12596.1748046875,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -1284.2850341796875,
"rewards/margins": -213.5261688232422,
"rewards/rejected": -1070.7589111328125,
"step": 900,
"use_label": 1725.824951171875
},
{
"epoch": 0.95,
"learning_rate": 2.6193247962747383e-06,
"logits/chosen": 13.810602188110352,
"logits/rejected": 13.802284240722656,
"logps/chosen": -13679.8857421875,
"logps/rejected": -11569.97265625,
"loss": 0.0091,
"pred_label": 12752.150390625,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": -1338.3470458984375,
"rewards/margins": -207.6631317138672,
"rewards/rejected": -1130.6839599609375,
"step": 910,
"use_label": 1729.8499755859375
},
{
"epoch": 0.96,
"learning_rate": 2.037252619324796e-06,
"logits/chosen": 13.879419326782227,
"logits/rejected": 13.855003356933594,
"logps/chosen": -14082.59375,
"logps/rejected": -11603.6435546875,
"loss": 0.0145,
"pred_label": 12909.7998046875,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -1378.5706787109375,
"rewards/margins": -244.1646270751953,
"rewards/rejected": -1134.406005859375,
"step": 920,
"use_label": 1732.199951171875
},
{
"epoch": 0.97,
"learning_rate": 1.4551804423748545e-06,
"logits/chosen": 13.563482284545898,
"logits/rejected": 13.548616409301758,
"logps/chosen": -13257.4296875,
"logps/rejected": -10279.7294921875,
"loss": 0.0103,
"pred_label": 13067.3251953125,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -1298.921630859375,
"rewards/margins": -295.1283264160156,
"rewards/rejected": -1003.79345703125,
"step": 930,
"use_label": 1734.675048828125
},
{
"epoch": 0.98,
"learning_rate": 8.731082654249127e-07,
"logits/chosen": 13.876431465148926,
"logits/rejected": 13.86772632598877,
"logps/chosen": -14265.5234375,
"logps/rejected": -11806.12890625,
"loss": 0.0096,
"pred_label": 13226.525390625,
"rewards/accuracies": 0.3812499940395355,
"rewards/chosen": -1397.813232421875,
"rewards/margins": -243.88876342773438,
"rewards/rejected": -1153.92431640625,
"step": 940,
"use_label": 1735.4749755859375
},
{
"epoch": 0.99,
"learning_rate": 2.910360884749709e-07,
"logits/chosen": 13.791536331176758,
"logits/rejected": 13.786079406738281,
"logps/chosen": -12623.412109375,
"logps/rejected": -11098.677734375,
"loss": 0.0144,
"pred_label": 13384.7998046875,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -1235.495849609375,
"rewards/margins": -150.9226837158203,
"rewards/rejected": -1084.572998046875,
"step": 950,
"use_label": 1737.199951171875
},
{
"epoch": 1.0,
"eval_logits/chosen": 13.824411392211914,
"eval_logits/rejected": 13.813151359558105,
"eval_logps/chosen": -13722.0166015625,
"eval_logps/rejected": -11596.5400390625,
"eval_loss": 0.011624496430158615,
"eval_pred_label": 13789.83984375,
"eval_rewards/accuracies": 0.4740000069141388,
"eval_rewards/chosen": -1343.776123046875,
"eval_rewards/margins": -210.05210876464844,
"eval_rewards/rejected": -1133.72412109375,
"eval_runtime": 449.9968,
"eval_samples_per_second": 4.444,
"eval_steps_per_second": 0.278,
"eval_use_label": 1742.1600341796875,
"step": 955
},
{
"epoch": 1.0,
"step": 955,
"total_flos": 0.0,
"train_loss": 0.08065580570807007,
"train_runtime": 25025.0638,
"train_samples_per_second": 2.443,
"train_steps_per_second": 0.038
}
],
"logging_steps": 10,
"max_steps": 955,
"num_train_epochs": 1,
"save_steps": 50,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}