zephyr-7b / trainer_state.json
jikaixuan's picture
Model save
be9d7d4 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997382884061764,
"eval_steps": 100,
"global_step": 955,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 0.59375,
"learning_rate": 5.208333333333333e-08,
"logits/chosen": -2.1666858196258545,
"logits/rejected": -2.182244300842285,
"logps/chosen": -12.368609428405762,
"logps/rejected": -24.687644958496094,
"loss": 0.6931,
"pred_label": 0.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1,
"use_label": 10.0
},
{
"epoch": 0.02,
"grad_norm": 0.6796875,
"learning_rate": 1.0416666666666667e-06,
"logits/chosen": -2.227864980697632,
"logits/rejected": -2.276106834411621,
"logps/chosen": -57.02927780151367,
"logps/rejected": -66.8729019165039,
"loss": 0.6927,
"pred_label": 0.0,
"rewards/accuracies": 0.24671052396297455,
"rewards/chosen": 0.003993770573288202,
"rewards/margins": 0.0009077258291654289,
"rewards/rejected": 0.003086044918745756,
"step": 20,
"use_label": 170.0
},
{
"epoch": 0.04,
"grad_norm": 0.546875,
"learning_rate": 2.0833333333333334e-06,
"logits/chosen": -2.2728817462921143,
"logits/rejected": -2.261592388153076,
"logps/chosen": -54.7827033996582,
"logps/rejected": -67.2376708984375,
"loss": 0.6915,
"pred_label": 0.0,
"rewards/accuracies": 0.24687500298023224,
"rewards/chosen": 0.017464743927121162,
"rewards/margins": 0.0016005486249923706,
"rewards/rejected": 0.015864195302128792,
"step": 40,
"use_label": 482.0
},
{
"epoch": 0.06,
"grad_norm": 0.625,
"learning_rate": 3.125e-06,
"logits/chosen": -2.321017026901245,
"logits/rejected": -2.318946123123169,
"logps/chosen": -75.58020782470703,
"logps/rejected": -87.66261291503906,
"loss": 0.6905,
"pred_label": 4.987500190734863,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": 0.031644098460674286,
"rewards/margins": 0.009459299966692924,
"rewards/rejected": 0.02218480221927166,
"step": 60,
"use_label": 797.0125122070312
},
{
"epoch": 0.08,
"grad_norm": 0.57421875,
"learning_rate": 4.166666666666667e-06,
"logits/chosen": -2.2973294258117676,
"logits/rejected": -2.2655692100524902,
"logps/chosen": -77.97566223144531,
"logps/rejected": -81.31121826171875,
"loss": 0.6909,
"pred_label": 29.850000381469727,
"rewards/accuracies": 0.3343749940395355,
"rewards/chosen": 0.02917659282684326,
"rewards/margins": 0.014682939276099205,
"rewards/rejected": 0.014493651688098907,
"step": 80,
"use_label": 1092.1500244140625
},
{
"epoch": 0.1,
"grad_norm": 0.625,
"learning_rate": 4.9997324926814375e-06,
"logits/chosen": -2.210732936859131,
"logits/rejected": -2.2176434993743896,
"logps/chosen": -66.4733657836914,
"logps/rejected": -74.55338287353516,
"loss": 0.6911,
"pred_label": 68.07499694824219,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": 0.013588580302894115,
"rewards/margins": 0.026675995439291,
"rewards/rejected": -0.01308741606771946,
"step": 100,
"use_label": 1373.925048828125
},
{
"epoch": 0.1,
"eval_logits/chosen": -2.120985984802246,
"eval_logits/rejected": -2.093513250350952,
"eval_logps/chosen": -69.42622375488281,
"eval_logps/rejected": -78.9540786743164,
"eval_loss": 0.691917359828949,
"eval_pred_label": 150.14285278320312,
"eval_rewards/accuracies": 0.3392857015132904,
"eval_rewards/chosen": -0.005256766453385353,
"eval_rewards/margins": 0.030303288251161575,
"eval_rewards/rejected": -0.03556005656719208,
"eval_runtime": 247.9513,
"eval_samples_per_second": 8.066,
"eval_steps_per_second": 0.254,
"eval_use_label": 1705.857177734375,
"step": 100
},
{
"epoch": 0.13,
"grad_norm": 0.53125,
"learning_rate": 4.9903757462135984e-06,
"logits/chosen": -2.24790620803833,
"logits/rejected": -2.1782658100128174,
"logps/chosen": -67.23531341552734,
"logps/rejected": -80.04717254638672,
"loss": 0.6914,
"pred_label": 243.0,
"rewards/accuracies": 0.3125,
"rewards/chosen": 0.00682856747880578,
"rewards/margins": 0.03467796370387077,
"rewards/rejected": -0.02784939482808113,
"step": 120,
"use_label": 2023.0
},
{
"epoch": 0.15,
"grad_norm": 0.341796875,
"learning_rate": 4.967700826904229e-06,
"logits/chosen": -2.1205825805664062,
"logits/rejected": -2.150360584259033,
"logps/chosen": -58.376564025878906,
"logps/rejected": -71.84730529785156,
"loss": 0.6917,
"pred_label": 304.9125061035156,
"rewards/accuracies": 0.2874999940395355,
"rewards/chosen": 0.0049698068760335445,
"rewards/margins": 0.025117725133895874,
"rewards/rejected": -0.02014791965484619,
"step": 140,
"use_label": 2281.08740234375
},
{
"epoch": 0.17,
"grad_norm": 0.515625,
"learning_rate": 4.931828996974498e-06,
"logits/chosen": -2.1872148513793945,
"logits/rejected": -2.1555256843566895,
"logps/chosen": -66.3367919921875,
"logps/rejected": -69.24983215332031,
"loss": 0.6918,
"pred_label": 371.5375061035156,
"rewards/accuracies": 0.3218750059604645,
"rewards/chosen": -0.017877796664834023,
"rewards/margins": 0.03909943252801895,
"rewards/rejected": -0.05697723478078842,
"step": 160,
"use_label": 2534.46240234375
},
{
"epoch": 0.19,
"grad_norm": 0.4921875,
"learning_rate": 4.882952093833628e-06,
"logits/chosen": -2.1010584831237793,
"logits/rejected": -2.112929582595825,
"logps/chosen": -67.18075561523438,
"logps/rejected": -77.23786163330078,
"loss": 0.6925,
"pred_label": 444.2124938964844,
"rewards/accuracies": 0.3031249940395355,
"rewards/chosen": -0.03951570764183998,
"rewards/margins": 0.03356783464550972,
"rewards/rejected": -0.0730835422873497,
"step": 180,
"use_label": 2781.78759765625
},
{
"epoch": 0.21,
"grad_norm": 0.546875,
"learning_rate": 4.821331504159906e-06,
"logits/chosen": -2.181281805038452,
"logits/rejected": -2.155298948287964,
"logps/chosen": -78.88096618652344,
"logps/rejected": -77.27136993408203,
"loss": 0.692,
"pred_label": 513.2125244140625,
"rewards/accuracies": 0.3812499940395355,
"rewards/chosen": -0.019123973324894905,
"rewards/margins": 0.040298379957675934,
"rewards/rejected": -0.05942235141992569,
"step": 200,
"use_label": 3032.78759765625
},
{
"epoch": 0.21,
"eval_logits/chosen": -2.1267549991607666,
"eval_logits/rejected": -2.1057066917419434,
"eval_logps/chosen": -71.54093170166016,
"eval_logps/rejected": -82.35039520263672,
"eval_loss": 0.6926834583282471,
"eval_pred_label": 622.952392578125,
"eval_rewards/accuracies": 0.3432539701461792,
"eval_rewards/chosen": -0.026403911411762238,
"eval_rewards/margins": 0.043119337409734726,
"eval_rewards/rejected": -0.06952324509620667,
"eval_runtime": 248.2687,
"eval_samples_per_second": 8.056,
"eval_steps_per_second": 0.254,
"eval_use_label": 3337.047607421875,
"step": 200
},
{
"epoch": 0.23,
"grad_norm": 0.609375,
"learning_rate": 4.747296766042161e-06,
"logits/chosen": -2.2548727989196777,
"logits/rejected": -2.2427258491516113,
"logps/chosen": -74.4991683959961,
"logps/rejected": -75.8321762084961,
"loss": 0.6924,
"pred_label": 738.5,
"rewards/accuracies": 0.3531250059604645,
"rewards/chosen": -0.024670986458659172,
"rewards/margins": 0.04779377579689026,
"rewards/rejected": -0.07246476411819458,
"step": 220,
"use_label": 3631.5
},
{
"epoch": 0.25,
"grad_norm": 0.46875,
"learning_rate": 4.661243806657256e-06,
"logits/chosen": -2.2358717918395996,
"logits/rejected": -2.216477870941162,
"logps/chosen": -72.57451629638672,
"logps/rejected": -79.20014953613281,
"loss": 0.6921,
"pred_label": 830.7750244140625,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.013481785543262959,
"rewards/margins": 0.0440317802131176,
"rewards/rejected": -0.05751357227563858,
"step": 240,
"use_label": 3859.22509765625
},
{
"epoch": 0.27,
"grad_norm": 0.5390625,
"learning_rate": 4.563632824908252e-06,
"logits/chosen": -2.204738140106201,
"logits/rejected": -2.2045350074768066,
"logps/chosen": -64.52825164794922,
"logps/rejected": -74.71345520019531,
"loss": 0.6919,
"pred_label": 912.1624755859375,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.01493888907134533,
"rewards/margins": 0.038629818707704544,
"rewards/rejected": -0.05356870964169502,
"step": 260,
"use_label": 4097.83740234375
},
{
"epoch": 0.29,
"grad_norm": 0.431640625,
"learning_rate": 4.454985830346574e-06,
"logits/chosen": -2.224844455718994,
"logits/rejected": -2.247999668121338,
"logps/chosen": -72.3452377319336,
"logps/rejected": -75.01800537109375,
"loss": 0.6916,
"pred_label": 993.7874755859375,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.04014473780989647,
"rewards/margins": 0.030534306541085243,
"rewards/rejected": -0.07067903876304626,
"step": 280,
"use_label": 4336.21240234375
},
{
"epoch": 0.31,
"grad_norm": 0.423828125,
"learning_rate": 4.335883851539693e-06,
"logits/chosen": -2.2155380249023438,
"logits/rejected": -2.2151846885681152,
"logps/chosen": -67.15587615966797,
"logps/rejected": -74.2086181640625,
"loss": 0.6924,
"pred_label": 1083.4625244140625,
"rewards/accuracies": 0.34687501192092896,
"rewards/chosen": -0.020541679114103317,
"rewards/margins": 0.06299655884504318,
"rewards/rejected": -0.0835382491350174,
"step": 300,
"use_label": 4566.53759765625
},
{
"epoch": 0.31,
"eval_logits/chosen": -2.2169294357299805,
"eval_logits/rejected": -2.1932876110076904,
"eval_logps/chosen": -72.5876693725586,
"eval_logps/rejected": -84.35366821289062,
"eval_loss": 0.6928625702857971,
"eval_pred_label": 1200.2698974609375,
"eval_rewards/accuracies": 0.3392857015132904,
"eval_rewards/chosen": -0.03687124699354172,
"eval_rewards/margins": 0.0526847243309021,
"eval_rewards/rejected": -0.08955597132444382,
"eval_runtime": 247.9119,
"eval_samples_per_second": 8.067,
"eval_steps_per_second": 0.254,
"eval_use_label": 4863.72998046875,
"step": 300
},
{
"epoch": 0.33,
"grad_norm": 0.61328125,
"learning_rate": 4.206963828813555e-06,
"logits/chosen": -2.291391134262085,
"logits/rejected": -2.3002986907958984,
"logps/chosen": -68.5405502319336,
"logps/rejected": -83.0180435180664,
"loss": 0.6927,
"pred_label": 1323.074951171875,
"rewards/accuracies": 0.30000001192092896,
"rewards/chosen": -0.07074997574090958,
"rewards/margins": 0.04067195579409599,
"rewards/rejected": -0.11142192780971527,
"step": 320,
"use_label": 5150.9248046875
},
{
"epoch": 0.36,
"grad_norm": 0.455078125,
"learning_rate": 4.068915207986931e-06,
"logits/chosen": -2.2867865562438965,
"logits/rejected": -2.2617173194885254,
"logps/chosen": -64.90373229980469,
"logps/rejected": -74.42888641357422,
"loss": 0.692,
"pred_label": 1427.7750244140625,
"rewards/accuracies": 0.3531250059604645,
"rewards/chosen": -0.016644436866044998,
"rewards/margins": 0.052551619708538055,
"rewards/rejected": -0.06919606029987335,
"step": 340,
"use_label": 5366.22509765625
},
{
"epoch": 0.38,
"grad_norm": 0.458984375,
"learning_rate": 3.922476253313921e-06,
"logits/chosen": -2.249298572540283,
"logits/rejected": -2.253566265106201,
"logps/chosen": -68.57295989990234,
"logps/rejected": -73.1113510131836,
"loss": 0.693,
"pred_label": 1522.0999755859375,
"rewards/accuracies": 0.328125,
"rewards/chosen": -0.037180084735155106,
"rewards/margins": 0.045733559876680374,
"rewards/rejected": -0.08291363716125488,
"step": 360,
"use_label": 5591.89990234375
},
{
"epoch": 0.4,
"grad_norm": 0.4453125,
"learning_rate": 3.768430099352445e-06,
"logits/chosen": -2.2458603382110596,
"logits/rejected": -2.2051453590393066,
"logps/chosen": -70.38607788085938,
"logps/rejected": -78.15666198730469,
"loss": 0.6923,
"pred_label": 1625.5374755859375,
"rewards/accuracies": 0.3531250059604645,
"rewards/chosen": -0.03562153875827789,
"rewards/margins": 0.054723359644412994,
"rewards/rejected": -0.09034489840269089,
"step": 380,
"use_label": 5808.46240234375
},
{
"epoch": 0.42,
"grad_norm": 0.59765625,
"learning_rate": 3.607600562872785e-06,
"logits/chosen": -2.196977138519287,
"logits/rejected": -2.197218656539917,
"logps/chosen": -81.0395736694336,
"logps/rejected": -81.44091033935547,
"loss": 0.6927,
"pred_label": 1725.362548828125,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": -0.03092697635293007,
"rewards/margins": 0.049932099878787994,
"rewards/rejected": -0.08085907250642776,
"step": 400,
"use_label": 6028.6376953125
},
{
"epoch": 0.42,
"eval_logits/chosen": -2.118962526321411,
"eval_logits/rejected": -2.093430995941162,
"eval_logps/chosen": -71.01036071777344,
"eval_logps/rejected": -83.43638610839844,
"eval_loss": 0.6925376653671265,
"eval_pred_label": 1843.920654296875,
"eval_rewards/accuracies": 0.341269850730896,
"eval_rewards/chosen": -0.021098149940371513,
"eval_rewards/margins": 0.05928494408726692,
"eval_rewards/rejected": -0.08038310706615448,
"eval_runtime": 248.0095,
"eval_samples_per_second": 8.064,
"eval_steps_per_second": 0.254,
"eval_use_label": 6324.07958984375,
"step": 400
},
{
"epoch": 0.44,
"grad_norm": 0.30078125,
"learning_rate": 3.4408477372034743e-06,
"logits/chosen": -2.146075487136841,
"logits/rejected": -2.152238607406616,
"logps/chosen": -65.8438720703125,
"logps/rejected": -70.74162292480469,
"loss": 0.692,
"pred_label": 1975.637451171875,
"rewards/accuracies": 0.3531250059604645,
"rewards/chosen": -0.017682421952486038,
"rewards/margins": 0.05984373763203621,
"rewards/rejected": -0.07752615213394165,
"step": 420,
"use_label": 6602.3623046875
},
{
"epoch": 0.46,
"grad_norm": 0.9296875,
"learning_rate": 3.269063392575352e-06,
"logits/chosen": -2.2523856163024902,
"logits/rejected": -2.2490224838256836,
"logps/chosen": -74.74308013916016,
"logps/rejected": -74.57176208496094,
"loss": 0.6927,
"pred_label": 2072.27490234375,
"rewards/accuracies": 0.3218750059604645,
"rewards/chosen": -0.027858540415763855,
"rewards/margins": 0.05976608395576477,
"rewards/rejected": -0.08762462437152863,
"step": 440,
"use_label": 6825.72509765625
},
{
"epoch": 0.48,
"grad_norm": 0.34375,
"learning_rate": 3.09316620706208e-06,
"logits/chosen": -2.2484962940216064,
"logits/rejected": -2.253873109817505,
"logps/chosen": -68.02134704589844,
"logps/rejected": -73.40286254882812,
"loss": 0.6929,
"pred_label": 2175.53759765625,
"rewards/accuracies": 0.328125,
"rewards/chosen": -0.03111925721168518,
"rewards/margins": 0.06376632302999496,
"rewards/rejected": -0.09488557279109955,
"step": 460,
"use_label": 7042.46240234375
},
{
"epoch": 0.5,
"grad_norm": 0.3984375,
"learning_rate": 2.91409685362137e-06,
"logits/chosen": -2.2359812259674072,
"logits/rejected": -2.2330563068389893,
"logps/chosen": -75.03883361816406,
"logps/rejected": -84.55928039550781,
"loss": 0.6922,
"pred_label": 2276.949951171875,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -0.040116917341947556,
"rewards/margins": 0.0741645023226738,
"rewards/rejected": -0.11428143084049225,
"step": 480,
"use_label": 7261.0498046875
},
{
"epoch": 0.52,
"grad_norm": 0.5703125,
"learning_rate": 2.7328129695107205e-06,
"logits/chosen": -2.2053210735321045,
"logits/rejected": -2.2094616889953613,
"logps/chosen": -75.30181121826172,
"logps/rejected": -77.61902618408203,
"loss": 0.6924,
"pred_label": 2379.137451171875,
"rewards/accuracies": 0.3656249940395355,
"rewards/chosen": -0.039206866174936295,
"rewards/margins": 0.05418051406741142,
"rewards/rejected": -0.09338738024234772,
"step": 500,
"use_label": 7478.8623046875
},
{
"epoch": 0.52,
"eval_logits/chosen": -2.176236152648926,
"eval_logits/rejected": -2.151799201965332,
"eval_logps/chosen": -70.96183776855469,
"eval_logps/rejected": -83.7112045288086,
"eval_loss": 0.6929337382316589,
"eval_pred_label": 2499.22216796875,
"eval_rewards/accuracies": 0.3432539701461792,
"eval_rewards/chosen": -0.02061287872493267,
"eval_rewards/margins": 0.06251849234104156,
"eval_rewards/rejected": -0.08313137292861938,
"eval_runtime": 248.0888,
"eval_samples_per_second": 8.062,
"eval_steps_per_second": 0.254,
"eval_use_label": 7772.77783203125,
"step": 500
},
{
"epoch": 0.54,
"grad_norm": 0.61328125,
"learning_rate": 2.5502840349805074e-06,
"logits/chosen": -2.195094347000122,
"logits/rejected": -2.237112045288086,
"logps/chosen": -70.13484954833984,
"logps/rejected": -79.53434753417969,
"loss": 0.692,
"pred_label": 2632.125,
"rewards/accuracies": 0.3656249940395355,
"rewards/chosen": -0.018158430233597755,
"rewards/margins": 0.061979226768016815,
"rewards/rejected": -0.08013766258955002,
"step": 520,
"use_label": 8049.875
},
{
"epoch": 0.57,
"grad_norm": 0.55078125,
"learning_rate": 2.367486188632446e-06,
"logits/chosen": -2.1844329833984375,
"logits/rejected": -2.1980721950531006,
"logps/chosen": -78.40437316894531,
"logps/rejected": -80.49110412597656,
"loss": 0.6925,
"pred_label": 2729.66259765625,
"rewards/accuracies": 0.3656249940395355,
"rewards/chosen": -0.028946753591299057,
"rewards/margins": 0.0717843621969223,
"rewards/rejected": -0.10073111951351166,
"step": 540,
"use_label": 8272.337890625
},
{
"epoch": 0.59,
"grad_norm": 0.48828125,
"learning_rate": 2.1853970071701415e-06,
"logits/chosen": -2.19417667388916,
"logits/rejected": -2.1900599002838135,
"logps/chosen": -73.69783020019531,
"logps/rejected": -72.62937927246094,
"loss": 0.6926,
"pred_label": 2827.875,
"rewards/accuracies": 0.2874999940395355,
"rewards/chosen": -0.04889845848083496,
"rewards/margins": 0.04425561800599098,
"rewards/rejected": -0.09315408021211624,
"step": 560,
"use_label": 8494.125
},
{
"epoch": 0.61,
"grad_norm": 0.328125,
"learning_rate": 2.00499027745888e-06,
"logits/chosen": -2.224670171737671,
"logits/rejected": -2.230435371398926,
"logps/chosen": -76.27436065673828,
"logps/rejected": -87.6956787109375,
"loss": 0.6922,
"pred_label": 2926.862548828125,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -0.055185507982969284,
"rewards/margins": 0.05776001885533333,
"rewards/rejected": -0.11294553428888321,
"step": 580,
"use_label": 8715.1376953125
},
{
"epoch": 0.63,
"grad_norm": 0.5546875,
"learning_rate": 1.8272307888529276e-06,
"logits/chosen": -2.231316089630127,
"logits/rejected": -2.258852481842041,
"logps/chosen": -84.25640106201172,
"logps/rejected": -99.73040771484375,
"loss": 0.6929,
"pred_label": 3042.83740234375,
"rewards/accuracies": 0.40312498807907104,
"rewards/chosen": -0.07248945534229279,
"rewards/margins": 0.06686891615390778,
"rewards/rejected": -0.13935837149620056,
"step": 600,
"use_label": 8919.162109375
},
{
"epoch": 0.63,
"eval_logits/chosen": -2.1407980918884277,
"eval_logits/rejected": -2.1125032901763916,
"eval_logps/chosen": -73.41705322265625,
"eval_logps/rejected": -86.9944839477539,
"eval_loss": 0.6927017569541931,
"eval_pred_label": 3177.142822265625,
"eval_rewards/accuracies": 0.3511904776096344,
"eval_rewards/chosen": -0.04516514018177986,
"eval_rewards/margins": 0.07079902291297913,
"eval_rewards/rejected": -0.11596415936946869,
"eval_runtime": 248.1359,
"eval_samples_per_second": 8.06,
"eval_steps_per_second": 0.254,
"eval_use_label": 9198.857421875,
"step": 600
},
{
"epoch": 0.65,
"grad_norm": 0.404296875,
"learning_rate": 1.6530691736402317e-06,
"logits/chosen": -2.1386609077453613,
"logits/rejected": -2.1743404865264893,
"logps/chosen": -65.55394744873047,
"logps/rejected": -88.32081604003906,
"loss": 0.6924,
"pred_label": 3318.58740234375,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.05246468633413315,
"rewards/margins": 0.059620797634124756,
"rewards/rejected": -0.1120854839682579,
"step": 620,
"use_label": 9467.412109375
},
{
"epoch": 0.67,
"grad_norm": 0.51171875,
"learning_rate": 1.4834368231970922e-06,
"logits/chosen": -2.1956310272216797,
"logits/rejected": -2.2024998664855957,
"logps/chosen": -77.41986846923828,
"logps/rejected": -82.58815002441406,
"loss": 0.692,
"pred_label": 3414.199951171875,
"rewards/accuracies": 0.3656249940395355,
"rewards/chosen": -0.05061299726366997,
"rewards/margins": 0.07674984633922577,
"rewards/rejected": -0.12736284732818604,
"step": 640,
"use_label": 9691.7998046875
},
{
"epoch": 0.69,
"grad_norm": 0.2890625,
"learning_rate": 1.3192409070404582e-06,
"logits/chosen": -2.1827545166015625,
"logits/rejected": -2.1392319202423096,
"logps/chosen": -71.07948303222656,
"logps/rejected": -78.78751373291016,
"loss": 0.6924,
"pred_label": 3519.35009765625,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.0542152114212513,
"rewards/margins": 0.06142013147473335,
"rewards/rejected": -0.11563535034656525,
"step": 660,
"use_label": 9906.650390625
},
{
"epoch": 0.71,
"grad_norm": 0.435546875,
"learning_rate": 1.1613595214152713e-06,
"logits/chosen": -2.2185826301574707,
"logits/rejected": -2.2344555854797363,
"logps/chosen": -81.96281433105469,
"logps/rejected": -87.13890838623047,
"loss": 0.6923,
"pred_label": 3610.012451171875,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.06083091348409653,
"rewards/margins": 0.0633452981710434,
"rewards/rejected": -0.12417621910572052,
"step": 680,
"use_label": 10135.9873046875
},
{
"epoch": 0.73,
"grad_norm": 0.5078125,
"learning_rate": 1.0106369933615043e-06,
"logits/chosen": -2.2393274307250977,
"logits/rejected": -2.2085208892822266,
"logps/chosen": -90.31179809570312,
"logps/rejected": -96.00973510742188,
"loss": 0.6928,
"pred_label": 3716.97509765625,
"rewards/accuracies": 0.3843750059604645,
"rewards/chosen": -0.06285654008388519,
"rewards/margins": 0.07485760748386383,
"rewards/rejected": -0.13771414756774902,
"step": 700,
"use_label": 10349.025390625
},
{
"epoch": 0.73,
"eval_logits/chosen": -2.1372170448303223,
"eval_logits/rejected": -2.1086459159851074,
"eval_logps/chosen": -73.96572875976562,
"eval_logps/rejected": -87.70773315429688,
"eval_loss": 0.6929500102996826,
"eval_pred_label": 3852.730224609375,
"eval_rewards/accuracies": 0.3511904776096344,
"eval_rewards/chosen": -0.05065184459090233,
"eval_rewards/margins": 0.07244490087032318,
"eval_rewards/rejected": -0.12309674173593521,
"eval_runtime": 248.0038,
"eval_samples_per_second": 8.064,
"eval_steps_per_second": 0.254,
"eval_use_label": 10627.26953125,
"step": 700
},
{
"epoch": 0.75,
"grad_norm": 0.55078125,
"learning_rate": 8.678793653740633e-07,
"logits/chosen": -2.1876041889190674,
"logits/rejected": -2.1966712474823,
"logps/chosen": -64.94602966308594,
"logps/rejected": -77.46949005126953,
"loss": 0.6927,
"pred_label": 3992.16259765625,
"rewards/accuracies": 0.31562501192092896,
"rewards/chosen": -0.04975567013025284,
"rewards/margins": 0.06240048259496689,
"rewards/rejected": -0.11215615272521973,
"step": 720,
"use_label": 10897.837890625
},
{
"epoch": 0.77,
"grad_norm": 0.416015625,
"learning_rate": 7.338500848029603e-07,
"logits/chosen": -2.194794178009033,
"logits/rejected": -2.2083091735839844,
"logps/chosen": -69.16300201416016,
"logps/rejected": -74.87442779541016,
"loss": 0.6927,
"pred_label": 4088.0625,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.03673207014799118,
"rewards/margins": 0.07390830665826797,
"rewards/rejected": -0.11064038425683975,
"step": 740,
"use_label": 11121.9375
},
{
"epoch": 0.8,
"grad_norm": 0.47265625,
"learning_rate": 6.092659210462232e-07,
"logits/chosen": -2.2297511100769043,
"logits/rejected": -2.232818841934204,
"logps/chosen": -70.27059173583984,
"logps/rejected": -88.61542510986328,
"loss": 0.6927,
"pred_label": 4190.375,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.04521505907177925,
"rewards/margins": 0.05956338718533516,
"rewards/rejected": -0.10477845370769501,
"step": 760,
"use_label": 11339.625
},
{
"epoch": 0.82,
"grad_norm": 0.515625,
"learning_rate": 4.947931323697983e-07,
"logits/chosen": -2.224112033843994,
"logits/rejected": -2.241053581237793,
"logps/chosen": -82.8070068359375,
"logps/rejected": -85.62196350097656,
"loss": 0.6927,
"pred_label": 4299.97509765625,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.04633576422929764,
"rewards/margins": 0.0844966396689415,
"rewards/rejected": -0.13083240389823914,
"step": 780,
"use_label": 11550.025390625
},
{
"epoch": 0.84,
"grad_norm": 0.498046875,
"learning_rate": 3.910439028537638e-07,
"logits/chosen": -2.201280117034912,
"logits/rejected": -2.177452325820923,
"logps/chosen": -65.0578842163086,
"logps/rejected": -66.19197082519531,
"loss": 0.6927,
"pred_label": 4407.78759765625,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.026169428601861,
"rewards/margins": 0.06455135345458984,
"rewards/rejected": -0.0907207801938057,
"step": 800,
"use_label": 11762.212890625
},
{
"epoch": 0.84,
"eval_logits/chosen": -2.1430623531341553,
"eval_logits/rejected": -2.1141114234924316,
"eval_logps/chosen": -71.62469482421875,
"eval_logps/rejected": -85.3831787109375,
"eval_loss": 0.6928467750549316,
"eval_pred_label": 4538.47607421875,
"eval_rewards/accuracies": 0.3551587164402008,
"eval_rewards/chosen": -0.027241550385951996,
"eval_rewards/margins": 0.072609543800354,
"eval_rewards/rejected": -0.099851094186306,
"eval_runtime": 247.951,
"eval_samples_per_second": 8.066,
"eval_steps_per_second": 0.254,
"eval_use_label": 12045.5234375,
"step": 800
},
{
"epoch": 0.86,
"grad_norm": 0.48046875,
"learning_rate": 2.98573068519539e-07,
"logits/chosen": -2.228102684020996,
"logits/rejected": -2.2112691402435303,
"logps/chosen": -68.63658142089844,
"logps/rejected": -75.33064270019531,
"loss": 0.6923,
"pred_label": 4678.53759765625,
"rewards/accuracies": 0.3218750059604645,
"rewards/chosen": -0.03714119642972946,
"rewards/margins": 0.05530167371034622,
"rewards/rejected": -0.09244287014007568,
"step": 820,
"use_label": 12315.462890625
},
{
"epoch": 0.88,
"grad_norm": 0.56640625,
"learning_rate": 2.178751501463036e-07,
"logits/chosen": -2.204557418823242,
"logits/rejected": -2.2018847465515137,
"logps/chosen": -61.4800910949707,
"logps/rejected": -63.1760139465332,
"loss": 0.6929,
"pred_label": 4777.375,
"rewards/accuracies": 0.29374998807907104,
"rewards/chosen": -0.02809613011777401,
"rewards/margins": 0.05226613208651543,
"rewards/rejected": -0.08036227524280548,
"step": 840,
"use_label": 12536.625
},
{
"epoch": 0.9,
"grad_norm": 0.8515625,
"learning_rate": 1.4938170864468636e-07,
"logits/chosen": -2.252244234085083,
"logits/rejected": -2.242299795150757,
"logps/chosen": -84.9459228515625,
"logps/rejected": -90.69441223144531,
"loss": 0.6922,
"pred_label": 4874.3251953125,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -0.022363774478435516,
"rewards/margins": 0.09057153016328812,
"rewards/rejected": -0.11293530464172363,
"step": 860,
"use_label": 12759.6748046875
},
{
"epoch": 0.92,
"grad_norm": 0.4296875,
"learning_rate": 9.345903713082305e-08,
"logits/chosen": -2.2364704608917236,
"logits/rejected": -2.224773406982422,
"logps/chosen": -75.7426528930664,
"logps/rejected": -91.20499420166016,
"loss": 0.6925,
"pred_label": 4988.4873046875,
"rewards/accuracies": 0.3843750059604645,
"rewards/chosen": -0.03711060434579849,
"rewards/margins": 0.08993253856897354,
"rewards/rejected": -0.12704312801361084,
"step": 880,
"use_label": 12965.5126953125
},
{
"epoch": 0.94,
"grad_norm": 0.6015625,
"learning_rate": 5.0406202043228604e-08,
"logits/chosen": -2.121796131134033,
"logits/rejected": -2.15610671043396,
"logps/chosen": -69.87088775634766,
"logps/rejected": -90.85367584228516,
"loss": 0.6929,
"pred_label": 5089.85009765625,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.032511431723833084,
"rewards/margins": 0.06634987145662308,
"rewards/rejected": -0.09886129945516586,
"step": 900,
"use_label": 13184.150390625
},
{
"epoch": 0.94,
"eval_logits/chosen": -2.1427581310272217,
"eval_logits/rejected": -2.113929510116577,
"eval_logps/chosen": -71.7841567993164,
"eval_logps/rejected": -85.5160140991211,
"eval_loss": 0.6928035020828247,
"eval_pred_label": 5226.619140625,
"eval_rewards/accuracies": 0.3492063581943512,
"eval_rewards/chosen": -0.02883605659008026,
"eval_rewards/margins": 0.0723433569073677,
"eval_rewards/rejected": -0.10117942094802856,
"eval_runtime": 246.4796,
"eval_samples_per_second": 8.114,
"eval_steps_per_second": 0.256,
"eval_use_label": 13461.380859375,
"step": 900
},
{
"epoch": 0.96,
"grad_norm": 0.52734375,
"learning_rate": 2.0453443778310766e-08,
"logits/chosen": -2.1679275035858154,
"logits/rejected": -2.1737468242645264,
"logps/chosen": -59.419395446777344,
"logps/rejected": -76.71382141113281,
"loss": 0.6925,
"pred_label": 5365.3876953125,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.021122563630342484,
"rewards/margins": 0.0707126036286354,
"rewards/rejected": -0.09183517098426819,
"step": 920,
"use_label": 13732.6123046875
},
{
"epoch": 0.98,
"grad_norm": 0.64453125,
"learning_rate": 3.760945397705828e-09,
"logits/chosen": -2.1522116661071777,
"logits/rejected": -2.1893556118011475,
"logps/chosen": -68.75323486328125,
"logps/rejected": -82.70423889160156,
"loss": 0.6926,
"pred_label": 5459.0751953125,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": -0.02701050415635109,
"rewards/margins": 0.06467042118310928,
"rewards/rejected": -0.09168092906475067,
"step": 940,
"use_label": 13958.9248046875
},
{
"epoch": 1.0,
"step": 955,
"total_flos": 0.0,
"train_loss": 0.692275420283772,
"train_runtime": 20019.5915,
"train_samples_per_second": 3.054,
"train_steps_per_second": 0.048
}
],
"logging_steps": 20,
"max_steps": 955,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}