zephyr-7b-dpo-full / trainer_state.json
RikkiXu's picture
Model save
a6dadf5 verified
raw
history blame
No virus
22.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 391,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 210.34713052784278,
"learning_rate": 2.5e-09,
"logits/chosen": -4.623842239379883,
"logits/rejected": -4.85917854309082,
"logps/chosen": -239.31422424316406,
"logps/rejected": -207.56365966796875,
"loss": 0.6927,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"grad_norm": 198.95172630432864,
"learning_rate": 2.5e-08,
"logits/chosen": -4.3338446617126465,
"logits/rejected": -4.64424991607666,
"logps/chosen": -265.20184326171875,
"logps/rejected": -215.72174072265625,
"loss": 0.6928,
"rewards/accuracies": 0.4236111044883728,
"rewards/chosen": -0.004745930898934603,
"rewards/margins": -0.004067909903824329,
"rewards/rejected": -0.0006780209369026124,
"step": 10
},
{
"epoch": 0.05,
"grad_norm": 204.7891876677461,
"learning_rate": 5e-08,
"logits/chosen": -4.509727478027344,
"logits/rejected": -4.74410343170166,
"logps/chosen": -267.73052978515625,
"logps/rejected": -216.7478485107422,
"loss": 0.6872,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": 0.010470375418663025,
"rewards/margins": 0.01739482954144478,
"rewards/rejected": -0.006924452725797892,
"step": 20
},
{
"epoch": 0.08,
"grad_norm": 204.94575488992174,
"learning_rate": 7.5e-08,
"logits/chosen": -4.5970940589904785,
"logits/rejected": -4.777865409851074,
"logps/chosen": -257.5598449707031,
"logps/rejected": -215.4015350341797,
"loss": 0.6544,
"rewards/accuracies": 0.8187500238418579,
"rewards/chosen": 0.04864828661084175,
"rewards/margins": 0.09208732843399048,
"rewards/rejected": -0.04343904182314873,
"step": 30
},
{
"epoch": 0.1,
"grad_norm": 163.67699084811588,
"learning_rate": 1e-07,
"logits/chosen": -4.643096923828125,
"logits/rejected": -4.7387237548828125,
"logps/chosen": -249.96743774414062,
"logps/rejected": -223.3234405517578,
"loss": 0.5584,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": 0.18158790469169617,
"rewards/margins": 0.36420467495918274,
"rewards/rejected": -0.18261677026748657,
"step": 40
},
{
"epoch": 0.13,
"grad_norm": 125.1152304775479,
"learning_rate": 9.979985922607475e-08,
"logits/chosen": -4.558148384094238,
"logits/rejected": -4.785082817077637,
"logps/chosen": -265.6357727050781,
"logps/rejected": -234.0360107421875,
"loss": 0.45,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 0.3166799247264862,
"rewards/margins": 0.7249041795730591,
"rewards/rejected": -0.4082241952419281,
"step": 50
},
{
"epoch": 0.15,
"grad_norm": 110.5697848266263,
"learning_rate": 9.92010391574745e-08,
"logits/chosen": -4.701218605041504,
"logits/rejected": -4.855440139770508,
"logps/chosen": -232.1560821533203,
"logps/rejected": -235.8180389404297,
"loss": 0.3379,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": 0.44831886887550354,
"rewards/margins": 1.4881489276885986,
"rewards/rejected": -1.039829969406128,
"step": 60
},
{
"epoch": 0.18,
"grad_norm": 103.09926490168155,
"learning_rate": 9.820833372667812e-08,
"logits/chosen": -4.597586631774902,
"logits/rejected": -4.846543312072754,
"logps/chosen": -243.5035858154297,
"logps/rejected": -245.3424072265625,
"loss": 0.3085,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 0.4819186329841614,
"rewards/margins": 1.8609161376953125,
"rewards/rejected": -1.378997564315796,
"step": 70
},
{
"epoch": 0.2,
"grad_norm": 89.87848352821936,
"learning_rate": 9.682969016701356e-08,
"logits/chosen": -4.592278957366943,
"logits/rejected": -4.840281963348389,
"logps/chosen": -249.3519744873047,
"logps/rejected": -261.445068359375,
"loss": 0.2624,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": 0.593399703502655,
"rewards/margins": 2.1497161388397217,
"rewards/rejected": -1.5563163757324219,
"step": 80
},
{
"epoch": 0.23,
"grad_norm": 98.45898295424381,
"learning_rate": 9.507614539004081e-08,
"logits/chosen": -4.667254447937012,
"logits/rejected": -4.913816928863525,
"logps/chosen": -235.763427734375,
"logps/rejected": -244.2578582763672,
"loss": 0.2462,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 0.7724655866622925,
"rewards/margins": 2.8438591957092285,
"rewards/rejected": -2.0713934898376465,
"step": 90
},
{
"epoch": 0.26,
"grad_norm": 87.96881533227138,
"learning_rate": 9.296173762811083e-08,
"logits/chosen": -4.5116472244262695,
"logits/rejected": -4.829812049865723,
"logps/chosen": -238.08468627929688,
"logps/rejected": -269.5484619140625,
"loss": 0.2472,
"rewards/accuracies": 0.875,
"rewards/chosen": 0.931675910949707,
"rewards/margins": 3.0536458492279053,
"rewards/rejected": -2.1219699382781982,
"step": 100
},
{
"epoch": 0.26,
"eval_logits/chosen": -4.58513879776001,
"eval_logits/rejected": -4.80186128616333,
"eval_logps/chosen": -394.6981201171875,
"eval_logps/rejected": -515.9166259765625,
"eval_loss": 0.9610964059829712,
"eval_rewards/accuracies": 0.390625,
"eval_rewards/chosen": -0.21118265390396118,
"eval_rewards/margins": -0.3347358703613281,
"eval_rewards/rejected": 0.12355318665504456,
"eval_runtime": 97.8315,
"eval_samples_per_second": 20.443,
"eval_steps_per_second": 0.327,
"step": 100
},
{
"epoch": 0.28,
"grad_norm": 84.98735748868098,
"learning_rate": 9.050339404945832e-08,
"logits/chosen": -4.55401611328125,
"logits/rejected": -4.845933437347412,
"logps/chosen": -229.4434356689453,
"logps/rejected": -257.52984619140625,
"loss": 0.2226,
"rewards/accuracies": 0.90625,
"rewards/chosen": 0.9871166348457336,
"rewards/margins": 3.0293149948120117,
"rewards/rejected": -2.042198419570923,
"step": 110
},
{
"epoch": 0.31,
"grad_norm": 75.78122724506682,
"learning_rate": 8.77207952455395e-08,
"logits/chosen": -4.49249792098999,
"logits/rejected": -4.787415981292725,
"logps/chosen": -252.7578125,
"logps/rejected": -273.38555908203125,
"loss": 0.2215,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 1.1615877151489258,
"rewards/margins": 3.400435209274292,
"rewards/rejected": -2.238847255706787,
"step": 120
},
{
"epoch": 0.33,
"grad_norm": 109.1136183108071,
"learning_rate": 8.463621767547997e-08,
"logits/chosen": -4.589264869689941,
"logits/rejected": -4.87318229675293,
"logps/chosen": -239.29531860351562,
"logps/rejected": -265.04693603515625,
"loss": 0.2169,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": 0.9574294090270996,
"rewards/margins": 3.4433422088623047,
"rewards/rejected": -2.485912799835205,
"step": 130
},
{
"epoch": 0.36,
"grad_norm": 79.72525878658313,
"learning_rate": 8.127435532896387e-08,
"logits/chosen": -4.636221885681152,
"logits/rejected": -4.9098310470581055,
"logps/chosen": -267.59625244140625,
"logps/rejected": -288.02349853515625,
"loss": 0.2063,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 0.8299416303634644,
"rewards/margins": 3.622443675994873,
"rewards/rejected": -2.792501926422119,
"step": 140
},
{
"epoch": 0.38,
"grad_norm": 94.45112212404622,
"learning_rate": 7.766212203526569e-08,
"logits/chosen": -4.643942832946777,
"logits/rejected": -4.911728382110596,
"logps/chosen": -233.4263153076172,
"logps/rejected": -277.07818603515625,
"loss": 0.2098,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 0.9495984315872192,
"rewards/margins": 3.8475449085235596,
"rewards/rejected": -2.89794659614563,
"step": 150
},
{
"epoch": 0.41,
"grad_norm": 85.35291313866578,
"learning_rate": 7.382843600106538e-08,
"logits/chosen": -4.690377235412598,
"logits/rejected": -4.9024457931518555,
"logps/chosen": -233.21981811523438,
"logps/rejected": -271.2682189941406,
"loss": 0.1861,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 0.9118326306343079,
"rewards/margins": 3.6947906017303467,
"rewards/rejected": -2.7829582691192627,
"step": 160
},
{
"epoch": 0.43,
"grad_norm": 75.89103255157417,
"learning_rate": 6.980398830195784e-08,
"logits/chosen": -4.554282188415527,
"logits/rejected": -4.874223232269287,
"logps/chosen": -236.4412078857422,
"logps/rejected": -279.4911804199219,
"loss": 0.1833,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 1.2316501140594482,
"rewards/margins": 4.21605920791626,
"rewards/rejected": -2.9844090938568115,
"step": 170
},
{
"epoch": 0.46,
"grad_norm": 77.81291773020575,
"learning_rate": 6.562099718102787e-08,
"logits/chosen": -4.651320934295654,
"logits/rejected": -4.9173784255981445,
"logps/chosen": -215.70126342773438,
"logps/rejected": -251.5159149169922,
"loss": 0.2065,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 1.0581190586090088,
"rewards/margins": 3.832904815673828,
"rewards/rejected": -2.7747855186462402,
"step": 180
},
{
"epoch": 0.49,
"grad_norm": 86.461876717381,
"learning_rate": 6.131295012148612e-08,
"logits/chosen": -4.617634296417236,
"logits/rejected": -4.793360233306885,
"logps/chosen": -239.32681274414062,
"logps/rejected": -286.96124267578125,
"loss": 0.2013,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": 1.0783030986785889,
"rewards/margins": 3.6680614948272705,
"rewards/rejected": -2.5897579193115234,
"step": 190
},
{
"epoch": 0.51,
"grad_norm": 84.24320751887706,
"learning_rate": 5.691433575823665e-08,
"logits/chosen": -4.624228477478027,
"logits/rejected": -4.830000877380371,
"logps/chosen": -233.09713745117188,
"logps/rejected": -271.84051513671875,
"loss": 0.2112,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": 0.9521042108535767,
"rewards/margins": 3.7768027782440186,
"rewards/rejected": -2.8246986865997314,
"step": 200
},
{
"epoch": 0.51,
"eval_logits/chosen": -4.606511116027832,
"eval_logits/rejected": -4.8388166427612305,
"eval_logps/chosen": -405.0722351074219,
"eval_logps/rejected": -524.7885131835938,
"eval_loss": 1.102483868598938,
"eval_rewards/accuracies": 0.375,
"eval_rewards/chosen": -0.729888916015625,
"eval_rewards/margins": -0.40984660387039185,
"eval_rewards/rejected": -0.32004231214523315,
"eval_runtime": 97.8012,
"eval_samples_per_second": 20.45,
"eval_steps_per_second": 0.327,
"step": 200
},
{
"epoch": 0.54,
"grad_norm": 70.97621814359026,
"learning_rate": 5.2460367774593905e-08,
"logits/chosen": -4.6944451332092285,
"logits/rejected": -4.962179183959961,
"logps/chosen": -243.93307495117188,
"logps/rejected": -297.62066650390625,
"loss": 0.1723,
"rewards/accuracies": 0.9375,
"rewards/chosen": 1.0976004600524902,
"rewards/margins": 4.546332836151123,
"rewards/rejected": -3.448732376098633,
"step": 210
},
{
"epoch": 0.56,
"grad_norm": 71.88477654183092,
"learning_rate": 4.798670299452925e-08,
"logits/chosen": -4.529160499572754,
"logits/rejected": -4.8643479347229,
"logps/chosen": -241.5579833984375,
"logps/rejected": -293.224365234375,
"loss": 0.1923,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 1.0626676082611084,
"rewards/margins": 4.490227699279785,
"rewards/rejected": -3.4275600910186768,
"step": 220
},
{
"epoch": 0.59,
"grad_norm": 83.09100453064212,
"learning_rate": 4.3529155927297226e-08,
"logits/chosen": -4.6047258377075195,
"logits/rejected": -4.93651008605957,
"logps/chosen": -241.11477661132812,
"logps/rejected": -293.9808044433594,
"loss": 0.2012,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 0.9927155375480652,
"rewards/margins": 4.5062031745910645,
"rewards/rejected": -3.5134873390197754,
"step": 230
},
{
"epoch": 0.61,
"grad_norm": 70.11336436391163,
"learning_rate": 3.9123412049691636e-08,
"logits/chosen": -4.588685035705566,
"logits/rejected": -4.866146087646484,
"logps/chosen": -252.31533813476562,
"logps/rejected": -294.6343688964844,
"loss": 0.1875,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 1.0253931283950806,
"rewards/margins": 4.710432529449463,
"rewards/rejected": -3.6850390434265137,
"step": 240
},
{
"epoch": 0.64,
"grad_norm": 77.75874575792918,
"learning_rate": 3.480474212128766e-08,
"logits/chosen": -4.716187000274658,
"logits/rejected": -4.966707229614258,
"logps/chosen": -231.89279174804688,
"logps/rejected": -266.51666259765625,
"loss": 0.1825,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.840434193611145,
"rewards/margins": 3.7858078479766846,
"rewards/rejected": -2.94537353515625,
"step": 250
},
{
"epoch": 0.66,
"grad_norm": 97.12524424809816,
"learning_rate": 3.060771981975726e-08,
"logits/chosen": -4.585513114929199,
"logits/rejected": -4.878482341766357,
"logps/chosen": -234.92617797851562,
"logps/rejected": -297.1214904785156,
"loss": 0.1837,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 0.9561206102371216,
"rewards/margins": 4.824769973754883,
"rewards/rejected": -3.86864972114563,
"step": 260
},
{
"epoch": 0.69,
"grad_norm": 92.49874438996748,
"learning_rate": 2.6565944956764818e-08,
"logits/chosen": -4.684746742248535,
"logits/rejected": -4.911890983581543,
"logps/chosen": -243.29568481445312,
"logps/rejected": -288.39111328125,
"loss": 0.1961,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.8868792653083801,
"rewards/margins": 4.555182456970215,
"rewards/rejected": -3.6683037281036377,
"step": 270
},
{
"epoch": 0.72,
"grad_norm": 73.7028241699641,
"learning_rate": 2.2711774490274766e-08,
"logits/chosen": -4.634344577789307,
"logits/rejected": -4.873081207275391,
"logps/chosen": -245.1703338623047,
"logps/rejected": -317.2539978027344,
"loss": 0.1644,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 0.9668266177177429,
"rewards/margins": 4.682557582855225,
"rewards/rejected": -3.715731143951416,
"step": 280
},
{
"epoch": 0.74,
"grad_norm": 142.25337407808868,
"learning_rate": 1.9076063486687256e-08,
"logits/chosen": -4.503401756286621,
"logits/rejected": -4.866554260253906,
"logps/chosen": -250.9346160888672,
"logps/rejected": -283.41046142578125,
"loss": 0.1799,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 1.083687424659729,
"rewards/margins": 4.472739219665527,
"rewards/rejected": -3.389052152633667,
"step": 290
},
{
"epoch": 0.77,
"grad_norm": 88.66793876665662,
"learning_rate": 1.5687918106563324e-08,
"logits/chosen": -4.625166416168213,
"logits/rejected": -4.831929683685303,
"logps/chosen": -232.6981658935547,
"logps/rejected": -288.00457763671875,
"loss": 0.195,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": 1.0354994535446167,
"rewards/margins": 4.637454509735107,
"rewards/rejected": -3.6019554138183594,
"step": 300
},
{
"epoch": 0.77,
"eval_logits/chosen": -4.624210357666016,
"eval_logits/rejected": -4.856749057769775,
"eval_logps/chosen": -411.3396911621094,
"eval_logps/rejected": -531.6535034179688,
"eval_loss": 1.1301820278167725,
"eval_rewards/accuracies": 0.41015625,
"eval_rewards/chosen": -1.0432608127593994,
"eval_rewards/margins": -0.3799673318862915,
"eval_rewards/rejected": -0.6632934212684631,
"eval_runtime": 97.9609,
"eval_samples_per_second": 20.416,
"eval_steps_per_second": 0.327,
"step": 300
},
{
"epoch": 0.79,
"grad_norm": 90.90394303193246,
"learning_rate": 1.257446259144494e-08,
"logits/chosen": -4.541079044342041,
"logits/rejected": -4.873132228851318,
"logps/chosen": -239.60592651367188,
"logps/rejected": -298.159423828125,
"loss": 0.1847,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 1.076683759689331,
"rewards/margins": 4.874758243560791,
"rewards/rejected": -3.7980740070343018,
"step": 310
},
{
"epoch": 0.82,
"grad_norm": 87.85310576006609,
"learning_rate": 9.760622117187234e-09,
"logits/chosen": -4.597599029541016,
"logits/rejected": -4.9500837326049805,
"logps/chosen": -227.94247436523438,
"logps/rejected": -279.3809814453125,
"loss": 0.1934,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 0.8610901832580566,
"rewards/margins": 4.507565498352051,
"rewards/rejected": -3.6464743614196777,
"step": 320
},
{
"epoch": 0.84,
"grad_norm": 85.81889719468313,
"learning_rate": 7.2689232521989885e-09,
"logits/chosen": -4.554391860961914,
"logits/rejected": -4.864416599273682,
"logps/chosen": -249.89169311523438,
"logps/rejected": -304.54913330078125,
"loss": 0.1773,
"rewards/accuracies": 0.90625,
"rewards/chosen": 0.9128581881523132,
"rewards/margins": 4.5053324699401855,
"rewards/rejected": -3.5924744606018066,
"step": 330
},
{
"epoch": 0.87,
"grad_norm": 77.2990699180903,
"learning_rate": 5.119313618049309e-09,
"logits/chosen": -4.570425987243652,
"logits/rejected": -4.913475513458252,
"logps/chosen": -250.6792449951172,
"logps/rejected": -277.26556396484375,
"loss": 0.1723,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 1.1733391284942627,
"rewards/margins": 4.697513580322266,
"rewards/rejected": -3.524174451828003,
"step": 340
},
{
"epoch": 0.9,
"grad_norm": 74.61892537865367,
"learning_rate": 3.3290021961708158e-09,
"logits/chosen": -4.588479995727539,
"logits/rejected": -4.761317253112793,
"logps/chosen": -238.91921997070312,
"logps/rejected": -291.2458190917969,
"loss": 0.1937,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.8275976181030273,
"rewards/margins": 4.001389026641846,
"rewards/rejected": -3.1737911701202393,
"step": 350
},
{
"epoch": 0.92,
"grad_norm": 81.22407668854541,
"learning_rate": 1.9123215591052013e-09,
"logits/chosen": -4.583038806915283,
"logits/rejected": -4.805889129638672,
"logps/chosen": -244.8368682861328,
"logps/rejected": -294.9869079589844,
"loss": 0.1907,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 0.8399018049240112,
"rewards/margins": 4.216121673583984,
"rewards/rejected": -3.3762192726135254,
"step": 360
},
{
"epoch": 0.95,
"grad_norm": 75.17805842008224,
"learning_rate": 8.806131292167618e-10,
"logits/chosen": -4.595518112182617,
"logits/rejected": -4.752079010009766,
"logps/chosen": -239.1554412841797,
"logps/rejected": -302.4869079589844,
"loss": 0.1904,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": 0.8832891583442688,
"rewards/margins": 4.165283679962158,
"rewards/rejected": -3.281994581222534,
"step": 370
},
{
"epoch": 0.97,
"grad_norm": 91.00267878372446,
"learning_rate": 2.4213638345040867e-10,
"logits/chosen": -4.70483922958374,
"logits/rejected": -4.97845983505249,
"logps/chosen": -242.5469207763672,
"logps/rejected": -292.7474670410156,
"loss": 0.1788,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 0.9767888188362122,
"rewards/margins": 4.587931156158447,
"rewards/rejected": -3.61114239692688,
"step": 380
},
{
"epoch": 1.0,
"grad_norm": 100.5241948062632,
"learning_rate": 2.0027310073833516e-12,
"logits/chosen": -4.696263313293457,
"logits/rejected": -4.96966028213501,
"logps/chosen": -238.3385772705078,
"logps/rejected": -292.5868835449219,
"loss": 0.1773,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 1.066699743270874,
"rewards/margins": 4.670289516448975,
"rewards/rejected": -3.6035892963409424,
"step": 390
},
{
"epoch": 1.0,
"step": 391,
"total_flos": 0.0,
"train_loss": 0.256967593336959,
"train_runtime": 6146.1986,
"train_samples_per_second": 8.135,
"train_steps_per_second": 0.064
}
],
"logging_steps": 10,
"max_steps": 391,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}