zephyr-7b-dpo-full / trainer_state.json
RikkiXu's picture
Model save
00dc39f verified
raw
history blame
No virus
20.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 350,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 3067.8628948133914,
"learning_rate": 4.9998992904271775e-08,
"logits/chosen": -4.185730934143066,
"logits/rejected": -4.509836196899414,
"logps/chosen": -274.000732421875,
"logps/rejected": -205.8054962158203,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"grad_norm": 3330.3974170986107,
"learning_rate": 4.9899357349880975e-08,
"logits/chosen": -4.211880207061768,
"logits/rejected": -4.48573637008667,
"logps/chosen": -318.31072998046875,
"logps/rejected": -257.18267822265625,
"loss": 0.7459,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.200405091047287,
"rewards/margins": 0.10155472159385681,
"rewards/rejected": 0.09885036945343018,
"step": 10
},
{
"epoch": 0.06,
"grad_norm": 2932.727170813642,
"learning_rate": 4.959823971496574e-08,
"logits/chosen": -4.2464704513549805,
"logits/rejected": -4.50115966796875,
"logps/chosen": -304.53350830078125,
"logps/rejected": -244.1282501220703,
"loss": 0.6293,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": 0.7030802965164185,
"rewards/margins": 0.6052380800247192,
"rewards/rejected": 0.09784229844808578,
"step": 20
},
{
"epoch": 0.09,
"grad_norm": 2159.097276891197,
"learning_rate": 4.9099071517396326e-08,
"logits/chosen": -4.3018364906311035,
"logits/rejected": -4.5636820793151855,
"logps/chosen": -305.11822509765625,
"logps/rejected": -258.89215087890625,
"loss": 0.5093,
"rewards/accuracies": 0.71875,
"rewards/chosen": 1.3964869976043701,
"rewards/margins": 0.9537334442138672,
"rewards/rejected": 0.44275355339050293,
"step": 30
},
{
"epoch": 0.11,
"grad_norm": 2233.10446662558,
"learning_rate": 4.8405871765993426e-08,
"logits/chosen": -4.304145812988281,
"logits/rejected": -4.571420192718506,
"logps/chosen": -293.4151916503906,
"logps/rejected": -234.4054412841797,
"loss": 0.4371,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": 2.119215488433838,
"rewards/margins": 1.3193193674087524,
"rewards/rejected": 0.7998961806297302,
"step": 40
},
{
"epoch": 0.14,
"grad_norm": 1863.9092640792912,
"learning_rate": 4.7524221697560474e-08,
"logits/chosen": -4.298985481262207,
"logits/rejected": -4.545313835144043,
"logps/chosen": -299.71026611328125,
"logps/rejected": -252.57339477539062,
"loss": 0.4054,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": 3.015381336212158,
"rewards/margins": 1.8283360004425049,
"rewards/rejected": 1.1870452165603638,
"step": 50
},
{
"epoch": 0.17,
"grad_norm": 1861.0742759245438,
"learning_rate": 4.646121984004665e-08,
"logits/chosen": -4.3018717765808105,
"logits/rejected": -4.5299859046936035,
"logps/chosen": -308.25457763671875,
"logps/rejected": -261.1996154785156,
"loss": 0.3815,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": 3.097055673599243,
"rewards/margins": 1.6846046447753906,
"rewards/rejected": 1.412451148033142,
"step": 60
},
{
"epoch": 0.2,
"grad_norm": 2083.1341477087894,
"learning_rate": 4.522542485937369e-08,
"logits/chosen": -4.417206764221191,
"logits/rejected": -4.548245429992676,
"logps/chosen": -285.4747009277344,
"logps/rejected": -236.24136352539062,
"loss": 0.3773,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": 3.4294419288635254,
"rewards/margins": 2.4485509395599365,
"rewards/rejected": 0.9808910489082336,
"step": 70
},
{
"epoch": 0.23,
"grad_norm": 1999.1118673285923,
"learning_rate": 4.3826786650090273e-08,
"logits/chosen": -4.271725177764893,
"logits/rejected": -4.525103569030762,
"logps/chosen": -292.2157897949219,
"logps/rejected": -239.5623321533203,
"loss": 0.3663,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": 3.471898317337036,
"rewards/margins": 2.5827386379241943,
"rewards/rejected": 0.8891592025756836,
"step": 80
},
{
"epoch": 0.26,
"grad_norm": 1543.0151245523064,
"learning_rate": 4.2276566224671614e-08,
"logits/chosen": -4.196888446807861,
"logits/rejected": -4.430451393127441,
"logps/chosen": -303.9364929199219,
"logps/rejected": -258.19708251953125,
"loss": 0.37,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 3.6267776489257812,
"rewards/margins": 2.5005435943603516,
"rewards/rejected": 1.1262344121932983,
"step": 90
},
{
"epoch": 0.29,
"grad_norm": 2558.2358091969077,
"learning_rate": 4.058724504646834e-08,
"logits/chosen": -4.298203468322754,
"logits/rejected": -4.51765251159668,
"logps/chosen": -291.99151611328125,
"logps/rejected": -240.97909545898438,
"loss": 0.3573,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 3.8364264965057373,
"rewards/margins": 2.6143250465393066,
"rewards/rejected": 1.2221016883850098,
"step": 100
},
{
"epoch": 0.29,
"eval_logits/chosen": -3.2259409427642822,
"eval_logits/rejected": -3.2259409427642822,
"eval_logps/chosen": -157.8415985107422,
"eval_logps/rejected": -157.8415985107422,
"eval_loss": 0.6931471824645996,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": -2.2645912170410156,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": -2.2645912170410156,
"eval_runtime": 1.5044,
"eval_samples_per_second": 0.665,
"eval_steps_per_second": 0.665,
"step": 100
},
{
"epoch": 0.31,
"grad_norm": 2075.8470964199623,
"learning_rate": 3.8772424536302564e-08,
"logits/chosen": -4.3160247802734375,
"logits/rejected": -4.557186126708984,
"logps/chosen": -299.556640625,
"logps/rejected": -250.2120361328125,
"loss": 0.3653,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": 3.8744053840637207,
"rewards/margins": 2.781764268875122,
"rewards/rejected": 1.0926413536071777,
"step": 110
},
{
"epoch": 0.34,
"grad_norm": 2129.2578794603846,
"learning_rate": 3.6846716561824964e-08,
"logits/chosen": -4.358242988586426,
"logits/rejected": -4.6036834716796875,
"logps/chosen": -288.9602966308594,
"logps/rejected": -237.98257446289062,
"loss": 0.346,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": 3.973881959915161,
"rewards/margins": 2.8389506340026855,
"rewards/rejected": 1.1349313259124756,
"step": 120
},
{
"epoch": 0.37,
"grad_norm": 1374.3088736284383,
"learning_rate": 3.482562579134809e-08,
"logits/chosen": -4.360684871673584,
"logits/rejected": -4.608490467071533,
"logps/chosen": -278.861572265625,
"logps/rejected": -218.7367706298828,
"loss": 0.3426,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": 3.8384926319122314,
"rewards/margins": 2.634833812713623,
"rewards/rejected": 1.2036586999893188,
"step": 130
},
{
"epoch": 0.4,
"grad_norm": 1741.7465783603645,
"learning_rate": 3.272542485937369e-08,
"logits/chosen": -4.276978969573975,
"logits/rejected": -4.593733787536621,
"logps/chosen": -296.0984191894531,
"logps/rejected": -240.01248168945312,
"loss": 0.3729,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": 3.785256862640381,
"rewards/margins": 2.9941701889038086,
"rewards/rejected": 0.79108726978302,
"step": 140
},
{
"epoch": 0.43,
"grad_norm": 1837.7137132104272,
"learning_rate": 3.056302334890786e-08,
"logits/chosen": -4.245262622833252,
"logits/rejected": -4.510401725769043,
"logps/chosen": -295.3984680175781,
"logps/rejected": -250.73580932617188,
"loss": 0.3235,
"rewards/accuracies": 0.84375,
"rewards/chosen": 4.011710166931152,
"rewards/margins": 3.0462794303894043,
"rewards/rejected": 0.9654304385185242,
"step": 150
},
{
"epoch": 0.46,
"grad_norm": 1744.335126050233,
"learning_rate": 2.8355831645441387e-08,
"logits/chosen": -4.277425765991211,
"logits/rejected": -4.570274829864502,
"logps/chosen": -296.66839599609375,
"logps/rejected": -235.6475372314453,
"loss": 0.36,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": 4.259499549865723,
"rewards/margins": 3.127065658569336,
"rewards/rejected": 1.1324341297149658,
"step": 160
},
{
"epoch": 0.49,
"grad_norm": 1875.319827037545,
"learning_rate": 2.6121620758762875e-08,
"logits/chosen": -4.229983329772949,
"logits/rejected": -4.467092990875244,
"logps/chosen": -296.31683349609375,
"logps/rejected": -241.3401336669922,
"loss": 0.3474,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": 4.343829154968262,
"rewards/margins": 3.233609437942505,
"rewards/rejected": 1.1102204322814941,
"step": 170
},
{
"epoch": 0.51,
"grad_norm": 2082.5003671787076,
"learning_rate": 2.3878379241237133e-08,
"logits/chosen": -4.364750862121582,
"logits/rejected": -4.597868919372559,
"logps/chosen": -285.72869873046875,
"logps/rejected": -241.40652465820312,
"loss": 0.3417,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": 4.1484293937683105,
"rewards/margins": 3.0738511085510254,
"rewards/rejected": 1.074578046798706,
"step": 180
},
{
"epoch": 0.54,
"grad_norm": 1597.9774938638957,
"learning_rate": 2.164416835455862e-08,
"logits/chosen": -4.3281121253967285,
"logits/rejected": -4.498069763183594,
"logps/chosen": -308.14776611328125,
"logps/rejected": -257.7415466308594,
"loss": 0.2852,
"rewards/accuracies": 0.875,
"rewards/chosen": 4.146700859069824,
"rewards/margins": 3.202249526977539,
"rewards/rejected": 0.9444509744644165,
"step": 190
},
{
"epoch": 0.57,
"grad_norm": 1601.8580723204816,
"learning_rate": 1.943697665109214e-08,
"logits/chosen": -4.358348846435547,
"logits/rejected": -4.601215839385986,
"logps/chosen": -292.93658447265625,
"logps/rejected": -249.59469604492188,
"loss": 0.3184,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 4.194998741149902,
"rewards/margins": 2.974621534347534,
"rewards/rejected": 1.2203772068023682,
"step": 200
},
{
"epoch": 0.57,
"eval_logits/chosen": -3.2195205688476562,
"eval_logits/rejected": -3.2195205688476562,
"eval_logps/chosen": -157.37933349609375,
"eval_logps/rejected": -157.37933349609375,
"eval_loss": 0.6931471824645996,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": -1.8023262023925781,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": -1.8023262023925781,
"eval_runtime": 1.4741,
"eval_samples_per_second": 0.678,
"eval_steps_per_second": 0.678,
"step": 200
},
{
"epoch": 0.6,
"grad_norm": 1818.1510653253358,
"learning_rate": 1.7274575140626317e-08,
"logits/chosen": -4.293700218200684,
"logits/rejected": -4.587708473205566,
"logps/chosen": -306.94647216796875,
"logps/rejected": -254.83981323242188,
"loss": 0.3169,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": 4.274092674255371,
"rewards/margins": 3.556690216064453,
"rewards/rejected": 0.7174022793769836,
"step": 210
},
{
"epoch": 0.63,
"grad_norm": 2084.9707047014217,
"learning_rate": 1.517437420865191e-08,
"logits/chosen": -4.2438554763793945,
"logits/rejected": -4.590119361877441,
"logps/chosen": -297.3277587890625,
"logps/rejected": -225.09414672851562,
"loss": 0.3117,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": 4.186089515686035,
"rewards/margins": 3.6873459815979004,
"rewards/rejected": 0.4987434446811676,
"step": 220
},
{
"epoch": 0.66,
"grad_norm": 1793.5243127965375,
"learning_rate": 1.3153283438175034e-08,
"logits/chosen": -4.3719801902771,
"logits/rejected": -4.563234806060791,
"logps/chosen": -281.373779296875,
"logps/rejected": -226.25576782226562,
"loss": 0.2879,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": 3.584429979324341,
"rewards/margins": 2.8807406425476074,
"rewards/rejected": 0.7036892771720886,
"step": 230
},
{
"epoch": 0.69,
"grad_norm": 1621.528952660571,
"learning_rate": 1.1227575463697438e-08,
"logits/chosen": -4.3936567306518555,
"logits/rejected": -4.714280128479004,
"logps/chosen": -258.6517639160156,
"logps/rejected": -215.28759765625,
"loss": 0.3042,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": 3.862626552581787,
"rewards/margins": 3.0625431537628174,
"rewards/rejected": 0.8000835180282593,
"step": 240
},
{
"epoch": 0.71,
"grad_norm": 2231.5682374793205,
"learning_rate": 9.412754953531663e-09,
"logits/chosen": -4.34213924407959,
"logits/rejected": -4.6162428855896,
"logps/chosen": -278.9085388183594,
"logps/rejected": -232.6056365966797,
"loss": 0.3109,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": 3.6670002937316895,
"rewards/margins": 2.8657121658325195,
"rewards/rejected": 0.8012881278991699,
"step": 250
},
{
"epoch": 0.74,
"grad_norm": 1668.5476234310504,
"learning_rate": 7.723433775328384e-09,
"logits/chosen": -4.386145114898682,
"logits/rejected": -4.632050037384033,
"logps/chosen": -271.8704833984375,
"logps/rejected": -240.48257446289062,
"loss": 0.3039,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": 3.5878806114196777,
"rewards/margins": 2.9320101737976074,
"rewards/rejected": 0.6558703184127808,
"step": 260
},
{
"epoch": 0.77,
"grad_norm": 1726.7631750123023,
"learning_rate": 6.173213349909728e-09,
"logits/chosen": -4.517698764801025,
"logits/rejected": -4.687317848205566,
"logps/chosen": -273.4754943847656,
"logps/rejected": -228.2833709716797,
"loss": 0.3356,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": 3.6022744178771973,
"rewards/margins": 2.6589503288269043,
"rewards/rejected": 0.9433239698410034,
"step": 270
},
{
"epoch": 0.8,
"grad_norm": 1197.1122441391342,
"learning_rate": 4.7745751406263165e-09,
"logits/chosen": -4.299304008483887,
"logits/rejected": -4.589285850524902,
"logps/chosen": -274.9901123046875,
"logps/rejected": -229.76449584960938,
"loss": 0.2791,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": 4.094995021820068,
"rewards/margins": 3.0975327491760254,
"rewards/rejected": 0.997462272644043,
"step": 280
},
{
"epoch": 0.83,
"grad_norm": 2356.4193384705377,
"learning_rate": 3.5387801599533474e-09,
"logits/chosen": -4.320891857147217,
"logits/rejected": -4.508334636688232,
"logps/chosen": -282.45013427734375,
"logps/rejected": -236.50424194335938,
"loss": 0.3316,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": 4.115664958953857,
"rewards/margins": 3.2731566429138184,
"rewards/rejected": 0.8425084948539734,
"step": 290
},
{
"epoch": 0.86,
"grad_norm": 1485.14332328563,
"learning_rate": 2.475778302439524e-09,
"logits/chosen": -4.295617580413818,
"logits/rejected": -4.5400543212890625,
"logps/chosen": -298.4153137207031,
"logps/rejected": -240.1478271484375,
"loss": 0.3594,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": 4.416214942932129,
"rewards/margins": 3.1984994411468506,
"rewards/rejected": 1.2177152633666992,
"step": 300
},
{
"epoch": 0.86,
"eval_logits/chosen": -3.220174551010132,
"eval_logits/rejected": -3.220174551010132,
"eval_logps/chosen": -157.367431640625,
"eval_logps/rejected": -157.367431640625,
"eval_loss": 0.6931471824645996,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": -1.7904319763183594,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": -1.7904319763183594,
"eval_runtime": 1.47,
"eval_samples_per_second": 0.68,
"eval_steps_per_second": 0.68,
"step": 300
},
{
"epoch": 0.89,
"grad_norm": 2625.0873445651387,
"learning_rate": 1.5941282340065698e-09,
"logits/chosen": -4.43851900100708,
"logits/rejected": -4.580752372741699,
"logps/chosen": -262.37445068359375,
"logps/rejected": -226.46572875976562,
"loss": 0.3007,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": 3.741738796234131,
"rewards/margins": 2.9144444465637207,
"rewards/rejected": 0.8272944688796997,
"step": 310
},
{
"epoch": 0.91,
"grad_norm": 1589.6112135444553,
"learning_rate": 9.009284826036689e-10,
"logits/chosen": -4.277141094207764,
"logits/rejected": -4.5314412117004395,
"logps/chosen": -292.65875244140625,
"logps/rejected": -243.8509063720703,
"loss": 0.3277,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": 4.2250542640686035,
"rewards/margins": 3.1320207118988037,
"rewards/rejected": 1.093034029006958,
"step": 320
},
{
"epoch": 0.94,
"grad_norm": 2192.855370501752,
"learning_rate": 4.017602850342583e-10,
"logits/chosen": -4.330888271331787,
"logits/rejected": -4.536975383758545,
"logps/chosen": -305.5764465332031,
"logps/rejected": -252.0467529296875,
"loss": 0.3203,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": 4.009243488311768,
"rewards/margins": 3.056270122528076,
"rewards/rejected": 0.9529730677604675,
"step": 330
},
{
"epoch": 0.97,
"grad_norm": 2158.7231383937637,
"learning_rate": 1.0064265011902328e-10,
"logits/chosen": -4.319821357727051,
"logits/rejected": -4.614516735076904,
"logps/chosen": -285.194091796875,
"logps/rejected": -227.5124053955078,
"loss": 0.3239,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": 3.751185178756714,
"rewards/margins": 2.758882522583008,
"rewards/rejected": 0.9923027753829956,
"step": 340
},
{
"epoch": 1.0,
"grad_norm": 1350.3403367664616,
"learning_rate": 0.0,
"logits/chosen": -4.290497779846191,
"logits/rejected": -4.4949870109558105,
"logps/chosen": -291.93768310546875,
"logps/rejected": -244.3520965576172,
"loss": 0.3142,
"rewards/accuracies": 0.8187500238418579,
"rewards/chosen": 4.083470344543457,
"rewards/margins": 3.2036800384521484,
"rewards/rejected": 0.8797903060913086,
"step": 350
},
{
"epoch": 1.0,
"step": 350,
"total_flos": 0.0,
"train_loss": 0.36299856867109026,
"train_runtime": 5294.123,
"train_samples_per_second": 8.454,
"train_steps_per_second": 0.066
}
],
"logging_steps": 10,
"max_steps": 350,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}