zephyr-7b-dpo-full / trainer_state.json
RikkiXu's picture
Model save
95c9714 verified
raw
history blame
No virus
22.7 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 391,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 2089.5337638761494,
"learning_rate": 2.5e-09,
"logits/chosen": -4.623842239379883,
"logits/rejected": -4.85917854309082,
"logps/chosen": -239.31422424316406,
"logps/rejected": -207.56365966796875,
"loss": 0.6952,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"grad_norm": 2112.4857671472687,
"learning_rate": 2.5e-08,
"logits/chosen": -4.333562850952148,
"logits/rejected": -4.643319129943848,
"logps/chosen": -265.2981262207031,
"logps/rejected": -215.68804931640625,
"loss": 0.7355,
"rewards/accuracies": 0.3888888955116272,
"rewards/chosen": -0.09561138600111008,
"rewards/margins": -0.10567205399274826,
"rewards/rejected": 0.010060659609735012,
"step": 10
},
{
"epoch": 0.05,
"grad_norm": 1939.2525079641944,
"learning_rate": 5e-08,
"logits/chosen": -4.508406162261963,
"logits/rejected": -4.7436203956604,
"logps/chosen": -267.76934814453125,
"logps/rejected": -216.88119506835938,
"loss": 0.6656,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.08529385179281235,
"rewards/margins": 0.22122922539710999,
"rewards/rejected": -0.13593538105487823,
"step": 20
},
{
"epoch": 0.08,
"grad_norm": 1485.5526937989268,
"learning_rate": 7.5e-08,
"logits/chosen": -4.591097354888916,
"logits/rejected": -4.771042823791504,
"logps/chosen": -257.5138244628906,
"logps/rejected": -215.06607055664062,
"loss": 0.4916,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.5094950795173645,
"rewards/margins": 0.7761520147323608,
"rewards/rejected": -0.2666569650173187,
"step": 30
},
{
"epoch": 0.1,
"grad_norm": 1059.7800988486467,
"learning_rate": 1e-07,
"logits/chosen": -4.61653995513916,
"logits/rejected": -4.705571174621582,
"logps/chosen": -250.05783081054688,
"logps/rejected": -220.47665405273438,
"loss": 0.3139,
"rewards/accuracies": 0.875,
"rewards/chosen": 1.7706722021102905,
"rewards/margins": 2.1734442710876465,
"rewards/rejected": -0.4027720093727112,
"step": 40
},
{
"epoch": 0.13,
"grad_norm": 837.9194721075112,
"learning_rate": 9.979985922607475e-08,
"logits/chosen": -4.497745513916016,
"logits/rejected": -4.6963934898376465,
"logps/chosen": -266.4471740722656,
"logps/rejected": -227.05908203125,
"loss": 0.2475,
"rewards/accuracies": 0.875,
"rewards/chosen": 2.7611026763916016,
"rewards/margins": 3.3548762798309326,
"rewards/rejected": -0.5937734246253967,
"step": 50
},
{
"epoch": 0.15,
"grad_norm": 912.9246800740217,
"learning_rate": 9.92010391574745e-08,
"logits/chosen": -4.585003852844238,
"logits/rejected": -4.705927848815918,
"logps/chosen": -235.20071411132812,
"logps/rejected": -217.2942352294922,
"loss": 0.2013,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": 2.9608712196350098,
"rewards/margins": 4.097281455993652,
"rewards/rejected": -1.1364095211029053,
"step": 60
},
{
"epoch": 0.18,
"grad_norm": 634.4685088072516,
"learning_rate": 9.820833372667812e-08,
"logits/chosen": -4.462503910064697,
"logits/rejected": -4.6857805252075195,
"logps/chosen": -246.69186401367188,
"logps/rejected": -220.57937622070312,
"loss": 0.1884,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 3.2250447273254395,
"rewards/margins": 4.633510112762451,
"rewards/rejected": -1.4084659814834595,
"step": 70
},
{
"epoch": 0.2,
"grad_norm": 468.8604524803785,
"learning_rate": 9.682969016701356e-08,
"logits/chosen": -4.449667453765869,
"logits/rejected": -4.664923667907715,
"logps/chosen": -253.8452606201172,
"logps/rejected": -233.0582733154297,
"loss": 0.1796,
"rewards/accuracies": 0.875,
"rewards/chosen": 3.6873557567596436,
"rewards/margins": 5.057134628295898,
"rewards/rejected": -1.3697788715362549,
"step": 80
},
{
"epoch": 0.23,
"grad_norm": 958.5162002808887,
"learning_rate": 9.507614539004081e-08,
"logits/chosen": -4.535862445831299,
"logits/rejected": -4.733909606933594,
"logps/chosen": -243.66317749023438,
"logps/rejected": -206.82388305664062,
"loss": 0.1733,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 3.7747840881347656,
"rewards/margins": 5.771730899810791,
"rewards/rejected": -1.9969465732574463,
"step": 90
},
{
"epoch": 0.26,
"grad_norm": 928.8107393024507,
"learning_rate": 9.296173762811083e-08,
"logits/chosen": -4.406120777130127,
"logits/rejected": -4.672289848327637,
"logps/chosen": -248.62539672851562,
"logps/rejected": -231.67758178710938,
"loss": 0.1833,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": 4.046411037445068,
"rewards/margins": 6.330681324005127,
"rewards/rejected": -2.2842705249786377,
"step": 100
},
{
"epoch": 0.26,
"eval_logits/chosen": -4.5091094970703125,
"eval_logits/rejected": -4.724847316741943,
"eval_logps/chosen": -389.6205749511719,
"eval_logps/rejected": -515.4835205078125,
"eval_loss": 1.8368816375732422,
"eval_rewards/accuracies": 0.375,
"eval_rewards/chosen": 0.4269474744796753,
"eval_rewards/margins": -1.0251328945159912,
"eval_rewards/rejected": 1.452080249786377,
"eval_runtime": 97.8781,
"eval_samples_per_second": 20.434,
"eval_steps_per_second": 0.327,
"step": 100
},
{
"epoch": 0.28,
"grad_norm": 759.2228167566217,
"learning_rate": 9.050339404945832e-08,
"logits/chosen": -4.45731258392334,
"logits/rejected": -4.700920581817627,
"logps/chosen": -240.77047729492188,
"logps/rejected": -220.7100830078125,
"loss": 0.1645,
"rewards/accuracies": 0.9375,
"rewards/chosen": 4.207625865936279,
"rewards/margins": 6.219720363616943,
"rewards/rejected": -2.012094259262085,
"step": 110
},
{
"epoch": 0.31,
"grad_norm": 615.4147404438793,
"learning_rate": 8.77207952455395e-08,
"logits/chosen": -4.41110897064209,
"logits/rejected": -4.632037162780762,
"logps/chosen": -266.83837890625,
"logps/rejected": -232.83670043945312,
"loss": 0.1648,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 4.575605869293213,
"rewards/margins": 6.689634799957275,
"rewards/rejected": -2.1140289306640625,
"step": 120
},
{
"epoch": 0.33,
"grad_norm": 1154.0005388666061,
"learning_rate": 8.463621767547997e-08,
"logits/chosen": -4.474618434906006,
"logits/rejected": -4.724778652191162,
"logps/chosen": -250.192626953125,
"logps/rejected": -220.4983673095703,
"loss": 0.1701,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 4.125626564025879,
"rewards/margins": 6.710474967956543,
"rewards/rejected": -2.5848488807678223,
"step": 130
},
{
"epoch": 0.36,
"grad_norm": 510.3907389648352,
"learning_rate": 8.127435532896387e-08,
"logits/chosen": -4.497905254364014,
"logits/rejected": -4.757509708404541,
"logps/chosen": -276.1819763183594,
"logps/rejected": -237.9337921142578,
"loss": 0.169,
"rewards/accuracies": 0.9375,
"rewards/chosen": 4.006547451019287,
"rewards/margins": 6.8867011070251465,
"rewards/rejected": -2.880154848098755,
"step": 140
},
{
"epoch": 0.38,
"grad_norm": 616.3949177365913,
"learning_rate": 7.766212203526569e-08,
"logits/chosen": -4.483530521392822,
"logits/rejected": -4.700650691986084,
"logps/chosen": -244.07785034179688,
"logps/rejected": -224.0546417236328,
"loss": 0.1668,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 4.170205116271973,
"rewards/margins": 6.6378936767578125,
"rewards/rejected": -2.4676883220672607,
"step": 150
},
{
"epoch": 0.41,
"grad_norm": 759.2665515018776,
"learning_rate": 7.382843600106538e-08,
"logits/chosen": -4.538361072540283,
"logits/rejected": -4.685894966125488,
"logps/chosen": -243.0140380859375,
"logps/rejected": -220.0860137939453,
"loss": 0.1473,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 4.22122859954834,
"rewards/margins": 6.459697723388672,
"rewards/rejected": -2.238469362258911,
"step": 160
},
{
"epoch": 0.43,
"grad_norm": 688.1440407430587,
"learning_rate": 6.980398830195784e-08,
"logits/chosen": -4.427027702331543,
"logits/rejected": -4.675489902496338,
"logps/chosen": -251.1200408935547,
"logps/rejected": -225.5527801513672,
"loss": 0.1434,
"rewards/accuracies": 0.9375,
"rewards/chosen": 4.977096080780029,
"rewards/margins": 7.851990699768066,
"rewards/rejected": -2.874894618988037,
"step": 170
},
{
"epoch": 0.46,
"grad_norm": 572.2642343737211,
"learning_rate": 6.562099718102787e-08,
"logits/chosen": -4.530760765075684,
"logits/rejected": -4.731973171234131,
"logps/chosen": -228.52304077148438,
"logps/rejected": -202.01510620117188,
"loss": 0.1552,
"rewards/accuracies": 0.9375,
"rewards/chosen": 4.1703057289123535,
"rewards/margins": 7.167737007141113,
"rewards/rejected": -2.9974308013916016,
"step": 180
},
{
"epoch": 0.49,
"grad_norm": 887.5255514170451,
"learning_rate": 6.131295012148612e-08,
"logits/chosen": -4.499785423278809,
"logits/rejected": -4.621634006500244,
"logps/chosen": -251.9990692138672,
"logps/rejected": -240.3909149169922,
"loss": 0.1634,
"rewards/accuracies": 0.90625,
"rewards/chosen": 4.446890830993652,
"rewards/margins": 7.0593156814575195,
"rewards/rejected": -2.6124250888824463,
"step": 190
},
{
"epoch": 0.51,
"grad_norm": 622.6699519046258,
"learning_rate": 5.691433575823665e-08,
"logits/chosen": -4.48135232925415,
"logits/rejected": -4.617772102355957,
"logps/chosen": -243.34725952148438,
"logps/rejected": -220.18392944335938,
"loss": 0.1786,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": 4.395993709564209,
"rewards/margins": 6.814687252044678,
"rewards/rejected": -2.4186930656433105,
"step": 200
},
{
"epoch": 0.51,
"eval_logits/chosen": -4.4697723388671875,
"eval_logits/rejected": -4.687928199768066,
"eval_logps/chosen": -389.2646179199219,
"eval_logps/rejected": -514.896484375,
"eval_loss": 2.016343355178833,
"eval_rewards/accuracies": 0.375,
"eval_rewards/chosen": 0.6049206256866455,
"eval_rewards/margins": -1.140692114830017,
"eval_rewards/rejected": 1.745612621307373,
"eval_runtime": 97.8297,
"eval_samples_per_second": 20.444,
"eval_steps_per_second": 0.327,
"step": 200
},
{
"epoch": 0.54,
"grad_norm": 473.28271440178054,
"learning_rate": 5.2460367774593905e-08,
"logits/chosen": -4.541897773742676,
"logits/rejected": -4.740262031555176,
"logps/chosen": -255.6215362548828,
"logps/rejected": -234.78518676757812,
"loss": 0.1232,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 5.131775856018066,
"rewards/margins": 8.201360702514648,
"rewards/rejected": -3.069584846496582,
"step": 210
},
{
"epoch": 0.56,
"grad_norm": 670.7484200931372,
"learning_rate": 4.798670299452925e-08,
"logits/chosen": -4.39837646484375,
"logits/rejected": -4.688643455505371,
"logps/chosen": -253.91787719726562,
"logps/rejected": -231.707275390625,
"loss": 0.1672,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 4.446724891662598,
"rewards/margins": 7.963796138763428,
"rewards/rejected": -3.517070770263672,
"step": 220
},
{
"epoch": 0.59,
"grad_norm": 763.2480410824999,
"learning_rate": 4.3529155927297226e-08,
"logits/chosen": -4.47940731048584,
"logits/rejected": -4.748034954071045,
"logps/chosen": -252.20700073242188,
"logps/rejected": -230.70425415039062,
"loss": 0.1691,
"rewards/accuracies": 0.9375,
"rewards/chosen": 4.38104248046875,
"rewards/margins": 7.8776116371154785,
"rewards/rejected": -3.4965691566467285,
"step": 230
},
{
"epoch": 0.61,
"grad_norm": 547.6628902362396,
"learning_rate": 3.9123412049691636e-08,
"logits/chosen": -4.450512886047363,
"logits/rejected": -4.651386260986328,
"logps/chosen": -263.7304382324219,
"logps/rejected": -227.78604125976562,
"loss": 0.1511,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 4.546363353729248,
"rewards/margins": 7.972568511962891,
"rewards/rejected": -3.4262046813964844,
"step": 240
},
{
"epoch": 0.64,
"grad_norm": 706.6026662780071,
"learning_rate": 3.480474212128766e-08,
"logits/chosen": -4.571944236755371,
"logits/rejected": -4.786678791046143,
"logps/chosen": -240.4440155029297,
"logps/rejected": -212.46694946289062,
"loss": 0.1403,
"rewards/accuracies": 0.9375,
"rewards/chosen": 4.128727912902832,
"rewards/margins": 6.557607173919678,
"rewards/rejected": -2.428879976272583,
"step": 250
},
{
"epoch": 0.66,
"grad_norm": 957.1848027668926,
"learning_rate": 3.060771981975726e-08,
"logits/chosen": -4.445496082305908,
"logits/rejected": -4.674472808837891,
"logps/chosen": -244.96701049804688,
"logps/rejected": -227.3423614501953,
"loss": 0.1506,
"rewards/accuracies": 0.90625,
"rewards/chosen": 4.540780067443848,
"rewards/margins": 8.337722778320312,
"rewards/rejected": -3.7969424724578857,
"step": 260
},
{
"epoch": 0.69,
"grad_norm": 1053.6903730937584,
"learning_rate": 2.6565944956764818e-08,
"logits/chosen": -4.53262996673584,
"logits/rejected": -4.71115255355835,
"logps/chosen": -252.1263427734375,
"logps/rejected": -221.7955322265625,
"loss": 0.1551,
"rewards/accuracies": 0.9375,
"rewards/chosen": 4.453462600708008,
"rewards/margins": 7.838715553283691,
"rewards/rejected": -3.385251998901367,
"step": 270
},
{
"epoch": 0.72,
"grad_norm": 661.3688193511013,
"learning_rate": 2.2711774490274766e-08,
"logits/chosen": -4.489356994628906,
"logits/rejected": -4.654987812042236,
"logps/chosen": -254.680908203125,
"logps/rejected": -248.8947296142578,
"loss": 0.1253,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 4.912972450256348,
"rewards/margins": 7.8906402587890625,
"rewards/rejected": -2.9776668548583984,
"step": 280
},
{
"epoch": 0.74,
"grad_norm": 1908.0202284500367,
"learning_rate": 1.9076063486687256e-08,
"logits/chosen": -4.361441135406494,
"logits/rejected": -4.647955417633057,
"logps/chosen": -262.6406555175781,
"logps/rejected": -221.6370086669922,
"loss": 0.1481,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 4.983874320983887,
"rewards/margins": 7.9876885414123535,
"rewards/rejected": -3.0038130283355713,
"step": 290
},
{
"epoch": 0.77,
"grad_norm": 734.7948711655655,
"learning_rate": 1.5687918106563324e-08,
"logits/chosen": -4.47251033782959,
"logits/rejected": -4.634402275085449,
"logps/chosen": -243.4433135986328,
"logps/rejected": -222.4509735107422,
"loss": 0.1648,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 4.982421398162842,
"rewards/margins": 8.225171089172363,
"rewards/rejected": -3.2427496910095215,
"step": 300
},
{
"epoch": 0.77,
"eval_logits/chosen": -4.4838151931762695,
"eval_logits/rejected": -4.69987678527832,
"eval_logps/chosen": -390.0736999511719,
"eval_logps/rejected": -516.0419921875,
"eval_loss": 1.9448436498641968,
"eval_rewards/accuracies": 0.3984375,
"eval_rewards/chosen": 0.2003953605890274,
"eval_rewards/margins": -0.972442626953125,
"eval_rewards/rejected": 1.1728378534317017,
"eval_runtime": 97.9077,
"eval_samples_per_second": 20.427,
"eval_steps_per_second": 0.327,
"step": 300
},
{
"epoch": 0.79,
"grad_norm": 701.7075104371141,
"learning_rate": 1.257446259144494e-08,
"logits/chosen": -4.397843360900879,
"logits/rejected": -4.662208557128906,
"logps/chosen": -251.11611938476562,
"logps/rejected": -229.4883270263672,
"loss": 0.1577,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 5.011745929718018,
"rewards/margins": 8.656941413879395,
"rewards/rejected": -3.6451950073242188,
"step": 310
},
{
"epoch": 0.82,
"grad_norm": 1065.7098800029996,
"learning_rate": 9.760622117187234e-09,
"logits/chosen": -4.4547929763793945,
"logits/rejected": -4.7404327392578125,
"logps/chosen": -235.986083984375,
"logps/rejected": -213.1405029296875,
"loss": 0.1434,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 4.589102745056152,
"rewards/margins": 7.9336113929748535,
"rewards/rejected": -3.344507932662964,
"step": 320
},
{
"epoch": 0.84,
"grad_norm": 766.1086061468453,
"learning_rate": 7.2689232521989885e-09,
"logits/chosen": -4.407891750335693,
"logits/rejected": -4.665772914886475,
"logps/chosen": -258.3376159667969,
"logps/rejected": -240.0522003173828,
"loss": 0.1405,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 4.90563440322876,
"rewards/margins": 8.58189868927002,
"rewards/rejected": -3.6762642860412598,
"step": 330
},
{
"epoch": 0.87,
"grad_norm": 626.4348398301977,
"learning_rate": 5.119313618049309e-09,
"logits/chosen": -4.429708003997803,
"logits/rejected": -4.715014457702637,
"logps/chosen": -263.54986572265625,
"logps/rejected": -213.69723510742188,
"loss": 0.1494,
"rewards/accuracies": 0.9375,
"rewards/chosen": 5.298083305358887,
"rewards/margins": 8.755678176879883,
"rewards/rejected": -3.457595109939575,
"step": 340
},
{
"epoch": 0.9,
"grad_norm": 564.1193509438065,
"learning_rate": 3.3290021961708158e-09,
"logits/chosen": -4.445944309234619,
"logits/rejected": -4.576190948486328,
"logps/chosen": -247.17697143554688,
"logps/rejected": -233.48477172851562,
"loss": 0.1576,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": 4.14711332321167,
"rewards/margins": 7.0045037269592285,
"rewards/rejected": -2.8573899269104004,
"step": 350
},
{
"epoch": 0.92,
"grad_norm": 651.5911217726903,
"learning_rate": 1.9123215591052013e-09,
"logits/chosen": -4.436100482940674,
"logits/rejected": -4.62412166595459,
"logps/chosen": -253.4558563232422,
"logps/rejected": -233.94869995117188,
"loss": 0.1582,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 4.089536190032959,
"rewards/margins": 7.332627296447754,
"rewards/rejected": -3.243091583251953,
"step": 360
},
{
"epoch": 0.95,
"grad_norm": 610.6809759122384,
"learning_rate": 8.806131292167618e-10,
"logits/chosen": -4.4610724449157715,
"logits/rejected": -4.592678070068359,
"logps/chosen": -247.8229217529297,
"logps/rejected": -242.95114135742188,
"loss": 0.1649,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": 4.499147415161133,
"rewards/margins": 7.5511980056762695,
"rewards/rejected": -3.052050828933716,
"step": 370
},
{
"epoch": 0.97,
"grad_norm": 661.3141861471707,
"learning_rate": 2.4213638345040867e-10,
"logits/chosen": -4.557965278625488,
"logits/rejected": -4.776811122894287,
"logps/chosen": -252.97561645507812,
"logps/rejected": -227.4269561767578,
"loss": 0.1552,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 4.553546905517578,
"rewards/margins": 8.004728317260742,
"rewards/rejected": -3.4511806964874268,
"step": 380
},
{
"epoch": 1.0,
"grad_norm": 1178.6266155115975,
"learning_rate": 2.0027310073833516e-12,
"logits/chosen": -4.544768810272217,
"logits/rejected": -4.75381326675415,
"logps/chosen": -250.1166534423828,
"logps/rejected": -226.92916870117188,
"loss": 0.1545,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 4.777965068817139,
"rewards/margins": 7.9850053787231445,
"rewards/rejected": -3.2070395946502686,
"step": 390
},
{
"epoch": 1.0,
"step": 391,
"total_flos": 0.0,
"train_loss": 0.20245660769055263,
"train_runtime": 6146.5091,
"train_samples_per_second": 8.135,
"train_steps_per_second": 0.064
}
],
"logging_steps": 10,
"max_steps": 391,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}