zephyr-7b-dpo-full / trainer_state.json
RikkiXu's picture
Model save
9c5c1e5 verified
raw
history blame
23.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9988623435722411,
"eval_steps": 10000000,
"global_step": 439,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 33.30332403665468,
"learning_rate": 2.2727272727272727e-09,
"logits/chosen": -1.6768856048583984,
"logits/rejected": -1.7259055376052856,
"logps/chosen": -1.2793102264404297,
"logps/rejected": -1.2162058353424072,
"loss": 1.3133,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.02,
"grad_norm": 37.64303926905523,
"learning_rate": 2.2727272727272725e-08,
"logits/chosen": -1.7033135890960693,
"logits/rejected": -1.668673038482666,
"logps/chosen": -1.2131016254425049,
"logps/rejected": -1.22050142288208,
"loss": 1.313,
"rewards/accuracies": 0.4513888955116272,
"rewards/chosen": 0.00040783319855108857,
"rewards/margins": -8.263149356935173e-05,
"rewards/rejected": 0.0004904646775685251,
"step": 10
},
{
"epoch": 0.05,
"grad_norm": 38.69260337999141,
"learning_rate": 4.545454545454545e-08,
"logits/chosen": -1.7795250415802002,
"logits/rejected": -1.7348783016204834,
"logps/chosen": -1.1448484659194946,
"logps/rejected": -1.1852957010269165,
"loss": 1.3122,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.003251913469284773,
"rewards/margins": 0.0004202231648378074,
"rewards/rejected": -0.003672136692330241,
"step": 20
},
{
"epoch": 0.07,
"grad_norm": 44.09359407998382,
"learning_rate": 6.818181818181817e-08,
"logits/chosen": -1.7442439794540405,
"logits/rejected": -1.6752439737319946,
"logps/chosen": -1.1954559087753296,
"logps/rejected": -1.248280644416809,
"loss": 1.3059,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.025108838453888893,
"rewards/margins": 0.01119022723287344,
"rewards/rejected": -0.03629906848073006,
"step": 30
},
{
"epoch": 0.09,
"grad_norm": 28.883029165176804,
"learning_rate": 9.09090909090909e-08,
"logits/chosen": -1.7305904626846313,
"logits/rejected": -1.6642875671386719,
"logps/chosen": -1.2533624172210693,
"logps/rejected": -1.3383153676986694,
"loss": 1.2922,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.09282750636339188,
"rewards/margins": 0.06525905430316925,
"rewards/rejected": -0.15808656811714172,
"step": 40
},
{
"epoch": 0.11,
"grad_norm": 36.39900209589975,
"learning_rate": 9.994307990108962e-08,
"logits/chosen": -1.690720796585083,
"logits/rejected": -1.625451683998108,
"logps/chosen": -1.3044583797454834,
"logps/rejected": -1.3643444776535034,
"loss": 1.2643,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.19274269044399261,
"rewards/margins": 0.0795869454741478,
"rewards/rejected": -0.2723296284675598,
"step": 50
},
{
"epoch": 0.14,
"grad_norm": 43.081578827458706,
"learning_rate": 9.959570405988094e-08,
"logits/chosen": -1.71735417842865,
"logits/rejected": -1.6361076831817627,
"logps/chosen": -1.3119524717330933,
"logps/rejected": -1.4046932458877563,
"loss": 1.2541,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.4057086110115051,
"rewards/margins": 0.0816243588924408,
"rewards/rejected": -0.4873329699039459,
"step": 60
},
{
"epoch": 0.16,
"grad_norm": 33.06897498171632,
"learning_rate": 9.893476820924666e-08,
"logits/chosen": -1.7922325134277344,
"logits/rejected": -1.7017757892608643,
"logps/chosen": -1.5047810077667236,
"logps/rejected": -1.630091667175293,
"loss": 1.2355,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.5838777422904968,
"rewards/margins": 0.1572917252779007,
"rewards/rejected": -0.7411695718765259,
"step": 70
},
{
"epoch": 0.18,
"grad_norm": 37.24284057004877,
"learning_rate": 9.796445099843647e-08,
"logits/chosen": -1.774518370628357,
"logits/rejected": -1.6856935024261475,
"logps/chosen": -1.5832115411758423,
"logps/rejected": -1.7514270544052124,
"loss": 1.232,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.7524863481521606,
"rewards/margins": 0.21537098288536072,
"rewards/rejected": -0.9678572416305542,
"step": 80
},
{
"epoch": 0.2,
"grad_norm": 45.064021238231845,
"learning_rate": 9.669088708527066e-08,
"logits/chosen": -1.7184202671051025,
"logits/rejected": -1.6467373371124268,
"logps/chosen": -1.7363929748535156,
"logps/rejected": -1.8083902597427368,
"loss": 1.2104,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -1.0013912916183472,
"rewards/margins": 0.12990526854991913,
"rewards/rejected": -1.1312966346740723,
"step": 90
},
{
"epoch": 0.23,
"grad_norm": 44.286763175528534,
"learning_rate": 9.512212835085849e-08,
"logits/chosen": -1.757889986038208,
"logits/rejected": -1.6645339727401733,
"logps/chosen": -1.779813528060913,
"logps/rejected": -1.9353383779525757,
"loss": 1.1819,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -1.2227165699005127,
"rewards/margins": 0.2256297618150711,
"rewards/rejected": -1.448346495628357,
"step": 100
},
{
"epoch": 0.25,
"grad_norm": 43.36692624974112,
"learning_rate": 9.326809299301306e-08,
"logits/chosen": -1.761940360069275,
"logits/rejected": -1.6550146341323853,
"logps/chosen": -1.8854389190673828,
"logps/rejected": -2.1229450702667236,
"loss": 1.1674,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -1.369593620300293,
"rewards/margins": 0.371805876493454,
"rewards/rejected": -1.7413995265960693,
"step": 110
},
{
"epoch": 0.27,
"grad_norm": 41.78554813342914,
"learning_rate": 9.114050282021158e-08,
"logits/chosen": -1.7491047382354736,
"logits/rejected": -1.6867637634277344,
"logps/chosen": -1.8475677967071533,
"logps/rejected": -2.0627474784851074,
"loss": 1.1591,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -1.400010108947754,
"rewards/margins": 0.3355749249458313,
"rewards/rejected": -1.7355849742889404,
"step": 120
},
{
"epoch": 0.3,
"grad_norm": 40.42754129950971,
"learning_rate": 8.875280914254802e-08,
"logits/chosen": -1.737173080444336,
"logits/rejected": -1.644561529159546,
"logps/chosen": -2.0521700382232666,
"logps/rejected": -2.296677827835083,
"loss": 1.1348,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -1.7186797857284546,
"rewards/margins": 0.43216562271118164,
"rewards/rejected": -2.1508452892303467,
"step": 130
},
{
"epoch": 0.32,
"grad_norm": 39.13812568144021,
"learning_rate": 8.612010772821971e-08,
"logits/chosen": -1.7612278461456299,
"logits/rejected": -1.715679407119751,
"logps/chosen": -2.0781049728393555,
"logps/rejected": -2.2759194374084473,
"loss": 1.127,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -1.7284520864486694,
"rewards/margins": 0.4454485774040222,
"rewards/rejected": -2.173900604248047,
"step": 140
},
{
"epoch": 0.34,
"grad_norm": 37.596667789585375,
"learning_rate": 8.325904336322055e-08,
"logits/chosen": -1.735419511795044,
"logits/rejected": -1.6814868450164795,
"logps/chosen": -2.305412769317627,
"logps/rejected": -2.55448317527771,
"loss": 1.1399,
"rewards/accuracies": 0.65625,
"rewards/chosen": -2.2760961055755615,
"rewards/margins": 0.4037933945655823,
"rewards/rejected": -2.679889440536499,
"step": 150
},
{
"epoch": 0.36,
"grad_norm": 37.85769539137667,
"learning_rate": 8.01877046176447e-08,
"logits/chosen": -1.6751991510391235,
"logits/rejected": -1.6064836978912354,
"logps/chosen": -2.5598020553588867,
"logps/rejected": -2.8157076835632324,
"loss": 1.09,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -2.763003349304199,
"rewards/margins": 0.40903931856155396,
"rewards/rejected": -3.1720428466796875,
"step": 160
},
{
"epoch": 0.39,
"grad_norm": 33.932219318133306,
"learning_rate": 7.692550948392249e-08,
"logits/chosen": -1.7231628894805908,
"logits/rejected": -1.6755987405776978,
"logps/chosen": -2.624762535095215,
"logps/rejected": -2.9136133193969727,
"loss": 1.1053,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -2.8290135860443115,
"rewards/margins": 0.5568121671676636,
"rewards/rejected": -3.3858256340026855,
"step": 170
},
{
"epoch": 0.41,
"grad_norm": 48.11500069751816,
"learning_rate": 7.349308261002021e-08,
"logits/chosen": -1.6858348846435547,
"logits/rejected": -1.6378986835479736,
"logps/chosen": -2.640817165374756,
"logps/rejected": -2.949113368988037,
"loss": 1.0837,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -2.887462615966797,
"rewards/margins": 0.5172919034957886,
"rewards/rejected": -3.404754638671875,
"step": 180
},
{
"epoch": 0.43,
"grad_norm": 42.33388198011932,
"learning_rate": 6.991212490377531e-08,
"logits/chosen": -1.7423484325408936,
"logits/rejected": -1.7037559747695923,
"logps/chosen": -2.6472008228302,
"logps/rejected": -3.0077877044677734,
"loss": 1.0335,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -2.834195613861084,
"rewards/margins": 0.6780903339385986,
"rewards/rejected": -3.5122859477996826,
"step": 190
},
{
"epoch": 0.46,
"grad_norm": 49.65676542149092,
"learning_rate": 6.620527633276978e-08,
"logits/chosen": -1.6741564273834229,
"logits/rejected": -1.6151821613311768,
"logps/chosen": -2.735678195953369,
"logps/rejected": -3.225632905960083,
"loss": 1.0663,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -3.0824506282806396,
"rewards/margins": 0.80633145570755,
"rewards/rejected": -3.888781785964966,
"step": 200
},
{
"epoch": 0.48,
"grad_norm": 46.53275655997813,
"learning_rate": 6.239597278716581e-08,
"logits/chosen": -1.7146323919296265,
"logits/rejected": -1.6657183170318604,
"logps/chosen": -3.098931312561035,
"logps/rejected": -3.467923641204834,
"loss": 1.0287,
"rewards/accuracies": 0.71875,
"rewards/chosen": -3.707202911376953,
"rewards/margins": 0.7793115377426147,
"rewards/rejected": -4.486514091491699,
"step": 210
},
{
"epoch": 0.5,
"grad_norm": 47.77625681519385,
"learning_rate": 5.8508297910462456e-08,
"logits/chosen": -1.6560382843017578,
"logits/rejected": -1.5879056453704834,
"logps/chosen": -3.1243553161621094,
"logps/rejected": -3.6013519763946533,
"loss": 1.032,
"rewards/accuracies": 0.71875,
"rewards/chosen": -3.921679735183716,
"rewards/margins": 0.8384466171264648,
"rewards/rejected": -4.76012659072876,
"step": 220
},
{
"epoch": 0.52,
"grad_norm": 45.3152158322423,
"learning_rate": 5.456683083494731e-08,
"logits/chosen": -1.6423381567001343,
"logits/rejected": -1.6075971126556396,
"logps/chosen": -3.002626419067383,
"logps/rejected": -3.339411497116089,
"loss": 1.068,
"rewards/accuracies": 0.65625,
"rewards/chosen": -3.6176345348358154,
"rewards/margins": 0.6260865330696106,
"rewards/rejected": -4.2437214851379395,
"step": 230
},
{
"epoch": 0.55,
"grad_norm": 50.18712381426658,
"learning_rate": 5.059649078450834e-08,
"logits/chosen": -1.6221996545791626,
"logits/rejected": -1.587894082069397,
"logps/chosen": -2.9972426891326904,
"logps/rejected": -3.4068732261657715,
"loss": 1.0045,
"rewards/accuracies": 0.65625,
"rewards/chosen": -3.657778263092041,
"rewards/margins": 0.6951833963394165,
"rewards/rejected": -4.352961540222168,
"step": 240
},
{
"epoch": 0.57,
"grad_norm": 42.94625970616266,
"learning_rate": 4.6622379527277186e-08,
"logits/chosen": -1.6361802816390991,
"logits/rejected": -1.5883018970489502,
"logps/chosen": -3.0472984313964844,
"logps/rejected": -3.4067275524139404,
"loss": 1.0159,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -3.816819429397583,
"rewards/margins": 0.65269935131073,
"rewards/rejected": -4.469518661499023,
"step": 250
},
{
"epoch": 0.59,
"grad_norm": 49.81189434860217,
"learning_rate": 4.26696226741691e-08,
"logits/chosen": -1.6441590785980225,
"logits/rejected": -1.5848346948623657,
"logps/chosen": -3.2412009239196777,
"logps/rejected": -3.667572021484375,
"loss": 1.0333,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -4.0167555809021,
"rewards/margins": 0.8128072619438171,
"rewards/rejected": -4.829562664031982,
"step": 260
},
{
"epoch": 0.61,
"grad_norm": 46.698998113891435,
"learning_rate": 3.876321082668098e-08,
"logits/chosen": -1.6987736225128174,
"logits/rejected": -1.6376842260360718,
"logps/chosen": -3.1670312881469727,
"logps/rejected": -3.625418186187744,
"loss": 1.0046,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -3.857081174850464,
"rewards/margins": 0.8981560468673706,
"rewards/rejected": -4.755237579345703,
"step": 270
},
{
"epoch": 0.64,
"grad_norm": 54.35348471111713,
"learning_rate": 3.492784157826244e-08,
"logits/chosen": -1.63980233669281,
"logits/rejected": -1.552004337310791,
"logps/chosen": -3.2830092906951904,
"logps/rejected": -3.8152382373809814,
"loss": 1.0119,
"rewards/accuracies": 0.71875,
"rewards/chosen": -4.120265483856201,
"rewards/margins": 0.9936790466308594,
"rewards/rejected": -5.113945007324219,
"step": 280
},
{
"epoch": 0.66,
"grad_norm": 44.812750561614926,
"learning_rate": 3.118776336817812e-08,
"logits/chosen": -1.6625276803970337,
"logits/rejected": -1.6122783422470093,
"logps/chosen": -3.191256284713745,
"logps/rejected": -3.756882429122925,
"loss": 0.9859,
"rewards/accuracies": 0.71875,
"rewards/chosen": -3.97601580619812,
"rewards/margins": 1.1168193817138672,
"rewards/rejected": -5.092835426330566,
"step": 290
},
{
"epoch": 0.68,
"grad_norm": 45.89134253017904,
"learning_rate": 2.7566622175067443e-08,
"logits/chosen": -1.6413261890411377,
"logits/rejected": -1.5825086832046509,
"logps/chosen": -3.339484691619873,
"logps/rejected": -3.9588654041290283,
"loss": 0.994,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -4.261802673339844,
"rewards/margins": 1.0827885866165161,
"rewards/rejected": -5.3445916175842285,
"step": 300
},
{
"epoch": 0.71,
"grad_norm": 47.840562340740895,
"learning_rate": 2.408731201945432e-08,
"logits/chosen": -1.64263117313385,
"logits/rejected": -1.6013950109481812,
"logps/chosen": -3.251277446746826,
"logps/rejected": -3.651395082473755,
"loss": 1.0008,
"rewards/accuracies": 0.71875,
"rewards/chosen": -4.11476469039917,
"rewards/margins": 0.7599252462387085,
"rewards/rejected": -4.874690532684326,
"step": 310
},
{
"epoch": 0.73,
"grad_norm": 51.79356167073485,
"learning_rate": 2.0771830220378112e-08,
"logits/chosen": -1.5991486310958862,
"logits/rejected": -1.5396713018417358,
"logps/chosen": -3.2509543895721436,
"logps/rejected": -3.6364498138427734,
"loss": 1.0066,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -4.056139945983887,
"rewards/margins": 0.7866916060447693,
"rewards/rejected": -4.842831611633301,
"step": 320
},
{
"epoch": 0.75,
"grad_norm": 43.99284684689101,
"learning_rate": 1.7641138321260257e-08,
"logits/chosen": -1.6334537267684937,
"logits/rejected": -1.5692901611328125,
"logps/chosen": -3.158041477203369,
"logps/rejected": -3.8241424560546875,
"loss": 0.9807,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -3.9483726024627686,
"rewards/margins": 1.2771327495574951,
"rewards/rejected": -5.225505352020264,
"step": 330
},
{
"epoch": 0.77,
"grad_norm": 50.47520523412627,
"learning_rate": 1.4715029564277793e-08,
"logits/chosen": -1.6923463344573975,
"logits/rejected": -1.6500104665756226,
"logps/chosen": -3.0949554443359375,
"logps/rejected": -3.6954338550567627,
"loss": 1.0051,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -3.7805895805358887,
"rewards/margins": 1.1268298625946045,
"rewards/rejected": -4.907419681549072,
"step": 340
},
{
"epoch": 0.8,
"grad_norm": 48.63775480340643,
"learning_rate": 1.2012003751113343e-08,
"logits/chosen": -1.6796951293945312,
"logits/rejected": -1.6264684200286865,
"logps/chosen": -3.3736748695373535,
"logps/rejected": -3.9650447368621826,
"loss": 0.9726,
"rewards/accuracies": 0.6875,
"rewards/chosen": -4.423010349273682,
"rewards/margins": 1.0823583602905273,
"rewards/rejected": -5.505368709564209,
"step": 350
},
{
"epoch": 0.82,
"grad_norm": 55.26630420954737,
"learning_rate": 9.549150281252633e-09,
"logits/chosen": -1.6259968280792236,
"logits/rejected": -1.5858485698699951,
"logps/chosen": -3.211542844772339,
"logps/rejected": -3.735614776611328,
"loss": 0.9729,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -4.078815460205078,
"rewards/margins": 0.9446828961372375,
"rewards/rejected": -5.02349853515625,
"step": 360
},
{
"epoch": 0.84,
"grad_norm": 51.52261591377872,
"learning_rate": 7.3420401072985306e-09,
"logits/chosen": -1.6755279302597046,
"logits/rejected": -1.6221554279327393,
"logps/chosen": -3.299112319946289,
"logps/rejected": -3.911120653152466,
"loss": 0.9649,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -4.168996810913086,
"rewards/margins": 1.1090896129608154,
"rewards/rejected": -5.2780866622924805,
"step": 370
},
{
"epoch": 0.86,
"grad_norm": 51.72886520205544,
"learning_rate": 5.404627290395369e-09,
"logits/chosen": -1.6374752521514893,
"logits/rejected": -1.5786619186401367,
"logps/chosen": -3.220484972000122,
"logps/rejected": -3.803584337234497,
"loss": 0.968,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -4.061758518218994,
"rewards/margins": 1.0862071514129639,
"rewards/rejected": -5.147965431213379,
"step": 380
},
{
"epoch": 0.89,
"grad_norm": 55.56376010319163,
"learning_rate": 3.74916077816162e-09,
"logits/chosen": -1.6384235620498657,
"logits/rejected": -1.5836341381072998,
"logps/chosen": -3.2395005226135254,
"logps/rejected": -3.746983051300049,
"loss": 1.0011,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -4.179410934448242,
"rewards/margins": 0.982707142829895,
"rewards/rejected": -5.162117958068848,
"step": 390
},
{
"epoch": 0.91,
"grad_norm": 54.81143409505458,
"learning_rate": 2.386106962899165e-09,
"logits/chosen": -1.5698174238204956,
"logits/rejected": -1.5115009546279907,
"logps/chosen": -3.4176878929138184,
"logps/rejected": -3.958037853240967,
"loss": 0.9695,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -4.382534027099609,
"rewards/margins": 0.9680202603340149,
"rewards/rejected": -5.350554466247559,
"step": 400
},
{
"epoch": 0.93,
"grad_norm": 56.75402221437199,
"learning_rate": 1.3240835096913706e-09,
"logits/chosen": -1.594696283340454,
"logits/rejected": -1.502890944480896,
"logps/chosen": -3.213305711746216,
"logps/rejected": -3.921264171600342,
"loss": 1.0286,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -4.083509922027588,
"rewards/margins": 1.2770874500274658,
"rewards/rejected": -5.360597133636475,
"step": 410
},
{
"epoch": 0.96,
"grad_norm": 47.085112169528884,
"learning_rate": 5.698048727497462e-10,
"logits/chosen": -1.6298091411590576,
"logits/rejected": -1.5658090114593506,
"logps/chosen": -3.3380351066589355,
"logps/rejected": -3.9660251140594482,
"loss": 0.983,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -4.293475151062012,
"rewards/margins": 1.1220663785934448,
"rewards/rejected": -5.415541648864746,
"step": 420
},
{
"epoch": 0.98,
"grad_norm": 46.70771599324875,
"learning_rate": 1.2803984447259387e-10,
"logits/chosen": -1.6368719339370728,
"logits/rejected": -1.5942411422729492,
"logps/chosen": -3.3361122608184814,
"logps/rejected": -3.98066782951355,
"loss": 0.9434,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -4.3075456619262695,
"rewards/margins": 1.1938055753707886,
"rewards/rejected": -5.501351356506348,
"step": 430
},
{
"epoch": 1.0,
"step": 439,
"total_flos": 0.0,
"train_loss": 1.0809600353240967,
"train_runtime": 6838.8864,
"train_samples_per_second": 8.223,
"train_steps_per_second": 0.064
}
],
"logging_steps": 10,
"max_steps": 439,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}