{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 368,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 2062.9417756205603,
"learning_rate": 2.702702702702703e-10,
"logits/chosen": -1.3332719802856445,
"logits/rejected": -1.246394395828247,
"logps/chosen": -286.9539794921875,
"logps/rejected": -263.3782958984375,
"loss": 0.7136,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.05,
"grad_norm": 2488.3980990852974,
"learning_rate": 2.702702702702703e-09,
"logits/chosen": -1.6142714023590088,
"logits/rejected": -1.3925563097000122,
"logps/chosen": -342.4814758300781,
"logps/rejected": -294.5446472167969,
"loss": 0.8226,
"rewards/accuracies": 0.4618055522441864,
"rewards/chosen": 0.079922616481781,
"rewards/margins": 0.09200635552406311,
"rewards/rejected": -0.012083739042282104,
"step": 10
},
{
"epoch": 0.11,
"grad_norm": 2085.30491295085,
"learning_rate": 5.405405405405406e-09,
"logits/chosen": -1.4863827228546143,
"logits/rejected": -1.3085709810256958,
"logps/chosen": -314.74273681640625,
"logps/rejected": -279.32977294921875,
"loss": 0.8217,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": 0.03496693819761276,
"rewards/margins": 0.07092654705047607,
"rewards/rejected": -0.03595960885286331,
"step": 20
},
{
"epoch": 0.16,
"grad_norm": 2613.9787597915297,
"learning_rate": 8.108108108108109e-09,
"logits/chosen": -1.5464979410171509,
"logits/rejected": -1.3788726329803467,
"logps/chosen": -324.9065246582031,
"logps/rejected": -286.29925537109375,
"loss": 0.8318,
"rewards/accuracies": 0.515625,
"rewards/chosen": -0.0007322698947973549,
"rewards/margins": 0.02973010204732418,
"rewards/rejected": -0.030462373048067093,
"step": 30
},
{
"epoch": 0.22,
"grad_norm": 2309.6989479898994,
"learning_rate": 9.997973265157192e-09,
"logits/chosen": -1.5338213443756104,
"logits/rejected": -1.356065034866333,
"logps/chosen": -325.39349365234375,
"logps/rejected": -285.630859375,
"loss": 0.8544,
"rewards/accuracies": 0.5093749761581421,
"rewards/chosen": -0.00019043684005737305,
"rewards/margins": -0.028223956003785133,
"rewards/rejected": 0.02803351916372776,
"step": 40
},
{
"epoch": 0.27,
"grad_norm": 2372.8781916000794,
"learning_rate": 9.961988113473708e-09,
"logits/chosen": -1.540814757347107,
"logits/rejected": -1.3939155340194702,
"logps/chosen": -337.01385498046875,
"logps/rejected": -297.3047790527344,
"loss": 0.7925,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": 0.010568022727966309,
"rewards/margins": 0.0009421706199645996,
"rewards/rejected": 0.009625854901969433,
"step": 50
},
{
"epoch": 0.33,
"grad_norm": 1906.9193219897543,
"learning_rate": 9.881337335184878e-09,
"logits/chosen": -1.5821880102157593,
"logits/rejected": -1.433316707611084,
"logps/chosen": -319.8349609375,
"logps/rejected": -285.03131103515625,
"loss": 0.7444,
"rewards/accuracies": 0.59375,
"rewards/chosen": 0.011926290579140186,
"rewards/margins": 0.23517760634422302,
"rewards/rejected": -0.22325129806995392,
"step": 60
},
{
"epoch": 0.38,
"grad_norm": 2229.621479388874,
"learning_rate": 9.756746912994832e-09,
"logits/chosen": -1.5089519023895264,
"logits/rejected": -1.3478004932403564,
"logps/chosen": -312.11767578125,
"logps/rejected": -275.03704833984375,
"loss": 0.7381,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": -0.015234187245368958,
"rewards/margins": 0.07565010339021683,
"rewards/rejected": -0.09088429063558578,
"step": 70
},
{
"epoch": 0.43,
"grad_norm": 1970.0426820414286,
"learning_rate": 9.589338354885628e-09,
"logits/chosen": -1.591552734375,
"logits/rejected": -1.4374128580093384,
"logps/chosen": -323.3088684082031,
"logps/rejected": -288.12445068359375,
"loss": 0.7257,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.1117367148399353,
"rewards/margins": 0.34563174843788147,
"rewards/rejected": -0.23389501869678497,
"step": 80
},
{
"epoch": 0.49,
"grad_norm": 1647.476042777907,
"learning_rate": 9.380618598797472e-09,
"logits/chosen": -1.6083869934082031,
"logits/rejected": -1.4117141962051392,
"logps/chosen": -319.9634094238281,
"logps/rejected": -281.79248046875,
"loss": 0.6768,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.1753208488225937,
"rewards/margins": 0.44467267394065857,
"rewards/rejected": -0.2693518102169037,
"step": 90
},
{
"epoch": 0.54,
"grad_norm": 1779.591190181612,
"learning_rate": 9.132466447838596e-09,
"logits/chosen": -1.5439790487289429,
"logits/rejected": -1.368858814239502,
"logps/chosen": -321.8800964355469,
"logps/rejected": -282.66168212890625,
"loss": 0.6482,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": 0.34998807311058044,
"rewards/margins": 0.6073418855667114,
"rewards/rejected": -0.25735384225845337,
"step": 100
},
{
"epoch": 0.6,
"grad_norm": 1799.5128068859713,
"learning_rate": 8.847115658129039e-09,
"logits/chosen": -1.5068881511688232,
"logits/rejected": -1.3783992528915405,
"logps/chosen": -318.10797119140625,
"logps/rejected": -287.1791076660156,
"loss": 0.6577,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.35399100184440613,
"rewards/margins": 0.5296486616134644,
"rewards/rejected": -0.17565762996673584,
"step": 110
},
{
"epoch": 0.65,
"grad_norm": 1676.764876114058,
"learning_rate": 8.527134831514116e-09,
"logits/chosen": -1.5781362056732178,
"logits/rejected": -1.4229751825332642,
"logps/chosen": -331.3733825683594,
"logps/rejected": -297.85699462890625,
"loss": 0.6575,
"rewards/accuracies": 0.609375,
"rewards/chosen": 0.3793606460094452,
"rewards/margins": 0.4118588864803314,
"rewards/rejected": -0.03249818831682205,
"step": 120
},
{
"epoch": 0.71,
"grad_norm": 1566.6901996912077,
"learning_rate": 8.175404294144481e-09,
"logits/chosen": -1.6145737171173096,
"logits/rejected": -1.4269483089447021,
"logps/chosen": -317.0880432128906,
"logps/rejected": -271.5414123535156,
"loss": 0.6044,
"rewards/accuracies": 0.671875,
"rewards/chosen": 0.6310849189758301,
"rewards/margins": 0.7299145460128784,
"rewards/rejected": -0.09882961958646774,
"step": 130
},
{
"epoch": 0.76,
"grad_norm": 1706.595775593044,
"learning_rate": 7.79509016905158e-09,
"logits/chosen": -1.5648548603057861,
"logits/rejected": -1.4158308506011963,
"logps/chosen": -331.06622314453125,
"logps/rejected": -294.2123718261719,
"loss": 0.6171,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.7887445092201233,
"rewards/margins": 0.765161395072937,
"rewards/rejected": 0.023583168163895607,
"step": 140
},
{
"epoch": 0.82,
"grad_norm": 1648.2049279025357,
"learning_rate": 7.389615876105773e-09,
"logits/chosen": -1.5560743808746338,
"logits/rejected": -1.4283266067504883,
"logps/chosen": -314.5069274902344,
"logps/rejected": -291.7706298828125,
"loss": 0.6127,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": 0.8379846811294556,
"rewards/margins": 0.7371869087219238,
"rewards/rejected": 0.10079775750637054,
"step": 150
},
{
"epoch": 0.87,
"grad_norm": 1635.8235385722824,
"learning_rate": 6.962631315901861e-09,
"logits/chosen": -1.5186518430709839,
"logits/rejected": -1.4028724431991577,
"logps/chosen": -317.958251953125,
"logps/rejected": -291.0096435546875,
"loss": 0.6088,
"rewards/accuracies": 0.653124988079071,
"rewards/chosen": 0.8378221392631531,
"rewards/margins": 0.6740074753761292,
"rewards/rejected": 0.16381461918354034,
"step": 160
},
{
"epoch": 0.92,
"grad_norm": 1575.6130834814026,
"learning_rate": 6.517980014965139e-09,
"logits/chosen": -1.6025912761688232,
"logits/rejected": -1.4152277708053589,
"logps/chosen": -331.40386962890625,
"logps/rejected": -289.4659729003906,
"loss": 0.5997,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": 0.8780991435050964,
"rewards/margins": 0.8349622488021851,
"rewards/rejected": 0.04313689470291138,
"step": 170
},
{
"epoch": 0.98,
"grad_norm": 1546.3751249922345,
"learning_rate": 6.059664528022266e-09,
"logits/chosen": -1.5942988395690918,
"logits/rejected": -1.44364333152771,
"logps/chosen": -315.07196044921875,
"logps/rejected": -276.7376708984375,
"loss": 0.5773,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": 0.8913241624832153,
"rewards/margins": 0.9472495317459106,
"rewards/rejected": -0.05592530965805054,
"step": 180
},
{
"epoch": 1.03,
"grad_norm": 1681.3148479750444,
"learning_rate": 5.591810408770492e-09,
"logits/chosen": -1.5504480600357056,
"logits/rejected": -1.3759148120880127,
"logps/chosen": -315.5844421386719,
"logps/rejected": -278.6695861816406,
"loss": 0.5632,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": 0.8848656415939331,
"rewards/margins": 0.8844806551933289,
"rewards/rejected": 0.00038505197153426707,
"step": 190
},
{
"epoch": 1.09,
"grad_norm": 1651.7882136807318,
"learning_rate": 5.118629073464423e-09,
"logits/chosen": -1.571003794670105,
"logits/rejected": -1.3608561754226685,
"logps/chosen": -325.93023681640625,
"logps/rejected": -282.7080993652344,
"loss": 0.5605,
"rewards/accuracies": 0.71875,
"rewards/chosen": 1.0313498973846436,
"rewards/margins": 0.9450349807739258,
"rewards/rejected": 0.08631500601768494,
"step": 200
},
{
"epoch": 1.14,
"grad_norm": 1538.4386313699126,
"learning_rate": 4.644379891605983e-09,
"logits/chosen": -1.608812689781189,
"logits/rejected": -1.4315342903137207,
"logps/chosen": -324.66522216796875,
"logps/rejected": -291.33428955078125,
"loss": 0.5478,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": 1.0752595663070679,
"rewards/margins": 1.0428497791290283,
"rewards/rejected": 0.03240995481610298,
"step": 210
},
{
"epoch": 1.2,
"grad_norm": 1737.3887570467818,
"learning_rate": 4.173331844980362e-09,
"logits/chosen": -1.5384166240692139,
"logits/rejected": -1.4137290716171265,
"logps/chosen": -323.9536437988281,
"logps/rejected": -293.42535400390625,
"loss": 0.563,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": 0.9658479690551758,
"rewards/margins": 0.9138795137405396,
"rewards/rejected": 0.051968496292829514,
"step": 220
},
{
"epoch": 1.25,
"grad_norm": 1605.3661746462226,
"learning_rate": 3.7097251001664824e-09,
"logits/chosen": -1.537548542022705,
"logits/rejected": -1.3787362575531006,
"logps/chosen": -323.85125732421875,
"logps/rejected": -286.95379638671875,
"loss": 0.526,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": 1.146087408065796,
"rewards/margins": 1.0939618349075317,
"rewards/rejected": 0.0521254763007164,
"step": 230
},
{
"epoch": 1.3,
"grad_norm": 1689.839854162397,
"learning_rate": 3.2577328404292057e-09,
"logits/chosen": -1.5391089916229248,
"logits/rejected": -1.4084638357162476,
"logps/chosen": -312.51373291015625,
"logps/rejected": -285.9711608886719,
"loss": 0.5418,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": 1.0901774168014526,
"rewards/margins": 1.016390085220337,
"rewards/rejected": 0.07378745824098587,
"step": 240
},
{
"epoch": 1.36,
"grad_norm": 1710.94558540331,
"learning_rate": 2.821423700565763e-09,
"logits/chosen": -1.5968081951141357,
"logits/rejected": -1.4188272953033447,
"logps/chosen": -350.68487548828125,
"logps/rejected": -306.6036071777344,
"loss": 0.532,
"rewards/accuracies": 0.78125,
"rewards/chosen": 1.2678377628326416,
"rewards/margins": 1.2405023574829102,
"rewards/rejected": 0.027335500344634056,
"step": 250
},
{
"epoch": 1.41,
"grad_norm": 1638.2367115980887,
"learning_rate": 2.4047251428513483e-09,
"logits/chosen": -1.6129051446914673,
"logits/rejected": -1.4581451416015625,
"logps/chosen": -325.2450256347656,
"logps/rejected": -291.1476745605469,
"loss": 0.5289,
"rewards/accuracies": 0.734375,
"rewards/chosen": 1.2301806211471558,
"rewards/margins": 1.2308820486068726,
"rewards/rejected": -0.0007013082504272461,
"step": 260
},
{
"epoch": 1.47,
"grad_norm": 1199.4883951774482,
"learning_rate": 2.011388103757442e-09,
"logits/chosen": -1.5265954732894897,
"logits/rejected": -1.3828239440917969,
"logps/chosen": -316.2944641113281,
"logps/rejected": -285.7884826660156,
"loss": 0.5191,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": 1.3710923194885254,
"rewards/margins": 1.2594387531280518,
"rewards/rejected": 0.11165344715118408,
"step": 270
},
{
"epoch": 1.52,
"grad_norm": 1472.2115597857592,
"learning_rate": 1.644953229677474e-09,
"logits/chosen": -1.600651502609253,
"logits/rejected": -1.4179413318634033,
"logps/chosen": -326.00335693359375,
"logps/rejected": -284.74188232421875,
"loss": 0.5459,
"rewards/accuracies": 0.75,
"rewards/chosen": 1.3610546588897705,
"rewards/margins": 1.2091944217681885,
"rewards/rejected": 0.1518600881099701,
"step": 280
},
{
"epoch": 1.58,
"grad_norm": 1566.9737970600454,
"learning_rate": 1.308719005590957e-09,
"logits/chosen": -1.5032551288604736,
"logits/rejected": -1.3876453638076782,
"logps/chosen": -318.40948486328125,
"logps/rejected": -282.49554443359375,
"loss": 0.5407,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": 1.2658413648605347,
"rewards/margins": 1.187675952911377,
"rewards/rejected": 0.07816555351018906,
"step": 290
},
{
"epoch": 1.63,
"grad_norm": 1348.7257224769698,
"learning_rate": 1.005712063557776e-09,
"logits/chosen": -1.6333671808242798,
"logits/rejected": -1.455556869506836,
"logps/chosen": -324.13885498046875,
"logps/rejected": -290.60186767578125,
"loss": 0.5346,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": 1.1175706386566162,
"rewards/margins": 1.0337438583374023,
"rewards/rejected": 0.08382664620876312,
"step": 300
},
{
"epoch": 1.68,
"grad_norm": 1356.5441208888985,
"learning_rate": 7.386599383124321e-10,
"logits/chosen": -1.565224051475525,
"logits/rejected": -1.3825923204421997,
"logps/chosen": -321.80316162109375,
"logps/rejected": -285.7908630371094,
"loss": 0.5304,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 1.2159234285354614,
"rewards/margins": 1.1465200185775757,
"rewards/rejected": 0.06940338760614395,
"step": 310
},
{
"epoch": 1.74,
"grad_norm": 1445.3559110776998,
"learning_rate": 5.099665152003929e-10,
"logits/chosen": -1.5921494960784912,
"logits/rejected": -1.3807857036590576,
"logps/chosen": -333.7308654785156,
"logps/rejected": -289.9362487792969,
"loss": 0.5241,
"rewards/accuracies": 0.7718750238418579,
"rewards/chosen": 1.3256893157958984,
"rewards/margins": 1.292041540145874,
"rewards/rejected": 0.03364778310060501,
"step": 320
},
{
"epoch": 1.79,
"grad_norm": 1681.5042999261696,
"learning_rate": 3.216903914633745e-10,
"logits/chosen": -1.5627129077911377,
"logits/rejected": -1.4408833980560303,
"logps/chosen": -325.2505187988281,
"logps/rejected": -296.106201171875,
"loss": 0.5429,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": 1.165374517440796,
"rewards/margins": 1.0651426315307617,
"rewards/rejected": 0.1002318263053894,
"step": 330
},
{
"epoch": 1.85,
"grad_norm": 1536.75287567762,
"learning_rate": 1.7552634565570324e-10,
"logits/chosen": -1.5574743747711182,
"logits/rejected": -1.3901411294937134,
"logps/chosen": -329.89141845703125,
"logps/rejected": -292.8751525878906,
"loss": 0.5342,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": 1.4129165410995483,
"rewards/margins": 1.3112914562225342,
"rewards/rejected": 0.10162514448165894,
"step": 340
},
{
"epoch": 1.9,
"grad_norm": 1492.8399510840338,
"learning_rate": 7.279008199590543e-11,
"logits/chosen": -1.5503973960876465,
"logits/rejected": -1.3889100551605225,
"logps/chosen": -326.42120361328125,
"logps/rejected": -291.9585266113281,
"loss": 0.5261,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 1.3398044109344482,
"rewards/margins": 1.2421011924743652,
"rewards/rejected": 0.09770330041646957,
"step": 350
},
{
"epoch": 1.96,
"grad_norm": 1452.281513333118,
"learning_rate": 1.4406386978128017e-11,
"logits/chosen": -1.6207876205444336,
"logits/rejected": -1.424393653869629,
"logps/chosen": -331.06390380859375,
"logps/rejected": -291.6929626464844,
"loss": 0.5043,
"rewards/accuracies": 0.7906249761581421,
"rewards/chosen": 1.518845558166504,
"rewards/margins": 1.381410837173462,
"rewards/rejected": 0.13743488490581512,
"step": 360
},
{
"epoch": 2.0,
"step": 368,
"total_flos": 0.0,
"train_loss": 0.6161670185949492,
"train_runtime": 9955.6802,
"train_samples_per_second": 9.461,
"train_steps_per_second": 0.037
}
],
"logging_steps": 10,
"max_steps": 368,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}