zephyr-7b-dpo-full / trainer_state.json
wzhouad's picture
Model save
8c87b08 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.998691442030882,
"eval_steps": 10000,
"global_step": 477,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 2.0833333333333333e-07,
"logits/chosen": 0.17655496299266815,
"logits/rejected": 0.2531452775001526,
"logps/chosen": -354.29669189453125,
"logps/rejected": -305.259765625,
"loss": 0.5,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": 0.0010361697059124708,
"rewards/margins": 0.0014542521676048636,
"rewards/rejected": -0.00041808263631537557,
"step": 10
},
{
"epoch": 0.04,
"learning_rate": 4.1666666666666667e-07,
"logits/chosen": 0.07140998542308807,
"logits/rejected": 0.19915328919887543,
"logps/chosen": -316.61407470703125,
"logps/rejected": -276.1783142089844,
"loss": 0.4997,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.001211934955790639,
"rewards/margins": 0.00264042429625988,
"rewards/rejected": -0.0014284893404692411,
"step": 20
},
{
"epoch": 0.06,
"learning_rate": 6.249999999999999e-07,
"logits/chosen": 0.1830858290195465,
"logits/rejected": 0.25493288040161133,
"logps/chosen": -294.3023376464844,
"logps/rejected": -298.47430419921875,
"loss": 0.4979,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.00664560217410326,
"rewards/margins": 0.008408578112721443,
"rewards/rejected": -0.0017629768699407578,
"step": 30
},
{
"epoch": 0.08,
"learning_rate": 8.333333333333333e-07,
"logits/chosen": 0.1198926791548729,
"logits/rejected": 0.2388772964477539,
"logps/chosen": -343.3688659667969,
"logps/rejected": -318.56866455078125,
"loss": 0.4944,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.04203338176012039,
"rewards/margins": 0.023049216717481613,
"rewards/rejected": 0.01898416317999363,
"step": 40
},
{
"epoch": 0.1,
"learning_rate": 9.999463737538052e-07,
"logits/chosen": 0.19016575813293457,
"logits/rejected": 0.2768324613571167,
"logps/chosen": -305.9139709472656,
"logps/rejected": -285.70263671875,
"loss": 0.4888,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.0463864728808403,
"rewards/margins": 0.06659023463726044,
"rewards/rejected": -0.02020375430583954,
"step": 50
},
{
"epoch": 0.13,
"learning_rate": 9.980706626858607e-07,
"logits/chosen": 0.1583642065525055,
"logits/rejected": 0.2964373230934143,
"logps/chosen": -292.2091979980469,
"logps/rejected": -283.33062744140625,
"loss": 0.4823,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": 0.006695735268294811,
"rewards/margins": 0.08554854989051819,
"rewards/rejected": -0.0788528248667717,
"step": 60
},
{
"epoch": 0.15,
"learning_rate": 9.935251313189563e-07,
"logits/chosen": 0.1668189913034439,
"logits/rejected": 0.25383955240249634,
"logps/chosen": -330.51483154296875,
"logps/rejected": -332.74249267578125,
"loss": 0.476,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.007911129854619503,
"rewards/margins": 0.13003569841384888,
"rewards/rejected": -0.13794682919979095,
"step": 70
},
{
"epoch": 0.17,
"learning_rate": 9.86334145175542e-07,
"logits/chosen": 0.22892770171165466,
"logits/rejected": 0.32262876629829407,
"logps/chosen": -326.62847900390625,
"logps/rejected": -321.47064208984375,
"loss": 0.4678,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.07964827120304108,
"rewards/margins": 0.2643834054470062,
"rewards/rejected": -0.3440317213535309,
"step": 80
},
{
"epoch": 0.19,
"learning_rate": 9.765362502737097e-07,
"logits/chosen": 0.12489993870258331,
"logits/rejected": 0.2657889425754547,
"logps/chosen": -358.5821838378906,
"logps/rejected": -333.71466064453125,
"loss": 0.4612,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.21640650928020477,
"rewards/margins": 0.4499947130680084,
"rewards/rejected": -0.6664012670516968,
"step": 90
},
{
"epoch": 0.21,
"learning_rate": 9.641839665080363e-07,
"logits/chosen": 0.2374851256608963,
"logits/rejected": 0.4098134934902191,
"logps/chosen": -378.7792053222656,
"logps/rejected": -408.1399841308594,
"loss": 0.4512,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.44217753410339355,
"rewards/margins": 0.715401291847229,
"rewards/rejected": -1.157578706741333,
"step": 100
},
{
"epoch": 0.23,
"learning_rate": 9.493435061259129e-07,
"logits/chosen": 0.29897215962409973,
"logits/rejected": 0.34014248847961426,
"logps/chosen": -395.0293884277344,
"logps/rejected": -461.2764587402344,
"loss": 0.4418,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.8240998983383179,
"rewards/margins": 0.7941638231277466,
"rewards/rejected": -1.618263602256775,
"step": 110
},
{
"epoch": 0.25,
"learning_rate": 9.320944188084241e-07,
"logits/chosen": 0.18543429672718048,
"logits/rejected": 0.282682329416275,
"logps/chosen": -440.6853942871094,
"logps/rejected": -526.3844604492188,
"loss": 0.4495,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -1.201317548751831,
"rewards/margins": 0.8505627512931824,
"rewards/rejected": -2.051880359649658,
"step": 120
},
{
"epoch": 0.27,
"learning_rate": 9.125291652582547e-07,
"logits/chosen": 0.10988249629735947,
"logits/rejected": 0.2532512843608856,
"logps/chosen": -429.30322265625,
"logps/rejected": -460.0655822753906,
"loss": 0.4407,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.9080715179443359,
"rewards/margins": 0.8440803289413452,
"rewards/rejected": -1.7521518468856812,
"step": 130
},
{
"epoch": 0.29,
"learning_rate": 8.90752621580335e-07,
"logits/chosen": 0.05259154364466667,
"logits/rejected": 0.20351815223693848,
"logps/chosen": -478.1226501464844,
"logps/rejected": -552.33154296875,
"loss": 0.4381,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -1.4607925415039062,
"rewards/margins": 1.3634538650512695,
"rewards/rejected": -2.8242461681365967,
"step": 140
},
{
"epoch": 0.31,
"learning_rate": 8.668815171119019e-07,
"logits/chosen": 0.1267194300889969,
"logits/rejected": 0.16065822541713715,
"logps/chosen": -432.47418212890625,
"logps/rejected": -556.4413452148438,
"loss": 0.4373,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.9717355966567993,
"rewards/margins": 1.443182110786438,
"rewards/rejected": -2.4149177074432373,
"step": 150
},
{
"epoch": 0.33,
"learning_rate": 8.410438087153911e-07,
"logits/chosen": 0.05742305517196655,
"logits/rejected": 0.03335579112172127,
"logps/chosen": -386.4638366699219,
"logps/rejected": -537.6171264648438,
"loss": 0.4335,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.8440232276916504,
"rewards/margins": 1.7251598834991455,
"rewards/rejected": -2.569182872772217,
"step": 160
},
{
"epoch": 0.36,
"learning_rate": 8.133779948881513e-07,
"logits/chosen": 0.04388447850942612,
"logits/rejected": 0.06478340178728104,
"logps/chosen": -450.94049072265625,
"logps/rejected": -571.2717895507812,
"loss": 0.4268,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -1.1457209587097168,
"rewards/margins": 1.4885038137435913,
"rewards/rejected": -2.6342251300811768,
"step": 170
},
{
"epoch": 0.38,
"learning_rate": 7.840323733655778e-07,
"logits/chosen": 0.03801240772008896,
"logits/rejected": 0.0668804943561554,
"logps/chosen": -415.9105529785156,
"logps/rejected": -594.4246826171875,
"loss": 0.426,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.8987852931022644,
"rewards/margins": 2.0467095375061035,
"rewards/rejected": -2.9454948902130127,
"step": 180
},
{
"epoch": 0.4,
"learning_rate": 7.531642461971514e-07,
"logits/chosen": 0.12394122779369354,
"logits/rejected": 0.07622597366571426,
"logps/chosen": -482.99774169921875,
"logps/rejected": -617.9317626953125,
"loss": 0.4148,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -1.4833831787109375,
"rewards/margins": 1.5686841011047363,
"rewards/rejected": -3.052067279815674,
"step": 190
},
{
"epoch": 0.42,
"learning_rate": 7.209390765564318e-07,
"logits/chosen": 0.12547728419303894,
"logits/rejected": 0.039741553366184235,
"logps/chosen": -470.0662536621094,
"logps/rejected": -810.3030395507812,
"loss": 0.4152,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -1.2957651615142822,
"rewards/margins": 3.8659985065460205,
"rewards/rejected": -5.1617631912231445,
"step": 200
},
{
"epoch": 0.44,
"learning_rate": 6.875296018047809e-07,
"logits/chosen": 0.20153549313545227,
"logits/rejected": 0.1317548155784607,
"logps/chosen": -447.82562255859375,
"logps/rejected": -725.8985595703125,
"loss": 0.4249,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -1.3366836309432983,
"rewards/margins": 3.223564863204956,
"rewards/rejected": -4.560248374938965,
"step": 210
},
{
"epoch": 0.46,
"learning_rate": 6.531149075630796e-07,
"logits/chosen": -0.017775116488337517,
"logits/rejected": 0.05367380380630493,
"logps/chosen": -476.78790283203125,
"logps/rejected": -663.9365844726562,
"loss": 0.4167,
"rewards/accuracies": 0.6875,
"rewards/chosen": -1.6010878086090088,
"rewards/margins": 2.401573419570923,
"rewards/rejected": -4.002661228179932,
"step": 220
},
{
"epoch": 0.48,
"learning_rate": 6.178794677547137e-07,
"logits/chosen": 0.07326556742191315,
"logits/rejected": -0.006058653350919485,
"logps/chosen": -590.01123046875,
"logps/rejected": -870.9129028320312,
"loss": 0.4193,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -2.6394991874694824,
"rewards/margins": 3.238422393798828,
"rewards/rejected": -5.8779215812683105,
"step": 230
},
{
"epoch": 0.5,
"learning_rate": 5.820121557655108e-07,
"logits/chosen": 0.13632330298423767,
"logits/rejected": 0.12085568904876709,
"logps/chosen": -450.1314392089844,
"logps/rejected": -587.374267578125,
"loss": 0.425,
"rewards/accuracies": 0.65625,
"rewards/chosen": -1.3817965984344482,
"rewards/margins": 1.4728713035583496,
"rewards/rejected": -2.854668140411377,
"step": 240
},
{
"epoch": 0.52,
"learning_rate": 5.457052320211339e-07,
"logits/chosen": 0.09744735062122345,
"logits/rejected": -0.04311475530266762,
"logps/chosen": -561.7251586914062,
"logps/rejected": -1082.66064453125,
"loss": 0.4126,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -2.4967703819274902,
"rewards/margins": 5.509397029876709,
"rewards/rejected": -8.006166458129883,
"step": 250
},
{
"epoch": 0.54,
"learning_rate": 5.091533134088387e-07,
"logits/chosen": 0.007685136049985886,
"logits/rejected": -0.026540469378232956,
"logps/chosen": -681.2808837890625,
"logps/rejected": -1102.198486328125,
"loss": 0.4237,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -3.266371250152588,
"rewards/margins": 4.754992485046387,
"rewards/rejected": -8.021364212036133,
"step": 260
},
{
"epoch": 0.57,
"learning_rate": 4.7255233006783624e-07,
"logits/chosen": 0.24146917462348938,
"logits/rejected": 0.05772332474589348,
"logps/chosen": -437.0887756347656,
"logps/rejected": -754.1742553710938,
"loss": 0.409,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -1.0795494318008423,
"rewards/margins": 3.357706069946289,
"rewards/rejected": -4.437255859375,
"step": 270
},
{
"epoch": 0.59,
"learning_rate": 4.3609847514019763e-07,
"logits/chosen": 0.15583154559135437,
"logits/rejected": -0.01679980382323265,
"logps/chosen": -622.4188232421875,
"logps/rejected": -1143.203857421875,
"loss": 0.4172,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -3.2593586444854736,
"rewards/margins": 5.262009143829346,
"rewards/rejected": -8.521368980407715,
"step": 280
},
{
"epoch": 0.61,
"learning_rate": 3.9998715311197783e-07,
"logits/chosen": 0.12384140491485596,
"logits/rejected": -0.03689634054899216,
"logps/chosen": -612.9854736328125,
"logps/rejected": -1161.8275146484375,
"loss": 0.4065,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -2.665544033050537,
"rewards/margins": 5.724797248840332,
"rewards/rejected": -8.390340805053711,
"step": 290
},
{
"epoch": 0.63,
"learning_rate": 3.6441193238179146e-07,
"logits/chosen": 0.23247964680194855,
"logits/rejected": 0.08442293107509613,
"logps/chosen": -644.8258056640625,
"logps/rejected": -1333.277099609375,
"loss": 0.4067,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -3.5307083129882812,
"rewards/margins": 6.568638801574707,
"rewards/rejected": -10.099346160888672,
"step": 300
},
{
"epoch": 0.65,
"learning_rate": 3.295635076714144e-07,
"logits/chosen": 0.21653930842876434,
"logits/rejected": -0.010667298920452595,
"logps/chosen": -576.2736206054688,
"logps/rejected": -1167.0555419921875,
"loss": 0.4003,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -2.805418014526367,
"rewards/margins": 5.748055458068848,
"rewards/rejected": -8.553472518920898,
"step": 310
},
{
"epoch": 0.67,
"learning_rate": 2.956286778402226e-07,
"logits/chosen": 0.14956721663475037,
"logits/rejected": -0.00617391150444746,
"logps/chosen": -499.51556396484375,
"logps/rejected": -1073.225830078125,
"loss": 0.4081,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -1.8388452529907227,
"rewards/margins": 5.99139928817749,
"rewards/rejected": -7.830244541168213,
"step": 320
},
{
"epoch": 0.69,
"learning_rate": 2.6278934458271996e-07,
"logits/chosen": 0.20027479529380798,
"logits/rejected": 0.06552217900753021,
"logps/chosen": -461.4195861816406,
"logps/rejected": -1150.258544921875,
"loss": 0.4027,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -1.6265113353729248,
"rewards/margins": 6.768563270568848,
"rewards/rejected": -8.395073890686035,
"step": 330
},
{
"epoch": 0.71,
"learning_rate": 2.312215373764551e-07,
"logits/chosen": 0.1772742122411728,
"logits/rejected": 0.058857548981904984,
"logps/chosen": -519.1689453125,
"logps/rejected": -1075.103759765625,
"loss": 0.4056,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -2.324723720550537,
"rewards/margins": 5.8179826736450195,
"rewards/rejected": -8.142705917358398,
"step": 340
},
{
"epoch": 0.73,
"learning_rate": 2.0109446990692963e-07,
"logits/chosen": 0.09322932362556458,
"logits/rejected": -0.021080341190099716,
"logps/chosen": -524.8082275390625,
"logps/rejected": -1263.429443359375,
"loss": 0.404,
"rewards/accuracies": 0.65625,
"rewards/chosen": -2.233060121536255,
"rewards/margins": 7.2954888343811035,
"rewards/rejected": -9.528549194335938,
"step": 350
},
{
"epoch": 0.75,
"learning_rate": 1.725696330273575e-07,
"logits/chosen": 0.12329642474651337,
"logits/rejected": -0.045363299548625946,
"logps/chosen": -477.84747314453125,
"logps/rejected": -1159.287353515625,
"loss": 0.3987,
"rewards/accuracies": 0.71875,
"rewards/chosen": -1.4969019889831543,
"rewards/margins": 7.274144172668457,
"rewards/rejected": -8.77104663848877,
"step": 360
},
{
"epoch": 0.77,
"learning_rate": 1.4579992911531496e-07,
"logits/chosen": 0.13813820481300354,
"logits/rejected": 0.06726070493459702,
"logps/chosen": -596.8673706054688,
"logps/rejected": -1229.910888671875,
"loss": 0.3989,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -2.484358787536621,
"rewards/margins": 6.708567142486572,
"rewards/rejected": -9.192926406860352,
"step": 370
},
{
"epoch": 0.8,
"learning_rate": 1.209288524664029e-07,
"logits/chosen": 0.2262219935655594,
"logits/rejected": 0.04883592948317528,
"logps/chosen": -571.9241333007812,
"logps/rejected": -1147.636474609375,
"loss": 0.3965,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -2.7885093688964844,
"rewards/margins": 5.966012954711914,
"rewards/rejected": -8.754522323608398,
"step": 380
},
{
"epoch": 0.82,
"learning_rate": 9.808972011828054e-08,
"logits/chosen": 0.13919615745544434,
"logits/rejected": 0.08005174249410629,
"logps/chosen": -603.2689208984375,
"logps/rejected": -1278.978271484375,
"loss": 0.3993,
"rewards/accuracies": 0.75,
"rewards/chosen": -2.6157753467559814,
"rewards/margins": 7.164151668548584,
"rewards/rejected": -9.779927253723145,
"step": 390
},
{
"epoch": 0.84,
"learning_rate": 7.740495722810269e-08,
"logits/chosen": 0.1855761706829071,
"logits/rejected": 0.03339262679219246,
"logps/chosen": -554.6050415039062,
"logps/rejected": -1247.11474609375,
"loss": 0.4064,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -2.375999927520752,
"rewards/margins": 7.147269248962402,
"rewards/rejected": -9.523270606994629,
"step": 400
},
{
"epoch": 0.86,
"learning_rate": 5.898544083397e-08,
"logits/chosen": 0.10612723976373672,
"logits/rejected": -0.03204170614480972,
"logps/chosen": -598.8375244140625,
"logps/rejected": -1218.921142578125,
"loss": 0.4009,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -3.0963997840881348,
"rewards/margins": 6.186778545379639,
"rewards/rejected": -9.283178329467773,
"step": 410
},
{
"epoch": 0.88,
"learning_rate": 4.292990551804171e-08,
"logits/chosen": 0.3134514391422272,
"logits/rejected": 0.1133495420217514,
"logps/chosen": -560.297607421875,
"logps/rejected": -1385.083251953125,
"loss": 0.3991,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -2.669637680053711,
"rewards/margins": 8.3246488571167,
"rewards/rejected": -10.994285583496094,
"step": 420
},
{
"epoch": 0.9,
"learning_rate": 2.9324414157151367e-08,
"logits/chosen": 0.14708609879016876,
"logits/rejected": 0.05113764852285385,
"logps/chosen": -646.3408203125,
"logps/rejected": -1521.79345703125,
"loss": 0.3999,
"rewards/accuracies": 0.75,
"rewards/chosen": -3.122638702392578,
"rewards/margins": 8.8574800491333,
"rewards/rejected": -11.980117797851562,
"step": 430
},
{
"epoch": 0.92,
"learning_rate": 1.824189659787284e-08,
"logits/chosen": 0.19891302287578583,
"logits/rejected": 0.057393454015254974,
"logps/chosen": -530.86865234375,
"logps/rejected": -1372.778076171875,
"loss": 0.3979,
"rewards/accuracies": 0.6875,
"rewards/chosen": -2.4631145000457764,
"rewards/margins": 8.486894607543945,
"rewards/rejected": -10.950007438659668,
"step": 440
},
{
"epoch": 0.94,
"learning_rate": 9.741758728888217e-09,
"logits/chosen": 0.20876403152942657,
"logits/rejected": 0.052755843847990036,
"logps/chosen": -683.3274536132812,
"logps/rejected": -1404.552978515625,
"loss": 0.3915,
"rewards/accuracies": 0.71875,
"rewards/chosen": -3.3337600231170654,
"rewards/margins": 7.872265815734863,
"rewards/rejected": -11.206026077270508,
"step": 450
},
{
"epoch": 0.96,
"learning_rate": 3.869564046156459e-09,
"logits/chosen": 0.2985457181930542,
"logits/rejected": 0.15650448203086853,
"logps/chosen": -468.8932189941406,
"logps/rejected": -1197.56201171875,
"loss": 0.3987,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -1.7095565795898438,
"rewards/margins": 7.608504295349121,
"rewards/rejected": -9.318059921264648,
"step": 460
},
{
"epoch": 0.98,
"learning_rate": 6.567894177967325e-10,
"logits/chosen": 0.17393910884857178,
"logits/rejected": 0.02789122983813286,
"logps/chosen": -607.3438720703125,
"logps/rejected": -1505.235595703125,
"loss": 0.3978,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -2.6819469928741455,
"rewards/margins": 9.39558219909668,
"rewards/rejected": -12.07752799987793,
"step": 470
},
{
"epoch": 1.0,
"step": 477,
"total_flos": 0.0,
"train_loss": 0.42718374404267445,
"train_runtime": 6325.1171,
"train_samples_per_second": 9.665,
"train_steps_per_second": 0.075
}
],
"logging_steps": 10,
"max_steps": 477,
"num_train_epochs": 1,
"save_steps": 10000,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}