zephyr-7b-dpo-full / trainer_state.json
wzhouad's picture
Model save
266fc39 verified
raw
history blame
No virus
17.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9975412715138743,
"eval_steps": 10000,
"global_step": 355,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.7777777777777774e-08,
"logits/chosen": -0.13174405694007874,
"logits/rejected": -0.027169257402420044,
"logps/chosen": -477.4691162109375,
"logps/rejected": -277.6482238769531,
"loss": 0.4106,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"learning_rate": 2.7777777777777776e-07,
"logits/chosen": -0.0896572694182396,
"logits/rejected": -0.04708625003695488,
"logps/chosen": -334.1234130859375,
"logps/rejected": -264.19927978515625,
"loss": 0.4187,
"rewards/accuracies": 0.4027777910232544,
"rewards/chosen": -0.0011318529723212123,
"rewards/margins": -0.0006679879734292626,
"rewards/rejected": -0.0004638649697881192,
"step": 10
},
{
"epoch": 0.06,
"learning_rate": 5.555555555555555e-07,
"logits/chosen": -0.06541652977466583,
"logits/rejected": -0.027149802073836327,
"logps/chosen": -312.1936950683594,
"logps/rejected": -212.1822967529297,
"loss": 0.422,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.0018345726421102881,
"rewards/margins": 0.008850323967635632,
"rewards/rejected": -0.007015751209110022,
"step": 20
},
{
"epoch": 0.08,
"learning_rate": 8.333333333333333e-07,
"logits/chosen": -0.033993594348430634,
"logits/rejected": 0.014452556148171425,
"logps/chosen": -369.52886962890625,
"logps/rejected": -227.0442657470703,
"loss": 0.4287,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.012805985286831856,
"rewards/margins": 0.05532551556825638,
"rewards/rejected": -0.04251953214406967,
"step": 30
},
{
"epoch": 0.11,
"learning_rate": 9.99612097830993e-07,
"logits/chosen": -0.049494121223688126,
"logits/rejected": -0.007341804448515177,
"logps/chosen": -328.2823791503906,
"logps/rejected": -251.8525848388672,
"loss": 0.4553,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.03969588130712509,
"rewards/margins": 0.06737084686756134,
"rewards/rejected": -0.10706672817468643,
"step": 40
},
{
"epoch": 0.14,
"learning_rate": 9.952551076085863e-07,
"logits/chosen": -0.054784227162599564,
"logits/rejected": -0.018202614039182663,
"logps/chosen": -343.4543762207031,
"logps/rejected": -278.887451171875,
"loss": 0.478,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.029784226790070534,
"rewards/margins": 0.14676395058631897,
"rewards/rejected": -0.17654818296432495,
"step": 50
},
{
"epoch": 0.17,
"learning_rate": 9.860986139994238e-07,
"logits/chosen": -0.17503580451011658,
"logits/rejected": -0.10935833305120468,
"logps/chosen": -399.97161865234375,
"logps/rejected": -245.5420684814453,
"loss": 0.4856,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.0788765698671341,
"rewards/margins": 0.30753207206726074,
"rewards/rejected": -0.22865548729896545,
"step": 60
},
{
"epoch": 0.2,
"learning_rate": 9.722313523268027e-07,
"logits/chosen": -0.13078172504901886,
"logits/rejected": -0.018874743953347206,
"logps/chosen": -382.87396240234375,
"logps/rejected": -252.6072540283203,
"loss": 0.4667,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.09564249962568283,
"rewards/margins": 0.21217863261699677,
"rewards/rejected": -0.11653614044189453,
"step": 70
},
{
"epoch": 0.22,
"learning_rate": 9.537877098354784e-07,
"logits/chosen": 0.019111448898911476,
"logits/rejected": 0.04028189927339554,
"logps/chosen": -277.33154296875,
"logps/rejected": -215.5694122314453,
"loss": 0.4657,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": 0.01853206194937229,
"rewards/margins": 0.18713198602199554,
"rewards/rejected": -0.168599933385849,
"step": 80
},
{
"epoch": 0.25,
"learning_rate": 9.309464233486386e-07,
"logits/chosen": -0.184749573469162,
"logits/rejected": -0.12197474390268326,
"logps/chosen": -374.56268310546875,
"logps/rejected": -224.47860717773438,
"loss": 0.4724,
"rewards/accuracies": 0.71875,
"rewards/chosen": 0.19164375960826874,
"rewards/margins": 0.33850011229515076,
"rewards/rejected": -0.1468563675880432,
"step": 90
},
{
"epoch": 0.28,
"learning_rate": 9.039288471343504e-07,
"logits/chosen": -0.06358620524406433,
"logits/rejected": -0.022323714569211006,
"logps/chosen": -352.3625183105469,
"logps/rejected": -265.12457275390625,
"loss": 0.4579,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": 0.11637835204601288,
"rewards/margins": 0.15370506048202515,
"rewards/rejected": -0.03732669726014137,
"step": 100
},
{
"epoch": 0.31,
"learning_rate": 8.729968077675454e-07,
"logits/chosen": -0.16022691130638123,
"logits/rejected": -0.06551636755466461,
"logps/chosen": -304.0919189453125,
"logps/rejected": -257.5033874511719,
"loss": 0.4444,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": 0.09323982149362564,
"rewards/margins": 0.10607878863811493,
"rewards/rejected": -0.012838983908295631,
"step": 110
},
{
"epoch": 0.34,
"learning_rate": 8.384500667760089e-07,
"logits/chosen": -0.18294575810432434,
"logits/rejected": -0.1334661990404129,
"logps/chosen": -323.1263427734375,
"logps/rejected": -206.44387817382812,
"loss": 0.4535,
"rewards/accuracies": 0.65625,
"rewards/chosen": 0.17489886283874512,
"rewards/margins": 0.1883935183286667,
"rewards/rejected": -0.013494668528437614,
"step": 120
},
{
"epoch": 0.37,
"learning_rate": 8.006234156598042e-07,
"logits/chosen": -0.09687581658363342,
"logits/rejected": -0.0031311712227761745,
"logps/chosen": -361.1056823730469,
"logps/rejected": -219.41552734375,
"loss": 0.4484,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.12578140199184418,
"rewards/margins": 0.2744066119194031,
"rewards/rejected": -0.1486252248287201,
"step": 130
},
{
"epoch": 0.39,
"learning_rate": 7.59883431436215e-07,
"logits/chosen": -0.03516136482357979,
"logits/rejected": -0.005446717143058777,
"logps/chosen": -316.314208984375,
"logps/rejected": -241.97024536132812,
"loss": 0.4383,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.025916021317243576,
"rewards/margins": 0.16546496748924255,
"rewards/rejected": -0.13954894244670868,
"step": 140
},
{
"epoch": 0.42,
"learning_rate": 7.166249241521318e-07,
"logits/chosen": 0.0030886970926076174,
"logits/rejected": 0.06723493337631226,
"logps/chosen": -293.86627197265625,
"logps/rejected": -255.26492309570312,
"loss": 0.4382,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.05645722150802612,
"rewards/margins": 0.17538480460643768,
"rewards/rejected": -0.2318420112133026,
"step": 150
},
{
"epoch": 0.45,
"learning_rate": 6.712671107909358e-07,
"logits/chosen": -0.03268152475357056,
"logits/rejected": 0.12709534168243408,
"logps/chosen": -369.74859619140625,
"logps/rejected": -260.13128662109375,
"loss": 0.4255,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": 0.003464625682681799,
"rewards/margins": 0.2506219744682312,
"rewards/rejected": -0.24715733528137207,
"step": 160
},
{
"epoch": 0.48,
"learning_rate": 6.24249552652447e-07,
"logits/chosen": 0.04178273305296898,
"logits/rejected": 0.12335582822561264,
"logps/chosen": -316.4767150878906,
"logps/rejected": -268.18829345703125,
"loss": 0.4169,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.006209957879036665,
"rewards/margins": 0.20326288044452667,
"rewards/rejected": -0.20947282016277313,
"step": 170
},
{
"epoch": 0.51,
"learning_rate": 5.760278955766694e-07,
"logits/chosen": -0.12427058070898056,
"logits/rejected": 0.009830540046095848,
"logps/chosen": -327.13958740234375,
"logps/rejected": -258.3717041015625,
"loss": 0.4267,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.042253121733665466,
"rewards/margins": 0.19665148854255676,
"rewards/rejected": -0.23890459537506104,
"step": 180
},
{
"epoch": 0.53,
"learning_rate": 5.270694542927088e-07,
"logits/chosen": -0.16560761630535126,
"logits/rejected": -0.04073227569460869,
"logps/chosen": -341.1844482421875,
"logps/rejected": -227.2850341796875,
"loss": 0.4261,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": 0.0071119763888418674,
"rewards/margins": 0.2952454090118408,
"rewards/rejected": -0.288133442401886,
"step": 190
},
{
"epoch": 0.56,
"learning_rate": 4.778486836848107e-07,
"logits/chosen": -0.007979141548275948,
"logits/rejected": 0.1243690699338913,
"logps/chosen": -329.4273986816406,
"logps/rejected": -260.57806396484375,
"loss": 0.4096,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.19496320188045502,
"rewards/margins": 0.16824397444725037,
"rewards/rejected": -0.3632071614265442,
"step": 200
},
{
"epoch": 0.59,
"learning_rate": 4.2884258086335745e-07,
"logits/chosen": 0.09336410462856293,
"logits/rejected": 0.19506987929344177,
"logps/chosen": -391.4615173339844,
"logps/rejected": -279.2521057128906,
"loss": 0.4003,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.11562051624059677,
"rewards/margins": 0.245724156498909,
"rewards/rejected": -0.36134466528892517,
"step": 210
},
{
"epoch": 0.62,
"learning_rate": 3.8052606259922095e-07,
"logits/chosen": -0.16688141226768494,
"logits/rejected": -0.08500812947750092,
"logps/chosen": -362.3302917480469,
"logps/rejected": -247.5942840576172,
"loss": 0.4244,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.07475811243057251,
"rewards/margins": 0.20620782673358917,
"rewards/rejected": -0.28096598386764526,
"step": 220
},
{
"epoch": 0.65,
"learning_rate": 3.333673629186279e-07,
"logits/chosen": -0.02717510424554348,
"logits/rejected": 0.12363864481449127,
"logps/chosen": -332.33319091796875,
"logps/rejected": -247.22817993164062,
"loss": 0.4115,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.08057795464992523,
"rewards/margins": 0.2381751984357834,
"rewards/rejected": -0.3187531530857086,
"step": 230
},
{
"epoch": 0.67,
"learning_rate": 2.878234954603167e-07,
"logits/chosen": 0.033598482608795166,
"logits/rejected": 0.18793973326683044,
"logps/chosen": -381.00506591796875,
"logps/rejected": -270.8756103515625,
"loss": 0.3798,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.12533587217330933,
"rewards/margins": 0.24204333126544952,
"rewards/rejected": -0.36737921833992004,
"step": 240
},
{
"epoch": 0.7,
"learning_rate": 2.443358245691555e-07,
"logits/chosen": 0.030673842877149582,
"logits/rejected": 0.18992134928703308,
"logps/chosen": -383.7073059082031,
"logps/rejected": -261.9964294433594,
"loss": 0.3877,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.09187673032283783,
"rewards/margins": 0.3298332989215851,
"rewards/rejected": -0.4217100143432617,
"step": 250
},
{
"epoch": 0.73,
"learning_rate": 2.0332578804662782e-07,
"logits/chosen": 0.024305405095219612,
"logits/rejected": 0.132650688290596,
"logps/chosen": -368.91131591796875,
"logps/rejected": -269.9962463378906,
"loss": 0.4026,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.14426225423812866,
"rewards/margins": 0.2727457880973816,
"rewards/rejected": -0.41700801253318787,
"step": 260
},
{
"epoch": 0.76,
"learning_rate": 1.651908130088947e-07,
"logits/chosen": 0.13495397567749023,
"logits/rejected": 0.21630129218101501,
"logps/chosen": -346.6638488769531,
"logps/rejected": -274.6488952636719,
"loss": 0.3821,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.25730255246162415,
"rewards/margins": 0.2660498023033142,
"rewards/rejected": -0.5233522653579712,
"step": 270
},
{
"epoch": 0.79,
"learning_rate": 1.3030046443173442e-07,
"logits/chosen": 0.12753400206565857,
"logits/rejected": 0.26089444756507874,
"logps/chosen": -396.9707336425781,
"logps/rejected": -273.0684814453125,
"loss": 0.4015,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.19528909027576447,
"rewards/margins": 0.3000025451183319,
"rewards/rejected": -0.49529165029525757,
"step": 280
},
{
"epoch": 0.81,
"learning_rate": 9.899286370670574e-08,
"logits/chosen": 0.18344645202159882,
"logits/rejected": 0.3353565037250519,
"logps/chosen": -358.2286376953125,
"logps/rejected": -288.5234680175781,
"loss": 0.4025,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.28220534324645996,
"rewards/margins": 0.19654087722301483,
"rewards/rejected": -0.478746235370636,
"step": 290
},
{
"epoch": 0.84,
"learning_rate": 7.157141191620548e-08,
"logits/chosen": 0.0641961470246315,
"logits/rejected": 0.2366667091846466,
"logps/chosen": -380.06103515625,
"logps/rejected": -267.75213623046875,
"loss": 0.3997,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.13748347759246826,
"rewards/margins": 0.3018878996372223,
"rewards/rejected": -0.43937140703201294,
"step": 300
},
{
"epoch": 0.87,
"learning_rate": 4.830184958207006e-08,
"logits/chosen": 0.03403336927294731,
"logits/rejected": 0.19396355748176575,
"logps/chosen": -347.7532653808594,
"logps/rejected": -284.3299865722656,
"loss": 0.4026,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.16916589438915253,
"rewards/margins": 0.25709637999534607,
"rewards/rejected": -0.4262623190879822,
"step": 310
},
{
"epoch": 0.9,
"learning_rate": 2.940968138161731e-08,
"logits/chosen": 0.11429999023675919,
"logits/rejected": 0.17834721505641937,
"logps/chosen": -330.48284912109375,
"logps/rejected": -274.64727783203125,
"loss": 0.3982,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.1756475865840912,
"rewards/margins": 0.1860581338405609,
"rewards/rejected": -0.3617057204246521,
"step": 320
},
{
"epoch": 0.93,
"learning_rate": 1.507799078812799e-08,
"logits/chosen": -0.007492154836654663,
"logits/rejected": 0.10714348405599594,
"logps/chosen": -425.7674865722656,
"logps/rejected": -314.00860595703125,
"loss": 0.3971,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.17962414026260376,
"rewards/margins": 0.292041152715683,
"rewards/rejected": -0.47166532278060913,
"step": 330
},
{
"epoch": 0.96,
"learning_rate": 5.445665814031941e-09,
"logits/chosen": 0.06636445224285126,
"logits/rejected": 0.1753680408000946,
"logps/chosen": -369.9612731933594,
"logps/rejected": -278.1523132324219,
"loss": 0.4052,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.18568792939186096,
"rewards/margins": 0.26998209953308105,
"rewards/rejected": -0.4556700587272644,
"step": 340
},
{
"epoch": 0.98,
"learning_rate": 6.060530510659245e-10,
"logits/chosen": 0.03166942670941353,
"logits/rejected": 0.10389814525842667,
"logps/chosen": -357.81988525390625,
"logps/rejected": -275.1876525878906,
"loss": 0.4038,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.20625650882720947,
"rewards/margins": 0.26138800382614136,
"rewards/rejected": -0.46764451265335083,
"step": 350
},
{
"epoch": 1.0,
"step": 355,
"total_flos": 0.0,
"train_loss": 0.4252443082735572,
"train_runtime": 5295.2592,
"train_samples_per_second": 8.602,
"train_steps_per_second": 0.067
}
],
"logging_steps": 10,
"max_steps": 355,
"num_train_epochs": 1,
"save_steps": 10000,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}