qwen2.5-0.5b-expo-DPO-noES3-0.1 / trainer_state.json
hZzy's picture
Model save
7894361 verified
raw
history blame
70.7 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.995276334435522,
"eval_steps": 50,
"global_step": 704,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.002834199338686821,
"grad_norm": 18.397043918777676,
"learning_rate": 7.042253521126761e-08,
"logits": -1.2867579460144043,
"logps": -84.34933471679688,
"loss": 0.6931,
"objective": 0.6931471824645996,
"ranking_simple": 0.5833333134651184,
"regularize": 0.6931471824645996,
"step": 1,
"wo_beta": 5.2708282470703125
},
{
"dpo_loss": 0.6924155354499817,
"epoch": 0.014170996693434105,
"grad_norm": 18.351841049696436,
"learning_rate": 3.521126760563381e-07,
"logits": -1.430939793586731,
"logps": -83.6253890991211,
"loss": 0.6927,
"objective": 0.6924155354499817,
"ranking_simple": 0.4895833432674408,
"regularize": 0.6924155354499817,
"step": 5,
"wo_beta": 7.649607181549072
},
{
"dpo_loss": 0.6938675045967102,
"epoch": 0.02834199338686821,
"grad_norm": 19.803358997748763,
"learning_rate": 7.042253521126762e-07,
"logits": -1.4047328233718872,
"logps": -83.41845703125,
"loss": 0.6929,
"objective": 0.6938675045967102,
"ranking_simple": 0.5833333134651184,
"regularize": 0.6938675045967102,
"step": 10,
"wo_beta": 5.940184593200684
},
{
"dpo_loss": 0.6885419487953186,
"epoch": 0.042512990080302314,
"grad_norm": 19.861932182637975,
"learning_rate": 1.0563380281690142e-06,
"logits": -1.5388954877853394,
"logps": -82.4270248413086,
"loss": 0.689,
"objective": 0.6885419487953186,
"ranking_simple": 0.5708333253860474,
"regularize": 0.6885419487953186,
"step": 15,
"wo_beta": 6.731040000915527
},
{
"dpo_loss": 0.6827310919761658,
"epoch": 0.05668398677373642,
"grad_norm": 16.87924772685924,
"learning_rate": 1.4084507042253523e-06,
"logits": -1.3925108909606934,
"logps": -80.71045684814453,
"loss": 0.684,
"objective": 0.6827310919761658,
"ranking_simple": 0.4833333194255829,
"regularize": 0.6827310919761658,
"step": 20,
"wo_beta": 8.312360763549805
},
{
"dpo_loss": 0.6760162711143494,
"epoch": 0.07085498346717052,
"grad_norm": 17.86836459306927,
"learning_rate": 1.7605633802816902e-06,
"logits": -1.4580414295196533,
"logps": -80.7186508178711,
"loss": 0.6703,
"objective": 0.6760162711143494,
"ranking_simple": 0.5833333134651184,
"regularize": 0.6760162711143494,
"step": 25,
"wo_beta": 6.106756210327148
},
{
"dpo_loss": 0.6718389391899109,
"epoch": 0.08502598016060463,
"grad_norm": 19.355249838601942,
"learning_rate": 2.1126760563380285e-06,
"logits": -1.6006718873977661,
"logps": -84.822021484375,
"loss": 0.6727,
"objective": 0.6718389391899109,
"ranking_simple": 0.4958333373069763,
"regularize": 0.6718389391899109,
"step": 30,
"wo_beta": 8.509283065795898
},
{
"dpo_loss": 0.6751564145088196,
"epoch": 0.09919697685403873,
"grad_norm": 19.23258032194357,
"learning_rate": 2.4647887323943666e-06,
"logits": -1.6175826787948608,
"logps": -85.66646575927734,
"loss": 0.6624,
"objective": 0.6751564145088196,
"ranking_simple": 0.5416666865348816,
"regularize": 0.6751564145088196,
"step": 35,
"wo_beta": 6.50329065322876
},
{
"dpo_loss": 0.6747376322746277,
"epoch": 0.11336797354747284,
"grad_norm": 20.018488756494,
"learning_rate": 2.8169014084507046e-06,
"logits": -1.621884822845459,
"logps": -84.25798797607422,
"loss": 0.6598,
"objective": 0.6747376322746277,
"ranking_simple": 0.4791666567325592,
"regularize": 0.6747376322746277,
"step": 40,
"wo_beta": 6.886596202850342
},
{
"dpo_loss": 0.6456737518310547,
"epoch": 0.12753897024090693,
"grad_norm": 14.933918801623976,
"learning_rate": 3.1690140845070427e-06,
"logits": -1.6078789234161377,
"logps": -81.69025421142578,
"loss": 0.6563,
"objective": 0.6456737518310547,
"ranking_simple": 0.5166666507720947,
"regularize": 0.6456737518310547,
"step": 45,
"wo_beta": 6.529275417327881
},
{
"dpo_loss": 0.6442943811416626,
"epoch": 0.14170996693434104,
"grad_norm": 16.57958550585861,
"learning_rate": 3.5211267605633804e-06,
"logits": -1.6018227338790894,
"logps": -82.79893493652344,
"loss": 0.6316,
"objective": 0.6442943811416626,
"ranking_simple": 0.5375000238418579,
"regularize": 0.6442943811416626,
"step": 50,
"wo_beta": 7.091952323913574
},
{
"epoch": 0.14170996693434104,
"eval_dpo_loss": 0.6824604868888855,
"eval_logits": -1.587925672531128,
"eval_logps": -90.32820892333984,
"eval_loss": 0.6806595921516418,
"eval_objective": 0.6824604868888855,
"eval_ranking_simple": 0.5341615080833435,
"eval_regularize": 0.6824604868888855,
"eval_runtime": 367.8191,
"eval_samples_per_second": 15.741,
"eval_steps_per_second": 1.313,
"eval_wo_beta": 7.8618669509887695,
"step": 50
},
{
"dpo_loss": 0.6267839074134827,
"epoch": 0.15588096362777515,
"grad_norm": 17.05808296065035,
"learning_rate": 3.873239436619718e-06,
"logits": -1.5757447481155396,
"logps": -84.72220611572266,
"loss": 0.6258,
"objective": 0.6267839074134827,
"ranking_simple": 0.5208333134651184,
"regularize": 0.6267839074134827,
"step": 55,
"wo_beta": 6.998147487640381
},
{
"dpo_loss": 0.6304137110710144,
"epoch": 0.17005196032120926,
"grad_norm": 17.359597890876504,
"learning_rate": 4.225352112676057e-06,
"logits": -1.5341871976852417,
"logps": -87.07388305664062,
"loss": 0.6204,
"objective": 0.6304137110710144,
"ranking_simple": 0.5708333253860474,
"regularize": 0.6304137110710144,
"step": 60,
"wo_beta": 5.637550354003906
},
{
"dpo_loss": 0.5928328633308411,
"epoch": 0.18422295701464336,
"grad_norm": 18.81033297245387,
"learning_rate": 4.577464788732395e-06,
"logits": -1.681604266166687,
"logps": -88.5802001953125,
"loss": 0.6021,
"objective": 0.5928328633308411,
"ranking_simple": 0.6041666865348816,
"regularize": 0.5928328633308411,
"step": 65,
"wo_beta": 5.803915023803711
},
{
"dpo_loss": 0.5882770419120789,
"epoch": 0.19839395370807747,
"grad_norm": 18.134916504512542,
"learning_rate": 4.929577464788733e-06,
"logits": -1.7763893604278564,
"logps": -92.76559448242188,
"loss": 0.5898,
"objective": 0.5882770419120789,
"ranking_simple": 0.5874999761581421,
"regularize": 0.5882770419120789,
"step": 70,
"wo_beta": 6.3991007804870605
},
{
"dpo_loss": 0.5995556712150574,
"epoch": 0.21256495040151158,
"grad_norm": 21.061539129772793,
"learning_rate": 4.999507384516835e-06,
"logits": -1.727323055267334,
"logps": -92.68614196777344,
"loss": 0.5904,
"objective": 0.5995556712150574,
"ranking_simple": 0.5874999761581421,
"regularize": 0.5995556712150574,
"step": 75,
"wo_beta": 6.095489978790283
},
{
"dpo_loss": 0.5429711937904358,
"epoch": 0.22673594709494568,
"grad_norm": 17.500250856825723,
"learning_rate": 4.997506466835171e-06,
"logits": -1.7568250894546509,
"logps": -89.59777069091797,
"loss": 0.5696,
"objective": 0.5429711937904358,
"ranking_simple": 0.6625000238418579,
"regularize": 0.5429711937904358,
"step": 80,
"wo_beta": 4.702788829803467
},
{
"dpo_loss": 0.5587875247001648,
"epoch": 0.2409069437883798,
"grad_norm": 14.610779362874785,
"learning_rate": 4.9939676896203576e-06,
"logits": -1.7483054399490356,
"logps": -89.5750961303711,
"loss": 0.5788,
"objective": 0.5587875247001648,
"ranking_simple": 0.6291666626930237,
"regularize": 0.5587875247001648,
"step": 85,
"wo_beta": 5.067751407623291
},
{
"dpo_loss": 0.4884372353553772,
"epoch": 0.25507794048181387,
"grad_norm": 16.59773892580396,
"learning_rate": 4.9888932319026994e-06,
"logits": -1.829767107963562,
"logps": -88.12342071533203,
"loss": 0.5584,
"objective": 0.4884372353553772,
"ranking_simple": 0.6541666388511658,
"regularize": 0.4884372353553772,
"step": 90,
"wo_beta": 4.8033833503723145
},
{
"dpo_loss": 0.5380887985229492,
"epoch": 0.269248937175248,
"grad_norm": 16.942898511925186,
"learning_rate": 4.982286218320023e-06,
"logits": -1.8157219886779785,
"logps": -89.4225082397461,
"loss": 0.548,
"objective": 0.5380887985229492,
"ranking_simple": 0.6208333373069763,
"regularize": 0.5380887985229492,
"step": 95,
"wo_beta": 6.339680194854736
},
{
"dpo_loss": 0.5714857578277588,
"epoch": 0.2834199338686821,
"grad_norm": 16.553483764105664,
"learning_rate": 4.974150717193654e-06,
"logits": -1.7562583684921265,
"logps": -88.6811294555664,
"loss": 0.5922,
"objective": 0.5714857578277588,
"ranking_simple": 0.6083333492279053,
"regularize": 0.5714857578277588,
"step": 100,
"wo_beta": 5.948981285095215
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 0.6818667054176331,
"eval_logits": -1.7963634729385376,
"eval_logps": -95.815185546875,
"eval_loss": 0.6793084740638733,
"eval_objective": 0.6818667054176331,
"eval_ranking_simple": 0.5486542582511902,
"eval_regularize": 0.6818667054176331,
"eval_runtime": 368.0636,
"eval_samples_per_second": 15.731,
"eval_steps_per_second": 1.312,
"eval_wo_beta": 7.707693576812744,
"step": 100
},
{
"dpo_loss": 0.5925873517990112,
"epoch": 0.2975909305621162,
"grad_norm": 16.87738475184783,
"learning_rate": 4.964491738023321e-06,
"logits": -1.8753679990768433,
"logps": -90.80838775634766,
"loss": 0.5707,
"objective": 0.5925873517990112,
"ranking_simple": 0.6291666626930237,
"regularize": 0.5925873517990112,
"step": 105,
"wo_beta": 5.3273797035217285
},
{
"dpo_loss": 0.5388583540916443,
"epoch": 0.3117619272555503,
"grad_norm": 15.466132286249575,
"learning_rate": 4.953315228402512e-06,
"logits": -1.7556500434875488,
"logps": -88.66165924072266,
"loss": 0.5374,
"objective": 0.5388583540916443,
"ranking_simple": 0.5916666388511658,
"regularize": 0.5388583540916443,
"step": 110,
"wo_beta": 5.21142053604126
},
{
"dpo_loss": 0.5987796187400818,
"epoch": 0.32593292394898443,
"grad_norm": 16.57925719769902,
"learning_rate": 4.9406280703561944e-06,
"logits": -1.6699596643447876,
"logps": -87.46602630615234,
"loss": 0.5713,
"objective": 0.5987796187400818,
"ranking_simple": 0.6208333373069763,
"regularize": 0.5987796187400818,
"step": 115,
"wo_beta": 5.849599838256836
},
{
"dpo_loss": 0.5472472906112671,
"epoch": 0.3401039206424185,
"grad_norm": 16.2014631256649,
"learning_rate": 4.926438076103162e-06,
"logits": -1.6490483283996582,
"logps": -87.74899291992188,
"loss": 0.5603,
"objective": 0.5472472906112671,
"ranking_simple": 0.6333333253860474,
"regularize": 0.5472472906112671,
"step": 120,
"wo_beta": 6.010056495666504
},
{
"dpo_loss": 0.5509154796600342,
"epoch": 0.35427491733585265,
"grad_norm": 14.950351510543216,
"learning_rate": 4.910753983245589e-06,
"logits": -1.7191225290298462,
"logps": -88.99519348144531,
"loss": 0.5397,
"objective": 0.5509154796600342,
"ranking_simple": 0.6083333492279053,
"regularize": 0.5509154796600342,
"step": 125,
"wo_beta": 5.906139850616455
},
{
"dpo_loss": 0.5123094916343689,
"epoch": 0.3684459140292867,
"grad_norm": 13.21141785871735,
"learning_rate": 4.893585449388786e-06,
"logits": -1.695233702659607,
"logps": -85.64789581298828,
"loss": 0.5398,
"objective": 0.5123094916343689,
"ranking_simple": 0.6583333611488342,
"regularize": 0.5123094916343689,
"step": 130,
"wo_beta": 4.518141269683838
},
{
"dpo_loss": 0.5679463744163513,
"epoch": 0.3826169107227208,
"grad_norm": 16.522848387628404,
"learning_rate": 4.8749430461944536e-06,
"logits": -1.6519335508346558,
"logps": -88.2005615234375,
"loss": 0.5526,
"objective": 0.5679463744163513,
"ranking_simple": 0.6333333253860474,
"regularize": 0.5679463744163513,
"step": 135,
"wo_beta": 5.8905439376831055
},
{
"dpo_loss": 0.5832223296165466,
"epoch": 0.39678790741615494,
"grad_norm": 18.436163706658995,
"learning_rate": 4.854838252871097e-06,
"logits": -1.5592352151870728,
"logps": -90.67977142333984,
"loss": 0.5534,
"objective": 0.5832223296165466,
"ranking_simple": 0.625,
"regularize": 0.5832223296165466,
"step": 140,
"wo_beta": 5.7765278816223145
},
{
"dpo_loss": 0.510637640953064,
"epoch": 0.410958904109589,
"grad_norm": 15.85144306565166,
"learning_rate": 4.833283449105609e-06,
"logits": -1.5676114559173584,
"logps": -90.87916564941406,
"loss": 0.527,
"objective": 0.510637640953064,
"ranking_simple": 0.699999988079071,
"regularize": 0.510637640953064,
"step": 145,
"wo_beta": 4.243090629577637
},
{
"dpo_loss": 0.5151103138923645,
"epoch": 0.42512990080302315,
"grad_norm": 14.430458288203226,
"learning_rate": 4.810291907440382e-06,
"logits": -1.5757466554641724,
"logps": -91.76609802246094,
"loss": 0.5002,
"objective": 0.5151103138923645,
"ranking_simple": 0.6291666626930237,
"regularize": 0.5151103138923645,
"step": 150,
"wo_beta": 6.819777011871338
},
{
"epoch": 0.42512990080302315,
"eval_dpo_loss": 0.6748862862586975,
"eval_logits": -1.5950791835784912,
"eval_logps": -96.20237731933594,
"eval_loss": 0.6815473437309265,
"eval_objective": 0.6748862862586975,
"eval_ranking_simple": 0.5496894121170044,
"eval_regularize": 0.6748862862586975,
"eval_runtime": 367.9317,
"eval_samples_per_second": 15.737,
"eval_steps_per_second": 1.313,
"eval_wo_beta": 7.438036918640137,
"step": 150
},
{
"dpo_loss": 0.49763983488082886,
"epoch": 0.43930089749645723,
"grad_norm": 17.361740656063542,
"learning_rate": 4.785877785100633e-06,
"logits": -1.6784894466400146,
"logps": -92.56346130371094,
"loss": 0.5198,
"objective": 0.49763983488082886,
"ranking_simple": 0.6499999761581421,
"regularize": 0.49763983488082886,
"step": 155,
"wo_beta": 5.170775890350342
},
{
"dpo_loss": 0.5579937100410461,
"epoch": 0.45347189418989137,
"grad_norm": 15.652756053936857,
"learning_rate": 4.7600561152769795e-06,
"logits": -1.5714988708496094,
"logps": -92.15592956542969,
"loss": 0.5328,
"objective": 0.5579937100410461,
"ranking_simple": 0.6416666507720947,
"regularize": 0.5579937100410461,
"step": 160,
"wo_beta": 5.608686447143555
},
{
"dpo_loss": 0.5186927318572998,
"epoch": 0.46764289088332545,
"grad_norm": 14.630193025898393,
"learning_rate": 4.732842797868631e-06,
"logits": -1.6691575050354004,
"logps": -91.15839385986328,
"loss": 0.5189,
"objective": 0.5186927318572998,
"ranking_simple": 0.625,
"regularize": 0.5186927318572998,
"step": 165,
"wo_beta": 5.489006996154785
},
{
"dpo_loss": 0.4966394603252411,
"epoch": 0.4818138875767596,
"grad_norm": 13.19317387340531,
"learning_rate": 4.704254589692903e-06,
"logits": -1.7252763509750366,
"logps": -92.86217498779297,
"loss": 0.4959,
"objective": 0.4966394603252411,
"ranking_simple": 0.6416666507720947,
"regularize": 0.4966394603252411,
"step": 170,
"wo_beta": 5.683476448059082
},
{
"dpo_loss": 0.4829941689968109,
"epoch": 0.49598488427019366,
"grad_norm": 16.695755417688215,
"learning_rate": 4.6743090941670675e-06,
"logits": -1.609352946281433,
"logps": -95.68805694580078,
"loss": 0.4892,
"objective": 0.4829941689968109,
"ranking_simple": 0.675000011920929,
"regularize": 0.4829941689968109,
"step": 175,
"wo_beta": 4.217517852783203
},
{
"dpo_loss": 0.5080674886703491,
"epoch": 0.5101558809636277,
"grad_norm": 18.149531395287664,
"learning_rate": 4.643024750468913e-06,
"logits": -1.6799732446670532,
"logps": -99.82926177978516,
"loss": 0.468,
"objective": 0.5080674886703491,
"ranking_simple": 0.6333333253860474,
"regularize": 0.5080674886703491,
"step": 180,
"wo_beta": 5.674283504486084
},
{
"dpo_loss": 0.4599061608314514,
"epoch": 0.5243268776570619,
"grad_norm": 16.418473362321556,
"learning_rate": 4.610420822182671e-06,
"logits": -1.6607011556625366,
"logps": -99.13754272460938,
"loss": 0.4631,
"objective": 0.4599061608314514,
"ranking_simple": 0.699999988079071,
"regularize": 0.4599061608314514,
"step": 185,
"wo_beta": 4.34508752822876
},
{
"dpo_loss": 0.4760186970233917,
"epoch": 0.538497874350496,
"grad_norm": 16.5909593130195,
"learning_rate": 4.576517385437315e-06,
"logits": -1.7211116552352905,
"logps": -96.94883728027344,
"loss": 0.4859,
"objective": 0.4760186970233917,
"ranking_simple": 0.6958333253860474,
"regularize": 0.4760186970233917,
"step": 190,
"wo_beta": 4.704507827758789
},
{
"dpo_loss": 0.4567195773124695,
"epoch": 0.5526688710439301,
"grad_norm": 17.67231005570377,
"learning_rate": 4.541335316544514e-06,
"logits": -1.7492233514785767,
"logps": -95.30302429199219,
"loss": 0.5112,
"objective": 0.4567195773124695,
"ranking_simple": 0.7208333611488342,
"regularize": 0.4567195773124695,
"step": 195,
"wo_beta": 4.662184238433838
},
{
"dpo_loss": 0.4767034947872162,
"epoch": 0.5668398677373642,
"grad_norm": 14.561592123615519,
"learning_rate": 4.5048962791438885e-06,
"logits": -1.7373807430267334,
"logps": -95.8047866821289,
"loss": 0.4735,
"objective": 0.4767034947872162,
"ranking_simple": 0.6583333611488342,
"regularize": 0.4767034947872162,
"step": 200,
"wo_beta": 5.47299861907959
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 0.691116213798523,
"eval_logits": -1.7563871145248413,
"eval_logps": -98.91757202148438,
"eval_loss": 0.6950607299804688,
"eval_objective": 0.691116213798523,
"eval_ranking_simple": 0.5569358468055725,
"eval_regularize": 0.691116213798523,
"eval_runtime": 367.7852,
"eval_samples_per_second": 15.743,
"eval_steps_per_second": 1.313,
"eval_wo_beta": 7.524092197418213,
"step": 200
},
{
"dpo_loss": 0.49286583065986633,
"epoch": 0.5810108644307983,
"grad_norm": 15.609742263801415,
"learning_rate": 4.467222710863444e-06,
"logits": -1.6046305894851685,
"logps": -95.40666961669922,
"loss": 0.4728,
"objective": 0.49286583065986633,
"ranking_simple": 0.6833333373069763,
"regularize": 0.49286583065986633,
"step": 205,
"wo_beta": 5.8981099128723145
},
{
"dpo_loss": 0.42898985743522644,
"epoch": 0.5951818611242324,
"grad_norm": 16.532108905835056,
"learning_rate": 4.428337809503425e-06,
"logits": -1.6189254522323608,
"logps": -94.09720611572266,
"loss": 0.4362,
"objective": 0.42898985743522644,
"ranking_simple": 0.6916666626930237,
"regularize": 0.42898985743522644,
"step": 210,
"wo_beta": 3.8531105518341064
},
{
"dpo_loss": 0.44433167576789856,
"epoch": 0.6093528578176665,
"grad_norm": 18.51464695126941,
"learning_rate": 4.388265518752085e-06,
"logits": -1.7230619192123413,
"logps": -92.92915344238281,
"loss": 0.4621,
"objective": 0.44433167576789856,
"ranking_simple": 0.6791666746139526,
"regularize": 0.44433167576789856,
"step": 215,
"wo_beta": 5.033292770385742
},
{
"dpo_loss": 0.44531288743019104,
"epoch": 0.6235238545111006,
"grad_norm": 14.674842226422456,
"learning_rate": 4.347030513442168e-06,
"logits": -1.7578327655792236,
"logps": -91.2856674194336,
"loss": 0.4332,
"objective": 0.44531288743019104,
"ranking_simple": 0.6916666626930237,
"regularize": 0.44531288743019104,
"step": 220,
"wo_beta": 5.138680934906006
},
{
"dpo_loss": 0.47229692339897156,
"epoch": 0.6376948512045347,
"grad_norm": 13.123177972096014,
"learning_rate": 4.304658184357186e-06,
"logits": -1.8197827339172363,
"logps": -92.20543670654297,
"loss": 0.4692,
"objective": 0.47229692339897156,
"ranking_simple": 0.6541666388511658,
"regularize": 0.47229692339897156,
"step": 225,
"wo_beta": 5.442239284515381
},
{
"dpo_loss": 0.4127563536167145,
"epoch": 0.6518658478979689,
"grad_norm": 13.818617424678937,
"learning_rate": 4.261174622596835e-06,
"logits": -1.6802526712417603,
"logps": -90.7798843383789,
"loss": 0.4559,
"objective": 0.4127563536167145,
"ranking_simple": 0.7333333492279053,
"regularize": 0.4127563536167145,
"step": 230,
"wo_beta": 3.521521806716919
},
{
"dpo_loss": 0.45779237151145935,
"epoch": 0.6660368445914029,
"grad_norm": 15.036299488311245,
"learning_rate": 4.216606603511202e-06,
"logits": -1.6339088678359985,
"logps": -90.51241302490234,
"loss": 0.4578,
"objective": 0.45779237151145935,
"ranking_simple": 0.6666666865348816,
"regularize": 0.45779237151145935,
"step": 235,
"wo_beta": 4.571218967437744
},
{
"dpo_loss": 0.4738180935382843,
"epoch": 0.680207841284837,
"grad_norm": 13.614428839916116,
"learning_rate": 4.170981570213621e-06,
"logits": -1.8103351593017578,
"logps": -92.29689025878906,
"loss": 0.4481,
"objective": 0.4738180935382843,
"ranking_simple": 0.6958333253860474,
"regularize": 0.4738180935382843,
"step": 240,
"wo_beta": 6.207835674285889
},
{
"dpo_loss": 0.5174158215522766,
"epoch": 0.6943788379782712,
"grad_norm": 16.01438129051185,
"learning_rate": 4.124327616682362e-06,
"logits": -1.7986476421356201,
"logps": -91.10508728027344,
"loss": 0.4642,
"objective": 0.5174158215522766,
"ranking_simple": 0.6958333253860474,
"regularize": 0.5174158215522766,
"step": 245,
"wo_beta": 4.900957107543945
},
{
"dpo_loss": 0.44342610239982605,
"epoch": 0.7085498346717053,
"grad_norm": 13.738305838918832,
"learning_rate": 4.076673470461538e-06,
"logits": -1.5667024850845337,
"logps": -88.29222106933594,
"loss": 0.4626,
"objective": 0.44342610239982605,
"ranking_simple": 0.7041666507720947,
"regularize": 0.44342610239982605,
"step": 250,
"wo_beta": 4.291601181030273
},
{
"epoch": 0.7085498346717053,
"eval_dpo_loss": 0.694493293762207,
"eval_logits": -1.7985897064208984,
"eval_logps": -93.47747039794922,
"eval_loss": 0.6976169347763062,
"eval_objective": 0.694493293762207,
"eval_ranking_simple": 0.5579710006713867,
"eval_regularize": 0.694493293762207,
"eval_runtime": 367.886,
"eval_samples_per_second": 15.739,
"eval_steps_per_second": 1.313,
"eval_wo_beta": 7.902660369873047,
"step": 250
},
{
"dpo_loss": 0.43279239535331726,
"epoch": 0.7227208313651393,
"grad_norm": 12.486423165060444,
"learning_rate": 4.028048474971889e-06,
"logits": -1.860019564628601,
"logps": -87.4556884765625,
"loss": 0.4459,
"objective": 0.43279239535331726,
"ranking_simple": 0.6833333373069763,
"regularize": 0.43279239535331726,
"step": 255,
"wo_beta": 4.541534900665283
},
{
"dpo_loss": 0.41317591071128845,
"epoch": 0.7368918280585735,
"grad_norm": 13.93218309283749,
"learning_rate": 3.978482571442339e-06,
"logits": -1.8741662502288818,
"logps": -91.13224029541016,
"loss": 0.4561,
"objective": 0.41317591071128845,
"ranking_simple": 0.6625000238418579,
"regularize": 0.41317591071128845,
"step": 260,
"wo_beta": 5.1168012619018555
},
{
"dpo_loss": 0.39290040731430054,
"epoch": 0.7510628247520076,
"grad_norm": 13.220960314976885,
"learning_rate": 3.928006280473445e-06,
"logits": -1.8604073524475098,
"logps": -92.02545928955078,
"loss": 0.419,
"objective": 0.39290040731430054,
"ranking_simple": 0.7708333134651184,
"regularize": 0.39290040731430054,
"step": 265,
"wo_beta": 4.408606052398682
},
{
"dpo_loss": 0.47771012783050537,
"epoch": 0.7652338214454416,
"grad_norm": 14.906696639905949,
"learning_rate": 3.876650683244093e-06,
"logits": -1.997718095779419,
"logps": -94.74840545654297,
"loss": 0.4365,
"objective": 0.47771012783050537,
"ranking_simple": 0.6666666865348816,
"regularize": 0.47771012783050537,
"step": 270,
"wo_beta": 4.766172409057617
},
{
"dpo_loss": 0.4372769594192505,
"epoch": 0.7794048181388757,
"grad_norm": 14.602546448634548,
"learning_rate": 3.8244474023730155e-06,
"logits": -1.8585816621780396,
"logps": -94.3137435913086,
"loss": 0.4293,
"objective": 0.4372769594192505,
"ranking_simple": 0.6791666746139526,
"regularize": 0.4372769594192505,
"step": 275,
"wo_beta": 5.203604698181152
},
{
"dpo_loss": 0.3766806721687317,
"epoch": 0.7935758148323099,
"grad_norm": 15.405745134115882,
"learning_rate": 3.771428582446908e-06,
"logits": -1.9468127489089966,
"logps": -95.09542846679688,
"loss": 0.4039,
"objective": 0.3766806721687317,
"ranking_simple": 0.7208333611488342,
"regularize": 0.3766806721687317,
"step": 280,
"wo_beta": 4.175257205963135
},
{
"dpo_loss": 0.41625434160232544,
"epoch": 0.807746811525744,
"grad_norm": 15.243116510039572,
"learning_rate": 3.7176268702271468e-06,
"logits": -1.9459937810897827,
"logps": -95.90043640136719,
"loss": 0.407,
"objective": 0.41625434160232544,
"ranking_simple": 0.699999988079071,
"regularize": 0.41625434160232544,
"step": 285,
"wo_beta": 5.091909408569336
},
{
"dpo_loss": 0.3713260293006897,
"epoch": 0.821917808219178,
"grad_norm": 16.703078330526107,
"learning_rate": 3.6630753945472854e-06,
"logits": -1.963159203529358,
"logps": -95.55049133300781,
"loss": 0.3981,
"objective": 0.3713260293006897,
"ranking_simple": 0.7791666388511658,
"regularize": 0.3713260293006897,
"step": 290,
"wo_beta": 3.84537935256958
},
{
"dpo_loss": 0.4009644687175751,
"epoch": 0.8360888049126122,
"grad_norm": 13.199801209484807,
"learning_rate": 3.6078077459137097e-06,
"logits": -1.99600350856781,
"logps": -98.70610809326172,
"loss": 0.4243,
"objective": 0.4009644687175751,
"ranking_simple": 0.7166666388511658,
"regularize": 0.4009644687175751,
"step": 295,
"wo_beta": 5.210625171661377
},
{
"dpo_loss": 0.42688027024269104,
"epoch": 0.8502598016060463,
"grad_norm": 12.622555129903681,
"learning_rate": 3.5518579558220144e-06,
"logits": -1.9770207405090332,
"logps": -95.8087387084961,
"loss": 0.4214,
"objective": 0.42688027024269104,
"ranking_simple": 0.7458333373069763,
"regularize": 0.42688027024269104,
"step": 300,
"wo_beta": 3.2817904949188232
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 0.6865138411521912,
"eval_logits": -2.0138111114501953,
"eval_logps": -104.43373107910156,
"eval_loss": 0.693064272403717,
"eval_objective": 0.6865138411521912,
"eval_ranking_simple": 0.5615941882133484,
"eval_regularize": 0.6865138411521912,
"eval_runtime": 367.6512,
"eval_samples_per_second": 15.749,
"eval_steps_per_second": 1.314,
"eval_wo_beta": 7.581406593322754,
"step": 300
},
{
"dpo_loss": 0.44818738102912903,
"epoch": 0.8644307982994804,
"grad_norm": 13.23517832354566,
"learning_rate": 3.495260475801841e-06,
"logits": -1.9494545459747314,
"logps": -97.299560546875,
"loss": 0.3903,
"objective": 0.44818738102912903,
"ranking_simple": 0.7041666507720947,
"regularize": 0.44818738102912903,
"step": 305,
"wo_beta": 4.3523712158203125
},
{
"dpo_loss": 0.34828221797943115,
"epoch": 0.8786017949929145,
"grad_norm": 14.41832502286332,
"learning_rate": 3.4380501562030704e-06,
"logits": -1.9428808689117432,
"logps": -98.20413970947266,
"loss": 0.3807,
"objective": 0.34828221797943115,
"ranking_simple": 0.7333333492279053,
"regularize": 0.34828221797943115,
"step": 310,
"wo_beta": 3.6427719593048096
},
{
"dpo_loss": 0.3642140030860901,
"epoch": 0.8927727916863486,
"grad_norm": 12.300830909289896,
"learning_rate": 3.3802622247364446e-06,
"logits": -1.9570696353912354,
"logps": -98.85526275634766,
"loss": 0.4042,
"objective": 0.3642140030860901,
"ranking_simple": 0.699999988079071,
"regularize": 0.3642140030860901,
"step": 315,
"wo_beta": 4.256704330444336
},
{
"dpo_loss": 0.399863064289093,
"epoch": 0.9069437883797827,
"grad_norm": 15.886821504052966,
"learning_rate": 3.321932264781822e-06,
"logits": -1.9135253429412842,
"logps": -101.23651885986328,
"loss": 0.4395,
"objective": 0.399863064289093,
"ranking_simple": 0.7708333134651184,
"regularize": 0.399863064289093,
"step": 320,
"wo_beta": 2.9900147914886475
},
{
"dpo_loss": 0.37714484333992004,
"epoch": 0.9211147850732169,
"grad_norm": 14.587168312549423,
"learning_rate": 3.2630961934774265e-06,
"logits": -1.9540404081344604,
"logps": -100.98246002197266,
"loss": 0.3745,
"objective": 0.37714484333992004,
"ranking_simple": 0.7250000238418579,
"regularize": 0.37714484333992004,
"step": 325,
"wo_beta": 4.075650691986084
},
{
"dpo_loss": 0.35718733072280884,
"epoch": 0.9352857817666509,
"grad_norm": 15.295489026198931,
"learning_rate": 3.203790239603583e-06,
"logits": -1.7621917724609375,
"logps": -101.14175415039062,
"loss": 0.4011,
"objective": 0.35718733072280884,
"ranking_simple": 0.737500011920929,
"regularize": 0.35718733072280884,
"step": 330,
"wo_beta": 3.9375758171081543
},
{
"dpo_loss": 0.3431912660598755,
"epoch": 0.949456778460085,
"grad_norm": 13.496480338032299,
"learning_rate": 3.1440509212745584e-06,
"logits": -1.7355188131332397,
"logps": -100.78395080566406,
"loss": 0.3733,
"objective": 0.3431912660598755,
"ranking_simple": 0.7416666746139526,
"regularize": 0.3431912660598755,
"step": 335,
"wo_beta": 3.3042349815368652
},
{
"dpo_loss": 0.43064969778060913,
"epoch": 0.9636277751535192,
"grad_norm": 13.67781828181165,
"learning_rate": 3.0839150234522404e-06,
"logits": -1.781424641609192,
"logps": -99.20060729980469,
"loss": 0.4067,
"objective": 0.43064969778060913,
"ranking_simple": 0.7083333134651184,
"regularize": 0.43064969778060913,
"step": 340,
"wo_beta": 4.711233615875244
},
{
"dpo_loss": 0.3512551188468933,
"epoch": 0.9777987718469532,
"grad_norm": 13.324791495929361,
"learning_rate": 3.0234195752955032e-06,
"logits": -1.8463162183761597,
"logps": -95.25973510742188,
"loss": 0.3729,
"objective": 0.3512551188468933,
"ranking_simple": 0.7541666626930237,
"regularize": 0.3512551188468933,
"step": 345,
"wo_beta": 2.9623959064483643
},
{
"dpo_loss": 0.32169008255004883,
"epoch": 0.9919697685403873,
"grad_norm": 15.127441089438493,
"learning_rate": 2.962601827359208e-06,
"logits": -1.78915274143219,
"logps": -97.9096450805664,
"loss": 0.3652,
"objective": 0.32169008255004883,
"ranking_simple": 0.824999988079071,
"regularize": 0.32169008255004883,
"step": 350,
"wo_beta": 2.600872039794922
},
{
"epoch": 0.9919697685403873,
"eval_dpo_loss": 0.6984499096870422,
"eval_logits": -1.9094278812408447,
"eval_logps": -102.83055114746094,
"eval_loss": 0.7074127793312073,
"eval_objective": 0.6984499096870422,
"eval_ranking_simple": 0.5559006333351135,
"eval_regularize": 0.6984499096870422,
"eval_runtime": 367.6345,
"eval_samples_per_second": 15.749,
"eval_steps_per_second": 1.314,
"eval_wo_beta": 7.8343892097473145,
"step": 350
},
{
"dpo_loss": 0.2995939552783966,
"epoch": 1.0061407652338215,
"grad_norm": 9.31634152713278,
"learning_rate": 2.9014992286568773e-06,
"logits": -2.007425546646118,
"logps": -96.57454681396484,
"loss": 0.3042,
"objective": 0.2995939552783966,
"ranking_simple": 0.8041666746139526,
"regularize": 0.2995939552783966,
"step": 355,
"wo_beta": 2.7125356197357178
},
{
"dpo_loss": 0.24162109196186066,
"epoch": 1.0203117619272555,
"grad_norm": 11.90366408091981,
"learning_rate": 2.840149403601166e-06,
"logits": -1.9152239561080933,
"logps": -101.0685043334961,
"loss": 0.2301,
"objective": 0.24162109196186066,
"ranking_simple": 0.8125,
"regularize": 0.24162109196186066,
"step": 360,
"wo_beta": 2.7127444744110107
},
{
"dpo_loss": 0.19792620837688446,
"epoch": 1.0344827586206897,
"grad_norm": 9.884538723237807,
"learning_rate": 2.7785901288363253e-06,
"logits": -1.9689671993255615,
"logps": -107.58961486816406,
"loss": 0.2082,
"objective": 0.19792620837688446,
"ranking_simple": 0.8291666507720947,
"regularize": 0.19792620837688446,
"step": 365,
"wo_beta": 1.9694886207580566
},
{
"dpo_loss": 0.23545877635478973,
"epoch": 1.0486537553141237,
"grad_norm": 16.911618406294945,
"learning_rate": 2.7168593099769414e-06,
"logits": -1.9311782121658325,
"logps": -106.47748565673828,
"loss": 0.2379,
"objective": 0.23545877635478973,
"ranking_simple": 0.8125,
"regularize": 0.23545877635478973,
"step": 370,
"wo_beta": 3.021015167236328
},
{
"dpo_loss": 0.22429493069648743,
"epoch": 1.0628247520075578,
"grad_norm": 15.795921555252965,
"learning_rate": 2.654994958267241e-06,
"logits": -1.9985809326171875,
"logps": -108.60234832763672,
"loss": 0.2364,
"objective": 0.22429493069648743,
"ranking_simple": 0.8374999761581421,
"regularize": 0.22429493069648743,
"step": 375,
"wo_beta": 1.6982978582382202
},
{
"dpo_loss": 0.20513677597045898,
"epoch": 1.076995748700992,
"grad_norm": 15.164372148185517,
"learning_rate": 2.5930351671753707e-06,
"logits": -2.0427591800689697,
"logps": -109.7289047241211,
"loss": 0.2153,
"objective": 0.20513677597045898,
"ranking_simple": 0.8583333492279053,
"regularize": 0.20513677597045898,
"step": 380,
"wo_beta": 1.7354587316513062
},
{
"dpo_loss": 0.24022004008293152,
"epoch": 1.091166745394426,
"grad_norm": 13.938458603767867,
"learning_rate": 2.5310180889370374e-06,
"logits": -2.007528781890869,
"logps": -109.47885131835938,
"loss": 0.2371,
"objective": 0.24022004008293152,
"ranking_simple": 0.8291666507720947,
"regularize": 0.24022004008293152,
"step": 385,
"wo_beta": 1.9898384809494019
},
{
"dpo_loss": 0.22819384932518005,
"epoch": 1.10533774208786,
"grad_norm": 10.75019956346705,
"learning_rate": 2.468981911062964e-06,
"logits": -1.9904738664627075,
"logps": -110.48689270019531,
"loss": 0.2092,
"objective": 0.22819384932518005,
"ranking_simple": 0.8291666507720947,
"regularize": 0.22819384932518005,
"step": 390,
"wo_beta": 2.570746660232544
},
{
"dpo_loss": 0.2051982581615448,
"epoch": 1.1195087387812943,
"grad_norm": 10.957538026383743,
"learning_rate": 2.4069648328246305e-06,
"logits": -2.0448696613311768,
"logps": -108.89817810058594,
"loss": 0.2315,
"objective": 0.2051982581615448,
"ranking_simple": 0.8666666746139526,
"regularize": 0.2051982581615448,
"step": 395,
"wo_beta": 2.1264781951904297
},
{
"dpo_loss": 0.20951204001903534,
"epoch": 1.1336797354747283,
"grad_norm": 10.831310704591798,
"learning_rate": 2.3450050417327593e-06,
"logits": -2.0998401641845703,
"logps": -106.39730072021484,
"loss": 0.2206,
"objective": 0.20951204001903534,
"ranking_simple": 0.8500000238418579,
"regularize": 0.20951204001903534,
"step": 400,
"wo_beta": 1.4239428043365479
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 0.729559600353241,
"eval_logits": -2.090850591659546,
"eval_logps": -113.60484313964844,
"eval_loss": 0.7347043752670288,
"eval_objective": 0.729559600353241,
"eval_ranking_simple": 0.5502070188522339,
"eval_regularize": 0.729559600353241,
"eval_runtime": 367.8652,
"eval_samples_per_second": 15.739,
"eval_steps_per_second": 1.313,
"eval_wo_beta": 8.675128936767578,
"step": 400
},
{
"dpo_loss": 0.24946197867393494,
"epoch": 1.1478507321681626,
"grad_norm": 13.639203958436303,
"learning_rate": 2.2831406900230586e-06,
"logits": -2.0489606857299805,
"logps": -109.95741271972656,
"loss": 0.2302,
"objective": 0.24946197867393494,
"ranking_simple": 0.8208333253860474,
"regularize": 0.24946197867393494,
"step": 405,
"wo_beta": 3.3028647899627686
},
{
"dpo_loss": 0.22408606112003326,
"epoch": 1.1620217288615966,
"grad_norm": 9.982686874740121,
"learning_rate": 2.221409871163675e-06,
"logits": -2.026094913482666,
"logps": -112.10523986816406,
"loss": 0.204,
"objective": 0.22408606112003326,
"ranking_simple": 0.824999988079071,
"regularize": 0.22408606112003326,
"step": 410,
"wo_beta": 3.6203742027282715
},
{
"dpo_loss": 0.24647466838359833,
"epoch": 1.1761927255550306,
"grad_norm": 14.127702730262909,
"learning_rate": 2.1598505963988354e-06,
"logits": -1.9576979875564575,
"logps": -112.045654296875,
"loss": 0.2487,
"objective": 0.24647466838359833,
"ranking_simple": 0.8041666746139526,
"regularize": 0.24647466838359833,
"step": 415,
"wo_beta": 3.0596923828125
},
{
"dpo_loss": 0.2189057618379593,
"epoch": 1.1903637222484649,
"grad_norm": 12.453759090133827,
"learning_rate": 2.098500771343124e-06,
"logits": -1.9014623165130615,
"logps": -112.163330078125,
"loss": 0.224,
"objective": 0.2189057618379593,
"ranking_simple": 0.8333333134651184,
"regularize": 0.2189057618379593,
"step": 420,
"wo_beta": 2.8976945877075195
},
{
"dpo_loss": 0.24521614611148834,
"epoch": 1.204534718941899,
"grad_norm": 11.810902908919543,
"learning_rate": 2.037398172640793e-06,
"logits": -1.939537525177002,
"logps": -111.03417205810547,
"loss": 0.225,
"objective": 0.24521614611148834,
"ranking_simple": 0.8166666626930237,
"regularize": 0.24521614611148834,
"step": 425,
"wo_beta": 2.2815327644348145
},
{
"dpo_loss": 0.25871187448501587,
"epoch": 1.2187057156353331,
"grad_norm": 13.805740179315583,
"learning_rate": 1.976580424704498e-06,
"logits": -1.908257246017456,
"logps": -114.54412078857422,
"loss": 0.2315,
"objective": 0.25871187448501587,
"ranking_simple": 0.8208333253860474,
"regularize": 0.25871187448501587,
"step": 430,
"wo_beta": 2.5115749835968018
},
{
"dpo_loss": 0.20695915818214417,
"epoch": 1.2328767123287672,
"grad_norm": 13.189182575578958,
"learning_rate": 1.9160849765477604e-06,
"logits": -1.845086932182312,
"logps": -111.52816772460938,
"loss": 0.2502,
"objective": 0.20695915818214417,
"ranking_simple": 0.8666666746139526,
"regularize": 0.20695915818214417,
"step": 435,
"wo_beta": 2.1831676959991455
},
{
"dpo_loss": 0.2140309065580368,
"epoch": 1.2470477090222012,
"grad_norm": 15.231839264333766,
"learning_rate": 1.8559490787254423e-06,
"logits": -1.8013054132461548,
"logps": -112.4487533569336,
"loss": 0.2241,
"objective": 0.2140309065580368,
"ranking_simple": 0.8500000238418579,
"regularize": 0.2140309065580368,
"step": 440,
"wo_beta": 2.4584310054779053
},
{
"dpo_loss": 0.2538544535636902,
"epoch": 1.2612187057156352,
"grad_norm": 11.902330497163605,
"learning_rate": 1.7962097603964177e-06,
"logits": -1.8283072710037231,
"logps": -111.75948333740234,
"loss": 0.2393,
"objective": 0.2538544535636902,
"ranking_simple": 0.8291666507720947,
"regularize": 0.2538544535636902,
"step": 445,
"wo_beta": 2.5012738704681396
},
{
"dpo_loss": 0.21088647842407227,
"epoch": 1.2753897024090695,
"grad_norm": 12.114049593362662,
"learning_rate": 1.7369038065225743e-06,
"logits": -1.9961200952529907,
"logps": -110.46500396728516,
"loss": 0.2202,
"objective": 0.21088647842407227,
"ranking_simple": 0.8458333611488342,
"regularize": 0.21088647842407227,
"step": 450,
"wo_beta": 2.1799509525299072
},
{
"epoch": 1.2753897024090695,
"eval_dpo_loss": 0.7433450222015381,
"eval_logits": -1.9911303520202637,
"eval_logps": -115.7781753540039,
"eval_loss": 0.7462677955627441,
"eval_objective": 0.7433450222015381,
"eval_ranking_simple": 0.5512422323226929,
"eval_regularize": 0.7433450222015381,
"eval_runtime": 369.6423,
"eval_samples_per_second": 15.664,
"eval_steps_per_second": 1.307,
"eval_wo_beta": 8.912315368652344,
"step": 450
},
{
"dpo_loss": 0.22404690086841583,
"epoch": 1.2895606991025035,
"grad_norm": 12.618712156759832,
"learning_rate": 1.6780677352181781e-06,
"logits": -1.821974515914917,
"logps": -112.83589935302734,
"loss": 0.2503,
"objective": 0.22404690086841583,
"ranking_simple": 0.8125,
"regularize": 0.22404690086841583,
"step": 455,
"wo_beta": 2.789232015609741
},
{
"dpo_loss": 0.2399408221244812,
"epoch": 1.3037316957959377,
"grad_norm": 10.297452895979795,
"learning_rate": 1.6197377752635563e-06,
"logits": -1.9601954221725464,
"logps": -111.7652587890625,
"loss": 0.2322,
"objective": 0.2399408221244812,
"ranking_simple": 0.8166666626930237,
"regularize": 0.2399408221244812,
"step": 460,
"wo_beta": 3.1664645671844482
},
{
"dpo_loss": 0.21428868174552917,
"epoch": 1.3179026924893718,
"grad_norm": 11.673896578608254,
"learning_rate": 1.5619498437969302e-06,
"logits": -1.9731502532958984,
"logps": -109.419189453125,
"loss": 0.2432,
"objective": 0.21428868174552917,
"ranking_simple": 0.824999988079071,
"regularize": 0.21428868174552917,
"step": 465,
"wo_beta": 2.3578691482543945
},
{
"dpo_loss": 0.2447548806667328,
"epoch": 1.3320736891828058,
"grad_norm": 11.318434823205884,
"learning_rate": 1.5047395241981606e-06,
"logits": -2.0559751987457275,
"logps": -107.98088836669922,
"loss": 0.2364,
"objective": 0.2447548806667328,
"ranking_simple": 0.8500000238418579,
"regularize": 0.2447548806667328,
"step": 470,
"wo_beta": 2.524103879928589
},
{
"dpo_loss": 0.2010059952735901,
"epoch": 1.34624468587624,
"grad_norm": 13.4415115238709,
"learning_rate": 1.4481420441779862e-06,
"logits": -1.910614013671875,
"logps": -110.5365982055664,
"loss": 0.2194,
"objective": 0.2010059952735901,
"ranking_simple": 0.8666666746139526,
"regularize": 0.2010059952735901,
"step": 475,
"wo_beta": 2.4653513431549072
},
{
"dpo_loss": 0.21145032346248627,
"epoch": 1.360415682569674,
"grad_norm": 15.045958431800248,
"learning_rate": 1.3921922540862907e-06,
"logits": -2.0142934322357178,
"logps": -109.8845443725586,
"loss": 0.2244,
"objective": 0.21145032346248627,
"ranking_simple": 0.8166666626930237,
"regularize": 0.21145032346248627,
"step": 480,
"wo_beta": 3.300536632537842
},
{
"dpo_loss": 0.23482932150363922,
"epoch": 1.3745866792631083,
"grad_norm": 14.203111358887098,
"learning_rate": 1.3369246054527152e-06,
"logits": -1.996147632598877,
"logps": -108.94562530517578,
"loss": 0.2473,
"objective": 0.23482932150363922,
"ranking_simple": 0.8291666507720947,
"regularize": 0.23482932150363922,
"step": 485,
"wo_beta": 2.5876729488372803
},
{
"dpo_loss": 0.23283059895038605,
"epoch": 1.3887576759565423,
"grad_norm": 11.652820113044354,
"learning_rate": 1.2823731297728536e-06,
"logits": -1.98202645778656,
"logps": -112.48513793945312,
"loss": 0.2222,
"objective": 0.23283059895038605,
"ranking_simple": 0.824999988079071,
"regularize": 0.23283059895038605,
"step": 490,
"wo_beta": 2.047060251235962
},
{
"dpo_loss": 0.19643358886241913,
"epoch": 1.4029286726499763,
"grad_norm": 16.098991414414286,
"learning_rate": 1.2285714175530936e-06,
"logits": -1.9894219636917114,
"logps": -111.90442657470703,
"loss": 0.2257,
"objective": 0.19643358886241913,
"ranking_simple": 0.8458333611488342,
"regularize": 0.19643358886241913,
"step": 495,
"wo_beta": 2.041602611541748
},
{
"dpo_loss": 0.22570651769638062,
"epoch": 1.4170996693434104,
"grad_norm": 14.942258982525255,
"learning_rate": 1.1755525976269851e-06,
"logits": -1.9338775873184204,
"logps": -109.2611083984375,
"loss": 0.2366,
"objective": 0.22570651769638062,
"ranking_simple": 0.8208333253860474,
"regularize": 0.22570651769638062,
"step": 500,
"wo_beta": 1.8392161130905151
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 0.7387389540672302,
"eval_logits": -2.046374797821045,
"eval_logps": -114.77104187011719,
"eval_loss": 0.7444195747375488,
"eval_objective": 0.7387389540672302,
"eval_ranking_simple": 0.5517598390579224,
"eval_regularize": 0.7387389540672302,
"eval_runtime": 367.6953,
"eval_samples_per_second": 15.747,
"eval_steps_per_second": 1.314,
"eval_wo_beta": 8.863048553466797,
"step": 500
},
{
"dpo_loss": 0.18702387809753418,
"epoch": 1.4312706660368446,
"grad_norm": 11.645842357352148,
"learning_rate": 1.1233493167559065e-06,
"logits": -2.00331711769104,
"logps": -110.28618621826172,
"loss": 0.2087,
"objective": 0.18702387809753418,
"ranking_simple": 0.8458333611488342,
"regularize": 0.18702387809753418,
"step": 505,
"wo_beta": 2.0265085697174072
},
{
"dpo_loss": 0.23954346776008606,
"epoch": 1.4454416627302786,
"grad_norm": 11.232727785445284,
"learning_rate": 1.0719937195265555e-06,
"logits": -2.0127902030944824,
"logps": -110.59080505371094,
"loss": 0.2313,
"objective": 0.23954346776008606,
"ranking_simple": 0.8458333611488342,
"regularize": 0.23954346776008606,
"step": 510,
"wo_beta": 2.1970200538635254
},
{
"dpo_loss": 0.20086827874183655,
"epoch": 1.4596126594237129,
"grad_norm": 10.217720875279324,
"learning_rate": 1.0215174285576615e-06,
"logits": -2.087155818939209,
"logps": -110.73674774169922,
"loss": 0.2043,
"objective": 0.20086827874183655,
"ranking_simple": 0.8083333373069763,
"regularize": 0.20086827874183655,
"step": 515,
"wo_beta": 2.5676777362823486
},
{
"dpo_loss": 0.19097186625003815,
"epoch": 1.473783656117147,
"grad_norm": 11.650910390578003,
"learning_rate": 9.719515250281122e-07,
"logits": -1.9594320058822632,
"logps": -110.79000091552734,
"loss": 0.1898,
"objective": 0.19097186625003815,
"ranking_simple": 0.8833333253860474,
"regularize": 0.19097186625003815,
"step": 520,
"wo_beta": 2.2379517555236816
},
{
"dpo_loss": 0.2083693891763687,
"epoch": 1.487954652810581,
"grad_norm": 12.830417240600797,
"learning_rate": 9.233265295384624e-07,
"logits": -1.8601106405258179,
"logps": -110.70569610595703,
"loss": 0.2274,
"objective": 0.2083693891763687,
"ranking_simple": 0.8416666388511658,
"regularize": 0.2083693891763687,
"step": 525,
"wo_beta": 2.14703369140625
},
{
"dpo_loss": 0.19533474743366241,
"epoch": 1.5021256495040152,
"grad_norm": 14.108055837836243,
"learning_rate": 8.756723833176376e-07,
"logits": -2.0109665393829346,
"logps": -114.77526092529297,
"loss": 0.205,
"objective": 0.19533474743366241,
"ranking_simple": 0.8583333492279053,
"regularize": 0.19533474743366241,
"step": 530,
"wo_beta": 1.9219799041748047
},
{
"dpo_loss": 0.18096224963665009,
"epoch": 1.5162966461974492,
"grad_norm": 13.367132869446376,
"learning_rate": 8.290184297863793e-07,
"logits": -1.971710205078125,
"logps": -113.77371978759766,
"loss": 0.188,
"objective": 0.18096224963665009,
"ranking_simple": 0.8541666865348816,
"regularize": 0.18096224963665009,
"step": 535,
"wo_beta": 2.045213222503662
},
{
"dpo_loss": 0.26400619745254517,
"epoch": 1.5304676428908834,
"grad_norm": 15.135814096600672,
"learning_rate": 7.833933964887985e-07,
"logits": -1.9013224840164185,
"logps": -113.3902816772461,
"loss": 0.221,
"objective": 0.26400619745254517,
"ranking_simple": 0.8374999761581421,
"regularize": 0.26400619745254517,
"step": 540,
"wo_beta": 2.3301329612731934
},
{
"dpo_loss": 0.23929236829280853,
"epoch": 1.5446386395843175,
"grad_norm": 15.186538591312983,
"learning_rate": 7.388253774031659e-07,
"logits": -1.9914318323135376,
"logps": -113.67050170898438,
"loss": 0.2343,
"objective": 0.23929236829280853,
"ranking_simple": 0.8666666746139526,
"regularize": 0.23929236829280853,
"step": 545,
"wo_beta": 1.8687002658843994
},
{
"dpo_loss": 0.17025238275527954,
"epoch": 1.5588096362777515,
"grad_norm": 11.147125855718437,
"learning_rate": 6.953418156428152e-07,
"logits": -1.9760197401046753,
"logps": -114.83491516113281,
"loss": 0.1989,
"objective": 0.17025238275527954,
"ranking_simple": 0.8458333611488342,
"regularize": 0.17025238275527954,
"step": 550,
"wo_beta": 2.0717434883117676
},
{
"epoch": 1.5588096362777515,
"eval_dpo_loss": 0.7519087195396423,
"eval_logits": -2.0168354511260986,
"eval_logps": -118.77753448486328,
"eval_loss": 0.7552616596221924,
"eval_objective": 0.7519087195396423,
"eval_ranking_simple": 0.5595238208770752,
"eval_regularize": 0.7519087195396423,
"eval_runtime": 367.683,
"eval_samples_per_second": 15.747,
"eval_steps_per_second": 1.314,
"eval_wo_beta": 8.984560012817383,
"step": 550
},
{
"dpo_loss": 0.2299811840057373,
"epoch": 1.5729806329711855,
"grad_norm": 14.207116899254263,
"learning_rate": 6.529694865578318e-07,
"logits": -1.7790377140045166,
"logps": -116.34921264648438,
"loss": 0.2215,
"objective": 0.2299811840057373,
"ranking_simple": 0.8166666626930237,
"regularize": 0.2299811840057373,
"step": 555,
"wo_beta": 2.6884348392486572
},
{
"dpo_loss": 0.2206832468509674,
"epoch": 1.5871516296646198,
"grad_norm": 14.824039668104726,
"learning_rate": 6.117344812479154e-07,
"logits": -1.8644143342971802,
"logps": -111.79485321044922,
"loss": 0.2146,
"objective": 0.2206832468509674,
"ranking_simple": 0.8416666388511658,
"regularize": 0.2206832468509674,
"step": 560,
"wo_beta": 2.7559573650360107
},
{
"dpo_loss": 0.18442773818969727,
"epoch": 1.601322626358054,
"grad_norm": 13.453462098796246,
"learning_rate": 5.71662190496575e-07,
"logits": -1.8391135931015015,
"logps": -113.25553131103516,
"loss": 0.2078,
"objective": 0.18442773818969727,
"ranking_simple": 0.8333333134651184,
"regularize": 0.18442773818969727,
"step": 565,
"wo_beta": 2.2805912494659424
},
{
"dpo_loss": 0.19617310166358948,
"epoch": 1.615493623051488,
"grad_norm": 12.497698816224,
"learning_rate": 5.327772891365565e-07,
"logits": -1.984673023223877,
"logps": -115.45191192626953,
"loss": 0.1869,
"objective": 0.19617310166358948,
"ranking_simple": 0.8374999761581421,
"regularize": 0.19617310166358948,
"step": 570,
"wo_beta": 2.3473691940307617
},
{
"dpo_loss": 0.2390568107366562,
"epoch": 1.629664619744922,
"grad_norm": 14.590840547972736,
"learning_rate": 4.951037208561116e-07,
"logits": -1.9447566270828247,
"logps": -111.96437072753906,
"loss": 0.2312,
"objective": 0.2390568107366562,
"ranking_simple": 0.8458333611488342,
"regularize": 0.2390568107366562,
"step": 575,
"wo_beta": 2.296287775039673
},
{
"dpo_loss": 0.20053791999816895,
"epoch": 1.643835616438356,
"grad_norm": 15.024890919749566,
"learning_rate": 4.586646834554864e-07,
"logits": -1.9810107946395874,
"logps": -110.92058563232422,
"loss": 0.2089,
"objective": 0.20053791999816895,
"ranking_simple": 0.8666666746139526,
"regularize": 0.20053791999816895,
"step": 580,
"wo_beta": 1.4837419986724854
},
{
"dpo_loss": 0.19388006627559662,
"epoch": 1.6580066131317903,
"grad_norm": 11.343666690979733,
"learning_rate": 4.234826145626855e-07,
"logits": -1.8341389894485474,
"logps": -109.36263275146484,
"loss": 0.2048,
"objective": 0.19388006627559662,
"ranking_simple": 0.8500000238418579,
"regularize": 0.19388006627559662,
"step": 585,
"wo_beta": 2.263803243637085
},
{
"dpo_loss": 0.2181146889925003,
"epoch": 1.6721776098252243,
"grad_norm": 13.28342611357644,
"learning_rate": 3.8957917781732883e-07,
"logits": -1.87205970287323,
"logps": -113.15511322021484,
"loss": 0.2156,
"objective": 0.2181146889925003,
"ranking_simple": 0.8333333134651184,
"regularize": 0.2181146889925003,
"step": 590,
"wo_beta": 2.4472413063049316
},
{
"dpo_loss": 0.1822492927312851,
"epoch": 1.6863486065186586,
"grad_norm": 10.596854767592859,
"learning_rate": 3.569752495310877e-07,
"logits": -1.8391311168670654,
"logps": -113.05868530273438,
"loss": 0.1882,
"objective": 0.1822492927312851,
"ranking_simple": 0.8333333134651184,
"regularize": 0.1822492927312851,
"step": 595,
"wo_beta": 2.815018892288208
},
{
"dpo_loss": 0.17215129733085632,
"epoch": 1.7005196032120926,
"grad_norm": 11.460383813341208,
"learning_rate": 3.2569090583293356e-07,
"logits": -1.8718314170837402,
"logps": -113.5940933227539,
"loss": 0.1952,
"objective": 0.17215129733085632,
"ranking_simple": 0.8583333492279053,
"regularize": 0.17215129733085632,
"step": 600,
"wo_beta": 1.7114546298980713
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 0.7512882947921753,
"eval_logits": -1.970719814300537,
"eval_logps": -117.48804473876953,
"eval_loss": 0.7544336318969727,
"eval_objective": 0.7512882947921753,
"eval_ranking_simple": 0.5595238208770752,
"eval_regularize": 0.7512882947921753,
"eval_runtime": 367.7185,
"eval_samples_per_second": 15.746,
"eval_steps_per_second": 1.314,
"eval_wo_beta": 9.029719352722168,
"step": 600
},
{
"dpo_loss": 0.19925004243850708,
"epoch": 1.7146905999055266,
"grad_norm": 17.415629157530653,
"learning_rate": 2.957454103070978e-07,
"logits": -1.8373870849609375,
"logps": -112.65380096435547,
"loss": 0.2262,
"objective": 0.19925004243850708,
"ranking_simple": 0.8333333134651184,
"regularize": 0.19925004243850708,
"step": 605,
"wo_beta": 2.9711499214172363
},
{
"dpo_loss": 0.21553590893745422,
"epoch": 1.7288615965989607,
"grad_norm": 16.124254557780983,
"learning_rate": 2.6715720213136955e-07,
"logits": -1.8880244493484497,
"logps": -112.18531799316406,
"loss": 0.1988,
"objective": 0.21553590893745422,
"ranking_simple": 0.8291666507720947,
"regularize": 0.21553590893745422,
"step": 610,
"wo_beta": 2.8244071006774902
},
{
"dpo_loss": 0.21212069690227509,
"epoch": 1.743032593292395,
"grad_norm": 12.906958893668936,
"learning_rate": 2.399438847230212e-07,
"logits": -1.9108936786651611,
"logps": -111.8115005493164,
"loss": 0.2027,
"objective": 0.21212069690227509,
"ranking_simple": 0.8166666626930237,
"regularize": 0.21212069690227509,
"step": 615,
"wo_beta": 2.961397886276245
},
{
"dpo_loss": 0.20163790881633759,
"epoch": 1.7572035899858292,
"grad_norm": 15.219355254954188,
"learning_rate": 2.1412221489936796e-07,
"logits": -1.947303295135498,
"logps": -111.9202880859375,
"loss": 0.2188,
"objective": 0.20163790881633759,
"ranking_simple": 0.8583333492279053,
"regularize": 0.20163790881633759,
"step": 620,
"wo_beta": 1.839400053024292
},
{
"dpo_loss": 0.1938161551952362,
"epoch": 1.7713745866792632,
"grad_norm": 17.102171291997916,
"learning_rate": 1.897080925596187e-07,
"logits": -1.8294084072113037,
"logps": -111.60057830810547,
"loss": 0.2068,
"objective": 0.1938161551952362,
"ranking_simple": 0.8583333492279053,
"regularize": 0.1938161551952362,
"step": 625,
"wo_beta": 2.0019137859344482
},
{
"dpo_loss": 0.25330111384391785,
"epoch": 1.7855455833726972,
"grad_norm": 14.422081659451388,
"learning_rate": 1.6671655089439186e-07,
"logits": -1.8686004877090454,
"logps": -112.4965591430664,
"loss": 0.2312,
"objective": 0.25330111384391785,
"ranking_simple": 0.7791666388511658,
"regularize": 0.25330111384391785,
"step": 630,
"wo_beta": 3.286885976791382
},
{
"dpo_loss": 0.20808285474777222,
"epoch": 1.7997165800661312,
"grad_norm": 16.725368606459067,
"learning_rate": 1.4516174712890406e-07,
"logits": -1.9575639963150024,
"logps": -114.01631164550781,
"loss": 0.2184,
"objective": 0.20808285474777222,
"ranking_simple": 0.8291666507720947,
"regularize": 0.20808285474777222,
"step": 635,
"wo_beta": 2.427999258041382
},
{
"dpo_loss": 0.20768284797668457,
"epoch": 1.8138875767595655,
"grad_norm": 12.030792650400683,
"learning_rate": 1.2505695380554712e-07,
"logits": -1.8871350288391113,
"logps": -113.53579711914062,
"loss": 0.2015,
"objective": 0.20768284797668457,
"ranking_simple": 0.8291666507720947,
"regularize": 0.20768284797668457,
"step": 640,
"wo_beta": 2.9419753551483154
},
{
"dpo_loss": 0.21022367477416992,
"epoch": 1.8280585734529995,
"grad_norm": 12.06072694382626,
"learning_rate": 1.0641455061121519e-07,
"logits": -1.9376109838485718,
"logps": -114.6182632446289,
"loss": 0.2087,
"objective": 0.21022367477416992,
"ranking_simple": 0.875,
"regularize": 0.21022367477416992,
"step": 645,
"wo_beta": 2.246778964996338
},
{
"dpo_loss": 0.21791066229343414,
"epoch": 1.8422295701464337,
"grad_norm": 17.74332836561221,
"learning_rate": 8.924601675441207e-08,
"logits": -1.9945629835128784,
"logps": -115.14165496826172,
"loss": 0.2252,
"objective": 0.21791066229343414,
"ranking_simple": 0.8791666626930237,
"regularize": 0.21791066229343414,
"step": 650,
"wo_beta": 1.7516138553619385
},
{
"epoch": 1.8422295701464337,
"eval_dpo_loss": 0.7528993487358093,
"eval_logits": -1.9747523069381714,
"eval_logps": -117.80084228515625,
"eval_loss": 0.7559728622436523,
"eval_objective": 0.7528993487358093,
"eval_ranking_simple": 0.5584886074066162,
"eval_regularize": 0.7528993487358093,
"eval_runtime": 367.648,
"eval_samples_per_second": 15.749,
"eval_steps_per_second": 1.314,
"eval_wo_beta": 9.092611312866211,
"step": 650
},
{
"dpo_loss": 0.21706603467464447,
"epoch": 1.8564005668398678,
"grad_norm": 13.027657065435218,
"learning_rate": 7.356192389683825e-08,
"logits": -1.844641923904419,
"logps": -110.724609375,
"loss": 0.2263,
"objective": 0.21706603467464447,
"ranking_simple": 0.8333333134651184,
"regularize": 0.21706603467464447,
"step": 655,
"wo_beta": 2.2099640369415283
},
{
"dpo_loss": 0.19901646673679352,
"epoch": 1.8705715635333018,
"grad_norm": 11.442935681801226,
"learning_rate": 5.937192964380556e-08,
"logits": -1.9180775880813599,
"logps": -115.1898193359375,
"loss": 0.1992,
"objective": 0.19901646673679352,
"ranking_simple": 0.8708333373069763,
"regularize": 0.19901646673679352,
"step": 660,
"wo_beta": 1.9250274896621704
},
{
"dpo_loss": 0.20594698190689087,
"epoch": 1.8847425602267358,
"grad_norm": 14.384890803281685,
"learning_rate": 4.668477159748858e-08,
"logits": -1.8975155353546143,
"logps": -112.05779266357422,
"loss": 0.22,
"objective": 0.20594698190689087,
"ranking_simple": 0.8458333611488342,
"regularize": 0.20594698190689087,
"step": 665,
"wo_beta": 1.7807292938232422
},
{
"dpo_loss": 0.23021073639392853,
"epoch": 1.89891355692017,
"grad_norm": 11.678760414899436,
"learning_rate": 3.5508261976678894e-08,
"logits": -1.8350870609283447,
"logps": -111.71804809570312,
"loss": 0.2054,
"objective": 0.23021073639392853,
"ranking_simple": 0.8208333253860474,
"regularize": 0.23021073639392853,
"step": 670,
"wo_beta": 2.838752269744873
},
{
"dpo_loss": 0.19814661145210266,
"epoch": 1.9130845536136043,
"grad_norm": 14.42884511344777,
"learning_rate": 2.5849282806345855e-08,
"logits": -1.8832274675369263,
"logps": -112.52620697021484,
"loss": 0.2208,
"objective": 0.19814661145210266,
"ranking_simple": 0.8791666626930237,
"regularize": 0.19814661145210266,
"step": 675,
"wo_beta": 1.6049467325210571
},
{
"dpo_loss": 0.2337852120399475,
"epoch": 1.9272555503070383,
"grad_norm": 12.72035969054902,
"learning_rate": 1.771378167997745e-08,
"logits": -1.9469962120056152,
"logps": -114.031494140625,
"loss": 0.2132,
"objective": 0.2337852120399475,
"ranking_simple": 0.8541666865348816,
"regularize": 0.2337852120399475,
"step": 680,
"wo_beta": 2.8053526878356934
},
{
"dpo_loss": 0.18736791610717773,
"epoch": 1.9414265470004723,
"grad_norm": 11.578463444163043,
"learning_rate": 1.1106768097300657e-08,
"logits": -1.8774739503860474,
"logps": -113.84810638427734,
"loss": 0.1807,
"objective": 0.18736791610717773,
"ranking_simple": 0.8458333611488342,
"regularize": 0.18736791610717773,
"step": 685,
"wo_beta": 2.1088075637817383
},
{
"dpo_loss": 0.19145923852920532,
"epoch": 1.9555975436939064,
"grad_norm": 11.501744444827747,
"learning_rate": 6.032310379642803e-09,
"logits": -1.9342644214630127,
"logps": -112.51237487792969,
"loss": 0.204,
"objective": 0.19145923852920532,
"ranking_simple": 0.8583333492279053,
"regularize": 0.19145923852920532,
"step": 690,
"wo_beta": 1.7116554975509644
},
{
"dpo_loss": 0.2177831530570984,
"epoch": 1.9697685403873406,
"grad_norm": 13.662395330088454,
"learning_rate": 2.4935331648298644e-09,
"logits": -1.7998664379119873,
"logps": -113.02543640136719,
"loss": 0.2048,
"objective": 0.2177831530570984,
"ranking_simple": 0.8166666626930237,
"regularize": 0.2177831530570984,
"step": 695,
"wo_beta": 2.7438058853149414
},
{
"dpo_loss": 0.19972722232341766,
"epoch": 1.9839395370807746,
"grad_norm": 13.232400433126339,
"learning_rate": 4.926154831655372e-10,
"logits": -1.788934350013733,
"logps": -115.31278228759766,
"loss": 0.199,
"objective": 0.19972722232341766,
"ranking_simple": 0.8583333492279053,
"regularize": 0.19972722232341766,
"step": 700,
"wo_beta": 2.272697687149048
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 0.7537301182746887,
"eval_logits": -1.9777543544769287,
"eval_logps": -117.9869155883789,
"eval_loss": 0.7566089034080505,
"eval_objective": 0.7537301182746887,
"eval_ranking_simple": 0.5595238208770752,
"eval_regularize": 0.7537301182746887,
"eval_runtime": 367.5678,
"eval_samples_per_second": 15.752,
"eval_steps_per_second": 1.314,
"eval_wo_beta": 9.104193687438965,
"step": 700
},
{
"epoch": 1.995276334435522,
"step": 704,
"total_flos": 0.0,
"train_loss": 0.3641128831289031,
"train_runtime": 23012.8835,
"train_samples_per_second": 4.415,
"train_steps_per_second": 0.031
}
],
"logging_steps": 5,
"max_steps": 704,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}