{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.995276334435522, "eval_steps": 50, "global_step": 704, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.002834199338686821, "grad_norm": 18.397043918777676, "learning_rate": 7.042253521126761e-08, "logits": -1.2867579460144043, "logps": -84.34933471679688, "loss": 0.6931, "objective": 0.6931471824645996, "ranking_simple": 0.5833333134651184, "regularize": 0.6931471824645996, "step": 1, "wo_beta": 5.2708282470703125 }, { "dpo_loss": 0.6924155354499817, "epoch": 0.014170996693434105, "grad_norm": 18.351841049696436, "learning_rate": 3.521126760563381e-07, "logits": -1.430939793586731, "logps": -83.6253890991211, "loss": 0.6927, "objective": 0.6924155354499817, "ranking_simple": 0.4895833432674408, "regularize": 0.6924155354499817, "step": 5, "wo_beta": 7.649607181549072 }, { "dpo_loss": 0.6938675045967102, "epoch": 0.02834199338686821, "grad_norm": 19.803358997748763, "learning_rate": 7.042253521126762e-07, "logits": -1.4047328233718872, "logps": -83.41845703125, "loss": 0.6929, "objective": 0.6938675045967102, "ranking_simple": 0.5833333134651184, "regularize": 0.6938675045967102, "step": 10, "wo_beta": 5.940184593200684 }, { "dpo_loss": 0.6885419487953186, "epoch": 0.042512990080302314, "grad_norm": 19.861932182637975, "learning_rate": 1.0563380281690142e-06, "logits": -1.5388954877853394, "logps": -82.4270248413086, "loss": 0.689, "objective": 0.6885419487953186, "ranking_simple": 0.5708333253860474, "regularize": 0.6885419487953186, "step": 15, "wo_beta": 6.731040000915527 }, { "dpo_loss": 0.6827310919761658, "epoch": 0.05668398677373642, "grad_norm": 16.87924772685924, "learning_rate": 1.4084507042253523e-06, "logits": -1.3925108909606934, "logps": -80.71045684814453, "loss": 0.684, "objective": 0.6827310919761658, "ranking_simple": 0.4833333194255829, "regularize": 0.6827310919761658, "step": 20, "wo_beta": 8.312360763549805 }, { "dpo_loss": 0.6760162711143494, "epoch": 0.07085498346717052, "grad_norm": 17.86836459306927, "learning_rate": 1.7605633802816902e-06, "logits": -1.4580414295196533, "logps": -80.7186508178711, "loss": 0.6703, "objective": 0.6760162711143494, "ranking_simple": 0.5833333134651184, "regularize": 0.6760162711143494, "step": 25, "wo_beta": 6.106756210327148 }, { "dpo_loss": 0.6718389391899109, "epoch": 0.08502598016060463, "grad_norm": 19.355249838601942, "learning_rate": 2.1126760563380285e-06, "logits": -1.6006718873977661, "logps": -84.822021484375, "loss": 0.6727, "objective": 0.6718389391899109, "ranking_simple": 0.4958333373069763, "regularize": 0.6718389391899109, "step": 30, "wo_beta": 8.509283065795898 }, { "dpo_loss": 0.6751564145088196, "epoch": 0.09919697685403873, "grad_norm": 19.23258032194357, "learning_rate": 2.4647887323943666e-06, "logits": -1.6175826787948608, "logps": -85.66646575927734, "loss": 0.6624, "objective": 0.6751564145088196, "ranking_simple": 0.5416666865348816, "regularize": 0.6751564145088196, "step": 35, "wo_beta": 6.50329065322876 }, { "dpo_loss": 0.6747376322746277, "epoch": 0.11336797354747284, "grad_norm": 20.018488756494, "learning_rate": 2.8169014084507046e-06, "logits": -1.621884822845459, "logps": -84.25798797607422, "loss": 0.6598, "objective": 0.6747376322746277, "ranking_simple": 0.4791666567325592, "regularize": 0.6747376322746277, "step": 40, "wo_beta": 6.886596202850342 }, { "dpo_loss": 0.6456737518310547, "epoch": 0.12753897024090693, "grad_norm": 14.933918801623976, "learning_rate": 3.1690140845070427e-06, "logits": -1.6078789234161377, "logps": -81.69025421142578, "loss": 0.6563, "objective": 0.6456737518310547, "ranking_simple": 0.5166666507720947, "regularize": 0.6456737518310547, "step": 45, "wo_beta": 6.529275417327881 }, { "dpo_loss": 0.6442943811416626, "epoch": 0.14170996693434104, "grad_norm": 16.57958550585861, "learning_rate": 3.5211267605633804e-06, "logits": -1.6018227338790894, "logps": -82.79893493652344, "loss": 0.6316, "objective": 0.6442943811416626, "ranking_simple": 0.5375000238418579, "regularize": 0.6442943811416626, "step": 50, "wo_beta": 7.091952323913574 }, { "epoch": 0.14170996693434104, "eval_dpo_loss": 0.6824604868888855, "eval_logits": -1.587925672531128, "eval_logps": -90.32820892333984, "eval_loss": 0.6806595921516418, "eval_objective": 0.6824604868888855, "eval_ranking_simple": 0.5341615080833435, "eval_regularize": 0.6824604868888855, "eval_runtime": 367.8191, "eval_samples_per_second": 15.741, "eval_steps_per_second": 1.313, "eval_wo_beta": 7.8618669509887695, "step": 50 }, { "dpo_loss": 0.6267839074134827, "epoch": 0.15588096362777515, "grad_norm": 17.05808296065035, "learning_rate": 3.873239436619718e-06, "logits": -1.5757447481155396, "logps": -84.72220611572266, "loss": 0.6258, "objective": 0.6267839074134827, "ranking_simple": 0.5208333134651184, "regularize": 0.6267839074134827, "step": 55, "wo_beta": 6.998147487640381 }, { "dpo_loss": 0.6304137110710144, "epoch": 0.17005196032120926, "grad_norm": 17.359597890876504, "learning_rate": 4.225352112676057e-06, "logits": -1.5341871976852417, "logps": -87.07388305664062, "loss": 0.6204, "objective": 0.6304137110710144, "ranking_simple": 0.5708333253860474, "regularize": 0.6304137110710144, "step": 60, "wo_beta": 5.637550354003906 }, { "dpo_loss": 0.5928328633308411, "epoch": 0.18422295701464336, "grad_norm": 18.81033297245387, "learning_rate": 4.577464788732395e-06, "logits": -1.681604266166687, "logps": -88.5802001953125, "loss": 0.6021, "objective": 0.5928328633308411, "ranking_simple": 0.6041666865348816, "regularize": 0.5928328633308411, "step": 65, "wo_beta": 5.803915023803711 }, { "dpo_loss": 0.5882770419120789, "epoch": 0.19839395370807747, "grad_norm": 18.134916504512542, "learning_rate": 4.929577464788733e-06, "logits": -1.7763893604278564, "logps": -92.76559448242188, "loss": 0.5898, "objective": 0.5882770419120789, "ranking_simple": 0.5874999761581421, "regularize": 0.5882770419120789, "step": 70, "wo_beta": 6.3991007804870605 }, { "dpo_loss": 0.5995556712150574, "epoch": 0.21256495040151158, "grad_norm": 21.061539129772793, "learning_rate": 4.999507384516835e-06, "logits": -1.727323055267334, "logps": -92.68614196777344, "loss": 0.5904, "objective": 0.5995556712150574, "ranking_simple": 0.5874999761581421, "regularize": 0.5995556712150574, "step": 75, "wo_beta": 6.095489978790283 }, { "dpo_loss": 0.5429711937904358, "epoch": 0.22673594709494568, "grad_norm": 17.500250856825723, "learning_rate": 4.997506466835171e-06, "logits": -1.7568250894546509, "logps": -89.59777069091797, "loss": 0.5696, "objective": 0.5429711937904358, "ranking_simple": 0.6625000238418579, "regularize": 0.5429711937904358, "step": 80, "wo_beta": 4.702788829803467 }, { "dpo_loss": 0.5587875247001648, "epoch": 0.2409069437883798, "grad_norm": 14.610779362874785, "learning_rate": 4.9939676896203576e-06, "logits": -1.7483054399490356, "logps": -89.5750961303711, "loss": 0.5788, "objective": 0.5587875247001648, "ranking_simple": 0.6291666626930237, "regularize": 0.5587875247001648, "step": 85, "wo_beta": 5.067751407623291 }, { "dpo_loss": 0.4884372353553772, "epoch": 0.25507794048181387, "grad_norm": 16.59773892580396, "learning_rate": 4.9888932319026994e-06, "logits": -1.829767107963562, "logps": -88.12342071533203, "loss": 0.5584, "objective": 0.4884372353553772, "ranking_simple": 0.6541666388511658, "regularize": 0.4884372353553772, "step": 90, "wo_beta": 4.8033833503723145 }, { "dpo_loss": 0.5380887985229492, "epoch": 0.269248937175248, "grad_norm": 16.942898511925186, "learning_rate": 4.982286218320023e-06, "logits": -1.8157219886779785, "logps": -89.4225082397461, "loss": 0.548, "objective": 0.5380887985229492, "ranking_simple": 0.6208333373069763, "regularize": 0.5380887985229492, "step": 95, "wo_beta": 6.339680194854736 }, { "dpo_loss": 0.5714857578277588, "epoch": 0.2834199338686821, "grad_norm": 16.553483764105664, "learning_rate": 4.974150717193654e-06, "logits": -1.7562583684921265, "logps": -88.6811294555664, "loss": 0.5922, "objective": 0.5714857578277588, "ranking_simple": 0.6083333492279053, "regularize": 0.5714857578277588, "step": 100, "wo_beta": 5.948981285095215 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.6818667054176331, "eval_logits": -1.7963634729385376, "eval_logps": -95.815185546875, "eval_loss": 0.6793084740638733, "eval_objective": 0.6818667054176331, "eval_ranking_simple": 0.5486542582511902, "eval_regularize": 0.6818667054176331, "eval_runtime": 368.0636, "eval_samples_per_second": 15.731, "eval_steps_per_second": 1.312, "eval_wo_beta": 7.707693576812744, "step": 100 }, { "dpo_loss": 0.5925873517990112, "epoch": 0.2975909305621162, "grad_norm": 16.87738475184783, "learning_rate": 4.964491738023321e-06, "logits": -1.8753679990768433, "logps": -90.80838775634766, "loss": 0.5707, "objective": 0.5925873517990112, "ranking_simple": 0.6291666626930237, "regularize": 0.5925873517990112, "step": 105, "wo_beta": 5.3273797035217285 }, { "dpo_loss": 0.5388583540916443, "epoch": 0.3117619272555503, "grad_norm": 15.466132286249575, "learning_rate": 4.953315228402512e-06, "logits": -1.7556500434875488, "logps": -88.66165924072266, "loss": 0.5374, "objective": 0.5388583540916443, "ranking_simple": 0.5916666388511658, "regularize": 0.5388583540916443, "step": 110, "wo_beta": 5.21142053604126 }, { "dpo_loss": 0.5987796187400818, "epoch": 0.32593292394898443, "grad_norm": 16.57925719769902, "learning_rate": 4.9406280703561944e-06, "logits": -1.6699596643447876, "logps": -87.46602630615234, "loss": 0.5713, "objective": 0.5987796187400818, "ranking_simple": 0.6208333373069763, "regularize": 0.5987796187400818, "step": 115, "wo_beta": 5.849599838256836 }, { "dpo_loss": 0.5472472906112671, "epoch": 0.3401039206424185, "grad_norm": 16.2014631256649, "learning_rate": 4.926438076103162e-06, "logits": -1.6490483283996582, "logps": -87.74899291992188, "loss": 0.5603, "objective": 0.5472472906112671, "ranking_simple": 0.6333333253860474, "regularize": 0.5472472906112671, "step": 120, "wo_beta": 6.010056495666504 }, { "dpo_loss": 0.5509154796600342, "epoch": 0.35427491733585265, "grad_norm": 14.950351510543216, "learning_rate": 4.910753983245589e-06, "logits": -1.7191225290298462, "logps": -88.99519348144531, "loss": 0.5397, "objective": 0.5509154796600342, "ranking_simple": 0.6083333492279053, "regularize": 0.5509154796600342, "step": 125, "wo_beta": 5.906139850616455 }, { "dpo_loss": 0.5123094916343689, "epoch": 0.3684459140292867, "grad_norm": 13.21141785871735, "learning_rate": 4.893585449388786e-06, "logits": -1.695233702659607, "logps": -85.64789581298828, "loss": 0.5398, "objective": 0.5123094916343689, "ranking_simple": 0.6583333611488342, "regularize": 0.5123094916343689, "step": 130, "wo_beta": 4.518141269683838 }, { "dpo_loss": 0.5679463744163513, "epoch": 0.3826169107227208, "grad_norm": 16.522848387628404, "learning_rate": 4.8749430461944536e-06, "logits": -1.6519335508346558, "logps": -88.2005615234375, "loss": 0.5526, "objective": 0.5679463744163513, "ranking_simple": 0.6333333253860474, "regularize": 0.5679463744163513, "step": 135, "wo_beta": 5.8905439376831055 }, { "dpo_loss": 0.5832223296165466, "epoch": 0.39678790741615494, "grad_norm": 18.436163706658995, "learning_rate": 4.854838252871097e-06, "logits": -1.5592352151870728, "logps": -90.67977142333984, "loss": 0.5534, "objective": 0.5832223296165466, "ranking_simple": 0.625, "regularize": 0.5832223296165466, "step": 140, "wo_beta": 5.7765278816223145 }, { "dpo_loss": 0.510637640953064, "epoch": 0.410958904109589, "grad_norm": 15.85144306565166, "learning_rate": 4.833283449105609e-06, "logits": -1.5676114559173584, "logps": -90.87916564941406, "loss": 0.527, "objective": 0.510637640953064, "ranking_simple": 0.699999988079071, "regularize": 0.510637640953064, "step": 145, "wo_beta": 4.243090629577637 }, { "dpo_loss": 0.5151103138923645, "epoch": 0.42512990080302315, "grad_norm": 14.430458288203226, "learning_rate": 4.810291907440382e-06, "logits": -1.5757466554641724, "logps": -91.76609802246094, "loss": 0.5002, "objective": 0.5151103138923645, "ranking_simple": 0.6291666626930237, "regularize": 0.5151103138923645, "step": 150, "wo_beta": 6.819777011871338 }, { "epoch": 0.42512990080302315, "eval_dpo_loss": 0.6748862862586975, "eval_logits": -1.5950791835784912, "eval_logps": -96.20237731933594, "eval_loss": 0.6815473437309265, "eval_objective": 0.6748862862586975, "eval_ranking_simple": 0.5496894121170044, "eval_regularize": 0.6748862862586975, "eval_runtime": 367.9317, "eval_samples_per_second": 15.737, "eval_steps_per_second": 1.313, "eval_wo_beta": 7.438036918640137, "step": 150 }, { "dpo_loss": 0.49763983488082886, "epoch": 0.43930089749645723, "grad_norm": 17.361740656063542, "learning_rate": 4.785877785100633e-06, "logits": -1.6784894466400146, "logps": -92.56346130371094, "loss": 0.5198, "objective": 0.49763983488082886, "ranking_simple": 0.6499999761581421, "regularize": 0.49763983488082886, "step": 155, "wo_beta": 5.170775890350342 }, { "dpo_loss": 0.5579937100410461, "epoch": 0.45347189418989137, "grad_norm": 15.652756053936857, "learning_rate": 4.7600561152769795e-06, "logits": -1.5714988708496094, "logps": -92.15592956542969, "loss": 0.5328, "objective": 0.5579937100410461, "ranking_simple": 0.6416666507720947, "regularize": 0.5579937100410461, "step": 160, "wo_beta": 5.608686447143555 }, { "dpo_loss": 0.5186927318572998, "epoch": 0.46764289088332545, "grad_norm": 14.630193025898393, "learning_rate": 4.732842797868631e-06, "logits": -1.6691575050354004, "logps": -91.15839385986328, "loss": 0.5189, "objective": 0.5186927318572998, "ranking_simple": 0.625, "regularize": 0.5186927318572998, "step": 165, "wo_beta": 5.489006996154785 }, { "dpo_loss": 0.4966394603252411, "epoch": 0.4818138875767596, "grad_norm": 13.19317387340531, "learning_rate": 4.704254589692903e-06, "logits": -1.7252763509750366, "logps": -92.86217498779297, "loss": 0.4959, "objective": 0.4966394603252411, "ranking_simple": 0.6416666507720947, "regularize": 0.4966394603252411, "step": 170, "wo_beta": 5.683476448059082 }, { "dpo_loss": 0.4829941689968109, "epoch": 0.49598488427019366, "grad_norm": 16.695755417688215, "learning_rate": 4.6743090941670675e-06, "logits": -1.609352946281433, "logps": -95.68805694580078, "loss": 0.4892, "objective": 0.4829941689968109, "ranking_simple": 0.675000011920929, "regularize": 0.4829941689968109, "step": 175, "wo_beta": 4.217517852783203 }, { "dpo_loss": 0.5080674886703491, "epoch": 0.5101558809636277, "grad_norm": 18.149531395287664, "learning_rate": 4.643024750468913e-06, "logits": -1.6799732446670532, "logps": -99.82926177978516, "loss": 0.468, "objective": 0.5080674886703491, "ranking_simple": 0.6333333253860474, "regularize": 0.5080674886703491, "step": 180, "wo_beta": 5.674283504486084 }, { "dpo_loss": 0.4599061608314514, "epoch": 0.5243268776570619, "grad_norm": 16.418473362321556, "learning_rate": 4.610420822182671e-06, "logits": -1.6607011556625366, "logps": -99.13754272460938, "loss": 0.4631, "objective": 0.4599061608314514, "ranking_simple": 0.699999988079071, "regularize": 0.4599061608314514, "step": 185, "wo_beta": 4.34508752822876 }, { "dpo_loss": 0.4760186970233917, "epoch": 0.538497874350496, "grad_norm": 16.5909593130195, "learning_rate": 4.576517385437315e-06, "logits": -1.7211116552352905, "logps": -96.94883728027344, "loss": 0.4859, "objective": 0.4760186970233917, "ranking_simple": 0.6958333253860474, "regularize": 0.4760186970233917, "step": 190, "wo_beta": 4.704507827758789 }, { "dpo_loss": 0.4567195773124695, "epoch": 0.5526688710439301, "grad_norm": 17.67231005570377, "learning_rate": 4.541335316544514e-06, "logits": -1.7492233514785767, "logps": -95.30302429199219, "loss": 0.5112, "objective": 0.4567195773124695, "ranking_simple": 0.7208333611488342, "regularize": 0.4567195773124695, "step": 195, "wo_beta": 4.662184238433838 }, { "dpo_loss": 0.4767034947872162, "epoch": 0.5668398677373642, "grad_norm": 14.561592123615519, "learning_rate": 4.5048962791438885e-06, "logits": -1.7373807430267334, "logps": -95.8047866821289, "loss": 0.4735, "objective": 0.4767034947872162, "ranking_simple": 0.6583333611488342, "regularize": 0.4767034947872162, "step": 200, "wo_beta": 5.47299861907959 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 0.691116213798523, "eval_logits": -1.7563871145248413, "eval_logps": -98.91757202148438, "eval_loss": 0.6950607299804688, "eval_objective": 0.691116213798523, "eval_ranking_simple": 0.5569358468055725, "eval_regularize": 0.691116213798523, "eval_runtime": 367.7852, "eval_samples_per_second": 15.743, "eval_steps_per_second": 1.313, "eval_wo_beta": 7.524092197418213, "step": 200 }, { "dpo_loss": 0.49286583065986633, "epoch": 0.5810108644307983, "grad_norm": 15.609742263801415, "learning_rate": 4.467222710863444e-06, "logits": -1.6046305894851685, "logps": -95.40666961669922, "loss": 0.4728, "objective": 0.49286583065986633, "ranking_simple": 0.6833333373069763, "regularize": 0.49286583065986633, "step": 205, "wo_beta": 5.8981099128723145 }, { "dpo_loss": 0.42898985743522644, "epoch": 0.5951818611242324, "grad_norm": 16.532108905835056, "learning_rate": 4.428337809503425e-06, "logits": -1.6189254522323608, "logps": -94.09720611572266, "loss": 0.4362, "objective": 0.42898985743522644, "ranking_simple": 0.6916666626930237, "regularize": 0.42898985743522644, "step": 210, "wo_beta": 3.8531105518341064 }, { "dpo_loss": 0.44433167576789856, "epoch": 0.6093528578176665, "grad_norm": 18.51464695126941, "learning_rate": 4.388265518752085e-06, "logits": -1.7230619192123413, "logps": -92.92915344238281, "loss": 0.4621, "objective": 0.44433167576789856, "ranking_simple": 0.6791666746139526, "regularize": 0.44433167576789856, "step": 215, "wo_beta": 5.033292770385742 }, { "dpo_loss": 0.44531288743019104, "epoch": 0.6235238545111006, "grad_norm": 14.674842226422456, "learning_rate": 4.347030513442168e-06, "logits": -1.7578327655792236, "logps": -91.2856674194336, "loss": 0.4332, "objective": 0.44531288743019104, "ranking_simple": 0.6916666626930237, "regularize": 0.44531288743019104, "step": 220, "wo_beta": 5.138680934906006 }, { "dpo_loss": 0.47229692339897156, "epoch": 0.6376948512045347, "grad_norm": 13.123177972096014, "learning_rate": 4.304658184357186e-06, "logits": -1.8197827339172363, "logps": -92.20543670654297, "loss": 0.4692, "objective": 0.47229692339897156, "ranking_simple": 0.6541666388511658, "regularize": 0.47229692339897156, "step": 225, "wo_beta": 5.442239284515381 }, { "dpo_loss": 0.4127563536167145, "epoch": 0.6518658478979689, "grad_norm": 13.818617424678937, "learning_rate": 4.261174622596835e-06, "logits": -1.6802526712417603, "logps": -90.7798843383789, "loss": 0.4559, "objective": 0.4127563536167145, "ranking_simple": 0.7333333492279053, "regularize": 0.4127563536167145, "step": 230, "wo_beta": 3.521521806716919 }, { "dpo_loss": 0.45779237151145935, "epoch": 0.6660368445914029, "grad_norm": 15.036299488311245, "learning_rate": 4.216606603511202e-06, "logits": -1.6339088678359985, "logps": -90.51241302490234, "loss": 0.4578, "objective": 0.45779237151145935, "ranking_simple": 0.6666666865348816, "regularize": 0.45779237151145935, "step": 235, "wo_beta": 4.571218967437744 }, { "dpo_loss": 0.4738180935382843, "epoch": 0.680207841284837, "grad_norm": 13.614428839916116, "learning_rate": 4.170981570213621e-06, "logits": -1.8103351593017578, "logps": -92.29689025878906, "loss": 0.4481, "objective": 0.4738180935382843, "ranking_simple": 0.6958333253860474, "regularize": 0.4738180935382843, "step": 240, "wo_beta": 6.207835674285889 }, { "dpo_loss": 0.5174158215522766, "epoch": 0.6943788379782712, "grad_norm": 16.01438129051185, "learning_rate": 4.124327616682362e-06, "logits": -1.7986476421356201, "logps": -91.10508728027344, "loss": 0.4642, "objective": 0.5174158215522766, "ranking_simple": 0.6958333253860474, "regularize": 0.5174158215522766, "step": 245, "wo_beta": 4.900957107543945 }, { "dpo_loss": 0.44342610239982605, "epoch": 0.7085498346717053, "grad_norm": 13.738305838918832, "learning_rate": 4.076673470461538e-06, "logits": -1.5667024850845337, "logps": -88.29222106933594, "loss": 0.4626, "objective": 0.44342610239982605, "ranking_simple": 0.7041666507720947, "regularize": 0.44342610239982605, "step": 250, "wo_beta": 4.291601181030273 }, { "epoch": 0.7085498346717053, "eval_dpo_loss": 0.694493293762207, "eval_logits": -1.7985897064208984, "eval_logps": -93.47747039794922, "eval_loss": 0.6976169347763062, "eval_objective": 0.694493293762207, "eval_ranking_simple": 0.5579710006713867, "eval_regularize": 0.694493293762207, "eval_runtime": 367.886, "eval_samples_per_second": 15.739, "eval_steps_per_second": 1.313, "eval_wo_beta": 7.902660369873047, "step": 250 }, { "dpo_loss": 0.43279239535331726, "epoch": 0.7227208313651393, "grad_norm": 12.486423165060444, "learning_rate": 4.028048474971889e-06, "logits": -1.860019564628601, "logps": -87.4556884765625, "loss": 0.4459, "objective": 0.43279239535331726, "ranking_simple": 0.6833333373069763, "regularize": 0.43279239535331726, "step": 255, "wo_beta": 4.541534900665283 }, { "dpo_loss": 0.41317591071128845, "epoch": 0.7368918280585735, "grad_norm": 13.93218309283749, "learning_rate": 3.978482571442339e-06, "logits": -1.8741662502288818, "logps": -91.13224029541016, "loss": 0.4561, "objective": 0.41317591071128845, "ranking_simple": 0.6625000238418579, "regularize": 0.41317591071128845, "step": 260, "wo_beta": 5.1168012619018555 }, { "dpo_loss": 0.39290040731430054, "epoch": 0.7510628247520076, "grad_norm": 13.220960314976885, "learning_rate": 3.928006280473445e-06, "logits": -1.8604073524475098, "logps": -92.02545928955078, "loss": 0.419, "objective": 0.39290040731430054, "ranking_simple": 0.7708333134651184, "regularize": 0.39290040731430054, "step": 265, "wo_beta": 4.408606052398682 }, { "dpo_loss": 0.47771012783050537, "epoch": 0.7652338214454416, "grad_norm": 14.906696639905949, "learning_rate": 3.876650683244093e-06, "logits": -1.997718095779419, "logps": -94.74840545654297, "loss": 0.4365, "objective": 0.47771012783050537, "ranking_simple": 0.6666666865348816, "regularize": 0.47771012783050537, "step": 270, "wo_beta": 4.766172409057617 }, { "dpo_loss": 0.4372769594192505, "epoch": 0.7794048181388757, "grad_norm": 14.602546448634548, "learning_rate": 3.8244474023730155e-06, "logits": -1.8585816621780396, "logps": -94.3137435913086, "loss": 0.4293, "objective": 0.4372769594192505, "ranking_simple": 0.6791666746139526, "regularize": 0.4372769594192505, "step": 275, "wo_beta": 5.203604698181152 }, { "dpo_loss": 0.3766806721687317, "epoch": 0.7935758148323099, "grad_norm": 15.405745134115882, "learning_rate": 3.771428582446908e-06, "logits": -1.9468127489089966, "logps": -95.09542846679688, "loss": 0.4039, "objective": 0.3766806721687317, "ranking_simple": 0.7208333611488342, "regularize": 0.3766806721687317, "step": 280, "wo_beta": 4.175257205963135 }, { "dpo_loss": 0.41625434160232544, "epoch": 0.807746811525744, "grad_norm": 15.243116510039572, "learning_rate": 3.7176268702271468e-06, "logits": -1.9459937810897827, "logps": -95.90043640136719, "loss": 0.407, "objective": 0.41625434160232544, "ranking_simple": 0.699999988079071, "regularize": 0.41625434160232544, "step": 285, "wo_beta": 5.091909408569336 }, { "dpo_loss": 0.3713260293006897, "epoch": 0.821917808219178, "grad_norm": 16.703078330526107, "learning_rate": 3.6630753945472854e-06, "logits": -1.963159203529358, "logps": -95.55049133300781, "loss": 0.3981, "objective": 0.3713260293006897, "ranking_simple": 0.7791666388511658, "regularize": 0.3713260293006897, "step": 290, "wo_beta": 3.84537935256958 }, { "dpo_loss": 0.4009644687175751, "epoch": 0.8360888049126122, "grad_norm": 13.199801209484807, "learning_rate": 3.6078077459137097e-06, "logits": -1.99600350856781, "logps": -98.70610809326172, "loss": 0.4243, "objective": 0.4009644687175751, "ranking_simple": 0.7166666388511658, "regularize": 0.4009644687175751, "step": 295, "wo_beta": 5.210625171661377 }, { "dpo_loss": 0.42688027024269104, "epoch": 0.8502598016060463, "grad_norm": 12.622555129903681, "learning_rate": 3.5518579558220144e-06, "logits": -1.9770207405090332, "logps": -95.8087387084961, "loss": 0.4214, "objective": 0.42688027024269104, "ranking_simple": 0.7458333373069763, "regularize": 0.42688027024269104, "step": 300, "wo_beta": 3.2817904949188232 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 0.6865138411521912, "eval_logits": -2.0138111114501953, "eval_logps": -104.43373107910156, "eval_loss": 0.693064272403717, "eval_objective": 0.6865138411521912, "eval_ranking_simple": 0.5615941882133484, "eval_regularize": 0.6865138411521912, "eval_runtime": 367.6512, "eval_samples_per_second": 15.749, "eval_steps_per_second": 1.314, "eval_wo_beta": 7.581406593322754, "step": 300 }, { "dpo_loss": 0.44818738102912903, "epoch": 0.8644307982994804, "grad_norm": 13.23517832354566, "learning_rate": 3.495260475801841e-06, "logits": -1.9494545459747314, "logps": -97.299560546875, "loss": 0.3903, "objective": 0.44818738102912903, "ranking_simple": 0.7041666507720947, "regularize": 0.44818738102912903, "step": 305, "wo_beta": 4.3523712158203125 }, { "dpo_loss": 0.34828221797943115, "epoch": 0.8786017949929145, "grad_norm": 14.41832502286332, "learning_rate": 3.4380501562030704e-06, "logits": -1.9428808689117432, "logps": -98.20413970947266, "loss": 0.3807, "objective": 0.34828221797943115, "ranking_simple": 0.7333333492279053, "regularize": 0.34828221797943115, "step": 310, "wo_beta": 3.6427719593048096 }, { "dpo_loss": 0.3642140030860901, "epoch": 0.8927727916863486, "grad_norm": 12.300830909289896, "learning_rate": 3.3802622247364446e-06, "logits": -1.9570696353912354, "logps": -98.85526275634766, "loss": 0.4042, "objective": 0.3642140030860901, "ranking_simple": 0.699999988079071, "regularize": 0.3642140030860901, "step": 315, "wo_beta": 4.256704330444336 }, { "dpo_loss": 0.399863064289093, "epoch": 0.9069437883797827, "grad_norm": 15.886821504052966, "learning_rate": 3.321932264781822e-06, "logits": -1.9135253429412842, "logps": -101.23651885986328, "loss": 0.4395, "objective": 0.399863064289093, "ranking_simple": 0.7708333134651184, "regularize": 0.399863064289093, "step": 320, "wo_beta": 2.9900147914886475 }, { "dpo_loss": 0.37714484333992004, "epoch": 0.9211147850732169, "grad_norm": 14.587168312549423, "learning_rate": 3.2630961934774265e-06, "logits": -1.9540404081344604, "logps": -100.98246002197266, "loss": 0.3745, "objective": 0.37714484333992004, "ranking_simple": 0.7250000238418579, "regularize": 0.37714484333992004, "step": 325, "wo_beta": 4.075650691986084 }, { "dpo_loss": 0.35718733072280884, "epoch": 0.9352857817666509, "grad_norm": 15.295489026198931, "learning_rate": 3.203790239603583e-06, "logits": -1.7621917724609375, "logps": -101.14175415039062, "loss": 0.4011, "objective": 0.35718733072280884, "ranking_simple": 0.737500011920929, "regularize": 0.35718733072280884, "step": 330, "wo_beta": 3.9375758171081543 }, { "dpo_loss": 0.3431912660598755, "epoch": 0.949456778460085, "grad_norm": 13.496480338032299, "learning_rate": 3.1440509212745584e-06, "logits": -1.7355188131332397, "logps": -100.78395080566406, "loss": 0.3733, "objective": 0.3431912660598755, "ranking_simple": 0.7416666746139526, "regularize": 0.3431912660598755, "step": 335, "wo_beta": 3.3042349815368652 }, { "dpo_loss": 0.43064969778060913, "epoch": 0.9636277751535192, "grad_norm": 13.67781828181165, "learning_rate": 3.0839150234522404e-06, "logits": -1.781424641609192, "logps": -99.20060729980469, "loss": 0.4067, "objective": 0.43064969778060913, "ranking_simple": 0.7083333134651184, "regularize": 0.43064969778060913, "step": 340, "wo_beta": 4.711233615875244 }, { "dpo_loss": 0.3512551188468933, "epoch": 0.9777987718469532, "grad_norm": 13.324791495929361, "learning_rate": 3.0234195752955032e-06, "logits": -1.8463162183761597, "logps": -95.25973510742188, "loss": 0.3729, "objective": 0.3512551188468933, "ranking_simple": 0.7541666626930237, "regularize": 0.3512551188468933, "step": 345, "wo_beta": 2.9623959064483643 }, { "dpo_loss": 0.32169008255004883, "epoch": 0.9919697685403873, "grad_norm": 15.127441089438493, "learning_rate": 2.962601827359208e-06, "logits": -1.78915274143219, "logps": -97.9096450805664, "loss": 0.3652, "objective": 0.32169008255004883, "ranking_simple": 0.824999988079071, "regularize": 0.32169008255004883, "step": 350, "wo_beta": 2.600872039794922 }, { "epoch": 0.9919697685403873, "eval_dpo_loss": 0.6984499096870422, "eval_logits": -1.9094278812408447, "eval_logps": -102.83055114746094, "eval_loss": 0.7074127793312073, "eval_objective": 0.6984499096870422, "eval_ranking_simple": 0.5559006333351135, "eval_regularize": 0.6984499096870422, "eval_runtime": 367.6345, "eval_samples_per_second": 15.749, "eval_steps_per_second": 1.314, "eval_wo_beta": 7.8343892097473145, "step": 350 }, { "dpo_loss": 0.2995939552783966, "epoch": 1.0061407652338215, "grad_norm": 9.31634152713278, "learning_rate": 2.9014992286568773e-06, "logits": -2.007425546646118, "logps": -96.57454681396484, "loss": 0.3042, "objective": 0.2995939552783966, "ranking_simple": 0.8041666746139526, "regularize": 0.2995939552783966, "step": 355, "wo_beta": 2.7125356197357178 }, { "dpo_loss": 0.24162109196186066, "epoch": 1.0203117619272555, "grad_norm": 11.90366408091981, "learning_rate": 2.840149403601166e-06, "logits": -1.9152239561080933, "logps": -101.0685043334961, "loss": 0.2301, "objective": 0.24162109196186066, "ranking_simple": 0.8125, "regularize": 0.24162109196186066, "step": 360, "wo_beta": 2.7127444744110107 }, { "dpo_loss": 0.19792620837688446, "epoch": 1.0344827586206897, "grad_norm": 9.884538723237807, "learning_rate": 2.7785901288363253e-06, "logits": -1.9689671993255615, "logps": -107.58961486816406, "loss": 0.2082, "objective": 0.19792620837688446, "ranking_simple": 0.8291666507720947, "regularize": 0.19792620837688446, "step": 365, "wo_beta": 1.9694886207580566 }, { "dpo_loss": 0.23545877635478973, "epoch": 1.0486537553141237, "grad_norm": 16.911618406294945, "learning_rate": 2.7168593099769414e-06, "logits": -1.9311782121658325, "logps": -106.47748565673828, "loss": 0.2379, "objective": 0.23545877635478973, "ranking_simple": 0.8125, "regularize": 0.23545877635478973, "step": 370, "wo_beta": 3.021015167236328 }, { "dpo_loss": 0.22429493069648743, "epoch": 1.0628247520075578, "grad_norm": 15.795921555252965, "learning_rate": 2.654994958267241e-06, "logits": -1.9985809326171875, "logps": -108.60234832763672, "loss": 0.2364, "objective": 0.22429493069648743, "ranking_simple": 0.8374999761581421, "regularize": 0.22429493069648743, "step": 375, "wo_beta": 1.6982978582382202 }, { "dpo_loss": 0.20513677597045898, "epoch": 1.076995748700992, "grad_norm": 15.164372148185517, "learning_rate": 2.5930351671753707e-06, "logits": -2.0427591800689697, "logps": -109.7289047241211, "loss": 0.2153, "objective": 0.20513677597045898, "ranking_simple": 0.8583333492279053, "regularize": 0.20513677597045898, "step": 380, "wo_beta": 1.7354587316513062 }, { "dpo_loss": 0.24022004008293152, "epoch": 1.091166745394426, "grad_norm": 13.938458603767867, "learning_rate": 2.5310180889370374e-06, "logits": -2.007528781890869, "logps": -109.47885131835938, "loss": 0.2371, "objective": 0.24022004008293152, "ranking_simple": 0.8291666507720947, "regularize": 0.24022004008293152, "step": 385, "wo_beta": 1.9898384809494019 }, { "dpo_loss": 0.22819384932518005, "epoch": 1.10533774208786, "grad_norm": 10.75019956346705, "learning_rate": 2.468981911062964e-06, "logits": -1.9904738664627075, "logps": -110.48689270019531, "loss": 0.2092, "objective": 0.22819384932518005, "ranking_simple": 0.8291666507720947, "regularize": 0.22819384932518005, "step": 390, "wo_beta": 2.570746660232544 }, { "dpo_loss": 0.2051982581615448, "epoch": 1.1195087387812943, "grad_norm": 10.957538026383743, "learning_rate": 2.4069648328246305e-06, "logits": -2.0448696613311768, "logps": -108.89817810058594, "loss": 0.2315, "objective": 0.2051982581615448, "ranking_simple": 0.8666666746139526, "regularize": 0.2051982581615448, "step": 395, "wo_beta": 2.1264781951904297 }, { "dpo_loss": 0.20951204001903534, "epoch": 1.1336797354747283, "grad_norm": 10.831310704591798, "learning_rate": 2.3450050417327593e-06, "logits": -2.0998401641845703, "logps": -106.39730072021484, "loss": 0.2206, "objective": 0.20951204001903534, "ranking_simple": 0.8500000238418579, "regularize": 0.20951204001903534, "step": 400, "wo_beta": 1.4239428043365479 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 0.729559600353241, "eval_logits": -2.090850591659546, "eval_logps": -113.60484313964844, "eval_loss": 0.7347043752670288, "eval_objective": 0.729559600353241, "eval_ranking_simple": 0.5502070188522339, "eval_regularize": 0.729559600353241, "eval_runtime": 367.8652, "eval_samples_per_second": 15.739, "eval_steps_per_second": 1.313, "eval_wo_beta": 8.675128936767578, "step": 400 }, { "dpo_loss": 0.24946197867393494, "epoch": 1.1478507321681626, "grad_norm": 13.639203958436303, "learning_rate": 2.2831406900230586e-06, "logits": -2.0489606857299805, "logps": -109.95741271972656, "loss": 0.2302, "objective": 0.24946197867393494, "ranking_simple": 0.8208333253860474, "regularize": 0.24946197867393494, "step": 405, "wo_beta": 3.3028647899627686 }, { "dpo_loss": 0.22408606112003326, "epoch": 1.1620217288615966, "grad_norm": 9.982686874740121, "learning_rate": 2.221409871163675e-06, "logits": -2.026094913482666, "logps": -112.10523986816406, "loss": 0.204, "objective": 0.22408606112003326, "ranking_simple": 0.824999988079071, "regularize": 0.22408606112003326, "step": 410, "wo_beta": 3.6203742027282715 }, { "dpo_loss": 0.24647466838359833, "epoch": 1.1761927255550306, "grad_norm": 14.127702730262909, "learning_rate": 2.1598505963988354e-06, "logits": -1.9576979875564575, "logps": -112.045654296875, "loss": 0.2487, "objective": 0.24647466838359833, "ranking_simple": 0.8041666746139526, "regularize": 0.24647466838359833, "step": 415, "wo_beta": 3.0596923828125 }, { "dpo_loss": 0.2189057618379593, "epoch": 1.1903637222484649, "grad_norm": 12.453759090133827, "learning_rate": 2.098500771343124e-06, "logits": -1.9014623165130615, "logps": -112.163330078125, "loss": 0.224, "objective": 0.2189057618379593, "ranking_simple": 0.8333333134651184, "regularize": 0.2189057618379593, "step": 420, "wo_beta": 2.8976945877075195 }, { "dpo_loss": 0.24521614611148834, "epoch": 1.204534718941899, "grad_norm": 11.810902908919543, "learning_rate": 2.037398172640793e-06, "logits": -1.939537525177002, "logps": -111.03417205810547, "loss": 0.225, "objective": 0.24521614611148834, "ranking_simple": 0.8166666626930237, "regularize": 0.24521614611148834, "step": 425, "wo_beta": 2.2815327644348145 }, { "dpo_loss": 0.25871187448501587, "epoch": 1.2187057156353331, "grad_norm": 13.805740179315583, "learning_rate": 1.976580424704498e-06, "logits": -1.908257246017456, "logps": -114.54412078857422, "loss": 0.2315, "objective": 0.25871187448501587, "ranking_simple": 0.8208333253860474, "regularize": 0.25871187448501587, "step": 430, "wo_beta": 2.5115749835968018 }, { "dpo_loss": 0.20695915818214417, "epoch": 1.2328767123287672, "grad_norm": 13.189182575578958, "learning_rate": 1.9160849765477604e-06, "logits": -1.845086932182312, "logps": -111.52816772460938, "loss": 0.2502, "objective": 0.20695915818214417, "ranking_simple": 0.8666666746139526, "regularize": 0.20695915818214417, "step": 435, "wo_beta": 2.1831676959991455 }, { "dpo_loss": 0.2140309065580368, "epoch": 1.2470477090222012, "grad_norm": 15.231839264333766, "learning_rate": 1.8559490787254423e-06, "logits": -1.8013054132461548, "logps": -112.4487533569336, "loss": 0.2241, "objective": 0.2140309065580368, "ranking_simple": 0.8500000238418579, "regularize": 0.2140309065580368, "step": 440, "wo_beta": 2.4584310054779053 }, { "dpo_loss": 0.2538544535636902, "epoch": 1.2612187057156352, "grad_norm": 11.902330497163605, "learning_rate": 1.7962097603964177e-06, "logits": -1.8283072710037231, "logps": -111.75948333740234, "loss": 0.2393, "objective": 0.2538544535636902, "ranking_simple": 0.8291666507720947, "regularize": 0.2538544535636902, "step": 445, "wo_beta": 2.5012738704681396 }, { "dpo_loss": 0.21088647842407227, "epoch": 1.2753897024090695, "grad_norm": 12.114049593362662, "learning_rate": 1.7369038065225743e-06, "logits": -1.9961200952529907, "logps": -110.46500396728516, "loss": 0.2202, "objective": 0.21088647842407227, "ranking_simple": 0.8458333611488342, "regularize": 0.21088647842407227, "step": 450, "wo_beta": 2.1799509525299072 }, { "epoch": 1.2753897024090695, "eval_dpo_loss": 0.7433450222015381, "eval_logits": -1.9911303520202637, "eval_logps": -115.7781753540039, "eval_loss": 0.7462677955627441, "eval_objective": 0.7433450222015381, "eval_ranking_simple": 0.5512422323226929, "eval_regularize": 0.7433450222015381, "eval_runtime": 369.6423, "eval_samples_per_second": 15.664, "eval_steps_per_second": 1.307, "eval_wo_beta": 8.912315368652344, "step": 450 }, { "dpo_loss": 0.22404690086841583, "epoch": 1.2895606991025035, "grad_norm": 12.618712156759832, "learning_rate": 1.6780677352181781e-06, "logits": -1.821974515914917, "logps": -112.83589935302734, "loss": 0.2503, "objective": 0.22404690086841583, "ranking_simple": 0.8125, "regularize": 0.22404690086841583, "step": 455, "wo_beta": 2.789232015609741 }, { "dpo_loss": 0.2399408221244812, "epoch": 1.3037316957959377, "grad_norm": 10.297452895979795, "learning_rate": 1.6197377752635563e-06, "logits": -1.9601954221725464, "logps": -111.7652587890625, "loss": 0.2322, "objective": 0.2399408221244812, "ranking_simple": 0.8166666626930237, "regularize": 0.2399408221244812, "step": 460, "wo_beta": 3.1664645671844482 }, { "dpo_loss": 0.21428868174552917, "epoch": 1.3179026924893718, "grad_norm": 11.673896578608254, "learning_rate": 1.5619498437969302e-06, "logits": -1.9731502532958984, "logps": -109.419189453125, "loss": 0.2432, "objective": 0.21428868174552917, "ranking_simple": 0.824999988079071, "regularize": 0.21428868174552917, "step": 465, "wo_beta": 2.3578691482543945 }, { "dpo_loss": 0.2447548806667328, "epoch": 1.3320736891828058, "grad_norm": 11.318434823205884, "learning_rate": 1.5047395241981606e-06, "logits": -2.0559751987457275, "logps": -107.98088836669922, "loss": 0.2364, "objective": 0.2447548806667328, "ranking_simple": 0.8500000238418579, "regularize": 0.2447548806667328, "step": 470, "wo_beta": 2.524103879928589 }, { "dpo_loss": 0.2010059952735901, "epoch": 1.34624468587624, "grad_norm": 13.4415115238709, "learning_rate": 1.4481420441779862e-06, "logits": -1.910614013671875, "logps": -110.5365982055664, "loss": 0.2194, "objective": 0.2010059952735901, "ranking_simple": 0.8666666746139526, "regularize": 0.2010059952735901, "step": 475, "wo_beta": 2.4653513431549072 }, { "dpo_loss": 0.21145032346248627, "epoch": 1.360415682569674, "grad_norm": 15.045958431800248, "learning_rate": 1.3921922540862907e-06, "logits": -2.0142934322357178, "logps": -109.8845443725586, "loss": 0.2244, "objective": 0.21145032346248627, "ranking_simple": 0.8166666626930237, "regularize": 0.21145032346248627, "step": 480, "wo_beta": 3.300536632537842 }, { "dpo_loss": 0.23482932150363922, "epoch": 1.3745866792631083, "grad_norm": 14.203111358887098, "learning_rate": 1.3369246054527152e-06, "logits": -1.996147632598877, "logps": -108.94562530517578, "loss": 0.2473, "objective": 0.23482932150363922, "ranking_simple": 0.8291666507720947, "regularize": 0.23482932150363922, "step": 485, "wo_beta": 2.5876729488372803 }, { "dpo_loss": 0.23283059895038605, "epoch": 1.3887576759565423, "grad_norm": 11.652820113044354, "learning_rate": 1.2823731297728536e-06, "logits": -1.98202645778656, "logps": -112.48513793945312, "loss": 0.2222, "objective": 0.23283059895038605, "ranking_simple": 0.824999988079071, "regularize": 0.23283059895038605, "step": 490, "wo_beta": 2.047060251235962 }, { "dpo_loss": 0.19643358886241913, "epoch": 1.4029286726499763, "grad_norm": 16.098991414414286, "learning_rate": 1.2285714175530936e-06, "logits": -1.9894219636917114, "logps": -111.90442657470703, "loss": 0.2257, "objective": 0.19643358886241913, "ranking_simple": 0.8458333611488342, "regularize": 0.19643358886241913, "step": 495, "wo_beta": 2.041602611541748 }, { "dpo_loss": 0.22570651769638062, "epoch": 1.4170996693434104, "grad_norm": 14.942258982525255, "learning_rate": 1.1755525976269851e-06, "logits": -1.9338775873184204, "logps": -109.2611083984375, "loss": 0.2366, "objective": 0.22570651769638062, "ranking_simple": 0.8208333253860474, "regularize": 0.22570651769638062, "step": 500, "wo_beta": 1.8392161130905151 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 0.7387389540672302, "eval_logits": -2.046374797821045, "eval_logps": -114.77104187011719, "eval_loss": 0.7444195747375488, "eval_objective": 0.7387389540672302, "eval_ranking_simple": 0.5517598390579224, "eval_regularize": 0.7387389540672302, "eval_runtime": 367.6953, "eval_samples_per_second": 15.747, "eval_steps_per_second": 1.314, "eval_wo_beta": 8.863048553466797, "step": 500 }, { "dpo_loss": 0.18702387809753418, "epoch": 1.4312706660368446, "grad_norm": 11.645842357352148, "learning_rate": 1.1233493167559065e-06, "logits": -2.00331711769104, "logps": -110.28618621826172, "loss": 0.2087, "objective": 0.18702387809753418, "ranking_simple": 0.8458333611488342, "regularize": 0.18702387809753418, "step": 505, "wo_beta": 2.0265085697174072 }, { "dpo_loss": 0.23954346776008606, "epoch": 1.4454416627302786, "grad_norm": 11.232727785445284, "learning_rate": 1.0719937195265555e-06, "logits": -2.0127902030944824, "logps": -110.59080505371094, "loss": 0.2313, "objective": 0.23954346776008606, "ranking_simple": 0.8458333611488342, "regularize": 0.23954346776008606, "step": 510, "wo_beta": 2.1970200538635254 }, { "dpo_loss": 0.20086827874183655, "epoch": 1.4596126594237129, "grad_norm": 10.217720875279324, "learning_rate": 1.0215174285576615e-06, "logits": -2.087155818939209, "logps": -110.73674774169922, "loss": 0.2043, "objective": 0.20086827874183655, "ranking_simple": 0.8083333373069763, "regularize": 0.20086827874183655, "step": 515, "wo_beta": 2.5676777362823486 }, { "dpo_loss": 0.19097186625003815, "epoch": 1.473783656117147, "grad_norm": 11.650910390578003, "learning_rate": 9.719515250281122e-07, "logits": -1.9594320058822632, "logps": -110.79000091552734, "loss": 0.1898, "objective": 0.19097186625003815, "ranking_simple": 0.8833333253860474, "regularize": 0.19097186625003815, "step": 520, "wo_beta": 2.2379517555236816 }, { "dpo_loss": 0.2083693891763687, "epoch": 1.487954652810581, "grad_norm": 12.830417240600797, "learning_rate": 9.233265295384624e-07, "logits": -1.8601106405258179, "logps": -110.70569610595703, "loss": 0.2274, "objective": 0.2083693891763687, "ranking_simple": 0.8416666388511658, "regularize": 0.2083693891763687, "step": 525, "wo_beta": 2.14703369140625 }, { "dpo_loss": 0.19533474743366241, "epoch": 1.5021256495040152, "grad_norm": 14.108055837836243, "learning_rate": 8.756723833176376e-07, "logits": -2.0109665393829346, "logps": -114.77526092529297, "loss": 0.205, "objective": 0.19533474743366241, "ranking_simple": 0.8583333492279053, "regularize": 0.19533474743366241, "step": 530, "wo_beta": 1.9219799041748047 }, { "dpo_loss": 0.18096224963665009, "epoch": 1.5162966461974492, "grad_norm": 13.367132869446376, "learning_rate": 8.290184297863793e-07, "logits": -1.971710205078125, "logps": -113.77371978759766, "loss": 0.188, "objective": 0.18096224963665009, "ranking_simple": 0.8541666865348816, "regularize": 0.18096224963665009, "step": 535, "wo_beta": 2.045213222503662 }, { "dpo_loss": 0.26400619745254517, "epoch": 1.5304676428908834, "grad_norm": 15.135814096600672, "learning_rate": 7.833933964887985e-07, "logits": -1.9013224840164185, "logps": -113.3902816772461, "loss": 0.221, "objective": 0.26400619745254517, "ranking_simple": 0.8374999761581421, "regularize": 0.26400619745254517, "step": 540, "wo_beta": 2.3301329612731934 }, { "dpo_loss": 0.23929236829280853, "epoch": 1.5446386395843175, "grad_norm": 15.186538591312983, "learning_rate": 7.388253774031659e-07, "logits": -1.9914318323135376, "logps": -113.67050170898438, "loss": 0.2343, "objective": 0.23929236829280853, "ranking_simple": 0.8666666746139526, "regularize": 0.23929236829280853, "step": 545, "wo_beta": 1.8687002658843994 }, { "dpo_loss": 0.17025238275527954, "epoch": 1.5588096362777515, "grad_norm": 11.147125855718437, "learning_rate": 6.953418156428152e-07, "logits": -1.9760197401046753, "logps": -114.83491516113281, "loss": 0.1989, "objective": 0.17025238275527954, "ranking_simple": 0.8458333611488342, "regularize": 0.17025238275527954, "step": 550, "wo_beta": 2.0717434883117676 }, { "epoch": 1.5588096362777515, "eval_dpo_loss": 0.7519087195396423, "eval_logits": -2.0168354511260986, "eval_logps": -118.77753448486328, "eval_loss": 0.7552616596221924, "eval_objective": 0.7519087195396423, "eval_ranking_simple": 0.5595238208770752, "eval_regularize": 0.7519087195396423, "eval_runtime": 367.683, "eval_samples_per_second": 15.747, "eval_steps_per_second": 1.314, "eval_wo_beta": 8.984560012817383, "step": 550 }, { "dpo_loss": 0.2299811840057373, "epoch": 1.5729806329711855, "grad_norm": 14.207116899254263, "learning_rate": 6.529694865578318e-07, "logits": -1.7790377140045166, "logps": -116.34921264648438, "loss": 0.2215, "objective": 0.2299811840057373, "ranking_simple": 0.8166666626930237, "regularize": 0.2299811840057373, "step": 555, "wo_beta": 2.6884348392486572 }, { "dpo_loss": 0.2206832468509674, "epoch": 1.5871516296646198, "grad_norm": 14.824039668104726, "learning_rate": 6.117344812479154e-07, "logits": -1.8644143342971802, "logps": -111.79485321044922, "loss": 0.2146, "objective": 0.2206832468509674, "ranking_simple": 0.8416666388511658, "regularize": 0.2206832468509674, "step": 560, "wo_beta": 2.7559573650360107 }, { "dpo_loss": 0.18442773818969727, "epoch": 1.601322626358054, "grad_norm": 13.453462098796246, "learning_rate": 5.71662190496575e-07, "logits": -1.8391135931015015, "logps": -113.25553131103516, "loss": 0.2078, "objective": 0.18442773818969727, "ranking_simple": 0.8333333134651184, "regularize": 0.18442773818969727, "step": 565, "wo_beta": 2.2805912494659424 }, { "dpo_loss": 0.19617310166358948, "epoch": 1.615493623051488, "grad_norm": 12.497698816224, "learning_rate": 5.327772891365565e-07, "logits": -1.984673023223877, "logps": -115.45191192626953, "loss": 0.1869, "objective": 0.19617310166358948, "ranking_simple": 0.8374999761581421, "regularize": 0.19617310166358948, "step": 570, "wo_beta": 2.3473691940307617 }, { "dpo_loss": 0.2390568107366562, "epoch": 1.629664619744922, "grad_norm": 14.590840547972736, "learning_rate": 4.951037208561116e-07, "logits": -1.9447566270828247, "logps": -111.96437072753906, "loss": 0.2312, "objective": 0.2390568107366562, "ranking_simple": 0.8458333611488342, "regularize": 0.2390568107366562, "step": 575, "wo_beta": 2.296287775039673 }, { "dpo_loss": 0.20053791999816895, "epoch": 1.643835616438356, "grad_norm": 15.024890919749566, "learning_rate": 4.586646834554864e-07, "logits": -1.9810107946395874, "logps": -110.92058563232422, "loss": 0.2089, "objective": 0.20053791999816895, "ranking_simple": 0.8666666746139526, "regularize": 0.20053791999816895, "step": 580, "wo_beta": 1.4837419986724854 }, { "dpo_loss": 0.19388006627559662, "epoch": 1.6580066131317903, "grad_norm": 11.343666690979733, "learning_rate": 4.234826145626855e-07, "logits": -1.8341389894485474, "logps": -109.36263275146484, "loss": 0.2048, "objective": 0.19388006627559662, "ranking_simple": 0.8500000238418579, "regularize": 0.19388006627559662, "step": 585, "wo_beta": 2.263803243637085 }, { "dpo_loss": 0.2181146889925003, "epoch": 1.6721776098252243, "grad_norm": 13.28342611357644, "learning_rate": 3.8957917781732883e-07, "logits": -1.87205970287323, "logps": -113.15511322021484, "loss": 0.2156, "objective": 0.2181146889925003, "ranking_simple": 0.8333333134651184, "regularize": 0.2181146889925003, "step": 590, "wo_beta": 2.4472413063049316 }, { "dpo_loss": 0.1822492927312851, "epoch": 1.6863486065186586, "grad_norm": 10.596854767592859, "learning_rate": 3.569752495310877e-07, "logits": -1.8391311168670654, "logps": -113.05868530273438, "loss": 0.1882, "objective": 0.1822492927312851, "ranking_simple": 0.8333333134651184, "regularize": 0.1822492927312851, "step": 595, "wo_beta": 2.815018892288208 }, { "dpo_loss": 0.17215129733085632, "epoch": 1.7005196032120926, "grad_norm": 11.460383813341208, "learning_rate": 3.2569090583293356e-07, "logits": -1.8718314170837402, "logps": -113.5940933227539, "loss": 0.1952, "objective": 0.17215129733085632, "ranking_simple": 0.8583333492279053, "regularize": 0.17215129733085632, "step": 600, "wo_beta": 1.7114546298980713 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 0.7512882947921753, "eval_logits": -1.970719814300537, "eval_logps": -117.48804473876953, "eval_loss": 0.7544336318969727, "eval_objective": 0.7512882947921753, "eval_ranking_simple": 0.5595238208770752, "eval_regularize": 0.7512882947921753, "eval_runtime": 367.7185, "eval_samples_per_second": 15.746, "eval_steps_per_second": 1.314, "eval_wo_beta": 9.029719352722168, "step": 600 }, { "dpo_loss": 0.19925004243850708, "epoch": 1.7146905999055266, "grad_norm": 17.415629157530653, "learning_rate": 2.957454103070978e-07, "logits": -1.8373870849609375, "logps": -112.65380096435547, "loss": 0.2262, "objective": 0.19925004243850708, "ranking_simple": 0.8333333134651184, "regularize": 0.19925004243850708, "step": 605, "wo_beta": 2.9711499214172363 }, { "dpo_loss": 0.21553590893745422, "epoch": 1.7288615965989607, "grad_norm": 16.124254557780983, "learning_rate": 2.6715720213136955e-07, "logits": -1.8880244493484497, "logps": -112.18531799316406, "loss": 0.1988, "objective": 0.21553590893745422, "ranking_simple": 0.8291666507720947, "regularize": 0.21553590893745422, "step": 610, "wo_beta": 2.8244071006774902 }, { "dpo_loss": 0.21212069690227509, "epoch": 1.743032593292395, "grad_norm": 12.906958893668936, "learning_rate": 2.399438847230212e-07, "logits": -1.9108936786651611, "logps": -111.8115005493164, "loss": 0.2027, "objective": 0.21212069690227509, "ranking_simple": 0.8166666626930237, "regularize": 0.21212069690227509, "step": 615, "wo_beta": 2.961397886276245 }, { "dpo_loss": 0.20163790881633759, "epoch": 1.7572035899858292, "grad_norm": 15.219355254954188, "learning_rate": 2.1412221489936796e-07, "logits": -1.947303295135498, "logps": -111.9202880859375, "loss": 0.2188, "objective": 0.20163790881633759, "ranking_simple": 0.8583333492279053, "regularize": 0.20163790881633759, "step": 620, "wo_beta": 1.839400053024292 }, { "dpo_loss": 0.1938161551952362, "epoch": 1.7713745866792632, "grad_norm": 17.102171291997916, "learning_rate": 1.897080925596187e-07, "logits": -1.8294084072113037, "logps": -111.60057830810547, "loss": 0.2068, "objective": 0.1938161551952362, "ranking_simple": 0.8583333492279053, "regularize": 0.1938161551952362, "step": 625, "wo_beta": 2.0019137859344482 }, { "dpo_loss": 0.25330111384391785, "epoch": 1.7855455833726972, "grad_norm": 14.422081659451388, "learning_rate": 1.6671655089439186e-07, "logits": -1.8686004877090454, "logps": -112.4965591430664, "loss": 0.2312, "objective": 0.25330111384391785, "ranking_simple": 0.7791666388511658, "regularize": 0.25330111384391785, "step": 630, "wo_beta": 3.286885976791382 }, { "dpo_loss": 0.20808285474777222, "epoch": 1.7997165800661312, "grad_norm": 16.725368606459067, "learning_rate": 1.4516174712890406e-07, "logits": -1.9575639963150024, "logps": -114.01631164550781, "loss": 0.2184, "objective": 0.20808285474777222, "ranking_simple": 0.8291666507720947, "regularize": 0.20808285474777222, "step": 635, "wo_beta": 2.427999258041382 }, { "dpo_loss": 0.20768284797668457, "epoch": 1.8138875767595655, "grad_norm": 12.030792650400683, "learning_rate": 1.2505695380554712e-07, "logits": -1.8871350288391113, "logps": -113.53579711914062, "loss": 0.2015, "objective": 0.20768284797668457, "ranking_simple": 0.8291666507720947, "regularize": 0.20768284797668457, "step": 640, "wo_beta": 2.9419753551483154 }, { "dpo_loss": 0.21022367477416992, "epoch": 1.8280585734529995, "grad_norm": 12.06072694382626, "learning_rate": 1.0641455061121519e-07, "logits": -1.9376109838485718, "logps": -114.6182632446289, "loss": 0.2087, "objective": 0.21022367477416992, "ranking_simple": 0.875, "regularize": 0.21022367477416992, "step": 645, "wo_beta": 2.246778964996338 }, { "dpo_loss": 0.21791066229343414, "epoch": 1.8422295701464337, "grad_norm": 17.74332836561221, "learning_rate": 8.924601675441207e-08, "logits": -1.9945629835128784, "logps": -115.14165496826172, "loss": 0.2252, "objective": 0.21791066229343414, "ranking_simple": 0.8791666626930237, "regularize": 0.21791066229343414, "step": 650, "wo_beta": 1.7516138553619385 }, { "epoch": 1.8422295701464337, "eval_dpo_loss": 0.7528993487358093, "eval_logits": -1.9747523069381714, "eval_logps": -117.80084228515625, "eval_loss": 0.7559728622436523, "eval_objective": 0.7528993487358093, "eval_ranking_simple": 0.5584886074066162, "eval_regularize": 0.7528993487358093, "eval_runtime": 367.648, "eval_samples_per_second": 15.749, "eval_steps_per_second": 1.314, "eval_wo_beta": 9.092611312866211, "step": 650 }, { "dpo_loss": 0.21706603467464447, "epoch": 1.8564005668398678, "grad_norm": 13.027657065435218, "learning_rate": 7.356192389683825e-08, "logits": -1.844641923904419, "logps": -110.724609375, "loss": 0.2263, "objective": 0.21706603467464447, "ranking_simple": 0.8333333134651184, "regularize": 0.21706603467464447, "step": 655, "wo_beta": 2.2099640369415283 }, { "dpo_loss": 0.19901646673679352, "epoch": 1.8705715635333018, "grad_norm": 11.442935681801226, "learning_rate": 5.937192964380556e-08, "logits": -1.9180775880813599, "logps": -115.1898193359375, "loss": 0.1992, "objective": 0.19901646673679352, "ranking_simple": 0.8708333373069763, "regularize": 0.19901646673679352, "step": 660, "wo_beta": 1.9250274896621704 }, { "dpo_loss": 0.20594698190689087, "epoch": 1.8847425602267358, "grad_norm": 14.384890803281685, "learning_rate": 4.668477159748858e-08, "logits": -1.8975155353546143, "logps": -112.05779266357422, "loss": 0.22, "objective": 0.20594698190689087, "ranking_simple": 0.8458333611488342, "regularize": 0.20594698190689087, "step": 665, "wo_beta": 1.7807292938232422 }, { "dpo_loss": 0.23021073639392853, "epoch": 1.89891355692017, "grad_norm": 11.678760414899436, "learning_rate": 3.5508261976678894e-08, "logits": -1.8350870609283447, "logps": -111.71804809570312, "loss": 0.2054, "objective": 0.23021073639392853, "ranking_simple": 0.8208333253860474, "regularize": 0.23021073639392853, "step": 670, "wo_beta": 2.838752269744873 }, { "dpo_loss": 0.19814661145210266, "epoch": 1.9130845536136043, "grad_norm": 14.42884511344777, "learning_rate": 2.5849282806345855e-08, "logits": -1.8832274675369263, "logps": -112.52620697021484, "loss": 0.2208, "objective": 0.19814661145210266, "ranking_simple": 0.8791666626930237, "regularize": 0.19814661145210266, "step": 675, "wo_beta": 1.6049467325210571 }, { "dpo_loss": 0.2337852120399475, "epoch": 1.9272555503070383, "grad_norm": 12.72035969054902, "learning_rate": 1.771378167997745e-08, "logits": -1.9469962120056152, "logps": -114.031494140625, "loss": 0.2132, "objective": 0.2337852120399475, "ranking_simple": 0.8541666865348816, "regularize": 0.2337852120399475, "step": 680, "wo_beta": 2.8053526878356934 }, { "dpo_loss": 0.18736791610717773, "epoch": 1.9414265470004723, "grad_norm": 11.578463444163043, "learning_rate": 1.1106768097300657e-08, "logits": -1.8774739503860474, "logps": -113.84810638427734, "loss": 0.1807, "objective": 0.18736791610717773, "ranking_simple": 0.8458333611488342, "regularize": 0.18736791610717773, "step": 685, "wo_beta": 2.1088075637817383 }, { "dpo_loss": 0.19145923852920532, "epoch": 1.9555975436939064, "grad_norm": 11.501744444827747, "learning_rate": 6.032310379642803e-09, "logits": -1.9342644214630127, "logps": -112.51237487792969, "loss": 0.204, "objective": 0.19145923852920532, "ranking_simple": 0.8583333492279053, "regularize": 0.19145923852920532, "step": 690, "wo_beta": 1.7116554975509644 }, { "dpo_loss": 0.2177831530570984, "epoch": 1.9697685403873406, "grad_norm": 13.662395330088454, "learning_rate": 2.4935331648298644e-09, "logits": -1.7998664379119873, "logps": -113.02543640136719, "loss": 0.2048, "objective": 0.2177831530570984, "ranking_simple": 0.8166666626930237, "regularize": 0.2177831530570984, "step": 695, "wo_beta": 2.7438058853149414 }, { "dpo_loss": 0.19972722232341766, "epoch": 1.9839395370807746, "grad_norm": 13.232400433126339, "learning_rate": 4.926154831655372e-10, "logits": -1.788934350013733, "logps": -115.31278228759766, "loss": 0.199, "objective": 0.19972722232341766, "ranking_simple": 0.8583333492279053, "regularize": 0.19972722232341766, "step": 700, "wo_beta": 2.272697687149048 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 0.7537301182746887, "eval_logits": -1.9777543544769287, "eval_logps": -117.9869155883789, "eval_loss": 0.7566089034080505, "eval_objective": 0.7537301182746887, "eval_ranking_simple": 0.5595238208770752, "eval_regularize": 0.7537301182746887, "eval_runtime": 367.5678, "eval_samples_per_second": 15.752, "eval_steps_per_second": 1.314, "eval_wo_beta": 9.104193687438965, "step": 700 }, { "epoch": 1.995276334435522, "step": 704, "total_flos": 0.0, "train_loss": 0.3641128831289031, "train_runtime": 23012.8835, "train_samples_per_second": 4.415, "train_steps_per_second": 0.031 } ], "logging_steps": 5, "max_steps": 704, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }