|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.995276334435522, |
|
"eval_steps": 50, |
|
"global_step": 704, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.002834199338686821, |
|
"grad_norm": 18.397043918777676, |
|
"learning_rate": 7.042253521126761e-08, |
|
"logits": -1.2867579460144043, |
|
"logps": -84.34933471679688, |
|
"loss": 0.6931, |
|
"objective": 0.6931471824645996, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.6931471824645996, |
|
"step": 1, |
|
"wo_beta": 5.2708282470703125 |
|
}, |
|
{ |
|
"dpo_loss": 0.6924155354499817, |
|
"epoch": 0.014170996693434105, |
|
"grad_norm": 18.351841049696436, |
|
"learning_rate": 3.521126760563381e-07, |
|
"logits": -1.430939793586731, |
|
"logps": -83.6253890991211, |
|
"loss": 0.6927, |
|
"objective": 0.6924155354499817, |
|
"ranking_simple": 0.4895833432674408, |
|
"regularize": 0.6924155354499817, |
|
"step": 5, |
|
"wo_beta": 7.649607181549072 |
|
}, |
|
{ |
|
"dpo_loss": 0.6938675045967102, |
|
"epoch": 0.02834199338686821, |
|
"grad_norm": 19.803358997748763, |
|
"learning_rate": 7.042253521126762e-07, |
|
"logits": -1.4047328233718872, |
|
"logps": -83.41845703125, |
|
"loss": 0.6929, |
|
"objective": 0.6938675045967102, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.6938675045967102, |
|
"step": 10, |
|
"wo_beta": 5.940184593200684 |
|
}, |
|
{ |
|
"dpo_loss": 0.6885419487953186, |
|
"epoch": 0.042512990080302314, |
|
"grad_norm": 19.861932182637975, |
|
"learning_rate": 1.0563380281690142e-06, |
|
"logits": -1.5388954877853394, |
|
"logps": -82.4270248413086, |
|
"loss": 0.689, |
|
"objective": 0.6885419487953186, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.6885419487953186, |
|
"step": 15, |
|
"wo_beta": 6.731040000915527 |
|
}, |
|
{ |
|
"dpo_loss": 0.6827310919761658, |
|
"epoch": 0.05668398677373642, |
|
"grad_norm": 16.87924772685924, |
|
"learning_rate": 1.4084507042253523e-06, |
|
"logits": -1.3925108909606934, |
|
"logps": -80.71045684814453, |
|
"loss": 0.684, |
|
"objective": 0.6827310919761658, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.6827310919761658, |
|
"step": 20, |
|
"wo_beta": 8.312360763549805 |
|
}, |
|
{ |
|
"dpo_loss": 0.6760162711143494, |
|
"epoch": 0.07085498346717052, |
|
"grad_norm": 17.86836459306927, |
|
"learning_rate": 1.7605633802816902e-06, |
|
"logits": -1.4580414295196533, |
|
"logps": -80.7186508178711, |
|
"loss": 0.6703, |
|
"objective": 0.6760162711143494, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.6760162711143494, |
|
"step": 25, |
|
"wo_beta": 6.106756210327148 |
|
}, |
|
{ |
|
"dpo_loss": 0.6718389391899109, |
|
"epoch": 0.08502598016060463, |
|
"grad_norm": 19.355249838601942, |
|
"learning_rate": 2.1126760563380285e-06, |
|
"logits": -1.6006718873977661, |
|
"logps": -84.822021484375, |
|
"loss": 0.6727, |
|
"objective": 0.6718389391899109, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.6718389391899109, |
|
"step": 30, |
|
"wo_beta": 8.509283065795898 |
|
}, |
|
{ |
|
"dpo_loss": 0.6751564145088196, |
|
"epoch": 0.09919697685403873, |
|
"grad_norm": 19.23258032194357, |
|
"learning_rate": 2.4647887323943666e-06, |
|
"logits": -1.6175826787948608, |
|
"logps": -85.66646575927734, |
|
"loss": 0.6624, |
|
"objective": 0.6751564145088196, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.6751564145088196, |
|
"step": 35, |
|
"wo_beta": 6.50329065322876 |
|
}, |
|
{ |
|
"dpo_loss": 0.6747376322746277, |
|
"epoch": 0.11336797354747284, |
|
"grad_norm": 20.018488756494, |
|
"learning_rate": 2.8169014084507046e-06, |
|
"logits": -1.621884822845459, |
|
"logps": -84.25798797607422, |
|
"loss": 0.6598, |
|
"objective": 0.6747376322746277, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 0.6747376322746277, |
|
"step": 40, |
|
"wo_beta": 6.886596202850342 |
|
}, |
|
{ |
|
"dpo_loss": 0.6456737518310547, |
|
"epoch": 0.12753897024090693, |
|
"grad_norm": 14.933918801623976, |
|
"learning_rate": 3.1690140845070427e-06, |
|
"logits": -1.6078789234161377, |
|
"logps": -81.69025421142578, |
|
"loss": 0.6563, |
|
"objective": 0.6456737518310547, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.6456737518310547, |
|
"step": 45, |
|
"wo_beta": 6.529275417327881 |
|
}, |
|
{ |
|
"dpo_loss": 0.6442943811416626, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 16.57958550585861, |
|
"learning_rate": 3.5211267605633804e-06, |
|
"logits": -1.6018227338790894, |
|
"logps": -82.79893493652344, |
|
"loss": 0.6316, |
|
"objective": 0.6442943811416626, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.6442943811416626, |
|
"step": 50, |
|
"wo_beta": 7.091952323913574 |
|
}, |
|
{ |
|
"epoch": 0.14170996693434104, |
|
"eval_dpo_loss": 0.6824604868888855, |
|
"eval_logits": -1.587925672531128, |
|
"eval_logps": -90.32820892333984, |
|
"eval_loss": 0.6806595921516418, |
|
"eval_objective": 0.6824604868888855, |
|
"eval_ranking_simple": 0.5341615080833435, |
|
"eval_regularize": 0.6824604868888855, |
|
"eval_runtime": 367.8191, |
|
"eval_samples_per_second": 15.741, |
|
"eval_steps_per_second": 1.313, |
|
"eval_wo_beta": 7.8618669509887695, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.6267839074134827, |
|
"epoch": 0.15588096362777515, |
|
"grad_norm": 17.05808296065035, |
|
"learning_rate": 3.873239436619718e-06, |
|
"logits": -1.5757447481155396, |
|
"logps": -84.72220611572266, |
|
"loss": 0.6258, |
|
"objective": 0.6267839074134827, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.6267839074134827, |
|
"step": 55, |
|
"wo_beta": 6.998147487640381 |
|
}, |
|
{ |
|
"dpo_loss": 0.6304137110710144, |
|
"epoch": 0.17005196032120926, |
|
"grad_norm": 17.359597890876504, |
|
"learning_rate": 4.225352112676057e-06, |
|
"logits": -1.5341871976852417, |
|
"logps": -87.07388305664062, |
|
"loss": 0.6204, |
|
"objective": 0.6304137110710144, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.6304137110710144, |
|
"step": 60, |
|
"wo_beta": 5.637550354003906 |
|
}, |
|
{ |
|
"dpo_loss": 0.5928328633308411, |
|
"epoch": 0.18422295701464336, |
|
"grad_norm": 18.81033297245387, |
|
"learning_rate": 4.577464788732395e-06, |
|
"logits": -1.681604266166687, |
|
"logps": -88.5802001953125, |
|
"loss": 0.6021, |
|
"objective": 0.5928328633308411, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.5928328633308411, |
|
"step": 65, |
|
"wo_beta": 5.803915023803711 |
|
}, |
|
{ |
|
"dpo_loss": 0.5882770419120789, |
|
"epoch": 0.19839395370807747, |
|
"grad_norm": 18.134916504512542, |
|
"learning_rate": 4.929577464788733e-06, |
|
"logits": -1.7763893604278564, |
|
"logps": -92.76559448242188, |
|
"loss": 0.5898, |
|
"objective": 0.5882770419120789, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.5882770419120789, |
|
"step": 70, |
|
"wo_beta": 6.3991007804870605 |
|
}, |
|
{ |
|
"dpo_loss": 0.5995556712150574, |
|
"epoch": 0.21256495040151158, |
|
"grad_norm": 21.061539129772793, |
|
"learning_rate": 4.999507384516835e-06, |
|
"logits": -1.727323055267334, |
|
"logps": -92.68614196777344, |
|
"loss": 0.5904, |
|
"objective": 0.5995556712150574, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.5995556712150574, |
|
"step": 75, |
|
"wo_beta": 6.095489978790283 |
|
}, |
|
{ |
|
"dpo_loss": 0.5429711937904358, |
|
"epoch": 0.22673594709494568, |
|
"grad_norm": 17.500250856825723, |
|
"learning_rate": 4.997506466835171e-06, |
|
"logits": -1.7568250894546509, |
|
"logps": -89.59777069091797, |
|
"loss": 0.5696, |
|
"objective": 0.5429711937904358, |
|
"ranking_simple": 0.6625000238418579, |
|
"regularize": 0.5429711937904358, |
|
"step": 80, |
|
"wo_beta": 4.702788829803467 |
|
}, |
|
{ |
|
"dpo_loss": 0.5587875247001648, |
|
"epoch": 0.2409069437883798, |
|
"grad_norm": 14.610779362874785, |
|
"learning_rate": 4.9939676896203576e-06, |
|
"logits": -1.7483054399490356, |
|
"logps": -89.5750961303711, |
|
"loss": 0.5788, |
|
"objective": 0.5587875247001648, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.5587875247001648, |
|
"step": 85, |
|
"wo_beta": 5.067751407623291 |
|
}, |
|
{ |
|
"dpo_loss": 0.4884372353553772, |
|
"epoch": 0.25507794048181387, |
|
"grad_norm": 16.59773892580396, |
|
"learning_rate": 4.9888932319026994e-06, |
|
"logits": -1.829767107963562, |
|
"logps": -88.12342071533203, |
|
"loss": 0.5584, |
|
"objective": 0.4884372353553772, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.4884372353553772, |
|
"step": 90, |
|
"wo_beta": 4.8033833503723145 |
|
}, |
|
{ |
|
"dpo_loss": 0.5380887985229492, |
|
"epoch": 0.269248937175248, |
|
"grad_norm": 16.942898511925186, |
|
"learning_rate": 4.982286218320023e-06, |
|
"logits": -1.8157219886779785, |
|
"logps": -89.4225082397461, |
|
"loss": 0.548, |
|
"objective": 0.5380887985229492, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.5380887985229492, |
|
"step": 95, |
|
"wo_beta": 6.339680194854736 |
|
}, |
|
{ |
|
"dpo_loss": 0.5714857578277588, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 16.553483764105664, |
|
"learning_rate": 4.974150717193654e-06, |
|
"logits": -1.7562583684921265, |
|
"logps": -88.6811294555664, |
|
"loss": 0.5922, |
|
"objective": 0.5714857578277588, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.5714857578277588, |
|
"step": 100, |
|
"wo_beta": 5.948981285095215 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 0.6818667054176331, |
|
"eval_logits": -1.7963634729385376, |
|
"eval_logps": -95.815185546875, |
|
"eval_loss": 0.6793084740638733, |
|
"eval_objective": 0.6818667054176331, |
|
"eval_ranking_simple": 0.5486542582511902, |
|
"eval_regularize": 0.6818667054176331, |
|
"eval_runtime": 368.0636, |
|
"eval_samples_per_second": 15.731, |
|
"eval_steps_per_second": 1.312, |
|
"eval_wo_beta": 7.707693576812744, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.5925873517990112, |
|
"epoch": 0.2975909305621162, |
|
"grad_norm": 16.87738475184783, |
|
"learning_rate": 4.964491738023321e-06, |
|
"logits": -1.8753679990768433, |
|
"logps": -90.80838775634766, |
|
"loss": 0.5707, |
|
"objective": 0.5925873517990112, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.5925873517990112, |
|
"step": 105, |
|
"wo_beta": 5.3273797035217285 |
|
}, |
|
{ |
|
"dpo_loss": 0.5388583540916443, |
|
"epoch": 0.3117619272555503, |
|
"grad_norm": 15.466132286249575, |
|
"learning_rate": 4.953315228402512e-06, |
|
"logits": -1.7556500434875488, |
|
"logps": -88.66165924072266, |
|
"loss": 0.5374, |
|
"objective": 0.5388583540916443, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.5388583540916443, |
|
"step": 110, |
|
"wo_beta": 5.21142053604126 |
|
}, |
|
{ |
|
"dpo_loss": 0.5987796187400818, |
|
"epoch": 0.32593292394898443, |
|
"grad_norm": 16.57925719769902, |
|
"learning_rate": 4.9406280703561944e-06, |
|
"logits": -1.6699596643447876, |
|
"logps": -87.46602630615234, |
|
"loss": 0.5713, |
|
"objective": 0.5987796187400818, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.5987796187400818, |
|
"step": 115, |
|
"wo_beta": 5.849599838256836 |
|
}, |
|
{ |
|
"dpo_loss": 0.5472472906112671, |
|
"epoch": 0.3401039206424185, |
|
"grad_norm": 16.2014631256649, |
|
"learning_rate": 4.926438076103162e-06, |
|
"logits": -1.6490483283996582, |
|
"logps": -87.74899291992188, |
|
"loss": 0.5603, |
|
"objective": 0.5472472906112671, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.5472472906112671, |
|
"step": 120, |
|
"wo_beta": 6.010056495666504 |
|
}, |
|
{ |
|
"dpo_loss": 0.5509154796600342, |
|
"epoch": 0.35427491733585265, |
|
"grad_norm": 14.950351510543216, |
|
"learning_rate": 4.910753983245589e-06, |
|
"logits": -1.7191225290298462, |
|
"logps": -88.99519348144531, |
|
"loss": 0.5397, |
|
"objective": 0.5509154796600342, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.5509154796600342, |
|
"step": 125, |
|
"wo_beta": 5.906139850616455 |
|
}, |
|
{ |
|
"dpo_loss": 0.5123094916343689, |
|
"epoch": 0.3684459140292867, |
|
"grad_norm": 13.21141785871735, |
|
"learning_rate": 4.893585449388786e-06, |
|
"logits": -1.695233702659607, |
|
"logps": -85.64789581298828, |
|
"loss": 0.5398, |
|
"objective": 0.5123094916343689, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.5123094916343689, |
|
"step": 130, |
|
"wo_beta": 4.518141269683838 |
|
}, |
|
{ |
|
"dpo_loss": 0.5679463744163513, |
|
"epoch": 0.3826169107227208, |
|
"grad_norm": 16.522848387628404, |
|
"learning_rate": 4.8749430461944536e-06, |
|
"logits": -1.6519335508346558, |
|
"logps": -88.2005615234375, |
|
"loss": 0.5526, |
|
"objective": 0.5679463744163513, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.5679463744163513, |
|
"step": 135, |
|
"wo_beta": 5.8905439376831055 |
|
}, |
|
{ |
|
"dpo_loss": 0.5832223296165466, |
|
"epoch": 0.39678790741615494, |
|
"grad_norm": 18.436163706658995, |
|
"learning_rate": 4.854838252871097e-06, |
|
"logits": -1.5592352151870728, |
|
"logps": -90.67977142333984, |
|
"loss": 0.5534, |
|
"objective": 0.5832223296165466, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.5832223296165466, |
|
"step": 140, |
|
"wo_beta": 5.7765278816223145 |
|
}, |
|
{ |
|
"dpo_loss": 0.510637640953064, |
|
"epoch": 0.410958904109589, |
|
"grad_norm": 15.85144306565166, |
|
"learning_rate": 4.833283449105609e-06, |
|
"logits": -1.5676114559173584, |
|
"logps": -90.87916564941406, |
|
"loss": 0.527, |
|
"objective": 0.510637640953064, |
|
"ranking_simple": 0.699999988079071, |
|
"regularize": 0.510637640953064, |
|
"step": 145, |
|
"wo_beta": 4.243090629577637 |
|
}, |
|
{ |
|
"dpo_loss": 0.5151103138923645, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 14.430458288203226, |
|
"learning_rate": 4.810291907440382e-06, |
|
"logits": -1.5757466554641724, |
|
"logps": -91.76609802246094, |
|
"loss": 0.5002, |
|
"objective": 0.5151103138923645, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.5151103138923645, |
|
"step": 150, |
|
"wo_beta": 6.819777011871338 |
|
}, |
|
{ |
|
"epoch": 0.42512990080302315, |
|
"eval_dpo_loss": 0.6748862862586975, |
|
"eval_logits": -1.5950791835784912, |
|
"eval_logps": -96.20237731933594, |
|
"eval_loss": 0.6815473437309265, |
|
"eval_objective": 0.6748862862586975, |
|
"eval_ranking_simple": 0.5496894121170044, |
|
"eval_regularize": 0.6748862862586975, |
|
"eval_runtime": 367.9317, |
|
"eval_samples_per_second": 15.737, |
|
"eval_steps_per_second": 1.313, |
|
"eval_wo_beta": 7.438036918640137, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.49763983488082886, |
|
"epoch": 0.43930089749645723, |
|
"grad_norm": 17.361740656063542, |
|
"learning_rate": 4.785877785100633e-06, |
|
"logits": -1.6784894466400146, |
|
"logps": -92.56346130371094, |
|
"loss": 0.5198, |
|
"objective": 0.49763983488082886, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.49763983488082886, |
|
"step": 155, |
|
"wo_beta": 5.170775890350342 |
|
}, |
|
{ |
|
"dpo_loss": 0.5579937100410461, |
|
"epoch": 0.45347189418989137, |
|
"grad_norm": 15.652756053936857, |
|
"learning_rate": 4.7600561152769795e-06, |
|
"logits": -1.5714988708496094, |
|
"logps": -92.15592956542969, |
|
"loss": 0.5328, |
|
"objective": 0.5579937100410461, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.5579937100410461, |
|
"step": 160, |
|
"wo_beta": 5.608686447143555 |
|
}, |
|
{ |
|
"dpo_loss": 0.5186927318572998, |
|
"epoch": 0.46764289088332545, |
|
"grad_norm": 14.630193025898393, |
|
"learning_rate": 4.732842797868631e-06, |
|
"logits": -1.6691575050354004, |
|
"logps": -91.15839385986328, |
|
"loss": 0.5189, |
|
"objective": 0.5186927318572998, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.5186927318572998, |
|
"step": 165, |
|
"wo_beta": 5.489006996154785 |
|
}, |
|
{ |
|
"dpo_loss": 0.4966394603252411, |
|
"epoch": 0.4818138875767596, |
|
"grad_norm": 13.19317387340531, |
|
"learning_rate": 4.704254589692903e-06, |
|
"logits": -1.7252763509750366, |
|
"logps": -92.86217498779297, |
|
"loss": 0.4959, |
|
"objective": 0.4966394603252411, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.4966394603252411, |
|
"step": 170, |
|
"wo_beta": 5.683476448059082 |
|
}, |
|
{ |
|
"dpo_loss": 0.4829941689968109, |
|
"epoch": 0.49598488427019366, |
|
"grad_norm": 16.695755417688215, |
|
"learning_rate": 4.6743090941670675e-06, |
|
"logits": -1.609352946281433, |
|
"logps": -95.68805694580078, |
|
"loss": 0.4892, |
|
"objective": 0.4829941689968109, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.4829941689968109, |
|
"step": 175, |
|
"wo_beta": 4.217517852783203 |
|
}, |
|
{ |
|
"dpo_loss": 0.5080674886703491, |
|
"epoch": 0.5101558809636277, |
|
"grad_norm": 18.149531395287664, |
|
"learning_rate": 4.643024750468913e-06, |
|
"logits": -1.6799732446670532, |
|
"logps": -99.82926177978516, |
|
"loss": 0.468, |
|
"objective": 0.5080674886703491, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.5080674886703491, |
|
"step": 180, |
|
"wo_beta": 5.674283504486084 |
|
}, |
|
{ |
|
"dpo_loss": 0.4599061608314514, |
|
"epoch": 0.5243268776570619, |
|
"grad_norm": 16.418473362321556, |
|
"learning_rate": 4.610420822182671e-06, |
|
"logits": -1.6607011556625366, |
|
"logps": -99.13754272460938, |
|
"loss": 0.4631, |
|
"objective": 0.4599061608314514, |
|
"ranking_simple": 0.699999988079071, |
|
"regularize": 0.4599061608314514, |
|
"step": 185, |
|
"wo_beta": 4.34508752822876 |
|
}, |
|
{ |
|
"dpo_loss": 0.4760186970233917, |
|
"epoch": 0.538497874350496, |
|
"grad_norm": 16.5909593130195, |
|
"learning_rate": 4.576517385437315e-06, |
|
"logits": -1.7211116552352905, |
|
"logps": -96.94883728027344, |
|
"loss": 0.4859, |
|
"objective": 0.4760186970233917, |
|
"ranking_simple": 0.6958333253860474, |
|
"regularize": 0.4760186970233917, |
|
"step": 190, |
|
"wo_beta": 4.704507827758789 |
|
}, |
|
{ |
|
"dpo_loss": 0.4567195773124695, |
|
"epoch": 0.5526688710439301, |
|
"grad_norm": 17.67231005570377, |
|
"learning_rate": 4.541335316544514e-06, |
|
"logits": -1.7492233514785767, |
|
"logps": -95.30302429199219, |
|
"loss": 0.5112, |
|
"objective": 0.4567195773124695, |
|
"ranking_simple": 0.7208333611488342, |
|
"regularize": 0.4567195773124695, |
|
"step": 195, |
|
"wo_beta": 4.662184238433838 |
|
}, |
|
{ |
|
"dpo_loss": 0.4767034947872162, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 14.561592123615519, |
|
"learning_rate": 4.5048962791438885e-06, |
|
"logits": -1.7373807430267334, |
|
"logps": -95.8047866821289, |
|
"loss": 0.4735, |
|
"objective": 0.4767034947872162, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.4767034947872162, |
|
"step": 200, |
|
"wo_beta": 5.47299861907959 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 0.691116213798523, |
|
"eval_logits": -1.7563871145248413, |
|
"eval_logps": -98.91757202148438, |
|
"eval_loss": 0.6950607299804688, |
|
"eval_objective": 0.691116213798523, |
|
"eval_ranking_simple": 0.5569358468055725, |
|
"eval_regularize": 0.691116213798523, |
|
"eval_runtime": 367.7852, |
|
"eval_samples_per_second": 15.743, |
|
"eval_steps_per_second": 1.313, |
|
"eval_wo_beta": 7.524092197418213, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.49286583065986633, |
|
"epoch": 0.5810108644307983, |
|
"grad_norm": 15.609742263801415, |
|
"learning_rate": 4.467222710863444e-06, |
|
"logits": -1.6046305894851685, |
|
"logps": -95.40666961669922, |
|
"loss": 0.4728, |
|
"objective": 0.49286583065986633, |
|
"ranking_simple": 0.6833333373069763, |
|
"regularize": 0.49286583065986633, |
|
"step": 205, |
|
"wo_beta": 5.8981099128723145 |
|
}, |
|
{ |
|
"dpo_loss": 0.42898985743522644, |
|
"epoch": 0.5951818611242324, |
|
"grad_norm": 16.532108905835056, |
|
"learning_rate": 4.428337809503425e-06, |
|
"logits": -1.6189254522323608, |
|
"logps": -94.09720611572266, |
|
"loss": 0.4362, |
|
"objective": 0.42898985743522644, |
|
"ranking_simple": 0.6916666626930237, |
|
"regularize": 0.42898985743522644, |
|
"step": 210, |
|
"wo_beta": 3.8531105518341064 |
|
}, |
|
{ |
|
"dpo_loss": 0.44433167576789856, |
|
"epoch": 0.6093528578176665, |
|
"grad_norm": 18.51464695126941, |
|
"learning_rate": 4.388265518752085e-06, |
|
"logits": -1.7230619192123413, |
|
"logps": -92.92915344238281, |
|
"loss": 0.4621, |
|
"objective": 0.44433167576789856, |
|
"ranking_simple": 0.6791666746139526, |
|
"regularize": 0.44433167576789856, |
|
"step": 215, |
|
"wo_beta": 5.033292770385742 |
|
}, |
|
{ |
|
"dpo_loss": 0.44531288743019104, |
|
"epoch": 0.6235238545111006, |
|
"grad_norm": 14.674842226422456, |
|
"learning_rate": 4.347030513442168e-06, |
|
"logits": -1.7578327655792236, |
|
"logps": -91.2856674194336, |
|
"loss": 0.4332, |
|
"objective": 0.44531288743019104, |
|
"ranking_simple": 0.6916666626930237, |
|
"regularize": 0.44531288743019104, |
|
"step": 220, |
|
"wo_beta": 5.138680934906006 |
|
}, |
|
{ |
|
"dpo_loss": 0.47229692339897156, |
|
"epoch": 0.6376948512045347, |
|
"grad_norm": 13.123177972096014, |
|
"learning_rate": 4.304658184357186e-06, |
|
"logits": -1.8197827339172363, |
|
"logps": -92.20543670654297, |
|
"loss": 0.4692, |
|
"objective": 0.47229692339897156, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.47229692339897156, |
|
"step": 225, |
|
"wo_beta": 5.442239284515381 |
|
}, |
|
{ |
|
"dpo_loss": 0.4127563536167145, |
|
"epoch": 0.6518658478979689, |
|
"grad_norm": 13.818617424678937, |
|
"learning_rate": 4.261174622596835e-06, |
|
"logits": -1.6802526712417603, |
|
"logps": -90.7798843383789, |
|
"loss": 0.4559, |
|
"objective": 0.4127563536167145, |
|
"ranking_simple": 0.7333333492279053, |
|
"regularize": 0.4127563536167145, |
|
"step": 230, |
|
"wo_beta": 3.521521806716919 |
|
}, |
|
{ |
|
"dpo_loss": 0.45779237151145935, |
|
"epoch": 0.6660368445914029, |
|
"grad_norm": 15.036299488311245, |
|
"learning_rate": 4.216606603511202e-06, |
|
"logits": -1.6339088678359985, |
|
"logps": -90.51241302490234, |
|
"loss": 0.4578, |
|
"objective": 0.45779237151145935, |
|
"ranking_simple": 0.6666666865348816, |
|
"regularize": 0.45779237151145935, |
|
"step": 235, |
|
"wo_beta": 4.571218967437744 |
|
}, |
|
{ |
|
"dpo_loss": 0.4738180935382843, |
|
"epoch": 0.680207841284837, |
|
"grad_norm": 13.614428839916116, |
|
"learning_rate": 4.170981570213621e-06, |
|
"logits": -1.8103351593017578, |
|
"logps": -92.29689025878906, |
|
"loss": 0.4481, |
|
"objective": 0.4738180935382843, |
|
"ranking_simple": 0.6958333253860474, |
|
"regularize": 0.4738180935382843, |
|
"step": 240, |
|
"wo_beta": 6.207835674285889 |
|
}, |
|
{ |
|
"dpo_loss": 0.5174158215522766, |
|
"epoch": 0.6943788379782712, |
|
"grad_norm": 16.01438129051185, |
|
"learning_rate": 4.124327616682362e-06, |
|
"logits": -1.7986476421356201, |
|
"logps": -91.10508728027344, |
|
"loss": 0.4642, |
|
"objective": 0.5174158215522766, |
|
"ranking_simple": 0.6958333253860474, |
|
"regularize": 0.5174158215522766, |
|
"step": 245, |
|
"wo_beta": 4.900957107543945 |
|
}, |
|
{ |
|
"dpo_loss": 0.44342610239982605, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 13.738305838918832, |
|
"learning_rate": 4.076673470461538e-06, |
|
"logits": -1.5667024850845337, |
|
"logps": -88.29222106933594, |
|
"loss": 0.4626, |
|
"objective": 0.44342610239982605, |
|
"ranking_simple": 0.7041666507720947, |
|
"regularize": 0.44342610239982605, |
|
"step": 250, |
|
"wo_beta": 4.291601181030273 |
|
}, |
|
{ |
|
"epoch": 0.7085498346717053, |
|
"eval_dpo_loss": 0.694493293762207, |
|
"eval_logits": -1.7985897064208984, |
|
"eval_logps": -93.47747039794922, |
|
"eval_loss": 0.6976169347763062, |
|
"eval_objective": 0.694493293762207, |
|
"eval_ranking_simple": 0.5579710006713867, |
|
"eval_regularize": 0.694493293762207, |
|
"eval_runtime": 367.886, |
|
"eval_samples_per_second": 15.739, |
|
"eval_steps_per_second": 1.313, |
|
"eval_wo_beta": 7.902660369873047, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.43279239535331726, |
|
"epoch": 0.7227208313651393, |
|
"grad_norm": 12.486423165060444, |
|
"learning_rate": 4.028048474971889e-06, |
|
"logits": -1.860019564628601, |
|
"logps": -87.4556884765625, |
|
"loss": 0.4459, |
|
"objective": 0.43279239535331726, |
|
"ranking_simple": 0.6833333373069763, |
|
"regularize": 0.43279239535331726, |
|
"step": 255, |
|
"wo_beta": 4.541534900665283 |
|
}, |
|
{ |
|
"dpo_loss": 0.41317591071128845, |
|
"epoch": 0.7368918280585735, |
|
"grad_norm": 13.93218309283749, |
|
"learning_rate": 3.978482571442339e-06, |
|
"logits": -1.8741662502288818, |
|
"logps": -91.13224029541016, |
|
"loss": 0.4561, |
|
"objective": 0.41317591071128845, |
|
"ranking_simple": 0.6625000238418579, |
|
"regularize": 0.41317591071128845, |
|
"step": 260, |
|
"wo_beta": 5.1168012619018555 |
|
}, |
|
{ |
|
"dpo_loss": 0.39290040731430054, |
|
"epoch": 0.7510628247520076, |
|
"grad_norm": 13.220960314976885, |
|
"learning_rate": 3.928006280473445e-06, |
|
"logits": -1.8604073524475098, |
|
"logps": -92.02545928955078, |
|
"loss": 0.419, |
|
"objective": 0.39290040731430054, |
|
"ranking_simple": 0.7708333134651184, |
|
"regularize": 0.39290040731430054, |
|
"step": 265, |
|
"wo_beta": 4.408606052398682 |
|
}, |
|
{ |
|
"dpo_loss": 0.47771012783050537, |
|
"epoch": 0.7652338214454416, |
|
"grad_norm": 14.906696639905949, |
|
"learning_rate": 3.876650683244093e-06, |
|
"logits": -1.997718095779419, |
|
"logps": -94.74840545654297, |
|
"loss": 0.4365, |
|
"objective": 0.47771012783050537, |
|
"ranking_simple": 0.6666666865348816, |
|
"regularize": 0.47771012783050537, |
|
"step": 270, |
|
"wo_beta": 4.766172409057617 |
|
}, |
|
{ |
|
"dpo_loss": 0.4372769594192505, |
|
"epoch": 0.7794048181388757, |
|
"grad_norm": 14.602546448634548, |
|
"learning_rate": 3.8244474023730155e-06, |
|
"logits": -1.8585816621780396, |
|
"logps": -94.3137435913086, |
|
"loss": 0.4293, |
|
"objective": 0.4372769594192505, |
|
"ranking_simple": 0.6791666746139526, |
|
"regularize": 0.4372769594192505, |
|
"step": 275, |
|
"wo_beta": 5.203604698181152 |
|
}, |
|
{ |
|
"dpo_loss": 0.3766806721687317, |
|
"epoch": 0.7935758148323099, |
|
"grad_norm": 15.405745134115882, |
|
"learning_rate": 3.771428582446908e-06, |
|
"logits": -1.9468127489089966, |
|
"logps": -95.09542846679688, |
|
"loss": 0.4039, |
|
"objective": 0.3766806721687317, |
|
"ranking_simple": 0.7208333611488342, |
|
"regularize": 0.3766806721687317, |
|
"step": 280, |
|
"wo_beta": 4.175257205963135 |
|
}, |
|
{ |
|
"dpo_loss": 0.41625434160232544, |
|
"epoch": 0.807746811525744, |
|
"grad_norm": 15.243116510039572, |
|
"learning_rate": 3.7176268702271468e-06, |
|
"logits": -1.9459937810897827, |
|
"logps": -95.90043640136719, |
|
"loss": 0.407, |
|
"objective": 0.41625434160232544, |
|
"ranking_simple": 0.699999988079071, |
|
"regularize": 0.41625434160232544, |
|
"step": 285, |
|
"wo_beta": 5.091909408569336 |
|
}, |
|
{ |
|
"dpo_loss": 0.3713260293006897, |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 16.703078330526107, |
|
"learning_rate": 3.6630753945472854e-06, |
|
"logits": -1.963159203529358, |
|
"logps": -95.55049133300781, |
|
"loss": 0.3981, |
|
"objective": 0.3713260293006897, |
|
"ranking_simple": 0.7791666388511658, |
|
"regularize": 0.3713260293006897, |
|
"step": 290, |
|
"wo_beta": 3.84537935256958 |
|
}, |
|
{ |
|
"dpo_loss": 0.4009644687175751, |
|
"epoch": 0.8360888049126122, |
|
"grad_norm": 13.199801209484807, |
|
"learning_rate": 3.6078077459137097e-06, |
|
"logits": -1.99600350856781, |
|
"logps": -98.70610809326172, |
|
"loss": 0.4243, |
|
"objective": 0.4009644687175751, |
|
"ranking_simple": 0.7166666388511658, |
|
"regularize": 0.4009644687175751, |
|
"step": 295, |
|
"wo_beta": 5.210625171661377 |
|
}, |
|
{ |
|
"dpo_loss": 0.42688027024269104, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 12.622555129903681, |
|
"learning_rate": 3.5518579558220144e-06, |
|
"logits": -1.9770207405090332, |
|
"logps": -95.8087387084961, |
|
"loss": 0.4214, |
|
"objective": 0.42688027024269104, |
|
"ranking_simple": 0.7458333373069763, |
|
"regularize": 0.42688027024269104, |
|
"step": 300, |
|
"wo_beta": 3.2817904949188232 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 0.6865138411521912, |
|
"eval_logits": -2.0138111114501953, |
|
"eval_logps": -104.43373107910156, |
|
"eval_loss": 0.693064272403717, |
|
"eval_objective": 0.6865138411521912, |
|
"eval_ranking_simple": 0.5615941882133484, |
|
"eval_regularize": 0.6865138411521912, |
|
"eval_runtime": 367.6512, |
|
"eval_samples_per_second": 15.749, |
|
"eval_steps_per_second": 1.314, |
|
"eval_wo_beta": 7.581406593322754, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.44818738102912903, |
|
"epoch": 0.8644307982994804, |
|
"grad_norm": 13.23517832354566, |
|
"learning_rate": 3.495260475801841e-06, |
|
"logits": -1.9494545459747314, |
|
"logps": -97.299560546875, |
|
"loss": 0.3903, |
|
"objective": 0.44818738102912903, |
|
"ranking_simple": 0.7041666507720947, |
|
"regularize": 0.44818738102912903, |
|
"step": 305, |
|
"wo_beta": 4.3523712158203125 |
|
}, |
|
{ |
|
"dpo_loss": 0.34828221797943115, |
|
"epoch": 0.8786017949929145, |
|
"grad_norm": 14.41832502286332, |
|
"learning_rate": 3.4380501562030704e-06, |
|
"logits": -1.9428808689117432, |
|
"logps": -98.20413970947266, |
|
"loss": 0.3807, |
|
"objective": 0.34828221797943115, |
|
"ranking_simple": 0.7333333492279053, |
|
"regularize": 0.34828221797943115, |
|
"step": 310, |
|
"wo_beta": 3.6427719593048096 |
|
}, |
|
{ |
|
"dpo_loss": 0.3642140030860901, |
|
"epoch": 0.8927727916863486, |
|
"grad_norm": 12.300830909289896, |
|
"learning_rate": 3.3802622247364446e-06, |
|
"logits": -1.9570696353912354, |
|
"logps": -98.85526275634766, |
|
"loss": 0.4042, |
|
"objective": 0.3642140030860901, |
|
"ranking_simple": 0.699999988079071, |
|
"regularize": 0.3642140030860901, |
|
"step": 315, |
|
"wo_beta": 4.256704330444336 |
|
}, |
|
{ |
|
"dpo_loss": 0.399863064289093, |
|
"epoch": 0.9069437883797827, |
|
"grad_norm": 15.886821504052966, |
|
"learning_rate": 3.321932264781822e-06, |
|
"logits": -1.9135253429412842, |
|
"logps": -101.23651885986328, |
|
"loss": 0.4395, |
|
"objective": 0.399863064289093, |
|
"ranking_simple": 0.7708333134651184, |
|
"regularize": 0.399863064289093, |
|
"step": 320, |
|
"wo_beta": 2.9900147914886475 |
|
}, |
|
{ |
|
"dpo_loss": 0.37714484333992004, |
|
"epoch": 0.9211147850732169, |
|
"grad_norm": 14.587168312549423, |
|
"learning_rate": 3.2630961934774265e-06, |
|
"logits": -1.9540404081344604, |
|
"logps": -100.98246002197266, |
|
"loss": 0.3745, |
|
"objective": 0.37714484333992004, |
|
"ranking_simple": 0.7250000238418579, |
|
"regularize": 0.37714484333992004, |
|
"step": 325, |
|
"wo_beta": 4.075650691986084 |
|
}, |
|
{ |
|
"dpo_loss": 0.35718733072280884, |
|
"epoch": 0.9352857817666509, |
|
"grad_norm": 15.295489026198931, |
|
"learning_rate": 3.203790239603583e-06, |
|
"logits": -1.7621917724609375, |
|
"logps": -101.14175415039062, |
|
"loss": 0.4011, |
|
"objective": 0.35718733072280884, |
|
"ranking_simple": 0.737500011920929, |
|
"regularize": 0.35718733072280884, |
|
"step": 330, |
|
"wo_beta": 3.9375758171081543 |
|
}, |
|
{ |
|
"dpo_loss": 0.3431912660598755, |
|
"epoch": 0.949456778460085, |
|
"grad_norm": 13.496480338032299, |
|
"learning_rate": 3.1440509212745584e-06, |
|
"logits": -1.7355188131332397, |
|
"logps": -100.78395080566406, |
|
"loss": 0.3733, |
|
"objective": 0.3431912660598755, |
|
"ranking_simple": 0.7416666746139526, |
|
"regularize": 0.3431912660598755, |
|
"step": 335, |
|
"wo_beta": 3.3042349815368652 |
|
}, |
|
{ |
|
"dpo_loss": 0.43064969778060913, |
|
"epoch": 0.9636277751535192, |
|
"grad_norm": 13.67781828181165, |
|
"learning_rate": 3.0839150234522404e-06, |
|
"logits": -1.781424641609192, |
|
"logps": -99.20060729980469, |
|
"loss": 0.4067, |
|
"objective": 0.43064969778060913, |
|
"ranking_simple": 0.7083333134651184, |
|
"regularize": 0.43064969778060913, |
|
"step": 340, |
|
"wo_beta": 4.711233615875244 |
|
}, |
|
{ |
|
"dpo_loss": 0.3512551188468933, |
|
"epoch": 0.9777987718469532, |
|
"grad_norm": 13.324791495929361, |
|
"learning_rate": 3.0234195752955032e-06, |
|
"logits": -1.8463162183761597, |
|
"logps": -95.25973510742188, |
|
"loss": 0.3729, |
|
"objective": 0.3512551188468933, |
|
"ranking_simple": 0.7541666626930237, |
|
"regularize": 0.3512551188468933, |
|
"step": 345, |
|
"wo_beta": 2.9623959064483643 |
|
}, |
|
{ |
|
"dpo_loss": 0.32169008255004883, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 15.127441089438493, |
|
"learning_rate": 2.962601827359208e-06, |
|
"logits": -1.78915274143219, |
|
"logps": -97.9096450805664, |
|
"loss": 0.3652, |
|
"objective": 0.32169008255004883, |
|
"ranking_simple": 0.824999988079071, |
|
"regularize": 0.32169008255004883, |
|
"step": 350, |
|
"wo_beta": 2.600872039794922 |
|
}, |
|
{ |
|
"epoch": 0.9919697685403873, |
|
"eval_dpo_loss": 0.6984499096870422, |
|
"eval_logits": -1.9094278812408447, |
|
"eval_logps": -102.83055114746094, |
|
"eval_loss": 0.7074127793312073, |
|
"eval_objective": 0.6984499096870422, |
|
"eval_ranking_simple": 0.5559006333351135, |
|
"eval_regularize": 0.6984499096870422, |
|
"eval_runtime": 367.6345, |
|
"eval_samples_per_second": 15.749, |
|
"eval_steps_per_second": 1.314, |
|
"eval_wo_beta": 7.8343892097473145, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.2995939552783966, |
|
"epoch": 1.0061407652338215, |
|
"grad_norm": 9.31634152713278, |
|
"learning_rate": 2.9014992286568773e-06, |
|
"logits": -2.007425546646118, |
|
"logps": -96.57454681396484, |
|
"loss": 0.3042, |
|
"objective": 0.2995939552783966, |
|
"ranking_simple": 0.8041666746139526, |
|
"regularize": 0.2995939552783966, |
|
"step": 355, |
|
"wo_beta": 2.7125356197357178 |
|
}, |
|
{ |
|
"dpo_loss": 0.24162109196186066, |
|
"epoch": 1.0203117619272555, |
|
"grad_norm": 11.90366408091981, |
|
"learning_rate": 2.840149403601166e-06, |
|
"logits": -1.9152239561080933, |
|
"logps": -101.0685043334961, |
|
"loss": 0.2301, |
|
"objective": 0.24162109196186066, |
|
"ranking_simple": 0.8125, |
|
"regularize": 0.24162109196186066, |
|
"step": 360, |
|
"wo_beta": 2.7127444744110107 |
|
}, |
|
{ |
|
"dpo_loss": 0.19792620837688446, |
|
"epoch": 1.0344827586206897, |
|
"grad_norm": 9.884538723237807, |
|
"learning_rate": 2.7785901288363253e-06, |
|
"logits": -1.9689671993255615, |
|
"logps": -107.58961486816406, |
|
"loss": 0.2082, |
|
"objective": 0.19792620837688446, |
|
"ranking_simple": 0.8291666507720947, |
|
"regularize": 0.19792620837688446, |
|
"step": 365, |
|
"wo_beta": 1.9694886207580566 |
|
}, |
|
{ |
|
"dpo_loss": 0.23545877635478973, |
|
"epoch": 1.0486537553141237, |
|
"grad_norm": 16.911618406294945, |
|
"learning_rate": 2.7168593099769414e-06, |
|
"logits": -1.9311782121658325, |
|
"logps": -106.47748565673828, |
|
"loss": 0.2379, |
|
"objective": 0.23545877635478973, |
|
"ranking_simple": 0.8125, |
|
"regularize": 0.23545877635478973, |
|
"step": 370, |
|
"wo_beta": 3.021015167236328 |
|
}, |
|
{ |
|
"dpo_loss": 0.22429493069648743, |
|
"epoch": 1.0628247520075578, |
|
"grad_norm": 15.795921555252965, |
|
"learning_rate": 2.654994958267241e-06, |
|
"logits": -1.9985809326171875, |
|
"logps": -108.60234832763672, |
|
"loss": 0.2364, |
|
"objective": 0.22429493069648743, |
|
"ranking_simple": 0.8374999761581421, |
|
"regularize": 0.22429493069648743, |
|
"step": 375, |
|
"wo_beta": 1.6982978582382202 |
|
}, |
|
{ |
|
"dpo_loss": 0.20513677597045898, |
|
"epoch": 1.076995748700992, |
|
"grad_norm": 15.164372148185517, |
|
"learning_rate": 2.5930351671753707e-06, |
|
"logits": -2.0427591800689697, |
|
"logps": -109.7289047241211, |
|
"loss": 0.2153, |
|
"objective": 0.20513677597045898, |
|
"ranking_simple": 0.8583333492279053, |
|
"regularize": 0.20513677597045898, |
|
"step": 380, |
|
"wo_beta": 1.7354587316513062 |
|
}, |
|
{ |
|
"dpo_loss": 0.24022004008293152, |
|
"epoch": 1.091166745394426, |
|
"grad_norm": 13.938458603767867, |
|
"learning_rate": 2.5310180889370374e-06, |
|
"logits": -2.007528781890869, |
|
"logps": -109.47885131835938, |
|
"loss": 0.2371, |
|
"objective": 0.24022004008293152, |
|
"ranking_simple": 0.8291666507720947, |
|
"regularize": 0.24022004008293152, |
|
"step": 385, |
|
"wo_beta": 1.9898384809494019 |
|
}, |
|
{ |
|
"dpo_loss": 0.22819384932518005, |
|
"epoch": 1.10533774208786, |
|
"grad_norm": 10.75019956346705, |
|
"learning_rate": 2.468981911062964e-06, |
|
"logits": -1.9904738664627075, |
|
"logps": -110.48689270019531, |
|
"loss": 0.2092, |
|
"objective": 0.22819384932518005, |
|
"ranking_simple": 0.8291666507720947, |
|
"regularize": 0.22819384932518005, |
|
"step": 390, |
|
"wo_beta": 2.570746660232544 |
|
}, |
|
{ |
|
"dpo_loss": 0.2051982581615448, |
|
"epoch": 1.1195087387812943, |
|
"grad_norm": 10.957538026383743, |
|
"learning_rate": 2.4069648328246305e-06, |
|
"logits": -2.0448696613311768, |
|
"logps": -108.89817810058594, |
|
"loss": 0.2315, |
|
"objective": 0.2051982581615448, |
|
"ranking_simple": 0.8666666746139526, |
|
"regularize": 0.2051982581615448, |
|
"step": 395, |
|
"wo_beta": 2.1264781951904297 |
|
}, |
|
{ |
|
"dpo_loss": 0.20951204001903534, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 10.831310704591798, |
|
"learning_rate": 2.3450050417327593e-06, |
|
"logits": -2.0998401641845703, |
|
"logps": -106.39730072021484, |
|
"loss": 0.2206, |
|
"objective": 0.20951204001903534, |
|
"ranking_simple": 0.8500000238418579, |
|
"regularize": 0.20951204001903534, |
|
"step": 400, |
|
"wo_beta": 1.4239428043365479 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 0.729559600353241, |
|
"eval_logits": -2.090850591659546, |
|
"eval_logps": -113.60484313964844, |
|
"eval_loss": 0.7347043752670288, |
|
"eval_objective": 0.729559600353241, |
|
"eval_ranking_simple": 0.5502070188522339, |
|
"eval_regularize": 0.729559600353241, |
|
"eval_runtime": 367.8652, |
|
"eval_samples_per_second": 15.739, |
|
"eval_steps_per_second": 1.313, |
|
"eval_wo_beta": 8.675128936767578, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.24946197867393494, |
|
"epoch": 1.1478507321681626, |
|
"grad_norm": 13.639203958436303, |
|
"learning_rate": 2.2831406900230586e-06, |
|
"logits": -2.0489606857299805, |
|
"logps": -109.95741271972656, |
|
"loss": 0.2302, |
|
"objective": 0.24946197867393494, |
|
"ranking_simple": 0.8208333253860474, |
|
"regularize": 0.24946197867393494, |
|
"step": 405, |
|
"wo_beta": 3.3028647899627686 |
|
}, |
|
{ |
|
"dpo_loss": 0.22408606112003326, |
|
"epoch": 1.1620217288615966, |
|
"grad_norm": 9.982686874740121, |
|
"learning_rate": 2.221409871163675e-06, |
|
"logits": -2.026094913482666, |
|
"logps": -112.10523986816406, |
|
"loss": 0.204, |
|
"objective": 0.22408606112003326, |
|
"ranking_simple": 0.824999988079071, |
|
"regularize": 0.22408606112003326, |
|
"step": 410, |
|
"wo_beta": 3.6203742027282715 |
|
}, |
|
{ |
|
"dpo_loss": 0.24647466838359833, |
|
"epoch": 1.1761927255550306, |
|
"grad_norm": 14.127702730262909, |
|
"learning_rate": 2.1598505963988354e-06, |
|
"logits": -1.9576979875564575, |
|
"logps": -112.045654296875, |
|
"loss": 0.2487, |
|
"objective": 0.24647466838359833, |
|
"ranking_simple": 0.8041666746139526, |
|
"regularize": 0.24647466838359833, |
|
"step": 415, |
|
"wo_beta": 3.0596923828125 |
|
}, |
|
{ |
|
"dpo_loss": 0.2189057618379593, |
|
"epoch": 1.1903637222484649, |
|
"grad_norm": 12.453759090133827, |
|
"learning_rate": 2.098500771343124e-06, |
|
"logits": -1.9014623165130615, |
|
"logps": -112.163330078125, |
|
"loss": 0.224, |
|
"objective": 0.2189057618379593, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.2189057618379593, |
|
"step": 420, |
|
"wo_beta": 2.8976945877075195 |
|
}, |
|
{ |
|
"dpo_loss": 0.24521614611148834, |
|
"epoch": 1.204534718941899, |
|
"grad_norm": 11.810902908919543, |
|
"learning_rate": 2.037398172640793e-06, |
|
"logits": -1.939537525177002, |
|
"logps": -111.03417205810547, |
|
"loss": 0.225, |
|
"objective": 0.24521614611148834, |
|
"ranking_simple": 0.8166666626930237, |
|
"regularize": 0.24521614611148834, |
|
"step": 425, |
|
"wo_beta": 2.2815327644348145 |
|
}, |
|
{ |
|
"dpo_loss": 0.25871187448501587, |
|
"epoch": 1.2187057156353331, |
|
"grad_norm": 13.805740179315583, |
|
"learning_rate": 1.976580424704498e-06, |
|
"logits": -1.908257246017456, |
|
"logps": -114.54412078857422, |
|
"loss": 0.2315, |
|
"objective": 0.25871187448501587, |
|
"ranking_simple": 0.8208333253860474, |
|
"regularize": 0.25871187448501587, |
|
"step": 430, |
|
"wo_beta": 2.5115749835968018 |
|
}, |
|
{ |
|
"dpo_loss": 0.20695915818214417, |
|
"epoch": 1.2328767123287672, |
|
"grad_norm": 13.189182575578958, |
|
"learning_rate": 1.9160849765477604e-06, |
|
"logits": -1.845086932182312, |
|
"logps": -111.52816772460938, |
|
"loss": 0.2502, |
|
"objective": 0.20695915818214417, |
|
"ranking_simple": 0.8666666746139526, |
|
"regularize": 0.20695915818214417, |
|
"step": 435, |
|
"wo_beta": 2.1831676959991455 |
|
}, |
|
{ |
|
"dpo_loss": 0.2140309065580368, |
|
"epoch": 1.2470477090222012, |
|
"grad_norm": 15.231839264333766, |
|
"learning_rate": 1.8559490787254423e-06, |
|
"logits": -1.8013054132461548, |
|
"logps": -112.4487533569336, |
|
"loss": 0.2241, |
|
"objective": 0.2140309065580368, |
|
"ranking_simple": 0.8500000238418579, |
|
"regularize": 0.2140309065580368, |
|
"step": 440, |
|
"wo_beta": 2.4584310054779053 |
|
}, |
|
{ |
|
"dpo_loss": 0.2538544535636902, |
|
"epoch": 1.2612187057156352, |
|
"grad_norm": 11.902330497163605, |
|
"learning_rate": 1.7962097603964177e-06, |
|
"logits": -1.8283072710037231, |
|
"logps": -111.75948333740234, |
|
"loss": 0.2393, |
|
"objective": 0.2538544535636902, |
|
"ranking_simple": 0.8291666507720947, |
|
"regularize": 0.2538544535636902, |
|
"step": 445, |
|
"wo_beta": 2.5012738704681396 |
|
}, |
|
{ |
|
"dpo_loss": 0.21088647842407227, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 12.114049593362662, |
|
"learning_rate": 1.7369038065225743e-06, |
|
"logits": -1.9961200952529907, |
|
"logps": -110.46500396728516, |
|
"loss": 0.2202, |
|
"objective": 0.21088647842407227, |
|
"ranking_simple": 0.8458333611488342, |
|
"regularize": 0.21088647842407227, |
|
"step": 450, |
|
"wo_beta": 2.1799509525299072 |
|
}, |
|
{ |
|
"epoch": 1.2753897024090695, |
|
"eval_dpo_loss": 0.7433450222015381, |
|
"eval_logits": -1.9911303520202637, |
|
"eval_logps": -115.7781753540039, |
|
"eval_loss": 0.7462677955627441, |
|
"eval_objective": 0.7433450222015381, |
|
"eval_ranking_simple": 0.5512422323226929, |
|
"eval_regularize": 0.7433450222015381, |
|
"eval_runtime": 369.6423, |
|
"eval_samples_per_second": 15.664, |
|
"eval_steps_per_second": 1.307, |
|
"eval_wo_beta": 8.912315368652344, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.22404690086841583, |
|
"epoch": 1.2895606991025035, |
|
"grad_norm": 12.618712156759832, |
|
"learning_rate": 1.6780677352181781e-06, |
|
"logits": -1.821974515914917, |
|
"logps": -112.83589935302734, |
|
"loss": 0.2503, |
|
"objective": 0.22404690086841583, |
|
"ranking_simple": 0.8125, |
|
"regularize": 0.22404690086841583, |
|
"step": 455, |
|
"wo_beta": 2.789232015609741 |
|
}, |
|
{ |
|
"dpo_loss": 0.2399408221244812, |
|
"epoch": 1.3037316957959377, |
|
"grad_norm": 10.297452895979795, |
|
"learning_rate": 1.6197377752635563e-06, |
|
"logits": -1.9601954221725464, |
|
"logps": -111.7652587890625, |
|
"loss": 0.2322, |
|
"objective": 0.2399408221244812, |
|
"ranking_simple": 0.8166666626930237, |
|
"regularize": 0.2399408221244812, |
|
"step": 460, |
|
"wo_beta": 3.1664645671844482 |
|
}, |
|
{ |
|
"dpo_loss": 0.21428868174552917, |
|
"epoch": 1.3179026924893718, |
|
"grad_norm": 11.673896578608254, |
|
"learning_rate": 1.5619498437969302e-06, |
|
"logits": -1.9731502532958984, |
|
"logps": -109.419189453125, |
|
"loss": 0.2432, |
|
"objective": 0.21428868174552917, |
|
"ranking_simple": 0.824999988079071, |
|
"regularize": 0.21428868174552917, |
|
"step": 465, |
|
"wo_beta": 2.3578691482543945 |
|
}, |
|
{ |
|
"dpo_loss": 0.2447548806667328, |
|
"epoch": 1.3320736891828058, |
|
"grad_norm": 11.318434823205884, |
|
"learning_rate": 1.5047395241981606e-06, |
|
"logits": -2.0559751987457275, |
|
"logps": -107.98088836669922, |
|
"loss": 0.2364, |
|
"objective": 0.2447548806667328, |
|
"ranking_simple": 0.8500000238418579, |
|
"regularize": 0.2447548806667328, |
|
"step": 470, |
|
"wo_beta": 2.524103879928589 |
|
}, |
|
{ |
|
"dpo_loss": 0.2010059952735901, |
|
"epoch": 1.34624468587624, |
|
"grad_norm": 13.4415115238709, |
|
"learning_rate": 1.4481420441779862e-06, |
|
"logits": -1.910614013671875, |
|
"logps": -110.5365982055664, |
|
"loss": 0.2194, |
|
"objective": 0.2010059952735901, |
|
"ranking_simple": 0.8666666746139526, |
|
"regularize": 0.2010059952735901, |
|
"step": 475, |
|
"wo_beta": 2.4653513431549072 |
|
}, |
|
{ |
|
"dpo_loss": 0.21145032346248627, |
|
"epoch": 1.360415682569674, |
|
"grad_norm": 15.045958431800248, |
|
"learning_rate": 1.3921922540862907e-06, |
|
"logits": -2.0142934322357178, |
|
"logps": -109.8845443725586, |
|
"loss": 0.2244, |
|
"objective": 0.21145032346248627, |
|
"ranking_simple": 0.8166666626930237, |
|
"regularize": 0.21145032346248627, |
|
"step": 480, |
|
"wo_beta": 3.300536632537842 |
|
}, |
|
{ |
|
"dpo_loss": 0.23482932150363922, |
|
"epoch": 1.3745866792631083, |
|
"grad_norm": 14.203111358887098, |
|
"learning_rate": 1.3369246054527152e-06, |
|
"logits": -1.996147632598877, |
|
"logps": -108.94562530517578, |
|
"loss": 0.2473, |
|
"objective": 0.23482932150363922, |
|
"ranking_simple": 0.8291666507720947, |
|
"regularize": 0.23482932150363922, |
|
"step": 485, |
|
"wo_beta": 2.5876729488372803 |
|
}, |
|
{ |
|
"dpo_loss": 0.23283059895038605, |
|
"epoch": 1.3887576759565423, |
|
"grad_norm": 11.652820113044354, |
|
"learning_rate": 1.2823731297728536e-06, |
|
"logits": -1.98202645778656, |
|
"logps": -112.48513793945312, |
|
"loss": 0.2222, |
|
"objective": 0.23283059895038605, |
|
"ranking_simple": 0.824999988079071, |
|
"regularize": 0.23283059895038605, |
|
"step": 490, |
|
"wo_beta": 2.047060251235962 |
|
}, |
|
{ |
|
"dpo_loss": 0.19643358886241913, |
|
"epoch": 1.4029286726499763, |
|
"grad_norm": 16.098991414414286, |
|
"learning_rate": 1.2285714175530936e-06, |
|
"logits": -1.9894219636917114, |
|
"logps": -111.90442657470703, |
|
"loss": 0.2257, |
|
"objective": 0.19643358886241913, |
|
"ranking_simple": 0.8458333611488342, |
|
"regularize": 0.19643358886241913, |
|
"step": 495, |
|
"wo_beta": 2.041602611541748 |
|
}, |
|
{ |
|
"dpo_loss": 0.22570651769638062, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 14.942258982525255, |
|
"learning_rate": 1.1755525976269851e-06, |
|
"logits": -1.9338775873184204, |
|
"logps": -109.2611083984375, |
|
"loss": 0.2366, |
|
"objective": 0.22570651769638062, |
|
"ranking_simple": 0.8208333253860474, |
|
"regularize": 0.22570651769638062, |
|
"step": 500, |
|
"wo_beta": 1.8392161130905151 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 0.7387389540672302, |
|
"eval_logits": -2.046374797821045, |
|
"eval_logps": -114.77104187011719, |
|
"eval_loss": 0.7444195747375488, |
|
"eval_objective": 0.7387389540672302, |
|
"eval_ranking_simple": 0.5517598390579224, |
|
"eval_regularize": 0.7387389540672302, |
|
"eval_runtime": 367.6953, |
|
"eval_samples_per_second": 15.747, |
|
"eval_steps_per_second": 1.314, |
|
"eval_wo_beta": 8.863048553466797, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.18702387809753418, |
|
"epoch": 1.4312706660368446, |
|
"grad_norm": 11.645842357352148, |
|
"learning_rate": 1.1233493167559065e-06, |
|
"logits": -2.00331711769104, |
|
"logps": -110.28618621826172, |
|
"loss": 0.2087, |
|
"objective": 0.18702387809753418, |
|
"ranking_simple": 0.8458333611488342, |
|
"regularize": 0.18702387809753418, |
|
"step": 505, |
|
"wo_beta": 2.0265085697174072 |
|
}, |
|
{ |
|
"dpo_loss": 0.23954346776008606, |
|
"epoch": 1.4454416627302786, |
|
"grad_norm": 11.232727785445284, |
|
"learning_rate": 1.0719937195265555e-06, |
|
"logits": -2.0127902030944824, |
|
"logps": -110.59080505371094, |
|
"loss": 0.2313, |
|
"objective": 0.23954346776008606, |
|
"ranking_simple": 0.8458333611488342, |
|
"regularize": 0.23954346776008606, |
|
"step": 510, |
|
"wo_beta": 2.1970200538635254 |
|
}, |
|
{ |
|
"dpo_loss": 0.20086827874183655, |
|
"epoch": 1.4596126594237129, |
|
"grad_norm": 10.217720875279324, |
|
"learning_rate": 1.0215174285576615e-06, |
|
"logits": -2.087155818939209, |
|
"logps": -110.73674774169922, |
|
"loss": 0.2043, |
|
"objective": 0.20086827874183655, |
|
"ranking_simple": 0.8083333373069763, |
|
"regularize": 0.20086827874183655, |
|
"step": 515, |
|
"wo_beta": 2.5676777362823486 |
|
}, |
|
{ |
|
"dpo_loss": 0.19097186625003815, |
|
"epoch": 1.473783656117147, |
|
"grad_norm": 11.650910390578003, |
|
"learning_rate": 9.719515250281122e-07, |
|
"logits": -1.9594320058822632, |
|
"logps": -110.79000091552734, |
|
"loss": 0.1898, |
|
"objective": 0.19097186625003815, |
|
"ranking_simple": 0.8833333253860474, |
|
"regularize": 0.19097186625003815, |
|
"step": 520, |
|
"wo_beta": 2.2379517555236816 |
|
}, |
|
{ |
|
"dpo_loss": 0.2083693891763687, |
|
"epoch": 1.487954652810581, |
|
"grad_norm": 12.830417240600797, |
|
"learning_rate": 9.233265295384624e-07, |
|
"logits": -1.8601106405258179, |
|
"logps": -110.70569610595703, |
|
"loss": 0.2274, |
|
"objective": 0.2083693891763687, |
|
"ranking_simple": 0.8416666388511658, |
|
"regularize": 0.2083693891763687, |
|
"step": 525, |
|
"wo_beta": 2.14703369140625 |
|
}, |
|
{ |
|
"dpo_loss": 0.19533474743366241, |
|
"epoch": 1.5021256495040152, |
|
"grad_norm": 14.108055837836243, |
|
"learning_rate": 8.756723833176376e-07, |
|
"logits": -2.0109665393829346, |
|
"logps": -114.77526092529297, |
|
"loss": 0.205, |
|
"objective": 0.19533474743366241, |
|
"ranking_simple": 0.8583333492279053, |
|
"regularize": 0.19533474743366241, |
|
"step": 530, |
|
"wo_beta": 1.9219799041748047 |
|
}, |
|
{ |
|
"dpo_loss": 0.18096224963665009, |
|
"epoch": 1.5162966461974492, |
|
"grad_norm": 13.367132869446376, |
|
"learning_rate": 8.290184297863793e-07, |
|
"logits": -1.971710205078125, |
|
"logps": -113.77371978759766, |
|
"loss": 0.188, |
|
"objective": 0.18096224963665009, |
|
"ranking_simple": 0.8541666865348816, |
|
"regularize": 0.18096224963665009, |
|
"step": 535, |
|
"wo_beta": 2.045213222503662 |
|
}, |
|
{ |
|
"dpo_loss": 0.26400619745254517, |
|
"epoch": 1.5304676428908834, |
|
"grad_norm": 15.135814096600672, |
|
"learning_rate": 7.833933964887985e-07, |
|
"logits": -1.9013224840164185, |
|
"logps": -113.3902816772461, |
|
"loss": 0.221, |
|
"objective": 0.26400619745254517, |
|
"ranking_simple": 0.8374999761581421, |
|
"regularize": 0.26400619745254517, |
|
"step": 540, |
|
"wo_beta": 2.3301329612731934 |
|
}, |
|
{ |
|
"dpo_loss": 0.23929236829280853, |
|
"epoch": 1.5446386395843175, |
|
"grad_norm": 15.186538591312983, |
|
"learning_rate": 7.388253774031659e-07, |
|
"logits": -1.9914318323135376, |
|
"logps": -113.67050170898438, |
|
"loss": 0.2343, |
|
"objective": 0.23929236829280853, |
|
"ranking_simple": 0.8666666746139526, |
|
"regularize": 0.23929236829280853, |
|
"step": 545, |
|
"wo_beta": 1.8687002658843994 |
|
}, |
|
{ |
|
"dpo_loss": 0.17025238275527954, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 11.147125855718437, |
|
"learning_rate": 6.953418156428152e-07, |
|
"logits": -1.9760197401046753, |
|
"logps": -114.83491516113281, |
|
"loss": 0.1989, |
|
"objective": 0.17025238275527954, |
|
"ranking_simple": 0.8458333611488342, |
|
"regularize": 0.17025238275527954, |
|
"step": 550, |
|
"wo_beta": 2.0717434883117676 |
|
}, |
|
{ |
|
"epoch": 1.5588096362777515, |
|
"eval_dpo_loss": 0.7519087195396423, |
|
"eval_logits": -2.0168354511260986, |
|
"eval_logps": -118.77753448486328, |
|
"eval_loss": 0.7552616596221924, |
|
"eval_objective": 0.7519087195396423, |
|
"eval_ranking_simple": 0.5595238208770752, |
|
"eval_regularize": 0.7519087195396423, |
|
"eval_runtime": 367.683, |
|
"eval_samples_per_second": 15.747, |
|
"eval_steps_per_second": 1.314, |
|
"eval_wo_beta": 8.984560012817383, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 0.2299811840057373, |
|
"epoch": 1.5729806329711855, |
|
"grad_norm": 14.207116899254263, |
|
"learning_rate": 6.529694865578318e-07, |
|
"logits": -1.7790377140045166, |
|
"logps": -116.34921264648438, |
|
"loss": 0.2215, |
|
"objective": 0.2299811840057373, |
|
"ranking_simple": 0.8166666626930237, |
|
"regularize": 0.2299811840057373, |
|
"step": 555, |
|
"wo_beta": 2.6884348392486572 |
|
}, |
|
{ |
|
"dpo_loss": 0.2206832468509674, |
|
"epoch": 1.5871516296646198, |
|
"grad_norm": 14.824039668104726, |
|
"learning_rate": 6.117344812479154e-07, |
|
"logits": -1.8644143342971802, |
|
"logps": -111.79485321044922, |
|
"loss": 0.2146, |
|
"objective": 0.2206832468509674, |
|
"ranking_simple": 0.8416666388511658, |
|
"regularize": 0.2206832468509674, |
|
"step": 560, |
|
"wo_beta": 2.7559573650360107 |
|
}, |
|
{ |
|
"dpo_loss": 0.18442773818969727, |
|
"epoch": 1.601322626358054, |
|
"grad_norm": 13.453462098796246, |
|
"learning_rate": 5.71662190496575e-07, |
|
"logits": -1.8391135931015015, |
|
"logps": -113.25553131103516, |
|
"loss": 0.2078, |
|
"objective": 0.18442773818969727, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.18442773818969727, |
|
"step": 565, |
|
"wo_beta": 2.2805912494659424 |
|
}, |
|
{ |
|
"dpo_loss": 0.19617310166358948, |
|
"epoch": 1.615493623051488, |
|
"grad_norm": 12.497698816224, |
|
"learning_rate": 5.327772891365565e-07, |
|
"logits": -1.984673023223877, |
|
"logps": -115.45191192626953, |
|
"loss": 0.1869, |
|
"objective": 0.19617310166358948, |
|
"ranking_simple": 0.8374999761581421, |
|
"regularize": 0.19617310166358948, |
|
"step": 570, |
|
"wo_beta": 2.3473691940307617 |
|
}, |
|
{ |
|
"dpo_loss": 0.2390568107366562, |
|
"epoch": 1.629664619744922, |
|
"grad_norm": 14.590840547972736, |
|
"learning_rate": 4.951037208561116e-07, |
|
"logits": -1.9447566270828247, |
|
"logps": -111.96437072753906, |
|
"loss": 0.2312, |
|
"objective": 0.2390568107366562, |
|
"ranking_simple": 0.8458333611488342, |
|
"regularize": 0.2390568107366562, |
|
"step": 575, |
|
"wo_beta": 2.296287775039673 |
|
}, |
|
{ |
|
"dpo_loss": 0.20053791999816895, |
|
"epoch": 1.643835616438356, |
|
"grad_norm": 15.024890919749566, |
|
"learning_rate": 4.586646834554864e-07, |
|
"logits": -1.9810107946395874, |
|
"logps": -110.92058563232422, |
|
"loss": 0.2089, |
|
"objective": 0.20053791999816895, |
|
"ranking_simple": 0.8666666746139526, |
|
"regularize": 0.20053791999816895, |
|
"step": 580, |
|
"wo_beta": 1.4837419986724854 |
|
}, |
|
{ |
|
"dpo_loss": 0.19388006627559662, |
|
"epoch": 1.6580066131317903, |
|
"grad_norm": 11.343666690979733, |
|
"learning_rate": 4.234826145626855e-07, |
|
"logits": -1.8341389894485474, |
|
"logps": -109.36263275146484, |
|
"loss": 0.2048, |
|
"objective": 0.19388006627559662, |
|
"ranking_simple": 0.8500000238418579, |
|
"regularize": 0.19388006627559662, |
|
"step": 585, |
|
"wo_beta": 2.263803243637085 |
|
}, |
|
{ |
|
"dpo_loss": 0.2181146889925003, |
|
"epoch": 1.6721776098252243, |
|
"grad_norm": 13.28342611357644, |
|
"learning_rate": 3.8957917781732883e-07, |
|
"logits": -1.87205970287323, |
|
"logps": -113.15511322021484, |
|
"loss": 0.2156, |
|
"objective": 0.2181146889925003, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.2181146889925003, |
|
"step": 590, |
|
"wo_beta": 2.4472413063049316 |
|
}, |
|
{ |
|
"dpo_loss": 0.1822492927312851, |
|
"epoch": 1.6863486065186586, |
|
"grad_norm": 10.596854767592859, |
|
"learning_rate": 3.569752495310877e-07, |
|
"logits": -1.8391311168670654, |
|
"logps": -113.05868530273438, |
|
"loss": 0.1882, |
|
"objective": 0.1822492927312851, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.1822492927312851, |
|
"step": 595, |
|
"wo_beta": 2.815018892288208 |
|
}, |
|
{ |
|
"dpo_loss": 0.17215129733085632, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 11.460383813341208, |
|
"learning_rate": 3.2569090583293356e-07, |
|
"logits": -1.8718314170837402, |
|
"logps": -113.5940933227539, |
|
"loss": 0.1952, |
|
"objective": 0.17215129733085632, |
|
"ranking_simple": 0.8583333492279053, |
|
"regularize": 0.17215129733085632, |
|
"step": 600, |
|
"wo_beta": 1.7114546298980713 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 0.7512882947921753, |
|
"eval_logits": -1.970719814300537, |
|
"eval_logps": -117.48804473876953, |
|
"eval_loss": 0.7544336318969727, |
|
"eval_objective": 0.7512882947921753, |
|
"eval_ranking_simple": 0.5595238208770752, |
|
"eval_regularize": 0.7512882947921753, |
|
"eval_runtime": 367.7185, |
|
"eval_samples_per_second": 15.746, |
|
"eval_steps_per_second": 1.314, |
|
"eval_wo_beta": 9.029719352722168, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 0.19925004243850708, |
|
"epoch": 1.7146905999055266, |
|
"grad_norm": 17.415629157530653, |
|
"learning_rate": 2.957454103070978e-07, |
|
"logits": -1.8373870849609375, |
|
"logps": -112.65380096435547, |
|
"loss": 0.2262, |
|
"objective": 0.19925004243850708, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.19925004243850708, |
|
"step": 605, |
|
"wo_beta": 2.9711499214172363 |
|
}, |
|
{ |
|
"dpo_loss": 0.21553590893745422, |
|
"epoch": 1.7288615965989607, |
|
"grad_norm": 16.124254557780983, |
|
"learning_rate": 2.6715720213136955e-07, |
|
"logits": -1.8880244493484497, |
|
"logps": -112.18531799316406, |
|
"loss": 0.1988, |
|
"objective": 0.21553590893745422, |
|
"ranking_simple": 0.8291666507720947, |
|
"regularize": 0.21553590893745422, |
|
"step": 610, |
|
"wo_beta": 2.8244071006774902 |
|
}, |
|
{ |
|
"dpo_loss": 0.21212069690227509, |
|
"epoch": 1.743032593292395, |
|
"grad_norm": 12.906958893668936, |
|
"learning_rate": 2.399438847230212e-07, |
|
"logits": -1.9108936786651611, |
|
"logps": -111.8115005493164, |
|
"loss": 0.2027, |
|
"objective": 0.21212069690227509, |
|
"ranking_simple": 0.8166666626930237, |
|
"regularize": 0.21212069690227509, |
|
"step": 615, |
|
"wo_beta": 2.961397886276245 |
|
}, |
|
{ |
|
"dpo_loss": 0.20163790881633759, |
|
"epoch": 1.7572035899858292, |
|
"grad_norm": 15.219355254954188, |
|
"learning_rate": 2.1412221489936796e-07, |
|
"logits": -1.947303295135498, |
|
"logps": -111.9202880859375, |
|
"loss": 0.2188, |
|
"objective": 0.20163790881633759, |
|
"ranking_simple": 0.8583333492279053, |
|
"regularize": 0.20163790881633759, |
|
"step": 620, |
|
"wo_beta": 1.839400053024292 |
|
}, |
|
{ |
|
"dpo_loss": 0.1938161551952362, |
|
"epoch": 1.7713745866792632, |
|
"grad_norm": 17.102171291997916, |
|
"learning_rate": 1.897080925596187e-07, |
|
"logits": -1.8294084072113037, |
|
"logps": -111.60057830810547, |
|
"loss": 0.2068, |
|
"objective": 0.1938161551952362, |
|
"ranking_simple": 0.8583333492279053, |
|
"regularize": 0.1938161551952362, |
|
"step": 625, |
|
"wo_beta": 2.0019137859344482 |
|
}, |
|
{ |
|
"dpo_loss": 0.25330111384391785, |
|
"epoch": 1.7855455833726972, |
|
"grad_norm": 14.422081659451388, |
|
"learning_rate": 1.6671655089439186e-07, |
|
"logits": -1.8686004877090454, |
|
"logps": -112.4965591430664, |
|
"loss": 0.2312, |
|
"objective": 0.25330111384391785, |
|
"ranking_simple": 0.7791666388511658, |
|
"regularize": 0.25330111384391785, |
|
"step": 630, |
|
"wo_beta": 3.286885976791382 |
|
}, |
|
{ |
|
"dpo_loss": 0.20808285474777222, |
|
"epoch": 1.7997165800661312, |
|
"grad_norm": 16.725368606459067, |
|
"learning_rate": 1.4516174712890406e-07, |
|
"logits": -1.9575639963150024, |
|
"logps": -114.01631164550781, |
|
"loss": 0.2184, |
|
"objective": 0.20808285474777222, |
|
"ranking_simple": 0.8291666507720947, |
|
"regularize": 0.20808285474777222, |
|
"step": 635, |
|
"wo_beta": 2.427999258041382 |
|
}, |
|
{ |
|
"dpo_loss": 0.20768284797668457, |
|
"epoch": 1.8138875767595655, |
|
"grad_norm": 12.030792650400683, |
|
"learning_rate": 1.2505695380554712e-07, |
|
"logits": -1.8871350288391113, |
|
"logps": -113.53579711914062, |
|
"loss": 0.2015, |
|
"objective": 0.20768284797668457, |
|
"ranking_simple": 0.8291666507720947, |
|
"regularize": 0.20768284797668457, |
|
"step": 640, |
|
"wo_beta": 2.9419753551483154 |
|
}, |
|
{ |
|
"dpo_loss": 0.21022367477416992, |
|
"epoch": 1.8280585734529995, |
|
"grad_norm": 12.06072694382626, |
|
"learning_rate": 1.0641455061121519e-07, |
|
"logits": -1.9376109838485718, |
|
"logps": -114.6182632446289, |
|
"loss": 0.2087, |
|
"objective": 0.21022367477416992, |
|
"ranking_simple": 0.875, |
|
"regularize": 0.21022367477416992, |
|
"step": 645, |
|
"wo_beta": 2.246778964996338 |
|
}, |
|
{ |
|
"dpo_loss": 0.21791066229343414, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 17.74332836561221, |
|
"learning_rate": 8.924601675441207e-08, |
|
"logits": -1.9945629835128784, |
|
"logps": -115.14165496826172, |
|
"loss": 0.2252, |
|
"objective": 0.21791066229343414, |
|
"ranking_simple": 0.8791666626930237, |
|
"regularize": 0.21791066229343414, |
|
"step": 650, |
|
"wo_beta": 1.7516138553619385 |
|
}, |
|
{ |
|
"epoch": 1.8422295701464337, |
|
"eval_dpo_loss": 0.7528993487358093, |
|
"eval_logits": -1.9747523069381714, |
|
"eval_logps": -117.80084228515625, |
|
"eval_loss": 0.7559728622436523, |
|
"eval_objective": 0.7528993487358093, |
|
"eval_ranking_simple": 0.5584886074066162, |
|
"eval_regularize": 0.7528993487358093, |
|
"eval_runtime": 367.648, |
|
"eval_samples_per_second": 15.749, |
|
"eval_steps_per_second": 1.314, |
|
"eval_wo_beta": 9.092611312866211, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 0.21706603467464447, |
|
"epoch": 1.8564005668398678, |
|
"grad_norm": 13.027657065435218, |
|
"learning_rate": 7.356192389683825e-08, |
|
"logits": -1.844641923904419, |
|
"logps": -110.724609375, |
|
"loss": 0.2263, |
|
"objective": 0.21706603467464447, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.21706603467464447, |
|
"step": 655, |
|
"wo_beta": 2.2099640369415283 |
|
}, |
|
{ |
|
"dpo_loss": 0.19901646673679352, |
|
"epoch": 1.8705715635333018, |
|
"grad_norm": 11.442935681801226, |
|
"learning_rate": 5.937192964380556e-08, |
|
"logits": -1.9180775880813599, |
|
"logps": -115.1898193359375, |
|
"loss": 0.1992, |
|
"objective": 0.19901646673679352, |
|
"ranking_simple": 0.8708333373069763, |
|
"regularize": 0.19901646673679352, |
|
"step": 660, |
|
"wo_beta": 1.9250274896621704 |
|
}, |
|
{ |
|
"dpo_loss": 0.20594698190689087, |
|
"epoch": 1.8847425602267358, |
|
"grad_norm": 14.384890803281685, |
|
"learning_rate": 4.668477159748858e-08, |
|
"logits": -1.8975155353546143, |
|
"logps": -112.05779266357422, |
|
"loss": 0.22, |
|
"objective": 0.20594698190689087, |
|
"ranking_simple": 0.8458333611488342, |
|
"regularize": 0.20594698190689087, |
|
"step": 665, |
|
"wo_beta": 1.7807292938232422 |
|
}, |
|
{ |
|
"dpo_loss": 0.23021073639392853, |
|
"epoch": 1.89891355692017, |
|
"grad_norm": 11.678760414899436, |
|
"learning_rate": 3.5508261976678894e-08, |
|
"logits": -1.8350870609283447, |
|
"logps": -111.71804809570312, |
|
"loss": 0.2054, |
|
"objective": 0.23021073639392853, |
|
"ranking_simple": 0.8208333253860474, |
|
"regularize": 0.23021073639392853, |
|
"step": 670, |
|
"wo_beta": 2.838752269744873 |
|
}, |
|
{ |
|
"dpo_loss": 0.19814661145210266, |
|
"epoch": 1.9130845536136043, |
|
"grad_norm": 14.42884511344777, |
|
"learning_rate": 2.5849282806345855e-08, |
|
"logits": -1.8832274675369263, |
|
"logps": -112.52620697021484, |
|
"loss": 0.2208, |
|
"objective": 0.19814661145210266, |
|
"ranking_simple": 0.8791666626930237, |
|
"regularize": 0.19814661145210266, |
|
"step": 675, |
|
"wo_beta": 1.6049467325210571 |
|
}, |
|
{ |
|
"dpo_loss": 0.2337852120399475, |
|
"epoch": 1.9272555503070383, |
|
"grad_norm": 12.72035969054902, |
|
"learning_rate": 1.771378167997745e-08, |
|
"logits": -1.9469962120056152, |
|
"logps": -114.031494140625, |
|
"loss": 0.2132, |
|
"objective": 0.2337852120399475, |
|
"ranking_simple": 0.8541666865348816, |
|
"regularize": 0.2337852120399475, |
|
"step": 680, |
|
"wo_beta": 2.8053526878356934 |
|
}, |
|
{ |
|
"dpo_loss": 0.18736791610717773, |
|
"epoch": 1.9414265470004723, |
|
"grad_norm": 11.578463444163043, |
|
"learning_rate": 1.1106768097300657e-08, |
|
"logits": -1.8774739503860474, |
|
"logps": -113.84810638427734, |
|
"loss": 0.1807, |
|
"objective": 0.18736791610717773, |
|
"ranking_simple": 0.8458333611488342, |
|
"regularize": 0.18736791610717773, |
|
"step": 685, |
|
"wo_beta": 2.1088075637817383 |
|
}, |
|
{ |
|
"dpo_loss": 0.19145923852920532, |
|
"epoch": 1.9555975436939064, |
|
"grad_norm": 11.501744444827747, |
|
"learning_rate": 6.032310379642803e-09, |
|
"logits": -1.9342644214630127, |
|
"logps": -112.51237487792969, |
|
"loss": 0.204, |
|
"objective": 0.19145923852920532, |
|
"ranking_simple": 0.8583333492279053, |
|
"regularize": 0.19145923852920532, |
|
"step": 690, |
|
"wo_beta": 1.7116554975509644 |
|
}, |
|
{ |
|
"dpo_loss": 0.2177831530570984, |
|
"epoch": 1.9697685403873406, |
|
"grad_norm": 13.662395330088454, |
|
"learning_rate": 2.4935331648298644e-09, |
|
"logits": -1.7998664379119873, |
|
"logps": -113.02543640136719, |
|
"loss": 0.2048, |
|
"objective": 0.2177831530570984, |
|
"ranking_simple": 0.8166666626930237, |
|
"regularize": 0.2177831530570984, |
|
"step": 695, |
|
"wo_beta": 2.7438058853149414 |
|
}, |
|
{ |
|
"dpo_loss": 0.19972722232341766, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 13.232400433126339, |
|
"learning_rate": 4.926154831655372e-10, |
|
"logits": -1.788934350013733, |
|
"logps": -115.31278228759766, |
|
"loss": 0.199, |
|
"objective": 0.19972722232341766, |
|
"ranking_simple": 0.8583333492279053, |
|
"regularize": 0.19972722232341766, |
|
"step": 700, |
|
"wo_beta": 2.272697687149048 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 0.7537301182746887, |
|
"eval_logits": -1.9777543544769287, |
|
"eval_logps": -117.9869155883789, |
|
"eval_loss": 0.7566089034080505, |
|
"eval_objective": 0.7537301182746887, |
|
"eval_ranking_simple": 0.5595238208770752, |
|
"eval_regularize": 0.7537301182746887, |
|
"eval_runtime": 367.5678, |
|
"eval_samples_per_second": 15.752, |
|
"eval_steps_per_second": 1.314, |
|
"eval_wo_beta": 9.104193687438965, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.995276334435522, |
|
"step": 704, |
|
"total_flos": 0.0, |
|
"train_loss": 0.3641128831289031, |
|
"train_runtime": 23012.8835, |
|
"train_samples_per_second": 4.415, |
|
"train_steps_per_second": 0.031 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 704, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|