|
{ |
|
"best_metric": 1.347064733505249, |
|
"best_model_checkpoint": "saves/Gemma-7B-It/lora/orpo/checkpoint-1500", |
|
"epoch": 2.997999555456768, |
|
"eval_steps": 500, |
|
"global_step": 1686, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.017781729273171815, |
|
"grad_norm": 2.0664310455322266, |
|
"learning_rate": 4.9995745934141085e-06, |
|
"logits/chosen": 207.18429565429688, |
|
"logits/rejected": 208.7433624267578, |
|
"logps/chosen": -2.1626429557800293, |
|
"logps/rejected": -2.4889369010925293, |
|
"loss": 2.2441, |
|
"odds_ratio_loss": 0.8149965405464172, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.2162642925977707, |
|
"rewards/margins": 0.03262939304113388, |
|
"rewards/rejected": -0.24889369308948517, |
|
"sft_loss": 2.1626429557800293, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03556345854634363, |
|
"grad_norm": 3.5887179374694824, |
|
"learning_rate": 4.9982812903243405e-06, |
|
"logits/chosen": 207.42489624023438, |
|
"logits/rejected": 210.30307006835938, |
|
"logps/chosen": -2.3814258575439453, |
|
"logps/rejected": -2.279829740524292, |
|
"loss": 2.4841, |
|
"odds_ratio_loss": 1.0264532566070557, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.23814257979393005, |
|
"rewards/margins": -0.01015959121286869, |
|
"rewards/rejected": -0.2279830276966095, |
|
"sft_loss": 2.3814258575439453, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05334518781951545, |
|
"grad_norm": 7.692424297332764, |
|
"learning_rate": 4.996120496405222e-06, |
|
"logits/chosen": 210.8332977294922, |
|
"logits/rejected": 211.61099243164062, |
|
"logps/chosen": -2.3326590061187744, |
|
"logps/rejected": -2.6421866416931152, |
|
"loss": 2.4103, |
|
"odds_ratio_loss": 0.7764064073562622, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.23326590657234192, |
|
"rewards/margins": 0.03095281682908535, |
|
"rewards/rejected": -0.2642187178134918, |
|
"sft_loss": 2.3326590061187744, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07112691709268726, |
|
"grad_norm": 2.3221993446350098, |
|
"learning_rate": 4.99309296196014e-06, |
|
"logits/chosen": 213.6151123046875, |
|
"logits/rejected": 215.0422821044922, |
|
"logps/chosen": -2.1157479286193848, |
|
"logps/rejected": -2.377347469329834, |
|
"loss": 2.2037, |
|
"odds_ratio_loss": 0.8793656229972839, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.2115747630596161, |
|
"rewards/margins": 0.026159971952438354, |
|
"rewards/rejected": -0.23773476481437683, |
|
"sft_loss": 2.1157479286193848, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08890864636585907, |
|
"grad_norm": 3.239240884780884, |
|
"learning_rate": 4.989199738255166e-06, |
|
"logits/chosen": 215.62277221679688, |
|
"logits/rejected": 217.9526824951172, |
|
"logps/chosen": -2.122495174407959, |
|
"logps/rejected": -2.3831381797790527, |
|
"loss": 2.2102, |
|
"odds_ratio_loss": 0.876750648021698, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.2122495174407959, |
|
"rewards/margins": 0.0260643120855093, |
|
"rewards/rejected": -0.23831383883953094, |
|
"sft_loss": 2.122495174407959, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1066903756390309, |
|
"grad_norm": 2.045614719390869, |
|
"learning_rate": 4.984442177154031e-06, |
|
"logits/chosen": 218.19265747070312, |
|
"logits/rejected": 219.45907592773438, |
|
"logps/chosen": -2.157195568084717, |
|
"logps/rejected": -2.426860809326172, |
|
"loss": 2.2519, |
|
"odds_ratio_loss": 0.9465675354003906, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.215719535946846, |
|
"rewards/margins": 0.02696654573082924, |
|
"rewards/rejected": -0.24268610775470734, |
|
"sft_loss": 2.157195568084717, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12447210491220272, |
|
"grad_norm": 2.5059540271759033, |
|
"learning_rate": 4.978821930648704e-06, |
|
"logits/chosen": 222.6106414794922, |
|
"logits/rejected": 222.9716339111328, |
|
"logps/chosen": -2.108616590499878, |
|
"logps/rejected": -2.0369954109191895, |
|
"loss": 2.2226, |
|
"odds_ratio_loss": 1.1399357318878174, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.21086165308952332, |
|
"rewards/margins": -0.007162079215049744, |
|
"rewards/rejected": -0.20369958877563477, |
|
"sft_loss": 2.108616590499878, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.14225383418537452, |
|
"grad_norm": 3.193876028060913, |
|
"learning_rate": 4.97234095028576e-06, |
|
"logits/chosen": 226.2631378173828, |
|
"logits/rejected": 227.37841796875, |
|
"logps/chosen": -2.011748790740967, |
|
"logps/rejected": -2.0834341049194336, |
|
"loss": 2.0967, |
|
"odds_ratio_loss": 0.849190354347229, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.20117488503456116, |
|
"rewards/margins": 0.0071685537695884705, |
|
"rewards/rejected": -0.20834341645240784, |
|
"sft_loss": 2.011748790740967, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.16003556345854633, |
|
"grad_norm": 1.5080177783966064, |
|
"learning_rate": 4.965001486488743e-06, |
|
"logits/chosen": 227.5520477294922, |
|
"logits/rejected": 228.8264923095703, |
|
"logps/chosen": -1.7941173315048218, |
|
"logps/rejected": -2.0014424324035645, |
|
"loss": 1.8703, |
|
"odds_ratio_loss": 0.7616177797317505, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.17941176891326904, |
|
"rewards/margins": 0.020732494071125984, |
|
"rewards/rejected": -0.20014424622058868, |
|
"sft_loss": 1.7941173315048218, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.17781729273171815, |
|
"grad_norm": 1.543541669845581, |
|
"learning_rate": 4.956806087776732e-06, |
|
"logits/chosen": 230.88284301757812, |
|
"logits/rejected": 231.593017578125, |
|
"logps/chosen": -1.7711349725723267, |
|
"logps/rejected": -2.1635921001434326, |
|
"loss": 1.8365, |
|
"odds_ratio_loss": 0.6534941792488098, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.17711350321769714, |
|
"rewards/margins": 0.039245713502168655, |
|
"rewards/rejected": -0.2163592129945755, |
|
"sft_loss": 1.7711349725723267, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.19559902200489, |
|
"grad_norm": 2.5298948287963867, |
|
"learning_rate": 4.947757599879411e-06, |
|
"logits/chosen": 235.8949737548828, |
|
"logits/rejected": 235.8369140625, |
|
"logps/chosen": -1.6983522176742554, |
|
"logps/rejected": -1.9385782480239868, |
|
"loss": 1.7686, |
|
"odds_ratio_loss": 0.7024893760681152, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.16983522474765778, |
|
"rewards/margins": 0.024022620171308517, |
|
"rewards/rejected": -0.1938578337430954, |
|
"sft_loss": 1.6983522176742554, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2133807512780618, |
|
"grad_norm": 3.6936049461364746, |
|
"learning_rate": 4.937859164748931e-06, |
|
"logits/chosen": 239.0133514404297, |
|
"logits/rejected": 239.0744171142578, |
|
"logps/chosen": -1.5454410314559937, |
|
"logps/rejected": -1.6620423793792725, |
|
"loss": 1.6182, |
|
"odds_ratio_loss": 0.727584183216095, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.15454408526420593, |
|
"rewards/margins": 0.011660170741379261, |
|
"rewards/rejected": -0.16620425879955292, |
|
"sft_loss": 1.5454410314559937, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.23116248055123362, |
|
"grad_norm": 2.5647566318511963, |
|
"learning_rate": 4.92711421946891e-06, |
|
"logits/chosen": 239.533203125, |
|
"logits/rejected": 240.0679931640625, |
|
"logps/chosen": -1.6095718145370483, |
|
"logps/rejected": -1.8200355768203735, |
|
"loss": 1.6813, |
|
"odds_ratio_loss": 0.7172408103942871, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.16095717251300812, |
|
"rewards/margins": 0.021046385169029236, |
|
"rewards/rejected": -0.18200358748435974, |
|
"sft_loss": 1.6095718145370483, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.24894420982440543, |
|
"grad_norm": 7.456451416015625, |
|
"learning_rate": 4.915526495060961e-06, |
|
"logits/chosen": 242.9502410888672, |
|
"logits/rejected": 243.6327362060547, |
|
"logps/chosen": -1.5086013078689575, |
|
"logps/rejected": -1.7959527969360352, |
|
"loss": 1.5772, |
|
"odds_ratio_loss": 0.6855502128601074, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.15086011588573456, |
|
"rewards/margins": 0.028735166415572166, |
|
"rewards/rejected": -0.17959527671337128, |
|
"sft_loss": 1.5086013078689575, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.26672593909757725, |
|
"grad_norm": 3.7102017402648926, |
|
"learning_rate": 4.903100015189153e-06, |
|
"logits/chosen": 243.65646362304688, |
|
"logits/rejected": 243.8603973388672, |
|
"logps/chosen": -1.5454883575439453, |
|
"logps/rejected": -1.7633510828018188, |
|
"loss": 1.6165, |
|
"odds_ratio_loss": 0.7103039026260376, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.15454885363578796, |
|
"rewards/margins": 0.021786261349916458, |
|
"rewards/rejected": -0.17633512616157532, |
|
"sft_loss": 1.5454883575439453, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.28450766837074903, |
|
"grad_norm": 1.9714837074279785, |
|
"learning_rate": 4.889839094762848e-06, |
|
"logits/chosen": 244.4144744873047, |
|
"logits/rejected": 244.4231414794922, |
|
"logps/chosen": -1.6150789260864258, |
|
"logps/rejected": -1.7520729303359985, |
|
"loss": 1.6882, |
|
"odds_ratio_loss": 0.7308647036552429, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.16150791943073273, |
|
"rewards/margins": 0.013699382543563843, |
|
"rewards/rejected": -0.17520728707313538, |
|
"sft_loss": 1.6150789260864258, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3022893976439209, |
|
"grad_norm": 1.7966150045394897, |
|
"learning_rate": 4.875748338438416e-06, |
|
"logits/chosen": 245.6141815185547, |
|
"logits/rejected": 245.50735473632812, |
|
"logps/chosen": -1.5294561386108398, |
|
"logps/rejected": -1.7359291315078735, |
|
"loss": 1.5978, |
|
"odds_ratio_loss": 0.683633029460907, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.1529456079006195, |
|
"rewards/margins": 0.020647313445806503, |
|
"rewards/rejected": -0.1735929250717163, |
|
"sft_loss": 1.5294561386108398, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.32007112691709266, |
|
"grad_norm": 4.217123031616211, |
|
"learning_rate": 4.8608326390203386e-06, |
|
"logits/chosen": 245.0269317626953, |
|
"logits/rejected": 245.72494506835938, |
|
"logps/chosen": -1.4982770681381226, |
|
"logps/rejected": -1.7273998260498047, |
|
"loss": 1.5646, |
|
"odds_ratio_loss": 0.663575291633606, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.1498277187347412, |
|
"rewards/margins": 0.022912275046110153, |
|
"rewards/rejected": -0.17273999750614166, |
|
"sft_loss": 1.4982770681381226, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3378528561902645, |
|
"grad_norm": 2.1345717906951904, |
|
"learning_rate": 4.845097175762251e-06, |
|
"logits/chosen": 246.3583984375, |
|
"logits/rejected": 246.6023712158203, |
|
"logps/chosen": -1.4693564176559448, |
|
"logps/rejected": -1.6270701885223389, |
|
"loss": 1.5391, |
|
"odds_ratio_loss": 0.6974608898162842, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.1469356268644333, |
|
"rewards/margins": 0.01577138900756836, |
|
"rewards/rejected": -0.16270704567432404, |
|
"sft_loss": 1.4693564176559448, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3556345854634363, |
|
"grad_norm": 4.533120632171631, |
|
"learning_rate": 4.8285474125685286e-06, |
|
"logits/chosen": 245.734130859375, |
|
"logits/rejected": 246.148193359375, |
|
"logps/chosen": -1.5207107067108154, |
|
"logps/rejected": -1.6708800792694092, |
|
"loss": 1.5928, |
|
"odds_ratio_loss": 0.7206315994262695, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.15207107365131378, |
|
"rewards/margins": 0.015016931109130383, |
|
"rewards/rejected": -0.16708801686763763, |
|
"sft_loss": 1.5207107067108154, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.37341631473660813, |
|
"grad_norm": 2.590592861175537, |
|
"learning_rate": 4.811189096097025e-06, |
|
"logits/chosen": 244.85757446289062, |
|
"logits/rejected": 245.19387817382812, |
|
"logps/chosen": -1.4962936639785767, |
|
"logps/rejected": -1.7369601726531982, |
|
"loss": 1.5627, |
|
"odds_ratio_loss": 0.6638623476028442, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.1496293693780899, |
|
"rewards/margins": 0.024066651239991188, |
|
"rewards/rejected": -0.17369601130485535, |
|
"sft_loss": 1.4962936639785767, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.39119804400978, |
|
"grad_norm": 2.847151279449463, |
|
"learning_rate": 4.793028253763633e-06, |
|
"logits/chosen": 247.3143310546875, |
|
"logits/rejected": 247.9826202392578, |
|
"logps/chosen": -1.4769468307495117, |
|
"logps/rejected": -1.6658426523208618, |
|
"loss": 1.5539, |
|
"odds_ratio_loss": 0.769631028175354, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.14769470691680908, |
|
"rewards/margins": 0.018889565020799637, |
|
"rewards/rejected": -0.16658425331115723, |
|
"sft_loss": 1.4769468307495117, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.40897977328295176, |
|
"grad_norm": 2.361198902130127, |
|
"learning_rate": 4.774071191649352e-06, |
|
"logits/chosen": 247.5041961669922, |
|
"logits/rejected": 247.5510711669922, |
|
"logps/chosen": -1.3598966598510742, |
|
"logps/rejected": -1.6928675174713135, |
|
"loss": 1.421, |
|
"odds_ratio_loss": 0.6115107536315918, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.13598966598510742, |
|
"rewards/margins": 0.033297087997198105, |
|
"rewards/rejected": -0.16928674280643463, |
|
"sft_loss": 1.3598966598510742, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4267615025561236, |
|
"grad_norm": 2.774188280105591, |
|
"learning_rate": 4.7543244923105975e-06, |
|
"logits/chosen": 248.81741333007812, |
|
"logits/rejected": 248.56063842773438, |
|
"logps/chosen": -1.479175090789795, |
|
"logps/rejected": -1.5817492008209229, |
|
"loss": 1.5553, |
|
"odds_ratio_loss": 0.7613282799720764, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.14791752398014069, |
|
"rewards/margins": 0.010257410816848278, |
|
"rewards/rejected": -0.15817493200302124, |
|
"sft_loss": 1.479175090789795, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4445432318292954, |
|
"grad_norm": 1.5498669147491455, |
|
"learning_rate": 4.733795012493506e-06, |
|
"logits/chosen": 249.3889923095703, |
|
"logits/rejected": 249.21475219726562, |
|
"logps/chosen": -1.544840693473816, |
|
"logps/rejected": -1.6548751592636108, |
|
"loss": 1.6181, |
|
"odds_ratio_loss": 0.7324239611625671, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.15448406338691711, |
|
"rewards/margins": 0.011003440245985985, |
|
"rewards/rejected": -0.16548751294612885, |
|
"sft_loss": 1.544840693473816, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.46232496110246724, |
|
"grad_norm": 1.862833857536316, |
|
"learning_rate": 4.712489880753035e-06, |
|
"logits/chosen": 245.6899871826172, |
|
"logits/rejected": 246.0701446533203, |
|
"logps/chosen": -1.316460132598877, |
|
"logps/rejected": -1.5619014501571655, |
|
"loss": 1.3796, |
|
"odds_ratio_loss": 0.631010890007019, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.13164600729942322, |
|
"rewards/margins": 0.02454412914812565, |
|
"rewards/rejected": -0.15619014203548431, |
|
"sft_loss": 1.316460132598877, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.480106690375639, |
|
"grad_norm": 7.502230644226074, |
|
"learning_rate": 4.690416494977673e-06, |
|
"logits/chosen": 248.2025146484375, |
|
"logits/rejected": 249.02481079101562, |
|
"logps/chosen": -1.4416104555130005, |
|
"logps/rejected": -1.7268083095550537, |
|
"loss": 1.5056, |
|
"odds_ratio_loss": 0.6399883031845093, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.14416104555130005, |
|
"rewards/margins": 0.028519803658127785, |
|
"rewards/rejected": -0.1726808398962021, |
|
"sft_loss": 1.4416104555130005, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.49788841964881086, |
|
"grad_norm": 1.7004640102386475, |
|
"learning_rate": 4.667582519820639e-06, |
|
"logits/chosen": 248.4412841796875, |
|
"logits/rejected": 248.5986328125, |
|
"logps/chosen": -1.4644906520843506, |
|
"logps/rejected": -1.6034975051879883, |
|
"loss": 1.5385, |
|
"odds_ratio_loss": 0.7402389645576477, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.14644907414913177, |
|
"rewards/margins": 0.013900673016905785, |
|
"rewards/rejected": -0.1603497415781021, |
|
"sft_loss": 1.4644906520843506, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5156701489219827, |
|
"grad_norm": 1.9778798818588257, |
|
"learning_rate": 4.643995884038443e-06, |
|
"logits/chosen": 249.9197540283203, |
|
"logits/rejected": 249.89920043945312, |
|
"logps/chosen": -1.390932559967041, |
|
"logps/rejected": -1.5962976217269897, |
|
"loss": 1.4582, |
|
"odds_ratio_loss": 0.6727808713912964, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.13909325003623962, |
|
"rewards/margins": 0.020536506548523903, |
|
"rewards/rejected": -0.15962976217269897, |
|
"sft_loss": 1.390932559967041, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5334518781951545, |
|
"grad_norm": 1.796635627746582, |
|
"learning_rate": 4.6196647777377475e-06, |
|
"logits/chosen": 248.5753936767578, |
|
"logits/rejected": 248.55288696289062, |
|
"logps/chosen": -1.3909634351730347, |
|
"logps/rejected": -1.4929519891738892, |
|
"loss": 1.4621, |
|
"odds_ratio_loss": 0.7113169431686401, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.13909634947776794, |
|
"rewards/margins": 0.010198858566582203, |
|
"rewards/rejected": -0.14929521083831787, |
|
"sft_loss": 1.3909634351730347, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5512336074683263, |
|
"grad_norm": 1.5784835815429688, |
|
"learning_rate": 4.59459764953147e-06, |
|
"logits/chosen": 247.842529296875, |
|
"logits/rejected": 248.6540069580078, |
|
"logps/chosen": -1.449033498764038, |
|
"logps/rejected": -1.587956190109253, |
|
"loss": 1.5181, |
|
"odds_ratio_loss": 0.6905879974365234, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.14490333199501038, |
|
"rewards/margins": 0.013892298564314842, |
|
"rewards/rejected": -0.15879563987255096, |
|
"sft_loss": 1.449033498764038, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5690153367414981, |
|
"grad_norm": 2.8430497646331787, |
|
"learning_rate": 4.568803203605133e-06, |
|
"logits/chosen": 247.75045776367188, |
|
"logits/rejected": 247.95272827148438, |
|
"logps/chosen": -1.3736222982406616, |
|
"logps/rejected": -1.5858898162841797, |
|
"loss": 1.4444, |
|
"odds_ratio_loss": 0.7075997591018677, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.13736224174499512, |
|
"rewards/margins": 0.02122673951089382, |
|
"rewards/rejected": -0.15858899056911469, |
|
"sft_loss": 1.3736222982406616, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.58679706601467, |
|
"grad_norm": 8.24238395690918, |
|
"learning_rate": 4.542290396694462e-06, |
|
"logits/chosen": 250.15707397460938, |
|
"logits/rejected": 249.8236541748047, |
|
"logps/chosen": -1.4158143997192383, |
|
"logps/rejected": -1.5936088562011719, |
|
"loss": 1.4861, |
|
"odds_ratio_loss": 0.7032537460327148, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.1415814310312271, |
|
"rewards/margins": 0.017779454588890076, |
|
"rewards/rejected": -0.15936090052127838, |
|
"sft_loss": 1.4158143997192383, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6045787952878418, |
|
"grad_norm": 8.322469711303711, |
|
"learning_rate": 4.515068434975298e-06, |
|
"logits/chosen": 247.47579956054688, |
|
"logits/rejected": 248.01553344726562, |
|
"logps/chosen": -1.413419246673584, |
|
"logps/rejected": -1.6688693761825562, |
|
"loss": 1.4802, |
|
"odds_ratio_loss": 0.6680575013160706, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1413419246673584, |
|
"rewards/margins": 0.02554500475525856, |
|
"rewards/rejected": -0.16688695549964905, |
|
"sft_loss": 1.413419246673584, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6223605245610135, |
|
"grad_norm": 1.7108134031295776, |
|
"learning_rate": 4.487146770866887e-06, |
|
"logits/chosen": 249.2637939453125, |
|
"logits/rejected": 249.53549194335938, |
|
"logps/chosen": -1.3779569864273071, |
|
"logps/rejected": -1.5148077011108398, |
|
"loss": 1.4475, |
|
"odds_ratio_loss": 0.6950393915176392, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.13779568672180176, |
|
"rewards/margins": 0.013685077428817749, |
|
"rewards/rejected": -0.1514807641506195, |
|
"sft_loss": 1.3779569864273071, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6401422538341853, |
|
"grad_norm": 1.5160853862762451, |
|
"learning_rate": 4.458535099749666e-06, |
|
"logits/chosen": 251.33633422851562, |
|
"logits/rejected": 251.1912841796875, |
|
"logps/chosen": -1.4455270767211914, |
|
"logps/rejected": -1.5792427062988281, |
|
"loss": 1.5213, |
|
"odds_ratio_loss": 0.7574425935745239, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.14455269277095795, |
|
"rewards/margins": 0.013371584005653858, |
|
"rewards/rejected": -0.15792429447174072, |
|
"sft_loss": 1.4455270767211914, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6579239831073572, |
|
"grad_norm": 2.7603657245635986, |
|
"learning_rate": 4.429243356598694e-06, |
|
"logits/chosen": 249.0806121826172, |
|
"logits/rejected": 249.3708038330078, |
|
"logps/chosen": -1.4384644031524658, |
|
"logps/rejected": -1.6943752765655518, |
|
"loss": 1.504, |
|
"odds_ratio_loss": 0.655007004737854, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.14384643733501434, |
|
"rewards/margins": 0.025591086596250534, |
|
"rewards/rejected": -0.16943752765655518, |
|
"sft_loss": 1.4384644031524658, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.675705712380529, |
|
"grad_norm": 1.7461826801300049, |
|
"learning_rate": 4.399281712533875e-06, |
|
"logits/chosen": 249.9875030517578, |
|
"logits/rejected": 250.59976196289062, |
|
"logps/chosen": -1.385258674621582, |
|
"logps/rejected": -1.5310487747192383, |
|
"loss": 1.4575, |
|
"odds_ratio_loss": 0.7227787375450134, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.13852587342262268, |
|
"rewards/margins": 0.014579012989997864, |
|
"rewards/rejected": -0.15310488641262054, |
|
"sft_loss": 1.385258674621582, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6934874416537008, |
|
"grad_norm": 2.5713703632354736, |
|
"learning_rate": 4.368660571288192e-06, |
|
"logits/chosen": 249.9259490966797, |
|
"logits/rejected": 250.7145538330078, |
|
"logps/chosen": -1.3335117101669312, |
|
"logps/rejected": -1.4699729681015015, |
|
"loss": 1.404, |
|
"odds_ratio_loss": 0.7047192454338074, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.1333511769771576, |
|
"rewards/margins": 0.013646131381392479, |
|
"rewards/rejected": -0.14699730277061462, |
|
"sft_loss": 1.3335117101669312, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7112691709268726, |
|
"grad_norm": 1.9281286001205444, |
|
"learning_rate": 4.337390565595163e-06, |
|
"logits/chosen": 251.0037384033203, |
|
"logits/rejected": 250.80783081054688, |
|
"logps/chosen": -1.4610103368759155, |
|
"logps/rejected": -1.5656871795654297, |
|
"loss": 1.5334, |
|
"odds_ratio_loss": 0.7237542271614075, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.14610104262828827, |
|
"rewards/margins": 0.010467682033777237, |
|
"rewards/rejected": -0.1565687209367752, |
|
"sft_loss": 1.4610103368759155, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7290509002000445, |
|
"grad_norm": 2.2883145809173584, |
|
"learning_rate": 4.305482553496786e-06, |
|
"logits/chosen": 249.4977569580078, |
|
"logits/rejected": 249.899658203125, |
|
"logps/chosen": -1.3607113361358643, |
|
"logps/rejected": -1.5497020483016968, |
|
"loss": 1.4299, |
|
"odds_ratio_loss": 0.692295491695404, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1360711306333542, |
|
"rewards/margins": 0.018899066373705864, |
|
"rewards/rejected": -0.1549702137708664, |
|
"sft_loss": 1.3607113361358643, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.7468326294732163, |
|
"grad_norm": 1.6453748941421509, |
|
"learning_rate": 4.272947614573244e-06, |
|
"logits/chosen": 251.3318328857422, |
|
"logits/rejected": 251.18276977539062, |
|
"logps/chosen": -1.3767774105072021, |
|
"logps/rejected": -1.5617191791534424, |
|
"loss": 1.4431, |
|
"odds_ratio_loss": 0.662979006767273, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.13767775893211365, |
|
"rewards/margins": 0.01849415898323059, |
|
"rewards/rejected": -0.15617190301418304, |
|
"sft_loss": 1.3767774105072021, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7646143587463881, |
|
"grad_norm": 1.49653959274292, |
|
"learning_rate": 4.23979704609569e-06, |
|
"logits/chosen": 251.7825469970703, |
|
"logits/rejected": 251.8596954345703, |
|
"logps/chosen": -1.360251545906067, |
|
"logps/rejected": -1.4845999479293823, |
|
"loss": 1.43, |
|
"odds_ratio_loss": 0.6971336603164673, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.13602514564990997, |
|
"rewards/margins": 0.012434848584234715, |
|
"rewards/rejected": -0.1484600007534027, |
|
"sft_loss": 1.360251545906067, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.78239608801956, |
|
"grad_norm": 2.6121129989624023, |
|
"learning_rate": 4.206042359103435e-06, |
|
"logits/chosen": 249.96774291992188, |
|
"logits/rejected": 249.6472930908203, |
|
"logps/chosen": -1.451643705368042, |
|
"logps/rejected": -1.6453460454940796, |
|
"loss": 1.5216, |
|
"odds_ratio_loss": 0.6996283531188965, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.145164355635643, |
|
"rewards/margins": 0.01937025412917137, |
|
"rewards/rejected": -0.16453461349010468, |
|
"sft_loss": 1.451643705368042, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.8001778172927317, |
|
"grad_norm": 4.463571548461914, |
|
"learning_rate": 4.17169527440691e-06, |
|
"logits/chosen": 249.04443359375, |
|
"logits/rejected": 249.074951171875, |
|
"logps/chosen": -1.3891347646713257, |
|
"logps/rejected": -1.5458732843399048, |
|
"loss": 1.4601, |
|
"odds_ratio_loss": 0.7098666429519653, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.13891348242759705, |
|
"rewards/margins": 0.015673857182264328, |
|
"rewards/rejected": -0.15458734333515167, |
|
"sft_loss": 1.3891347646713257, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8179595465659035, |
|
"grad_norm": 3.0045888423919678, |
|
"learning_rate": 4.136767718517797e-06, |
|
"logits/chosen": 250.2001495361328, |
|
"logits/rejected": 250.69189453125, |
|
"logps/chosen": -1.2655476331710815, |
|
"logps/rejected": -1.5482288599014282, |
|
"loss": 1.3289, |
|
"odds_ratio_loss": 0.6339131593704224, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.12655475735664368, |
|
"rewards/margins": 0.02826813980937004, |
|
"rewards/rejected": -0.15482288599014282, |
|
"sft_loss": 1.2655476331710815, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.8357412758390753, |
|
"grad_norm": 5.639819145202637, |
|
"learning_rate": 4.1012718195077196e-06, |
|
"logits/chosen": 250.90878295898438, |
|
"logits/rejected": 251.2098388671875, |
|
"logps/chosen": -1.3541600704193115, |
|
"logps/rejected": -1.4638829231262207, |
|
"loss": 1.4238, |
|
"odds_ratio_loss": 0.6962955594062805, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.13541600108146667, |
|
"rewards/margins": 0.01097229402512312, |
|
"rewards/rejected": -0.14638829231262207, |
|
"sft_loss": 1.3541600704193115, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.8535230051122472, |
|
"grad_norm": 2.0918374061584473, |
|
"learning_rate": 4.065219902796953e-06, |
|
"logits/chosen": 250.6866455078125, |
|
"logits/rejected": 250.68875122070312, |
|
"logps/chosen": -1.3266746997833252, |
|
"logps/rejected": -1.5351797342300415, |
|
"loss": 1.3953, |
|
"odds_ratio_loss": 0.6859580278396606, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.13266746699810028, |
|
"rewards/margins": 0.02085050381720066, |
|
"rewards/rejected": -0.1535179764032364, |
|
"sft_loss": 1.3266746997833252, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.871304734385419, |
|
"grad_norm": 1.7283604145050049, |
|
"learning_rate": 4.028624486874608e-06, |
|
"logits/chosen": 250.7782745361328, |
|
"logits/rejected": 250.49868774414062, |
|
"logps/chosen": -1.3391790390014648, |
|
"logps/rejected": -1.5556788444519043, |
|
"loss": 1.4069, |
|
"odds_ratio_loss": 0.6768004894256592, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.133917897939682, |
|
"rewards/margins": 0.021649986505508423, |
|
"rewards/rejected": -0.15556788444519043, |
|
"sft_loss": 1.3391790390014648, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8890864636585908, |
|
"grad_norm": 1.839385747909546, |
|
"learning_rate": 3.99149827895177e-06, |
|
"logits/chosen": 251.3027801513672, |
|
"logits/rejected": 250.90676879882812, |
|
"logps/chosen": -1.4337760210037231, |
|
"logps/rejected": -1.565932273864746, |
|
"loss": 1.5041, |
|
"odds_ratio_loss": 0.7032449841499329, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.14337760210037231, |
|
"rewards/margins": 0.01321563869714737, |
|
"rewards/rejected": -0.1565932184457779, |
|
"sft_loss": 1.4337760210037231, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8890864636585908, |
|
"eval_logits/chosen": 250.64625549316406, |
|
"eval_logits/rejected": 250.75489807128906, |
|
"eval_logps/chosen": -1.352203369140625, |
|
"eval_logps/rejected": -1.5643647909164429, |
|
"eval_loss": 1.418463945388794, |
|
"eval_odds_ratio_loss": 0.6626060605049133, |
|
"eval_rewards/accuracies": 0.5529999732971191, |
|
"eval_rewards/chosen": -0.13522033393383026, |
|
"eval_rewards/margins": 0.021216128021478653, |
|
"eval_rewards/rejected": -0.15643645823001862, |
|
"eval_runtime": 217.284, |
|
"eval_samples_per_second": 4.602, |
|
"eval_sft_loss": 1.352203369140625, |
|
"eval_steps_per_second": 2.301, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9068681929317626, |
|
"grad_norm": 4.217110633850098, |
|
"learning_rate": 3.953854170549114e-06, |
|
"logits/chosen": 251.8994598388672, |
|
"logits/rejected": 251.8975067138672, |
|
"logps/chosen": -1.34084153175354, |
|
"logps/rejected": -1.4915145635604858, |
|
"loss": 1.4078, |
|
"odds_ratio_loss": 0.6700819730758667, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.13408413529396057, |
|
"rewards/margins": 0.015067322179675102, |
|
"rewards/rejected": -0.14915145933628082, |
|
"sft_loss": 1.34084153175354, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.9246499222049345, |
|
"grad_norm": 2.6186540126800537, |
|
"learning_rate": 3.91570523302051e-06, |
|
"logits/chosen": 250.59036254882812, |
|
"logits/rejected": 250.75003051757812, |
|
"logps/chosen": -1.3541263341903687, |
|
"logps/rejected": -1.4764378070831299, |
|
"loss": 1.427, |
|
"odds_ratio_loss": 0.728742241859436, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.13541266322135925, |
|
"rewards/margins": 0.012231158092617989, |
|
"rewards/rejected": -0.1476438045501709, |
|
"sft_loss": 1.3541263341903687, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.9424316514781063, |
|
"grad_norm": 2.1178462505340576, |
|
"learning_rate": 3.8770647130141996e-06, |
|
"logits/chosen": 250.88150024414062, |
|
"logits/rejected": 251.63052368164062, |
|
"logps/chosen": -1.3341526985168457, |
|
"logps/rejected": -1.5821571350097656, |
|
"loss": 1.4029, |
|
"odds_ratio_loss": 0.6879525780677795, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.13341526687145233, |
|
"rewards/margins": 0.02480044774711132, |
|
"rewards/rejected": -0.1582157164812088, |
|
"sft_loss": 1.3341526985168457, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.960213380751278, |
|
"grad_norm": 2.5758562088012695, |
|
"learning_rate": 3.837946027873086e-06, |
|
"logits/chosen": 249.52285766601562, |
|
"logits/rejected": 250.219970703125, |
|
"logps/chosen": -1.4206160306930542, |
|
"logps/rejected": -1.6295530796051025, |
|
"loss": 1.4917, |
|
"odds_ratio_loss": 0.7105556726455688, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.14206160604953766, |
|
"rewards/margins": 0.020893706008791924, |
|
"rewards/rejected": -0.16295531392097473, |
|
"sft_loss": 1.4206160306930542, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9779951100244498, |
|
"grad_norm": 2.2665178775787354, |
|
"learning_rate": 3.7983627609757713e-06, |
|
"logits/chosen": 250.9306640625, |
|
"logits/rejected": 251.22189331054688, |
|
"logps/chosen": -1.3822170495986938, |
|
"logps/rejected": -1.51272714138031, |
|
"loss": 1.4502, |
|
"odds_ratio_loss": 0.679997444152832, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.13822171092033386, |
|
"rewards/margins": 0.01305101066827774, |
|
"rewards/rejected": -0.1512727290391922, |
|
"sft_loss": 1.3822170495986938, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9957768392976217, |
|
"grad_norm": 9.529622077941895, |
|
"learning_rate": 3.758328657019924e-06, |
|
"logits/chosen": 252.7086639404297, |
|
"logits/rejected": 252.86575317382812, |
|
"logps/chosen": -1.3466806411743164, |
|
"logps/rejected": -1.4755653142929077, |
|
"loss": 1.4192, |
|
"odds_ratio_loss": 0.7249861359596252, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.13466808199882507, |
|
"rewards/margins": 0.012888476252555847, |
|
"rewards/rejected": -0.14755654335021973, |
|
"sft_loss": 1.3466806411743164, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.0135585685707935, |
|
"grad_norm": 8.696991920471191, |
|
"learning_rate": 3.717857617249642e-06, |
|
"logits/chosen": 250.4936981201172, |
|
"logits/rejected": 250.49435424804688, |
|
"logps/chosen": -1.4108977317810059, |
|
"logps/rejected": -1.591428279876709, |
|
"loss": 1.4822, |
|
"odds_ratio_loss": 0.7131791710853577, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1410897672176361, |
|
"rewards/margins": 0.018053067848086357, |
|
"rewards/rejected": -0.1591428518295288, |
|
"sft_loss": 1.4108977317810059, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.0313402978439654, |
|
"grad_norm": 1.4178659915924072, |
|
"learning_rate": 3.6769636946284543e-06, |
|
"logits/chosen": 252.4293975830078, |
|
"logits/rejected": 252.90542602539062, |
|
"logps/chosen": -1.2866795063018799, |
|
"logps/rejected": -1.4288690090179443, |
|
"loss": 1.3561, |
|
"odds_ratio_loss": 0.694588840007782, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.12866798043251038, |
|
"rewards/margins": 0.01421893946826458, |
|
"rewards/rejected": -0.1428869068622589, |
|
"sft_loss": 1.2866795063018799, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.049122027117137, |
|
"grad_norm": 2.9459097385406494, |
|
"learning_rate": 3.6356610889596355e-06, |
|
"logits/chosen": 251.82577514648438, |
|
"logits/rejected": 252.0190887451172, |
|
"logps/chosen": -1.3364391326904297, |
|
"logps/rejected": -1.4779332876205444, |
|
"loss": 1.4062, |
|
"odds_ratio_loss": 0.6974235773086548, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.13364391028881073, |
|
"rewards/margins": 0.014149405062198639, |
|
"rewards/rejected": -0.14779332280158997, |
|
"sft_loss": 1.3364391326904297, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.066903756390309, |
|
"grad_norm": 2.037022590637207, |
|
"learning_rate": 3.593964141955541e-06, |
|
"logits/chosen": 251.60153198242188, |
|
"logits/rejected": 252.28738403320312, |
|
"logps/chosen": -1.3261809349060059, |
|
"logps/rejected": -1.4387245178222656, |
|
"loss": 1.3975, |
|
"odds_ratio_loss": 0.7132095098495483, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.13261809945106506, |
|
"rewards/margins": 0.011254330165684223, |
|
"rewards/rejected": -0.1438724398612976, |
|
"sft_loss": 1.3261809349060059, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0846854856634809, |
|
"grad_norm": 1.648507833480835, |
|
"learning_rate": 3.5518873322576573e-06, |
|
"logits/chosen": 251.1070556640625, |
|
"logits/rejected": 251.15121459960938, |
|
"logps/chosen": -1.3120733499526978, |
|
"logps/rejected": -1.5062081813812256, |
|
"loss": 1.3823, |
|
"odds_ratio_loss": 0.7022742033004761, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.13120731711387634, |
|
"rewards/margins": 0.019413486123085022, |
|
"rewards/rejected": -0.15062081813812256, |
|
"sft_loss": 1.3120733499526978, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.1024672149366526, |
|
"grad_norm": 1.7841641902923584, |
|
"learning_rate": 3.5094452704091143e-06, |
|
"logits/chosen": 253.28305053710938, |
|
"logits/rejected": 253.123046875, |
|
"logps/chosen": -1.3503539562225342, |
|
"logps/rejected": -1.5104092359542847, |
|
"loss": 1.4195, |
|
"odds_ratio_loss": 0.6916577219963074, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.13503538072109222, |
|
"rewards/margins": 0.016005536541342735, |
|
"rewards/rejected": -0.1510409265756607, |
|
"sft_loss": 1.3503539562225342, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.1202489442098245, |
|
"grad_norm": 4.146026611328125, |
|
"learning_rate": 3.46665269378139e-06, |
|
"logits/chosen": 251.23709106445312, |
|
"logits/rejected": 251.81686401367188, |
|
"logps/chosen": -1.3730881214141846, |
|
"logps/rejected": -1.4982974529266357, |
|
"loss": 1.4452, |
|
"odds_ratio_loss": 0.7208770513534546, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.13730882108211517, |
|
"rewards/margins": 0.012520933523774147, |
|
"rewards/rejected": -0.14982974529266357, |
|
"sft_loss": 1.3730881214141846, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.1380306734829961, |
|
"grad_norm": 3.3884165287017822, |
|
"learning_rate": 3.4235244614569794e-06, |
|
"logits/chosen": 250.8208770751953, |
|
"logits/rejected": 251.1712646484375, |
|
"logps/chosen": -1.3837242126464844, |
|
"logps/rejected": -1.5583369731903076, |
|
"loss": 1.4557, |
|
"odds_ratio_loss": 0.7200697660446167, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.13837242126464844, |
|
"rewards/margins": 0.017461290583014488, |
|
"rewards/rejected": -0.15583372116088867, |
|
"sft_loss": 1.3837242126464844, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.155812402756168, |
|
"grad_norm": 1.377685546875, |
|
"learning_rate": 3.3800755490698008e-06, |
|
"logits/chosen": 252.3297576904297, |
|
"logits/rejected": 252.5504150390625, |
|
"logps/chosen": -1.310136079788208, |
|
"logps/rejected": -1.5744563341140747, |
|
"loss": 1.3736, |
|
"odds_ratio_loss": 0.6345242857933044, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.13101361691951752, |
|
"rewards/margins": 0.026432007551193237, |
|
"rewards/rejected": -0.15744563937187195, |
|
"sft_loss": 1.310136079788208, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.17359413202934, |
|
"grad_norm": 2.116525411605835, |
|
"learning_rate": 3.3363210436051287e-06, |
|
"logits/chosen": 252.10305786132812, |
|
"logits/rejected": 252.2167205810547, |
|
"logps/chosen": -1.348901629447937, |
|
"logps/rejected": -1.5241731405258179, |
|
"loss": 1.4188, |
|
"odds_ratio_loss": 0.6987627148628235, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.13489016890525818, |
|
"rewards/margins": 0.017527148127555847, |
|
"rewards/rejected": -0.15241730213165283, |
|
"sft_loss": 1.348901629447937, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.1913758613025116, |
|
"grad_norm": 1.8333208560943604, |
|
"learning_rate": 3.292276138160867e-06, |
|
"logits/chosen": 251.9469757080078, |
|
"logits/rejected": 252.2017059326172, |
|
"logps/chosen": -1.331968069076538, |
|
"logps/rejected": -1.551393747329712, |
|
"loss": 1.4003, |
|
"odds_ratio_loss": 0.6828904151916504, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.13319680094718933, |
|
"rewards/margins": 0.021942555904388428, |
|
"rewards/rejected": -0.15513937175273895, |
|
"sft_loss": 1.331968069076538, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.2091575905756835, |
|
"grad_norm": 2.233184576034546, |
|
"learning_rate": 3.2479561266719694e-06, |
|
"logits/chosen": 252.11386108398438, |
|
"logits/rejected": 252.0362548828125, |
|
"logps/chosen": -1.346163272857666, |
|
"logps/rejected": -1.5309796333312988, |
|
"loss": 1.4146, |
|
"odds_ratio_loss": 0.6838955283164978, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.13461633026599884, |
|
"rewards/margins": 0.01848164014518261, |
|
"rewards/rejected": -0.1530979573726654, |
|
"sft_loss": 1.346163272857666, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.2269393198488552, |
|
"grad_norm": 4.622295379638672, |
|
"learning_rate": 3.2033763985998533e-06, |
|
"logits/chosen": 253.1451416015625, |
|
"logits/rejected": 253.5478973388672, |
|
"logps/chosen": -1.2959039211273193, |
|
"logps/rejected": -1.597389578819275, |
|
"loss": 1.3583, |
|
"odds_ratio_loss": 0.6243518590927124, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.12959036231040955, |
|
"rewards/margins": 0.03014858439564705, |
|
"rewards/rejected": -0.15973897278308868, |
|
"sft_loss": 1.2959039211273193, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.244721049122027, |
|
"grad_norm": 2.6373684406280518, |
|
"learning_rate": 3.1585524335886335e-06, |
|
"logits/chosen": 252.57833862304688, |
|
"logits/rejected": 252.81259155273438, |
|
"logps/chosen": -1.2123761177062988, |
|
"logps/rejected": -1.4513623714447021, |
|
"loss": 1.2775, |
|
"odds_ratio_loss": 0.6515756845474243, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.121237613260746, |
|
"rewards/margins": 0.023898636922240257, |
|
"rewards/rejected": -0.1451362520456314, |
|
"sft_loss": 1.2123761177062988, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.262502778395199, |
|
"grad_norm": 2.7742574214935303, |
|
"learning_rate": 3.1134997960900536e-06, |
|
"logits/chosen": 250.22891235351562, |
|
"logits/rejected": 251.8517608642578, |
|
"logps/chosen": -1.2099064588546753, |
|
"logps/rejected": -1.5609335899353027, |
|
"loss": 1.2689, |
|
"odds_ratio_loss": 0.5895074009895325, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.12099063396453857, |
|
"rewards/margins": 0.0351027250289917, |
|
"rewards/rejected": -0.15609335899353027, |
|
"sft_loss": 1.2099064588546753, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.2802845076683709, |
|
"grad_norm": 1.7436786890029907, |
|
"learning_rate": 3.0682341299589583e-06, |
|
"logits/chosen": 252.24365234375, |
|
"logits/rejected": 252.2627410888672, |
|
"logps/chosen": -1.2945445775985718, |
|
"logps/rejected": -1.419683814048767, |
|
"loss": 1.3646, |
|
"odds_ratio_loss": 0.7010321617126465, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.12945446372032166, |
|
"rewards/margins": 0.012513910420238972, |
|
"rewards/rejected": -0.14196836948394775, |
|
"sft_loss": 1.2945445775985718, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.2980662369415426, |
|
"grad_norm": 1.6442652940750122, |
|
"learning_rate": 3.022771153021201e-06, |
|
"logits/chosen": 252.22427368164062, |
|
"logits/rejected": 252.268310546875, |
|
"logps/chosen": -1.2626879215240479, |
|
"logps/rejected": -1.530016541481018, |
|
"loss": 1.3271, |
|
"odds_ratio_loss": 0.644324779510498, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.12626877427101135, |
|
"rewards/margins": 0.026732871308922768, |
|
"rewards/rejected": -0.15300165116786957, |
|
"sft_loss": 1.2626879215240479, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.3158479662147144, |
|
"grad_norm": 2.3948917388916016, |
|
"learning_rate": 2.9771266516158625e-06, |
|
"logits/chosen": 252.8442840576172, |
|
"logits/rejected": 253.3193359375, |
|
"logps/chosen": -1.2903095483779907, |
|
"logps/rejected": -1.4887399673461914, |
|
"loss": 1.3586, |
|
"odds_ratio_loss": 0.682421088218689, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.12903094291687012, |
|
"rewards/margins": 0.019843045622110367, |
|
"rewards/rejected": -0.14887399971485138, |
|
"sft_loss": 1.2903095483779907, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.3336296954878861, |
|
"grad_norm": 1.7583057880401611, |
|
"learning_rate": 2.9313164751136802e-06, |
|
"logits/chosen": 251.74462890625, |
|
"logits/rejected": 252.4144287109375, |
|
"logps/chosen": -1.3303029537200928, |
|
"logps/rejected": -1.4901565313339233, |
|
"loss": 1.3975, |
|
"odds_ratio_loss": 0.6716746091842651, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.13303029537200928, |
|
"rewards/margins": 0.015985365957021713, |
|
"rewards/rejected": -0.1490156650543213, |
|
"sft_loss": 1.3303029537200928, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.351411424761058, |
|
"grad_norm": 2.620116710662842, |
|
"learning_rate": 2.8853565304135956e-06, |
|
"logits/chosen": 254.57931518554688, |
|
"logits/rejected": 254.56857299804688, |
|
"logps/chosen": -1.3150755167007446, |
|
"logps/rejected": -1.3985707759857178, |
|
"loss": 1.3882, |
|
"odds_ratio_loss": 0.730746865272522, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.13150756061077118, |
|
"rewards/margins": 0.008349532261490822, |
|
"rewards/rejected": -0.13985709846019745, |
|
"sft_loss": 1.3150755167007446, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.36919315403423, |
|
"grad_norm": 6.955394744873047, |
|
"learning_rate": 2.839262776419313e-06, |
|
"logits/chosen": 253.0518035888672, |
|
"logits/rejected": 253.3127899169922, |
|
"logps/chosen": -1.2819963693618774, |
|
"logps/rejected": -1.5402740240097046, |
|
"loss": 1.346, |
|
"odds_ratio_loss": 0.6400705575942993, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.12819963693618774, |
|
"rewards/margins": 0.02582777664065361, |
|
"rewards/rejected": -0.15402741730213165, |
|
"sft_loss": 1.2819963693618774, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.3869748833074016, |
|
"grad_norm": 2.157224416732788, |
|
"learning_rate": 2.793051218497817e-06, |
|
"logits/chosen": 253.73080444335938, |
|
"logits/rejected": 253.3892822265625, |
|
"logps/chosen": -1.324377179145813, |
|
"logps/rejected": -1.411966323852539, |
|
"loss": 1.3963, |
|
"odds_ratio_loss": 0.7193102240562439, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.13243773579597473, |
|
"rewards/margins": 0.00875892210751772, |
|
"rewards/rejected": -0.14119663834571838, |
|
"sft_loss": 1.324377179145813, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.4047566125805735, |
|
"grad_norm": 1.813340663909912, |
|
"learning_rate": 2.7467379029217437e-06, |
|
"logits/chosen": 253.36312866210938, |
|
"logits/rejected": 254.00588989257812, |
|
"logps/chosen": -1.2761633396148682, |
|
"logps/rejected": -1.5060415267944336, |
|
"loss": 1.343, |
|
"odds_ratio_loss": 0.6682227849960327, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.12761631608009338, |
|
"rewards/margins": 0.022987816482782364, |
|
"rewards/rejected": -0.15060414373874664, |
|
"sft_loss": 1.2761633396148682, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.4225383418537452, |
|
"grad_norm": 1.6623824834823608, |
|
"learning_rate": 2.7003389112975546e-06, |
|
"logits/chosen": 253.5782470703125, |
|
"logits/rejected": 253.32089233398438, |
|
"logps/chosen": -1.3095474243164062, |
|
"logps/rejected": -1.491077184677124, |
|
"loss": 1.3757, |
|
"odds_ratio_loss": 0.6613184213638306, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.13095474243164062, |
|
"rewards/margins": 0.018152965232729912, |
|
"rewards/rejected": -0.14910772442817688, |
|
"sft_loss": 1.3095474243164062, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.440320071126917, |
|
"grad_norm": 5.644952774047852, |
|
"learning_rate": 2.653870354981437e-06, |
|
"logits/chosen": 252.7183380126953, |
|
"logits/rejected": 253.2029571533203, |
|
"logps/chosen": -1.2087516784667969, |
|
"logps/rejected": -1.411168098449707, |
|
"loss": 1.2748, |
|
"odds_ratio_loss": 0.6601918935775757, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.12087517976760864, |
|
"rewards/margins": 0.020241642370820045, |
|
"rewards/rejected": -0.14111682772636414, |
|
"sft_loss": 1.2087516784667969, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.458101800400089, |
|
"grad_norm": 4.477853298187256, |
|
"learning_rate": 2.6073483694848777e-06, |
|
"logits/chosen": 253.3741912841797, |
|
"logits/rejected": 253.51382446289062, |
|
"logps/chosen": -1.2232033014297485, |
|
"logps/rejected": -1.4460740089416504, |
|
"loss": 1.2895, |
|
"odds_ratio_loss": 0.663042426109314, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.12232033163309097, |
|
"rewards/margins": 0.02228708192706108, |
|
"rewards/rejected": -0.14460742473602295, |
|
"sft_loss": 1.2232033014297485, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.4758835296732609, |
|
"grad_norm": 1.6548304557800293, |
|
"learning_rate": 2.560789108871847e-06, |
|
"logits/chosen": 253.0503692626953, |
|
"logits/rejected": 252.9963836669922, |
|
"logps/chosen": -1.3039335012435913, |
|
"logps/rejected": -1.57505202293396, |
|
"loss": 1.3696, |
|
"odds_ratio_loss": 0.6571396589279175, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.1303933560848236, |
|
"rewards/margins": 0.027111848816275597, |
|
"rewards/rejected": -0.15750519931316376, |
|
"sft_loss": 1.3039335012435913, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.4936652589464325, |
|
"grad_norm": 5.2365007400512695, |
|
"learning_rate": 2.514208740149544e-06, |
|
"logits/chosen": 253.81640625, |
|
"logits/rejected": 253.57717895507812, |
|
"logps/chosen": -1.3866493701934814, |
|
"logps/rejected": -1.5284301042556763, |
|
"loss": 1.4596, |
|
"odds_ratio_loss": 0.7293277978897095, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.13866494596004486, |
|
"rewards/margins": 0.014178059995174408, |
|
"rewards/rejected": -0.15284302830696106, |
|
"sft_loss": 1.3866493701934814, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.5114469882196042, |
|
"grad_norm": 2.916973114013672, |
|
"learning_rate": 2.46762343765464e-06, |
|
"logits/chosen": 254.6901397705078, |
|
"logits/rejected": 254.9998016357422, |
|
"logps/chosen": -1.3546521663665771, |
|
"logps/rejected": -1.587242841720581, |
|
"loss": 1.4197, |
|
"odds_ratio_loss": 0.6507903933525085, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.13546521961688995, |
|
"rewards/margins": 0.02325906977057457, |
|
"rewards/rejected": -0.15872427821159363, |
|
"sft_loss": 1.3546521663665771, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.5292287174927761, |
|
"grad_norm": 2.888098955154419, |
|
"learning_rate": 2.4210493774369903e-06, |
|
"logits/chosen": 253.61563110351562, |
|
"logits/rejected": 253.7008056640625, |
|
"logps/chosen": -1.3398011922836304, |
|
"logps/rejected": -1.5939433574676514, |
|
"loss": 1.4092, |
|
"odds_ratio_loss": 0.6944610476493835, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.13398012518882751, |
|
"rewards/margins": 0.02541421353816986, |
|
"rewards/rejected": -0.15939433872699738, |
|
"sft_loss": 1.3398011922836304, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.547010446765948, |
|
"grad_norm": 1.7133105993270874, |
|
"learning_rate": 2.374502731642732e-06, |
|
"logits/chosen": 253.13046264648438, |
|
"logits/rejected": 253.28274536132812, |
|
"logps/chosen": -1.2972921133041382, |
|
"logps/rejected": -1.5040593147277832, |
|
"loss": 1.3657, |
|
"odds_ratio_loss": 0.6844374537467957, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.12972919642925262, |
|
"rewards/margins": 0.02067672833800316, |
|
"rewards/rejected": -0.15040592849254608, |
|
"sft_loss": 1.2972921133041382, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.56479217603912, |
|
"grad_norm": 1.869072437286377, |
|
"learning_rate": 2.3279996628987556e-06, |
|
"logits/chosen": 253.9283447265625, |
|
"logits/rejected": 254.41073608398438, |
|
"logps/chosen": -1.2677370309829712, |
|
"logps/rejected": -1.4599510431289673, |
|
"loss": 1.3369, |
|
"odds_ratio_loss": 0.6917437314987183, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.1267736852169037, |
|
"rewards/margins": 0.01922140084207058, |
|
"rewards/rejected": -0.1459950953722, |
|
"sft_loss": 1.2677370309829712, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.5825739053122916, |
|
"grad_norm": 2.387537717819214, |
|
"learning_rate": 2.281556318700474e-06, |
|
"logits/chosen": 253.6074676513672, |
|
"logits/rejected": 253.7229461669922, |
|
"logps/chosen": -1.230033278465271, |
|
"logps/rejected": -1.369767189025879, |
|
"loss": 1.3007, |
|
"odds_ratio_loss": 0.7063087224960327, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.12300334125757217, |
|
"rewards/margins": 0.013973374851047993, |
|
"rewards/rejected": -0.1369767189025879, |
|
"sft_loss": 1.230033278465271, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.6003556345854635, |
|
"grad_norm": 7.490704536437988, |
|
"learning_rate": 2.2351888258048408e-06, |
|
"logits/chosen": 251.65274047851562, |
|
"logits/rejected": 251.5640106201172, |
|
"logps/chosen": -1.2508928775787354, |
|
"logps/rejected": -1.4577651023864746, |
|
"loss": 1.3169, |
|
"odds_ratio_loss": 0.6601670980453491, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.12508928775787354, |
|
"rewards/margins": 0.020687231793999672, |
|
"rewards/rejected": -0.14577652513980865, |
|
"sft_loss": 1.2508928775787354, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.6181373638586352, |
|
"grad_norm": 1.7122284173965454, |
|
"learning_rate": 2.188913284630584e-06, |
|
"logits/chosen": 253.48501586914062, |
|
"logits/rejected": 253.5773468017578, |
|
"logps/chosen": -1.3153185844421387, |
|
"logps/rejected": -1.4097468852996826, |
|
"loss": 1.3867, |
|
"odds_ratio_loss": 0.7141562700271606, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.13153186440467834, |
|
"rewards/margins": 0.009442826732993126, |
|
"rewards/rejected": -0.1409747153520584, |
|
"sft_loss": 1.3153185844421387, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.635919093131807, |
|
"grad_norm": 3.5988399982452393, |
|
"learning_rate": 2.1427457636675652e-06, |
|
"logits/chosen": 254.33633422851562, |
|
"logits/rejected": 253.96981811523438, |
|
"logps/chosen": -1.2676795721054077, |
|
"logps/rejected": -1.4219448566436768, |
|
"loss": 1.3387, |
|
"odds_ratio_loss": 0.7098127007484436, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.12676796317100525, |
|
"rewards/margins": 0.015426510944962502, |
|
"rewards/rejected": -0.1421944797039032, |
|
"sft_loss": 1.2676795721054077, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.653700822404979, |
|
"grad_norm": 1.8244489431381226, |
|
"learning_rate": 2.096702293897247e-06, |
|
"logits/chosen": 254.08718872070312, |
|
"logits/rejected": 254.5830535888672, |
|
"logps/chosen": -1.283644437789917, |
|
"logps/rejected": -1.5567032098770142, |
|
"loss": 1.35, |
|
"odds_ratio_loss": 0.6635793447494507, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.1283644437789917, |
|
"rewards/margins": 0.027305880561470985, |
|
"rewards/rejected": -0.15567031502723694, |
|
"sft_loss": 1.283644437789917, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.6714825516781509, |
|
"grad_norm": 3.096039056777954, |
|
"learning_rate": 2.0507988632261672e-06, |
|
"logits/chosen": 252.3622589111328, |
|
"logits/rejected": 252.93716430664062, |
|
"logps/chosen": -1.2625596523284912, |
|
"logps/rejected": -1.5456993579864502, |
|
"loss": 1.3258, |
|
"odds_ratio_loss": 0.6320444345474243, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.12625595927238464, |
|
"rewards/margins": 0.028313983231782913, |
|
"rewards/rejected": -0.15456993877887726, |
|
"sft_loss": 1.2625596523284912, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.6892642809513225, |
|
"grad_norm": 3.4061944484710693, |
|
"learning_rate": 2.005051410934382e-06, |
|
"logits/chosen": 253.2592010498047, |
|
"logits/rejected": 253.57846069335938, |
|
"logps/chosen": -1.3725305795669556, |
|
"logps/rejected": -1.5077531337738037, |
|
"loss": 1.4437, |
|
"odds_ratio_loss": 0.7114490866661072, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.137253075838089, |
|
"rewards/margins": 0.013522250577807426, |
|
"rewards/rejected": -0.15077531337738037, |
|
"sft_loss": 1.3725305795669556, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.7070460102244942, |
|
"grad_norm": 2.6905410289764404, |
|
"learning_rate": 1.9594758221407843e-06, |
|
"logits/chosen": 254.19900512695312, |
|
"logits/rejected": 254.83987426757812, |
|
"logps/chosen": -1.2452795505523682, |
|
"logps/rejected": -1.5318635702133179, |
|
"loss": 1.3081, |
|
"odds_ratio_loss": 0.6286849975585938, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.1245279535651207, |
|
"rewards/margins": 0.02865840494632721, |
|
"rewards/rejected": -0.1531863659620285, |
|
"sft_loss": 1.2452795505523682, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.724827739497666, |
|
"grad_norm": 2.9211478233337402, |
|
"learning_rate": 1.9140879222872408e-06, |
|
"logits/chosen": 254.0661163330078, |
|
"logits/rejected": 254.55819702148438, |
|
"logps/chosen": -1.274641752243042, |
|
"logps/rejected": -1.3871382474899292, |
|
"loss": 1.3478, |
|
"odds_ratio_loss": 0.7320677042007446, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.1274641752243042, |
|
"rewards/margins": 0.011249655857682228, |
|
"rewards/rejected": -0.13871383666992188, |
|
"sft_loss": 1.274641752243042, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.742609468770838, |
|
"grad_norm": 2.0924360752105713, |
|
"learning_rate": 1.8689034716434346e-06, |
|
"logits/chosen": 253.67953491210938, |
|
"logits/rejected": 254.02993774414062, |
|
"logps/chosen": -1.334486722946167, |
|
"logps/rejected": -1.4547626972198486, |
|
"loss": 1.4054, |
|
"odds_ratio_loss": 0.7087211012840271, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.13344867527484894, |
|
"rewards/margins": 0.01202760823071003, |
|
"rewards/rejected": -0.14547628164291382, |
|
"sft_loss": 1.334486722946167, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.76039119804401, |
|
"grad_norm": 2.326310634613037, |
|
"learning_rate": 1.8239381598343576e-06, |
|
"logits/chosen": 255.02243041992188, |
|
"logits/rejected": 255.69680786132812, |
|
"logps/chosen": -1.28658127784729, |
|
"logps/rejected": -1.4356472492218018, |
|
"loss": 1.3565, |
|
"odds_ratio_loss": 0.6996346712112427, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.12865814566612244, |
|
"rewards/margins": 0.014906583353877068, |
|
"rewards/rejected": -0.14356473088264465, |
|
"sft_loss": 1.28658127784729, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.7781729273171816, |
|
"grad_norm": 5.255044460296631, |
|
"learning_rate": 1.779207600392312e-06, |
|
"logits/chosen": 253.11575317382812, |
|
"logits/rejected": 253.3959197998047, |
|
"logps/chosen": -1.3602896928787231, |
|
"logps/rejected": -1.5267302989959717, |
|
"loss": 1.428, |
|
"odds_ratio_loss": 0.6775819063186646, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.1360289752483368, |
|
"rewards/margins": 0.016644055023789406, |
|
"rewards/rejected": -0.15267305076122284, |
|
"sft_loss": 1.3602896928787231, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.7781729273171816, |
|
"eval_logits/chosen": 253.95806884765625, |
|
"eval_logits/rejected": 254.13504028320312, |
|
"eval_logps/chosen": -1.2936838865280151, |
|
"eval_logps/rejected": -1.5090892314910889, |
|
"eval_loss": 1.3595393896102905, |
|
"eval_odds_ratio_loss": 0.6585569381713867, |
|
"eval_rewards/accuracies": 0.5600000023841858, |
|
"eval_rewards/chosen": -0.1293683797121048, |
|
"eval_rewards/margins": 0.02154054492712021, |
|
"eval_rewards/rejected": -0.1509089171886444, |
|
"eval_runtime": 217.2988, |
|
"eval_samples_per_second": 4.602, |
|
"eval_sft_loss": 1.2936838865280151, |
|
"eval_steps_per_second": 2.301, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.7959546565903532, |
|
"grad_norm": 2.5635573863983154, |
|
"learning_rate": 1.7347273253353552e-06, |
|
"logits/chosen": 254.41915893554688, |
|
"logits/rejected": 254.5049285888672, |
|
"logps/chosen": -1.2720413208007812, |
|
"logps/rejected": -1.4322283267974854, |
|
"loss": 1.3422, |
|
"odds_ratio_loss": 0.7016333341598511, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.12720413506031036, |
|
"rewards/margins": 0.01601869985461235, |
|
"rewards/rejected": -0.14322282373905182, |
|
"sft_loss": 1.2720413208007812, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.8137363858635251, |
|
"grad_norm": 5.554447650909424, |
|
"learning_rate": 1.690512779774029e-06, |
|
"logits/chosen": 253.4269561767578, |
|
"logits/rejected": 253.6225128173828, |
|
"logps/chosen": -1.3230665922164917, |
|
"logps/rejected": -1.678013563156128, |
|
"loss": 1.3849, |
|
"odds_ratio_loss": 0.6180821657180786, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.13230668008327484, |
|
"rewards/margins": 0.035494692623615265, |
|
"rewards/rejected": -0.16780135035514832, |
|
"sft_loss": 1.3230665922164917, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.831518115136697, |
|
"grad_norm": 2.634599447250366, |
|
"learning_rate": 1.6465793165482838e-06, |
|
"logits/chosen": 253.33926391601562, |
|
"logits/rejected": 253.8477020263672, |
|
"logps/chosen": -1.2152363061904907, |
|
"logps/rejected": -1.465906023979187, |
|
"loss": 1.2779, |
|
"odds_ratio_loss": 0.6263755559921265, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.12152364104986191, |
|
"rewards/margins": 0.025066960602998734, |
|
"rewards/rejected": -0.14659060537815094, |
|
"sft_loss": 1.2152363061904907, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.849299844409869, |
|
"grad_norm": 3.051729917526245, |
|
"learning_rate": 1.6029421908964305e-06, |
|
"logits/chosen": 252.26016235351562, |
|
"logits/rejected": 253.5745086669922, |
|
"logps/chosen": -1.2302608489990234, |
|
"logps/rejected": -1.62982976436615, |
|
"loss": 1.2943, |
|
"odds_ratio_loss": 0.6404699683189392, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12302607297897339, |
|
"rewards/margins": 0.03995689004659653, |
|
"rewards/rejected": -0.1629829704761505, |
|
"sft_loss": 1.2302608489990234, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.8670815736830408, |
|
"grad_norm": 5.069088459014893, |
|
"learning_rate": 1.559616555157985e-06, |
|
"logits/chosen": 253.4644775390625, |
|
"logits/rejected": 253.29727172851562, |
|
"logps/chosen": -1.2889888286590576, |
|
"logps/rejected": -1.4968655109405518, |
|
"loss": 1.3589, |
|
"odds_ratio_loss": 0.6988304853439331, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.12889888882637024, |
|
"rewards/margins": 0.020787667483091354, |
|
"rewards/rejected": -0.1496865451335907, |
|
"sft_loss": 1.2889888286590576, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.8848633029562125, |
|
"grad_norm": 2.464268207550049, |
|
"learning_rate": 1.516617453512252e-06, |
|
"logits/chosen": 254.87197875976562, |
|
"logits/rejected": 254.7659454345703, |
|
"logps/chosen": -1.351043462753296, |
|
"logps/rejected": -1.4525654315948486, |
|
"loss": 1.4237, |
|
"odds_ratio_loss": 0.7263092398643494, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.13510434329509735, |
|
"rewards/margins": 0.010152206756174564, |
|
"rewards/rejected": -0.14525654911994934, |
|
"sft_loss": 1.351043462753296, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.9026450322293842, |
|
"grad_norm": 10.498387336730957, |
|
"learning_rate": 1.473959816754449e-06, |
|
"logits/chosen": 253.4375457763672, |
|
"logits/rejected": 254.0106201171875, |
|
"logps/chosen": -1.2346054315567017, |
|
"logps/rejected": -1.3691545724868774, |
|
"loss": 1.3067, |
|
"odds_ratio_loss": 0.721213161945343, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.123460553586483, |
|
"rewards/margins": 0.013454906642436981, |
|
"rewards/rejected": -0.13691547513008118, |
|
"sft_loss": 1.2346054315567017, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.920426761502556, |
|
"grad_norm": 4.5994696617126465, |
|
"learning_rate": 1.4316584571112213e-06, |
|
"logits/chosen": 256.06500244140625, |
|
"logits/rejected": 256.2422790527344, |
|
"logps/chosen": -1.3240019083023071, |
|
"logps/rejected": -1.4346423149108887, |
|
"loss": 1.3965, |
|
"odds_ratio_loss": 0.7249402403831482, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.13240018486976624, |
|
"rewards/margins": 0.011064060032367706, |
|
"rewards/rejected": -0.14346425235271454, |
|
"sft_loss": 1.3240019083023071, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.938208490775728, |
|
"grad_norm": 3.5848639011383057, |
|
"learning_rate": 1.389728063097306e-06, |
|
"logits/chosen": 255.7421112060547, |
|
"logits/rejected": 255.6949005126953, |
|
"logps/chosen": -1.3223278522491455, |
|
"logps/rejected": -1.6033687591552734, |
|
"loss": 1.3875, |
|
"odds_ratio_loss": 0.6514922976493835, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.13223278522491455, |
|
"rewards/margins": 0.028104085475206375, |
|
"rewards/rejected": -0.16033688187599182, |
|
"sft_loss": 1.3223278522491455, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.9559902200488999, |
|
"grad_norm": 2.085165500640869, |
|
"learning_rate": 1.348183194415179e-06, |
|
"logits/chosen": 254.6555938720703, |
|
"logits/rejected": 255.1551513671875, |
|
"logps/chosen": -1.2908974885940552, |
|
"logps/rejected": -1.5859901905059814, |
|
"loss": 1.3544, |
|
"odds_ratio_loss": 0.6346372365951538, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.12908974289894104, |
|
"rewards/margins": 0.029509279876947403, |
|
"rewards/rejected": -0.15859903395175934, |
|
"sft_loss": 1.2908974885940552, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.9737719493220716, |
|
"grad_norm": 2.3228931427001953, |
|
"learning_rate": 1.3070382768994015e-06, |
|
"logits/chosen": 254.2451171875, |
|
"logits/rejected": 254.3721160888672, |
|
"logps/chosen": -1.2544920444488525, |
|
"logps/rejected": -1.4442470073699951, |
|
"loss": 1.3195, |
|
"odds_ratio_loss": 0.6503337621688843, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.12544921040534973, |
|
"rewards/margins": 0.018975483253598213, |
|
"rewards/rejected": -0.1444246917963028, |
|
"sft_loss": 1.2544920444488525, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.9915536785952432, |
|
"grad_norm": 2.0866916179656982, |
|
"learning_rate": 1.2663075975074746e-06, |
|
"logits/chosen": 253.80899047851562, |
|
"logits/rejected": 254.31460571289062, |
|
"logps/chosen": -1.2685939073562622, |
|
"logps/rejected": -1.4882584810256958, |
|
"loss": 1.3361, |
|
"odds_ratio_loss": 0.6750525236129761, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1268593817949295, |
|
"rewards/margins": 0.021966462954878807, |
|
"rewards/rejected": -0.14882585406303406, |
|
"sft_loss": 1.2685939073562622, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.009335407868415, |
|
"grad_norm": 5.936447620391846, |
|
"learning_rate": 1.2260052993589034e-06, |
|
"logits/chosen": 253.43161010742188, |
|
"logits/rejected": 253.4159698486328, |
|
"logps/chosen": -1.3823201656341553, |
|
"logps/rejected": -1.4824841022491455, |
|
"loss": 1.4548, |
|
"odds_ratio_loss": 0.7252174615859985, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.1382320076227188, |
|
"rewards/margins": 0.010016398504376411, |
|
"rewards/rejected": -0.14824840426445007, |
|
"sft_loss": 1.3823201656341553, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.027117137141587, |
|
"grad_norm": 1.858097791671753, |
|
"learning_rate": 1.1861453768242099e-06, |
|
"logits/chosen": 255.3394317626953, |
|
"logits/rejected": 256.12109375, |
|
"logps/chosen": -1.223772644996643, |
|
"logps/rejected": -1.5006752014160156, |
|
"loss": 1.2835, |
|
"odds_ratio_loss": 0.5976046323776245, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.12237726151943207, |
|
"rewards/margins": 0.027690261602401733, |
|
"rewards/rejected": -0.1500675231218338, |
|
"sft_loss": 1.223772644996643, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.044898866414759, |
|
"grad_norm": 9.576284408569336, |
|
"learning_rate": 1.1467416706655982e-06, |
|
"logits/chosen": 255.4516143798828, |
|
"logits/rejected": 255.5124969482422, |
|
"logps/chosen": -1.33961021900177, |
|
"logps/rejected": -1.5424822568893433, |
|
"loss": 1.4101, |
|
"odds_ratio_loss": 0.7047215700149536, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.13396099209785461, |
|
"rewards/margins": 0.020287221297621727, |
|
"rewards/rejected": -0.1542482227087021, |
|
"sft_loss": 1.33961021900177, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.062680595687931, |
|
"grad_norm": 2.312715768814087, |
|
"learning_rate": 1.1078078632309559e-06, |
|
"logits/chosen": 255.4163360595703, |
|
"logits/rejected": 256.0242004394531, |
|
"logps/chosen": -1.2566642761230469, |
|
"logps/rejected": -1.4380619525909424, |
|
"loss": 1.3236, |
|
"odds_ratio_loss": 0.6697817444801331, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.12566642463207245, |
|
"rewards/margins": 0.018139759078621864, |
|
"rewards/rejected": -0.14380618929862976, |
|
"sft_loss": 1.2566642761230469, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.0804623249611023, |
|
"grad_norm": 2.0991952419281006, |
|
"learning_rate": 1.0693574737028627e-06, |
|
"logits/chosen": 254.43576049804688, |
|
"logits/rejected": 255.13711547851562, |
|
"logps/chosen": -1.2939661741256714, |
|
"logps/rejected": -1.4546544551849365, |
|
"loss": 1.3642, |
|
"odds_ratio_loss": 0.7022632360458374, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.12939661741256714, |
|
"rewards/margins": 0.01606883481144905, |
|
"rewards/rejected": -0.14546546339988708, |
|
"sft_loss": 1.2939661741256714, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.098244054234274, |
|
"grad_norm": 4.281792640686035, |
|
"learning_rate": 1.0314038534042586e-06, |
|
"logits/chosen": 254.57168579101562, |
|
"logits/rejected": 255.064453125, |
|
"logps/chosen": -1.1770070791244507, |
|
"logps/rejected": -1.3855504989624023, |
|
"loss": 1.2441, |
|
"odds_ratio_loss": 0.6709107756614685, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.1177007183432579, |
|
"rewards/margins": 0.020854344591498375, |
|
"rewards/rejected": -0.13855504989624023, |
|
"sft_loss": 1.1770070791244507, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.116025783507446, |
|
"grad_norm": 1.9617539644241333, |
|
"learning_rate": 9.939601811623946e-07, |
|
"logits/chosen": 253.7810516357422, |
|
"logits/rejected": 254.1466827392578, |
|
"logps/chosen": -1.2411329746246338, |
|
"logps/rejected": -1.4748810529708862, |
|
"loss": 1.3104, |
|
"odds_ratio_loss": 0.692661702632904, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.1241132840514183, |
|
"rewards/margins": 0.02337482199072838, |
|
"rewards/rejected": -0.14748811721801758, |
|
"sft_loss": 1.2411329746246338, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.133807512780618, |
|
"grad_norm": 2.653927803039551, |
|
"learning_rate": 9.570394587326825e-07, |
|
"logits/chosen": 253.3402099609375, |
|
"logits/rejected": 253.52310180664062, |
|
"logps/chosen": -1.2719714641571045, |
|
"logps/rejected": -1.571843147277832, |
|
"loss": 1.3354, |
|
"odds_ratio_loss": 0.6337886452674866, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.12719716131687164, |
|
"rewards/margins": 0.02998717501759529, |
|
"rewards/rejected": -0.15718431770801544, |
|
"sft_loss": 1.2719714641571045, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.15158924205379, |
|
"grad_norm": 1.7235380411148071, |
|
"learning_rate": 9.206545062840302e-07, |
|
"logits/chosen": 255.1512908935547, |
|
"logits/rejected": 255.9069366455078, |
|
"logps/chosen": -1.2449194192886353, |
|
"logps/rejected": -1.4648315906524658, |
|
"loss": 1.3088, |
|
"odds_ratio_loss": 0.6390582323074341, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.12449195235967636, |
|
"rewards/margins": 0.021991217508912086, |
|
"rewards/rejected": -0.1464831531047821, |
|
"sft_loss": 1.2449194192886353, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.1693709713269618, |
|
"grad_norm": 2.0629489421844482, |
|
"learning_rate": 8.848179579472285e-07, |
|
"logits/chosen": 255.0786590576172, |
|
"logits/rejected": 255.07534790039062, |
|
"logps/chosen": -1.2237968444824219, |
|
"logps/rejected": -1.3279929161071777, |
|
"loss": 1.2941, |
|
"odds_ratio_loss": 0.7028827667236328, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.12237968295812607, |
|
"rewards/margins": 0.010419609025120735, |
|
"rewards/rejected": -0.13279929757118225, |
|
"sft_loss": 1.2237968444824219, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.1871527006001332, |
|
"grad_norm": 4.134257793426514, |
|
"learning_rate": 8.495422574279403e-07, |
|
"logits/chosen": 253.44869995117188, |
|
"logits/rejected": 253.70126342773438, |
|
"logps/chosen": -1.201407551765442, |
|
"logps/rejected": -1.4721620082855225, |
|
"loss": 1.2647, |
|
"odds_ratio_loss": 0.633193850517273, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.12014075368642807, |
|
"rewards/margins": 0.02707546018064022, |
|
"rewards/rejected": -0.14721623063087463, |
|
"sft_loss": 1.201407551765442, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.204934429873305, |
|
"grad_norm": 2.6145901679992676, |
|
"learning_rate": 8.148396536858063e-07, |
|
"logits/chosen": 255.00973510742188, |
|
"logits/rejected": 255.087646484375, |
|
"logps/chosen": -1.311307668685913, |
|
"logps/rejected": -1.5930455923080444, |
|
"loss": 1.3784, |
|
"odds_ratio_loss": 0.670753538608551, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.13113076984882355, |
|
"rewards/margins": 0.028173794969916344, |
|
"rewards/rejected": -0.15930457413196564, |
|
"sft_loss": 1.311307668685913, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.222716159146477, |
|
"grad_norm": 1.5492827892303467, |
|
"learning_rate": 7.807221966811815e-07, |
|
"logits/chosen": 255.0123291015625, |
|
"logits/rejected": 256.08282470703125, |
|
"logps/chosen": -1.222886323928833, |
|
"logps/rejected": -1.411444067955017, |
|
"loss": 1.2915, |
|
"odds_ratio_loss": 0.6862080097198486, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.12228864431381226, |
|
"rewards/margins": 0.01885577104985714, |
|
"rewards/rejected": -0.14114442467689514, |
|
"sft_loss": 1.222886323928833, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.240497888419649, |
|
"grad_norm": 1.9974987506866455, |
|
"learning_rate": 7.47201733190962e-07, |
|
"logits/chosen": 254.1110382080078, |
|
"logits/rejected": 253.8502197265625, |
|
"logps/chosen": -1.2319315671920776, |
|
"logps/rejected": -1.3752238750457764, |
|
"loss": 1.2993, |
|
"odds_ratio_loss": 0.6734907627105713, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.12319314479827881, |
|
"rewards/margins": 0.01432921551167965, |
|
"rewards/rejected": -0.1375223696231842, |
|
"sft_loss": 1.2319315671920776, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.258279617692821, |
|
"grad_norm": 2.013181209564209, |
|
"learning_rate": 7.142899026949721e-07, |
|
"logits/chosen": 255.99240112304688, |
|
"logits/rejected": 255.9341278076172, |
|
"logps/chosen": -1.2691768407821655, |
|
"logps/rejected": -1.4378575086593628, |
|
"loss": 1.3351, |
|
"odds_ratio_loss": 0.6596958637237549, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12691769003868103, |
|
"rewards/margins": 0.016868067905306816, |
|
"rewards/rejected": -0.143785759806633, |
|
"sft_loss": 1.2691768407821655, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.2760613469659923, |
|
"grad_norm": 5.814607620239258, |
|
"learning_rate": 6.819981333343273e-07, |
|
"logits/chosen": 253.1020965576172, |
|
"logits/rejected": 253.30661010742188, |
|
"logps/chosen": -1.2039464712142944, |
|
"logps/rejected": -1.4708151817321777, |
|
"loss": 1.2691, |
|
"odds_ratio_loss": 0.6517861485481262, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.12039466202259064, |
|
"rewards/margins": 0.026686882600188255, |
|
"rewards/rejected": -0.14708155393600464, |
|
"sft_loss": 1.2039464712142944, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.293843076239164, |
|
"grad_norm": 2.745338201522827, |
|
"learning_rate": 6.503376379431839e-07, |
|
"logits/chosen": 253.5093536376953, |
|
"logits/rejected": 253.670654296875, |
|
"logps/chosen": -1.3510358333587646, |
|
"logps/rejected": -1.4214909076690674, |
|
"loss": 1.423, |
|
"odds_ratio_loss": 0.7199310660362244, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.13510356843471527, |
|
"rewards/margins": 0.007045503705739975, |
|
"rewards/rejected": -0.14214907586574554, |
|
"sft_loss": 1.3510358333587646, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.311624805512336, |
|
"grad_norm": 2.5681943893432617, |
|
"learning_rate": 6.193194101552502e-07, |
|
"logits/chosen": 256.16900634765625, |
|
"logits/rejected": 256.3623962402344, |
|
"logps/chosen": -1.24690580368042, |
|
"logps/rejected": -1.4972703456878662, |
|
"loss": 1.3081, |
|
"odds_ratio_loss": 0.6117135286331177, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.12469057738780975, |
|
"rewards/margins": 0.02503645047545433, |
|
"rewards/rejected": -0.14972704648971558, |
|
"sft_loss": 1.24690580368042, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.329406534785508, |
|
"grad_norm": 3.2992701530456543, |
|
"learning_rate": 5.889542205864083e-07, |
|
"logits/chosen": 254.740966796875, |
|
"logits/rejected": 255.06307983398438, |
|
"logps/chosen": -1.2395174503326416, |
|
"logps/rejected": -1.4891048669815063, |
|
"loss": 1.303, |
|
"odds_ratio_loss": 0.6343892812728882, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1239517480134964, |
|
"rewards/margins": 0.024958748370409012, |
|
"rewards/rejected": -0.1489104926586151, |
|
"sft_loss": 1.2395174503326416, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.34718826405868, |
|
"grad_norm": 2.295304298400879, |
|
"learning_rate": 5.592526130947862e-07, |
|
"logits/chosen": 253.2501220703125, |
|
"logits/rejected": 253.6666717529297, |
|
"logps/chosen": -1.3131468296051025, |
|
"logps/rejected": -1.5006693601608276, |
|
"loss": 1.3841, |
|
"odds_ratio_loss": 0.709243893623352, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.13131467998027802, |
|
"rewards/margins": 0.018752261996269226, |
|
"rewards/rejected": -0.15006692707538605, |
|
"sft_loss": 1.3131468296051025, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.3649699933318518, |
|
"grad_norm": 3.0129616260528564, |
|
"learning_rate": 5.302249011195507e-07, |
|
"logits/chosen": 254.51284790039062, |
|
"logits/rejected": 254.08145141601562, |
|
"logps/chosen": -1.2322486639022827, |
|
"logps/rejected": -1.3891911506652832, |
|
"loss": 1.3008, |
|
"odds_ratio_loss": 0.6851211786270142, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.12322486937046051, |
|
"rewards/margins": 0.015694255009293556, |
|
"rewards/rejected": -0.13891912996768951, |
|
"sft_loss": 1.2322486639022827, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.382751722605023, |
|
"grad_norm": 4.828993797302246, |
|
"learning_rate": 5.018811640997307e-07, |
|
"logits/chosen": 255.24087524414062, |
|
"logits/rejected": 255.8361358642578, |
|
"logps/chosen": -1.3192299604415894, |
|
"logps/rejected": -1.6515556573867798, |
|
"loss": 1.3833, |
|
"odds_ratio_loss": 0.6403645873069763, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.13192300498485565, |
|
"rewards/margins": 0.03323255851864815, |
|
"rewards/rejected": -0.1651555597782135, |
|
"sft_loss": 1.3192299604415894, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.400533451878195, |
|
"grad_norm": 1.868326187133789, |
|
"learning_rate": 4.7423124397427105e-07, |
|
"logits/chosen": 253.5162811279297, |
|
"logits/rejected": 253.6891632080078, |
|
"logps/chosen": -1.2553585767745972, |
|
"logps/rejected": -1.4340143203735352, |
|
"loss": 1.3209, |
|
"odds_ratio_loss": 0.6558007001876831, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.12553584575653076, |
|
"rewards/margins": 0.017865588888525963, |
|
"rewards/rejected": -0.14340144395828247, |
|
"sft_loss": 1.2553585767745972, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.418315181151367, |
|
"grad_norm": 3.549006938934326, |
|
"learning_rate": 4.472847417645787e-07, |
|
"logits/chosen": 255.4973602294922, |
|
"logits/rejected": 255.242431640625, |
|
"logps/chosen": -1.3243919610977173, |
|
"logps/rejected": -1.665623426437378, |
|
"loss": 1.3842, |
|
"odds_ratio_loss": 0.5979620814323425, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.13243919610977173, |
|
"rewards/margins": 0.03412316367030144, |
|
"rewards/rejected": -0.16656234860420227, |
|
"sft_loss": 1.3243919610977173, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.436096910424539, |
|
"grad_norm": 1.720616340637207, |
|
"learning_rate": 4.210510142406993e-07, |
|
"logits/chosen": 254.47842407226562, |
|
"logits/rejected": 254.3418731689453, |
|
"logps/chosen": -1.2948418855667114, |
|
"logps/rejected": -1.6191186904907227, |
|
"loss": 1.3596, |
|
"odds_ratio_loss": 0.6471132040023804, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1294841766357422, |
|
"rewards/margins": 0.03242768719792366, |
|
"rewards/rejected": -0.16191187500953674, |
|
"sft_loss": 1.2948418855667114, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.4538786396977104, |
|
"grad_norm": 2.2009122371673584, |
|
"learning_rate": 3.9553917067232966e-07, |
|
"logits/chosen": 255.0864715576172, |
|
"logits/rejected": 255.9404754638672, |
|
"logps/chosen": -1.2902610301971436, |
|
"logps/rejected": -1.4941017627716064, |
|
"loss": 1.3585, |
|
"odds_ratio_loss": 0.6828353404998779, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.12902610003948212, |
|
"rewards/margins": 0.02038406766951084, |
|
"rewards/rejected": -0.1494101732969284, |
|
"sft_loss": 1.2902610301971436, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.4716603689708823, |
|
"grad_norm": 2.2108042240142822, |
|
"learning_rate": 3.707580696657509e-07, |
|
"logits/chosen": 253.1954345703125, |
|
"logits/rejected": 253.47250366210938, |
|
"logps/chosen": -1.2349462509155273, |
|
"logps/rejected": -1.409597396850586, |
|
"loss": 1.3025, |
|
"odds_ratio_loss": 0.6752232313156128, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.12349464744329453, |
|
"rewards/margins": 0.017465103417634964, |
|
"rewards/rejected": -0.1409597247838974, |
|
"sft_loss": 1.2349462509155273, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.489442098244054, |
|
"grad_norm": 2.4644668102264404, |
|
"learning_rate": 3.4671631608781815e-07, |
|
"logits/chosen": 254.1881561279297, |
|
"logits/rejected": 254.3625030517578, |
|
"logps/chosen": -1.3036715984344482, |
|
"logps/rejected": -1.4527599811553955, |
|
"loss": 1.3758, |
|
"odds_ratio_loss": 0.721402108669281, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.13036717474460602, |
|
"rewards/margins": 0.014908820390701294, |
|
"rewards/rejected": -0.14527598023414612, |
|
"sft_loss": 1.3036715984344482, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.507223827517226, |
|
"grad_norm": 3.9182987213134766, |
|
"learning_rate": 3.234222580780405e-07, |
|
"logits/chosen": 254.8416290283203, |
|
"logits/rejected": 254.9809112548828, |
|
"logps/chosen": -1.2807973623275757, |
|
"logps/rejected": -1.4203259944915771, |
|
"loss": 1.3485, |
|
"odds_ratio_loss": 0.6770311594009399, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.12807972729206085, |
|
"rewards/margins": 0.013952854089438915, |
|
"rewards/rejected": -0.14203259348869324, |
|
"sft_loss": 1.2807973623275757, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.525005556790398, |
|
"grad_norm": 8.136197090148926, |
|
"learning_rate": 3.0088398414982375e-07, |
|
"logits/chosen": 255.15200805664062, |
|
"logits/rejected": 255.2108612060547, |
|
"logps/chosen": -1.269274115562439, |
|
"logps/rejected": -1.5304492712020874, |
|
"loss": 1.3364, |
|
"odds_ratio_loss": 0.6713584661483765, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.12692740559577942, |
|
"rewards/margins": 0.026117529720067978, |
|
"rewards/rejected": -0.1530449539422989, |
|
"sft_loss": 1.269274115562439, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.54278728606357, |
|
"grad_norm": 3.0494956970214844, |
|
"learning_rate": 2.7910932038184487e-07, |
|
"logits/chosen": 252.93734741210938, |
|
"logits/rejected": 254.24777221679688, |
|
"logps/chosen": -1.24881112575531, |
|
"logps/rejected": -1.4509551525115967, |
|
"loss": 1.3164, |
|
"odds_ratio_loss": 0.675870418548584, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.12488112598657608, |
|
"rewards/margins": 0.020214393734931946, |
|
"rewards/rejected": -0.14509549736976624, |
|
"sft_loss": 1.24881112575531, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.5605690153367417, |
|
"grad_norm": 3.582563638687134, |
|
"learning_rate": 2.5810582770057325e-07, |
|
"logits/chosen": 255.56686401367188, |
|
"logits/rejected": 255.5573272705078, |
|
"logps/chosen": -1.2086716890335083, |
|
"logps/rejected": -1.3686209917068481, |
|
"loss": 1.2763, |
|
"odds_ratio_loss": 0.6763256788253784, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.12086717784404755, |
|
"rewards/margins": 0.01599491387605667, |
|
"rewards/rejected": -0.13686209917068481, |
|
"sft_loss": 1.2086716890335083, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.578350744609913, |
|
"grad_norm": 2.657351016998291, |
|
"learning_rate": 2.3788079925484402e-07, |
|
"logits/chosen": 254.91213989257812, |
|
"logits/rejected": 254.9799346923828, |
|
"logps/chosen": -1.3017337322235107, |
|
"logps/rejected": -1.4446247816085815, |
|
"loss": 1.3706, |
|
"odds_ratio_loss": 0.6881921887397766, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.13017335534095764, |
|
"rewards/margins": 0.01428910344839096, |
|
"rewards/rejected": -0.1444624662399292, |
|
"sft_loss": 1.3017337322235107, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.596132473883085, |
|
"grad_norm": 3.423999309539795, |
|
"learning_rate": 2.1844125788342661e-07, |
|
"logits/chosen": 253.8058624267578, |
|
"logits/rejected": 254.35348510742188, |
|
"logps/chosen": -1.220453143119812, |
|
"logps/rejected": -1.6302309036254883, |
|
"loss": 1.284, |
|
"odds_ratio_loss": 0.6357576251029968, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.12204531580209732, |
|
"rewards/margins": 0.04097776859998703, |
|
"rewards/rejected": -0.16302308440208435, |
|
"sft_loss": 1.220453143119812, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.613914203156257, |
|
"grad_norm": 1.957571029663086, |
|
"learning_rate": 1.9979395367644428e-07, |
|
"logits/chosen": 254.90676879882812, |
|
"logits/rejected": 254.95101928710938, |
|
"logps/chosen": -1.2191929817199707, |
|
"logps/rejected": -1.4415347576141357, |
|
"loss": 1.2838, |
|
"odds_ratio_loss": 0.645682156085968, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.12191929668188095, |
|
"rewards/margins": 0.022234182804822922, |
|
"rewards/rejected": -0.14415349066257477, |
|
"sft_loss": 1.2191929817199707, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.631695932429429, |
|
"grad_norm": 4.140540599822998, |
|
"learning_rate": 1.81945361631512e-07, |
|
"logits/chosen": 254.05709838867188, |
|
"logits/rejected": 253.84130859375, |
|
"logps/chosen": -1.2751600742340088, |
|
"logps/rejected": -1.4497580528259277, |
|
"loss": 1.3463, |
|
"odds_ratio_loss": 0.711692214012146, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.1275160014629364, |
|
"rewards/margins": 0.017459798604249954, |
|
"rewards/rejected": -0.14497579634189606, |
|
"sft_loss": 1.2751600742340088, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.6494776617026004, |
|
"grad_norm": 3.2640628814697266, |
|
"learning_rate": 1.6490167940538343e-07, |
|
"logits/chosen": 254.5946807861328, |
|
"logits/rejected": 254.5460662841797, |
|
"logps/chosen": -1.2259750366210938, |
|
"logps/rejected": -1.4610532522201538, |
|
"loss": 1.289, |
|
"odds_ratio_loss": 0.6301438212394714, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.12259751558303833, |
|
"rewards/margins": 0.023507816717028618, |
|
"rewards/rejected": -0.1461053192615509, |
|
"sft_loss": 1.2259750366210938, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.6672593909757722, |
|
"grad_norm": 2.282480001449585, |
|
"learning_rate": 1.4866882516191339e-07, |
|
"logits/chosen": 253.92684936523438, |
|
"logits/rejected": 254.4107208251953, |
|
"logps/chosen": -1.259511113166809, |
|
"logps/rejected": -1.3850682973861694, |
|
"loss": 1.3302, |
|
"odds_ratio_loss": 0.7073375582695007, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.1259511113166809, |
|
"rewards/margins": 0.012555733323097229, |
|
"rewards/rejected": -0.13850684463977814, |
|
"sft_loss": 1.259511113166809, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.6672593909757722, |
|
"eval_logits/chosen": 254.46791076660156, |
|
"eval_logits/rejected": 254.66136169433594, |
|
"eval_logps/chosen": -1.2813518047332764, |
|
"eval_logps/rejected": -1.5003557205200195, |
|
"eval_loss": 1.347064733505249, |
|
"eval_odds_ratio_loss": 0.6571269631385803, |
|
"eval_rewards/accuracies": 0.5609999895095825, |
|
"eval_rewards/chosen": -0.12813518941402435, |
|
"eval_rewards/margins": 0.021900387480854988, |
|
"eval_rewards/rejected": -0.15003558993339539, |
|
"eval_runtime": 217.1974, |
|
"eval_samples_per_second": 4.604, |
|
"eval_sft_loss": 1.2813518047332764, |
|
"eval_steps_per_second": 2.302, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.685041120248944, |
|
"grad_norm": 6.244467258453369, |
|
"learning_rate": 1.3325243551706057e-07, |
|
"logits/chosen": 254.00894165039062, |
|
"logits/rejected": 254.4985809326172, |
|
"logps/chosen": -1.272092580795288, |
|
"logps/rejected": -1.6640291213989258, |
|
"loss": 1.3335, |
|
"odds_ratio_loss": 0.6145035624504089, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.12720926105976105, |
|
"rewards/margins": 0.03919365257024765, |
|
"rewards/rejected": -0.1664029359817505, |
|
"sft_loss": 1.272092580795288, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.702822849522116, |
|
"grad_norm": 6.613005638122559, |
|
"learning_rate": 1.1865786358165737e-07, |
|
"logits/chosen": 254.2620086669922, |
|
"logits/rejected": 254.74526977539062, |
|
"logps/chosen": -1.2863306999206543, |
|
"logps/rejected": -1.474635124206543, |
|
"loss": 1.3537, |
|
"odds_ratio_loss": 0.6741579174995422, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.12863309681415558, |
|
"rewards/margins": 0.0188304390758276, |
|
"rewards/rejected": -0.14746353030204773, |
|
"sft_loss": 1.2863306999206543, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.720604578795288, |
|
"grad_norm": 6.351752758026123, |
|
"learning_rate": 1.0489017710262311e-07, |
|
"logits/chosen": 255.4362030029297, |
|
"logits/rejected": 256.20306396484375, |
|
"logps/chosen": -1.331672191619873, |
|
"logps/rejected": -1.618596076965332, |
|
"loss": 1.4013, |
|
"odds_ratio_loss": 0.6964471340179443, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.13316722214221954, |
|
"rewards/margins": 0.028692390769720078, |
|
"rewards/rejected": -0.16185960173606873, |
|
"sft_loss": 1.331672191619873, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.73838630806846, |
|
"grad_norm": 2.5552542209625244, |
|
"learning_rate": 9.195415670326446e-08, |
|
"logits/chosen": 256.7084655761719, |
|
"logits/rejected": 256.827392578125, |
|
"logps/chosen": -1.285346269607544, |
|
"logps/rejected": -1.4887077808380127, |
|
"loss": 1.3524, |
|
"odds_ratio_loss": 0.6702226400375366, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.12853462994098663, |
|
"rewards/margins": 0.020336147397756577, |
|
"rewards/rejected": -0.1488707810640335, |
|
"sft_loss": 1.285346269607544, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.7561680373416317, |
|
"grad_norm": 4.229678630828857, |
|
"learning_rate": 7.985429422327384e-08, |
|
"logits/chosen": 254.65280151367188, |
|
"logits/rejected": 255.42001342773438, |
|
"logps/chosen": -1.2554762363433838, |
|
"logps/rejected": -1.3682177066802979, |
|
"loss": 1.3267, |
|
"odds_ratio_loss": 0.7126489877700806, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.1255476176738739, |
|
"rewards/margins": 0.011274145916104317, |
|
"rewards/rejected": -0.13682177662849426, |
|
"sft_loss": 1.2554762363433838, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.773949766614803, |
|
"grad_norm": 3.981313705444336, |
|
"learning_rate": 6.859479115900818e-08, |
|
"logits/chosen": 254.1326446533203, |
|
"logits/rejected": 254.6916961669922, |
|
"logps/chosen": -1.359695553779602, |
|
"logps/rejected": -1.6282466650009155, |
|
"loss": 1.425, |
|
"odds_ratio_loss": 0.6527189016342163, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.13596956431865692, |
|
"rewards/margins": 0.026855114847421646, |
|
"rewards/rejected": -0.16282466053962708, |
|
"sft_loss": 1.359695553779602, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.791731495887975, |
|
"grad_norm": 3.927825450897217, |
|
"learning_rate": 5.817955720457902e-08, |
|
"logits/chosen": 253.6094970703125, |
|
"logits/rejected": 253.5572967529297, |
|
"logps/chosen": -1.1961861848831177, |
|
"logps/rejected": -1.3642635345458984, |
|
"loss": 1.262, |
|
"odds_ratio_loss": 0.6578332185745239, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.11961860954761505, |
|
"rewards/margins": 0.016807744279503822, |
|
"rewards/rejected": -0.13642635941505432, |
|
"sft_loss": 1.1961861848831177, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.809513225161147, |
|
"grad_norm": 3.0377912521362305, |
|
"learning_rate": 4.861220889427199e-08, |
|
"logits/chosen": 254.80587768554688, |
|
"logits/rejected": 254.9072265625, |
|
"logps/chosen": -1.2765783071517944, |
|
"logps/rejected": -1.4059991836547852, |
|
"loss": 1.3469, |
|
"odds_ratio_loss": 0.7033384442329407, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.12765783071517944, |
|
"rewards/margins": 0.012942099943757057, |
|
"rewards/rejected": -0.14059992134571075, |
|
"sft_loss": 1.2765783071517944, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.827294954434319, |
|
"grad_norm": 1.8500066995620728, |
|
"learning_rate": 3.9896068346758074e-08, |
|
"logits/chosen": 254.78158569335938, |
|
"logits/rejected": 254.95730590820312, |
|
"logps/chosen": -1.2893197536468506, |
|
"logps/rejected": -1.4526543617248535, |
|
"loss": 1.3588, |
|
"odds_ratio_loss": 0.6947753429412842, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.12893196940422058, |
|
"rewards/margins": 0.01633346639573574, |
|
"rewards/rejected": -0.14526543021202087, |
|
"sft_loss": 1.2893197536468506, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.8450766837074903, |
|
"grad_norm": 5.4357829093933105, |
|
"learning_rate": 3.203416211153832e-08, |
|
"logits/chosen": 254.9698944091797, |
|
"logits/rejected": 256.3062438964844, |
|
"logps/chosen": -1.2634626626968384, |
|
"logps/rejected": -1.4395668506622314, |
|
"loss": 1.3344, |
|
"odds_ratio_loss": 0.7093390822410583, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.12634627521038055, |
|
"rewards/margins": 0.017610404640436172, |
|
"rewards/rejected": -0.14395669102668762, |
|
"sft_loss": 1.2634626626968384, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.8628584129806622, |
|
"grad_norm": 3.7756481170654297, |
|
"learning_rate": 2.5029220118019393e-08, |
|
"logits/chosen": 253.7996826171875, |
|
"logits/rejected": 254.91201782226562, |
|
"logps/chosen": -1.3131462335586548, |
|
"logps/rejected": -1.434832215309143, |
|
"loss": 1.3818, |
|
"odds_ratio_loss": 0.6866922974586487, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.13131462037563324, |
|
"rewards/margins": 0.012168603017926216, |
|
"rewards/rejected": -0.1434832215309143, |
|
"sft_loss": 1.3131462335586548, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.880640142253834, |
|
"grad_norm": 3.6297013759613037, |
|
"learning_rate": 1.8883674727586122e-08, |
|
"logits/chosen": 254.3390350341797, |
|
"logits/rejected": 254.14718627929688, |
|
"logps/chosen": -1.1930474042892456, |
|
"logps/rejected": -1.5245510339736938, |
|
"loss": 1.2541, |
|
"odds_ratio_loss": 0.6109124422073364, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.11930473893880844, |
|
"rewards/margins": 0.03315035253763199, |
|
"rewards/rejected": -0.15245509147644043, |
|
"sft_loss": 1.1930474042892456, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.898421871527006, |
|
"grad_norm": 1.5044124126434326, |
|
"learning_rate": 1.3599659889000639e-08, |
|
"logits/chosen": 255.17379760742188, |
|
"logits/rejected": 254.8286895751953, |
|
"logps/chosen": -1.3174787759780884, |
|
"logps/rejected": -1.4666922092437744, |
|
"loss": 1.3872, |
|
"odds_ratio_loss": 0.6974038481712341, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.13174788653850555, |
|
"rewards/margins": 0.01492134016007185, |
|
"rewards/rejected": -0.14666922390460968, |
|
"sft_loss": 1.3174787759780884, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.916203600800178, |
|
"grad_norm": 2.1138768196105957, |
|
"learning_rate": 9.179010397421528e-09, |
|
"logits/chosen": 256.32757568359375, |
|
"logits/rejected": 255.87295532226562, |
|
"logps/chosen": -1.26983642578125, |
|
"logps/rejected": -1.4480781555175781, |
|
"loss": 1.3386, |
|
"odds_ratio_loss": 0.6873185038566589, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.1269836574792862, |
|
"rewards/margins": 0.017824167385697365, |
|
"rewards/rejected": -0.144807830452919, |
|
"sft_loss": 1.26983642578125, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.93398533007335, |
|
"grad_norm": 1.8579224348068237, |
|
"learning_rate": 5.623261257296509e-09, |
|
"logits/chosen": 253.3066864013672, |
|
"logits/rejected": 253.75753784179688, |
|
"logps/chosen": -1.1514933109283447, |
|
"logps/rejected": -1.3553564548492432, |
|
"loss": 1.2157, |
|
"odds_ratio_loss": 0.6422222256660461, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.11514933407306671, |
|
"rewards/margins": 0.020386312156915665, |
|
"rewards/rejected": -0.13553564250469208, |
|
"sft_loss": 1.1514933109283447, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.9517670593465217, |
|
"grad_norm": 2.8349151611328125, |
|
"learning_rate": 2.933647149357122e-09, |
|
"logits/chosen": 254.45968627929688, |
|
"logits/rejected": 254.57400512695312, |
|
"logps/chosen": -1.300631046295166, |
|
"logps/rejected": -1.4870007038116455, |
|
"loss": 1.3705, |
|
"odds_ratio_loss": 0.6987138390541077, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.13006310164928436, |
|
"rewards/margins": 0.018636973574757576, |
|
"rewards/rejected": -0.14870008826255798, |
|
"sft_loss": 1.300631046295166, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.969548788619693, |
|
"grad_norm": 2.5957581996917725, |
|
"learning_rate": 1.1111020018930717e-09, |
|
"logits/chosen": 255.29000854492188, |
|
"logits/rejected": 255.3350830078125, |
|
"logps/chosen": -1.2563540935516357, |
|
"logps/rejected": -1.3871452808380127, |
|
"loss": 1.3264, |
|
"odds_ratio_loss": 0.7003098726272583, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.12563541531562805, |
|
"rewards/margins": 0.013079119846224785, |
|
"rewards/rejected": -0.1387145221233368, |
|
"sft_loss": 1.2563540935516357, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.987330517892865, |
|
"grad_norm": 1.7800966501235962, |
|
"learning_rate": 1.5625866646051813e-10, |
|
"logits/chosen": 255.032470703125, |
|
"logits/rejected": 255.3948974609375, |
|
"logps/chosen": -1.2115042209625244, |
|
"logps/rejected": -1.4695723056793213, |
|
"loss": 1.2721, |
|
"odds_ratio_loss": 0.6058895587921143, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.1211504191160202, |
|
"rewards/margins": 0.025806818157434464, |
|
"rewards/rejected": -0.14695724844932556, |
|
"sft_loss": 1.2115042209625244, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.997999555456768, |
|
"step": 1686, |
|
"total_flos": 2.0747345909474918e+18, |
|
"train_loss": 1.4404786094086308, |
|
"train_runtime": 22862.0738, |
|
"train_samples_per_second": 1.181, |
|
"train_steps_per_second": 0.074 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1686, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 2.0747345909474918e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|