|
{ |
|
"best_metric": 1.3159173727035522, |
|
"best_model_checkpoint": "saves/Gemma-7B-It/lora/orpo-salt-half/checkpoint-1500", |
|
"epoch": 2.9974597798475866, |
|
"eval_steps": 500, |
|
"global_step": 1770, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01693480101608806, |
|
"grad_norm": 4.468957901000977, |
|
"learning_rate": 4.999614014035063e-06, |
|
"logits/chosen": 207.0987548828125, |
|
"logits/rejected": 208.2423858642578, |
|
"logps/chosen": -2.50309681892395, |
|
"logps/rejected": -2.976195812225342, |
|
"loss": 2.5989, |
|
"odds_ratio_loss": 0.9582594633102417, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.25030970573425293, |
|
"rewards/margins": 0.04730992019176483, |
|
"rewards/rejected": -0.29761961102485657, |
|
"sft_loss": 2.50309681892395, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03386960203217612, |
|
"grad_norm": 3.255079984664917, |
|
"learning_rate": 4.998440543386042e-06, |
|
"logits/chosen": 207.7864227294922, |
|
"logits/rejected": 210.0211944580078, |
|
"logps/chosen": -2.684905529022217, |
|
"logps/rejected": -2.9186933040618896, |
|
"loss": 2.776, |
|
"odds_ratio_loss": 0.9109451174736023, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.26849058270454407, |
|
"rewards/margins": 0.023378772661089897, |
|
"rewards/rejected": -0.2918693423271179, |
|
"sft_loss": 2.684905529022217, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05080440304826418, |
|
"grad_norm": 2.5176427364349365, |
|
"learning_rate": 4.996479918381253e-06, |
|
"logits/chosen": 212.6219024658203, |
|
"logits/rejected": 213.315673828125, |
|
"logps/chosen": -2.225409746170044, |
|
"logps/rejected": -2.3403306007385254, |
|
"loss": 2.3097, |
|
"odds_ratio_loss": 0.8430029153823853, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.22254100441932678, |
|
"rewards/margins": 0.01149209588766098, |
|
"rewards/rejected": -0.23403307795524597, |
|
"sft_loss": 2.225409746170044, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06773920406435224, |
|
"grad_norm": 2.6412463188171387, |
|
"learning_rate": 4.993732756731818e-06, |
|
"logits/chosen": 212.7200164794922, |
|
"logits/rejected": 213.7319793701172, |
|
"logps/chosen": -2.2491908073425293, |
|
"logps/rejected": -2.6265273094177246, |
|
"loss": 2.3215, |
|
"odds_ratio_loss": 0.7235576510429382, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.22491908073425293, |
|
"rewards/margins": 0.03773364797234535, |
|
"rewards/rejected": -0.262652724981308, |
|
"sft_loss": 2.2491908073425293, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0846740050804403, |
|
"grad_norm": 4.878486633300781, |
|
"learning_rate": 4.9901999239537345e-06, |
|
"logits/chosen": 218.13858032226562, |
|
"logits/rejected": 218.47933959960938, |
|
"logps/chosen": -2.325134754180908, |
|
"logps/rejected": -2.5971357822418213, |
|
"loss": 2.4052, |
|
"odds_ratio_loss": 0.8009692430496216, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.23251347243785858, |
|
"rewards/margins": 0.027200128883123398, |
|
"rewards/rejected": -0.2597135901451111, |
|
"sft_loss": 2.325134754180908, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10160880609652836, |
|
"grad_norm": 1.934574842453003, |
|
"learning_rate": 4.985882533095186e-06, |
|
"logits/chosen": 218.4595184326172, |
|
"logits/rejected": 218.9486083984375, |
|
"logps/chosen": -2.1272051334381104, |
|
"logps/rejected": -2.2577621936798096, |
|
"loss": 2.2131, |
|
"odds_ratio_loss": 0.8588129878044128, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.21272051334381104, |
|
"rewards/margins": 0.013055694289505482, |
|
"rewards/rejected": -0.22577619552612305, |
|
"sft_loss": 2.1272051334381104, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11854360711261643, |
|
"grad_norm": 8.395120620727539, |
|
"learning_rate": 4.9807819443858705e-06, |
|
"logits/chosen": 221.9539031982422, |
|
"logits/rejected": 223.3571319580078, |
|
"logps/chosen": -2.175661087036133, |
|
"logps/rejected": -2.3411145210266113, |
|
"loss": 2.2597, |
|
"odds_ratio_loss": 0.8399366140365601, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.2175661027431488, |
|
"rewards/margins": 0.016545329242944717, |
|
"rewards/rejected": -0.2341114580631256, |
|
"sft_loss": 2.175661087036133, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1354784081287045, |
|
"grad_norm": 4.525905132293701, |
|
"learning_rate": 4.9748997648084404e-06, |
|
"logits/chosen": 224.2382354736328, |
|
"logits/rejected": 224.88278198242188, |
|
"logps/chosen": -1.8507779836654663, |
|
"logps/rejected": -2.099276304244995, |
|
"loss": 1.9368, |
|
"odds_ratio_loss": 0.8602777719497681, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.18507780134677887, |
|
"rewards/margins": 0.024849826470017433, |
|
"rewards/rejected": -0.20992763340473175, |
|
"sft_loss": 1.8507779836654663, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.15241320914479256, |
|
"grad_norm": 4.903147220611572, |
|
"learning_rate": 4.96823784759222e-06, |
|
"logits/chosen": 225.79443359375, |
|
"logits/rejected": 226.6028289794922, |
|
"logps/chosen": -1.7729737758636475, |
|
"logps/rejected": -2.0627281665802, |
|
"loss": 1.848, |
|
"odds_ratio_loss": 0.7500249147415161, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.17729738354682922, |
|
"rewards/margins": 0.02897545136511326, |
|
"rewards/rejected": -0.20627284049987793, |
|
"sft_loss": 1.7729737758636475, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1693480101608806, |
|
"grad_norm": 3.5224053859710693, |
|
"learning_rate": 4.960798291629323e-06, |
|
"logits/chosen": 232.6717071533203, |
|
"logits/rejected": 232.73568725585938, |
|
"logps/chosen": -1.7854640483856201, |
|
"logps/rejected": -1.9538242816925049, |
|
"loss": 1.8613, |
|
"odds_ratio_loss": 0.7585908770561218, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.17854642868041992, |
|
"rewards/margins": 0.016835981979966164, |
|
"rewards/rejected": -0.19538240134716034, |
|
"sft_loss": 1.7854640483856201, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18628281117696868, |
|
"grad_norm": 6.120446681976318, |
|
"learning_rate": 4.952583440813383e-06, |
|
"logits/chosen": 235.1363983154297, |
|
"logits/rejected": 236.200927734375, |
|
"logps/chosen": -1.8061208724975586, |
|
"logps/rejected": -2.0079829692840576, |
|
"loss": 1.8823, |
|
"odds_ratio_loss": 0.7622145414352417, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.18061205744743347, |
|
"rewards/margins": 0.020186223089694977, |
|
"rewards/rejected": -0.20079830288887024, |
|
"sft_loss": 1.8061208724975586, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.20321761219305673, |
|
"grad_norm": 1.8270832300186157, |
|
"learning_rate": 4.943595883301086e-06, |
|
"logits/chosen": 234.9897918701172, |
|
"logits/rejected": 235.3126983642578, |
|
"logps/chosen": -1.6784818172454834, |
|
"logps/rejected": -1.882794737815857, |
|
"loss": 1.7524, |
|
"odds_ratio_loss": 0.7392091155052185, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.16784818470478058, |
|
"rewards/margins": 0.02043129876255989, |
|
"rewards/rejected": -0.18827947974205017, |
|
"sft_loss": 1.6784818172454834, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2201524132091448, |
|
"grad_norm": 5.280871868133545, |
|
"learning_rate": 4.933838450696757e-06, |
|
"logits/chosen": 238.5899200439453, |
|
"logits/rejected": 238.400390625, |
|
"logps/chosen": -1.512404203414917, |
|
"logps/rejected": -1.6778627634048462, |
|
"loss": 1.5856, |
|
"odds_ratio_loss": 0.7318023443222046, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.15124042332172394, |
|
"rewards/margins": 0.0165458582341671, |
|
"rewards/rejected": -0.16778628528118134, |
|
"sft_loss": 1.512404203414917, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.23708721422523285, |
|
"grad_norm": 4.924640655517578, |
|
"learning_rate": 4.923314217160234e-06, |
|
"logits/chosen": 240.5208282470703, |
|
"logits/rejected": 241.1940460205078, |
|
"logps/chosen": -1.6607558727264404, |
|
"logps/rejected": -1.8365033864974976, |
|
"loss": 1.7352, |
|
"odds_ratio_loss": 0.7449204921722412, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.16607561707496643, |
|
"rewards/margins": 0.017574718222022057, |
|
"rewards/rejected": -0.18365031480789185, |
|
"sft_loss": 1.6607558727264404, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2540220152413209, |
|
"grad_norm": 1.817272663116455, |
|
"learning_rate": 4.9120264984383285e-06, |
|
"logits/chosen": 242.286376953125, |
|
"logits/rejected": 242.5799560546875, |
|
"logps/chosen": -1.3862061500549316, |
|
"logps/rejected": -1.5650876760482788, |
|
"loss": 1.454, |
|
"odds_ratio_loss": 0.6781331300735474, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.13862061500549316, |
|
"rewards/margins": 0.01788817159831524, |
|
"rewards/rejected": -0.15650877356529236, |
|
"sft_loss": 1.3862061500549316, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.270956816257409, |
|
"grad_norm": 1.3204820156097412, |
|
"learning_rate": 4.899978850820176e-06, |
|
"logits/chosen": 244.1859588623047, |
|
"logits/rejected": 244.36865234375, |
|
"logps/chosen": -1.5278394222259521, |
|
"logps/rejected": -1.7709777355194092, |
|
"loss": 1.5938, |
|
"odds_ratio_loss": 0.6600898504257202, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.15278393030166626, |
|
"rewards/margins": 0.02431386150419712, |
|
"rewards/rejected": -0.17709779739379883, |
|
"sft_loss": 1.5278394222259521, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.28789161727349705, |
|
"grad_norm": 2.3198039531707764, |
|
"learning_rate": 4.887175070016795e-06, |
|
"logits/chosen": 242.52523803710938, |
|
"logits/rejected": 243.25057983398438, |
|
"logps/chosen": -1.3887180089950562, |
|
"logps/rejected": -1.6768462657928467, |
|
"loss": 1.4533, |
|
"odds_ratio_loss": 0.6456118822097778, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.13887180387973785, |
|
"rewards/margins": 0.02881282940506935, |
|
"rewards/rejected": -0.1676846444606781, |
|
"sft_loss": 1.3887180089950562, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3048264182895851, |
|
"grad_norm": 2.7539143562316895, |
|
"learning_rate": 4.873619189965217e-06, |
|
"logits/chosen": 246.211181640625, |
|
"logits/rejected": 246.160400390625, |
|
"logps/chosen": -1.5571125745773315, |
|
"logps/rejected": -1.8048861026763916, |
|
"loss": 1.6256, |
|
"odds_ratio_loss": 0.684849202632904, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.15571126341819763, |
|
"rewards/margins": 0.024777347221970558, |
|
"rewards/rejected": -0.18048861622810364, |
|
"sft_loss": 1.5571125745773315, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.32176121930567314, |
|
"grad_norm": 1.5920647382736206, |
|
"learning_rate": 4.859315481557563e-06, |
|
"logits/chosen": 243.8936309814453, |
|
"logits/rejected": 244.6537628173828, |
|
"logps/chosen": -1.3444526195526123, |
|
"logps/rejected": -1.622938871383667, |
|
"loss": 1.4127, |
|
"odds_ratio_loss": 0.6820069551467896, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.13444526493549347, |
|
"rewards/margins": 0.027848612517118454, |
|
"rewards/rejected": -0.16229388117790222, |
|
"sft_loss": 1.3444526195526123, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3386960203217612, |
|
"grad_norm": 1.2194499969482422, |
|
"learning_rate": 4.84426845129546e-06, |
|
"logits/chosen": 245.6062774658203, |
|
"logits/rejected": 245.9265899658203, |
|
"logps/chosen": -1.4105961322784424, |
|
"logps/rejected": -1.591505765914917, |
|
"loss": 1.4778, |
|
"odds_ratio_loss": 0.6719616055488586, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.14105962216854095, |
|
"rewards/margins": 0.018090957775712013, |
|
"rewards/rejected": -0.15915057063102722, |
|
"sft_loss": 1.4105961322784424, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3556308213378493, |
|
"grad_norm": 1.6798571348190308, |
|
"learning_rate": 4.828482839870233e-06, |
|
"logits/chosen": 244.5519561767578, |
|
"logits/rejected": 244.9039764404297, |
|
"logps/chosen": -1.4486945867538452, |
|
"logps/rejected": -1.5620288848876953, |
|
"loss": 1.5235, |
|
"odds_ratio_loss": 0.7479228973388672, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.14486947655677795, |
|
"rewards/margins": 0.011333415284752846, |
|
"rewards/rejected": -0.15620288252830505, |
|
"sft_loss": 1.4486945867538452, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.37256562235393736, |
|
"grad_norm": 1.2994054555892944, |
|
"learning_rate": 4.811963620669314e-06, |
|
"logits/chosen": 247.3550567626953, |
|
"logits/rejected": 247.1066436767578, |
|
"logps/chosen": -1.46903395652771, |
|
"logps/rejected": -1.5901343822479248, |
|
"loss": 1.5401, |
|
"odds_ratio_loss": 0.7104851007461548, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.14690342545509338, |
|
"rewards/margins": 0.012110031209886074, |
|
"rewards/rejected": -0.15901342034339905, |
|
"sft_loss": 1.46903395652771, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3895004233700254, |
|
"grad_norm": 1.8322347402572632, |
|
"learning_rate": 4.794715998209328e-06, |
|
"logits/chosen": 245.6450653076172, |
|
"logits/rejected": 246.178955078125, |
|
"logps/chosen": -1.3713748455047607, |
|
"logps/rejected": -1.5887004137039185, |
|
"loss": 1.4399, |
|
"odds_ratio_loss": 0.6854357719421387, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1371375024318695, |
|
"rewards/margins": 0.02173253521323204, |
|
"rewards/rejected": -0.15887005627155304, |
|
"sft_loss": 1.3713748455047607, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.40643522438611346, |
|
"grad_norm": 1.3373929262161255, |
|
"learning_rate": 4.7767454064963724e-06, |
|
"logits/chosen": 246.9872283935547, |
|
"logits/rejected": 247.23184204101562, |
|
"logps/chosen": -1.418143391609192, |
|
"logps/rejected": -1.6108840703964233, |
|
"loss": 1.485, |
|
"odds_ratio_loss": 0.6687358617782593, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.14181432127952576, |
|
"rewards/margins": 0.019274089485406876, |
|
"rewards/rejected": -0.16108840703964233, |
|
"sft_loss": 1.418143391609192, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.42337002540220153, |
|
"grad_norm": 2.7464914321899414, |
|
"learning_rate": 4.758057507313987e-06, |
|
"logits/chosen": 246.5187530517578, |
|
"logits/rejected": 247.2135772705078, |
|
"logps/chosen": -1.3453131914138794, |
|
"logps/rejected": -1.5121968984603882, |
|
"loss": 1.4146, |
|
"odds_ratio_loss": 0.6926182508468628, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.13453131914138794, |
|
"rewards/margins": 0.0166883897036314, |
|
"rewards/rejected": -0.1512196958065033, |
|
"sft_loss": 1.3453131914138794, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4403048264182896, |
|
"grad_norm": 6.080187797546387, |
|
"learning_rate": 4.73865818843936e-06, |
|
"logits/chosen": 247.1885223388672, |
|
"logits/rejected": 247.33627319335938, |
|
"logps/chosen": -1.4415591955184937, |
|
"logps/rejected": -1.6857059001922607, |
|
"loss": 1.5104, |
|
"odds_ratio_loss": 0.6885947585105896, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.14415593445301056, |
|
"rewards/margins": 0.0244146715849638, |
|
"rewards/rejected": -0.1685706079006195, |
|
"sft_loss": 1.4415591955184937, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.4572396274343776, |
|
"grad_norm": 1.4364287853240967, |
|
"learning_rate": 4.718553561788339e-06, |
|
"logits/chosen": 247.25503540039062, |
|
"logits/rejected": 247.4564971923828, |
|
"logps/chosen": -1.3397562503814697, |
|
"logps/rejected": -1.5836654901504517, |
|
"loss": 1.4039, |
|
"odds_ratio_loss": 0.6409639120101929, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.13397563993930817, |
|
"rewards/margins": 0.024390894919633865, |
|
"rewards/rejected": -0.15836653113365173, |
|
"sft_loss": 1.3397562503814697, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.4741744284504657, |
|
"grad_norm": 1.979526162147522, |
|
"learning_rate": 4.697749961489822e-06, |
|
"logits/chosen": 246.6254425048828, |
|
"logits/rejected": 246.77536010742188, |
|
"logps/chosen": -1.5182511806488037, |
|
"logps/rejected": -1.7493031024932861, |
|
"loss": 1.5866, |
|
"odds_ratio_loss": 0.6837342381477356, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.15182514488697052, |
|
"rewards/margins": 0.023105164989829063, |
|
"rewards/rejected": -0.17493028938770294, |
|
"sft_loss": 1.5182511806488037, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4911092294665538, |
|
"grad_norm": 2.0010595321655273, |
|
"learning_rate": 4.67625394189013e-06, |
|
"logits/chosen": 246.59414672851562, |
|
"logits/rejected": 246.92410278320312, |
|
"logps/chosen": -1.317578673362732, |
|
"logps/rejected": -1.5447911024093628, |
|
"loss": 1.381, |
|
"odds_ratio_loss": 0.6341525316238403, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.13175788521766663, |
|
"rewards/margins": 0.022721242159605026, |
|
"rewards/rejected": -0.15447911620140076, |
|
"sft_loss": 1.317578673362732, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5080440304826418, |
|
"grad_norm": 3.1077401638031006, |
|
"learning_rate": 4.654072275488016e-06, |
|
"logits/chosen": 248.23709106445312, |
|
"logits/rejected": 248.05142211914062, |
|
"logps/chosen": -1.2916858196258545, |
|
"logps/rejected": -1.4983139038085938, |
|
"loss": 1.3555, |
|
"odds_ratio_loss": 0.6378514766693115, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1291685849428177, |
|
"rewards/margins": 0.020662816241383553, |
|
"rewards/rejected": -0.14983141422271729, |
|
"sft_loss": 1.2916858196258545, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5249788314987299, |
|
"grad_norm": 2.628451108932495, |
|
"learning_rate": 4.631211950800925e-06, |
|
"logits/chosen": 248.5096435546875, |
|
"logits/rejected": 248.2730712890625, |
|
"logps/chosen": -1.3065412044525146, |
|
"logps/rejected": -1.4278388023376465, |
|
"loss": 1.3779, |
|
"odds_ratio_loss": 0.7139269709587097, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.13065412640571594, |
|
"rewards/margins": 0.012129749171435833, |
|
"rewards/rejected": -0.14278386533260345, |
|
"sft_loss": 1.3065412044525146, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.541913632514818, |
|
"grad_norm": 2.022738218307495, |
|
"learning_rate": 4.6076801701632095e-06, |
|
"logits/chosen": 250.45730590820312, |
|
"logits/rejected": 249.79464721679688, |
|
"logps/chosen": -1.3189256191253662, |
|
"logps/rejected": -1.4538644552230835, |
|
"loss": 1.3894, |
|
"odds_ratio_loss": 0.7043124437332153, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.13189253211021423, |
|
"rewards/margins": 0.013493897393345833, |
|
"rewards/rejected": -0.1453864425420761, |
|
"sft_loss": 1.3189256191253662, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.558848433530906, |
|
"grad_norm": 1.9951550960540771, |
|
"learning_rate": 4.583484347456972e-06, |
|
"logits/chosen": 247.82229614257812, |
|
"logits/rejected": 248.18814086914062, |
|
"logps/chosen": -1.4347447156906128, |
|
"logps/rejected": -1.533195972442627, |
|
"loss": 1.5114, |
|
"odds_ratio_loss": 0.7667852640151978, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.14347447454929352, |
|
"rewards/margins": 0.00984511710703373, |
|
"rewards/rejected": -0.1533195823431015, |
|
"sft_loss": 1.4347447156906128, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5757832345469941, |
|
"grad_norm": 1.1787029504776, |
|
"learning_rate": 4.55863210577626e-06, |
|
"logits/chosen": 248.49844360351562, |
|
"logits/rejected": 249.04006958007812, |
|
"logps/chosen": -1.3999149799346924, |
|
"logps/rejected": -1.6324024200439453, |
|
"loss": 1.4655, |
|
"odds_ratio_loss": 0.6553832292556763, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.13999152183532715, |
|
"rewards/margins": 0.02324872463941574, |
|
"rewards/rejected": -0.1632402390241623, |
|
"sft_loss": 1.3999149799346924, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5927180355630821, |
|
"grad_norm": 1.3341506719589233, |
|
"learning_rate": 4.5331312750253465e-06, |
|
"logits/chosen": 250.0833740234375, |
|
"logits/rejected": 249.986328125, |
|
"logps/chosen": -1.353328824043274, |
|
"logps/rejected": -1.4628279209136963, |
|
"loss": 1.4282, |
|
"odds_ratio_loss": 0.7489296197891235, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.1353328824043274, |
|
"rewards/margins": 0.010949901305139065, |
|
"rewards/rejected": -0.14628279209136963, |
|
"sft_loss": 1.353328824043274, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6096528365791702, |
|
"grad_norm": 7.017007827758789, |
|
"learning_rate": 4.506989889451858e-06, |
|
"logits/chosen": 250.78952026367188, |
|
"logits/rejected": 251.0829315185547, |
|
"logps/chosen": -1.3330605030059814, |
|
"logps/rejected": -1.492555022239685, |
|
"loss": 1.4015, |
|
"odds_ratio_loss": 0.6845325231552124, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.13330607116222382, |
|
"rewards/margins": 0.01594943180680275, |
|
"rewards/rejected": -0.14925549924373627, |
|
"sft_loss": 1.3330605030059814, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6265876375952583, |
|
"grad_norm": 2.496628761291504, |
|
"learning_rate": 4.480216185115512e-06, |
|
"logits/chosen": 251.03042602539062, |
|
"logits/rejected": 251.23403930664062, |
|
"logps/chosen": -1.3291356563568115, |
|
"logps/rejected": -1.5497068166732788, |
|
"loss": 1.3933, |
|
"odds_ratio_loss": 0.641234278678894, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.13291357457637787, |
|
"rewards/margins": 0.022057104855775833, |
|
"rewards/rejected": -0.1549706757068634, |
|
"sft_loss": 1.3291356563568115, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6435224386113463, |
|
"grad_norm": 1.7296651601791382, |
|
"learning_rate": 4.4528185972932856e-06, |
|
"logits/chosen": 249.67446899414062, |
|
"logits/rejected": 250.25466918945312, |
|
"logps/chosen": -1.3678677082061768, |
|
"logps/rejected": -1.639181137084961, |
|
"loss": 1.4346, |
|
"odds_ratio_loss": 0.6676440834999084, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.13678675889968872, |
|
"rewards/margins": 0.02713136002421379, |
|
"rewards/rejected": -0.1639181226491928, |
|
"sft_loss": 1.3678677082061768, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6604572396274344, |
|
"grad_norm": 5.801638126373291, |
|
"learning_rate": 4.424805757821803e-06, |
|
"logits/chosen": 249.99923706054688, |
|
"logits/rejected": 250.46279907226562, |
|
"logps/chosen": -1.4075113534927368, |
|
"logps/rejected": -1.539193868637085, |
|
"loss": 1.4777, |
|
"odds_ratio_loss": 0.7016426920890808, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.14075112342834473, |
|
"rewards/margins": 0.013168272562325, |
|
"rewards/rejected": -0.15391941368579865, |
|
"sft_loss": 1.4075113534927368, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6773920406435224, |
|
"grad_norm": 2.283698558807373, |
|
"learning_rate": 4.396186492377812e-06, |
|
"logits/chosen": 250.4367218017578, |
|
"logits/rejected": 250.3822479248047, |
|
"logps/chosen": -1.3555715084075928, |
|
"logps/rejected": -1.6237144470214844, |
|
"loss": 1.4187, |
|
"odds_ratio_loss": 0.631491482257843, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.1355571299791336, |
|
"rewards/margins": 0.026814306154847145, |
|
"rewards/rejected": -0.1623714417219162, |
|
"sft_loss": 1.3555715084075928, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6943268416596104, |
|
"grad_norm": 3.2836830615997314, |
|
"learning_rate": 4.366969817697578e-06, |
|
"logits/chosen": 249.8821563720703, |
|
"logits/rejected": 250.1019744873047, |
|
"logps/chosen": -1.3262561559677124, |
|
"logps/rejected": -1.481173038482666, |
|
"loss": 1.397, |
|
"odds_ratio_loss": 0.7070230841636658, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.13262560963630676, |
|
"rewards/margins": 0.015491697005927563, |
|
"rewards/rejected": -0.1481173038482666, |
|
"sft_loss": 1.3262561559677124, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.7112616426756986, |
|
"grad_norm": 2.213815689086914, |
|
"learning_rate": 4.337164938736086e-06, |
|
"logits/chosen": 250.5995635986328, |
|
"logits/rejected": 250.66165161132812, |
|
"logps/chosen": -1.3863251209259033, |
|
"logps/rejected": -1.513422966003418, |
|
"loss": 1.4589, |
|
"odds_ratio_loss": 0.7256019115447998, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.13863249123096466, |
|
"rewards/margins": 0.012709791772067547, |
|
"rewards/rejected": -0.15134228765964508, |
|
"sft_loss": 1.3863251209259033, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7281964436917866, |
|
"grad_norm": 1.0603761672973633, |
|
"learning_rate": 4.306781245766945e-06, |
|
"logits/chosen": 250.349853515625, |
|
"logits/rejected": 250.0968780517578, |
|
"logps/chosen": -1.2664977312088013, |
|
"logps/rejected": -1.475754976272583, |
|
"loss": 1.3367, |
|
"odds_ratio_loss": 0.7022702097892761, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12664976716041565, |
|
"rewards/margins": 0.0209257360547781, |
|
"rewards/rejected": -0.1475754976272583, |
|
"sft_loss": 1.2664977312088013, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7451312447078747, |
|
"grad_norm": 4.412312984466553, |
|
"learning_rate": 4.275828311423903e-06, |
|
"logits/chosen": 249.72463989257812, |
|
"logits/rejected": 250.4665985107422, |
|
"logps/chosen": -1.4287374019622803, |
|
"logps/rejected": -1.5396887063980103, |
|
"loss": 1.4987, |
|
"odds_ratio_loss": 0.6998986005783081, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.14287373423576355, |
|
"rewards/margins": 0.011095147579908371, |
|
"rewards/rejected": -0.15396887063980103, |
|
"sft_loss": 1.4287374019622803, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7620660457239627, |
|
"grad_norm": 6.307819843292236, |
|
"learning_rate": 4.244315887684912e-06, |
|
"logits/chosen": 250.8037109375, |
|
"logits/rejected": 251.0100555419922, |
|
"logps/chosen": -1.242921233177185, |
|
"logps/rejected": -1.4474495649337769, |
|
"loss": 1.3115, |
|
"odds_ratio_loss": 0.6855745911598206, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.12429212033748627, |
|
"rewards/margins": 0.020452830940485, |
|
"rewards/rejected": -0.14474496245384216, |
|
"sft_loss": 1.242921233177185, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7790008467400508, |
|
"grad_norm": 1.9661298990249634, |
|
"learning_rate": 4.212253902799685e-06, |
|
"logits/chosen": 250.1886749267578, |
|
"logits/rejected": 250.97207641601562, |
|
"logps/chosen": -1.3525545597076416, |
|
"logps/rejected": -1.6002397537231445, |
|
"loss": 1.4203, |
|
"odds_ratio_loss": 0.6769625544548035, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.13525545597076416, |
|
"rewards/margins": 0.024768516421318054, |
|
"rewards/rejected": -0.16002397239208221, |
|
"sft_loss": 1.3525545597076416, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7959356477561389, |
|
"grad_norm": 2.6013875007629395, |
|
"learning_rate": 4.179652458161718e-06, |
|
"logits/chosen": 250.9954376220703, |
|
"logits/rejected": 250.8687286376953, |
|
"logps/chosen": -1.3839843273162842, |
|
"logps/rejected": -1.4946000576019287, |
|
"loss": 1.4574, |
|
"odds_ratio_loss": 0.7343354821205139, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1383984386920929, |
|
"rewards/margins": 0.011061567813158035, |
|
"rewards/rejected": -0.14946000277996063, |
|
"sft_loss": 1.3839843273162842, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.8128704487722269, |
|
"grad_norm": 1.4564313888549805, |
|
"learning_rate": 4.146521825125765e-06, |
|
"logits/chosen": 251.6759796142578, |
|
"logits/rejected": 251.0865478515625, |
|
"logps/chosen": -1.3470927476882935, |
|
"logps/rejected": -1.5811121463775635, |
|
"loss": 1.4152, |
|
"odds_ratio_loss": 0.6814435720443726, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.13470926880836487, |
|
"rewards/margins": 0.023401936516165733, |
|
"rewards/rejected": -0.15811121463775635, |
|
"sft_loss": 1.3470927476882935, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.8298052497883149, |
|
"grad_norm": 2.20600962638855, |
|
"learning_rate": 4.11287244177176e-06, |
|
"logits/chosen": 252.32333374023438, |
|
"logits/rejected": 252.3999481201172, |
|
"logps/chosen": -1.313819408416748, |
|
"logps/rejected": -1.5711250305175781, |
|
"loss": 1.3752, |
|
"odds_ratio_loss": 0.6139523386955261, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.13138195872306824, |
|
"rewards/margins": 0.0257305596023798, |
|
"rewards/rejected": -0.1571125090122223, |
|
"sft_loss": 1.313819408416748, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8467400508044031, |
|
"grad_norm": 6.151379108428955, |
|
"learning_rate": 4.078714909616215e-06, |
|
"logits/chosen": 250.7323760986328, |
|
"logits/rejected": 250.898681640625, |
|
"logps/chosen": -1.3562355041503906, |
|
"logps/rejected": -1.6217498779296875, |
|
"loss": 1.422, |
|
"odds_ratio_loss": 0.6572010517120361, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.13562354445457458, |
|
"rewards/margins": 0.026551440358161926, |
|
"rewards/rejected": -0.1621749848127365, |
|
"sft_loss": 1.3562355041503906, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8467400508044031, |
|
"eval_logits/chosen": 250.27386474609375, |
|
"eval_logits/rejected": 250.56336975097656, |
|
"eval_logps/chosen": -1.3222252130508423, |
|
"eval_logps/rejected": -1.5458606481552124, |
|
"eval_loss": 1.3895596265792847, |
|
"eval_odds_ratio_loss": 0.6733438968658447, |
|
"eval_rewards/accuracies": 0.5752381086349487, |
|
"eval_rewards/chosen": -0.1322225034236908, |
|
"eval_rewards/margins": 0.022363554686307907, |
|
"eval_rewards/rejected": -0.1545860767364502, |
|
"eval_runtime": 222.405, |
|
"eval_samples_per_second": 4.721, |
|
"eval_sft_loss": 1.3222252130508423, |
|
"eval_steps_per_second": 2.361, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8636748518204911, |
|
"grad_norm": 7.651697635650635, |
|
"learning_rate": 4.044059990272125e-06, |
|
"logits/chosen": 250.82901000976562, |
|
"logits/rejected": 250.9202423095703, |
|
"logps/chosen": -1.3385546207427979, |
|
"logps/rejected": -1.5796321630477905, |
|
"loss": 1.4082, |
|
"odds_ratio_loss": 0.6965717077255249, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.13385547697544098, |
|
"rewards/margins": 0.024107756093144417, |
|
"rewards/rejected": -0.15796321630477905, |
|
"sft_loss": 1.3385546207427979, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.8806096528365792, |
|
"grad_norm": 2.0727665424346924, |
|
"learning_rate": 4.0089186020584345e-06, |
|
"logits/chosen": 250.8845977783203, |
|
"logits/rejected": 251.09323120117188, |
|
"logps/chosen": -1.3856604099273682, |
|
"logps/rejected": -1.521828055381775, |
|
"loss": 1.453, |
|
"odds_ratio_loss": 0.6737630367279053, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1385660469532013, |
|
"rewards/margins": 0.013616764917969704, |
|
"rewards/rejected": -0.15218280255794525, |
|
"sft_loss": 1.3856604099273682, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8975444538526672, |
|
"grad_norm": 1.6497074365615845, |
|
"learning_rate": 3.973301816560124e-06, |
|
"logits/chosen": 250.5475616455078, |
|
"logits/rejected": 251.07559204101562, |
|
"logps/chosen": -1.2916876077651978, |
|
"logps/rejected": -1.5535304546356201, |
|
"loss": 1.355, |
|
"odds_ratio_loss": 0.6329156160354614, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.12916876375675201, |
|
"rewards/margins": 0.026184294372797012, |
|
"rewards/rejected": -0.15535303950309753, |
|
"sft_loss": 1.2916876077651978, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.9144792548687553, |
|
"grad_norm": 2.159715414047241, |
|
"learning_rate": 3.937220855140021e-06, |
|
"logits/chosen": 252.92514038085938, |
|
"logits/rejected": 252.7586212158203, |
|
"logps/chosen": -1.2703886032104492, |
|
"logps/rejected": -1.4452732801437378, |
|
"loss": 1.3383, |
|
"odds_ratio_loss": 0.6788212060928345, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1270388662815094, |
|
"rewards/margins": 0.017488475888967514, |
|
"rewards/rejected": -0.14452733099460602, |
|
"sft_loss": 1.2703886032104492, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9314140558848434, |
|
"grad_norm": 1.6743594408035278, |
|
"learning_rate": 3.900687085403418e-06, |
|
"logits/chosen": 251.9027099609375, |
|
"logits/rejected": 251.82095336914062, |
|
"logps/chosen": -1.2356189489364624, |
|
"logps/rejected": -1.3227603435516357, |
|
"loss": 1.3057, |
|
"odds_ratio_loss": 0.7003273963928223, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.12356189638376236, |
|
"rewards/margins": 0.008714134804904461, |
|
"rewards/rejected": -0.1322760283946991, |
|
"sft_loss": 1.2356189489364624, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9483488569009314, |
|
"grad_norm": 2.0650203227996826, |
|
"learning_rate": 3.863712017616614e-06, |
|
"logits/chosen": 252.4972686767578, |
|
"logits/rejected": 253.4659881591797, |
|
"logps/chosen": -1.33904230594635, |
|
"logps/rejected": -1.5850245952606201, |
|
"loss": 1.4043, |
|
"odds_ratio_loss": 0.6524351835250854, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.13390421867370605, |
|
"rewards/margins": 0.024598244577646255, |
|
"rewards/rejected": -0.158502459526062, |
|
"sft_loss": 1.33904230594635, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9652836579170194, |
|
"grad_norm": 4.395691394805908, |
|
"learning_rate": 3.826307301080504e-06, |
|
"logits/chosen": 250.24227905273438, |
|
"logits/rejected": 250.7295379638672, |
|
"logps/chosen": -1.3605867624282837, |
|
"logps/rejected": -1.592639684677124, |
|
"loss": 1.436, |
|
"odds_ratio_loss": 0.7545104622840881, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.13605865836143494, |
|
"rewards/margins": 0.02320530079305172, |
|
"rewards/rejected": -0.1592639982700348, |
|
"sft_loss": 1.3605867624282837, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.9822184589331076, |
|
"grad_norm": 2.8619537353515625, |
|
"learning_rate": 3.7884847204603775e-06, |
|
"logits/chosen": 252.0938262939453, |
|
"logits/rejected": 252.2036895751953, |
|
"logps/chosen": -1.2960541248321533, |
|
"logps/rejected": -1.4377543926239014, |
|
"loss": 1.3661, |
|
"odds_ratio_loss": 0.7007730007171631, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.12960541248321533, |
|
"rewards/margins": 0.014170033857226372, |
|
"rewards/rejected": -0.14377544820308685, |
|
"sft_loss": 1.2960541248321533, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9991532599491956, |
|
"grad_norm": 2.728938102722168, |
|
"learning_rate": 3.750256192073058e-06, |
|
"logits/chosen": 252.297607421875, |
|
"logits/rejected": 252.3206024169922, |
|
"logps/chosen": -1.4331130981445312, |
|
"logps/rejected": -1.4828779697418213, |
|
"loss": 1.5094, |
|
"odds_ratio_loss": 0.7630207538604736, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.1433113068342209, |
|
"rewards/margins": 0.004976483527570963, |
|
"rewards/rejected": -0.1482878029346466, |
|
"sft_loss": 1.4331130981445312, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.0160880609652836, |
|
"grad_norm": 4.769712924957275, |
|
"learning_rate": 3.7116337601325715e-06, |
|
"logits/chosen": 251.11611938476562, |
|
"logits/rejected": 251.3095245361328, |
|
"logps/chosen": -1.2182286977767944, |
|
"logps/rejected": -1.3984460830688477, |
|
"loss": 1.285, |
|
"odds_ratio_loss": 0.6677871346473694, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.12182287871837616, |
|
"rewards/margins": 0.01802174374461174, |
|
"rewards/rejected": -0.139844611287117, |
|
"sft_loss": 1.2182286977767944, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0330228619813717, |
|
"grad_norm": 2.1474721431732178, |
|
"learning_rate": 3.6726295929555154e-06, |
|
"logits/chosen": 251.42453002929688, |
|
"logits/rejected": 252.20944213867188, |
|
"logps/chosen": -1.2515238523483276, |
|
"logps/rejected": -1.3369293212890625, |
|
"loss": 1.3216, |
|
"odds_ratio_loss": 0.7008894085884094, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.12515239417552948, |
|
"rewards/margins": 0.008540543727576733, |
|
"rewards/rejected": -0.1336929351091385, |
|
"sft_loss": 1.2515238523483276, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.0499576629974599, |
|
"grad_norm": 2.5692148208618164, |
|
"learning_rate": 3.6332559791273307e-06, |
|
"logits/chosen": 251.50045776367188, |
|
"logits/rejected": 251.7969970703125, |
|
"logps/chosen": -1.236567735671997, |
|
"logps/rejected": -1.4078432321548462, |
|
"loss": 1.3014, |
|
"odds_ratio_loss": 0.6482684016227722, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.12365677207708359, |
|
"rewards/margins": 0.01712755486369133, |
|
"rewards/rejected": -0.14078432321548462, |
|
"sft_loss": 1.236567735671997, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.0668924640135478, |
|
"grad_norm": 3.342146635055542, |
|
"learning_rate": 3.593525323630681e-06, |
|
"logits/chosen": 251.15097045898438, |
|
"logits/rejected": 251.6619110107422, |
|
"logps/chosen": -1.2767521142959595, |
|
"logps/rejected": -1.4714807271957397, |
|
"loss": 1.34, |
|
"odds_ratio_loss": 0.6320163607597351, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12767520546913147, |
|
"rewards/margins": 0.019472863525152206, |
|
"rewards/rejected": -0.14714807271957397, |
|
"sft_loss": 1.2767521142959595, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.083827265029636, |
|
"grad_norm": 1.7750215530395508, |
|
"learning_rate": 3.5534501439371615e-06, |
|
"logits/chosen": 253.18130493164062, |
|
"logits/rejected": 253.9321746826172, |
|
"logps/chosen": -1.2172341346740723, |
|
"logps/rejected": -1.4529893398284912, |
|
"loss": 1.282, |
|
"odds_ratio_loss": 0.6480832099914551, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.12172339856624603, |
|
"rewards/margins": 0.023575523868203163, |
|
"rewards/rejected": -0.14529892802238464, |
|
"sft_loss": 1.2172341346740723, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.100762066045724, |
|
"grad_norm": 3.0737292766571045, |
|
"learning_rate": 3.5130430660635633e-06, |
|
"logits/chosen": 253.6792449951172, |
|
"logits/rejected": 253.87606811523438, |
|
"logps/chosen": -1.2902581691741943, |
|
"logps/rejected": -1.4731221199035645, |
|
"loss": 1.3556, |
|
"odds_ratio_loss": 0.6539097428321838, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.12902581691741943, |
|
"rewards/margins": 0.01828640140593052, |
|
"rewards/rejected": -0.1473122239112854, |
|
"sft_loss": 1.2902581691741943, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.117696867061812, |
|
"grad_norm": 1.3352795839309692, |
|
"learning_rate": 3.4723168205939444e-06, |
|
"logits/chosen": 252.404541015625, |
|
"logits/rejected": 252.8050994873047, |
|
"logps/chosen": -1.2593786716461182, |
|
"logps/rejected": -1.3454132080078125, |
|
"loss": 1.3296, |
|
"odds_ratio_loss": 0.7021333575248718, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.12593786418437958, |
|
"rewards/margins": 0.008603455498814583, |
|
"rewards/rejected": -0.134541317820549, |
|
"sft_loss": 1.2593786716461182, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.1346316680779, |
|
"grad_norm": 1.4886754751205444, |
|
"learning_rate": 3.431284238668754e-06, |
|
"logits/chosen": 252.7022247314453, |
|
"logits/rejected": 253.0660858154297, |
|
"logps/chosen": -1.344042420387268, |
|
"logps/rejected": -1.5089060068130493, |
|
"loss": 1.4141, |
|
"odds_ratio_loss": 0.7006558775901794, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.134404256939888, |
|
"rewards/margins": 0.01648634299635887, |
|
"rewards/rejected": -0.15089061856269836, |
|
"sft_loss": 1.344042420387268, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.1515664690939882, |
|
"grad_norm": 8.335692405700684, |
|
"learning_rate": 3.389958247942274e-06, |
|
"logits/chosen": 251.40444946289062, |
|
"logits/rejected": 252.54983520507812, |
|
"logps/chosen": -1.358946442604065, |
|
"logps/rejected": -1.5931769609451294, |
|
"loss": 1.4306, |
|
"odds_ratio_loss": 0.7168671488761902, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.13589465618133545, |
|
"rewards/margins": 0.023423049598932266, |
|
"rewards/rejected": -0.15931770205497742, |
|
"sft_loss": 1.358946442604065, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.168501270110076, |
|
"grad_norm": 2.2777557373046875, |
|
"learning_rate": 3.3483518685096588e-06, |
|
"logits/chosen": 252.7519989013672, |
|
"logits/rejected": 252.7262420654297, |
|
"logps/chosen": -1.313045859336853, |
|
"logps/rejected": -1.4913846254348755, |
|
"loss": 1.3792, |
|
"odds_ratio_loss": 0.6616145372390747, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.13130459189414978, |
|
"rewards/margins": 0.017833886668086052, |
|
"rewards/rejected": -0.14913848042488098, |
|
"sft_loss": 1.313045859336853, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.1854360711261642, |
|
"grad_norm": 1.7483341693878174, |
|
"learning_rate": 3.306478208804839e-06, |
|
"logits/chosen": 251.73300170898438, |
|
"logits/rejected": 252.00765991210938, |
|
"logps/chosen": -1.3198022842407227, |
|
"logps/rejected": -1.5368399620056152, |
|
"loss": 1.3902, |
|
"odds_ratio_loss": 0.7039870023727417, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.13198022544384003, |
|
"rewards/margins": 0.02170378342270851, |
|
"rewards/rejected": -0.15368400514125824, |
|
"sft_loss": 1.3198022842407227, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.2023708721422524, |
|
"grad_norm": 3.1045162677764893, |
|
"learning_rate": 3.264350461470608e-06, |
|
"logits/chosen": 251.3062744140625, |
|
"logits/rejected": 251.24673461914062, |
|
"logps/chosen": -1.191873550415039, |
|
"logps/rejected": -1.5103055238723755, |
|
"loss": 1.2531, |
|
"odds_ratio_loss": 0.6123217344284058, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1191873699426651, |
|
"rewards/margins": 0.031843192875385284, |
|
"rewards/rejected": -0.15103057026863098, |
|
"sft_loss": 1.191873550415039, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.2193056731583405, |
|
"grad_norm": 4.187674522399902, |
|
"learning_rate": 3.2219818992021685e-06, |
|
"logits/chosen": 250.6509246826172, |
|
"logits/rejected": 250.8859100341797, |
|
"logps/chosen": -1.1646994352340698, |
|
"logps/rejected": -1.4936308860778809, |
|
"loss": 1.2266, |
|
"odds_ratio_loss": 0.6186606884002686, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.11646995693445206, |
|
"rewards/margins": 0.032893143594264984, |
|
"rewards/rejected": -0.14936310052871704, |
|
"sft_loss": 1.1646994352340698, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.2362404741744284, |
|
"grad_norm": 6.236975193023682, |
|
"learning_rate": 3.1793858705654595e-06, |
|
"logits/chosen": 252.42489624023438, |
|
"logits/rejected": 252.6690673828125, |
|
"logps/chosen": -1.1537976264953613, |
|
"logps/rejected": -1.407354712486267, |
|
"loss": 1.2166, |
|
"odds_ratio_loss": 0.6279497146606445, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.11537975072860718, |
|
"rewards/margins": 0.02535572089254856, |
|
"rewards/rejected": -0.1407354772090912, |
|
"sft_loss": 1.1537976264953613, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.2531752751905165, |
|
"grad_norm": 4.287051677703857, |
|
"learning_rate": 3.1365757957915787e-06, |
|
"logits/chosen": 252.34127807617188, |
|
"logits/rejected": 252.6292724609375, |
|
"logps/chosen": -1.2895433902740479, |
|
"logps/rejected": -1.4524219036102295, |
|
"loss": 1.3549, |
|
"odds_ratio_loss": 0.6531358957290649, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.12895433604717255, |
|
"rewards/margins": 0.016287846490740776, |
|
"rewards/rejected": -0.14524218440055847, |
|
"sft_loss": 1.2895433902740479, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.2701100762066047, |
|
"grad_norm": 3.4889471530914307, |
|
"learning_rate": 3.093565162548633e-06, |
|
"logits/chosen": 252.25082397460938, |
|
"logits/rejected": 252.3234405517578, |
|
"logps/chosen": -1.3181928396224976, |
|
"logps/rejected": -1.5464909076690674, |
|
"loss": 1.3886, |
|
"odds_ratio_loss": 0.7041035890579224, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.13181930780410767, |
|
"rewards/margins": 0.02282978780567646, |
|
"rewards/rejected": -0.15464909374713898, |
|
"sft_loss": 1.3181928396224976, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.2870448772226926, |
|
"grad_norm": 2.6276607513427734, |
|
"learning_rate": 3.0503675216923294e-06, |
|
"logits/chosen": 252.7392120361328, |
|
"logits/rejected": 253.15933227539062, |
|
"logps/chosen": -1.1722157001495361, |
|
"logps/rejected": -1.3820818662643433, |
|
"loss": 1.2364, |
|
"odds_ratio_loss": 0.642305850982666, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.11722154915332794, |
|
"rewards/margins": 0.020986629649996758, |
|
"rewards/rejected": -0.13820818066596985, |
|
"sft_loss": 1.1722157001495361, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.3039796782387807, |
|
"grad_norm": 2.102112054824829, |
|
"learning_rate": 3.0069964829966748e-06, |
|
"logits/chosen": 251.8651885986328, |
|
"logits/rejected": 252.33065795898438, |
|
"logps/chosen": -1.238761305809021, |
|
"logps/rejected": -1.385432481765747, |
|
"loss": 1.3081, |
|
"odds_ratio_loss": 0.6930680274963379, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.12387613952159882, |
|
"rewards/margins": 0.014667103998363018, |
|
"rewards/rejected": -0.1385432332754135, |
|
"sft_loss": 1.238761305809021, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.3209144792548688, |
|
"grad_norm": 1.8301111459732056, |
|
"learning_rate": 2.963465710866094e-06, |
|
"logits/chosen": 251.42507934570312, |
|
"logits/rejected": 251.70413208007812, |
|
"logps/chosen": -1.2377374172210693, |
|
"logps/rejected": -1.572333574295044, |
|
"loss": 1.2996, |
|
"odds_ratio_loss": 0.6182882189750671, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.1237737163901329, |
|
"rewards/margins": 0.0334596149623394, |
|
"rewards/rejected": -0.1572333723306656, |
|
"sft_loss": 1.2377374172210693, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.337849280270957, |
|
"grad_norm": 7.058846473693848, |
|
"learning_rate": 2.919788920030357e-06, |
|
"logits/chosen": 253.6296844482422, |
|
"logits/rejected": 254.1488800048828, |
|
"logps/chosen": -1.2885361909866333, |
|
"logps/rejected": -1.4078810214996338, |
|
"loss": 1.3598, |
|
"odds_ratio_loss": 0.7127668857574463, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.12885363399982452, |
|
"rewards/margins": 0.011934494599699974, |
|
"rewards/rejected": -0.14078812301158905, |
|
"sft_loss": 1.2885361909866333, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.3547840812870449, |
|
"grad_norm": 3.2417685985565186, |
|
"learning_rate": 2.8759798712236303e-06, |
|
"logits/chosen": 253.9468231201172, |
|
"logits/rejected": 254.49008178710938, |
|
"logps/chosen": -1.2510448694229126, |
|
"logps/rejected": -1.554084300994873, |
|
"loss": 1.3164, |
|
"odds_ratio_loss": 0.6536312103271484, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.12510448694229126, |
|
"rewards/margins": 0.030303955078125, |
|
"rewards/rejected": -0.15540844202041626, |
|
"sft_loss": 1.2510448694229126, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.371718882303133, |
|
"grad_norm": 1.9042048454284668, |
|
"learning_rate": 2.8320523668490507e-06, |
|
"logits/chosen": 253.7815704345703, |
|
"logits/rejected": 254.0332489013672, |
|
"logps/chosen": -1.2883363962173462, |
|
"logps/rejected": -1.4249004125595093, |
|
"loss": 1.3613, |
|
"odds_ratio_loss": 0.7295271754264832, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.12883363664150238, |
|
"rewards/margins": 0.01365639828145504, |
|
"rewards/rejected": -0.14249004423618317, |
|
"sft_loss": 1.2883363962173462, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.388653683319221, |
|
"grad_norm": 3.2637712955474854, |
|
"learning_rate": 2.7880202466301597e-06, |
|
"logits/chosen": 253.0078125, |
|
"logits/rejected": 253.09902954101562, |
|
"logps/chosen": -1.2434396743774414, |
|
"logps/rejected": -1.434788465499878, |
|
"loss": 1.3141, |
|
"odds_ratio_loss": 0.7062419652938843, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.12434396892786026, |
|
"rewards/margins": 0.019134875386953354, |
|
"rewards/rejected": -0.14347884058952332, |
|
"sft_loss": 1.2434396743774414, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.405588484335309, |
|
"grad_norm": 2.8521387577056885, |
|
"learning_rate": 2.7438973832505854e-06, |
|
"logits/chosen": 251.6912841796875, |
|
"logits/rejected": 252.3306121826172, |
|
"logps/chosen": -1.175612211227417, |
|
"logps/rejected": -1.4439500570297241, |
|
"loss": 1.2387, |
|
"odds_ratio_loss": 0.6307461857795715, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.1175612211227417, |
|
"rewards/margins": 0.026833802461624146, |
|
"rewards/rejected": -0.14439500868320465, |
|
"sft_loss": 1.175612211227417, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.4225232853513972, |
|
"grad_norm": 8.145666122436523, |
|
"learning_rate": 2.699697677983341e-06, |
|
"logits/chosen": 253.7397918701172, |
|
"logits/rejected": 253.99203491210938, |
|
"logps/chosen": -1.2202482223510742, |
|
"logps/rejected": -1.2863436937332153, |
|
"loss": 1.2952, |
|
"odds_ratio_loss": 0.7496277093887329, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.12202481925487518, |
|
"rewards/margins": 0.006609548814594746, |
|
"rewards/rejected": -0.12863437831401825, |
|
"sft_loss": 1.2202482223510742, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.4394580863674853, |
|
"grad_norm": 8.698111534118652, |
|
"learning_rate": 2.6554350563111115e-06, |
|
"logits/chosen": 252.4715118408203, |
|
"logits/rejected": 252.827392578125, |
|
"logps/chosen": -1.286290168762207, |
|
"logps/rejected": -1.4043002128601074, |
|
"loss": 1.3605, |
|
"odds_ratio_loss": 0.7424478530883789, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.12862902879714966, |
|
"rewards/margins": 0.011800997890532017, |
|
"rewards/rejected": -0.1404300183057785, |
|
"sft_loss": 1.286290168762207, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.4563928873835732, |
|
"grad_norm": 5.0890398025512695, |
|
"learning_rate": 2.611123463538913e-06, |
|
"logits/chosen": 251.801513671875, |
|
"logits/rejected": 251.6995391845703, |
|
"logps/chosen": -1.1361697912216187, |
|
"logps/rejected": -1.4048140048980713, |
|
"loss": 1.1998, |
|
"odds_ratio_loss": 0.6365838050842285, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.11361698061227798, |
|
"rewards/margins": 0.02686440572142601, |
|
"rewards/rejected": -0.1404813826084137, |
|
"sft_loss": 1.1361697912216187, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.4733276883996613, |
|
"grad_norm": 2.2515621185302734, |
|
"learning_rate": 2.566776860400514e-06, |
|
"logits/chosen": 253.1812286376953, |
|
"logits/rejected": 253.3629913330078, |
|
"logps/chosen": -1.3416874408721924, |
|
"logps/rejected": -1.5248029232025146, |
|
"loss": 1.4085, |
|
"odds_ratio_loss": 0.6685901880264282, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.13416874408721924, |
|
"rewards/margins": 0.01831156201660633, |
|
"rewards/rejected": -0.15248030424118042, |
|
"sft_loss": 1.3416874408721924, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.4902624894157492, |
|
"grad_norm": 1.626927137374878, |
|
"learning_rate": 2.522409218659989e-06, |
|
"logits/chosen": 254.90966796875, |
|
"logits/rejected": 255.2332305908203, |
|
"logps/chosen": -1.2846667766571045, |
|
"logps/rejected": -1.4117066860198975, |
|
"loss": 1.3537, |
|
"odds_ratio_loss": 0.6902014017105103, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.1284666806459427, |
|
"rewards/margins": 0.012703998014330864, |
|
"rewards/rejected": -0.1411706656217575, |
|
"sft_loss": 1.2846667766571045, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.5071972904318374, |
|
"grad_norm": 2.0912625789642334, |
|
"learning_rate": 2.4780345167097976e-06, |
|
"logits/chosen": 252.5232696533203, |
|
"logits/rejected": 253.27197265625, |
|
"logps/chosen": -1.2608062028884888, |
|
"logps/rejected": -1.5506179332733154, |
|
"loss": 1.3254, |
|
"odds_ratio_loss": 0.6455532908439636, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12608060240745544, |
|
"rewards/margins": 0.02898118831217289, |
|
"rewards/rejected": -0.15506179630756378, |
|
"sft_loss": 1.2608062028884888, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.5241320914479255, |
|
"grad_norm": 2.230842351913452, |
|
"learning_rate": 2.4336667351667747e-06, |
|
"logits/chosen": 254.10025024414062, |
|
"logits/rejected": 254.30361938476562, |
|
"logps/chosen": -1.2958338260650635, |
|
"logps/rejected": -1.5468194484710693, |
|
"loss": 1.358, |
|
"odds_ratio_loss": 0.6219068765640259, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.12958340346813202, |
|
"rewards/margins": 0.025098532438278198, |
|
"rewards/rejected": -0.1546819508075714, |
|
"sft_loss": 1.2958338260650635, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.5410668924640136, |
|
"grad_norm": 4.812232494354248, |
|
"learning_rate": 2.3893198524674264e-06, |
|
"logits/chosen": 252.809814453125, |
|
"logits/rejected": 252.73892211914062, |
|
"logps/chosen": -1.2310715913772583, |
|
"logps/rejected": -1.428946852684021, |
|
"loss": 1.2975, |
|
"odds_ratio_loss": 0.6640914678573608, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.12310715764760971, |
|
"rewards/margins": 0.019787531346082687, |
|
"rewards/rejected": -0.1428947001695633, |
|
"sft_loss": 1.2310715913772583, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.5580016934801018, |
|
"grad_norm": 2.730430841445923, |
|
"learning_rate": 2.345007840463904e-06, |
|
"logits/chosen": 254.012939453125, |
|
"logits/rejected": 254.33950805664062, |
|
"logps/chosen": -1.2894508838653564, |
|
"logps/rejected": -1.4280718564987183, |
|
"loss": 1.3585, |
|
"odds_ratio_loss": 0.6902931332588196, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.12894509732723236, |
|
"rewards/margins": 0.013862092979252338, |
|
"rewards/rejected": -0.14280718564987183, |
|
"sft_loss": 1.2894508838653564, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.5749364944961897, |
|
"grad_norm": 3.1161863803863525, |
|
"learning_rate": 2.3007446600220572e-06, |
|
"logits/chosen": 254.1707305908203, |
|
"logits/rejected": 253.95748901367188, |
|
"logps/chosen": -1.2720592021942139, |
|
"logps/rejected": -1.4615156650543213, |
|
"loss": 1.3436, |
|
"odds_ratio_loss": 0.7155615091323853, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.12720590829849243, |
|
"rewards/margins": 0.018945645540952682, |
|
"rewards/rejected": -0.1461515724658966, |
|
"sft_loss": 1.2720592021942139, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.5918712955122776, |
|
"grad_norm": 2.035860538482666, |
|
"learning_rate": 2.2565442566229507e-06, |
|
"logits/chosen": 253.0424346923828, |
|
"logits/rejected": 253.29043579101562, |
|
"logps/chosen": -1.2878587245941162, |
|
"logps/rejected": -1.4205825328826904, |
|
"loss": 1.3592, |
|
"odds_ratio_loss": 0.7135877013206482, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.1287858784198761, |
|
"rewards/margins": 0.013272365555167198, |
|
"rewards/rejected": -0.14205823838710785, |
|
"sft_loss": 1.2878587245941162, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.6088060965283657, |
|
"grad_norm": 2.338395357131958, |
|
"learning_rate": 2.2124205559692195e-06, |
|
"logits/chosen": 253.37295532226562, |
|
"logits/rejected": 253.95040893554688, |
|
"logps/chosen": -1.2591735124588013, |
|
"logps/rejected": -1.4865353107452393, |
|
"loss": 1.3211, |
|
"odds_ratio_loss": 0.6195861101150513, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.12591736018657684, |
|
"rewards/margins": 0.02273617871105671, |
|
"rewards/rejected": -0.1486535370349884, |
|
"sft_loss": 1.2591735124588013, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.6257408975444538, |
|
"grad_norm": 3.2883460521698, |
|
"learning_rate": 2.168387459597666e-06, |
|
"logits/chosen": 253.46572875976562, |
|
"logits/rejected": 253.340576171875, |
|
"logps/chosen": -1.2676080465316772, |
|
"logps/rejected": -1.5205793380737305, |
|
"loss": 1.3311, |
|
"odds_ratio_loss": 0.6347194910049438, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.1267608106136322, |
|
"rewards/margins": 0.025297129526734352, |
|
"rewards/rejected": -0.1520579308271408, |
|
"sft_loss": 1.2676080465316772, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.642675698560542, |
|
"grad_norm": 3.6740529537200928, |
|
"learning_rate": 2.1244588404994648e-06, |
|
"logits/chosen": 251.4237518310547, |
|
"logits/rejected": 251.6488494873047, |
|
"logps/chosen": -1.2188889980316162, |
|
"logps/rejected": -1.379131555557251, |
|
"loss": 1.2882, |
|
"odds_ratio_loss": 0.693446695804596, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1218889132142067, |
|
"rewards/margins": 0.016024235635995865, |
|
"rewards/rejected": -0.13791313767433167, |
|
"sft_loss": 1.2188889980316162, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.65961049957663, |
|
"grad_norm": 6.992788791656494, |
|
"learning_rate": 2.08064853874936e-06, |
|
"logits/chosen": 253.6184539794922, |
|
"logits/rejected": 253.8686981201172, |
|
"logps/chosen": -1.2761471271514893, |
|
"logps/rejected": -1.4780161380767822, |
|
"loss": 1.3423, |
|
"odds_ratio_loss": 0.6611601114273071, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.12761470675468445, |
|
"rewards/margins": 0.020186906680464745, |
|
"rewards/rejected": -0.14780160784721375, |
|
"sft_loss": 1.2761471271514893, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.676545300592718, |
|
"grad_norm": 4.474498748779297, |
|
"learning_rate": 2.0369703571452387e-06, |
|
"logits/chosen": 253.08810424804688, |
|
"logits/rejected": 253.7334747314453, |
|
"logps/chosen": -1.1575729846954346, |
|
"logps/rejected": -1.4460541009902954, |
|
"loss": 1.2163, |
|
"odds_ratio_loss": 0.5876318216323853, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.1157573014497757, |
|
"rewards/margins": 0.02884811721742153, |
|
"rewards/rejected": -0.14460542798042297, |
|
"sft_loss": 1.1575729846954346, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.6934801016088061, |
|
"grad_norm": 2.3749420642852783, |
|
"learning_rate": 1.993438056859441e-06, |
|
"logits/chosen": 254.9521026611328, |
|
"logits/rejected": 255.0730743408203, |
|
"logps/chosen": -1.2478586435317993, |
|
"logps/rejected": -1.4748018980026245, |
|
"loss": 1.3103, |
|
"odds_ratio_loss": 0.6246139407157898, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.124785877764225, |
|
"rewards/margins": 0.022694315761327744, |
|
"rewards/rejected": -0.14748017489910126, |
|
"sft_loss": 1.2478586435317993, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6934801016088061, |
|
"eval_logits/chosen": 252.814697265625, |
|
"eval_logits/rejected": 253.1349639892578, |
|
"eval_logps/chosen": -1.2641632556915283, |
|
"eval_logps/rejected": -1.4885586500167847, |
|
"eval_loss": 1.3313392400741577, |
|
"eval_odds_ratio_loss": 0.6717599630355835, |
|
"eval_rewards/accuracies": 0.569523811340332, |
|
"eval_rewards/chosen": -0.12641634047031403, |
|
"eval_rewards/margins": 0.022439539432525635, |
|
"eval_rewards/rejected": -0.14885587990283966, |
|
"eval_runtime": 221.9589, |
|
"eval_samples_per_second": 4.731, |
|
"eval_sft_loss": 1.2641632556915283, |
|
"eval_steps_per_second": 2.365, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.710414902624894, |
|
"grad_norm": 2.523041009902954, |
|
"learning_rate": 1.9500653531031917e-06, |
|
"logits/chosen": 253.83682250976562, |
|
"logits/rejected": 254.0968780517578, |
|
"logps/chosen": -1.231729507446289, |
|
"logps/rejected": -1.4748117923736572, |
|
"loss": 1.2981, |
|
"odds_ratio_loss": 0.6637791395187378, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.12317295372486115, |
|
"rewards/margins": 0.02430824190378189, |
|
"rewards/rejected": -0.14748118817806244, |
|
"sft_loss": 1.231729507446289, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.7273497036409822, |
|
"grad_norm": 2.989529848098755, |
|
"learning_rate": 1.9068659108055117e-06, |
|
"logits/chosen": 252.50570678710938, |
|
"logits/rejected": 252.9430389404297, |
|
"logps/chosen": -1.2413790225982666, |
|
"logps/rejected": -1.4306474924087524, |
|
"loss": 1.3076, |
|
"odds_ratio_loss": 0.6622155904769897, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.12413790076971054, |
|
"rewards/margins": 0.018926845863461494, |
|
"rewards/rejected": -0.14306475222110748, |
|
"sft_loss": 1.2413790225982666, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.7442845046570703, |
|
"grad_norm": 1.9568462371826172, |
|
"learning_rate": 1.863853340307962e-06, |
|
"logits/chosen": 252.41305541992188, |
|
"logits/rejected": 253.2246551513672, |
|
"logps/chosen": -1.0862369537353516, |
|
"logps/rejected": -1.4562585353851318, |
|
"loss": 1.1465, |
|
"odds_ratio_loss": 0.6029458045959473, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.1086236983537674, |
|
"rewards/margins": 0.037002164870500565, |
|
"rewards/rejected": -0.14562585949897766, |
|
"sft_loss": 1.0862369537353516, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.7612193056731584, |
|
"grad_norm": 2.321819543838501, |
|
"learning_rate": 1.8210411930766019e-06, |
|
"logits/chosen": 253.8018035888672, |
|
"logits/rejected": 253.84048461914062, |
|
"logps/chosen": -1.2366716861724854, |
|
"logps/rejected": -1.473456621170044, |
|
"loss": 1.3013, |
|
"odds_ratio_loss": 0.6460654139518738, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.1236671656370163, |
|
"rewards/margins": 0.023678502067923546, |
|
"rewards/rejected": -0.1473456621170044, |
|
"sft_loss": 1.2366716861724854, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.7781541066892466, |
|
"grad_norm": 2.2399725914001465, |
|
"learning_rate": 1.7784429574324803e-06, |
|
"logits/chosen": 253.9780731201172, |
|
"logits/rejected": 253.922607421875, |
|
"logps/chosen": -1.199683427810669, |
|
"logps/rejected": -1.4928243160247803, |
|
"loss": 1.2597, |
|
"odds_ratio_loss": 0.599722683429718, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.11996833980083466, |
|
"rewards/margins": 0.02931409515440464, |
|
"rewards/rejected": -0.14928242564201355, |
|
"sft_loss": 1.199683427810669, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.7950889077053345, |
|
"grad_norm": 1.5600874423980713, |
|
"learning_rate": 1.7360720543020327e-06, |
|
"logits/chosen": 254.2810516357422, |
|
"logits/rejected": 254.4838409423828, |
|
"logps/chosen": -1.2215511798858643, |
|
"logps/rejected": -1.3866732120513916, |
|
"loss": 1.2883, |
|
"odds_ratio_loss": 0.6674426794052124, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.12215511500835419, |
|
"rewards/margins": 0.016512203961610794, |
|
"rewards/rejected": -0.13866731524467468, |
|
"sft_loss": 1.2215511798858643, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.8120237087214224, |
|
"grad_norm": 6.640655994415283, |
|
"learning_rate": 1.6939418329887042e-06, |
|
"logits/chosen": 254.4890594482422, |
|
"logits/rejected": 254.67626953125, |
|
"logps/chosen": -1.2743191719055176, |
|
"logps/rejected": -1.505152940750122, |
|
"loss": 1.3393, |
|
"odds_ratio_loss": 0.6494858264923096, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12743191421031952, |
|
"rewards/margins": 0.023083385080099106, |
|
"rewards/rejected": -0.15051528811454773, |
|
"sft_loss": 1.2743191719055176, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.8289585097375105, |
|
"grad_norm": 2.518933057785034, |
|
"learning_rate": 1.6520655669671467e-06, |
|
"logits/chosen": 254.40719604492188, |
|
"logits/rejected": 254.98049926757812, |
|
"logps/chosen": -1.2717182636260986, |
|
"logps/rejected": -1.383216142654419, |
|
"loss": 1.3404, |
|
"odds_ratio_loss": 0.686890721321106, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.1271718293428421, |
|
"rewards/margins": 0.011149783618748188, |
|
"rewards/rejected": -0.13832160830497742, |
|
"sft_loss": 1.2717182636260986, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.8458933107535986, |
|
"grad_norm": 2.115421772003174, |
|
"learning_rate": 1.610456449701294e-06, |
|
"logits/chosen": 254.34115600585938, |
|
"logits/rejected": 254.33877563476562, |
|
"logps/chosen": -1.2869365215301514, |
|
"logps/rejected": -1.4801461696624756, |
|
"loss": 1.356, |
|
"odds_ratio_loss": 0.6902877688407898, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.12869365513324738, |
|
"rewards/margins": 0.019320974126458168, |
|
"rewards/rejected": -0.1480146199464798, |
|
"sft_loss": 1.2869365215301514, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.8628281117696868, |
|
"grad_norm": 3.411358118057251, |
|
"learning_rate": 1.5691275904876545e-06, |
|
"logits/chosen": 253.40380859375, |
|
"logits/rejected": 253.9809112548828, |
|
"logps/chosen": -1.3146874904632568, |
|
"logps/rejected": -1.605054497718811, |
|
"loss": 1.3787, |
|
"odds_ratio_loss": 0.6398962140083313, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.1314687579870224, |
|
"rewards/margins": 0.02903667651116848, |
|
"rewards/rejected": -0.16050544381141663, |
|
"sft_loss": 1.3146874904632568, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.879762912785775, |
|
"grad_norm": 2.1282906532287598, |
|
"learning_rate": 1.5280920103251235e-06, |
|
"logits/chosen": 254.62075805664062, |
|
"logits/rejected": 255.1248321533203, |
|
"logps/chosen": -1.219244360923767, |
|
"logps/rejected": -1.4468841552734375, |
|
"loss": 1.2855, |
|
"odds_ratio_loss": 0.6621590852737427, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.12192445993423462, |
|
"rewards/margins": 0.022763973101973534, |
|
"rewards/rejected": -0.1446884274482727, |
|
"sft_loss": 1.219244360923767, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.8966977138018628, |
|
"grad_norm": 3.646892786026001, |
|
"learning_rate": 1.4873626378126015e-06, |
|
"logits/chosen": 253.64559936523438, |
|
"logits/rejected": 253.7223663330078, |
|
"logps/chosen": -1.2419589757919312, |
|
"logps/rejected": -1.5452362298965454, |
|
"loss": 1.3108, |
|
"odds_ratio_loss": 0.6882845759391785, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.12419591099023819, |
|
"rewards/margins": 0.03032771870493889, |
|
"rewards/rejected": -0.15452361106872559, |
|
"sft_loss": 1.2419589757919312, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.913632514817951, |
|
"grad_norm": 2.910693407058716, |
|
"learning_rate": 1.446952305075738e-06, |
|
"logits/chosen": 255.579833984375, |
|
"logits/rejected": 255.78054809570312, |
|
"logps/chosen": -1.220640778541565, |
|
"logps/rejected": -1.3528368473052979, |
|
"loss": 1.2889, |
|
"odds_ratio_loss": 0.6829566359519958, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.12206407636404037, |
|
"rewards/margins": 0.013219590298831463, |
|
"rewards/rejected": -0.1352836638689041, |
|
"sft_loss": 1.220640778541565, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.9305673158340388, |
|
"grad_norm": 1.925073504447937, |
|
"learning_rate": 1.406873743724065e-06, |
|
"logits/chosen": 254.1175537109375, |
|
"logits/rejected": 253.83895874023438, |
|
"logps/chosen": -1.2488387823104858, |
|
"logps/rejected": -1.625109314918518, |
|
"loss": 1.3124, |
|
"odds_ratio_loss": 0.6353055238723755, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.12488389015197754, |
|
"rewards/margins": 0.03762705251574516, |
|
"rewards/rejected": -0.1625109314918518, |
|
"sft_loss": 1.2488387823104858, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.947502116850127, |
|
"grad_norm": 1.9894033670425415, |
|
"learning_rate": 1.3671395808397898e-06, |
|
"logits/chosen": 254.44320678710938, |
|
"logits/rejected": 254.1394500732422, |
|
"logps/chosen": -1.1459629535675049, |
|
"logps/rejected": -1.3053247928619385, |
|
"loss": 1.212, |
|
"odds_ratio_loss": 0.6603742241859436, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.11459628492593765, |
|
"rewards/margins": 0.015936192125082016, |
|
"rewards/rejected": -0.13053248822689056, |
|
"sft_loss": 1.1459629535675049, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.964436917866215, |
|
"grad_norm": 8.539448738098145, |
|
"learning_rate": 1.3277623349995418e-06, |
|
"logits/chosen": 254.93515014648438, |
|
"logits/rejected": 256.26190185546875, |
|
"logps/chosen": -1.2024309635162354, |
|
"logps/rejected": -1.3554456233978271, |
|
"loss": 1.2708, |
|
"odds_ratio_loss": 0.6840348243713379, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.12024309486150742, |
|
"rewards/margins": 0.015301482751965523, |
|
"rewards/rejected": -0.1355445832014084, |
|
"sft_loss": 1.2024309635162354, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.9813717188823032, |
|
"grad_norm": 3.0065865516662598, |
|
"learning_rate": 1.2887544123302781e-06, |
|
"logits/chosen": 255.7893524169922, |
|
"logits/rejected": 255.7041473388672, |
|
"logps/chosen": -1.2803890705108643, |
|
"logps/rejected": -1.3648675680160522, |
|
"loss": 1.3554, |
|
"odds_ratio_loss": 0.7498189210891724, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.1280389130115509, |
|
"rewards/margins": 0.008447853848338127, |
|
"rewards/rejected": -0.1364867389202118, |
|
"sft_loss": 1.2803890705108643, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.9983065198983911, |
|
"grad_norm": 1.9976747035980225, |
|
"learning_rate": 1.2501281026006393e-06, |
|
"logits/chosen": 253.8057403564453, |
|
"logits/rejected": 253.51309204101562, |
|
"logps/chosen": -1.2094168663024902, |
|
"logps/rejected": -1.379926323890686, |
|
"loss": 1.2795, |
|
"odds_ratio_loss": 0.7010444402694702, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.12094169855117798, |
|
"rewards/margins": 0.017050940543413162, |
|
"rewards/rejected": -0.13799263536930084, |
|
"sft_loss": 1.2094168663024902, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.015241320914479, |
|
"grad_norm": 2.3396496772766113, |
|
"learning_rate": 1.2118955753489523e-06, |
|
"logits/chosen": 253.90792846679688, |
|
"logits/rejected": 254.0242462158203, |
|
"logps/chosen": -1.2840607166290283, |
|
"logps/rejected": -1.4544405937194824, |
|
"loss": 1.3511, |
|
"odds_ratio_loss": 0.6707578897476196, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.1284060776233673, |
|
"rewards/margins": 0.01703798957169056, |
|
"rewards/rejected": -0.14544406533241272, |
|
"sft_loss": 1.2840607166290283, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.032176121930567, |
|
"grad_norm": 3.239753007888794, |
|
"learning_rate": 1.1740688760491189e-06, |
|
"logits/chosen": 253.80667114257812, |
|
"logits/rejected": 254.23452758789062, |
|
"logps/chosen": -1.284844160079956, |
|
"logps/rejected": -1.5715402364730835, |
|
"loss": 1.3467, |
|
"odds_ratio_loss": 0.6180799007415771, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.12848441302776337, |
|
"rewards/margins": 0.02866961434483528, |
|
"rewards/rejected": -0.15715403854846954, |
|
"sft_loss": 1.284844160079956, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.0491109229466553, |
|
"grad_norm": 3.396362066268921, |
|
"learning_rate": 1.1366599223155847e-06, |
|
"logits/chosen": 254.4167022705078, |
|
"logits/rejected": 254.499755859375, |
|
"logps/chosen": -1.2031781673431396, |
|
"logps/rejected": -1.4534327983856201, |
|
"loss": 1.2695, |
|
"odds_ratio_loss": 0.66279536485672, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.12031783163547516, |
|
"rewards/margins": 0.025025445967912674, |
|
"rewards/rejected": -0.14534327387809753, |
|
"sft_loss": 1.2031781673431396, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.0660457239627434, |
|
"grad_norm": 2.935955762863159, |
|
"learning_rate": 1.0996805001486067e-06, |
|
"logits/chosen": 253.8230438232422, |
|
"logits/rejected": 254.1538848876953, |
|
"logps/chosen": -1.2398602962493896, |
|
"logps/rejected": -1.4281715154647827, |
|
"loss": 1.306, |
|
"odds_ratio_loss": 0.6616999506950378, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.12398604303598404, |
|
"rewards/margins": 0.018831118941307068, |
|
"rewards/rejected": -0.1428171545267105, |
|
"sft_loss": 1.2398602962493896, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.0829805249788316, |
|
"grad_norm": 5.044885635375977, |
|
"learning_rate": 1.0631422602209608e-06, |
|
"logits/chosen": 253.986083984375, |
|
"logits/rejected": 254.6449737548828, |
|
"logps/chosen": -1.2787022590637207, |
|
"logps/rejected": -1.4583269357681274, |
|
"loss": 1.3487, |
|
"odds_ratio_loss": 0.700279951095581, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12787023186683655, |
|
"rewards/margins": 0.01796245574951172, |
|
"rewards/rejected": -0.14583268761634827, |
|
"sft_loss": 1.2787022590637207, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.0999153259949197, |
|
"grad_norm": 1.7120074033737183, |
|
"learning_rate": 1.027056714207319e-06, |
|
"logits/chosen": 255.4936981201172, |
|
"logits/rejected": 255.05712890625, |
|
"logps/chosen": -1.317291498184204, |
|
"logps/rejected": -1.599304437637329, |
|
"loss": 1.3792, |
|
"odds_ratio_loss": 0.6188714504241943, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.1317291557788849, |
|
"rewards/margins": 0.02820129692554474, |
|
"rewards/rejected": -0.15993043780326843, |
|
"sft_loss": 1.317291498184204, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.116850127011008, |
|
"grad_norm": 2.1434166431427, |
|
"learning_rate": 9.914352311573838e-07, |
|
"logits/chosen": 254.13870239257812, |
|
"logits/rejected": 253.87496948242188, |
|
"logps/chosen": -1.0639550685882568, |
|
"logps/rejected": -1.3396233320236206, |
|
"loss": 1.1253, |
|
"odds_ratio_loss": 0.6130257248878479, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.10639550536870956, |
|
"rewards/margins": 0.027566824108362198, |
|
"rewards/rejected": -0.13396233320236206, |
|
"sft_loss": 1.0639550685882568, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.1337849280270955, |
|
"grad_norm": 1.7394886016845703, |
|
"learning_rate": 9.562890339139877e-07, |
|
"logits/chosen": 252.9702606201172, |
|
"logits/rejected": 253.41909790039062, |
|
"logps/chosen": -1.1834205389022827, |
|
"logps/rejected": -1.4220540523529053, |
|
"loss": 1.2498, |
|
"odds_ratio_loss": 0.6642176508903503, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.11834204196929932, |
|
"rewards/margins": 0.02386336401104927, |
|
"rewards/rejected": -0.14220541715621948, |
|
"sft_loss": 1.1834205389022827, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.1507197290431836, |
|
"grad_norm": 2.333282232284546, |
|
"learning_rate": 9.216291955772374e-07, |
|
"logits/chosen": 253.9619903564453, |
|
"logits/rejected": 254.09951782226562, |
|
"logps/chosen": -1.1872704029083252, |
|
"logps/rejected": -1.3576862812042236, |
|
"loss": 1.2522, |
|
"odds_ratio_loss": 0.6492857933044434, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.11872704327106476, |
|
"rewards/margins": 0.017041588202118874, |
|
"rewards/rejected": -0.13576863706111908, |
|
"sft_loss": 1.1872704029083252, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.167654530059272, |
|
"grad_norm": 6.051000595092773, |
|
"learning_rate": 8.874666360158457e-07, |
|
"logits/chosen": 253.09268188476562, |
|
"logits/rejected": 253.14846801757812, |
|
"logps/chosen": -1.1666978597640991, |
|
"logps/rejected": -1.3771264553070068, |
|
"loss": 1.2333, |
|
"odds_ratio_loss": 0.6663509607315063, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.11666979640722275, |
|
"rewards/margins": 0.021042857319116592, |
|
"rewards/rejected": -0.13771264255046844, |
|
"sft_loss": 1.1666978597640991, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.18458933107536, |
|
"grad_norm": 2.681307315826416, |
|
"learning_rate": 8.538121184267315e-07, |
|
"logits/chosen": 252.548095703125, |
|
"logits/rejected": 253.38436889648438, |
|
"logps/chosen": -1.107893705368042, |
|
"logps/rejected": -1.3506486415863037, |
|
"loss": 1.1702, |
|
"odds_ratio_loss": 0.6233555674552917, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.11078937351703644, |
|
"rewards/margins": 0.024275511503219604, |
|
"rewards/rejected": -0.13506487011909485, |
|
"sft_loss": 1.107893705368042, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.201524132091448, |
|
"grad_norm": 2.337139368057251, |
|
"learning_rate": 8.206762459439907e-07, |
|
"logits/chosen": 252.81088256835938, |
|
"logits/rejected": 253.55819702148438, |
|
"logps/chosen": -1.2137644290924072, |
|
"logps/rejected": -1.405731201171875, |
|
"loss": 1.2819, |
|
"odds_ratio_loss": 0.6810044050216675, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12137645483016968, |
|
"rewards/margins": 0.019196683540940285, |
|
"rewards/rejected": -0.14057312905788422, |
|
"sft_loss": 1.2137644290924072, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.218458933107536, |
|
"grad_norm": 2.8654181957244873, |
|
"learning_rate": 7.880694582982898e-07, |
|
"logits/chosen": 254.5794677734375, |
|
"logits/rejected": 255.022705078125, |
|
"logps/chosen": -1.2548917531967163, |
|
"logps/rejected": -1.4934711456298828, |
|
"loss": 1.3177, |
|
"odds_ratio_loss": 0.6284439563751221, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.12548920512199402, |
|
"rewards/margins": 0.02385791949927807, |
|
"rewards/rejected": -0.14934709668159485, |
|
"sft_loss": 1.2548917531967163, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.235393734123624, |
|
"grad_norm": 2.6947178840637207, |
|
"learning_rate": 7.560020285277401e-07, |
|
"logits/chosen": 254.29129028320312, |
|
"logits/rejected": 254.16165161132812, |
|
"logps/chosen": -1.2197682857513428, |
|
"logps/rejected": -1.3262290954589844, |
|
"loss": 1.2905, |
|
"odds_ratio_loss": 0.7075908780097961, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.1219768151640892, |
|
"rewards/margins": 0.010646088980138302, |
|
"rewards/rejected": -0.13262291252613068, |
|
"sft_loss": 1.2197682857513428, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.252328535139712, |
|
"grad_norm": 3.4201550483703613, |
|
"learning_rate": 7.244840597412956e-07, |
|
"logits/chosen": 253.959228515625, |
|
"logits/rejected": 254.095458984375, |
|
"logps/chosen": -1.244969129562378, |
|
"logps/rejected": -1.3216217756271362, |
|
"loss": 1.3196, |
|
"odds_ratio_loss": 0.7459138631820679, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.12449691444635391, |
|
"rewards/margins": 0.007665269076824188, |
|
"rewards/rejected": -0.1321621835231781, |
|
"sft_loss": 1.244969129562378, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.2692633361558, |
|
"grad_norm": 4.195887565612793, |
|
"learning_rate": 6.935254819356796e-07, |
|
"logits/chosen": 255.4742889404297, |
|
"logits/rejected": 255.7603759765625, |
|
"logps/chosen": -1.221064805984497, |
|
"logps/rejected": -1.4360755681991577, |
|
"loss": 1.2872, |
|
"odds_ratio_loss": 0.6609319448471069, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.12210647761821747, |
|
"rewards/margins": 0.021501056849956512, |
|
"rewards/rejected": -0.14360754191875458, |
|
"sft_loss": 1.221064805984497, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.2861981371718882, |
|
"grad_norm": 1.8310470581054688, |
|
"learning_rate": 6.631360488668662e-07, |
|
"logits/chosen": 253.05575561523438, |
|
"logits/rejected": 253.70724487304688, |
|
"logps/chosen": -1.1173431873321533, |
|
"logps/rejected": -1.3836314678192139, |
|
"loss": 1.1808, |
|
"odds_ratio_loss": 0.634362518787384, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.11173433065414429, |
|
"rewards/margins": 0.02662881650030613, |
|
"rewards/rejected": -0.13836315274238586, |
|
"sft_loss": 1.1173431873321533, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.3031329381879764, |
|
"grad_norm": 3.754059076309204, |
|
"learning_rate": 6.333253349770672e-07, |
|
"logits/chosen": 255.578125, |
|
"logits/rejected": 255.3465118408203, |
|
"logps/chosen": -1.257102370262146, |
|
"logps/rejected": -1.4100966453552246, |
|
"loss": 1.3274, |
|
"odds_ratio_loss": 0.7030817270278931, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12571023404598236, |
|
"rewards/margins": 0.015299415215849876, |
|
"rewards/rejected": -0.14100965857505798, |
|
"sft_loss": 1.257102370262146, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.3200677392040645, |
|
"grad_norm": 1.6480987071990967, |
|
"learning_rate": 6.041027323782364e-07, |
|
"logits/chosen": 254.044921875, |
|
"logits/rejected": 254.229248046875, |
|
"logps/chosen": -1.1556494235992432, |
|
"logps/rejected": -1.4046926498413086, |
|
"loss": 1.2175, |
|
"odds_ratio_loss": 0.6188673973083496, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.11556495726108551, |
|
"rewards/margins": 0.02490430511534214, |
|
"rewards/rejected": -0.1404692679643631, |
|
"sft_loss": 1.1556494235992432, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.337002540220152, |
|
"grad_norm": 2.606630563735962, |
|
"learning_rate": 5.754774478929969e-07, |
|
"logits/chosen": 253.4460906982422, |
|
"logits/rejected": 253.7098388671875, |
|
"logps/chosen": -1.2188177108764648, |
|
"logps/rejected": -1.42665433883667, |
|
"loss": 1.2822, |
|
"odds_ratio_loss": 0.6334772706031799, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.12188176810741425, |
|
"rewards/margins": 0.020783668383955956, |
|
"rewards/rejected": -0.14266543090343475, |
|
"sft_loss": 1.2188177108764648, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.3539373412362403, |
|
"grad_norm": 4.384988784790039, |
|
"learning_rate": 5.474585001539634e-07, |
|
"logits/chosen": 254.74624633789062, |
|
"logits/rejected": 255.39364624023438, |
|
"logps/chosen": -1.1506468057632446, |
|
"logps/rejected": -1.3720335960388184, |
|
"loss": 1.2119, |
|
"odds_ratio_loss": 0.612425684928894, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.11506466567516327, |
|
"rewards/margins": 0.02213868871331215, |
|
"rewards/rejected": -0.1372033655643463, |
|
"sft_loss": 1.1506468057632446, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.3708721422523285, |
|
"grad_norm": 1.8550916910171509, |
|
"learning_rate": 5.200547167623424e-07, |
|
"logits/chosen": 255.02920532226562, |
|
"logits/rejected": 254.79867553710938, |
|
"logps/chosen": -1.2350322008132935, |
|
"logps/rejected": -1.4950242042541504, |
|
"loss": 1.2975, |
|
"odds_ratio_loss": 0.6242570281028748, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.12350322306156158, |
|
"rewards/margins": 0.02599920891225338, |
|
"rewards/rejected": -0.14950242638587952, |
|
"sft_loss": 1.2350322008132935, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.3878069432684166, |
|
"grad_norm": 1.3218871355056763, |
|
"learning_rate": 4.932747315067271e-07, |
|
"logits/chosen": 253.67959594726562, |
|
"logits/rejected": 253.9455108642578, |
|
"logps/chosen": -1.1884933710098267, |
|
"logps/rejected": -1.4974465370178223, |
|
"loss": 1.2494, |
|
"odds_ratio_loss": 0.6091898083686829, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.11884935200214386, |
|
"rewards/margins": 0.030895307660102844, |
|
"rewards/rejected": -0.1497446447610855, |
|
"sft_loss": 1.1884933710098267, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.4047417442845047, |
|
"grad_norm": 4.312244415283203, |
|
"learning_rate": 4.6712698164294553e-07, |
|
"logits/chosen": 253.365966796875, |
|
"logits/rejected": 254.00637817382812, |
|
"logps/chosen": -1.1741528511047363, |
|
"logps/rejected": -1.393843650817871, |
|
"loss": 1.2374, |
|
"odds_ratio_loss": 0.6321112513542175, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.11741527169942856, |
|
"rewards/margins": 0.021969076246023178, |
|
"rewards/rejected": -0.13938435912132263, |
|
"sft_loss": 1.1741528511047363, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.421676545300593, |
|
"grad_norm": 2.516547918319702, |
|
"learning_rate": 4.41619705235842e-07, |
|
"logits/chosen": 254.66812133789062, |
|
"logits/rejected": 255.44058227539062, |
|
"logps/chosen": -1.243293285369873, |
|
"logps/rejected": -1.6239792108535767, |
|
"loss": 1.3027, |
|
"odds_ratio_loss": 0.5943632125854492, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.1243293434381485, |
|
"rewards/margins": 0.03806859627366066, |
|
"rewards/rejected": -0.16239793598651886, |
|
"sft_loss": 1.243293285369873, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.438611346316681, |
|
"grad_norm": 1.4834553003311157, |
|
"learning_rate": 4.167609385637961e-07, |
|
"logits/chosen": 254.78579711914062, |
|
"logits/rejected": 255.18881225585938, |
|
"logps/chosen": -1.2608935832977295, |
|
"logps/rejected": -1.425330638885498, |
|
"loss": 1.3255, |
|
"odds_ratio_loss": 0.6463701128959656, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.12608936429023743, |
|
"rewards/margins": 0.016443699598312378, |
|
"rewards/rejected": -0.1425330489873886, |
|
"sft_loss": 1.2608935832977295, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.4555461473327687, |
|
"grad_norm": 3.3783388137817383, |
|
"learning_rate": 3.9255851358683567e-07, |
|
"logits/chosen": 254.845458984375, |
|
"logits/rejected": 255.02957153320312, |
|
"logps/chosen": -1.1708389520645142, |
|
"logps/rejected": -1.3822017908096313, |
|
"loss": 1.2391, |
|
"odds_ratio_loss": 0.6828811764717102, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.11708390712738037, |
|
"rewards/margins": 0.02113628014922142, |
|
"rewards/rejected": -0.1382201761007309, |
|
"sft_loss": 1.1708389520645142, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.472480948348857, |
|
"grad_norm": 3.0601887702941895, |
|
"learning_rate": 3.690200554791082e-07, |
|
"logits/chosen": 252.8287811279297, |
|
"logits/rejected": 252.99655151367188, |
|
"logps/chosen": -1.207948923110962, |
|
"logps/rejected": -1.4645483493804932, |
|
"loss": 1.2682, |
|
"odds_ratio_loss": 0.6026018261909485, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12079490721225739, |
|
"rewards/margins": 0.025659915059804916, |
|
"rewards/rejected": -0.1464548110961914, |
|
"sft_loss": 1.207948923110962, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.489415749364945, |
|
"grad_norm": 4.717799186706543, |
|
"learning_rate": 3.461529802265079e-07, |
|
"logits/chosen": 253.954833984375, |
|
"logits/rejected": 254.8602752685547, |
|
"logps/chosen": -1.2261638641357422, |
|
"logps/rejected": -1.3805973529815674, |
|
"loss": 1.2931, |
|
"odds_ratio_loss": 0.6691168546676636, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.12261638790369034, |
|
"rewards/margins": 0.01544335950165987, |
|
"rewards/rejected": -0.13805975019931793, |
|
"sft_loss": 1.2261638641357422, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.506350550381033, |
|
"grad_norm": 2.0962188243865967, |
|
"learning_rate": 3.2396449229020883e-07, |
|
"logits/chosen": 254.4154815673828, |
|
"logits/rejected": 254.2891082763672, |
|
"logps/chosen": -1.242130994796753, |
|
"logps/rejected": -1.4029905796051025, |
|
"loss": 1.3096, |
|
"odds_ratio_loss": 0.6747825741767883, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.1242130845785141, |
|
"rewards/margins": 0.016085978597402573, |
|
"rewards/rejected": -0.14029906690120697, |
|
"sft_loss": 1.242130994796753, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.523285351397121, |
|
"grad_norm": 4.5739922523498535, |
|
"learning_rate": 3.024615823368371e-07, |
|
"logits/chosen": 253.8447723388672, |
|
"logits/rejected": 253.74185180664062, |
|
"logps/chosen": -1.1965720653533936, |
|
"logps/rejected": -1.408341407775879, |
|
"loss": 1.2608, |
|
"odds_ratio_loss": 0.6427493691444397, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.11965718120336533, |
|
"rewards/margins": 0.021176962181925774, |
|
"rewards/rejected": -0.14083415269851685, |
|
"sft_loss": 1.1965720653533936, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.5402201524132093, |
|
"grad_norm": 2.1523611545562744, |
|
"learning_rate": 2.8165102503600716e-07, |
|
"logits/chosen": 255.3013153076172, |
|
"logits/rejected": 254.82162475585938, |
|
"logps/chosen": -1.1430598497390747, |
|
"logps/rejected": -1.4227603673934937, |
|
"loss": 1.2057, |
|
"odds_ratio_loss": 0.6268513798713684, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.11430598795413971, |
|
"rewards/margins": 0.027970051392912865, |
|
"rewards/rejected": -0.14227603375911713, |
|
"sft_loss": 1.1430598497390747, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.5402201524132093, |
|
"eval_logits/chosen": 253.5438995361328, |
|
"eval_logits/rejected": 253.86448669433594, |
|
"eval_logps/chosen": -1.2487919330596924, |
|
"eval_logps/rejected": -1.470909833908081, |
|
"eval_loss": 1.3159173727035522, |
|
"eval_odds_ratio_loss": 0.6712530851364136, |
|
"eval_rewards/accuracies": 0.561904788017273, |
|
"eval_rewards/chosen": -0.12487921118736267, |
|
"eval_rewards/margins": 0.02221176214516163, |
|
"eval_rewards/rejected": -0.14709095656871796, |
|
"eval_runtime": 222.3485, |
|
"eval_samples_per_second": 4.722, |
|
"eval_sft_loss": 1.2487919330596924, |
|
"eval_steps_per_second": 2.361, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.557154953429297, |
|
"grad_norm": 1.7336281538009644, |
|
"learning_rate": 2.615393769259039e-07, |
|
"logits/chosen": 254.2406768798828, |
|
"logits/rejected": 254.7086639404297, |
|
"logps/chosen": -1.319853663444519, |
|
"logps/rejected": -1.414665937423706, |
|
"loss": 1.3937, |
|
"odds_ratio_loss": 0.7381461262702942, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1319853961467743, |
|
"rewards/margins": 0.009481215849518776, |
|
"rewards/rejected": -0.14146658778190613, |
|
"sft_loss": 1.319853663444519, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.574089754445385, |
|
"grad_norm": 2.717740297317505, |
|
"learning_rate": 2.421329743475917e-07, |
|
"logits/chosen": 254.42990112304688, |
|
"logits/rejected": 254.5959014892578, |
|
"logps/chosen": -1.1657871007919312, |
|
"logps/rejected": -1.3640342950820923, |
|
"loss": 1.2334, |
|
"odds_ratio_loss": 0.6763022541999817, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.11657872051000595, |
|
"rewards/margins": 0.019824711605906487, |
|
"rewards/rejected": -0.13640344142913818, |
|
"sft_loss": 1.1657871007919312, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.5910245554614733, |
|
"grad_norm": 2.8658461570739746, |
|
"learning_rate": 2.234379314486973e-07, |
|
"logits/chosen": 253.9791259765625, |
|
"logits/rejected": 254.4918670654297, |
|
"logps/chosen": -1.2597442865371704, |
|
"logps/rejected": -1.434729814529419, |
|
"loss": 1.3281, |
|
"odds_ratio_loss": 0.6833871006965637, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.12597441673278809, |
|
"rewards/margins": 0.017498571425676346, |
|
"rewards/rejected": -0.14347299933433533, |
|
"sft_loss": 1.2597442865371704, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.6079593564775614, |
|
"grad_norm": 1.626412034034729, |
|
"learning_rate": 2.0546013825709783e-07, |
|
"logits/chosen": 254.2110595703125, |
|
"logits/rejected": 254.682373046875, |
|
"logps/chosen": -1.2033625841140747, |
|
"logps/rejected": -1.5869375467300415, |
|
"loss": 1.2628, |
|
"odds_ratio_loss": 0.5944391489028931, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.12033627182245255, |
|
"rewards/margins": 0.03835749253630638, |
|
"rewards/rejected": -0.15869374573230743, |
|
"sft_loss": 1.2033625841140747, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.6248941574936495, |
|
"grad_norm": 3.6966705322265625, |
|
"learning_rate": 1.88205258825217e-07, |
|
"logits/chosen": 253.447998046875, |
|
"logits/rejected": 254.12582397460938, |
|
"logps/chosen": -1.0821640491485596, |
|
"logps/rejected": -1.3699367046356201, |
|
"loss": 1.1414, |
|
"odds_ratio_loss": 0.5919198393821716, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.10821641981601715, |
|
"rewards/margins": 0.028777271509170532, |
|
"rewards/rejected": -0.1369936764240265, |
|
"sft_loss": 1.0821640491485596, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.6418289585097376, |
|
"grad_norm": 2.906128168106079, |
|
"learning_rate": 1.7167872944552245e-07, |
|
"logits/chosen": 253.732666015625, |
|
"logits/rejected": 254.1513214111328, |
|
"logps/chosen": -1.1799228191375732, |
|
"logps/rejected": -1.3946034908294678, |
|
"loss": 1.2452, |
|
"odds_ratio_loss": 0.6522972583770752, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.11799228191375732, |
|
"rewards/margins": 0.021468069404363632, |
|
"rewards/rejected": -0.13946035504341125, |
|
"sft_loss": 1.1799228191375732, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.6587637595258258, |
|
"grad_norm": 2.2223029136657715, |
|
"learning_rate": 1.5588575693777142e-07, |
|
"logits/chosen": 255.2970428466797, |
|
"logits/rejected": 255.1865692138672, |
|
"logps/chosen": -1.1618893146514893, |
|
"logps/rejected": -1.3099621534347534, |
|
"loss": 1.2281, |
|
"odds_ratio_loss": 0.6616276502609253, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.11618892103433609, |
|
"rewards/margins": 0.01480729877948761, |
|
"rewards/rejected": -0.1309962272644043, |
|
"sft_loss": 1.1618893146514893, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.675698560541914, |
|
"grad_norm": 3.071002721786499, |
|
"learning_rate": 1.4083131700856428e-07, |
|
"logits/chosen": 252.95462036132812, |
|
"logits/rejected": 253.45449829101562, |
|
"logps/chosen": -1.275651454925537, |
|
"logps/rejected": -1.421471357345581, |
|
"loss": 1.3435, |
|
"odds_ratio_loss": 0.6788088083267212, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.1275651454925537, |
|
"rewards/margins": 0.01458200253546238, |
|
"rewards/rejected": -0.14214713871479034, |
|
"sft_loss": 1.275651454925537, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.6926333615580016, |
|
"grad_norm": 3.7156240940093994, |
|
"learning_rate": 1.2652015268370315e-07, |
|
"logits/chosen": 255.25674438476562, |
|
"logits/rejected": 255.61105346679688, |
|
"logps/chosen": -1.173892617225647, |
|
"logps/rejected": -1.431540846824646, |
|
"loss": 1.2371, |
|
"odds_ratio_loss": 0.6318042874336243, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.1173892617225647, |
|
"rewards/margins": 0.02576482854783535, |
|
"rewards/rejected": -0.1431540995836258, |
|
"sft_loss": 1.173892617225647, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.7095681625740897, |
|
"grad_norm": 3.5745534896850586, |
|
"learning_rate": 1.1295677281386502e-07, |
|
"logits/chosen": 254.6566162109375, |
|
"logits/rejected": 254.93807983398438, |
|
"logps/chosen": -1.3123610019683838, |
|
"logps/rejected": -1.6006231307983398, |
|
"loss": 1.3777, |
|
"odds_ratio_loss": 0.6533895134925842, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.13123610615730286, |
|
"rewards/margins": 0.028826218098402023, |
|
"rewards/rejected": -0.16006234288215637, |
|
"sft_loss": 1.3123610019683838, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.726502963590178, |
|
"grad_norm": 2.5153346061706543, |
|
"learning_rate": 1.0014545065404973e-07, |
|
"logits/chosen": 254.83837890625, |
|
"logits/rejected": 255.0562286376953, |
|
"logps/chosen": -1.2541964054107666, |
|
"logps/rejected": -1.501185655593872, |
|
"loss": 1.323, |
|
"odds_ratio_loss": 0.6881270408630371, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12541964650154114, |
|
"rewards/margins": 0.02469891868531704, |
|
"rewards/rejected": -0.15011855959892273, |
|
"sft_loss": 1.2541964054107666, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.743437764606266, |
|
"grad_norm": 1.9238418340682983, |
|
"learning_rate": 8.809022251725502e-08, |
|
"logits/chosen": 255.2646484375, |
|
"logits/rejected": 255.28665161132812, |
|
"logps/chosen": -1.2217843532562256, |
|
"logps/rejected": -1.5161769390106201, |
|
"loss": 1.285, |
|
"odds_ratio_loss": 0.6321364045143127, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12217842042446136, |
|
"rewards/margins": 0.029439279809594154, |
|
"rewards/rejected": -0.15161770582199097, |
|
"sft_loss": 1.2217843532562256, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.7603725656223537, |
|
"grad_norm": 2.2195630073547363, |
|
"learning_rate": 7.679488650280509e-08, |
|
"logits/chosen": 256.6839904785156, |
|
"logits/rejected": 256.9764404296875, |
|
"logps/chosen": -1.246552586555481, |
|
"logps/rejected": -1.428499460220337, |
|
"loss": 1.3117, |
|
"odds_ratio_loss": 0.6510922312736511, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.12465526908636093, |
|
"rewards/margins": 0.01819469965994358, |
|
"rewards/rejected": -0.14284996688365936, |
|
"sft_loss": 1.246552586555481, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.777307366638442, |
|
"grad_norm": 5.003395080566406, |
|
"learning_rate": 6.626300129972563e-08, |
|
"logits/chosen": 254.61721801757812, |
|
"logits/rejected": 255.3614044189453, |
|
"logps/chosen": -1.1782416105270386, |
|
"logps/rejected": -1.39430570602417, |
|
"loss": 1.2449, |
|
"odds_ratio_loss": 0.6665213704109192, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.11782417446374893, |
|
"rewards/margins": 0.021606411784887314, |
|
"rewards/rejected": -0.13943056762218475, |
|
"sft_loss": 1.1782416105270386, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.79424216765453, |
|
"grad_norm": 2.193207263946533, |
|
"learning_rate": 5.649788506555065e-08, |
|
"logits/chosen": 253.5979461669922, |
|
"logits/rejected": 254.72201538085938, |
|
"logps/chosen": -1.2255361080169678, |
|
"logps/rejected": -1.4795286655426025, |
|
"loss": 1.2878, |
|
"odds_ratio_loss": 0.6227026581764221, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.12255360931158066, |
|
"rewards/margins": 0.02539927139878273, |
|
"rewards/rejected": -0.1479528844356537, |
|
"sft_loss": 1.2255361080169678, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.811176968670618, |
|
"grad_norm": 2.1340768337249756, |
|
"learning_rate": 4.7502614380908474e-08, |
|
"logits/chosen": 255.0865020751953, |
|
"logits/rejected": 255.60958862304688, |
|
"logps/chosen": -1.1954472064971924, |
|
"logps/rejected": -1.39599609375, |
|
"loss": 1.2631, |
|
"odds_ratio_loss": 0.6766428351402283, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.11954472959041595, |
|
"rewards/margins": 0.020054878666996956, |
|
"rewards/rejected": -0.13959960639476776, |
|
"sft_loss": 1.1954472064971924, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.828111769686706, |
|
"grad_norm": 2.8715949058532715, |
|
"learning_rate": 3.9280023280222066e-08, |
|
"logits/chosen": 254.49807739257812, |
|
"logits/rejected": 254.8501434326172, |
|
"logps/chosen": -1.1896106004714966, |
|
"logps/rejected": -1.3948595523834229, |
|
"loss": 1.2531, |
|
"odds_ratio_loss": 0.6348463892936707, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.11896105855703354, |
|
"rewards/margins": 0.020524896681308746, |
|
"rewards/rejected": -0.1394859403371811, |
|
"sft_loss": 1.1896106004714966, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.8450465707027943, |
|
"grad_norm": 1.6025958061218262, |
|
"learning_rate": 3.1832702358818855e-08, |
|
"logits/chosen": 254.23095703125, |
|
"logits/rejected": 254.564208984375, |
|
"logps/chosen": -1.2220954895019531, |
|
"logps/rejected": -1.390905737876892, |
|
"loss": 1.288, |
|
"odds_ratio_loss": 0.6586533784866333, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.12220954895019531, |
|
"rewards/margins": 0.016881009563803673, |
|
"rewards/rejected": -0.13909055292606354, |
|
"sft_loss": 1.2220954895019531, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.8619813717188824, |
|
"grad_norm": 2.6114532947540283, |
|
"learning_rate": 2.5162997956746647e-08, |
|
"logits/chosen": 254.39810180664062, |
|
"logits/rejected": 254.410400390625, |
|
"logps/chosen": -1.2105869054794312, |
|
"logps/rejected": -1.523001790046692, |
|
"loss": 1.2743, |
|
"odds_ratio_loss": 0.6369892954826355, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.12105868011713028, |
|
"rewards/margins": 0.031241485849022865, |
|
"rewards/rejected": -0.15230019390583038, |
|
"sft_loss": 1.2105869054794312, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.8789161727349706, |
|
"grad_norm": 5.513568878173828, |
|
"learning_rate": 1.9273011419536914e-08, |
|
"logits/chosen": 254.4501495361328, |
|
"logits/rejected": 254.602783203125, |
|
"logps/chosen": -1.2663971185684204, |
|
"logps/rejected": -1.3631950616836548, |
|
"loss": 1.3412, |
|
"odds_ratio_loss": 0.7475703954696655, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.1266397088766098, |
|
"rewards/margins": 0.009679802693426609, |
|
"rewards/rejected": -0.13631951808929443, |
|
"sft_loss": 1.2663971185684204, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.8958509737510583, |
|
"grad_norm": 2.4077532291412354, |
|
"learning_rate": 1.4164598436159083e-08, |
|
"logits/chosen": 254.43917846679688, |
|
"logits/rejected": 254.9999237060547, |
|
"logps/chosen": -1.2015140056610107, |
|
"logps/rejected": -1.3471964597702026, |
|
"loss": 1.2679, |
|
"odds_ratio_loss": 0.6641986966133118, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.12015138566493988, |
|
"rewards/margins": 0.014568252488970757, |
|
"rewards/rejected": -0.1347196400165558, |
|
"sft_loss": 1.2015140056610107, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.9127857747671464, |
|
"grad_norm": 1.6812494993209839, |
|
"learning_rate": 9.839368454371556e-09, |
|
"logits/chosen": 253.5565185546875, |
|
"logits/rejected": 253.43716430664062, |
|
"logps/chosen": -1.1501060724258423, |
|
"logps/rejected": -1.454131007194519, |
|
"loss": 1.213, |
|
"odds_ratio_loss": 0.6291440725326538, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.11501061916351318, |
|
"rewards/margins": 0.03040250577032566, |
|
"rewards/rejected": -0.1454131305217743, |
|
"sft_loss": 1.1501060724258423, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.9297205757832345, |
|
"grad_norm": 1.9758036136627197, |
|
"learning_rate": 6.298684173650649e-09, |
|
"logits/chosen": 255.69775390625, |
|
"logits/rejected": 255.80801391601562, |
|
"logps/chosen": -1.1473584175109863, |
|
"logps/rejected": -1.4585325717926025, |
|
"loss": 1.2129, |
|
"odds_ratio_loss": 0.6559008359909058, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.11473584175109863, |
|
"rewards/margins": 0.031117429956793785, |
|
"rewards/rejected": -0.14585328102111816, |
|
"sft_loss": 1.1473584175109863, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.9466553767993227, |
|
"grad_norm": 1.9825971126556396, |
|
"learning_rate": 3.543661115860686e-09, |
|
"logits/chosen": 253.49093627929688, |
|
"logits/rejected": 254.39404296875, |
|
"logps/chosen": -1.2223292589187622, |
|
"logps/rejected": -1.468933343887329, |
|
"loss": 1.2883, |
|
"odds_ratio_loss": 0.660078763961792, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.12223292887210846, |
|
"rewards/margins": 0.02466042898595333, |
|
"rewards/rejected": -0.14689335227012634, |
|
"sft_loss": 1.2223292589187622, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.963590177815411, |
|
"grad_norm": 2.3436944484710693, |
|
"learning_rate": 1.575167273800693e-09, |
|
"logits/chosen": 253.98019409179688, |
|
"logits/rejected": 254.1809844970703, |
|
"logps/chosen": -1.1822068691253662, |
|
"logps/rejected": -1.3398927450180054, |
|
"loss": 1.249, |
|
"odds_ratio_loss": 0.6677161455154419, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.11822070181369781, |
|
"rewards/margins": 0.015768591314554214, |
|
"rewards/rejected": -0.13398927450180054, |
|
"sft_loss": 1.1822068691253662, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.9805249788314985, |
|
"grad_norm": 6.531856536865234, |
|
"learning_rate": 3.9382283773564676e-10, |
|
"logits/chosen": 254.8570098876953, |
|
"logits/rejected": 255.1466064453125, |
|
"logps/chosen": -1.272533655166626, |
|
"logps/rejected": -1.498098611831665, |
|
"loss": 1.3394, |
|
"odds_ratio_loss": 0.6682497262954712, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12725338339805603, |
|
"rewards/margins": 0.02255650982260704, |
|
"rewards/rejected": -0.14980986714363098, |
|
"sft_loss": 1.272533655166626, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.9974597798475866, |
|
"grad_norm": 10.32663345336914, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 253.9761199951172, |
|
"logits/rejected": 253.9351806640625, |
|
"logps/chosen": -1.3296254873275757, |
|
"logps/rejected": -1.604189157485962, |
|
"loss": 1.3966, |
|
"odds_ratio_loss": 0.6696831583976746, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.13296253979206085, |
|
"rewards/margins": 0.02745637856423855, |
|
"rewards/rejected": -0.16041892766952515, |
|
"sft_loss": 1.3296254873275757, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.9974597798475866, |
|
"step": 1770, |
|
"total_flos": 2.135641671600046e+18, |
|
"train_loss": 1.3953795637788071, |
|
"train_runtime": 23493.5825, |
|
"train_samples_per_second": 1.206, |
|
"train_steps_per_second": 0.075 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1770, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 2.135641671600046e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|