|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9996020692399522, |
|
"eval_steps": 100, |
|
"global_step": 1256, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"debug/losses": 0.23031963407993317, |
|
"debug/policy_weights": 0.3322809934616089, |
|
"debug/raw_losses": 0.6931471824645996, |
|
"epoch": 0.0007958615200955034, |
|
"grad_norm": 1.6289096206321385, |
|
"learning_rate": 3.968253968253968e-09, |
|
"logits/chosen": -2.735659122467041, |
|
"logits/rejected": -2.7581238746643066, |
|
"logps/chosen": -124.62968444824219, |
|
"logps/rejected": -168.09475708007812, |
|
"loss": 0.2239, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"debug/losses": 0.23617929220199585, |
|
"debug/policy_weights": 0.3407440781593323, |
|
"debug/raw_losses": 0.6931117177009583, |
|
"epoch": 0.007958615200955034, |
|
"grad_norm": 1.6861129588975887, |
|
"learning_rate": 3.968253968253968e-08, |
|
"logits/chosen": -2.7388463020324707, |
|
"logits/rejected": -2.727876901626587, |
|
"logps/chosen": -146.6982879638672, |
|
"logps/rejected": -131.2141571044922, |
|
"loss": 0.2295, |
|
"rewards/accuracies": 0.4513888955116272, |
|
"rewards/chosen": 3.8997877709334716e-05, |
|
"rewards/margins": 7.3335635534022e-05, |
|
"rewards/rejected": -3.4337710530962795e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"debug/losses": 0.22658827900886536, |
|
"debug/policy_weights": 0.32686564326286316, |
|
"debug/raw_losses": 0.6932188272476196, |
|
"epoch": 0.01591723040191007, |
|
"grad_norm": 1.5613292890727226, |
|
"learning_rate": 7.936507936507936e-08, |
|
"logits/chosen": -2.706713914871216, |
|
"logits/rejected": -2.7037758827209473, |
|
"logps/chosen": -129.48782348632812, |
|
"logps/rejected": -130.2589874267578, |
|
"loss": 0.2239, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00019513315055519342, |
|
"rewards/margins": -0.00014081124390941113, |
|
"rewards/rejected": -5.432188117993064e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"debug/losses": 0.21325843036174774, |
|
"debug/policy_weights": 0.307701975107193, |
|
"debug/raw_losses": 0.6930276155471802, |
|
"epoch": 0.0238758456028651, |
|
"grad_norm": 1.5386121917266484, |
|
"learning_rate": 1.1904761904761903e-07, |
|
"logits/chosen": -2.6837477684020996, |
|
"logits/rejected": -2.6807758808135986, |
|
"logps/chosen": -141.8106231689453, |
|
"logps/rejected": -155.6637420654297, |
|
"loss": 0.2264, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.00021137837029527873, |
|
"rewards/margins": 0.00024208621471188962, |
|
"rewards/rejected": -3.070788807235658e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"debug/losses": 0.2177859991788864, |
|
"debug/policy_weights": 0.3142939507961273, |
|
"debug/raw_losses": 0.6929190158843994, |
|
"epoch": 0.03183446080382014, |
|
"grad_norm": 1.637054860498854, |
|
"learning_rate": 1.5873015873015872e-07, |
|
"logits/chosen": -2.691988468170166, |
|
"logits/rejected": -2.6842727661132812, |
|
"logps/chosen": -154.9478302001953, |
|
"logps/rejected": -164.12692260742188, |
|
"loss": 0.221, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.0012577458983287215, |
|
"rewards/margins": 0.00046192569425329566, |
|
"rewards/rejected": -0.001719671650789678, |
|
"step": 40 |
|
}, |
|
{ |
|
"debug/losses": 0.22946766018867493, |
|
"debug/policy_weights": 0.33167093992233276, |
|
"debug/raw_losses": 0.6918202042579651, |
|
"epoch": 0.03979307600477517, |
|
"grad_norm": 1.480789109824279, |
|
"learning_rate": 1.984126984126984e-07, |
|
"logits/chosen": -2.706690788269043, |
|
"logits/rejected": -2.6879990100860596, |
|
"logps/chosen": -144.00408935546875, |
|
"logps/rejected": -137.75918579101562, |
|
"loss": 0.2233, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.0028049442917108536, |
|
"rewards/margins": 0.0026701870374381542, |
|
"rewards/rejected": -0.005475131794810295, |
|
"step": 50 |
|
}, |
|
{ |
|
"debug/losses": 0.22743281722068787, |
|
"debug/policy_weights": 0.32873186469078064, |
|
"debug/raw_losses": 0.6915570497512817, |
|
"epoch": 0.0477516912057302, |
|
"grad_norm": 1.4857805665702917, |
|
"learning_rate": 2.3809523809523806e-07, |
|
"logits/chosen": -2.7156319618225098, |
|
"logits/rejected": -2.7164268493652344, |
|
"logps/chosen": -145.94308471679688, |
|
"logps/rejected": -159.51734924316406, |
|
"loss": 0.2192, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.0031640075612813234, |
|
"rewards/margins": 0.003222744446247816, |
|
"rewards/rejected": -0.006386751774698496, |
|
"step": 60 |
|
}, |
|
{ |
|
"debug/losses": 0.2175430804491043, |
|
"debug/policy_weights": 0.3152545094490051, |
|
"debug/raw_losses": 0.6902174353599548, |
|
"epoch": 0.055710306406685235, |
|
"grad_norm": 1.5369085878162871, |
|
"learning_rate": 2.7777777777777776e-07, |
|
"logits/chosen": -2.736572265625, |
|
"logits/rejected": -2.7276124954223633, |
|
"logps/chosen": -149.30255126953125, |
|
"logps/rejected": -143.44100952148438, |
|
"loss": 0.2114, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.016642453148961067, |
|
"rewards/margins": 0.006107243709266186, |
|
"rewards/rejected": -0.02274969592690468, |
|
"step": 70 |
|
}, |
|
{ |
|
"debug/losses": 0.18839044868946075, |
|
"debug/policy_weights": 0.2725599408149719, |
|
"debug/raw_losses": 0.6915421485900879, |
|
"epoch": 0.06366892160764027, |
|
"grad_norm": 1.5333558257053381, |
|
"learning_rate": 3.1746031746031743e-07, |
|
"logits/chosen": -2.7109673023223877, |
|
"logits/rejected": -2.6925058364868164, |
|
"logps/chosen": -157.96241760253906, |
|
"logps/rejected": -149.37074279785156, |
|
"loss": 0.2012, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.04828154668211937, |
|
"rewards/margins": 0.0038675833493471146, |
|
"rewards/rejected": -0.05214913561940193, |
|
"step": 80 |
|
}, |
|
{ |
|
"debug/losses": 0.18670453131198883, |
|
"debug/policy_weights": 0.273386150598526, |
|
"debug/raw_losses": 0.6827707290649414, |
|
"epoch": 0.07162753680859531, |
|
"grad_norm": 1.6092945600702757, |
|
"learning_rate": 3.5714285714285716e-07, |
|
"logits/chosen": -2.7211790084838867, |
|
"logits/rejected": -2.728004217147827, |
|
"logps/chosen": -152.60055541992188, |
|
"logps/rejected": -173.4809112548828, |
|
"loss": 0.1852, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0675431415438652, |
|
"rewards/margins": 0.02248724177479744, |
|
"rewards/rejected": -0.09003038704395294, |
|
"step": 90 |
|
}, |
|
{ |
|
"debug/losses": 0.16796275973320007, |
|
"debug/policy_weights": 0.24775293469429016, |
|
"debug/raw_losses": 0.6785470843315125, |
|
"epoch": 0.07958615200955034, |
|
"grad_norm": 1.5168173744160047, |
|
"learning_rate": 3.968253968253968e-07, |
|
"logits/chosen": -2.6893362998962402, |
|
"logits/rejected": -2.672621250152588, |
|
"logps/chosen": -149.28890991210938, |
|
"logps/rejected": -143.28530883789062, |
|
"loss": 0.1734, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.11476944386959076, |
|
"rewards/margins": 0.0327371247112751, |
|
"rewards/rejected": -0.14750656485557556, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07958615200955034, |
|
"eval_debug/losses": 0.16163869202136993, |
|
"eval_debug/policy_weights": 0.23812691867351532, |
|
"eval_debug/raw_losses": 0.6786960959434509, |
|
"eval_logits/chosen": -2.7123541831970215, |
|
"eval_logits/rejected": -2.70428729057312, |
|
"eval_logps/chosen": -158.4906768798828, |
|
"eval_logps/rejected": -168.3474884033203, |
|
"eval_loss": 0.1630723923444748, |
|
"eval_rewards/accuracies": 0.5923507213592529, |
|
"eval_rewards/chosen": -0.14247193932533264, |
|
"eval_rewards/margins": 0.03401322290301323, |
|
"eval_rewards/rejected": -0.17648516595363617, |
|
"eval_runtime": 153.0792, |
|
"eval_samples_per_second": 55.867, |
|
"eval_steps_per_second": 0.875, |
|
"step": 100 |
|
}, |
|
{ |
|
"debug/losses": 0.15102894604206085, |
|
"debug/policy_weights": 0.21911868453025818, |
|
"debug/raw_losses": 0.6911253333091736, |
|
"epoch": 0.08754476721050537, |
|
"grad_norm": 1.50091397853791, |
|
"learning_rate": 4.365079365079365e-07, |
|
"logits/chosen": -2.690058946609497, |
|
"logits/rejected": -2.6713109016418457, |
|
"logps/chosen": -179.52647399902344, |
|
"logps/rejected": -165.1460418701172, |
|
"loss": 0.1473, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.2123573273420334, |
|
"rewards/margins": 0.010995535179972649, |
|
"rewards/rejected": -0.22335286438465118, |
|
"step": 110 |
|
}, |
|
{ |
|
"debug/losses": 0.13807068765163422, |
|
"debug/policy_weights": 0.2061845362186432, |
|
"debug/raw_losses": 0.6786133646965027, |
|
"epoch": 0.0955033824114604, |
|
"grad_norm": 1.3930389291929042, |
|
"learning_rate": 4.761904761904761e-07, |
|
"logits/chosen": -2.663013219833374, |
|
"logits/rejected": -2.650160312652588, |
|
"logps/chosen": -168.0050506591797, |
|
"logps/rejected": -173.11129760742188, |
|
"loss": 0.1337, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.2887328267097473, |
|
"rewards/margins": 0.042750097811222076, |
|
"rewards/rejected": -0.3314829468727112, |
|
"step": 120 |
|
}, |
|
{ |
|
"debug/losses": 0.1164780706167221, |
|
"debug/policy_weights": 0.17811226844787598, |
|
"debug/raw_losses": 0.6562212705612183, |
|
"epoch": 0.10346199761241544, |
|
"grad_norm": 2.4007915699235807, |
|
"learning_rate": 4.999845414634076e-07, |
|
"logits/chosen": -2.674560546875, |
|
"logits/rejected": -2.6469898223876953, |
|
"logps/chosen": -188.45101928710938, |
|
"logps/rejected": -178.65158081054688, |
|
"loss": 0.1182, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.33541449904441833, |
|
"rewards/margins": 0.09415977448225021, |
|
"rewards/rejected": -0.42957431077957153, |
|
"step": 130 |
|
}, |
|
{ |
|
"debug/losses": 0.09183915704488754, |
|
"debug/policy_weights": 0.14337728917598724, |
|
"debug/raw_losses": 0.65064537525177, |
|
"epoch": 0.11142061281337047, |
|
"grad_norm": 1.3456206004862283, |
|
"learning_rate": 4.998106548810311e-07, |
|
"logits/chosen": -2.628385543823242, |
|
"logits/rejected": -2.6050915718078613, |
|
"logps/chosen": -198.1656951904297, |
|
"logps/rejected": -188.9591064453125, |
|
"loss": 0.1034, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5256301760673523, |
|
"rewards/margins": 0.11118575185537338, |
|
"rewards/rejected": -0.6368159055709839, |
|
"step": 140 |
|
}, |
|
{ |
|
"debug/losses": 0.07921925187110901, |
|
"debug/policy_weights": 0.1330244094133377, |
|
"debug/raw_losses": 0.6098042130470276, |
|
"epoch": 0.1193792280143255, |
|
"grad_norm": 2.0760765044039147, |
|
"learning_rate": 4.994436933879359e-07, |
|
"logits/chosen": -2.606598138809204, |
|
"logits/rejected": -2.600273847579956, |
|
"logps/chosen": -192.59347534179688, |
|
"logps/rejected": -219.58139038085938, |
|
"loss": 0.0924, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5305424332618713, |
|
"rewards/margins": 0.20539744198322296, |
|
"rewards/rejected": -0.7359398603439331, |
|
"step": 150 |
|
}, |
|
{ |
|
"debug/losses": 0.07018786668777466, |
|
"debug/policy_weights": 0.11792769283056259, |
|
"debug/raw_losses": 0.6061297655105591, |
|
"epoch": 0.12733784321528055, |
|
"grad_norm": 1.6935156833490121, |
|
"learning_rate": 4.988839406031596e-07, |
|
"logits/chosen": -2.5744402408599854, |
|
"logits/rejected": -2.5837349891662598, |
|
"logps/chosen": -186.702880859375, |
|
"logps/rejected": -239.1992645263672, |
|
"loss": 0.0757, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.660643458366394, |
|
"rewards/margins": 0.2579377293586731, |
|
"rewards/rejected": -0.9185811877250671, |
|
"step": 160 |
|
}, |
|
{ |
|
"debug/losses": 0.04878082871437073, |
|
"debug/policy_weights": 0.08209049701690674, |
|
"debug/raw_losses": 0.6344524621963501, |
|
"epoch": 0.13529645841623558, |
|
"grad_norm": 2.6486930748428503, |
|
"learning_rate": 4.981318291512395e-07, |
|
"logits/chosen": -2.4930522441864014, |
|
"logits/rejected": -2.486619472503662, |
|
"logps/chosen": -233.8449249267578, |
|
"logps/rejected": -264.4144592285156, |
|
"loss": 0.0581, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.974342942237854, |
|
"rewards/margins": 0.22036199271678925, |
|
"rewards/rejected": -1.1947048902511597, |
|
"step": 170 |
|
}, |
|
{ |
|
"debug/losses": 0.0591508224606514, |
|
"debug/policy_weights": 0.08082972466945648, |
|
"debug/raw_losses": 0.6892833113670349, |
|
"epoch": 0.14325507361719061, |
|
"grad_norm": 2.7902190886457094, |
|
"learning_rate": 4.971879403278432e-07, |
|
"logits/chosen": -2.502582550048828, |
|
"logits/rejected": -2.485318660736084, |
|
"logps/chosen": -251.0359344482422, |
|
"logps/rejected": -252.9623565673828, |
|
"loss": 0.0563, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.0250937938690186, |
|
"rewards/margins": 0.06508847326040268, |
|
"rewards/rejected": -1.0901821851730347, |
|
"step": 180 |
|
}, |
|
{ |
|
"debug/losses": 0.08667734265327454, |
|
"debug/policy_weights": 0.12972070276737213, |
|
"debug/raw_losses": 0.6562903523445129, |
|
"epoch": 0.15121368881814565, |
|
"grad_norm": 3.107961610097759, |
|
"learning_rate": 4.960530036504941e-07, |
|
"logits/chosen": -2.49674654006958, |
|
"logits/rejected": -2.4783453941345215, |
|
"logps/chosen": -218.3079833984375, |
|
"logps/rejected": -227.2297821044922, |
|
"loss": 0.0838, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.7395851612091064, |
|
"rewards/margins": 0.15448498725891113, |
|
"rewards/rejected": -0.8940702676773071, |
|
"step": 190 |
|
}, |
|
{ |
|
"debug/losses": 0.07691031694412231, |
|
"debug/policy_weights": 0.12212906777858734, |
|
"debug/raw_losses": 0.6350489854812622, |
|
"epoch": 0.15917230401910068, |
|
"grad_norm": 1.9719680635837602, |
|
"learning_rate": 4.947278962947386e-07, |
|
"logits/chosen": -2.455076217651367, |
|
"logits/rejected": -2.459906578063965, |
|
"logps/chosen": -222.7344970703125, |
|
"logps/rejected": -255.70956420898438, |
|
"loss": 0.0795, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.7358509302139282, |
|
"rewards/margins": 0.1919489949941635, |
|
"rewards/rejected": -0.9277998805046082, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15917230401910068, |
|
"eval_debug/losses": 0.08000103384256363, |
|
"eval_debug/policy_weights": 0.12660016119480133, |
|
"eval_debug/raw_losses": 0.6296245455741882, |
|
"eval_logits/chosen": -2.4997141361236572, |
|
"eval_logits/rejected": -2.4878616333007812, |
|
"eval_logps/chosen": -215.84107971191406, |
|
"eval_logps/rejected": -243.79220581054688, |
|
"eval_loss": 0.08260933309793472, |
|
"eval_rewards/accuracies": 0.6483209133148193, |
|
"eval_rewards/chosen": -0.7159760594367981, |
|
"eval_rewards/margins": 0.2149561196565628, |
|
"eval_rewards/rejected": -0.9309321045875549, |
|
"eval_runtime": 153.0714, |
|
"eval_samples_per_second": 55.869, |
|
"eval_steps_per_second": 0.875, |
|
"step": 200 |
|
}, |
|
{ |
|
"debug/losses": 0.07473501563072205, |
|
"debug/policy_weights": 0.12107981741428375, |
|
"debug/raw_losses": 0.6079034805297852, |
|
"epoch": 0.1671309192200557, |
|
"grad_norm": 1.4064853346615585, |
|
"learning_rate": 4.932136424161899e-07, |
|
"logits/chosen": -2.458627223968506, |
|
"logits/rejected": -2.446324586868286, |
|
"logps/chosen": -200.37945556640625, |
|
"logps/rejected": -234.1180419921875, |
|
"loss": 0.0798, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.7478693127632141, |
|
"rewards/margins": 0.2762502133846283, |
|
"rewards/rejected": -1.0241196155548096, |
|
"step": 210 |
|
}, |
|
{ |
|
"debug/losses": 0.05417264252901077, |
|
"debug/policy_weights": 0.09399188309907913, |
|
"debug/raw_losses": 0.5990099310874939, |
|
"epoch": 0.17508953442101075, |
|
"grad_norm": 1.9293056998121305, |
|
"learning_rate": 4.915114123589732e-07, |
|
"logits/chosen": -2.487759590148926, |
|
"logits/rejected": -2.465144395828247, |
|
"logps/chosen": -230.71896362304688, |
|
"logps/rejected": -261.5802917480469, |
|
"loss": 0.0643, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.9381935000419617, |
|
"rewards/margins": 0.3212595582008362, |
|
"rewards/rejected": -1.2594529390335083, |
|
"step": 220 |
|
}, |
|
{ |
|
"debug/losses": 0.07093538343906403, |
|
"debug/policy_weights": 0.11489018052816391, |
|
"debug/raw_losses": 0.5926896929740906, |
|
"epoch": 0.18304814962196578, |
|
"grad_norm": 1.6411550414623024, |
|
"learning_rate": 4.896225217511849e-07, |
|
"logits/chosen": -2.5063443183898926, |
|
"logits/rejected": -2.502354860305786, |
|
"logps/chosen": -224.992919921875, |
|
"logps/rejected": -266.00543212890625, |
|
"loss": 0.0686, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8569623231887817, |
|
"rewards/margins": 0.3157426714897156, |
|
"rewards/rejected": -1.172705054283142, |
|
"step": 230 |
|
}, |
|
{ |
|
"debug/losses": 0.06816870719194412, |
|
"debug/policy_weights": 0.10983666032552719, |
|
"debug/raw_losses": 0.6124148964881897, |
|
"epoch": 0.1910067648229208, |
|
"grad_norm": 1.6733877297747892, |
|
"learning_rate": 4.875484304880629e-07, |
|
"logits/chosen": -2.512686252593994, |
|
"logits/rejected": -2.4914631843566895, |
|
"logps/chosen": -242.66162109375, |
|
"logps/rejected": -266.0993957519531, |
|
"loss": 0.0716, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.8090359568595886, |
|
"rewards/margins": 0.2927786111831665, |
|
"rewards/rejected": -1.1018145084381104, |
|
"step": 240 |
|
}, |
|
{ |
|
"debug/losses": 0.06458164751529694, |
|
"debug/policy_weights": 0.10547280311584473, |
|
"debug/raw_losses": 0.6321852803230286, |
|
"epoch": 0.19896538002387584, |
|
"grad_norm": 1.766771502743095, |
|
"learning_rate": 4.852907416036558e-07, |
|
"logits/chosen": -2.43923020362854, |
|
"logits/rejected": -2.432621479034424, |
|
"logps/chosen": -224.22988891601562, |
|
"logps/rejected": -259.5379943847656, |
|
"loss": 0.0732, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.8493305444717407, |
|
"rewards/margins": 0.22976212203502655, |
|
"rewards/rejected": -1.0790926218032837, |
|
"step": 250 |
|
}, |
|
{ |
|
"debug/losses": 0.06774205714464188, |
|
"debug/policy_weights": 0.11494859308004379, |
|
"debug/raw_losses": 0.6017103791236877, |
|
"epoch": 0.20692399522483088, |
|
"grad_norm": 2.1704551991420646, |
|
"learning_rate": 4.828512000318616e-07, |
|
"logits/chosen": -2.4806602001190186, |
|
"logits/rejected": -2.4344286918640137, |
|
"logps/chosen": -252.88290405273438, |
|
"logps/rejected": -272.6732482910156, |
|
"loss": 0.0658, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8818208575248718, |
|
"rewards/margins": 0.29337772727012634, |
|
"rewards/rejected": -1.1751985549926758, |
|
"step": 260 |
|
}, |
|
{ |
|
"debug/losses": 0.06392187625169754, |
|
"debug/policy_weights": 0.10442598909139633, |
|
"debug/raw_losses": 0.6145210266113281, |
|
"epoch": 0.2148826104257859, |
|
"grad_norm": 1.6273411526384032, |
|
"learning_rate": 4.802316912577946e-07, |
|
"logits/chosen": -2.4324231147766113, |
|
"logits/rejected": -2.39255690574646, |
|
"logps/chosen": -231.31167602539062, |
|
"logps/rejected": -244.3099365234375, |
|
"loss": 0.0687, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8366304636001587, |
|
"rewards/margins": 0.2642030119895935, |
|
"rewards/rejected": -1.100833535194397, |
|
"step": 270 |
|
}, |
|
{ |
|
"debug/losses": 0.05992782115936279, |
|
"debug/policy_weights": 0.0925414115190506, |
|
"debug/raw_losses": 0.6348901987075806, |
|
"epoch": 0.22284122562674094, |
|
"grad_norm": 1.6517986020806328, |
|
"learning_rate": 4.774342398605221e-07, |
|
"logits/chosen": -2.416997194290161, |
|
"logits/rejected": -2.3983612060546875, |
|
"logps/chosen": -245.29135131835938, |
|
"logps/rejected": -261.55218505859375, |
|
"loss": 0.0566, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.9968692660331726, |
|
"rewards/margins": 0.22003936767578125, |
|
"rewards/rejected": -1.2169086933135986, |
|
"step": 280 |
|
}, |
|
{ |
|
"debug/losses": 0.06707664579153061, |
|
"debug/policy_weights": 0.10901203006505966, |
|
"debug/raw_losses": 0.6151145100593567, |
|
"epoch": 0.23079984082769597, |
|
"grad_norm": 1.9541190672933495, |
|
"learning_rate": 4.744610079482978e-07, |
|
"logits/chosen": -2.4758219718933105, |
|
"logits/rejected": -2.4370126724243164, |
|
"logps/chosen": -264.2386474609375, |
|
"logps/rejected": -282.10357666015625, |
|
"loss": 0.0577, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9856462478637695, |
|
"rewards/margins": 0.2682034969329834, |
|
"rewards/rejected": -1.253849744796753, |
|
"step": 290 |
|
}, |
|
{ |
|
"debug/losses": 0.04470010846853256, |
|
"debug/policy_weights": 0.07902451604604721, |
|
"debug/raw_losses": 0.6110645532608032, |
|
"epoch": 0.238758456028651, |
|
"grad_norm": 2.239630938666137, |
|
"learning_rate": 4.713142934875005e-07, |
|
"logits/chosen": -2.4241604804992676, |
|
"logits/rejected": -2.382441997528076, |
|
"logps/chosen": -255.75048828125, |
|
"logps/rejected": -272.4277038574219, |
|
"loss": 0.0545, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0819556713104248, |
|
"rewards/margins": 0.33363839983940125, |
|
"rewards/rejected": -1.4155938625335693, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.238758456028651, |
|
"eval_debug/losses": 0.05503418296575546, |
|
"eval_debug/policy_weights": 0.08935731649398804, |
|
"eval_debug/raw_losses": 0.6165817379951477, |
|
"eval_logits/chosen": -2.4302186965942383, |
|
"eval_logits/rejected": -2.416015386581421, |
|
"eval_logps/chosen": -253.9807891845703, |
|
"eval_logps/rejected": -292.5660705566406, |
|
"eval_loss": 0.05720232427120209, |
|
"eval_rewards/accuracies": 0.6641790866851807, |
|
"eval_rewards/chosen": -1.0973730087280273, |
|
"eval_rewards/margins": 0.32129794359207153, |
|
"eval_rewards/rejected": -1.418670892715454, |
|
"eval_runtime": 153.1278, |
|
"eval_samples_per_second": 55.849, |
|
"eval_steps_per_second": 0.875, |
|
"step": 300 |
|
}, |
|
{ |
|
"debug/losses": 0.04352787882089615, |
|
"debug/policy_weights": 0.07053101807832718, |
|
"debug/raw_losses": 0.5888990163803101, |
|
"epoch": 0.24671707122960604, |
|
"grad_norm": 1.9538707292393864, |
|
"learning_rate": 4.679965285265706e-07, |
|
"logits/chosen": -2.3698525428771973, |
|
"logits/rejected": -2.360182285308838, |
|
"logps/chosen": -235.0277099609375, |
|
"logps/rejected": -285.10247802734375, |
|
"loss": 0.0545, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.1747382879257202, |
|
"rewards/margins": 0.38018131256103516, |
|
"rewards/rejected": -1.5549194812774658, |
|
"step": 310 |
|
}, |
|
{ |
|
"debug/losses": 0.036569319665431976, |
|
"debug/policy_weights": 0.06743182241916656, |
|
"debug/raw_losses": 0.5270095467567444, |
|
"epoch": 0.2546756864305611, |
|
"grad_norm": 1.4572562934726576, |
|
"learning_rate": 4.64510277316316e-07, |
|
"logits/chosen": -2.2776880264282227, |
|
"logits/rejected": -2.233410358428955, |
|
"logps/chosen": -288.60418701171875, |
|
"logps/rejected": -347.5012512207031, |
|
"loss": 0.0369, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.4297213554382324, |
|
"rewards/margins": 0.6530755162239075, |
|
"rewards/rejected": -2.082796573638916, |
|
"step": 320 |
|
}, |
|
{ |
|
"debug/losses": 0.02330527827143669, |
|
"debug/policy_weights": 0.034696005284786224, |
|
"debug/raw_losses": 0.6584650874137878, |
|
"epoch": 0.26263430163151613, |
|
"grad_norm": 1.0590322815828623, |
|
"learning_rate": 4.6085823432804137e-07, |
|
"logits/chosen": -2.21286678314209, |
|
"logits/rejected": -2.183129072189331, |
|
"logps/chosen": -389.7990417480469, |
|
"logps/rejected": -418.78045654296875, |
|
"loss": 0.0215, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.4603824615478516, |
|
"rewards/margins": 0.2922320067882538, |
|
"rewards/rejected": -2.7526144981384277, |
|
"step": 330 |
|
}, |
|
{ |
|
"debug/losses": 0.020010516047477722, |
|
"debug/policy_weights": 0.033290598541498184, |
|
"debug/raw_losses": 0.615290641784668, |
|
"epoch": 0.27059291683247116, |
|
"grad_norm": 0.6231939002180164, |
|
"learning_rate": 4.570432221710314e-07, |
|
"logits/chosen": -2.2122933864593506, |
|
"logits/rejected": -2.194932460784912, |
|
"logps/chosen": -384.0797119140625, |
|
"logps/rejected": -438.1153259277344, |
|
"loss": 0.018, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.3789823055267334, |
|
"rewards/margins": 0.44676661491394043, |
|
"rewards/rejected": -2.825748920440674, |
|
"step": 340 |
|
}, |
|
{ |
|
"debug/losses": 0.027714502066373825, |
|
"debug/policy_weights": 0.04697718471288681, |
|
"debug/raw_losses": 0.6301008462905884, |
|
"epoch": 0.2785515320334262, |
|
"grad_norm": 1.3624865342914951, |
|
"learning_rate": 4.5306818941099866e-07, |
|
"logits/chosen": -2.262915849685669, |
|
"logits/rejected": -2.2063891887664795, |
|
"logps/chosen": -356.70513916015625, |
|
"logps/rejected": -376.57891845703125, |
|
"loss": 0.0251, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.0648093223571777, |
|
"rewards/margins": 0.3629462420940399, |
|
"rewards/rejected": -2.427755832672119, |
|
"step": 350 |
|
}, |
|
{ |
|
"debug/losses": 0.03147003799676895, |
|
"debug/policy_weights": 0.049594730138778687, |
|
"debug/raw_losses": 0.5973548889160156, |
|
"epoch": 0.28651014723438123, |
|
"grad_norm": 1.0467766810317458, |
|
"learning_rate": 4.4893620829118124e-07, |
|
"logits/chosen": -2.250816822052002, |
|
"logits/rejected": -2.222325563430786, |
|
"logps/chosen": -329.90728759765625, |
|
"logps/rejected": -360.37164306640625, |
|
"loss": 0.0335, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.781701683998108, |
|
"rewards/margins": 0.3680455684661865, |
|
"rewards/rejected": -2.149747371673584, |
|
"step": 360 |
|
}, |
|
{ |
|
"debug/losses": 0.029663532972335815, |
|
"debug/policy_weights": 0.049563243985176086, |
|
"debug/raw_losses": 0.655312180519104, |
|
"epoch": 0.29446876243533626, |
|
"grad_norm": 1.209879585308472, |
|
"learning_rate": 4.4465047235785185e-07, |
|
"logits/chosen": -2.2697219848632812, |
|
"logits/rejected": -2.2369260787963867, |
|
"logps/chosen": -354.7026062011719, |
|
"logps/rejected": -364.2904968261719, |
|
"loss": 0.0316, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0985820293426514, |
|
"rewards/margins": 0.2616536021232605, |
|
"rewards/rejected": -2.3602359294891357, |
|
"step": 370 |
|
}, |
|
{ |
|
"debug/losses": 0.02233636938035488, |
|
"debug/policy_weights": 0.03632068261504173, |
|
"debug/raw_losses": 0.6318480968475342, |
|
"epoch": 0.3024273776362913, |
|
"grad_norm": 0.8855115172661371, |
|
"learning_rate": 4.40214293992074e-07, |
|
"logits/chosen": -2.1508800983428955, |
|
"logits/rejected": -2.1226308345794678, |
|
"logps/chosen": -370.634765625, |
|
"logps/rejected": -402.4145202636719, |
|
"loss": 0.025, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.332416534423828, |
|
"rewards/margins": 0.34561586380004883, |
|
"rewards/rejected": -2.678032398223877, |
|
"step": 380 |
|
}, |
|
{ |
|
"debug/losses": 0.03705073148012161, |
|
"debug/policy_weights": 0.058359406888484955, |
|
"debug/raw_losses": 0.6300365328788757, |
|
"epoch": 0.3103859928372463, |
|
"grad_norm": 1.213922080492998, |
|
"learning_rate": 4.3563110184961234e-07, |
|
"logits/chosen": -2.2061028480529785, |
|
"logits/rejected": -2.182528018951416, |
|
"logps/chosen": -346.3789978027344, |
|
"logps/rejected": -381.57293701171875, |
|
"loss": 0.0306, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.9717462062835693, |
|
"rewards/margins": 0.32251009345054626, |
|
"rewards/rejected": -2.2942566871643066, |
|
"step": 390 |
|
}, |
|
{ |
|
"debug/losses": 0.02749418281018734, |
|
"debug/policy_weights": 0.050195060670375824, |
|
"debug/raw_losses": 0.5436308979988098, |
|
"epoch": 0.31834460803820136, |
|
"grad_norm": 1.1171579871093724, |
|
"learning_rate": 4.3090443821097566e-07, |
|
"logits/chosen": -2.2397525310516357, |
|
"logits/rejected": -2.213344097137451, |
|
"logps/chosen": -328.1468200683594, |
|
"logps/rejected": -400.07891845703125, |
|
"loss": 0.0288, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9207608699798584, |
|
"rewards/margins": 0.5777884721755981, |
|
"rewards/rejected": -2.498549222946167, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.31834460803820136, |
|
"eval_debug/losses": 0.0286563690751791, |
|
"eval_debug/policy_weights": 0.04770776256918907, |
|
"eval_debug/raw_losses": 0.6043540835380554, |
|
"eval_logits/chosen": -2.257312536239624, |
|
"eval_logits/rejected": -2.237586736679077, |
|
"eval_logps/chosen": -339.86920166015625, |
|
"eval_logps/rejected": -388.4184265136719, |
|
"eval_loss": 0.030234459787607193, |
|
"eval_rewards/accuracies": 0.6697761416435242, |
|
"eval_rewards/chosen": -1.9562571048736572, |
|
"eval_rewards/margins": 0.4209369122982025, |
|
"eval_rewards/rejected": -2.37719464302063, |
|
"eval_runtime": 153.0016, |
|
"eval_samples_per_second": 55.895, |
|
"eval_steps_per_second": 0.876, |
|
"step": 400 |
|
}, |
|
{ |
|
"debug/losses": 0.026865383610129356, |
|
"debug/policy_weights": 0.04847482591867447, |
|
"debug/raw_losses": 0.5621613264083862, |
|
"epoch": 0.3263032232391564, |
|
"grad_norm": 1.2502124213306463, |
|
"learning_rate": 4.2603795624364195e-07, |
|
"logits/chosen": -2.2415857315063477, |
|
"logits/rejected": -2.195265293121338, |
|
"logps/chosen": -318.52496337890625, |
|
"logps/rejected": -356.39923095703125, |
|
"loss": 0.0289, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.8215112686157227, |
|
"rewards/margins": 0.4676322042942047, |
|
"rewards/rejected": -2.2891438007354736, |
|
"step": 410 |
|
}, |
|
{ |
|
"debug/losses": 0.03042503260076046, |
|
"debug/policy_weights": 0.05211452394723892, |
|
"debug/raw_losses": 0.5721229314804077, |
|
"epoch": 0.3342618384401114, |
|
"grad_norm": 0.917628025882299, |
|
"learning_rate": 4.210354171785795e-07, |
|
"logits/chosen": -2.2705883979797363, |
|
"logits/rejected": -2.2648684978485107, |
|
"logps/chosen": -333.748046875, |
|
"logps/rejected": -389.78167724609375, |
|
"loss": 0.0285, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.9158363342285156, |
|
"rewards/margins": 0.4352361559867859, |
|
"rewards/rejected": -2.3510725498199463, |
|
"step": 420 |
|
}, |
|
{ |
|
"debug/losses": 0.028450261801481247, |
|
"debug/policy_weights": 0.049022845923900604, |
|
"debug/raw_losses": 0.6050612330436707, |
|
"epoch": 0.34222045364106646, |
|
"grad_norm": 1.3942371078185885, |
|
"learning_rate": 4.15900687403248e-07, |
|
"logits/chosen": -2.260845184326172, |
|
"logits/rejected": -2.2445521354675293, |
|
"logps/chosen": -337.31494140625, |
|
"logps/rejected": -379.3194580078125, |
|
"loss": 0.0306, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.9938061237335205, |
|
"rewards/margins": 0.39662283658981323, |
|
"rewards/rejected": -2.3904290199279785, |
|
"step": 430 |
|
}, |
|
{ |
|
"debug/losses": 0.031036963686347008, |
|
"debug/policy_weights": 0.05087602883577347, |
|
"debug/raw_losses": 0.6116689443588257, |
|
"epoch": 0.3501790688420215, |
|
"grad_norm": 1.2091608750974971, |
|
"learning_rate": 4.1063773547332584e-07, |
|
"logits/chosen": -2.271272659301758, |
|
"logits/rejected": -2.2465357780456543, |
|
"logps/chosen": -337.552490234375, |
|
"logps/rejected": -381.76239013671875, |
|
"loss": 0.0313, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.9061357975006104, |
|
"rewards/margins": 0.43205636739730835, |
|
"rewards/rejected": -2.3381924629211426, |
|
"step": 440 |
|
}, |
|
{ |
|
"debug/losses": 0.02100428007543087, |
|
"debug/policy_weights": 0.040780894458293915, |
|
"debug/raw_losses": 0.5640828609466553, |
|
"epoch": 0.3581376840429765, |
|
"grad_norm": 1.2368184816859698, |
|
"learning_rate": 4.0525062904547276e-07, |
|
"logits/chosen": -2.2050936222076416, |
|
"logits/rejected": -2.1659345626831055, |
|
"logps/chosen": -341.0467834472656, |
|
"logps/rejected": -385.0848693847656, |
|
"loss": 0.029, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.084838390350342, |
|
"rewards/margins": 0.5166509747505188, |
|
"rewards/rejected": -2.601489543914795, |
|
"step": 450 |
|
}, |
|
{ |
|
"debug/losses": 0.03121250309050083, |
|
"debug/policy_weights": 0.05547971650958061, |
|
"debug/raw_losses": 0.5400241613388062, |
|
"epoch": 0.36609629924393156, |
|
"grad_norm": 1.2892249196934267, |
|
"learning_rate": 3.997435317334988e-07, |
|
"logits/chosen": -2.279585361480713, |
|
"logits/rejected": -2.251013994216919, |
|
"logps/chosen": -340.62738037109375, |
|
"logps/rejected": -401.3656005859375, |
|
"loss": 0.03, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.936126708984375, |
|
"rewards/margins": 0.5458568334579468, |
|
"rewards/rejected": -2.4819834232330322, |
|
"step": 460 |
|
}, |
|
{ |
|
"debug/losses": 0.0340055450797081, |
|
"debug/policy_weights": 0.05620163679122925, |
|
"debug/raw_losses": 0.5971667170524597, |
|
"epoch": 0.3740549144448866, |
|
"grad_norm": 1.0883622256614363, |
|
"learning_rate": 3.941206998903701e-07, |
|
"logits/chosen": -2.304421901702881, |
|
"logits/rejected": -2.2663235664367676, |
|
"logps/chosen": -359.36468505859375, |
|
"logps/rejected": -393.1944885253906, |
|
"loss": 0.0283, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.045952320098877, |
|
"rewards/margins": 0.3646782636642456, |
|
"rewards/rejected": -2.410630702972412, |
|
"step": 470 |
|
}, |
|
{ |
|
"debug/losses": 0.02767511084675789, |
|
"debug/policy_weights": 0.042119450867176056, |
|
"debug/raw_losses": 0.5988866090774536, |
|
"epoch": 0.3820135296458416, |
|
"grad_norm": 1.153171503132909, |
|
"learning_rate": 3.8838647931853684e-07, |
|
"logits/chosen": -2.15468430519104, |
|
"logits/rejected": -2.124429702758789, |
|
"logps/chosen": -333.17987060546875, |
|
"logps/rejected": -377.18231201171875, |
|
"loss": 0.0293, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.112797260284424, |
|
"rewards/margins": 0.4070938527584076, |
|
"rewards/rejected": -2.519890785217285, |
|
"step": 480 |
|
}, |
|
{ |
|
"debug/losses": 0.031500790268182755, |
|
"debug/policy_weights": 0.05641796067357063, |
|
"debug/raw_losses": 0.5616481900215149, |
|
"epoch": 0.38997214484679665, |
|
"grad_norm": 1.584575140641085, |
|
"learning_rate": 3.825453019111281e-07, |
|
"logits/chosen": -2.1593868732452393, |
|
"logits/rejected": -2.1285595893859863, |
|
"logps/chosen": -337.25396728515625, |
|
"logps/rejected": -398.2967529296875, |
|
"loss": 0.0325, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.9278494119644165, |
|
"rewards/margins": 0.5101264119148254, |
|
"rewards/rejected": -2.4379756450653076, |
|
"step": 490 |
|
}, |
|
{ |
|
"debug/losses": 0.042373210191726685, |
|
"debug/policy_weights": 0.0751892626285553, |
|
"debug/raw_losses": 0.5488675832748413, |
|
"epoch": 0.3979307600477517, |
|
"grad_norm": 1.6646537625502156, |
|
"learning_rate": 3.7660168222660824e-07, |
|
"logits/chosen": -2.291050910949707, |
|
"logits/rejected": -2.2317252159118652, |
|
"logps/chosen": -337.90264892578125, |
|
"logps/rejected": -370.1439514160156, |
|
"loss": 0.0358, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.6945345401763916, |
|
"rewards/margins": 0.4844774305820465, |
|
"rewards/rejected": -2.179011821746826, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3979307600477517, |
|
"eval_debug/losses": 0.039384085685014725, |
|
"eval_debug/policy_weights": 0.06587707996368408, |
|
"eval_debug/raw_losses": 0.6064032912254333, |
|
"eval_logits/chosen": -2.2540104389190674, |
|
"eval_logits/rejected": -2.226517677307129, |
|
"eval_logps/chosen": -315.93218994140625, |
|
"eval_logps/rejected": -366.12408447265625, |
|
"eval_loss": 0.040718384087085724, |
|
"eval_rewards/accuracies": 0.6697761416435242, |
|
"eval_rewards/chosen": -1.71688711643219, |
|
"eval_rewards/margins": 0.43736401200294495, |
|
"eval_rewards/rejected": -2.1542508602142334, |
|
"eval_runtime": 153.0429, |
|
"eval_samples_per_second": 55.88, |
|
"eval_steps_per_second": 0.876, |
|
"step": 500 |
|
}, |
|
{ |
|
"debug/losses": 0.044026248157024384, |
|
"debug/policy_weights": 0.07014697045087814, |
|
"debug/raw_losses": 0.6108037829399109, |
|
"epoch": 0.4058893752487067, |
|
"grad_norm": 1.7093431457694426, |
|
"learning_rate": 3.705602139995416e-07, |
|
"logits/chosen": -2.22845721244812, |
|
"logits/rejected": -2.192497491836548, |
|
"logps/chosen": -311.43267822265625, |
|
"logps/rejected": -353.4495544433594, |
|
"loss": 0.0503, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.591703176498413, |
|
"rewards/margins": 0.43914633989334106, |
|
"rewards/rejected": -2.0308496952056885, |
|
"step": 510 |
|
}, |
|
{ |
|
"debug/losses": 0.04649205133318901, |
|
"debug/policy_weights": 0.07462817430496216, |
|
"debug/raw_losses": 0.6169490814208984, |
|
"epoch": 0.41384799044966175, |
|
"grad_norm": 1.3610692184849214, |
|
"learning_rate": 3.6442556659016475e-07, |
|
"logits/chosen": -2.287468671798706, |
|
"logits/rejected": -2.2674317359924316, |
|
"logps/chosen": -294.1647033691406, |
|
"logps/rejected": -330.3625183105469, |
|
"loss": 0.0468, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.4665789604187012, |
|
"rewards/margins": 0.33825773000717163, |
|
"rewards/rejected": -1.804836630821228, |
|
"step": 520 |
|
}, |
|
{ |
|
"debug/losses": 0.04447519779205322, |
|
"debug/policy_weights": 0.06717316806316376, |
|
"debug/raw_losses": 0.6419554948806763, |
|
"epoch": 0.4218066056506168, |
|
"grad_norm": 1.196394399121714, |
|
"learning_rate": 3.582024813755076e-07, |
|
"logits/chosen": -2.258765697479248, |
|
"logits/rejected": -2.2340471744537354, |
|
"logps/chosen": -329.6542053222656, |
|
"logps/rejected": -353.29425048828125, |
|
"loss": 0.0341, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.7902171611785889, |
|
"rewards/margins": 0.2765839397907257, |
|
"rewards/rejected": -2.0668013095855713, |
|
"step": 530 |
|
}, |
|
{ |
|
"debug/losses": 0.024901706725358963, |
|
"debug/policy_weights": 0.04227976128458977, |
|
"debug/raw_losses": 0.586767315864563, |
|
"epoch": 0.4297652208515718, |
|
"grad_norm": 1.0501870660898793, |
|
"learning_rate": 3.5189576808485404e-07, |
|
"logits/chosen": -2.2085609436035156, |
|
"logits/rejected": -2.182343006134033, |
|
"logps/chosen": -358.0467529296875, |
|
"logps/rejected": -406.2567443847656, |
|
"loss": 0.0256, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.0745999813079834, |
|
"rewards/margins": 0.43312233686447144, |
|
"rewards/rejected": -2.5077223777770996, |
|
"step": 540 |
|
}, |
|
{ |
|
"debug/losses": 0.027683740481734276, |
|
"debug/policy_weights": 0.0427556186914444, |
|
"debug/raw_losses": 0.6474384069442749, |
|
"epoch": 0.43772383605252685, |
|
"grad_norm": 1.6499830455402607, |
|
"learning_rate": 3.4551030108237433e-07, |
|
"logits/chosen": -2.1517820358276367, |
|
"logits/rejected": -2.0897772312164307, |
|
"logps/chosen": -370.8080749511719, |
|
"logps/rejected": -387.0035400390625, |
|
"loss": 0.0258, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.1823668479919434, |
|
"rewards/margins": 0.329748272895813, |
|
"rewards/rejected": -2.512115001678467, |
|
"step": 550 |
|
}, |
|
{ |
|
"debug/losses": 0.026466000825166702, |
|
"debug/policy_weights": 0.05167583376169205, |
|
"debug/raw_losses": 0.5545183420181274, |
|
"epoch": 0.4456824512534819, |
|
"grad_norm": 1.3487018057051514, |
|
"learning_rate": 3.390510155998023e-07, |
|
"logits/chosen": -2.20346736907959, |
|
"logits/rejected": -2.157214879989624, |
|
"logps/chosen": -348.9996643066406, |
|
"logps/rejected": -407.4266357421875, |
|
"loss": 0.0282, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.9474118947982788, |
|
"rewards/margins": 0.5647018551826477, |
|
"rewards/rejected": -2.5121140480041504, |
|
"step": 560 |
|
}, |
|
{ |
|
"debug/losses": 0.03509330004453659, |
|
"debug/policy_weights": 0.05596591904759407, |
|
"debug/raw_losses": 0.5913228988647461, |
|
"epoch": 0.4536410664544369, |
|
"grad_norm": 1.489783171592465, |
|
"learning_rate": 3.325229039220684e-07, |
|
"logits/chosen": -2.111301898956299, |
|
"logits/rejected": -2.084190607070923, |
|
"logps/chosen": -355.8735656738281, |
|
"logps/rejected": -399.67095947265625, |
|
"loss": 0.0317, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.0182948112487793, |
|
"rewards/margins": 0.457012414932251, |
|
"rewards/rejected": -2.4753072261810303, |
|
"step": 570 |
|
}, |
|
{ |
|
"debug/losses": 0.03357679396867752, |
|
"debug/policy_weights": 0.05124642699956894, |
|
"debug/raw_losses": 0.6098935008049011, |
|
"epoch": 0.46159968165539195, |
|
"grad_norm": 1.242702956848461, |
|
"learning_rate": 3.2593101152883795e-07, |
|
"logits/chosen": -2.1088345050811768, |
|
"logits/rejected": -2.067966938018799, |
|
"logps/chosen": -359.7041320800781, |
|
"logps/rejected": -397.36822509765625, |
|
"loss": 0.0233, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.107734203338623, |
|
"rewards/margins": 0.3990080952644348, |
|
"rewards/rejected": -2.506742238998413, |
|
"step": 580 |
|
}, |
|
{ |
|
"debug/losses": 0.03141509369015694, |
|
"debug/policy_weights": 0.05401759222149849, |
|
"debug/raw_losses": 0.5844415426254272, |
|
"epoch": 0.469558296856347, |
|
"grad_norm": 1.1114396949980998, |
|
"learning_rate": 3.192804331949349e-07, |
|
"logits/chosen": -2.114969253540039, |
|
"logits/rejected": -2.0797171592712402, |
|
"logps/chosen": -340.1515197753906, |
|
"logps/rejected": -380.93218994140625, |
|
"loss": 0.0275, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.975218415260315, |
|
"rewards/margins": 0.44081950187683105, |
|
"rewards/rejected": -2.4160375595092773, |
|
"step": 590 |
|
}, |
|
{ |
|
"debug/losses": 0.028428133577108383, |
|
"debug/policy_weights": 0.04971148818731308, |
|
"debug/raw_losses": 0.574429452419281, |
|
"epoch": 0.477516912057302, |
|
"grad_norm": 1.8581586725148402, |
|
"learning_rate": 3.125763090526674e-07, |
|
"logits/chosen": -2.1486268043518066, |
|
"logits/rejected": -2.094489574432373, |
|
"logps/chosen": -346.2554016113281, |
|
"logps/rejected": -392.0444030761719, |
|
"loss": 0.0309, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.914086937904358, |
|
"rewards/margins": 0.5169549584388733, |
|
"rewards/rejected": -2.431041717529297, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.477516912057302, |
|
"eval_debug/losses": 0.02868218533694744, |
|
"eval_debug/policy_weights": 0.048851560801267624, |
|
"eval_debug/raw_losses": 0.589902400970459, |
|
"eval_logits/chosen": -2.115903377532959, |
|
"eval_logits/rejected": -2.0849289894104004, |
|
"eval_logps/chosen": -339.2856750488281, |
|
"eval_logps/rejected": -391.61474609375, |
|
"eval_loss": 0.030176514759659767, |
|
"eval_rewards/accuracies": 0.6660447716712952, |
|
"eval_rewards/chosen": -1.9504221677780151, |
|
"eval_rewards/margins": 0.4587351679801941, |
|
"eval_rewards/rejected": -2.4091572761535645, |
|
"eval_runtime": 153.0228, |
|
"eval_samples_per_second": 55.887, |
|
"eval_steps_per_second": 0.876, |
|
"step": 600 |
|
}, |
|
{ |
|
"debug/losses": 0.0295234564691782, |
|
"debug/policy_weights": 0.047347038984298706, |
|
"debug/raw_losses": 0.6249476671218872, |
|
"epoch": 0.48547552725825704, |
|
"grad_norm": 1.005872549018641, |
|
"learning_rate": 3.0582382061909623e-07, |
|
"logits/chosen": -2.1071560382843018, |
|
"logits/rejected": -2.065136432647705, |
|
"logps/chosen": -339.8302917480469, |
|
"logps/rejected": -375.41748046875, |
|
"loss": 0.0304, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.9870408773422241, |
|
"rewards/margins": 0.33729949593544006, |
|
"rewards/rejected": -2.324340343475342, |
|
"step": 610 |
|
}, |
|
{ |
|
"debug/losses": 0.025071118026971817, |
|
"debug/policy_weights": 0.040535397827625275, |
|
"debug/raw_losses": 0.600980818271637, |
|
"epoch": 0.4934341424592121, |
|
"grad_norm": 1.3090829877164378, |
|
"learning_rate": 2.9902818679131775e-07, |
|
"logits/chosen": -2.0073862075805664, |
|
"logits/rejected": -1.9497063159942627, |
|
"logps/chosen": -363.63079833984375, |
|
"logps/rejected": -403.33758544921875, |
|
"loss": 0.0267, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.0890800952911377, |
|
"rewards/margins": 0.452686071395874, |
|
"rewards/rejected": -2.5417656898498535, |
|
"step": 620 |
|
}, |
|
{ |
|
"debug/losses": 0.02215055376291275, |
|
"debug/policy_weights": 0.03932579606771469, |
|
"debug/raw_losses": 0.5642175078392029, |
|
"epoch": 0.5013927576601671, |
|
"grad_norm": 1.5282212337262977, |
|
"learning_rate": 2.921946598128571e-07, |
|
"logits/chosen": -1.9308440685272217, |
|
"logits/rejected": -1.8768609762191772, |
|
"logps/chosen": -357.49041748046875, |
|
"logps/rejected": -387.71112060546875, |
|
"loss": 0.0273, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.0471129417419434, |
|
"rewards/margins": 0.5093515515327454, |
|
"rewards/rejected": -2.556464433670044, |
|
"step": 630 |
|
}, |
|
{ |
|
"debug/losses": 0.02335355058312416, |
|
"debug/policy_weights": 0.03469157591462135, |
|
"debug/raw_losses": 0.6207915544509888, |
|
"epoch": 0.5093513728611222, |
|
"grad_norm": 1.444583233150152, |
|
"learning_rate": 2.8532852121428733e-07, |
|
"logits/chosen": -1.7392104864120483, |
|
"logits/rejected": -1.655212640762329, |
|
"logps/chosen": -359.82733154296875, |
|
"logps/rejected": -401.420654296875, |
|
"loss": 0.0232, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.243035316467285, |
|
"rewards/margins": 0.43869590759277344, |
|
"rewards/rejected": -2.6817312240600586, |
|
"step": 640 |
|
}, |
|
{ |
|
"debug/losses": 0.02076483704149723, |
|
"debug/policy_weights": 0.034380484372377396, |
|
"debug/raw_losses": 0.55111163854599, |
|
"epoch": 0.5173099880620772, |
|
"grad_norm": 1.169956473894214, |
|
"learning_rate": 2.7843507773121414e-07, |
|
"logits/chosen": -1.711505651473999, |
|
"logits/rejected": -1.6203527450561523, |
|
"logps/chosen": -366.01141357421875, |
|
"logps/rejected": -429.87237548828125, |
|
"loss": 0.0239, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.2879557609558105, |
|
"rewards/margins": 0.5667728185653687, |
|
"rewards/rejected": -2.8547282218933105, |
|
"step": 650 |
|
}, |
|
{ |
|
"debug/losses": 0.017944971099495888, |
|
"debug/policy_weights": 0.03764867037534714, |
|
"debug/raw_losses": 0.526678740978241, |
|
"epoch": 0.5252686032630323, |
|
"grad_norm": 1.1259040599406782, |
|
"learning_rate": 2.715196572027789e-07, |
|
"logits/chosen": -1.6266266107559204, |
|
"logits/rejected": -1.4919464588165283, |
|
"logps/chosen": -371.72772216796875, |
|
"logps/rejected": -443.4164123535156, |
|
"loss": 0.0222, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.272620677947998, |
|
"rewards/margins": 0.6203452944755554, |
|
"rewards/rejected": -2.892965793609619, |
|
"step": 660 |
|
}, |
|
{ |
|
"debug/losses": 0.01783212646842003, |
|
"debug/policy_weights": 0.03255900740623474, |
|
"debug/raw_losses": 0.5750246047973633, |
|
"epoch": 0.5332272184639872, |
|
"grad_norm": 1.0656793710017185, |
|
"learning_rate": 2.645876044538521e-07, |
|
"logits/chosen": -1.7439796924591064, |
|
"logits/rejected": -1.6551119089126587, |
|
"logps/chosen": -379.49322509765625, |
|
"logps/rejected": -420.49029541015625, |
|
"loss": 0.0206, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.303612232208252, |
|
"rewards/margins": 0.4928767681121826, |
|
"rewards/rejected": -2.7964890003204346, |
|
"step": 670 |
|
}, |
|
{ |
|
"debug/losses": 0.020845994353294373, |
|
"debug/policy_weights": 0.03576134517788887, |
|
"debug/raw_losses": 0.5663331151008606, |
|
"epoch": 0.5411858336649423, |
|
"grad_norm": 1.0909834179777187, |
|
"learning_rate": 2.5764427716409815e-07, |
|
"logits/chosen": -1.7359968423843384, |
|
"logits/rejected": -1.6398273706436157, |
|
"logps/chosen": -392.9457702636719, |
|
"logps/rejected": -438.53436279296875, |
|
"loss": 0.0186, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.3817615509033203, |
|
"rewards/margins": 0.4670361578464508, |
|
"rewards/rejected": -2.848797559738159, |
|
"step": 680 |
|
}, |
|
{ |
|
"debug/losses": 0.02242189273238182, |
|
"debug/policy_weights": 0.03661586716771126, |
|
"debug/raw_losses": 0.6055338978767395, |
|
"epoch": 0.5491444488658973, |
|
"grad_norm": 1.2304923828991894, |
|
"learning_rate": 2.5069504172710494e-07, |
|
"logits/chosen": -1.7902625799179077, |
|
"logits/rejected": -1.747271180152893, |
|
"logps/chosen": -390.880859375, |
|
"logps/rejected": -448.6715393066406, |
|
"loss": 0.0201, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.4254586696624756, |
|
"rewards/margins": 0.39989569783210754, |
|
"rewards/rejected": -2.8253543376922607, |
|
"step": 690 |
|
}, |
|
{ |
|
"debug/losses": 0.02342490293085575, |
|
"debug/policy_weights": 0.03432609513401985, |
|
"debug/raw_losses": 0.6200428605079651, |
|
"epoch": 0.5571030640668524, |
|
"grad_norm": 0.9871754800391283, |
|
"learning_rate": 2.4374526910277886e-07, |
|
"logits/chosen": -1.8094078302383423, |
|
"logits/rejected": -1.6969894170761108, |
|
"logps/chosen": -372.88385009765625, |
|
"logps/rejected": -403.9977722167969, |
|
"loss": 0.0203, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.29471492767334, |
|
"rewards/margins": 0.35542750358581543, |
|
"rewards/rejected": -2.6501424312591553, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5571030640668524, |
|
"eval_debug/losses": 0.018499089404940605, |
|
"eval_debug/policy_weights": 0.031654853373765945, |
|
"eval_debug/raw_losses": 0.580752432346344, |
|
"eval_logits/chosen": -1.7383748292922974, |
|
"eval_logits/rejected": -1.6612528562545776, |
|
"eval_logps/chosen": -377.3936767578125, |
|
"eval_logps/rejected": -427.1260681152344, |
|
"eval_loss": 0.01980224810540676, |
|
"eval_rewards/accuracies": 0.6856343150138855, |
|
"eval_rewards/chosen": -2.3315021991729736, |
|
"eval_rewards/margins": 0.43276873230934143, |
|
"eval_rewards/rejected": -2.764270544052124, |
|
"eval_runtime": 153.0401, |
|
"eval_samples_per_second": 55.881, |
|
"eval_steps_per_second": 0.876, |
|
"step": 700 |
|
}, |
|
{ |
|
"debug/losses": 0.018895355984568596, |
|
"debug/policy_weights": 0.03160635381937027, |
|
"debug/raw_losses": 0.607495903968811, |
|
"epoch": 0.5650616792678074, |
|
"grad_norm": 1.3171395438648685, |
|
"learning_rate": 2.368003306662104e-07, |
|
"logits/chosen": -1.6481819152832031, |
|
"logits/rejected": -1.5332415103912354, |
|
"logps/chosen": -404.7178955078125, |
|
"logps/rejected": -433.3427734375, |
|
"loss": 0.0188, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.4912447929382324, |
|
"rewards/margins": 0.39817237854003906, |
|
"rewards/rejected": -2.8894169330596924, |
|
"step": 710 |
|
}, |
|
{ |
|
"debug/losses": 0.019927415996789932, |
|
"debug/policy_weights": 0.03931970149278641, |
|
"debug/raw_losses": 0.5693992376327515, |
|
"epoch": 0.5730202944687625, |
|
"grad_norm": 1.252735847781797, |
|
"learning_rate": 2.2986559405621886e-07, |
|
"logits/chosen": -1.5497167110443115, |
|
"logits/rejected": -1.4008510112762451, |
|
"logps/chosen": -398.9753723144531, |
|
"logps/rejected": -445.42926025390625, |
|
"loss": 0.0199, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.365363121032715, |
|
"rewards/margins": 0.5205937623977661, |
|
"rewards/rejected": -2.8859567642211914, |
|
"step": 720 |
|
}, |
|
{ |
|
"debug/losses": 0.017138421535491943, |
|
"debug/policy_weights": 0.029266973957419395, |
|
"debug/raw_losses": 0.5414391756057739, |
|
"epoch": 0.5809789096697174, |
|
"grad_norm": 0.9922063489426967, |
|
"learning_rate": 2.2294641902678443e-07, |
|
"logits/chosen": -1.5957987308502197, |
|
"logits/rejected": -1.4495995044708252, |
|
"logps/chosen": -367.09942626953125, |
|
"logps/rejected": -425.4873046875, |
|
"loss": 0.0177, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.3193514347076416, |
|
"rewards/margins": 0.6168329119682312, |
|
"rewards/rejected": -2.9361839294433594, |
|
"step": 730 |
|
}, |
|
{ |
|
"debug/losses": 0.013756600208580494, |
|
"debug/policy_weights": 0.029798978939652443, |
|
"debug/raw_losses": 0.537264347076416, |
|
"epoch": 0.5889375248706725, |
|
"grad_norm": 1.6691124809021063, |
|
"learning_rate": 2.160481533045751e-07, |
|
"logits/chosen": -1.561067819595337, |
|
"logits/rejected": -1.3976190090179443, |
|
"logps/chosen": -395.62347412109375, |
|
"logps/rejected": -438.599609375, |
|
"loss": 0.0184, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.454890012741089, |
|
"rewards/margins": 0.5239530801773071, |
|
"rewards/rejected": -2.9788429737091064, |
|
"step": 740 |
|
}, |
|
{ |
|
"debug/losses": 0.01619603857398033, |
|
"debug/policy_weights": 0.031267426908016205, |
|
"debug/raw_losses": 0.5780462622642517, |
|
"epoch": 0.5968961400716275, |
|
"grad_norm": 1.077635653848249, |
|
"learning_rate": 2.0917612845576882e-07, |
|
"logits/chosen": -1.6572452783584595, |
|
"logits/rejected": -1.4474142789840698, |
|
"logps/chosen": -395.1420593261719, |
|
"logps/rejected": -431.25732421875, |
|
"loss": 0.0189, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.3865952491760254, |
|
"rewards/margins": 0.5750153064727783, |
|
"rewards/rejected": -2.9616103172302246, |
|
"step": 750 |
|
}, |
|
{ |
|
"debug/losses": 0.0169072188436985, |
|
"debug/policy_weights": 0.028911063447594643, |
|
"debug/raw_losses": 0.6073988080024719, |
|
"epoch": 0.6048547552725826, |
|
"grad_norm": 1.2161871750266422, |
|
"learning_rate": 2.0233565576536564e-07, |
|
"logits/chosen": -1.533752202987671, |
|
"logits/rejected": -1.4571056365966797, |
|
"logps/chosen": -383.4120788574219, |
|
"logps/rejected": -433.785888671875, |
|
"loss": 0.0207, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.414424419403076, |
|
"rewards/margins": 0.4532749652862549, |
|
"rewards/rejected": -2.867699384689331, |
|
"step": 760 |
|
}, |
|
{ |
|
"debug/losses": 0.018340473994612694, |
|
"debug/policy_weights": 0.03365606069564819, |
|
"debug/raw_losses": 0.5463643670082092, |
|
"epoch": 0.6128133704735376, |
|
"grad_norm": 1.1430708218569445, |
|
"learning_rate": 1.9553202213217537e-07, |
|
"logits/chosen": -1.549505352973938, |
|
"logits/rejected": -1.4244543313980103, |
|
"logps/chosen": -367.46600341796875, |
|
"logps/rejected": -431.8912048339844, |
|
"loss": 0.018, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.3706705570220947, |
|
"rewards/margins": 0.5944007039070129, |
|
"rewards/rejected": -2.965071201324463, |
|
"step": 770 |
|
}, |
|
{ |
|
"debug/losses": 0.019901562482118607, |
|
"debug/policy_weights": 0.03774517774581909, |
|
"debug/raw_losses": 0.5596235990524292, |
|
"epoch": 0.6207719856744927, |
|
"grad_norm": 1.1044700038610857, |
|
"learning_rate": 1.887704859826528e-07, |
|
"logits/chosen": -1.5840781927108765, |
|
"logits/rejected": -1.435772180557251, |
|
"logps/chosen": -411.3043518066406, |
|
"logps/rejected": -474.3914489746094, |
|
"loss": 0.0186, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.5005176067352295, |
|
"rewards/margins": 0.6052481532096863, |
|
"rewards/rejected": -3.1057658195495605, |
|
"step": 780 |
|
}, |
|
{ |
|
"debug/losses": 0.020346064120531082, |
|
"debug/policy_weights": 0.03269239515066147, |
|
"debug/raw_losses": 0.5674707889556885, |
|
"epoch": 0.6287306008754476, |
|
"grad_norm": 0.9859185287093285, |
|
"learning_rate": 1.8205627320673836e-07, |
|
"logits/chosen": -1.475979208946228, |
|
"logits/rejected": -1.2797057628631592, |
|
"logps/chosen": -401.9871826171875, |
|
"logps/rejected": -459.9845275878906, |
|
"loss": 0.018, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.484294891357422, |
|
"rewards/margins": 0.6510842442512512, |
|
"rewards/rejected": -3.135378837585449, |
|
"step": 790 |
|
}, |
|
{ |
|
"debug/losses": 0.017856208607554436, |
|
"debug/policy_weights": 0.033618152141571045, |
|
"debug/raw_losses": 0.5967596769332886, |
|
"epoch": 0.6366892160764027, |
|
"grad_norm": 0.9736061211829494, |
|
"learning_rate": 1.7539457311884675e-07, |
|
"logits/chosen": -1.3345762491226196, |
|
"logits/rejected": -1.1230970621109009, |
|
"logps/chosen": -407.16778564453125, |
|
"logps/rejected": -441.09149169921875, |
|
"loss": 0.0192, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.494783878326416, |
|
"rewards/margins": 0.4510740339756012, |
|
"rewards/rejected": -2.9458580017089844, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6366892160764027, |
|
"eval_debug/losses": 0.01690017618238926, |
|
"eval_debug/policy_weights": 0.02897428721189499, |
|
"eval_debug/raw_losses": 0.5789195895195007, |
|
"eval_logits/chosen": -1.2121543884277344, |
|
"eval_logits/rejected": -1.0483382940292358, |
|
"eval_logps/chosen": -403.5320739746094, |
|
"eval_logps/rejected": -462.95257568359375, |
|
"eval_loss": 0.018223632127046585, |
|
"eval_rewards/accuracies": 0.6865671873092651, |
|
"eval_rewards/chosen": -2.592885971069336, |
|
"eval_rewards/margins": 0.5296501517295837, |
|
"eval_rewards/rejected": -3.1225357055664062, |
|
"eval_runtime": 153.0023, |
|
"eval_samples_per_second": 55.895, |
|
"eval_steps_per_second": 0.876, |
|
"step": 800 |
|
}, |
|
{ |
|
"debug/losses": 0.0213015079498291, |
|
"debug/policy_weights": 0.034373920410871506, |
|
"debug/raw_losses": 0.6232832670211792, |
|
"epoch": 0.6446478312773577, |
|
"grad_norm": 1.2970621410520329, |
|
"learning_rate": 1.687905344471226e-07, |
|
"logits/chosen": -1.2676408290863037, |
|
"logits/rejected": -1.1279184818267822, |
|
"logps/chosen": -419.06475830078125, |
|
"logps/rejected": -464.020263671875, |
|
"loss": 0.0194, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.55409836769104, |
|
"rewards/margins": 0.43357712030410767, |
|
"rewards/rejected": -2.987675905227661, |
|
"step": 810 |
|
}, |
|
{ |
|
"debug/losses": 0.02103157714009285, |
|
"debug/policy_weights": 0.029956122860312462, |
|
"debug/raw_losses": 0.6147562265396118, |
|
"epoch": 0.6526064464783128, |
|
"grad_norm": 0.7803955054344923, |
|
"learning_rate": 1.6224926135406693e-07, |
|
"logits/chosen": -1.371311902999878, |
|
"logits/rejected": -1.1925647258758545, |
|
"logps/chosen": -411.35577392578125, |
|
"logps/rejected": -442.8028259277344, |
|
"loss": 0.0162, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.6678333282470703, |
|
"rewards/margins": 0.4167654514312744, |
|
"rewards/rejected": -3.084599018096924, |
|
"step": 820 |
|
}, |
|
{ |
|
"debug/losses": 0.017109088599681854, |
|
"debug/policy_weights": 0.03144029527902603, |
|
"debug/raw_losses": 0.5839846730232239, |
|
"epoch": 0.6605650616792678, |
|
"grad_norm": 0.9253818549329441, |
|
"learning_rate": 1.557758094916053e-07, |
|
"logits/chosen": -1.5016579627990723, |
|
"logits/rejected": -1.3439366817474365, |
|
"logps/chosen": -427.08551025390625, |
|
"logps/rejected": -477.54718017578125, |
|
"loss": 0.0164, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.701488494873047, |
|
"rewards/margins": 0.5098705887794495, |
|
"rewards/rejected": -3.2113590240478516, |
|
"step": 830 |
|
}, |
|
{ |
|
"debug/losses": 0.01964627578854561, |
|
"debug/policy_weights": 0.03611503541469574, |
|
"debug/raw_losses": 0.5748011469841003, |
|
"epoch": 0.6685236768802229, |
|
"grad_norm": 0.8808070127931911, |
|
"learning_rate": 1.4937518209365108e-07, |
|
"logits/chosen": -1.5935519933700562, |
|
"logits/rejected": -1.422109603881836, |
|
"logps/chosen": -435.3828125, |
|
"logps/rejected": -461.01763916015625, |
|
"loss": 0.0207, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.5694196224212646, |
|
"rewards/margins": 0.5077784061431885, |
|
"rewards/rejected": -3.077198028564453, |
|
"step": 840 |
|
}, |
|
{ |
|
"debug/losses": 0.017254317179322243, |
|
"debug/policy_weights": 0.030938278883695602, |
|
"debug/raw_losses": 0.5966542959213257, |
|
"epoch": 0.6764822920811778, |
|
"grad_norm": 0.9343951943413922, |
|
"learning_rate": 1.4305232610918045e-07, |
|
"logits/chosen": -1.5590174198150635, |
|
"logits/rejected": -1.4611246585845947, |
|
"logps/chosen": -413.28875732421875, |
|
"logps/rejected": -451.7987365722656, |
|
"loss": 0.0186, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.672389507293701, |
|
"rewards/margins": 0.4185652732849121, |
|
"rewards/rejected": -3.090954542160034, |
|
"step": 850 |
|
}, |
|
{ |
|
"debug/losses": 0.01943446323275566, |
|
"debug/policy_weights": 0.031098250299692154, |
|
"debug/raw_losses": 0.6142188906669617, |
|
"epoch": 0.6844409072821329, |
|
"grad_norm": 0.9375815515878454, |
|
"learning_rate": 1.3681212837880977e-07, |
|
"logits/chosen": -1.6131696701049805, |
|
"logits/rejected": -1.5382283926010132, |
|
"logps/chosen": -378.0366516113281, |
|
"logps/rejected": -439.3740234375, |
|
"loss": 0.0193, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.4397616386413574, |
|
"rewards/margins": 0.4309278428554535, |
|
"rewards/rejected": -2.870689868927002, |
|
"step": 860 |
|
}, |
|
{ |
|
"debug/losses": 0.018429789692163467, |
|
"debug/policy_weights": 0.03325992077589035, |
|
"debug/raw_losses": 0.5925087928771973, |
|
"epoch": 0.6923995224830879, |
|
"grad_norm": 1.232238112884586, |
|
"learning_rate": 1.3065941185782977e-07, |
|
"logits/chosen": -1.5463390350341797, |
|
"logits/rejected": -1.3958414793014526, |
|
"logps/chosen": -399.37152099609375, |
|
"logps/rejected": -427.0814514160156, |
|
"loss": 0.0211, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.4714484214782715, |
|
"rewards/margins": 0.43900877237319946, |
|
"rewards/rejected": -2.910456895828247, |
|
"step": 870 |
|
}, |
|
{ |
|
"debug/losses": 0.01962456852197647, |
|
"debug/policy_weights": 0.046004533767700195, |
|
"debug/raw_losses": 0.4855673313140869, |
|
"epoch": 0.700358137684043, |
|
"grad_norm": 1.2553897674721153, |
|
"learning_rate": 1.2459893188861613e-07, |
|
"logits/chosen": -1.717850685119629, |
|
"logits/rejected": -1.556694746017456, |
|
"logps/chosen": -361.3697204589844, |
|
"logps/rejected": -447.73858642578125, |
|
"loss": 0.0229, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1288363933563232, |
|
"rewards/margins": 0.7239478826522827, |
|
"rewards/rejected": -2.8527846336364746, |
|
"step": 880 |
|
}, |
|
{ |
|
"debug/losses": 0.022167332470417023, |
|
"debug/policy_weights": 0.04253797605633736, |
|
"debug/raw_losses": 0.5294119119644165, |
|
"epoch": 0.708316752884998, |
|
"grad_norm": 1.099703402287045, |
|
"learning_rate": 1.1863537252529548e-07, |
|
"logits/chosen": -1.6016219854354858, |
|
"logits/rejected": -1.3953754901885986, |
|
"logps/chosen": -387.30731201171875, |
|
"logps/rejected": -434.49169921875, |
|
"loss": 0.0214, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.2904038429260254, |
|
"rewards/margins": 0.5490304231643677, |
|
"rewards/rejected": -2.8394341468811035, |
|
"step": 890 |
|
}, |
|
{ |
|
"debug/losses": 0.020326469093561172, |
|
"debug/policy_weights": 0.03977964445948601, |
|
"debug/raw_losses": 0.5732988119125366, |
|
"epoch": 0.716275368085953, |
|
"grad_norm": 1.090687554043691, |
|
"learning_rate": 1.1277334291351145e-07, |
|
"logits/chosen": -1.4579260349273682, |
|
"logits/rejected": -1.2984815835952759, |
|
"logps/chosen": -366.3522033691406, |
|
"logps/rejected": -429.67950439453125, |
|
"loss": 0.0233, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.290104866027832, |
|
"rewards/margins": 0.5741912722587585, |
|
"rewards/rejected": -2.8642961978912354, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.716275368085953, |
|
"eval_debug/losses": 0.02210032381117344, |
|
"eval_debug/policy_weights": 0.03874586522579193, |
|
"eval_debug/raw_losses": 0.5725884437561035, |
|
"eval_logits/chosen": -1.4492710828781128, |
|
"eval_logits/rejected": -1.3096469640731812, |
|
"eval_logps/chosen": -377.3470153808594, |
|
"eval_logps/rejected": -440.0111389160156, |
|
"eval_loss": 0.023749953135848045, |
|
"eval_rewards/accuracies": 0.6809701323509216, |
|
"eval_rewards/chosen": -2.3310351371765137, |
|
"eval_rewards/margins": 0.5620867609977722, |
|
"eval_rewards/rejected": -2.8931214809417725, |
|
"eval_runtime": 152.9735, |
|
"eval_samples_per_second": 55.905, |
|
"eval_steps_per_second": 0.876, |
|
"step": 900 |
|
}, |
|
{ |
|
"debug/losses": 0.02190154604613781, |
|
"debug/policy_weights": 0.04413367062807083, |
|
"debug/raw_losses": 0.4940849244594574, |
|
"epoch": 0.724233983286908, |
|
"grad_norm": 1.0479997589103751, |
|
"learning_rate": 1.0701737372808431e-07, |
|
"logits/chosen": -1.4712375402450562, |
|
"logits/rejected": -1.3293416500091553, |
|
"logps/chosen": -357.8360290527344, |
|
"logps/rejected": -447.7298889160156, |
|
"loss": 0.0218, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.14089298248291, |
|
"rewards/margins": 0.7697595357894897, |
|
"rewards/rejected": -2.9106526374816895, |
|
"step": 910 |
|
}, |
|
{ |
|
"debug/losses": 0.01954295113682747, |
|
"debug/policy_weights": 0.036859314888715744, |
|
"debug/raw_losses": 0.5885938405990601, |
|
"epoch": 0.7321925984878631, |
|
"grad_norm": 1.129536901422531, |
|
"learning_rate": 1.0137191367132078e-07, |
|
"logits/chosen": -1.4527212381362915, |
|
"logits/rejected": -1.2773245573043823, |
|
"logps/chosen": -412.63983154296875, |
|
"logps/rejected": -471.3998107910156, |
|
"loss": 0.0189, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.5654666423797607, |
|
"rewards/margins": 0.5423773527145386, |
|
"rewards/rejected": -3.107844114303589, |
|
"step": 920 |
|
}, |
|
{ |
|
"debug/losses": 0.02070821449160576, |
|
"debug/policy_weights": 0.033360544592142105, |
|
"debug/raw_losses": 0.6200900673866272, |
|
"epoch": 0.7401512136888182, |
|
"grad_norm": 0.8545282722936337, |
|
"learning_rate": 9.584132603467827e-08, |
|
"logits/chosen": -1.449439287185669, |
|
"logits/rejected": -1.263599157333374, |
|
"logps/chosen": -435.151123046875, |
|
"logps/rejected": -468.1375427246094, |
|
"loss": 0.0189, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.67981219291687, |
|
"rewards/margins": 0.4587249755859375, |
|
"rewards/rejected": -3.1385371685028076, |
|
"step": 930 |
|
}, |
|
{ |
|
"debug/losses": 0.020046690478920937, |
|
"debug/policy_weights": 0.03236168995499611, |
|
"debug/raw_losses": 0.5747401118278503, |
|
"epoch": 0.7481098288897732, |
|
"grad_norm": 0.9918593494773874, |
|
"learning_rate": 9.042988532644249e-08, |
|
"logits/chosen": -1.5396702289581299, |
|
"logits/rejected": -1.397327184677124, |
|
"logps/chosen": -402.87237548828125, |
|
"logps/rejected": -470.3892517089844, |
|
"loss": 0.0195, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.59519362449646, |
|
"rewards/margins": 0.5613120794296265, |
|
"rewards/rejected": -3.156505823135376, |
|
"step": 940 |
|
}, |
|
{ |
|
"debug/losses": 0.01932508684694767, |
|
"debug/policy_weights": 0.03236791491508484, |
|
"debug/raw_losses": 0.5765026211738586, |
|
"epoch": 0.7560684440907283, |
|
"grad_norm": 0.6875663367151513, |
|
"learning_rate": 8.514177396802428e-08, |
|
"logits/chosen": -1.6237666606903076, |
|
"logits/rejected": -1.5048038959503174, |
|
"logps/chosen": -407.4132385253906, |
|
"logps/rejected": -465.19927978515625, |
|
"loss": 0.0181, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.675424814224243, |
|
"rewards/margins": 0.5076571106910706, |
|
"rewards/rejected": -3.183081865310669, |
|
"step": 950 |
|
}, |
|
{ |
|
"debug/losses": 0.017698202282190323, |
|
"debug/policy_weights": 0.031711481511592865, |
|
"debug/raw_losses": 0.5835244059562683, |
|
"epoch": 0.7640270592916832, |
|
"grad_norm": 0.7868483840339101, |
|
"learning_rate": 7.998107906142839e-08, |
|
"logits/chosen": -1.6007171869277954, |
|
"logits/rejected": -1.478947401046753, |
|
"logps/chosen": -396.6483459472656, |
|
"logps/rejected": -433.88458251953125, |
|
"loss": 0.0184, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.5362672805786133, |
|
"rewards/margins": 0.4365662932395935, |
|
"rewards/rejected": -2.9728338718414307, |
|
"step": 960 |
|
}, |
|
{ |
|
"debug/losses": 0.016292816027998924, |
|
"debug/policy_weights": 0.02861318551003933, |
|
"debug/raw_losses": 0.5646917819976807, |
|
"epoch": 0.7719856744926383, |
|
"grad_norm": 1.0427659029787784, |
|
"learning_rate": 7.495178923039396e-08, |
|
"logits/chosen": -1.6051594018936157, |
|
"logits/rejected": -1.5850203037261963, |
|
"logps/chosen": -380.40289306640625, |
|
"logps/rejected": -462.0849609375, |
|
"loss": 0.0195, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.517289638519287, |
|
"rewards/margins": 0.546326220035553, |
|
"rewards/rejected": -3.0636162757873535, |
|
"step": 970 |
|
}, |
|
{ |
|
"debug/losses": 0.017953380942344666, |
|
"debug/policy_weights": 0.030950292944908142, |
|
"debug/raw_losses": 0.5466476678848267, |
|
"epoch": 0.7799442896935933, |
|
"grad_norm": 1.3967641035945062, |
|
"learning_rate": 7.005779153764682e-08, |
|
"logits/chosen": -1.622676134109497, |
|
"logits/rejected": -1.4608395099639893, |
|
"logps/chosen": -385.9210205078125, |
|
"logps/rejected": -437.5826721191406, |
|
"loss": 0.0211, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.431375741958618, |
|
"rewards/margins": 0.5695705413818359, |
|
"rewards/rejected": -3.000946283340454, |
|
"step": 980 |
|
}, |
|
{ |
|
"debug/losses": 0.020693689584732056, |
|
"debug/policy_weights": 0.03592243418097496, |
|
"debug/raw_losses": 0.5423937439918518, |
|
"epoch": 0.7879029048945484, |
|
"grad_norm": 1.5010217405708293, |
|
"learning_rate": 6.530286848064698e-08, |
|
"logits/chosen": -1.5891830921173096, |
|
"logits/rejected": -1.4476853609085083, |
|
"logps/chosen": -388.16107177734375, |
|
"logps/rejected": -452.994384765625, |
|
"loss": 0.0194, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.35011625289917, |
|
"rewards/margins": 0.6105698347091675, |
|
"rewards/rejected": -2.960686206817627, |
|
"step": 990 |
|
}, |
|
{ |
|
"debug/losses": 0.01797301694750786, |
|
"debug/policy_weights": 0.035282202064991, |
|
"debug/raw_losses": 0.5305525064468384, |
|
"epoch": 0.7958615200955034, |
|
"grad_norm": 1.0175708862425297, |
|
"learning_rate": 6.069069506815325e-08, |
|
"logits/chosen": -1.6233867406845093, |
|
"logits/rejected": -1.3854453563690186, |
|
"logps/chosen": -379.2464294433594, |
|
"logps/rejected": -441.9747009277344, |
|
"loss": 0.0213, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.343348503112793, |
|
"rewards/margins": 0.6720021963119507, |
|
"rewards/rejected": -3.015350818634033, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7958615200955034, |
|
"eval_debug/losses": 0.02031446062028408, |
|
"eval_debug/policy_weights": 0.035683903843164444, |
|
"eval_debug/raw_losses": 0.569391131401062, |
|
"eval_logits/chosen": -1.604931116104126, |
|
"eval_logits/rejected": -1.4880036115646362, |
|
"eval_logps/chosen": -386.5316467285156, |
|
"eval_logps/rejected": -446.7563781738281, |
|
"eval_loss": 0.021877959370613098, |
|
"eval_rewards/accuracies": 0.6930969953536987, |
|
"eval_rewards/chosen": -2.422881603240967, |
|
"eval_rewards/margins": 0.5376923680305481, |
|
"eval_rewards/rejected": -2.960574150085449, |
|
"eval_runtime": 152.9371, |
|
"eval_samples_per_second": 55.918, |
|
"eval_steps_per_second": 0.876, |
|
"step": 1000 |
|
}, |
|
{ |
|
"debug/losses": 0.023462774232029915, |
|
"debug/policy_weights": 0.035995837301015854, |
|
"debug/raw_losses": 0.6167944669723511, |
|
"epoch": 0.8038201352964585, |
|
"grad_norm": 1.1729268102061192, |
|
"learning_rate": 5.6224835979863714e-08, |
|
"logits/chosen": -1.646054983139038, |
|
"logits/rejected": -1.4877814054489136, |
|
"logps/chosen": -394.0400085449219, |
|
"logps/rejected": -429.5767517089844, |
|
"loss": 0.0209, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.4372658729553223, |
|
"rewards/margins": 0.43345385789871216, |
|
"rewards/rejected": -2.8707196712493896, |
|
"step": 1010 |
|
}, |
|
{ |
|
"debug/losses": 0.01666095480322838, |
|
"debug/policy_weights": 0.03231491893529892, |
|
"debug/raw_losses": 0.5554049015045166, |
|
"epoch": 0.8117787504974134, |
|
"grad_norm": 1.767673975921384, |
|
"learning_rate": 5.190874281132851e-08, |
|
"logits/chosen": -1.6193885803222656, |
|
"logits/rejected": -1.4846832752227783, |
|
"logps/chosen": -377.4434509277344, |
|
"logps/rejected": -439.32159423828125, |
|
"loss": 0.0215, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.3773646354675293, |
|
"rewards/margins": 0.5905806422233582, |
|
"rewards/rejected": -2.9679455757141113, |
|
"step": 1020 |
|
}, |
|
{ |
|
"debug/losses": 0.018973354250192642, |
|
"debug/policy_weights": 0.033219628036022186, |
|
"debug/raw_losses": 0.5266579389572144, |
|
"epoch": 0.8197373656983685, |
|
"grad_norm": 1.193592806014732, |
|
"learning_rate": 4.774575140626316e-08, |
|
"logits/chosen": -1.5476404428482056, |
|
"logits/rejected": -1.3509008884429932, |
|
"logps/chosen": -378.56463623046875, |
|
"logps/rejected": -437.563720703125, |
|
"loss": 0.0206, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.3850128650665283, |
|
"rewards/margins": 0.6664969325065613, |
|
"rewards/rejected": -3.051509380340576, |
|
"step": 1030 |
|
}, |
|
{ |
|
"debug/losses": 0.021493710577487946, |
|
"debug/policy_weights": 0.035152681171894073, |
|
"debug/raw_losses": 0.6217910051345825, |
|
"epoch": 0.8276959808993235, |
|
"grad_norm": 1.4851010788677348, |
|
"learning_rate": 4.373907927832513e-08, |
|
"logits/chosen": -1.5497913360595703, |
|
"logits/rejected": -1.4415110349655151, |
|
"logps/chosen": -370.7920837402344, |
|
"logps/rejected": -425.83636474609375, |
|
"loss": 0.0226, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.4308905601501465, |
|
"rewards/margins": 0.4381667971611023, |
|
"rewards/rejected": -2.8690574169158936, |
|
"step": 1040 |
|
}, |
|
{ |
|
"debug/losses": 0.019884012639522552, |
|
"debug/policy_weights": 0.03903160244226456, |
|
"debug/raw_losses": 0.5188706517219543, |
|
"epoch": 0.8356545961002786, |
|
"grad_norm": 1.1564325896000056, |
|
"learning_rate": 3.9891823124345665e-08, |
|
"logits/chosen": -1.6113007068634033, |
|
"logits/rejected": -1.4773445129394531, |
|
"logps/chosen": -376.01031494140625, |
|
"logps/rejected": -441.5926208496094, |
|
"loss": 0.0227, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.249936819076538, |
|
"rewards/margins": 0.6867498159408569, |
|
"rewards/rejected": -2.9366867542266846, |
|
"step": 1050 |
|
}, |
|
{ |
|
"debug/losses": 0.018301142379641533, |
|
"debug/policy_weights": 0.03422557935118675, |
|
"debug/raw_losses": 0.5713909864425659, |
|
"epoch": 0.8436132113012336, |
|
"grad_norm": 1.0553462531854274, |
|
"learning_rate": 3.620695643093924e-08, |
|
"logits/chosen": -1.6144059896469116, |
|
"logits/rejected": -1.5364993810653687, |
|
"logps/chosen": -362.0438537597656, |
|
"logps/rejected": -426.15283203125, |
|
"loss": 0.021, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.2525696754455566, |
|
"rewards/margins": 0.5051141977310181, |
|
"rewards/rejected": -2.7576839923858643, |
|
"step": 1060 |
|
}, |
|
{ |
|
"debug/losses": 0.023652352392673492, |
|
"debug/policy_weights": 0.038290224969387054, |
|
"debug/raw_losses": 0.5432866811752319, |
|
"epoch": 0.8515718265021887, |
|
"grad_norm": 1.6604665806944348, |
|
"learning_rate": 3.268732717634032e-08, |
|
"logits/chosen": -1.5845705270767212, |
|
"logits/rejected": -1.4731186628341675, |
|
"logps/chosen": -363.3177795410156, |
|
"logps/rejected": -417.6956481933594, |
|
"loss": 0.0233, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2692339420318604, |
|
"rewards/margins": 0.5667887926101685, |
|
"rewards/rejected": -2.8360228538513184, |
|
"step": 1070 |
|
}, |
|
{ |
|
"debug/losses": 0.024808544665575027, |
|
"debug/policy_weights": 0.04168625548481941, |
|
"debug/raw_losses": 0.598394513130188, |
|
"epoch": 0.8595304417031436, |
|
"grad_norm": 1.1200756250900803, |
|
"learning_rate": 2.9335655629243645e-08, |
|
"logits/chosen": -1.6236356496810913, |
|
"logits/rejected": -1.505225658416748, |
|
"logps/chosen": -384.00665283203125, |
|
"logps/rejected": -440.53741455078125, |
|
"loss": 0.023, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.3362393379211426, |
|
"rewards/margins": 0.45273178815841675, |
|
"rewards/rejected": -2.788971424102783, |
|
"step": 1080 |
|
}, |
|
{ |
|
"debug/losses": 0.019663607701659203, |
|
"debug/policy_weights": 0.03420232608914375, |
|
"debug/raw_losses": 0.6034265160560608, |
|
"epoch": 0.8674890569040987, |
|
"grad_norm": 0.9926669013809155, |
|
"learning_rate": 2.6154532246349476e-08, |
|
"logits/chosen": -1.6022891998291016, |
|
"logits/rejected": -1.448965311050415, |
|
"logps/chosen": -376.11602783203125, |
|
"logps/rejected": -401.70013427734375, |
|
"loss": 0.0236, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.345458984375, |
|
"rewards/margins": 0.435396671295166, |
|
"rewards/rejected": -2.780856132507324, |
|
"step": 1090 |
|
}, |
|
{ |
|
"debug/losses": 0.027548635378479958, |
|
"debug/policy_weights": 0.04641988128423691, |
|
"debug/raw_losses": 0.6471782922744751, |
|
"epoch": 0.8754476721050537, |
|
"grad_norm": 1.6941121596085795, |
|
"learning_rate": 2.31464156702382e-08, |
|
"logits/chosen": -1.6227413415908813, |
|
"logits/rejected": -1.4754379987716675, |
|
"logps/chosen": -386.9624328613281, |
|
"logps/rejected": -417.848876953125, |
|
"loss": 0.0229, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.3037209510803223, |
|
"rewards/margins": 0.3875313103199005, |
|
"rewards/rejected": -2.691251754760742, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8754476721050537, |
|
"eval_debug/losses": 0.021541133522987366, |
|
"eval_debug/policy_weights": 0.037866536527872086, |
|
"eval_debug/raw_losses": 0.5694783926010132, |
|
"eval_logits/chosen": -1.6574220657348633, |
|
"eval_logits/rejected": -1.5526961088180542, |
|
"eval_logps/chosen": -371.6010437011719, |
|
"eval_logps/rejected": -429.42828369140625, |
|
"eval_loss": 0.023058408871293068, |
|
"eval_rewards/accuracies": 0.6949626803398132, |
|
"eval_rewards/chosen": -2.2735750675201416, |
|
"eval_rewards/margins": 0.5137178301811218, |
|
"eval_rewards/rejected": -2.787292718887329, |
|
"eval_runtime": 153.1149, |
|
"eval_samples_per_second": 55.853, |
|
"eval_steps_per_second": 0.875, |
|
"step": 1100 |
|
}, |
|
{ |
|
"debug/losses": 0.01961921714246273, |
|
"debug/policy_weights": 0.0378187857568264, |
|
"debug/raw_losses": 0.5709212422370911, |
|
"epoch": 0.8834062873060088, |
|
"grad_norm": 1.0232523554179078, |
|
"learning_rate": 2.031363082912252e-08, |
|
"logits/chosen": -1.6505826711654663, |
|
"logits/rejected": -1.548697829246521, |
|
"logps/chosen": -354.1360168457031, |
|
"logps/rejected": -411.10906982421875, |
|
"loss": 0.0236, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.2260138988494873, |
|
"rewards/margins": 0.478458970785141, |
|
"rewards/rejected": -2.7044730186462402, |
|
"step": 1110 |
|
}, |
|
{ |
|
"debug/losses": 0.021199991926550865, |
|
"debug/policy_weights": 0.03719371557235718, |
|
"debug/raw_losses": 0.6032904386520386, |
|
"epoch": 0.8913649025069638, |
|
"grad_norm": 1.0591369289157555, |
|
"learning_rate": 1.7658367139945228e-08, |
|
"logits/chosen": -1.6456480026245117, |
|
"logits/rejected": -1.5404677391052246, |
|
"logps/chosen": -370.068603515625, |
|
"logps/rejected": -416.41162109375, |
|
"loss": 0.0221, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.257601737976074, |
|
"rewards/margins": 0.47389450669288635, |
|
"rewards/rejected": -2.7314963340759277, |
|
"step": 1120 |
|
}, |
|
{ |
|
"debug/losses": 0.022228095680475235, |
|
"debug/policy_weights": 0.04186805337667465, |
|
"debug/raw_losses": 0.5478729009628296, |
|
"epoch": 0.8993235177079189, |
|
"grad_norm": 1.5518095173727842, |
|
"learning_rate": 1.5182676816211632e-08, |
|
"logits/chosen": -1.6441676616668701, |
|
"logits/rejected": -1.5106528997421265, |
|
"logps/chosen": -369.6057434082031, |
|
"logps/rejected": -442.1670837402344, |
|
"loss": 0.0236, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.2719249725341797, |
|
"rewards/margins": 0.5875225067138672, |
|
"rewards/rejected": -2.859447717666626, |
|
"step": 1130 |
|
}, |
|
{ |
|
"debug/losses": 0.02779330313205719, |
|
"debug/policy_weights": 0.04451151564717293, |
|
"debug/raw_losses": 0.6167975068092346, |
|
"epoch": 0.9072821329088738, |
|
"grad_norm": 1.1093999376056516, |
|
"learning_rate": 1.2888473281864597e-08, |
|
"logits/chosen": -1.670432686805725, |
|
"logits/rejected": -1.5831201076507568, |
|
"logps/chosen": -379.52020263671875, |
|
"logps/rejected": -415.2666931152344, |
|
"loss": 0.0234, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.217050075531006, |
|
"rewards/margins": 0.4062577188014984, |
|
"rewards/rejected": -2.623307704925537, |
|
"step": 1140 |
|
}, |
|
{ |
|
"debug/losses": 0.02286931499838829, |
|
"debug/policy_weights": 0.039380840957164764, |
|
"debug/raw_losses": 0.5585586428642273, |
|
"epoch": 0.9152407481098289, |
|
"grad_norm": 1.024708050069526, |
|
"learning_rate": 1.0777529692427679e-08, |
|
"logits/chosen": -1.5818754434585571, |
|
"logits/rejected": -1.4253770112991333, |
|
"logps/chosen": -367.00750732421875, |
|
"logps/rejected": -408.79974365234375, |
|
"loss": 0.0232, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.1622977256774902, |
|
"rewards/margins": 0.5264004468917847, |
|
"rewards/rejected": -2.6886980533599854, |
|
"step": 1150 |
|
}, |
|
{ |
|
"debug/losses": 0.025199243798851967, |
|
"debug/policy_weights": 0.03711647912859917, |
|
"debug/raw_losses": 0.5976914167404175, |
|
"epoch": 0.9231993633107839, |
|
"grad_norm": 1.1752665323906102, |
|
"learning_rate": 8.851477564560061e-09, |
|
"logits/chosen": -1.5626524686813354, |
|
"logits/rejected": -1.385860800743103, |
|
"logps/chosen": -362.3895263671875, |
|
"logps/rejected": -427.11468505859375, |
|
"loss": 0.023, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.2021470069885254, |
|
"rewards/margins": 0.5378574728965759, |
|
"rewards/rejected": -2.740004539489746, |
|
"step": 1160 |
|
}, |
|
{ |
|
"debug/losses": 0.023396309465169907, |
|
"debug/policy_weights": 0.045775819569826126, |
|
"debug/raw_losses": 0.56801837682724, |
|
"epoch": 0.931157978511739, |
|
"grad_norm": 1.0873844543068034, |
|
"learning_rate": 7.111805515081531e-09, |
|
"logits/chosen": -1.593715786933899, |
|
"logits/rejected": -1.4320154190063477, |
|
"logps/chosen": -387.57379150390625, |
|
"logps/rejected": -439.60333251953125, |
|
"loss": 0.0227, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.3309168815612793, |
|
"rewards/margins": 0.5536500215530396, |
|
"rewards/rejected": -2.8845667839050293, |
|
"step": 1170 |
|
}, |
|
{ |
|
"debug/losses": 0.026339393109083176, |
|
"debug/policy_weights": 0.043764419853687286, |
|
"debug/raw_losses": 0.5899370908737183, |
|
"epoch": 0.939116593712694, |
|
"grad_norm": 1.026780874391343, |
|
"learning_rate": 5.559858110443016e-09, |
|
"logits/chosen": -1.6738373041152954, |
|
"logits/rejected": -1.5380009412765503, |
|
"logps/chosen": -377.5504150390625, |
|
"logps/rejected": -429.8271484375, |
|
"loss": 0.0218, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.216423749923706, |
|
"rewards/margins": 0.5017086267471313, |
|
"rewards/rejected": -2.7181320190429688, |
|
"step": 1180 |
|
}, |
|
{ |
|
"debug/losses": 0.01717524789273739, |
|
"debug/policy_weights": 0.033865705132484436, |
|
"debug/raw_losses": 0.5577677488327026, |
|
"epoch": 0.947075208913649, |
|
"grad_norm": 1.2386891264420188, |
|
"learning_rate": 4.196834827531276e-09, |
|
"logits/chosen": -1.5762865543365479, |
|
"logits/rejected": -1.4528791904449463, |
|
"logps/chosen": -376.5120544433594, |
|
"logps/rejected": -440.51055908203125, |
|
"loss": 0.0207, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.3341188430786133, |
|
"rewards/margins": 0.6440759897232056, |
|
"rewards/rejected": -2.9781947135925293, |
|
"step": 1190 |
|
}, |
|
{ |
|
"debug/losses": 0.018522335216403008, |
|
"debug/policy_weights": 0.03287139907479286, |
|
"debug/raw_losses": 0.5465400218963623, |
|
"epoch": 0.955033824114604, |
|
"grad_norm": 1.0907629974938928, |
|
"learning_rate": 3.023789126611137e-09, |
|
"logits/chosen": -1.6226059198379517, |
|
"logits/rejected": -1.4353219270706177, |
|
"logps/chosen": -370.7825927734375, |
|
"logps/rejected": -422.1617126464844, |
|
"loss": 0.0216, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.234524965286255, |
|
"rewards/margins": 0.5858966112136841, |
|
"rewards/rejected": -2.8204216957092285, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.955033824114604, |
|
"eval_debug/losses": 0.021226363256573677, |
|
"eval_debug/policy_weights": 0.03739428520202637, |
|
"eval_debug/raw_losses": 0.5681630373001099, |
|
"eval_logits/chosen": -1.583396553993225, |
|
"eval_logits/rejected": -1.4622374773025513, |
|
"eval_logps/chosen": -375.3782043457031, |
|
"eval_logps/rejected": -435.4866638183594, |
|
"eval_loss": 0.022734906524419785, |
|
"eval_rewards/accuracies": 0.6930969953536987, |
|
"eval_rewards/chosen": -2.311347484588623, |
|
"eval_rewards/margins": 0.536529541015625, |
|
"eval_rewards/rejected": -2.847877264022827, |
|
"eval_runtime": 152.9233, |
|
"eval_samples_per_second": 55.923, |
|
"eval_steps_per_second": 0.876, |
|
"step": 1200 |
|
}, |
|
{ |
|
"debug/losses": 0.01947428658604622, |
|
"debug/policy_weights": 0.03627028688788414, |
|
"debug/raw_losses": 0.5707622766494751, |
|
"epoch": 0.9629924393155591, |
|
"grad_norm": 1.0632313642647104, |
|
"learning_rate": 2.041627637121929e-09, |
|
"logits/chosen": -1.5518733263015747, |
|
"logits/rejected": -1.381583333015442, |
|
"logps/chosen": -376.3943786621094, |
|
"logps/rejected": -449.06536865234375, |
|
"loss": 0.0218, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.3494064807891846, |
|
"rewards/margins": 0.5541393756866455, |
|
"rewards/rejected": -2.90354585647583, |
|
"step": 1210 |
|
}, |
|
{ |
|
"debug/losses": 0.02166592888534069, |
|
"debug/policy_weights": 0.03883267566561699, |
|
"debug/raw_losses": 0.6050316095352173, |
|
"epoch": 0.9709510545165141, |
|
"grad_norm": 1.2818450205185703, |
|
"learning_rate": 1.2511094569571668e-09, |
|
"logits/chosen": -1.5095674991607666, |
|
"logits/rejected": -1.303008794784546, |
|
"logps/chosen": -378.7860412597656, |
|
"logps/rejected": -401.0227966308594, |
|
"loss": 0.0226, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.3036277294158936, |
|
"rewards/margins": 0.4543466567993164, |
|
"rewards/rejected": -2.757974147796631, |
|
"step": 1220 |
|
}, |
|
{ |
|
"debug/losses": 0.019198119640350342, |
|
"debug/policy_weights": 0.03157269209623337, |
|
"debug/raw_losses": 0.5717985033988953, |
|
"epoch": 0.9789096697174692, |
|
"grad_norm": 1.048649146210472, |
|
"learning_rate": 6.528455657691112e-10, |
|
"logits/chosen": -1.4732084274291992, |
|
"logits/rejected": -1.4151369333267212, |
|
"logps/chosen": -380.28582763671875, |
|
"logps/rejected": -445.0985412597656, |
|
"loss": 0.0206, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.415482759475708, |
|
"rewards/margins": 0.5356068015098572, |
|
"rewards/rejected": -2.951089382171631, |
|
"step": 1230 |
|
}, |
|
{ |
|
"debug/losses": 0.022800814360380173, |
|
"debug/policy_weights": 0.03683259338140488, |
|
"debug/raw_losses": 0.5676442980766296, |
|
"epoch": 0.9868682849184242, |
|
"grad_norm": 1.3029480412499796, |
|
"learning_rate": 2.4729835275189016e-10, |
|
"logits/chosen": -1.5280263423919678, |
|
"logits/rejected": -1.38931405544281, |
|
"logps/chosen": -378.03851318359375, |
|
"logps/rejected": -444.1370544433594, |
|
"loss": 0.0213, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.373324394226074, |
|
"rewards/margins": 0.5974202156066895, |
|
"rewards/rejected": -2.9707443714141846, |
|
"step": 1240 |
|
}, |
|
{ |
|
"debug/losses": 0.018399138003587723, |
|
"debug/policy_weights": 0.034402213990688324, |
|
"debug/raw_losses": 0.5340577363967896, |
|
"epoch": 0.9948269001193792, |
|
"grad_norm": 1.3789162938036494, |
|
"learning_rate": 3.478125926756337e-11, |
|
"logits/chosen": -1.4897087812423706, |
|
"logits/rejected": -1.3781123161315918, |
|
"logps/chosen": -379.60504150390625, |
|
"logps/rejected": -452.0511169433594, |
|
"loss": 0.0207, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.3788774013519287, |
|
"rewards/margins": 0.6080917716026306, |
|
"rewards/rejected": -2.986969470977783, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9996020692399522, |
|
"step": 1256, |
|
"total_flos": 0.0, |
|
"train_loss": 0.048100706314442646, |
|
"train_runtime": 10605.3952, |
|
"train_samples_per_second": 15.162, |
|
"train_steps_per_second": 0.118 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1256, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|