|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 478, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 8.482095207381906, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -2.715719223022461, |
|
"logits/rejected": -2.648977279663086, |
|
"logps/chosen": -280.43304443359375, |
|
"logps/rejected": -269.5838623046875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 8.16883844563723, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.732839345932007, |
|
"logits/rejected": -2.667829990386963, |
|
"logps/chosen": -252.18836975097656, |
|
"logps/rejected": -247.36721801757812, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4027777910232544, |
|
"rewards/chosen": -0.00020123104332014918, |
|
"rewards/margins": -0.000496760243549943, |
|
"rewards/rejected": 0.00029552922933362424, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 14.177063698718772, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.69161057472229, |
|
"logits/rejected": -2.6473488807678223, |
|
"logps/chosen": -281.9129333496094, |
|
"logps/rejected": -240.79598999023438, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0002729900588747114, |
|
"rewards/margins": 0.0022440399043262005, |
|
"rewards/rejected": -0.002517030341550708, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 9.049842796547402, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.7057762145996094, |
|
"logits/rejected": -2.6666598320007324, |
|
"logps/chosen": -263.8976135253906, |
|
"logps/rejected": -272.29376220703125, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.005987819284200668, |
|
"rewards/margins": 0.008191236294806004, |
|
"rewards/rejected": -0.0022034167777746916, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 8.064536836305289, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.644676446914673, |
|
"logits/rejected": -2.6523287296295166, |
|
"logps/chosen": -250.64651489257812, |
|
"logps/rejected": -240.74032592773438, |
|
"loss": 0.6775, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.03351370617747307, |
|
"rewards/margins": 0.024272698909044266, |
|
"rewards/rejected": 0.009241009131073952, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 10.48307069571783, |
|
"learning_rate": 4.999733114418725e-07, |
|
"logits/chosen": -2.61690616607666, |
|
"logits/rejected": -2.606698989868164, |
|
"logps/chosen": -280.32537841796875, |
|
"logps/rejected": -285.14044189453125, |
|
"loss": 0.6572, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.027328694239258766, |
|
"rewards/margins": 0.08270208537578583, |
|
"rewards/rejected": -0.05537338927388191, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 11.217495552902317, |
|
"learning_rate": 4.990398100856366e-07, |
|
"logits/chosen": -2.6028363704681396, |
|
"logits/rejected": -2.567634344100952, |
|
"logps/chosen": -257.1751403808594, |
|
"logps/rejected": -286.7103271484375, |
|
"loss": 0.6382, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.04173869267106056, |
|
"rewards/margins": 0.14793893694877625, |
|
"rewards/rejected": -0.1896776258945465, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 17.002512653941917, |
|
"learning_rate": 4.967775735898179e-07, |
|
"logits/chosen": -2.5477585792541504, |
|
"logits/rejected": -2.5538854598999023, |
|
"logps/chosen": -292.3646240234375, |
|
"logps/rejected": -295.1927795410156, |
|
"loss": 0.6089, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.17977146804332733, |
|
"rewards/margins": 0.22043642401695251, |
|
"rewards/rejected": -0.40020790696144104, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 20.83722381714638, |
|
"learning_rate": 4.931986719649298e-07, |
|
"logits/chosen": -2.428433895111084, |
|
"logits/rejected": -2.4765429496765137, |
|
"logps/chosen": -269.8500671386719, |
|
"logps/rejected": -280.10003662109375, |
|
"loss": 0.6115, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.11873555183410645, |
|
"rewards/margins": 0.21846923232078552, |
|
"rewards/rejected": -0.3372047543525696, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 20.582635256945927, |
|
"learning_rate": 4.883222001996351e-07, |
|
"logits/chosen": -2.3149566650390625, |
|
"logits/rejected": -2.3007912635803223, |
|
"logps/chosen": -331.8517761230469, |
|
"logps/rejected": -353.4700012207031, |
|
"loss": 0.6084, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.4723566174507141, |
|
"rewards/margins": 0.2896661162376404, |
|
"rewards/rejected": -0.7620226740837097, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 15.474211826471516, |
|
"learning_rate": 4.821741763807186e-07, |
|
"logits/chosen": -2.2471911907196045, |
|
"logits/rejected": -2.2106168270111084, |
|
"logps/chosen": -346.5975646972656, |
|
"logps/rejected": -342.70501708984375, |
|
"loss": 0.5875, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5387172698974609, |
|
"rewards/margins": 0.4208316206932068, |
|
"rewards/rejected": -0.9595489501953125, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -1.839242935180664, |
|
"eval_logits/rejected": -1.8929309844970703, |
|
"eval_logps/chosen": -327.45477294921875, |
|
"eval_logps/rejected": -373.7126159667969, |
|
"eval_loss": 0.5814013481140137, |
|
"eval_rewards/accuracies": 0.6953125, |
|
"eval_rewards/chosen": -0.6485257148742676, |
|
"eval_rewards/margins": 0.46177732944488525, |
|
"eval_rewards/rejected": -1.1103030443191528, |
|
"eval_runtime": 42.9635, |
|
"eval_samples_per_second": 46.551, |
|
"eval_steps_per_second": 0.745, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 20.231938142939548, |
|
"learning_rate": 4.747874028753375e-07, |
|
"logits/chosen": -1.7017923593521118, |
|
"logits/rejected": -1.6744320392608643, |
|
"logps/chosen": -330.83038330078125, |
|
"logps/rejected": -384.0965576171875, |
|
"loss": 0.559, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.6345968246459961, |
|
"rewards/margins": 0.5422910451889038, |
|
"rewards/rejected": -1.1768878698349, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 33.96451578064435, |
|
"learning_rate": 4.662012913161997e-07, |
|
"logits/chosen": -1.5577061176300049, |
|
"logits/rejected": -1.454756498336792, |
|
"logps/chosen": -354.79632568359375, |
|
"logps/rejected": -374.60076904296875, |
|
"loss": 0.5508, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6455836892127991, |
|
"rewards/margins": 0.6170965433120728, |
|
"rewards/rejected": -1.2626802921295166, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 22.138057395490627, |
|
"learning_rate": 4.5646165232345103e-07, |
|
"logits/chosen": -1.985395073890686, |
|
"logits/rejected": -1.7330490350723267, |
|
"logps/chosen": -381.68524169921875, |
|
"logps/rejected": -393.6003723144531, |
|
"loss": 0.5691, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6896110773086548, |
|
"rewards/margins": 0.46295279264450073, |
|
"rewards/rejected": -1.1525638103485107, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 22.948235111766778, |
|
"learning_rate": 4.456204510851956e-07, |
|
"logits/chosen": -1.7121162414550781, |
|
"logits/rejected": -1.4992659091949463, |
|
"logps/chosen": -352.444580078125, |
|
"logps/rejected": -359.44293212890625, |
|
"loss": 0.5444, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7159131765365601, |
|
"rewards/margins": 0.5610858798027039, |
|
"rewards/rejected": -1.2769991159439087, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 21.94614729668817, |
|
"learning_rate": 4.337355301007335e-07, |
|
"logits/chosen": -1.6999915838241577, |
|
"logits/rejected": -1.5280932188034058, |
|
"logps/chosen": -363.3777770996094, |
|
"logps/rejected": -409.53472900390625, |
|
"loss": 0.5354, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6176407933235168, |
|
"rewards/margins": 0.7246734499931335, |
|
"rewards/rejected": -1.3423142433166504, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 25.951614311748312, |
|
"learning_rate": 4.2087030056579986e-07, |
|
"logits/chosen": -0.8817731142044067, |
|
"logits/rejected": -0.6137579679489136, |
|
"logps/chosen": -382.1938171386719, |
|
"logps/rejected": -432.11004638671875, |
|
"loss": 0.5293, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0363513231277466, |
|
"rewards/margins": 0.7439759373664856, |
|
"rewards/rejected": -1.7803272008895874, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 22.62183660087882, |
|
"learning_rate": 4.070934040463998e-07, |
|
"logits/chosen": -0.8362399339675903, |
|
"logits/rejected": -0.5129006505012512, |
|
"logps/chosen": -383.85943603515625, |
|
"logps/rejected": -450.67376708984375, |
|
"loss": 0.5396, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1981004476547241, |
|
"rewards/margins": 0.5252794623374939, |
|
"rewards/rejected": -1.7233800888061523, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 20.471718314655647, |
|
"learning_rate": 3.9247834624635404e-07, |
|
"logits/chosen": -1.1186776161193848, |
|
"logits/rejected": -0.807415783405304, |
|
"logps/chosen": -367.24517822265625, |
|
"logps/rejected": -383.65325927734375, |
|
"loss": 0.5423, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7441693544387817, |
|
"rewards/margins": 0.7058154344558716, |
|
"rewards/rejected": -1.4499847888946533, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 27.806972171143425, |
|
"learning_rate": 3.7710310482256523e-07, |
|
"logits/chosen": -0.8124464750289917, |
|
"logits/rejected": -0.3878273069858551, |
|
"logps/chosen": -388.7113342285156, |
|
"logps/rejected": -429.1011657714844, |
|
"loss": 0.5411, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.1005661487579346, |
|
"rewards/margins": 0.6401538252830505, |
|
"rewards/rejected": -1.7407200336456299, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 26.53253409564454, |
|
"learning_rate": 3.610497133404795e-07, |
|
"logits/chosen": -0.11316045373678207, |
|
"logits/rejected": 0.337258517742157, |
|
"logps/chosen": -417.2351989746094, |
|
"logps/rejected": -437.7659606933594, |
|
"loss": 0.5306, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.1853481531143188, |
|
"rewards/margins": 0.7383456826210022, |
|
"rewards/rejected": -1.9236938953399658, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -0.4834875464439392, |
|
"eval_logits/rejected": -0.16467474400997162, |
|
"eval_logps/chosen": -377.3648681640625, |
|
"eval_logps/rejected": -458.6296691894531, |
|
"eval_loss": 0.5258087515830994, |
|
"eval_rewards/accuracies": 0.7578125, |
|
"eval_rewards/chosen": -1.1476268768310547, |
|
"eval_rewards/margins": 0.8118469715118408, |
|
"eval_rewards/rejected": -1.959473729133606, |
|
"eval_runtime": 42.8446, |
|
"eval_samples_per_second": 46.68, |
|
"eval_steps_per_second": 0.747, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 32.607007304168285, |
|
"learning_rate": 3.4440382358952115e-07, |
|
"logits/chosen": -0.4726165235042572, |
|
"logits/rejected": 0.2889423966407776, |
|
"logps/chosen": -445.2005310058594, |
|
"logps/rejected": -516.150390625, |
|
"loss": 0.5157, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.5719295740127563, |
|
"rewards/margins": 0.899932861328125, |
|
"rewards/rejected": -2.471862316131592, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 26.982376912169133, |
|
"learning_rate": 3.272542485937368e-07, |
|
"logits/chosen": 0.02971530519425869, |
|
"logits/rejected": 0.6972896456718445, |
|
"logps/chosen": -488.6690979003906, |
|
"logps/rejected": -517.7215576171875, |
|
"loss": 0.5045, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.9397350549697876, |
|
"rewards/margins": 0.67905592918396, |
|
"rewards/rejected": -2.618790626525879, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 28.31686307704009, |
|
"learning_rate": 3.096924887558854e-07, |
|
"logits/chosen": 0.3274112641811371, |
|
"logits/rejected": 1.6159236431121826, |
|
"logps/chosen": -472.36590576171875, |
|
"logps/rejected": -560.3911743164062, |
|
"loss": 0.5157, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.139970302581787, |
|
"rewards/margins": 0.9002918004989624, |
|
"rewards/rejected": -3.040262460708618, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 26.93455267413187, |
|
"learning_rate": 2.9181224366319943e-07, |
|
"logits/chosen": 0.34222739934921265, |
|
"logits/rejected": 1.275580644607544, |
|
"logps/chosen": -493.95916748046875, |
|
"logps/rejected": -570.4260864257812, |
|
"loss": 0.4844, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.126845598220825, |
|
"rewards/margins": 0.9677503705024719, |
|
"rewards/rejected": -3.0945961475372314, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 34.97761081158377, |
|
"learning_rate": 2.7370891215954565e-07, |
|
"logits/chosen": 0.15051239728927612, |
|
"logits/rejected": 1.3550792932510376, |
|
"logps/chosen": -447.21490478515625, |
|
"logps/rejected": -547.626708984375, |
|
"loss": 0.5157, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.229414463043213, |
|
"rewards/margins": 1.004492998123169, |
|
"rewards/rejected": -3.2339072227478027, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 22.98186057582904, |
|
"learning_rate": 2.55479083351317e-07, |
|
"logits/chosen": -0.6737252473831177, |
|
"logits/rejected": -0.22596630454063416, |
|
"logps/chosen": -423.21124267578125, |
|
"logps/rejected": -537.1641845703125, |
|
"loss": 0.4931, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5507694482803345, |
|
"rewards/margins": 1.1678037643432617, |
|
"rewards/rejected": -2.7185730934143066, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 24.568187901601522, |
|
"learning_rate": 2.3722002126275822e-07, |
|
"logits/chosen": -0.7251144647598267, |
|
"logits/rejected": 0.055858515202999115, |
|
"logps/chosen": -460.47369384765625, |
|
"logps/rejected": -515.4779663085938, |
|
"loss": 0.5056, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7928097248077393, |
|
"rewards/margins": 0.9766537547111511, |
|
"rewards/rejected": -2.769463300704956, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 26.18745823449182, |
|
"learning_rate": 2.19029145890313e-07, |
|
"logits/chosen": -0.2248018980026245, |
|
"logits/rejected": 0.7499777674674988, |
|
"logps/chosen": -570.017822265625, |
|
"logps/rejected": -637.1007690429688, |
|
"loss": 0.4807, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.686659097671509, |
|
"rewards/margins": 1.049829125404358, |
|
"rewards/rejected": -3.736487865447998, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 27.843254345626296, |
|
"learning_rate": 2.0100351342479216e-07, |
|
"logits/chosen": 0.017834633588790894, |
|
"logits/rejected": 0.5087012052536011, |
|
"logps/chosen": -523.2457275390625, |
|
"logps/rejected": -614.5767211914062, |
|
"loss": 0.4986, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.559013605117798, |
|
"rewards/margins": 0.9906851053237915, |
|
"rewards/rejected": -3.5496985912323, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 28.812343531542734, |
|
"learning_rate": 1.8323929841460178e-07, |
|
"logits/chosen": -0.09833024442195892, |
|
"logits/rejected": 0.13667245209217072, |
|
"logps/chosen": -530.9118041992188, |
|
"logps/rejected": -594.4251098632812, |
|
"loss": 0.5097, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.618612766265869, |
|
"rewards/margins": 0.6055675745010376, |
|
"rewards/rejected": -3.224180221557617, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -0.46580713987350464, |
|
"eval_logits/rejected": -0.05736924335360527, |
|
"eval_logps/chosen": -498.608642578125, |
|
"eval_logps/rejected": -600.8517456054688, |
|
"eval_loss": 0.5078553557395935, |
|
"eval_rewards/accuracies": 0.765625, |
|
"eval_rewards/chosen": -2.3600645065307617, |
|
"eval_rewards/margins": 1.021630048751831, |
|
"eval_rewards/rejected": -3.3816945552825928, |
|
"eval_runtime": 42.9133, |
|
"eval_samples_per_second": 46.606, |
|
"eval_steps_per_second": 0.746, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 24.395908084810543, |
|
"learning_rate": 1.6583128063291573e-07, |
|
"logits/chosen": -0.20998772978782654, |
|
"logits/rejected": 0.597333550453186, |
|
"logps/chosen": -553.8638305664062, |
|
"logps/rejected": -622.6854248046875, |
|
"loss": 0.4601, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.6587719917297363, |
|
"rewards/margins": 1.019250512123108, |
|
"rewards/rejected": -3.678022861480713, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 34.66768420911215, |
|
"learning_rate": 1.488723393865766e-07, |
|
"logits/chosen": 0.16718712449073792, |
|
"logits/rejected": 0.9329349398612976, |
|
"logps/chosen": -593.7220458984375, |
|
"logps/rejected": -669.1134033203125, |
|
"loss": 0.4733, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.0446815490722656, |
|
"rewards/margins": 1.103857159614563, |
|
"rewards/rejected": -4.1485395431518555, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 28.471803594507218, |
|
"learning_rate": 1.3245295796480788e-07, |
|
"logits/chosen": -0.11941705644130707, |
|
"logits/rejected": 0.6031097173690796, |
|
"logps/chosen": -576.390869140625, |
|
"logps/rejected": -662.9435424804688, |
|
"loss": 0.4956, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.895800828933716, |
|
"rewards/margins": 1.0525661706924438, |
|
"rewards/rejected": -3.9483673572540283, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 29.920730024726208, |
|
"learning_rate": 1.1666074087171627e-07, |
|
"logits/chosen": -0.6586230397224426, |
|
"logits/rejected": -0.01618196628987789, |
|
"logps/chosen": -558.61328125, |
|
"logps/rejected": -645.0929565429688, |
|
"loss": 0.4901, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.6631388664245605, |
|
"rewards/margins": 0.9750139117240906, |
|
"rewards/rejected": -3.638153076171875, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 28.451943555198234, |
|
"learning_rate": 1.0157994641835734e-07, |
|
"logits/chosen": -0.27327781915664673, |
|
"logits/rejected": 0.1500699818134308, |
|
"logps/chosen": -551.64501953125, |
|
"logps/rejected": -615.7404174804688, |
|
"loss": 0.5079, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.8532228469848633, |
|
"rewards/margins": 0.7952502369880676, |
|
"rewards/rejected": -3.6484732627868652, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 25.677922944710783, |
|
"learning_rate": 8.729103716819111e-08, |
|
"logits/chosen": -0.08269649744033813, |
|
"logits/rejected": 0.27684250473976135, |
|
"logps/chosen": -541.1990966796875, |
|
"logps/rejected": -651.2036743164062, |
|
"loss": 0.4674, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.9707937240600586, |
|
"rewards/margins": 0.9196038246154785, |
|
"rewards/rejected": -3.8903980255126953, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 36.170517312989006, |
|
"learning_rate": 7.387025063449081e-08, |
|
"logits/chosen": -0.2130926102399826, |
|
"logits/rejected": 0.6245878338813782, |
|
"logps/chosen": -593.176513671875, |
|
"logps/rejected": -702.429443359375, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.0834755897521973, |
|
"rewards/margins": 1.1826813220977783, |
|
"rewards/rejected": -4.266157150268555, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 29.948938424368286, |
|
"learning_rate": 6.138919252022435e-08, |
|
"logits/chosen": -0.31227773427963257, |
|
"logits/rejected": 0.1758570820093155, |
|
"logps/chosen": -569.8441162109375, |
|
"logps/rejected": -653.1160278320312, |
|
"loss": 0.4834, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.861605644226074, |
|
"rewards/margins": 1.1251481771469116, |
|
"rewards/rejected": -3.9867539405822754, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 28.643050867948876, |
|
"learning_rate": 4.991445467064689e-08, |
|
"logits/chosen": -0.4378163814544678, |
|
"logits/rejected": 0.1975017786026001, |
|
"logps/chosen": -487.02447509765625, |
|
"logps/rejected": -627.4256591796875, |
|
"loss": 0.4863, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -2.4755725860595703, |
|
"rewards/margins": 1.2951563596725464, |
|
"rewards/rejected": -3.770728588104248, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 28.740512536833982, |
|
"learning_rate": 3.9507259776993954e-08, |
|
"logits/chosen": -0.464282363653183, |
|
"logits/rejected": 0.3086758852005005, |
|
"logps/chosen": -510.72564697265625, |
|
"logps/rejected": -584.2462158203125, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.4956374168395996, |
|
"rewards/margins": 0.9018915891647339, |
|
"rewards/rejected": -3.397528886795044, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -0.5081287026405334, |
|
"eval_logits/rejected": -0.03897371515631676, |
|
"eval_logps/chosen": -499.4172058105469, |
|
"eval_logps/rejected": -610.7911376953125, |
|
"eval_loss": 0.49998462200164795, |
|
"eval_rewards/accuracies": 0.76953125, |
|
"eval_rewards/chosen": -2.368149995803833, |
|
"eval_rewards/margins": 1.1129380464553833, |
|
"eval_rewards/rejected": -3.4810879230499268, |
|
"eval_runtime": 43.5613, |
|
"eval_samples_per_second": 45.912, |
|
"eval_steps_per_second": 0.735, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 29.137185743173024, |
|
"learning_rate": 3.022313472693447e-08, |
|
"logits/chosen": -0.427814781665802, |
|
"logits/rejected": 0.47378939390182495, |
|
"logps/chosen": -502.87939453125, |
|
"logps/rejected": -579.4691162109375, |
|
"loss": 0.5048, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.4262542724609375, |
|
"rewards/margins": 0.9697957038879395, |
|
"rewards/rejected": -3.396049976348877, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 28.72794205317694, |
|
"learning_rate": 2.2111614344599684e-08, |
|
"logits/chosen": -0.4604805111885071, |
|
"logits/rejected": 0.19487139582633972, |
|
"logps/chosen": -517.6533813476562, |
|
"logps/rejected": -557.2385864257812, |
|
"loss": 0.4911, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.416501760482788, |
|
"rewards/margins": 0.9597095251083374, |
|
"rewards/rejected": -3.376211643218994, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 21.80245746988776, |
|
"learning_rate": 1.521597710086439e-08, |
|
"logits/chosen": -0.3804924488067627, |
|
"logits/rejected": 0.39526861906051636, |
|
"logps/chosen": -535.6771240234375, |
|
"logps/rejected": -618.59326171875, |
|
"loss": 0.4719, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.5517053604125977, |
|
"rewards/margins": 1.1444313526153564, |
|
"rewards/rejected": -3.696136474609375, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 25.533460874638475, |
|
"learning_rate": 9.57301420397924e-09, |
|
"logits/chosen": -0.2722262442111969, |
|
"logits/rejected": 0.6270692944526672, |
|
"logps/chosen": -534.4539184570312, |
|
"logps/rejected": -636.7295532226562, |
|
"loss": 0.4921, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.657174587249756, |
|
"rewards/margins": 1.1696617603302002, |
|
"rewards/rejected": -3.826836347579956, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 29.41617255057514, |
|
"learning_rate": 5.212833302556258e-09, |
|
"logits/chosen": -0.4902656078338623, |
|
"logits/rejected": 0.2050538957118988, |
|
"logps/chosen": -552.1812744140625, |
|
"logps/rejected": -615.8868408203125, |
|
"loss": 0.4784, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.5670862197875977, |
|
"rewards/margins": 1.0947918891906738, |
|
"rewards/rejected": -3.6618781089782715, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 26.40697406008373, |
|
"learning_rate": 2.158697848236607e-09, |
|
"logits/chosen": -0.5525180697441101, |
|
"logits/rejected": 0.4095240533351898, |
|
"logps/chosen": -521.8342895507812, |
|
"logps/rejected": -599.0489501953125, |
|
"loss": 0.4933, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.4641835689544678, |
|
"rewards/margins": 1.049481987953186, |
|
"rewards/rejected": -3.5136656761169434, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 40.87484210645662, |
|
"learning_rate": 4.269029751107489e-10, |
|
"logits/chosen": -0.38540196418762207, |
|
"logits/rejected": 0.25467342138290405, |
|
"logps/chosen": -550.1776123046875, |
|
"logps/rejected": -633.5750732421875, |
|
"loss": 0.4908, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.6054952144622803, |
|
"rewards/margins": 0.973294734954834, |
|
"rewards/rejected": -3.5787901878356934, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 478, |
|
"total_flos": 0.0, |
|
"train_loss": 0.535196884905444, |
|
"train_runtime": 3527.4214, |
|
"train_samples_per_second": 17.331, |
|
"train_steps_per_second": 0.136 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 478, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|