|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9998089050257978, |
|
"eval_steps": 100, |
|
"global_step": 2616, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.8359375, |
|
"learning_rate": 1.908396946564885e-09, |
|
"logits/chosen": -3.3302907943725586, |
|
"logits/rejected": -3.269564628601074, |
|
"logps/chosen": -81.06770324707031, |
|
"logps/rejected": -120.27629089355469, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/margins_max": 0.0, |
|
"rewards/margins_min": 0.0, |
|
"rewards/margins_std": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.4296875, |
|
"learning_rate": 1.9083969465648856e-08, |
|
"logits/chosen": -3.2870726585388184, |
|
"logits/rejected": -3.03167986869812, |
|
"logps/chosen": -196.69744873046875, |
|
"logps/rejected": -238.91517639160156, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4027777910232544, |
|
"rewards/chosen": -0.00020526793377939612, |
|
"rewards/margins": -0.0005111552309244871, |
|
"rewards/margins_max": 0.0015908111818134785, |
|
"rewards/margins_min": -0.0026131218764930964, |
|
"rewards/margins_std": 0.002972629852592945, |
|
"rewards/rejected": 0.0003058873408008367, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.21875, |
|
"learning_rate": 3.816793893129771e-08, |
|
"logits/chosen": -3.2914886474609375, |
|
"logits/rejected": -2.96828293800354, |
|
"logps/chosen": -205.34304809570312, |
|
"logps/rejected": -226.73660278320312, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.00018589093815535307, |
|
"rewards/margins": 0.0002819823566824198, |
|
"rewards/margins_max": 0.0020934194326400757, |
|
"rewards/margins_min": -0.001529454835690558, |
|
"rewards/margins_std": 0.0025617589708417654, |
|
"rewards/rejected": -9.609150583855808e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.953125, |
|
"learning_rate": 5.725190839694656e-08, |
|
"logits/chosen": -3.212151288986206, |
|
"logits/rejected": -2.9446871280670166, |
|
"logps/chosen": -218.0032196044922, |
|
"logps/rejected": -250.72683715820312, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 4.363178595667705e-05, |
|
"rewards/margins": 0.0007587060681544244, |
|
"rewards/margins_max": 0.002750576240941882, |
|
"rewards/margins_min": -0.001233164221048355, |
|
"rewards/margins_std": 0.002816930180415511, |
|
"rewards/rejected": -0.0007150743040256202, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.0, |
|
"learning_rate": 7.633587786259542e-08, |
|
"logits/chosen": -3.209655284881592, |
|
"logits/rejected": -2.982962131500244, |
|
"logps/chosen": -203.66883850097656, |
|
"logps/rejected": -245.7393341064453, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.00010408814705442637, |
|
"rewards/margins": 0.0006946413777768612, |
|
"rewards/margins_max": 0.0033363462425768375, |
|
"rewards/margins_min": -0.0019470632541924715, |
|
"rewards/margins_std": 0.003735934616997838, |
|
"rewards/rejected": -0.000798729422967881, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.296875, |
|
"learning_rate": 9.541984732824428e-08, |
|
"logits/chosen": -3.225496768951416, |
|
"logits/rejected": -2.956040620803833, |
|
"logps/chosen": -225.0539093017578, |
|
"logps/rejected": -225.1595458984375, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.000378791824914515, |
|
"rewards/margins": 0.0016400141175836325, |
|
"rewards/margins_max": 0.004391551483422518, |
|
"rewards/margins_min": -0.0011115235975012183, |
|
"rewards/margins_std": 0.0038912619929760695, |
|
"rewards/rejected": -0.0020188060589134693, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.25, |
|
"learning_rate": 1.1450381679389312e-07, |
|
"logits/chosen": -3.2494418621063232, |
|
"logits/rejected": -2.886786460876465, |
|
"logps/chosen": -215.2377166748047, |
|
"logps/rejected": -227.4031219482422, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.0002904659486375749, |
|
"rewards/margins": 0.003191638272255659, |
|
"rewards/margins_max": 0.005964426789432764, |
|
"rewards/margins_min": 0.0004188496677670628, |
|
"rewards/margins_std": 0.00392131507396698, |
|
"rewards/rejected": -0.0029011727310717106, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.59375, |
|
"learning_rate": 1.3358778625954197e-07, |
|
"logits/chosen": -3.2788949012756348, |
|
"logits/rejected": -2.9262871742248535, |
|
"logps/chosen": -179.19461059570312, |
|
"logps/rejected": -200.50384521484375, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.000643759616650641, |
|
"rewards/margins": 0.005182401277124882, |
|
"rewards/margins_max": 0.008886445313692093, |
|
"rewards/margins_min": 0.0014783585211262107, |
|
"rewards/margins_std": 0.005238307174295187, |
|
"rewards/rejected": -0.004538641776889563, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.375, |
|
"learning_rate": 1.5267175572519085e-07, |
|
"logits/chosen": -3.2208304405212402, |
|
"logits/rejected": -2.935650110244751, |
|
"logps/chosen": -209.3942413330078, |
|
"logps/rejected": -238.1333465576172, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.0011481496039777994, |
|
"rewards/margins": 0.004855319391936064, |
|
"rewards/margins_max": 0.007420788519084454, |
|
"rewards/margins_min": 0.0022898507304489613, |
|
"rewards/margins_std": 0.0036281212233006954, |
|
"rewards/rejected": -0.006003469228744507, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.625, |
|
"learning_rate": 1.717557251908397e-07, |
|
"logits/chosen": -3.2265000343322754, |
|
"logits/rejected": -2.9264445304870605, |
|
"logps/chosen": -220.7260284423828, |
|
"logps/rejected": -239.3951873779297, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -6.91156237735413e-05, |
|
"rewards/margins": 0.008305264636874199, |
|
"rewards/margins_max": 0.013177113607525826, |
|
"rewards/margins_min": 0.003433419391512871, |
|
"rewards/margins_std": 0.0068898312747478485, |
|
"rewards/rejected": -0.008374381810426712, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.015625, |
|
"learning_rate": 1.9083969465648855e-07, |
|
"logits/chosen": -3.179041862487793, |
|
"logits/rejected": -2.956526041030884, |
|
"logps/chosen": -163.98475646972656, |
|
"logps/rejected": -195.57383728027344, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.0005407828139141202, |
|
"rewards/margins": 0.010149060748517513, |
|
"rewards/margins_max": 0.01537814736366272, |
|
"rewards/margins_min": 0.004919976461678743, |
|
"rewards/margins_std": 0.007395043037831783, |
|
"rewards/rejected": -0.010689844377338886, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.96875, |
|
"learning_rate": 2.0992366412213738e-07, |
|
"logits/chosen": -3.272021532058716, |
|
"logits/rejected": -3.0388495922088623, |
|
"logps/chosen": -198.17446899414062, |
|
"logps/rejected": -233.8720703125, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.00015886728942859918, |
|
"rewards/margins": 0.01238742470741272, |
|
"rewards/margins_max": 0.017614034935832024, |
|
"rewards/margins_min": 0.007160813547670841, |
|
"rewards/margins_std": 0.00739154452458024, |
|
"rewards/rejected": -0.012546291574835777, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.8203125, |
|
"learning_rate": 2.2900763358778623e-07, |
|
"logits/chosen": -3.2624504566192627, |
|
"logits/rejected": -3.0127363204956055, |
|
"logps/chosen": -185.32650756835938, |
|
"logps/rejected": -233.5771026611328, |
|
"loss": 0.6853, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.0006447458872571588, |
|
"rewards/margins": 0.015380805358290672, |
|
"rewards/margins_max": 0.022997872903943062, |
|
"rewards/margins_min": 0.007763735018670559, |
|
"rewards/margins_std": 0.01077216025441885, |
|
"rewards/rejected": -0.016025548800826073, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.96875, |
|
"learning_rate": 2.480916030534351e-07, |
|
"logits/chosen": -3.210339069366455, |
|
"logits/rejected": -2.8807480335235596, |
|
"logps/chosen": -192.64651489257812, |
|
"logps/rejected": -240.62496948242188, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.0016148982103914022, |
|
"rewards/margins": 0.021930258721113205, |
|
"rewards/margins_max": 0.03173477575182915, |
|
"rewards/margins_min": 0.012125745415687561, |
|
"rewards/margins_std": 0.013865679502487183, |
|
"rewards/rejected": -0.02354515716433525, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.265625, |
|
"learning_rate": 2.6717557251908394e-07, |
|
"logits/chosen": -3.1735782623291016, |
|
"logits/rejected": -2.892324447631836, |
|
"logps/chosen": -202.73878479003906, |
|
"logps/rejected": -245.81185913085938, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.00102281104773283, |
|
"rewards/margins": 0.022799000144004822, |
|
"rewards/margins_max": 0.0319000706076622, |
|
"rewards/margins_min": 0.013697926886379719, |
|
"rewards/margins_std": 0.012870860286056995, |
|
"rewards/rejected": -0.023821810260415077, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.046875, |
|
"learning_rate": 2.8625954198473276e-07, |
|
"logits/chosen": -3.2441039085388184, |
|
"logits/rejected": -2.972184896469116, |
|
"logps/chosen": -170.27938842773438, |
|
"logps/rejected": -226.83462524414062, |
|
"loss": 0.6806, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.00395964365452528, |
|
"rewards/margins": 0.026034507900476456, |
|
"rewards/margins_max": 0.036855850368738174, |
|
"rewards/margins_min": 0.015213166363537312, |
|
"rewards/margins_std": 0.015303686261177063, |
|
"rewards/rejected": -0.02999415434896946, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.328125, |
|
"learning_rate": 3.053435114503817e-07, |
|
"logits/chosen": -3.2375476360321045, |
|
"logits/rejected": -2.889326333999634, |
|
"logps/chosen": -189.0028533935547, |
|
"logps/rejected": -238.2917022705078, |
|
"loss": 0.6786, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.003632964100688696, |
|
"rewards/margins": 0.03097444772720337, |
|
"rewards/margins_max": 0.03957698494195938, |
|
"rewards/margins_min": 0.02237190678715706, |
|
"rewards/margins_std": 0.012165828607976437, |
|
"rewards/rejected": -0.0346074104309082, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.796875, |
|
"learning_rate": 3.244274809160305e-07, |
|
"logits/chosen": -3.205308437347412, |
|
"logits/rejected": -2.902846097946167, |
|
"logps/chosen": -213.3687744140625, |
|
"logps/rejected": -253.6230010986328, |
|
"loss": 0.6758, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.0031909930985420942, |
|
"rewards/margins": 0.03212432190775871, |
|
"rewards/margins_max": 0.04667423292994499, |
|
"rewards/margins_min": 0.017574409022927284, |
|
"rewards/margins_std": 0.020576683804392815, |
|
"rewards/rejected": -0.03531531244516373, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.015625, |
|
"learning_rate": 3.435114503816794e-07, |
|
"logits/chosen": -3.233248233795166, |
|
"logits/rejected": -2.934068202972412, |
|
"logps/chosen": -163.8170623779297, |
|
"logps/rejected": -220.64205932617188, |
|
"loss": 0.6762, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.006008402444422245, |
|
"rewards/margins": 0.034125737845897675, |
|
"rewards/margins_max": 0.050690434873104095, |
|
"rewards/margins_min": 0.017561035230755806, |
|
"rewards/margins_std": 0.02342602238059044, |
|
"rewards/rejected": -0.040134135633707047, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.8828125, |
|
"learning_rate": 3.6259541984732823e-07, |
|
"logits/chosen": -3.282362699508667, |
|
"logits/rejected": -3.0158865451812744, |
|
"logps/chosen": -192.17164611816406, |
|
"logps/rejected": -220.742431640625, |
|
"loss": 0.6731, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.007757545914500952, |
|
"rewards/margins": 0.04202093929052353, |
|
"rewards/margins_max": 0.060148369520902634, |
|
"rewards/margins_min": 0.023893514648079872, |
|
"rewards/margins_std": 0.02563605271279812, |
|
"rewards/rejected": -0.04977848380804062, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.9765625, |
|
"learning_rate": 3.816793893129771e-07, |
|
"logits/chosen": -3.174314022064209, |
|
"logits/rejected": -2.9219601154327393, |
|
"logps/chosen": -194.1986846923828, |
|
"logps/rejected": -267.26055908203125, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.010592457838356495, |
|
"rewards/margins": 0.05324589088559151, |
|
"rewards/margins_max": 0.07371693104505539, |
|
"rewards/margins_min": 0.03277484327554703, |
|
"rewards/margins_std": 0.028950434178113937, |
|
"rewards/rejected": -0.06383834034204483, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.21875, |
|
"learning_rate": 4.0076335877862593e-07, |
|
"logits/chosen": -3.2590651512145996, |
|
"logits/rejected": -3.009448528289795, |
|
"logps/chosen": -193.40025329589844, |
|
"logps/rejected": -269.24017333984375, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.006985983345657587, |
|
"rewards/margins": 0.05964884161949158, |
|
"rewards/margins_max": 0.08800722658634186, |
|
"rewards/margins_min": 0.03129046410322189, |
|
"rewards/margins_std": 0.04010480269789696, |
|
"rewards/rejected": -0.06663481891155243, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.234375, |
|
"learning_rate": 4.1984732824427476e-07, |
|
"logits/chosen": -3.212986469268799, |
|
"logits/rejected": -2.872310161590576, |
|
"logps/chosen": -245.77197265625, |
|
"logps/rejected": -249.1974639892578, |
|
"loss": 0.6624, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.010857370682060719, |
|
"rewards/margins": 0.07341400533914566, |
|
"rewards/margins_max": 0.10470987856388092, |
|
"rewards/margins_min": 0.0421181321144104, |
|
"rewards/margins_std": 0.04425904154777527, |
|
"rewards/rejected": -0.08427136391401291, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.09375, |
|
"learning_rate": 4.3893129770992364e-07, |
|
"logits/chosen": -3.1967101097106934, |
|
"logits/rejected": -2.923083782196045, |
|
"logps/chosen": -201.79908752441406, |
|
"logps/rejected": -248.243896484375, |
|
"loss": 0.6585, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.0139171052724123, |
|
"rewards/margins": 0.06787364184856415, |
|
"rewards/margins_max": 0.09401793777942657, |
|
"rewards/margins_min": 0.04172936826944351, |
|
"rewards/margins_std": 0.03697359561920166, |
|
"rewards/rejected": -0.08179076015949249, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.9453125, |
|
"learning_rate": 4.5801526717557246e-07, |
|
"logits/chosen": -3.285114288330078, |
|
"logits/rejected": -2.8961422443389893, |
|
"logps/chosen": -205.2576446533203, |
|
"logps/rejected": -215.2158203125, |
|
"loss": 0.6531, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.011311140842735767, |
|
"rewards/margins": 0.08977559953927994, |
|
"rewards/margins_max": 0.12556931376457214, |
|
"rewards/margins_min": 0.05398188903927803, |
|
"rewards/margins_std": 0.05061995983123779, |
|
"rewards/rejected": -0.10108675062656403, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.140625, |
|
"learning_rate": 4.770992366412213e-07, |
|
"logits/chosen": -3.273923397064209, |
|
"logits/rejected": -2.972449779510498, |
|
"logps/chosen": -190.66929626464844, |
|
"logps/rejected": -236.36181640625, |
|
"loss": 0.65, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.021324489265680313, |
|
"rewards/margins": 0.09128403663635254, |
|
"rewards/margins_max": 0.1358390599489212, |
|
"rewards/margins_min": 0.04672900587320328, |
|
"rewards/margins_std": 0.0630103200674057, |
|
"rewards/rejected": -0.11260852962732315, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.046875, |
|
"learning_rate": 4.961832061068702e-07, |
|
"logits/chosen": -3.2471776008605957, |
|
"logits/rejected": -2.966447353363037, |
|
"logps/chosen": -203.96543884277344, |
|
"logps/rejected": -235.2838897705078, |
|
"loss": 0.647, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.020830674096941948, |
|
"rewards/margins": 0.09641597419977188, |
|
"rewards/margins_max": 0.1450330764055252, |
|
"rewards/margins_min": 0.04779886454343796, |
|
"rewards/margins_std": 0.06875498592853546, |
|
"rewards/rejected": -0.11724665015935898, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.078125, |
|
"learning_rate": 4.99985751383753e-07, |
|
"logits/chosen": -3.2874855995178223, |
|
"logits/rejected": -3.0439696311950684, |
|
"logps/chosen": -198.27664184570312, |
|
"logps/rejected": -249.9978485107422, |
|
"loss": 0.6469, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.017464281991124153, |
|
"rewards/margins": 0.10535471141338348, |
|
"rewards/margins_max": 0.1504170447587967, |
|
"rewards/margins_min": 0.06029237434267998, |
|
"rewards/margins_std": 0.06372777372598648, |
|
"rewards/rejected": -0.12281899154186249, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.046875, |
|
"learning_rate": 4.999278691638749e-07, |
|
"logits/chosen": -3.2457098960876465, |
|
"logits/rejected": -3.017002582550049, |
|
"logps/chosen": -213.6870880126953, |
|
"logps/rejected": -219.8416290283203, |
|
"loss": 0.6445, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.03185909986495972, |
|
"rewards/margins": 0.09040321409702301, |
|
"rewards/margins_max": 0.12558069825172424, |
|
"rewards/margins_min": 0.055225737392902374, |
|
"rewards/margins_std": 0.049748457968235016, |
|
"rewards/rejected": -0.12226231396198273, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.28125, |
|
"learning_rate": 4.998254731031337e-07, |
|
"logits/chosen": -3.2504220008850098, |
|
"logits/rejected": -2.9975688457489014, |
|
"logps/chosen": -195.15164184570312, |
|
"logps/rejected": -227.1262664794922, |
|
"loss": 0.637, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.02987954393029213, |
|
"rewards/margins": 0.1159171611070633, |
|
"rewards/margins_max": 0.17162121832370758, |
|
"rewards/margins_min": 0.060213081538677216, |
|
"rewards/margins_std": 0.07877745479345322, |
|
"rewards/rejected": -0.14579668641090393, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.953125, |
|
"learning_rate": 4.996785814389591e-07, |
|
"logits/chosen": -3.309065580368042, |
|
"logits/rejected": -2.962916135787964, |
|
"logps/chosen": -166.18507385253906, |
|
"logps/rejected": -179.7928466796875, |
|
"loss": 0.6393, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.03008580207824707, |
|
"rewards/margins": 0.10214884579181671, |
|
"rewards/margins_max": 0.15787146985530853, |
|
"rewards/margins_min": 0.04642622545361519, |
|
"rewards/margins_std": 0.07880368083715439, |
|
"rewards/rejected": -0.13223466277122498, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.4375, |
|
"learning_rate": 4.994872203337482e-07, |
|
"logits/chosen": -3.257145643234253, |
|
"logits/rejected": -2.9680373668670654, |
|
"logps/chosen": -202.3374786376953, |
|
"logps/rejected": -219.5332489013672, |
|
"loss": 0.6225, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.04237338528037071, |
|
"rewards/margins": 0.12682045996189117, |
|
"rewards/margins_max": 0.18470799922943115, |
|
"rewards/margins_min": 0.0689328983426094, |
|
"rewards/margins_std": 0.0818653553724289, |
|
"rewards/rejected": -0.169193834066391, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.9140625, |
|
"learning_rate": 4.992514238702059e-07, |
|
"logits/chosen": -3.314955234527588, |
|
"logits/rejected": -3.1039557456970215, |
|
"logps/chosen": -199.2044219970703, |
|
"logps/rejected": -245.0098876953125, |
|
"loss": 0.6227, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.042611610144376755, |
|
"rewards/margins": 0.15529310703277588, |
|
"rewards/margins_max": 0.21862797439098358, |
|
"rewards/margins_min": 0.09195823222398758, |
|
"rewards/margins_std": 0.08956903219223022, |
|
"rewards/rejected": -0.19790470600128174, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.203125, |
|
"learning_rate": 4.989712340452743e-07, |
|
"logits/chosen": -3.231292247772217, |
|
"logits/rejected": -2.928297519683838, |
|
"logps/chosen": -197.5637969970703, |
|
"logps/rejected": -262.58245849609375, |
|
"loss": 0.6165, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.06006884574890137, |
|
"rewards/margins": 0.16975189745426178, |
|
"rewards/margins_max": 0.24120891094207764, |
|
"rewards/margins_min": 0.09829487651586533, |
|
"rewards/margins_std": 0.10105548053979874, |
|
"rewards/rejected": -0.22982072830200195, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.140625, |
|
"learning_rate": 4.986467007626528e-07, |
|
"logits/chosen": -3.212827682495117, |
|
"logits/rejected": -2.941976547241211, |
|
"logps/chosen": -223.7288818359375, |
|
"logps/rejected": -274.39337158203125, |
|
"loss": 0.6163, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.06747810542583466, |
|
"rewards/margins": 0.15132711827754974, |
|
"rewards/margins_max": 0.23900623619556427, |
|
"rewards/margins_min": 0.06364797800779343, |
|
"rewards/margins_std": 0.12399701774120331, |
|
"rewards/rejected": -0.2188052237033844, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.234375, |
|
"learning_rate": 4.982778818239101e-07, |
|
"logits/chosen": -3.303295612335205, |
|
"logits/rejected": -3.0548179149627686, |
|
"logps/chosen": -181.9226837158203, |
|
"logps/rejected": -212.4131622314453, |
|
"loss": 0.6097, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.06988344341516495, |
|
"rewards/margins": 0.1707289218902588, |
|
"rewards/margins_max": 0.2409042865037918, |
|
"rewards/margins_min": 0.10055355727672577, |
|
"rewards/margins_std": 0.09924294799566269, |
|
"rewards/rejected": -0.24061235785484314, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.578125, |
|
"learning_rate": 4.978648429181893e-07, |
|
"logits/chosen": -3.270679473876953, |
|
"logits/rejected": -3.0009212493896484, |
|
"logps/chosen": -186.18716430664062, |
|
"logps/rejected": -220.5108642578125, |
|
"loss": 0.6079, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.07348515838384628, |
|
"rewards/margins": 0.1932184398174286, |
|
"rewards/margins_max": 0.274120956659317, |
|
"rewards/margins_min": 0.11231593787670135, |
|
"rewards/margins_std": 0.11441340297460556, |
|
"rewards/rejected": -0.26670360565185547, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.46875, |
|
"learning_rate": 4.97407657610508e-07, |
|
"logits/chosen": -3.271900177001953, |
|
"logits/rejected": -3.0213537216186523, |
|
"logps/chosen": -203.10650634765625, |
|
"logps/rejected": -261.69158935546875, |
|
"loss": 0.5911, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.07118692994117737, |
|
"rewards/margins": 0.21831946074962616, |
|
"rewards/margins_max": 0.3098284602165222, |
|
"rewards/margins_min": 0.1268104463815689, |
|
"rewards/margins_std": 0.12941327691078186, |
|
"rewards/rejected": -0.28950637578964233, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.265625, |
|
"learning_rate": 4.969064073286563e-07, |
|
"logits/chosen": -3.357651472091675, |
|
"logits/rejected": -3.0608596801757812, |
|
"logps/chosen": -208.51754760742188, |
|
"logps/rejected": -262.3056640625, |
|
"loss": 0.5993, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.09235924482345581, |
|
"rewards/margins": 0.19000664353370667, |
|
"rewards/margins_max": 0.29324302077293396, |
|
"rewards/margins_min": 0.08677025139331818, |
|
"rewards/margins_std": 0.14599831402301788, |
|
"rewards/rejected": -0.2823658585548401, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.109375, |
|
"learning_rate": 4.963611813486935e-07, |
|
"logits/chosen": -3.2650883197784424, |
|
"logits/rejected": -2.899033308029175, |
|
"logps/chosen": -235.99942016601562, |
|
"logps/rejected": -312.6029357910156, |
|
"loss": 0.5716, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.08987380564212799, |
|
"rewards/margins": 0.28126034140586853, |
|
"rewards/margins_max": 0.40632572770118713, |
|
"rewards/margins_min": 0.15619491040706635, |
|
"rewards/margins_std": 0.1768692135810852, |
|
"rewards/rejected": -0.3711341321468353, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.375, |
|
"learning_rate": 4.957720767790477e-07, |
|
"logits/chosen": -3.2625536918640137, |
|
"logits/rejected": -3.0297203063964844, |
|
"logps/chosen": -189.33514404296875, |
|
"logps/rejected": -244.502685546875, |
|
"loss": 0.5818, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.09811891615390778, |
|
"rewards/margins": 0.2365761548280716, |
|
"rewards/margins_max": 0.33593007922172546, |
|
"rewards/margins_min": 0.13722223043441772, |
|
"rewards/margins_std": 0.14050766825675964, |
|
"rewards/rejected": -0.334695041179657, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.28125, |
|
"learning_rate": 4.951391985432198e-07, |
|
"logits/chosen": -3.3009209632873535, |
|
"logits/rejected": -3.0335850715637207, |
|
"logps/chosen": -176.28749084472656, |
|
"logps/rejected": -214.1027374267578, |
|
"loss": 0.574, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.1052820086479187, |
|
"rewards/margins": 0.2626669108867645, |
|
"rewards/margins_max": 0.3927062153816223, |
|
"rewards/margins_min": 0.13262765109539032, |
|
"rewards/margins_std": 0.1839032918214798, |
|
"rewards/rejected": -0.3679489493370056, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.46875, |
|
"learning_rate": 4.944626593610968e-07, |
|
"logits/chosen": -3.2991390228271484, |
|
"logits/rejected": -2.928713321685791, |
|
"logps/chosen": -231.21533203125, |
|
"logps/rejected": -282.78961181640625, |
|
"loss": 0.5844, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.12326918542385101, |
|
"rewards/margins": 0.2481236755847931, |
|
"rewards/margins_max": 0.35814613103866577, |
|
"rewards/margins_min": 0.13810119032859802, |
|
"rewards/margins_std": 0.15559527277946472, |
|
"rewards/rejected": -0.3713928461074829, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.484375, |
|
"learning_rate": 4.937425797288742e-07, |
|
"logits/chosen": -3.2530465126037598, |
|
"logits/rejected": -3.0559334754943848, |
|
"logps/chosen": -190.10829162597656, |
|
"logps/rejected": -273.42584228515625, |
|
"loss": 0.549, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.13329292833805084, |
|
"rewards/margins": 0.29958730936050415, |
|
"rewards/margins_max": 0.4274858832359314, |
|
"rewards/margins_min": 0.17168866097927094, |
|
"rewards/margins_std": 0.1808759868144989, |
|
"rewards/rejected": -0.4328802227973938, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.609375, |
|
"learning_rate": 4.929790878975965e-07, |
|
"logits/chosen": -3.194209098815918, |
|
"logits/rejected": -2.93556547164917, |
|
"logps/chosen": -225.93896484375, |
|
"logps/rejected": -259.15179443359375, |
|
"loss": 0.5657, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.13958829641342163, |
|
"rewards/margins": 0.27957916259765625, |
|
"rewards/margins_max": 0.4229784905910492, |
|
"rewards/margins_min": 0.13617977499961853, |
|
"rewards/margins_std": 0.2027973234653473, |
|
"rewards/rejected": -0.4191674590110779, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.390625, |
|
"learning_rate": 4.921723198503132e-07, |
|
"logits/chosen": -3.26872181892395, |
|
"logits/rejected": -3.0192275047302246, |
|
"logps/chosen": -201.65443420410156, |
|
"logps/rejected": -289.94171142578125, |
|
"loss": 0.5472, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.15182039141654968, |
|
"rewards/margins": 0.33517175912857056, |
|
"rewards/margins_max": 0.4883649945259094, |
|
"rewards/margins_min": 0.18197841942310333, |
|
"rewards/margins_std": 0.21664805710315704, |
|
"rewards/rejected": -0.4869921803474426, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 2.953125, |
|
"learning_rate": 4.913224192778603e-07, |
|
"logits/chosen": -3.334212064743042, |
|
"logits/rejected": -3.1082167625427246, |
|
"logps/chosen": -228.478759765625, |
|
"logps/rejected": -311.8726501464844, |
|
"loss": 0.5477, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.15663278102874756, |
|
"rewards/margins": 0.3697594106197357, |
|
"rewards/margins_max": 0.536509096622467, |
|
"rewards/margins_min": 0.20300979912281036, |
|
"rewards/margins_std": 0.23581957817077637, |
|
"rewards/rejected": -0.5263921618461609, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 2.6875, |
|
"learning_rate": 4.90429537553268e-07, |
|
"logits/chosen": -3.2931313514709473, |
|
"logits/rejected": -3.0807833671569824, |
|
"logps/chosen": -217.96719360351562, |
|
"logps/rejected": -276.76202392578125, |
|
"loss": 0.5404, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.19370004534721375, |
|
"rewards/margins": 0.32050028443336487, |
|
"rewards/margins_max": 0.4694735109806061, |
|
"rewards/margins_min": 0.17152710258960724, |
|
"rewards/margins_std": 0.21067988872528076, |
|
"rewards/rejected": -0.5142003297805786, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 2.921875, |
|
"learning_rate": 4.894938337047995e-07, |
|
"logits/chosen": -3.20732045173645, |
|
"logits/rejected": -2.931908369064331, |
|
"logps/chosen": -264.7499694824219, |
|
"logps/rejected": -318.031005859375, |
|
"loss": 0.526, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.20364347100257874, |
|
"rewards/margins": 0.42259034514427185, |
|
"rewards/margins_max": 0.5794626474380493, |
|
"rewards/margins_min": 0.2657180726528168, |
|
"rewards/margins_std": 0.2218509465456009, |
|
"rewards/rejected": -0.6262338757514954, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.8125, |
|
"learning_rate": 4.885154743876277e-07, |
|
"logits/chosen": -3.3099350929260254, |
|
"logits/rejected": -3.021787643432617, |
|
"logps/chosen": -200.65884399414062, |
|
"logps/rejected": -255.55300903320312, |
|
"loss": 0.5332, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.19876748323440552, |
|
"rewards/margins": 0.349515825510025, |
|
"rewards/margins_max": 0.5269657373428345, |
|
"rewards/margins_min": 0.17206597328186035, |
|
"rewards/margins_std": 0.2509520649909973, |
|
"rewards/rejected": -0.5482833385467529, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.734375, |
|
"learning_rate": 4.87494633854152e-07, |
|
"logits/chosen": -3.3137099742889404, |
|
"logits/rejected": -2.9903388023376465, |
|
"logps/chosen": -208.6374969482422, |
|
"logps/rejected": -282.22052001953125, |
|
"loss": 0.5294, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.23697516322135925, |
|
"rewards/margins": 0.4222859740257263, |
|
"rewards/margins_max": 0.6355811357498169, |
|
"rewards/margins_min": 0.20899085700511932, |
|
"rewards/margins_std": 0.30164486169815063, |
|
"rewards/rejected": -0.6592611074447632, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.15625, |
|
"learning_rate": 4.864314939229637e-07, |
|
"logits/chosen": -3.306776523590088, |
|
"logits/rejected": -3.001708507537842, |
|
"logps/chosen": -255.39126586914062, |
|
"logps/rejected": -296.8736572265625, |
|
"loss": 0.5311, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.2626154124736786, |
|
"rewards/margins": 0.394510954618454, |
|
"rewards/margins_max": 0.6293004751205444, |
|
"rewards/margins_min": 0.1597214937210083, |
|
"rewards/margins_std": 0.332042396068573, |
|
"rewards/rejected": -0.6571264266967773, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.734375, |
|
"learning_rate": 4.853262439464624e-07, |
|
"logits/chosen": -3.235895872116089, |
|
"logits/rejected": -2.9632811546325684, |
|
"logps/chosen": -203.52279663085938, |
|
"logps/rejected": -269.3291320800781, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.228928804397583, |
|
"rewards/margins": 0.3607742190361023, |
|
"rewards/margins_max": 0.5433238744735718, |
|
"rewards/margins_min": 0.17822448909282684, |
|
"rewards/margins_std": 0.25816428661346436, |
|
"rewards/rejected": -0.5897030234336853, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.96875, |
|
"learning_rate": 4.841790807771307e-07, |
|
"logits/chosen": -3.166623592376709, |
|
"logits/rejected": -2.9125328063964844, |
|
"logps/chosen": -236.2999267578125, |
|
"logps/rejected": -336.2409362792969, |
|
"loss": 0.5043, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.2680327296257019, |
|
"rewards/margins": 0.46621331572532654, |
|
"rewards/margins_max": 0.655846357345581, |
|
"rewards/margins_min": 0.2765803337097168, |
|
"rewards/margins_std": 0.26818156242370605, |
|
"rewards/rejected": -0.7342461347579956, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.015625, |
|
"learning_rate": 4.82990208732474e-07, |
|
"logits/chosen": -3.3195443153381348, |
|
"logits/rejected": -3.0613291263580322, |
|
"logps/chosen": -174.39599609375, |
|
"logps/rejected": -257.5628967285156, |
|
"loss": 0.5302, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.25635436177253723, |
|
"rewards/margins": 0.4157690107822418, |
|
"rewards/margins_max": 0.6029486060142517, |
|
"rewards/margins_min": 0.22858937084674835, |
|
"rewards/margins_std": 0.26471197605133057, |
|
"rewards/rejected": -0.6721233129501343, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.921875, |
|
"learning_rate": 4.817598395586301e-07, |
|
"logits/chosen": -3.2022743225097656, |
|
"logits/rejected": -2.887551784515381, |
|
"logps/chosen": -214.01513671875, |
|
"logps/rejected": -309.3605651855469, |
|
"loss": 0.4893, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.24757170677185059, |
|
"rewards/margins": 0.5083514451980591, |
|
"rewards/margins_max": 0.7038620114326477, |
|
"rewards/margins_min": 0.31284087896347046, |
|
"rewards/margins_std": 0.27649372816085815, |
|
"rewards/rejected": -0.7559231519699097, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.90625, |
|
"learning_rate": 4.804881923926556e-07, |
|
"logits/chosen": -3.2066566944122314, |
|
"logits/rejected": -2.9500646591186523, |
|
"logps/chosen": -216.474609375, |
|
"logps/rejected": -268.8821716308594, |
|
"loss": 0.5012, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.30655044317245483, |
|
"rewards/margins": 0.4422832429409027, |
|
"rewards/margins_max": 0.6758220195770264, |
|
"rewards/margins_min": 0.20874443650245667, |
|
"rewards/margins_std": 0.33027374744415283, |
|
"rewards/rejected": -0.7488336563110352, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.046875, |
|
"learning_rate": 4.791754937234961e-07, |
|
"logits/chosen": -3.171912670135498, |
|
"logits/rejected": -2.918295383453369, |
|
"logps/chosen": -221.5971221923828, |
|
"logps/rejected": -293.3380432128906, |
|
"loss": 0.4949, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.29368987679481506, |
|
"rewards/margins": 0.5147393941879272, |
|
"rewards/margins_max": 0.7441326379776001, |
|
"rewards/margins_min": 0.28534621000289917, |
|
"rewards/margins_std": 0.32441097497940063, |
|
"rewards/rejected": -0.8084293603897095, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.9375, |
|
"learning_rate": 4.778219773516472e-07, |
|
"logits/chosen": -3.1883535385131836, |
|
"logits/rejected": -2.9556570053100586, |
|
"logps/chosen": -247.1591796875, |
|
"logps/rejected": -354.550048828125, |
|
"loss": 0.4733, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.3137117326259613, |
|
"rewards/margins": 0.5875142812728882, |
|
"rewards/margins_max": 0.8420342206954956, |
|
"rewards/margins_min": 0.3329945206642151, |
|
"rewards/margins_std": 0.3599454462528229, |
|
"rewards/rejected": -0.9012260437011719, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.5, |
|
"learning_rate": 4.764278843475128e-07, |
|
"logits/chosen": -3.222123622894287, |
|
"logits/rejected": -2.993802547454834, |
|
"logps/chosen": -205.1719512939453, |
|
"logps/rejected": -280.248291015625, |
|
"loss": 0.4764, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.36355796456336975, |
|
"rewards/margins": 0.5085680484771729, |
|
"rewards/margins_max": 0.7664892673492432, |
|
"rewards/margins_min": 0.2506466507911682, |
|
"rewards/margins_std": 0.3647558093070984, |
|
"rewards/rejected": -0.8721259832382202, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.65625, |
|
"learning_rate": 4.749934630084691e-07, |
|
"logits/chosen": -3.3156044483184814, |
|
"logits/rejected": -3.007734775543213, |
|
"logps/chosen": -259.8424377441406, |
|
"logps/rejected": -326.71185302734375, |
|
"loss": 0.5174, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.30121082067489624, |
|
"rewards/margins": 0.45573800802230835, |
|
"rewards/margins_max": 0.6853656768798828, |
|
"rewards/margins_min": 0.22611038386821747, |
|
"rewards/margins_std": 0.32474246621131897, |
|
"rewards/rejected": -0.7569488286972046, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.859375, |
|
"learning_rate": 4.735189688146409e-07, |
|
"logits/chosen": -3.2011077404022217, |
|
"logits/rejected": -2.964134931564331, |
|
"logps/chosen": -244.4047088623047, |
|
"logps/rejected": -343.80706787109375, |
|
"loss": 0.4885, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.37781041860580444, |
|
"rewards/margins": 0.52849942445755, |
|
"rewards/margins_max": 0.8547603487968445, |
|
"rewards/margins_min": 0.20223841071128845, |
|
"rewards/margins_std": 0.46140265464782715, |
|
"rewards/rejected": -0.9063097834587097, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 2.9375, |
|
"learning_rate": 4.7200466438339916e-07, |
|
"logits/chosen": -3.2470192909240723, |
|
"logits/rejected": -2.94683575630188, |
|
"logps/chosen": -260.84661865234375, |
|
"logps/rejected": -325.08734130859375, |
|
"loss": 0.484, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.3126043975353241, |
|
"rewards/margins": 0.5272418260574341, |
|
"rewards/margins_max": 0.8143749237060547, |
|
"rewards/margins_min": 0.24010880291461945, |
|
"rewards/margins_std": 0.40606746077537537, |
|
"rewards/rejected": -0.8398463129997253, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.0, |
|
"learning_rate": 4.704508194225866e-07, |
|
"logits/chosen": -3.2237815856933594, |
|
"logits/rejected": -2.926332950592041, |
|
"logps/chosen": -244.0590362548828, |
|
"logps/rejected": -343.0704345703125, |
|
"loss": 0.4808, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.39641642570495605, |
|
"rewards/margins": 0.609396755695343, |
|
"rewards/margins_max": 0.9228858947753906, |
|
"rewards/margins_min": 0.295907586812973, |
|
"rewards/margins_std": 0.44334059953689575, |
|
"rewards/rejected": -1.0058131217956543, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.09375, |
|
"learning_rate": 4.688577106824814e-07, |
|
"logits/chosen": -3.252931594848633, |
|
"logits/rejected": -2.984818935394287, |
|
"logps/chosen": -207.40176391601562, |
|
"logps/rejected": -314.35943603515625, |
|
"loss": 0.4617, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.32950547337532043, |
|
"rewards/margins": 0.6272681951522827, |
|
"rewards/margins_max": 0.8804882168769836, |
|
"rewards/margins_min": 0.3740481734275818, |
|
"rewards/margins_std": 0.35810714960098267, |
|
"rewards/rejected": -0.956773579120636, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.40625, |
|
"learning_rate": 4.672256219065059e-07, |
|
"logits/chosen": -3.2750720977783203, |
|
"logits/rejected": -2.9872028827667236, |
|
"logps/chosen": -234.55569458007812, |
|
"logps/rejected": -319.52337646484375, |
|
"loss": 0.4489, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.4240082800388336, |
|
"rewards/margins": 0.6940449476242065, |
|
"rewards/margins_max": 1.0679466724395752, |
|
"rewards/margins_min": 0.3201431334018707, |
|
"rewards/margins_std": 0.5287769436836243, |
|
"rewards/rejected": -1.1180531978607178, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.03125, |
|
"learning_rate": 4.655548437806902e-07, |
|
"logits/chosen": -3.146660327911377, |
|
"logits/rejected": -2.780895471572876, |
|
"logps/chosen": -292.53155517578125, |
|
"logps/rejected": -379.9189453125, |
|
"loss": 0.444, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.43200021982192993, |
|
"rewards/margins": 0.7211217880249023, |
|
"rewards/margins_max": 1.0315678119659424, |
|
"rewards/margins_min": 0.41067585349082947, |
|
"rewards/margins_std": 0.4390367865562439, |
|
"rewards/rejected": -1.153122067451477, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.578125, |
|
"learning_rate": 4.6384567388189835e-07, |
|
"logits/chosen": -3.1456127166748047, |
|
"logits/rejected": -2.8448398113250732, |
|
"logps/chosen": -241.07363891601562, |
|
"logps/rejected": -331.47509765625, |
|
"loss": 0.4397, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.4291381239891052, |
|
"rewards/margins": 0.6892618536949158, |
|
"rewards/margins_max": 0.9590433835983276, |
|
"rewards/margins_min": 0.41948023438453674, |
|
"rewards/margins_std": 0.3815288245677948, |
|
"rewards/rejected": -1.118399977684021, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.046875, |
|
"learning_rate": 4.6209841662482874e-07, |
|
"logits/chosen": -3.130969524383545, |
|
"logits/rejected": -2.9060287475585938, |
|
"logps/chosen": -216.61447143554688, |
|
"logps/rejected": -346.3977355957031, |
|
"loss": 0.4242, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.42712074518203735, |
|
"rewards/margins": 0.7977662682533264, |
|
"rewards/margins_max": 1.103691816329956, |
|
"rewards/margins_min": 0.4918406009674072, |
|
"rewards/margins_std": 0.4326442778110504, |
|
"rewards/rejected": -1.2248871326446533, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.46875, |
|
"learning_rate": 4.603133832077953e-07, |
|
"logits/chosen": -3.1673851013183594, |
|
"logits/rejected": -2.961228609085083, |
|
"logps/chosen": -228.9781036376953, |
|
"logps/rejected": -322.8971252441406, |
|
"loss": 0.4451, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.49834170937538147, |
|
"rewards/margins": 0.6934456825256348, |
|
"rewards/margins_max": 1.0356100797653198, |
|
"rewards/margins_min": 0.3512812554836273, |
|
"rewards/margins_std": 0.4838935434818268, |
|
"rewards/rejected": -1.1917873620986938, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.265625, |
|
"learning_rate": 4.58490891557301e-07, |
|
"logits/chosen": -3.1827971935272217, |
|
"logits/rejected": -2.9217400550842285, |
|
"logps/chosen": -214.40975952148438, |
|
"logps/rejected": -327.9997253417969, |
|
"loss": 0.4295, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.47098636627197266, |
|
"rewards/margins": 0.7713015675544739, |
|
"rewards/margins_max": 1.1291908025741577, |
|
"rewards/margins_min": 0.4134122431278229, |
|
"rewards/margins_std": 0.5061318874359131, |
|
"rewards/rejected": -1.2422878742218018, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 5.03125, |
|
"learning_rate": 4.5663126627141346e-07, |
|
"logits/chosen": -3.233410596847534, |
|
"logits/rejected": -2.9121780395507812, |
|
"logps/chosen": -264.9963684082031, |
|
"logps/rejected": -351.5663757324219, |
|
"loss": 0.4268, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5588346123695374, |
|
"rewards/margins": 0.7766939997673035, |
|
"rewards/margins_max": 1.2045036554336548, |
|
"rewards/margins_min": 0.3488844037055969, |
|
"rewards/margins_std": 0.6050141453742981, |
|
"rewards/rejected": -1.3355286121368408, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.453125, |
|
"learning_rate": 4.5473483856195085e-07, |
|
"logits/chosen": -3.1042134761810303, |
|
"logits/rejected": -2.884307384490967, |
|
"logps/chosen": -278.4771423339844, |
|
"logps/rejected": -435.12518310546875, |
|
"loss": 0.411, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.5594338774681091, |
|
"rewards/margins": 0.87383633852005, |
|
"rewards/margins_max": 1.3376729488372803, |
|
"rewards/margins_min": 0.4099995493888855, |
|
"rewards/margins_std": 0.6559640765190125, |
|
"rewards/rejected": -1.4332702159881592, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.515625, |
|
"learning_rate": 4.5280194619549197e-07, |
|
"logits/chosen": -3.1154184341430664, |
|
"logits/rejected": -2.8020920753479004, |
|
"logps/chosen": -263.41326904296875, |
|
"logps/rejected": -353.222900390625, |
|
"loss": 0.4186, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.5418119430541992, |
|
"rewards/margins": 0.9471070170402527, |
|
"rewards/margins_max": 1.409268856048584, |
|
"rewards/margins_min": 0.48494523763656616, |
|
"rewards/margins_std": 0.653595507144928, |
|
"rewards/rejected": -1.4889190196990967, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 4.375, |
|
"learning_rate": 4.50832933433217e-07, |
|
"logits/chosen": -3.1423096656799316, |
|
"logits/rejected": -2.93670654296875, |
|
"logps/chosen": -230.8040008544922, |
|
"logps/rejected": -336.51263427734375, |
|
"loss": 0.4317, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.539901077747345, |
|
"rewards/margins": 0.7871753573417664, |
|
"rewards/margins_max": 1.2703603506088257, |
|
"rewards/margins_min": 0.3039904236793518, |
|
"rewards/margins_std": 0.6833267211914062, |
|
"rewards/rejected": -1.3270765542984009, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 6.28125, |
|
"learning_rate": 4.4882815096959246e-07, |
|
"logits/chosen": -3.125920534133911, |
|
"logits/rejected": -2.9035699367523193, |
|
"logps/chosen": -220.22531127929688, |
|
"logps/rejected": -358.2559509277344, |
|
"loss": 0.4116, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.5155752897262573, |
|
"rewards/margins": 0.72476726770401, |
|
"rewards/margins_max": 1.1075283288955688, |
|
"rewards/margins_min": 0.3420061469078064, |
|
"rewards/margins_std": 0.5413058996200562, |
|
"rewards/rejected": -1.2403424978256226, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.5, |
|
"learning_rate": 4.4678795586991023e-07, |
|
"logits/chosen": -3.1662585735321045, |
|
"logits/rejected": -2.9570212364196777, |
|
"logps/chosen": -251.75430297851562, |
|
"logps/rejected": -413.55145263671875, |
|
"loss": 0.415, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.561039924621582, |
|
"rewards/margins": 0.8251296877861023, |
|
"rewards/margins_max": 1.2652571201324463, |
|
"rewards/margins_min": 0.38500216603279114, |
|
"rewards/margins_std": 0.6224343180656433, |
|
"rewards/rejected": -1.3861695528030396, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.53125, |
|
"learning_rate": 4.447127115066919e-07, |
|
"logits/chosen": -3.15718412399292, |
|
"logits/rejected": -2.9018607139587402, |
|
"logps/chosen": -254.60348510742188, |
|
"logps/rejected": -410.89581298828125, |
|
"loss": 0.4225, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.5274133682250977, |
|
"rewards/margins": 0.9205818176269531, |
|
"rewards/margins_max": 1.3906073570251465, |
|
"rewards/margins_min": 0.45055636763572693, |
|
"rewards/margins_std": 0.664716362953186, |
|
"rewards/rejected": -1.4479950666427612, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 4.375, |
|
"learning_rate": 4.4260278749496916e-07, |
|
"logits/chosen": -3.1671698093414307, |
|
"logits/rejected": -2.9417810440063477, |
|
"logps/chosen": -236.66024780273438, |
|
"logps/rejected": -383.62213134765625, |
|
"loss": 0.4109, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.6313245296478271, |
|
"rewards/margins": 1.054483413696289, |
|
"rewards/margins_max": 1.598443865776062, |
|
"rewards/margins_min": 0.5105229616165161, |
|
"rewards/margins_std": 0.7692762613296509, |
|
"rewards/rejected": -1.6858079433441162, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 4.40625, |
|
"learning_rate": 4.4045855962645363e-07, |
|
"logits/chosen": -3.073002576828003, |
|
"logits/rejected": -2.8750250339508057, |
|
"logps/chosen": -236.89315795898438, |
|
"logps/rejected": -391.52423095703125, |
|
"loss": 0.3795, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.54277503490448, |
|
"rewards/margins": 0.9837581515312195, |
|
"rewards/margins_max": 1.4403795003890991, |
|
"rewards/margins_min": 0.5271369814872742, |
|
"rewards/margins_std": 0.6457599997520447, |
|
"rewards/rejected": -1.5265332460403442, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 4.09375, |
|
"learning_rate": 4.3828040980260504e-07, |
|
"logits/chosen": -3.178225517272949, |
|
"logits/rejected": -2.885007381439209, |
|
"logps/chosen": -285.74554443359375, |
|
"logps/rejected": -402.0855407714844, |
|
"loss": 0.3933, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.6648572683334351, |
|
"rewards/margins": 1.0061523914337158, |
|
"rewards/margins_max": 1.521410346031189, |
|
"rewards/margins_min": 0.4908943176269531, |
|
"rewards/margins_std": 0.7286848425865173, |
|
"rewards/rejected": -1.6710094213485718, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 4.360687259666129e-07, |
|
"logits/chosen": -3.152057647705078, |
|
"logits/rejected": -2.9315738677978516, |
|
"logps/chosen": -284.7326965332031, |
|
"logps/rejected": -381.3499450683594, |
|
"loss": 0.4157, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.646824061870575, |
|
"rewards/margins": 0.8092839121818542, |
|
"rewards/margins_max": 1.2926756143569946, |
|
"rewards/margins_min": 0.32589206099510193, |
|
"rewards/margins_std": 0.6836191415786743, |
|
"rewards/rejected": -1.4561078548431396, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 5.59375, |
|
"learning_rate": 4.3382390203430015e-07, |
|
"logits/chosen": -3.1908488273620605, |
|
"logits/rejected": -2.944361925125122, |
|
"logps/chosen": -296.958251953125, |
|
"logps/rejected": -394.0612487792969, |
|
"loss": 0.3889, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.818184494972229, |
|
"rewards/margins": 0.8906744122505188, |
|
"rewards/margins_max": 1.3960387706756592, |
|
"rewards/margins_min": 0.3853098750114441, |
|
"rewards/margins_std": 0.7146932482719421, |
|
"rewards/rejected": -1.7088590860366821, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 5.125, |
|
"learning_rate": 4.3154633782396493e-07, |
|
"logits/chosen": -3.162999153137207, |
|
"logits/rejected": -2.876415729522705, |
|
"logps/chosen": -304.06121826171875, |
|
"logps/rejected": -402.8589172363281, |
|
"loss": 0.4076, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.8082460165023804, |
|
"rewards/margins": 1.0327093601226807, |
|
"rewards/margins_max": 1.5921701192855835, |
|
"rewards/margins_min": 0.47324857115745544, |
|
"rewards/margins_std": 0.7911970615386963, |
|
"rewards/rejected": -1.840955376625061, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.28125, |
|
"learning_rate": 4.2923643898516983e-07, |
|
"logits/chosen": -3.105457067489624, |
|
"logits/rejected": -2.8327999114990234, |
|
"logps/chosen": -313.3583068847656, |
|
"logps/rejected": -489.62060546875, |
|
"loss": 0.3631, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.668003499507904, |
|
"rewards/margins": 1.2182451486587524, |
|
"rewards/margins_max": 1.824080228805542, |
|
"rewards/margins_min": 0.6124096512794495, |
|
"rewards/margins_std": 0.8567806482315063, |
|
"rewards/rejected": -1.8862483501434326, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.640625, |
|
"learning_rate": 4.268946169264932e-07, |
|
"logits/chosen": -3.1345181465148926, |
|
"logits/rejected": -2.9487783908843994, |
|
"logps/chosen": -230.07131958007812, |
|
"logps/rejected": -395.2250671386719, |
|
"loss": 0.3687, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.7640656232833862, |
|
"rewards/margins": 1.161650538444519, |
|
"rewards/margins_max": 1.6811326742172241, |
|
"rewards/margins_min": 0.6421682238578796, |
|
"rewards/margins_std": 0.7346588969230652, |
|
"rewards/rejected": -1.9257161617279053, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 4.8125, |
|
"learning_rate": 4.245212887422542e-07, |
|
"logits/chosen": -3.1086442470550537, |
|
"logits/rejected": -2.9294607639312744, |
|
"logps/chosen": -273.58416748046875, |
|
"logps/rejected": -438.7652282714844, |
|
"loss": 0.3613, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.8308951258659363, |
|
"rewards/margins": 1.2336242198944092, |
|
"rewards/margins_max": 1.882441520690918, |
|
"rewards/margins_min": 0.5848069190979004, |
|
"rewards/margins_std": 0.917566180229187, |
|
"rewards/rejected": -2.0645194053649902, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 4.1875, |
|
"learning_rate": 4.2211687713822574e-07, |
|
"logits/chosen": -3.089200019836426, |
|
"logits/rejected": -2.8396849632263184, |
|
"logps/chosen": -317.65618896484375, |
|
"logps/rejected": -468.9850158691406, |
|
"loss": 0.3686, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7857069373130798, |
|
"rewards/margins": 1.1800651550292969, |
|
"rewards/margins_max": 1.7438383102416992, |
|
"rewards/margins_min": 0.6162917017936707, |
|
"rewards/margins_std": 0.797295868396759, |
|
"rewards/rejected": -1.965772032737732, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 5.28125, |
|
"learning_rate": 4.196818103563477e-07, |
|
"logits/chosen": -3.1307501792907715, |
|
"logits/rejected": -2.890303134918213, |
|
"logps/chosen": -284.9822692871094, |
|
"logps/rejected": -456.78582763671875, |
|
"loss": 0.3578, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.8763874769210815, |
|
"rewards/margins": 1.411645531654358, |
|
"rewards/margins_max": 2.090603828430176, |
|
"rewards/margins_min": 0.7326871156692505, |
|
"rewards/margins_std": 0.960192084312439, |
|
"rewards/rejected": -2.2880330085754395, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 4.625, |
|
"learning_rate": 4.172165220984541e-07, |
|
"logits/chosen": -3.075063467025757, |
|
"logits/rejected": -2.7540695667266846, |
|
"logps/chosen": -315.30389404296875, |
|
"logps/rejected": -419.2861328125, |
|
"loss": 0.3873, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.8819743394851685, |
|
"rewards/margins": 0.9528709650039673, |
|
"rewards/margins_max": 1.3681405782699585, |
|
"rewards/margins_min": 0.5376013517379761, |
|
"rewards/margins_std": 0.5872799754142761, |
|
"rewards/rejected": -1.8348453044891357, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.625, |
|
"learning_rate": 4.1472145144902775e-07, |
|
"logits/chosen": -3.0279951095581055, |
|
"logits/rejected": -2.79876708984375, |
|
"logps/chosen": -313.17828369140625, |
|
"logps/rejected": -494.333984375, |
|
"loss": 0.3517, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.9009196162223816, |
|
"rewards/margins": 1.3655102252960205, |
|
"rewards/margins_max": 2.058493137359619, |
|
"rewards/margins_min": 0.6725271344184875, |
|
"rewards/margins_std": 0.9800260663032532, |
|
"rewards/rejected": -2.2664296627044678, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 4.5, |
|
"learning_rate": 4.121970427969966e-07, |
|
"logits/chosen": -3.0823280811309814, |
|
"logits/rejected": -2.7526888847351074, |
|
"logps/chosen": -274.48516845703125, |
|
"logps/rejected": -379.95843505859375, |
|
"loss": 0.3596, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.8261201977729797, |
|
"rewards/margins": 1.091605544090271, |
|
"rewards/margins_max": 1.6673996448516846, |
|
"rewards/margins_min": 0.5158115029335022, |
|
"rewards/margins_std": 0.8142956495285034, |
|
"rewards/rejected": -1.9177258014678955, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 5.125, |
|
"learning_rate": 4.0964374575658496e-07, |
|
"logits/chosen": -3.0822136402130127, |
|
"logits/rejected": -2.8262829780578613, |
|
"logps/chosen": -312.8172912597656, |
|
"logps/rejected": -474.4310607910156, |
|
"loss": 0.3671, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.8950836062431335, |
|
"rewards/margins": 1.219533085823059, |
|
"rewards/margins_max": 1.8111814260482788, |
|
"rewards/margins_min": 0.6278846859931946, |
|
"rewards/margins_std": 0.8367172479629517, |
|
"rewards/rejected": -2.114616870880127, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 5.21875, |
|
"learning_rate": 4.070620150872339e-07, |
|
"logits/chosen": -3.046312093734741, |
|
"logits/rejected": -2.8204238414764404, |
|
"logps/chosen": -294.24627685546875, |
|
"logps/rejected": -452.1871032714844, |
|
"loss": 0.3853, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.867325484752655, |
|
"rewards/margins": 1.1414000988006592, |
|
"rewards/margins_max": 1.6486103534698486, |
|
"rewards/margins_min": 0.6341898441314697, |
|
"rewards/margins_std": 0.717303454875946, |
|
"rewards/rejected": -2.008725643157959, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 4.044523106126061e-07, |
|
"logits/chosen": -3.0358595848083496, |
|
"logits/rejected": -2.808770179748535, |
|
"logps/chosen": -295.25604248046875, |
|
"logps/rejected": -459.0888671875, |
|
"loss": 0.3634, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.7942778468132019, |
|
"rewards/margins": 1.0517923831939697, |
|
"rewards/margins_max": 1.6069154739379883, |
|
"rewards/margins_min": 0.49666935205459595, |
|
"rewards/margins_std": 0.7850624322891235, |
|
"rewards/rejected": -1.8460700511932373, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 7.875, |
|
"learning_rate": 4.0181509713868765e-07, |
|
"logits/chosen": -3.1421244144439697, |
|
"logits/rejected": -2.8746113777160645, |
|
"logps/chosen": -295.7052917480469, |
|
"logps/rejected": -496.74700927734375, |
|
"loss": 0.3753, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.8820911645889282, |
|
"rewards/margins": 1.6108951568603516, |
|
"rewards/margins_max": 2.3748936653137207, |
|
"rewards/margins_min": 0.8468970060348511, |
|
"rewards/margins_std": 1.0804564952850342, |
|
"rewards/rejected": -2.4929862022399902, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 8.125, |
|
"learning_rate": 3.991508443710031e-07, |
|
"logits/chosen": -3.188358783721924, |
|
"logits/rejected": -2.9022974967956543, |
|
"logps/chosen": -326.6969299316406, |
|
"logps/rejected": -470.706787109375, |
|
"loss": 0.3591, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.9822729229927063, |
|
"rewards/margins": 1.193819522857666, |
|
"rewards/margins_max": 1.7589439153671265, |
|
"rewards/margins_min": 0.628695011138916, |
|
"rewards/margins_std": 0.7992067337036133, |
|
"rewards/rejected": -2.1760921478271484, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 7.78125, |
|
"learning_rate": 3.9646002683095794e-07, |
|
"logits/chosen": -3.1077704429626465, |
|
"logits/rejected": -2.8467211723327637, |
|
"logps/chosen": -280.2427673339844, |
|
"logps/rejected": -415.1084899902344, |
|
"loss": 0.3603, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.8817678689956665, |
|
"rewards/margins": 1.1791865825653076, |
|
"rewards/margins_max": 1.6983654499053955, |
|
"rewards/margins_min": 0.660007655620575, |
|
"rewards/margins_std": 0.7342298030853271, |
|
"rewards/rejected": -2.0609545707702637, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 6.59375, |
|
"learning_rate": 3.937431237713227e-07, |
|
"logits/chosen": -3.09631609916687, |
|
"logits/rejected": -2.7876319885253906, |
|
"logps/chosen": -343.01019287109375, |
|
"logps/rejected": -494.8714904785156, |
|
"loss": 0.3643, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.8703140020370483, |
|
"rewards/margins": 1.3103339672088623, |
|
"rewards/margins_max": 2.1210694313049316, |
|
"rewards/margins_min": 0.4995986819267273, |
|
"rewards/margins_std": 1.1465529203414917, |
|
"rewards/rejected": -2.1806480884552, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 3.640625, |
|
"learning_rate": 3.910006190908753e-07, |
|
"logits/chosen": -3.1266021728515625, |
|
"logits/rejected": -2.803832530975342, |
|
"logps/chosen": -313.9851989746094, |
|
"logps/rejected": -454.4408264160156, |
|
"loss": 0.3745, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.8938585519790649, |
|
"rewards/margins": 1.1841809749603271, |
|
"rewards/margins_max": 1.9790232181549072, |
|
"rewards/margins_min": 0.38933879137039185, |
|
"rewards/margins_std": 1.1240766048431396, |
|
"rewards/rejected": -2.0780396461486816, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 4.125, |
|
"learning_rate": 3.882330012482152e-07, |
|
"logits/chosen": -3.1263976097106934, |
|
"logits/rejected": -2.940556287765503, |
|
"logps/chosen": -265.8106689453125, |
|
"logps/rejected": -446.62603759765625, |
|
"loss": 0.3468, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.8905905485153198, |
|
"rewards/margins": 1.3784196376800537, |
|
"rewards/margins_max": 2.0906035900115967, |
|
"rewards/margins_min": 0.6662355065345764, |
|
"rewards/margins_std": 1.0071804523468018, |
|
"rewards/rejected": -2.269010066986084, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 3.984375, |
|
"learning_rate": 3.854407631747653e-07, |
|
"logits/chosen": -3.0423550605773926, |
|
"logits/rejected": -2.7694637775421143, |
|
"logps/chosen": -325.1182556152344, |
|
"logps/rejected": -545.2157592773438, |
|
"loss": 0.3244, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.9049133062362671, |
|
"rewards/margins": 1.3824645280838013, |
|
"rewards/margins_max": 2.044529438018799, |
|
"rewards/margins_min": 0.7203994989395142, |
|
"rewards/margins_std": 0.9363012313842773, |
|
"rewards/rejected": -2.2873778343200684, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 4.625, |
|
"learning_rate": 3.826244021869782e-07, |
|
"logits/chosen": -3.1056835651397705, |
|
"logits/rejected": -2.8130106925964355, |
|
"logps/chosen": -321.215576171875, |
|
"logps/rejected": -474.0379943847656, |
|
"loss": 0.3367, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.938174843788147, |
|
"rewards/margins": 1.3489874601364136, |
|
"rewards/margins_max": 2.1338260173797607, |
|
"rewards/margins_min": 0.564149022102356, |
|
"rewards/margins_std": 1.1099293231964111, |
|
"rewards/rejected": -2.2871623039245605, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 7.78125, |
|
"learning_rate": 3.797844198977601e-07, |
|
"logits/chosen": -3.0277373790740967, |
|
"logits/rejected": -2.816030740737915, |
|
"logps/chosen": -305.11749267578125, |
|
"logps/rejected": -515.0469970703125, |
|
"loss": 0.3415, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.8409072160720825, |
|
"rewards/margins": 1.5637253522872925, |
|
"rewards/margins_max": 2.281038761138916, |
|
"rewards/margins_min": 0.8464117050170898, |
|
"rewards/margins_std": 1.014434576034546, |
|
"rewards/rejected": -2.404632568359375, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.453125, |
|
"learning_rate": 3.769213221271306e-07, |
|
"logits/chosen": -3.0825905799865723, |
|
"logits/rejected": -2.811018466949463, |
|
"logps/chosen": -302.92364501953125, |
|
"logps/rejected": -513.6621704101562, |
|
"loss": 0.3001, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.9827852249145508, |
|
"rewards/margins": 1.5373833179473877, |
|
"rewards/margins_max": 2.370154857635498, |
|
"rewards/margins_min": 0.704612135887146, |
|
"rewards/margins_std": 1.1777164936065674, |
|
"rewards/rejected": -2.5201685428619385, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 5.9375, |
|
"learning_rate": 3.740356188121326e-07, |
|
"logits/chosen": -3.0749309062957764, |
|
"logits/rejected": -2.808995008468628, |
|
"logps/chosen": -313.23779296875, |
|
"logps/rejected": -494.24658203125, |
|
"loss": 0.325, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.0359914302825928, |
|
"rewards/margins": 1.6361278295516968, |
|
"rewards/margins_max": 2.5817818641662598, |
|
"rewards/margins_min": 0.6904740929603577, |
|
"rewards/margins_std": 1.3373563289642334, |
|
"rewards/rejected": -2.672119140625, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 3.96875, |
|
"learning_rate": 3.711278239160092e-07, |
|
"logits/chosen": -3.0516886711120605, |
|
"logits/rejected": -2.8046505451202393, |
|
"logps/chosen": -290.2547912597656, |
|
"logps/rejected": -450.64215087890625, |
|
"loss": 0.337, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.8965566754341125, |
|
"rewards/margins": 1.2011537551879883, |
|
"rewards/margins_max": 1.8123470544815063, |
|
"rewards/margins_min": 0.5899609327316284, |
|
"rewards/margins_std": 0.8643573522567749, |
|
"rewards/rejected": -2.097710371017456, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 5.625, |
|
"learning_rate": 3.681984553366629e-07, |
|
"logits/chosen": -3.067981719970703, |
|
"logits/rejected": -2.813534736633301, |
|
"logps/chosen": -317.41302490234375, |
|
"logps/rejected": -559.5974731445312, |
|
"loss": 0.3242, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.159551739692688, |
|
"rewards/margins": 1.695622205734253, |
|
"rewards/margins_max": 2.6865200996398926, |
|
"rewards/margins_min": 0.7047241926193237, |
|
"rewards/margins_std": 1.401341199874878, |
|
"rewards/rejected": -2.8551738262176514, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 5.28125, |
|
"learning_rate": 3.652480348144152e-07, |
|
"logits/chosen": -3.0792620182037354, |
|
"logits/rejected": -2.8351120948791504, |
|
"logps/chosen": -315.0097351074219, |
|
"logps/rejected": -499.28741455078125, |
|
"loss": 0.349, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.2088465690612793, |
|
"rewards/margins": 1.4478918313980103, |
|
"rewards/margins_max": 2.187783718109131, |
|
"rewards/margins_min": 0.7079996466636658, |
|
"rewards/margins_std": 1.0463653802871704, |
|
"rewards/rejected": -2.656738042831421, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 4.46875, |
|
"learning_rate": 3.6227708783908053e-07, |
|
"logits/chosen": -3.0654194355010986, |
|
"logits/rejected": -2.8562426567077637, |
|
"logps/chosen": -303.43280029296875, |
|
"logps/rejected": -504.5311584472656, |
|
"loss": 0.3367, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.1927417516708374, |
|
"rewards/margins": 1.5436158180236816, |
|
"rewards/margins_max": 2.4939346313476562, |
|
"rewards/margins_min": 0.5932968854904175, |
|
"rewards/margins_std": 1.3439538478851318, |
|
"rewards/rejected": -2.7363574504852295, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 4.59375, |
|
"learning_rate": 3.5928614355637324e-07, |
|
"logits/chosen": -3.0500872135162354, |
|
"logits/rejected": -2.8141045570373535, |
|
"logps/chosen": -288.33636474609375, |
|
"logps/rejected": -500.2225036621094, |
|
"loss": 0.3218, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9479445219039917, |
|
"rewards/margins": 1.6601346731185913, |
|
"rewards/margins_max": 2.4742271900177, |
|
"rewards/margins_min": 0.8460421562194824, |
|
"rewards/margins_std": 1.1513007879257202, |
|
"rewards/rejected": -2.608079433441162, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 3.921875, |
|
"learning_rate": 3.562757346736633e-07, |
|
"logits/chosen": -3.1348893642425537, |
|
"logits/rejected": -2.8808882236480713, |
|
"logps/chosen": -299.9723205566406, |
|
"logps/rejected": -512.7084350585938, |
|
"loss": 0.3175, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.1462712287902832, |
|
"rewards/margins": 1.8254330158233643, |
|
"rewards/margins_max": 2.717803716659546, |
|
"rewards/margins_min": 0.9330623745918274, |
|
"rewards/margins_std": 1.26200270652771, |
|
"rewards/rejected": -2.9717042446136475, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 3.265625, |
|
"learning_rate": 3.532463973650971e-07, |
|
"logits/chosen": -3.0953421592712402, |
|
"logits/rejected": -2.8315436840057373, |
|
"logps/chosen": -320.49896240234375, |
|
"logps/rejected": -559.3016357421875, |
|
"loss": 0.3058, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.0349481105804443, |
|
"rewards/margins": 1.8946539163589478, |
|
"rewards/margins_max": 3.176825761795044, |
|
"rewards/margins_min": 0.612481951713562, |
|
"rewards/margins_std": 1.813265085220337, |
|
"rewards/rejected": -2.9296019077301025, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 6.3125, |
|
"learning_rate": 3.501986711761016e-07, |
|
"logits/chosen": -3.0640907287597656, |
|
"logits/rejected": -2.8440439701080322, |
|
"logps/chosen": -262.0741882324219, |
|
"logps/rejected": -465.90155029296875, |
|
"loss": 0.3491, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.0486122369766235, |
|
"rewards/margins": 1.6883703470230103, |
|
"rewards/margins_max": 2.6328392028808594, |
|
"rewards/margins_min": 0.7439014911651611, |
|
"rewards/margins_std": 1.3356807231903076, |
|
"rewards/rejected": -2.736982822418213, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 5.0, |
|
"learning_rate": 3.4713309892728755e-07, |
|
"logits/chosen": -3.0397841930389404, |
|
"logits/rejected": -2.7506637573242188, |
|
"logps/chosen": -288.4527282714844, |
|
"logps/rejected": -446.4369201660156, |
|
"loss": 0.3477, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.0533605813980103, |
|
"rewards/margins": 1.4881670475006104, |
|
"rewards/margins_max": 2.2428665161132812, |
|
"rewards/margins_min": 0.7334678769111633, |
|
"rewards/margins_std": 1.0673058032989502, |
|
"rewards/rejected": -2.54152774810791, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 3.28125, |
|
"learning_rate": 3.4405022661776933e-07, |
|
"logits/chosen": -3.0119423866271973, |
|
"logits/rejected": -2.75697660446167, |
|
"logps/chosen": -313.69891357421875, |
|
"logps/rejected": -527.7509765625, |
|
"loss": 0.3095, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.0210492610931396, |
|
"rewards/margins": 1.6547205448150635, |
|
"rewards/margins_max": 2.5389270782470703, |
|
"rewards/margins_min": 0.770513653755188, |
|
"rewards/margins_std": 1.2504572868347168, |
|
"rewards/rejected": -2.675769567489624, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 4.53125, |
|
"learning_rate": 3.40950603327919e-07, |
|
"logits/chosen": -3.034954309463501, |
|
"logits/rejected": -2.717050790786743, |
|
"logps/chosen": -311.44989013671875, |
|
"logps/rejected": -536.5928955078125, |
|
"loss": 0.2818, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.1659419536590576, |
|
"rewards/margins": 2.0434906482696533, |
|
"rewards/margins_max": 2.969123363494873, |
|
"rewards/margins_min": 1.1178580522537231, |
|
"rewards/margins_std": 1.30904221534729, |
|
"rewards/rejected": -3.209432601928711, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 4.21875, |
|
"learning_rate": 3.3783478112157144e-07, |
|
"logits/chosen": -3.0681750774383545, |
|
"logits/rejected": -2.842912197113037, |
|
"logps/chosen": -298.2220764160156, |
|
"logps/rejected": -496.28509521484375, |
|
"loss": 0.3477, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.1258448362350464, |
|
"rewards/margins": 1.5719826221466064, |
|
"rewards/margins_max": 2.495079278945923, |
|
"rewards/margins_min": 0.64888596534729, |
|
"rewards/margins_std": 1.3054558038711548, |
|
"rewards/rejected": -2.6978273391723633, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 3.0, |
|
"learning_rate": 3.347033149476976e-07, |
|
"logits/chosen": -3.000627279281616, |
|
"logits/rejected": -2.82975172996521, |
|
"logps/chosen": -327.8063049316406, |
|
"logps/rejected": -512.77587890625, |
|
"loss": 0.3452, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.0002752542495728, |
|
"rewards/margins": 1.4279507398605347, |
|
"rewards/margins_max": 2.1153764724731445, |
|
"rewards/margins_min": 0.7405253648757935, |
|
"rewards/margins_std": 0.9721664190292358, |
|
"rewards/rejected": -2.4282259941101074, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 4.75, |
|
"learning_rate": 3.3155676254156496e-07, |
|
"logits/chosen": -3.07631254196167, |
|
"logits/rejected": -2.837388038635254, |
|
"logps/chosen": -340.0731506347656, |
|
"logps/rejected": -575.410888671875, |
|
"loss": 0.3498, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.210822343826294, |
|
"rewards/margins": 1.6817712783813477, |
|
"rewards/margins_max": 2.6699368953704834, |
|
"rewards/margins_min": 0.6936053037643433, |
|
"rewards/margins_std": 1.3974777460098267, |
|
"rewards/rejected": -2.8925936222076416, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 4.5625, |
|
"learning_rate": 3.2839568432540064e-07, |
|
"logits/chosen": -3.0555381774902344, |
|
"logits/rejected": -2.8638923168182373, |
|
"logps/chosen": -336.87213134765625, |
|
"logps/rejected": -524.3484497070312, |
|
"loss": 0.2813, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.0288445949554443, |
|
"rewards/margins": 1.6101176738739014, |
|
"rewards/margins_max": 2.4245457649230957, |
|
"rewards/margins_min": 0.7956899404525757, |
|
"rewards/margins_std": 1.1517750024795532, |
|
"rewards/rejected": -2.6389622688293457, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 5.375, |
|
"learning_rate": 3.252206433085768e-07, |
|
"logits/chosen": -3.0896949768066406, |
|
"logits/rejected": -2.7695508003234863, |
|
"logps/chosen": -315.62530517578125, |
|
"logps/rejected": -450.58856201171875, |
|
"loss": 0.3317, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.0867998600006104, |
|
"rewards/margins": 1.556259274482727, |
|
"rewards/margins_max": 2.347869396209717, |
|
"rewards/margins_min": 0.7646490335464478, |
|
"rewards/margins_std": 1.1195060014724731, |
|
"rewards/rejected": -2.643059253692627, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 8.375, |
|
"learning_rate": 3.220322049873344e-07, |
|
"logits/chosen": -3.053899049758911, |
|
"logits/rejected": -2.7996201515197754, |
|
"logps/chosen": -293.49700927734375, |
|
"logps/rejected": -517.9727172851562, |
|
"loss": 0.3098, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.0438956022262573, |
|
"rewards/margins": 1.7273838520050049, |
|
"rewards/margins_max": 2.7014057636260986, |
|
"rewards/margins_min": 0.7533617615699768, |
|
"rewards/margins_std": 1.3774750232696533, |
|
"rewards/rejected": -2.7712790966033936, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 8.875, |
|
"learning_rate": 3.1883093724406493e-07, |
|
"logits/chosen": -3.0701441764831543, |
|
"logits/rejected": -2.776665687561035, |
|
"logps/chosen": -336.69366455078125, |
|
"logps/rejected": -524.4509887695312, |
|
"loss": 0.3133, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.1884243488311768, |
|
"rewards/margins": 1.7032394409179688, |
|
"rewards/margins_max": 2.4934375286102295, |
|
"rewards/margins_min": 0.9130409359931946, |
|
"rewards/margins_std": 1.117509126663208, |
|
"rewards/rejected": -2.8916633129119873, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 5.9375, |
|
"learning_rate": 3.156174102461666e-07, |
|
"logits/chosen": -3.0736892223358154, |
|
"logits/rejected": -2.805081605911255, |
|
"logps/chosen": -311.7049255371094, |
|
"logps/rejected": -526.9119873046875, |
|
"loss": 0.3061, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.4257994890213013, |
|
"rewards/margins": 1.9017452001571655, |
|
"rewards/margins_max": 3.0200836658477783, |
|
"rewards/margins_min": 0.7834072113037109, |
|
"rewards/margins_std": 1.581568956375122, |
|
"rewards/rejected": -3.327544689178467, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.8125, |
|
"learning_rate": 3.1239219634449347e-07, |
|
"logits/chosen": -2.9780619144439697, |
|
"logits/rejected": -2.771231174468994, |
|
"logps/chosen": -298.0756530761719, |
|
"logps/rejected": -519.0932006835938, |
|
"loss": 0.3565, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.2179266214370728, |
|
"rewards/margins": 1.8608818054199219, |
|
"rewards/margins_max": 2.9755165576934814, |
|
"rewards/margins_min": 0.7462473511695862, |
|
"rewards/margins_std": 1.5763311386108398, |
|
"rewards/rejected": -3.078808307647705, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 15.3125, |
|
"learning_rate": 3.0915586997141624e-07, |
|
"logits/chosen": -3.0769824981689453, |
|
"logits/rejected": -2.8244097232818604, |
|
"logps/chosen": -355.56427001953125, |
|
"logps/rejected": -530.8713989257812, |
|
"loss": 0.3188, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.1896952390670776, |
|
"rewards/margins": 1.6044542789459229, |
|
"rewards/margins_max": 2.3940157890319824, |
|
"rewards/margins_min": 0.8148924708366394, |
|
"rewards/margins_std": 1.1166088581085205, |
|
"rewards/rejected": -2.794149875640869, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 5.28125, |
|
"learning_rate": 3.059090075385117e-07, |
|
"logits/chosen": -3.030339002609253, |
|
"logits/rejected": -2.784923791885376, |
|
"logps/chosen": -345.70062255859375, |
|
"logps/rejected": -536.1658325195312, |
|
"loss": 0.3522, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.2129261493682861, |
|
"rewards/margins": 1.6397039890289307, |
|
"rewards/margins_max": 2.4216091632843018, |
|
"rewards/margins_min": 0.8577985763549805, |
|
"rewards/margins_std": 1.1057811975479126, |
|
"rewards/rejected": -2.852630138397217, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 4.59375, |
|
"learning_rate": 3.0265218733390004e-07, |
|
"logits/chosen": -3.020380973815918, |
|
"logits/rejected": -2.6990604400634766, |
|
"logps/chosen": -326.10516357421875, |
|
"logps/rejected": -501.0379943847656, |
|
"loss": 0.323, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.0968921184539795, |
|
"rewards/margins": 1.6457980871200562, |
|
"rewards/margins_max": 2.706159830093384, |
|
"rewards/margins_min": 0.5854364633560181, |
|
"rewards/margins_std": 1.4995777606964111, |
|
"rewards/rejected": -2.742690324783325, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 9.25, |
|
"learning_rate": 2.993859894192477e-07, |
|
"logits/chosen": -3.0308547019958496, |
|
"logits/rejected": -2.7866368293762207, |
|
"logps/chosen": -305.8663635253906, |
|
"logps/rejected": -504.67413330078125, |
|
"loss": 0.2811, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.2033522129058838, |
|
"rewards/margins": 1.8916361331939697, |
|
"rewards/margins_max": 2.847817897796631, |
|
"rewards/margins_min": 0.9354543685913086, |
|
"rewards/margins_std": 1.3522452116012573, |
|
"rewards/rejected": -3.0949883460998535, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 5.0625, |
|
"learning_rate": 2.961109955264549e-07, |
|
"logits/chosen": -3.040210723876953, |
|
"logits/rejected": -2.740476369857788, |
|
"logps/chosen": -336.0604553222656, |
|
"logps/rejected": -490.28961181640625, |
|
"loss": 0.3035, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.12978994846344, |
|
"rewards/margins": 1.6222766637802124, |
|
"rewards/margins_max": 2.5636367797851562, |
|
"rewards/margins_min": 0.6809166669845581, |
|
"rewards/margins_std": 1.3312841653823853, |
|
"rewards/rejected": -2.7520663738250732, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 4.6875, |
|
"learning_rate": 2.9282778895404474e-07, |
|
"logits/chosen": -2.9875807762145996, |
|
"logits/rejected": -2.7361321449279785, |
|
"logps/chosen": -326.562744140625, |
|
"logps/rejected": -542.1422729492188, |
|
"loss": 0.2952, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.2226388454437256, |
|
"rewards/margins": 1.9603245258331299, |
|
"rewards/margins_max": 2.9473373889923096, |
|
"rewards/margins_min": 0.9733120203018188, |
|
"rewards/margins_std": 1.3958467245101929, |
|
"rewards/rejected": -3.1829633712768555, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.84375, |
|
"learning_rate": 2.895369544632739e-07, |
|
"logits/chosen": -3.1332552433013916, |
|
"logits/rejected": -2.908846855163574, |
|
"logps/chosen": -306.4833068847656, |
|
"logps/rejected": -578.1776123046875, |
|
"loss": 0.3291, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.3200147151947021, |
|
"rewards/margins": 2.0552730560302734, |
|
"rewards/margins_max": 3.0561721324920654, |
|
"rewards/margins_min": 1.0543737411499023, |
|
"rewards/margins_std": 1.415485143661499, |
|
"rewards/rejected": -3.3752880096435547, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 4.71875, |
|
"learning_rate": 2.8623907817398305e-07, |
|
"logits/chosen": -3.0694479942321777, |
|
"logits/rejected": -2.8231074810028076, |
|
"logps/chosen": -339.3045959472656, |
|
"logps/rejected": -579.858154296875, |
|
"loss": 0.2679, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.2447212934494019, |
|
"rewards/margins": 2.1124045848846436, |
|
"rewards/margins_max": 3.3244433403015137, |
|
"rewards/margins_min": 0.9003661274909973, |
|
"rewards/margins_std": 1.7140815258026123, |
|
"rewards/rejected": -3.357126235961914, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 4.0, |
|
"learning_rate": 2.8293474746020467e-07, |
|
"logits/chosen": -3.0237174034118652, |
|
"logits/rejected": -2.7316677570343018, |
|
"logps/chosen": -341.8821716308594, |
|
"logps/rejected": -525.6448974609375, |
|
"loss": 0.3374, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.3774925470352173, |
|
"rewards/margins": 1.6952784061431885, |
|
"rewards/margins_max": 2.7387375831604004, |
|
"rewards/margins_min": 0.6518189311027527, |
|
"rewards/margins_std": 1.4756742715835571, |
|
"rewards/rejected": -3.072770595550537, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 5.21875, |
|
"learning_rate": 2.796245508455478e-07, |
|
"logits/chosen": -3.1177384853363037, |
|
"logits/rejected": -2.8181514739990234, |
|
"logps/chosen": -315.0109558105469, |
|
"logps/rejected": -522.7584228515625, |
|
"loss": 0.2943, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.494227647781372, |
|
"rewards/margins": 1.7711334228515625, |
|
"rewards/margins_max": 2.687786340713501, |
|
"rewards/margins_min": 0.8544808626174927, |
|
"rewards/margins_std": 1.2963426113128662, |
|
"rewards/rejected": -3.2653610706329346, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 9.25, |
|
"learning_rate": 2.7630907789837765e-07, |
|
"logits/chosen": -2.9989418983459473, |
|
"logits/rejected": -2.716702938079834, |
|
"logps/chosen": -306.7658996582031, |
|
"logps/rejected": -521.6512451171875, |
|
"loss": 0.3498, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.203908920288086, |
|
"rewards/margins": 1.7978990077972412, |
|
"rewards/margins_max": 2.8368608951568604, |
|
"rewards/margins_min": 0.7589374780654907, |
|
"rewards/margins_std": 1.469313621520996, |
|
"rewards/rejected": -3.0018081665039062, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 9.375, |
|
"learning_rate": 2.7298891912681063e-07, |
|
"logits/chosen": -3.0549111366271973, |
|
"logits/rejected": -2.8135063648223877, |
|
"logps/chosen": -332.3044128417969, |
|
"logps/rejected": -516.0485229492188, |
|
"loss": 0.3097, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.4526578187942505, |
|
"rewards/margins": 1.7673333883285522, |
|
"rewards/margins_max": 2.8053770065307617, |
|
"rewards/margins_min": 0.7292898893356323, |
|
"rewards/margins_std": 1.4680153131484985, |
|
"rewards/rejected": -3.2199912071228027, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 4.875, |
|
"learning_rate": 2.696646658735396e-07, |
|
"logits/chosen": -3.087620973587036, |
|
"logits/rejected": -2.817905902862549, |
|
"logps/chosen": -336.32794189453125, |
|
"logps/rejected": -506.7635192871094, |
|
"loss": 0.315, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.2476387023925781, |
|
"rewards/margins": 1.67379891872406, |
|
"rewards/margins_max": 2.6297619342803955, |
|
"rewards/margins_min": 0.717836320400238, |
|
"rewards/margins_std": 1.3519353866577148, |
|
"rewards/rejected": -2.9214377403259277, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 5.25, |
|
"learning_rate": 2.6633691021051225e-07, |
|
"logits/chosen": -3.058051347732544, |
|
"logits/rejected": -2.823256254196167, |
|
"logps/chosen": -315.0464172363281, |
|
"logps/rejected": -477.55255126953125, |
|
"loss": 0.3293, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.428307294845581, |
|
"rewards/margins": 1.4829260110855103, |
|
"rewards/margins_max": 2.2689406871795654, |
|
"rewards/margins_min": 0.6969112753868103, |
|
"rewards/margins_std": 1.1115926504135132, |
|
"rewards/rejected": -2.911233425140381, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 8.75, |
|
"learning_rate": 2.630062448334792e-07, |
|
"logits/chosen": -3.0223097801208496, |
|
"logits/rejected": -2.8338611125946045, |
|
"logps/chosen": -287.4037170410156, |
|
"logps/rejected": -502.56329345703125, |
|
"loss": 0.3243, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.0643078088760376, |
|
"rewards/margins": 1.7524343729019165, |
|
"rewards/margins_max": 2.540440797805786, |
|
"rewards/margins_min": 0.964427649974823, |
|
"rewards/margins_std": 1.1144096851348877, |
|
"rewards/rejected": -2.816742420196533, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 6.9375, |
|
"learning_rate": 2.596732629564309e-07, |
|
"logits/chosen": -3.0276882648468018, |
|
"logits/rejected": -2.8791420459747314, |
|
"logps/chosen": -303.8673095703125, |
|
"logps/rejected": -552.97314453125, |
|
"loss": 0.3188, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.2054811716079712, |
|
"rewards/margins": 1.6991316080093384, |
|
"rewards/margins_max": 2.568361759185791, |
|
"rewards/margins_min": 0.8299016952514648, |
|
"rewards/margins_std": 1.2292768955230713, |
|
"rewards/rejected": -2.9046127796173096, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 3.71875, |
|
"learning_rate": 2.5633855820594197e-07, |
|
"logits/chosen": -3.15714430809021, |
|
"logits/rejected": -2.9314417839050293, |
|
"logps/chosen": -320.03668212890625, |
|
"logps/rejected": -644.4346923828125, |
|
"loss": 0.3033, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.3862671852111816, |
|
"rewards/margins": 2.538801431655884, |
|
"rewards/margins_max": 3.948146343231201, |
|
"rewards/margins_min": 1.1294561624526978, |
|
"rewards/margins_std": 1.9931151866912842, |
|
"rewards/rejected": -3.9250686168670654, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 4.09375, |
|
"learning_rate": 2.530027245154423e-07, |
|
"logits/chosen": -2.962214708328247, |
|
"logits/rejected": -2.746046543121338, |
|
"logps/chosen": -328.22784423828125, |
|
"logps/rejected": -617.6292724609375, |
|
"loss": 0.2839, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.3232418298721313, |
|
"rewards/margins": 2.3371334075927734, |
|
"rewards/margins_max": 3.821005344390869, |
|
"rewards/margins_min": 0.8532617688179016, |
|
"rewards/margins_std": 2.0985114574432373, |
|
"rewards/rejected": -3.6603751182556152, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 4.6875, |
|
"learning_rate": 2.496663560194338e-07, |
|
"logits/chosen": -3.0740249156951904, |
|
"logits/rejected": -2.795609951019287, |
|
"logps/chosen": -326.8231506347656, |
|
"logps/rejected": -546.2271728515625, |
|
"loss": 0.2853, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.206667184829712, |
|
"rewards/margins": 1.9110145568847656, |
|
"rewards/margins_max": 3.0812644958496094, |
|
"rewards/margins_min": 0.7407640814781189, |
|
"rewards/margins_std": 1.6549837589263916, |
|
"rewards/rejected": -3.1176817417144775, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 2.4633004694767086e-07, |
|
"logits/chosen": -3.035614252090454, |
|
"logits/rejected": -2.816070556640625, |
|
"logps/chosen": -287.0525207519531, |
|
"logps/rejected": -511.42010498046875, |
|
"loss": 0.3196, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.2523744106292725, |
|
"rewards/margins": 1.8659451007843018, |
|
"rewards/margins_max": 2.831995725631714, |
|
"rewards/margins_min": 0.8998948931694031, |
|
"rewards/margins_std": 1.3662011623382568, |
|
"rewards/rejected": -3.1183197498321533, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 7.90625, |
|
"learning_rate": 2.4299439151932387e-07, |
|
"logits/chosen": -2.9972755908966064, |
|
"logits/rejected": -2.7236037254333496, |
|
"logps/chosen": -341.40789794921875, |
|
"logps/rejected": -611.5470581054688, |
|
"loss": 0.2963, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2511876821517944, |
|
"rewards/margins": 2.2317798137664795, |
|
"rewards/margins_max": 3.6496143341064453, |
|
"rewards/margins_min": 0.8139451146125793, |
|
"rewards/margins_std": 2.0051209926605225, |
|
"rewards/rejected": -3.4829673767089844, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 4.3125, |
|
"learning_rate": 2.3965998383714496e-07, |
|
"logits/chosen": -3.0932700634002686, |
|
"logits/rejected": -2.840926170349121, |
|
"logps/chosen": -316.71820068359375, |
|
"logps/rejected": -547.8084716796875, |
|
"loss": 0.3287, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2523170709609985, |
|
"rewards/margins": 1.8998005390167236, |
|
"rewards/margins_max": 3.1817667484283447, |
|
"rewards/margins_min": 0.617834210395813, |
|
"rewards/margins_std": 1.812973976135254, |
|
"rewards/rejected": -3.1521174907684326, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 4.34375, |
|
"learning_rate": 2.3632741778165442e-07, |
|
"logits/chosen": -3.0418949127197266, |
|
"logits/rejected": -2.756995916366577, |
|
"logps/chosen": -330.74176025390625, |
|
"logps/rejected": -586.962646484375, |
|
"loss": 0.2811, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.4056506156921387, |
|
"rewards/margins": 2.22354793548584, |
|
"rewards/margins_max": 3.1083569526672363, |
|
"rewards/margins_min": 1.3387386798858643, |
|
"rewards/margins_std": 1.2513091564178467, |
|
"rewards/rejected": -3.6291985511779785, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 5.0, |
|
"learning_rate": 2.3299728690536608e-07, |
|
"logits/chosen": -3.123486280441284, |
|
"logits/rejected": -2.8105220794677734, |
|
"logps/chosen": -334.51751708984375, |
|
"logps/rejected": -510.5174865722656, |
|
"loss": 0.3437, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.432910680770874, |
|
"rewards/margins": 1.7944892644882202, |
|
"rewards/margins_max": 2.8090806007385254, |
|
"rewards/margins_min": 0.7798979878425598, |
|
"rewards/margins_std": 1.434848666191101, |
|
"rewards/rejected": -3.227400302886963, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 7.125, |
|
"learning_rate": 2.296701843270721e-07, |
|
"logits/chosen": -3.0233614444732666, |
|
"logits/rejected": -2.722904920578003, |
|
"logps/chosen": -383.2572021484375, |
|
"logps/rejected": -575.3277587890625, |
|
"loss": 0.306, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.3304874897003174, |
|
"rewards/margins": 1.631151556968689, |
|
"rewards/margins_max": 2.6261086463928223, |
|
"rewards/margins_min": 0.6361947655677795, |
|
"rewards/margins_std": 1.4070814847946167, |
|
"rewards/rejected": -2.961639165878296, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 5.90625, |
|
"learning_rate": 2.2634670262620448e-07, |
|
"logits/chosen": -3.0541083812713623, |
|
"logits/rejected": -2.8443028926849365, |
|
"logps/chosen": -283.5533142089844, |
|
"logps/rejected": -542.0906982421875, |
|
"loss": 0.294, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.050548791885376, |
|
"rewards/margins": 2.099508285522461, |
|
"rewards/margins_max": 3.110419273376465, |
|
"rewards/margins_min": 1.0885975360870361, |
|
"rewards/margins_std": 1.429643988609314, |
|
"rewards/rejected": -3.150057077407837, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 6.34375, |
|
"learning_rate": 2.2302743373729206e-07, |
|
"logits/chosen": -3.053959369659424, |
|
"logits/rejected": -2.795989751815796, |
|
"logps/chosen": -303.9230651855469, |
|
"logps/rejected": -550.1535034179688, |
|
"loss": 0.295, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.2478278875350952, |
|
"rewards/margins": 2.0318145751953125, |
|
"rewards/margins_max": 3.167066812515259, |
|
"rewards/margins_min": 0.8965622782707214, |
|
"rewards/margins_std": 1.6054890155792236, |
|
"rewards/rejected": -3.2796425819396973, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 3.59375, |
|
"learning_rate": 2.1971296884453388e-07, |
|
"logits/chosen": -3.122481346130371, |
|
"logits/rejected": -2.825242519378662, |
|
"logps/chosen": -341.49603271484375, |
|
"logps/rejected": -550.0460815429688, |
|
"loss": 0.3166, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.2628682851791382, |
|
"rewards/margins": 1.9104182720184326, |
|
"rewards/margins_max": 3.124661684036255, |
|
"rewards/margins_min": 0.6961749792098999, |
|
"rewards/margins_std": 1.7171993255615234, |
|
"rewards/rejected": -3.1732866764068604, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 4.84375, |
|
"learning_rate": 2.1640389827650468e-07, |
|
"logits/chosen": -3.086146116256714, |
|
"logits/rejected": -2.824626922607422, |
|
"logps/chosen": -328.03009033203125, |
|
"logps/rejected": -552.62060546875, |
|
"loss": 0.287, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.9867761731147766, |
|
"rewards/margins": 2.0814549922943115, |
|
"rewards/margins_max": 3.2015552520751953, |
|
"rewards/margins_min": 0.9613545536994934, |
|
"rewards/margins_std": 1.5840612649917603, |
|
"rewards/rejected": -3.0682311058044434, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 11.0625, |
|
"learning_rate": 2.1310081140101327e-07, |
|
"logits/chosen": -3.09912371635437, |
|
"logits/rejected": -2.8447043895721436, |
|
"logps/chosen": -317.84014892578125, |
|
"logps/rejected": -500.9691467285156, |
|
"loss": 0.3094, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.4269239902496338, |
|
"rewards/margins": 1.7367973327636719, |
|
"rewards/margins_max": 2.898564577102661, |
|
"rewards/margins_min": 0.5750298500061035, |
|
"rewards/margins_std": 1.6429872512817383, |
|
"rewards/rejected": -3.1637210845947266, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 14.0, |
|
"learning_rate": 2.0980429652013295e-07, |
|
"logits/chosen": -3.072404623031616, |
|
"logits/rejected": -2.8281359672546387, |
|
"logps/chosen": -310.8489685058594, |
|
"logps/rejected": -544.1529541015625, |
|
"loss": 0.3317, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.312307596206665, |
|
"rewards/margins": 2.0402112007141113, |
|
"rewards/margins_max": 3.448382616043091, |
|
"rewards/margins_min": 0.6320397853851318, |
|
"rewards/margins_std": 1.991454839706421, |
|
"rewards/rejected": -3.3525187969207764, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 3.484375, |
|
"learning_rate": 2.0651494076541996e-07, |
|
"logits/chosen": -3.017042636871338, |
|
"logits/rejected": -2.806481122970581, |
|
"logps/chosen": -300.30584716796875, |
|
"logps/rejected": -586.01904296875, |
|
"loss": 0.3159, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.2244617938995361, |
|
"rewards/margins": 2.292558431625366, |
|
"rewards/margins_max": 3.585287094116211, |
|
"rewards/margins_min": 0.999829888343811, |
|
"rewards/margins_std": 1.8281943798065186, |
|
"rewards/rejected": -3.5170199871063232, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 5.0, |
|
"learning_rate": 2.0323332999334193e-07, |
|
"logits/chosen": -3.004047155380249, |
|
"logits/rejected": -2.7635300159454346, |
|
"logps/chosen": -297.9124450683594, |
|
"logps/rejected": -565.5846557617188, |
|
"loss": 0.322, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.4263161420822144, |
|
"rewards/margins": 2.243640184402466, |
|
"rewards/margins_max": 3.4020352363586426, |
|
"rewards/margins_min": 1.0852453708648682, |
|
"rewards/margins_std": 1.6382176876068115, |
|
"rewards/rejected": -3.6699562072753906, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 5.03125, |
|
"learning_rate": 1.999600486809331e-07, |
|
"logits/chosen": -3.0038676261901855, |
|
"logits/rejected": -2.782977819442749, |
|
"logps/chosen": -319.99542236328125, |
|
"logps/rejected": -548.05615234375, |
|
"loss": 0.3235, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.2841845750808716, |
|
"rewards/margins": 2.042858600616455, |
|
"rewards/margins_max": 3.4500343799591064, |
|
"rewards/margins_min": 0.6356827020645142, |
|
"rewards/margins_std": 1.9900470972061157, |
|
"rewards/rejected": -3.327043056488037, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 4.40625, |
|
"learning_rate": 1.9669567982169428e-07, |
|
"logits/chosen": -3.0793793201446533, |
|
"logits/rejected": -2.839048147201538, |
|
"logps/chosen": -321.1004333496094, |
|
"logps/rejected": -508.93914794921875, |
|
"loss": 0.3593, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.6104228496551514, |
|
"rewards/margins": 1.6521637439727783, |
|
"rewards/margins_max": 3.04083251953125, |
|
"rewards/margins_min": 0.26349449157714844, |
|
"rewards/margins_std": 1.9638748168945312, |
|
"rewards/rejected": -3.2625865936279297, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 7.5, |
|
"learning_rate": 1.9344080482175835e-07, |
|
"logits/chosen": -2.9939608573913574, |
|
"logits/rejected": -2.805124521255493, |
|
"logps/chosen": -302.0801696777344, |
|
"logps/rejected": -551.068359375, |
|
"loss": 0.2995, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.3268810510635376, |
|
"rewards/margins": 2.172919988632202, |
|
"rewards/margins_max": 3.472337245941162, |
|
"rewards/margins_min": 0.8735028505325317, |
|
"rewards/margins_std": 1.8376535177230835, |
|
"rewards/rejected": -3.4998011589050293, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 6.5, |
|
"learning_rate": 1.9019600339633797e-07, |
|
"logits/chosen": -3.0695903301239014, |
|
"logits/rejected": -2.8530142307281494, |
|
"logps/chosen": -364.30255126953125, |
|
"logps/rejected": -550.4779052734375, |
|
"loss": 0.329, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.4837919473648071, |
|
"rewards/margins": 1.7356408834457397, |
|
"rewards/margins_max": 2.772507905960083, |
|
"rewards/margins_min": 0.6987739205360413, |
|
"rewards/margins_std": 1.4663512706756592, |
|
"rewards/rejected": -3.2194323539733887, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 6.6875, |
|
"learning_rate": 1.8696185346647386e-07, |
|
"logits/chosen": -3.0316128730773926, |
|
"logits/rejected": -2.7795474529266357, |
|
"logps/chosen": -301.7175598144531, |
|
"logps/rejected": -547.9525146484375, |
|
"loss": 0.3055, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.2730237245559692, |
|
"rewards/margins": 2.1233882904052734, |
|
"rewards/margins_max": 3.315976619720459, |
|
"rewards/margins_min": 0.9308001399040222, |
|
"rewards/margins_std": 1.6865743398666382, |
|
"rewards/rejected": -3.3964123725891113, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 14.375, |
|
"learning_rate": 1.8373893105610356e-07, |
|
"logits/chosen": -3.0410008430480957, |
|
"logits/rejected": -2.821869373321533, |
|
"logps/chosen": -300.0240783691406, |
|
"logps/rejected": -579.7957153320312, |
|
"loss": 0.2773, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.1710954904556274, |
|
"rewards/margins": 2.312669038772583, |
|
"rewards/margins_max": 3.4646263122558594, |
|
"rewards/margins_min": 1.1607115268707275, |
|
"rewards/margins_std": 1.629113793373108, |
|
"rewards/rejected": -3.4837639331817627, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 4.96875, |
|
"learning_rate": 1.8052781018946776e-07, |
|
"logits/chosen": -3.058983325958252, |
|
"logits/rejected": -2.825737476348877, |
|
"logps/chosen": -318.34185791015625, |
|
"logps/rejected": -551.1431274414062, |
|
"loss": 0.3039, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.0352818965911865, |
|
"rewards/margins": 1.9767980575561523, |
|
"rewards/margins_max": 3.301335573196411, |
|
"rewards/margins_min": 0.6522601842880249, |
|
"rewards/margins_std": 1.8731791973114014, |
|
"rewards/rejected": -3.012079954147339, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 4.5, |
|
"learning_rate": 1.7732906278887222e-07, |
|
"logits/chosen": -2.973245620727539, |
|
"logits/rejected": -2.6782615184783936, |
|
"logps/chosen": -339.115234375, |
|
"logps/rejected": -555.8124389648438, |
|
"loss": 0.303, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.1952590942382812, |
|
"rewards/margins": 1.9027913808822632, |
|
"rewards/margins_max": 2.9387829303741455, |
|
"rewards/margins_min": 0.8667998313903809, |
|
"rewards/margins_std": 1.4651134014129639, |
|
"rewards/rejected": -3.098050594329834, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 3.765625, |
|
"learning_rate": 1.7414325857282526e-07, |
|
"logits/chosen": -3.071760892868042, |
|
"logits/rejected": -2.8689608573913574, |
|
"logps/chosen": -285.7099304199219, |
|
"logps/rejected": -561.7498779296875, |
|
"loss": 0.3187, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.2145211696624756, |
|
"rewards/margins": 1.9574220180511475, |
|
"rewards/margins_max": 2.9473042488098145, |
|
"rewards/margins_min": 0.9675399661064148, |
|
"rewards/margins_std": 1.399904489517212, |
|
"rewards/rejected": -3.171943426132202, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 2.453125, |
|
"learning_rate": 1.709709649545662e-07, |
|
"logits/chosen": -3.034092426300049, |
|
"logits/rejected": -2.7237911224365234, |
|
"logps/chosen": -315.25665283203125, |
|
"logps/rejected": -538.0850830078125, |
|
"loss": 0.2661, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.0960243940353394, |
|
"rewards/margins": 2.064706325531006, |
|
"rewards/margins_max": 3.1389946937561035, |
|
"rewards/margins_min": 0.9904179573059082, |
|
"rewards/margins_std": 1.519273281097412, |
|
"rewards/rejected": -3.1607306003570557, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 4.71875, |
|
"learning_rate": 1.6781274694100599e-07, |
|
"logits/chosen": -3.057701587677002, |
|
"logits/rejected": -2.8561203479766846, |
|
"logps/chosen": -312.094970703125, |
|
"logps/rejected": -571.929443359375, |
|
"loss": 0.2961, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.2997673749923706, |
|
"rewards/margins": 2.410301685333252, |
|
"rewards/margins_max": 3.7557570934295654, |
|
"rewards/margins_min": 1.064846396446228, |
|
"rewards/margins_std": 1.9027611017227173, |
|
"rewards/rejected": -3.7100696563720703, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 3.8125, |
|
"learning_rate": 1.6466916703209532e-07, |
|
"logits/chosen": -3.1039514541625977, |
|
"logits/rejected": -2.8413565158843994, |
|
"logps/chosen": -323.77264404296875, |
|
"logps/rejected": -546.0987548828125, |
|
"loss": 0.2862, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.2360670566558838, |
|
"rewards/margins": 1.9988797903060913, |
|
"rewards/margins_max": 3.3218770027160645, |
|
"rewards/margins_min": 0.6758825182914734, |
|
"rewards/margins_std": 1.87100088596344, |
|
"rewards/rejected": -3.2349467277526855, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 6.4375, |
|
"learning_rate": 1.6154078512063948e-07, |
|
"logits/chosen": -3.0772576332092285, |
|
"logits/rejected": -2.8048458099365234, |
|
"logps/chosen": -354.092529296875, |
|
"logps/rejected": -560.3134765625, |
|
"loss": 0.3128, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.3545913696289062, |
|
"rewards/margins": 1.8672946691513062, |
|
"rewards/margins_max": 2.8722145557403564, |
|
"rewards/margins_min": 0.8623749613761902, |
|
"rewards/margins_std": 1.4211711883544922, |
|
"rewards/rejected": -3.221886157989502, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 3.953125, |
|
"learning_rate": 1.5842815839257787e-07, |
|
"logits/chosen": -3.0298233032226562, |
|
"logits/rejected": -2.7669081687927246, |
|
"logps/chosen": -308.47601318359375, |
|
"logps/rejected": -582.238525390625, |
|
"loss": 0.2733, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.0452951192855835, |
|
"rewards/margins": 2.085793972015381, |
|
"rewards/margins_max": 3.37977933883667, |
|
"rewards/margins_min": 0.7918087244033813, |
|
"rewards/margins_std": 1.8299715518951416, |
|
"rewards/rejected": -3.131088972091675, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 3.90625, |
|
"learning_rate": 1.553318412277455e-07, |
|
"logits/chosen": -2.9976773262023926, |
|
"logits/rejected": -2.753671884536743, |
|
"logps/chosen": -310.14483642578125, |
|
"logps/rejected": -524.3933715820312, |
|
"loss": 0.3032, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.0656635761260986, |
|
"rewards/margins": 1.9153623580932617, |
|
"rewards/margins_max": 2.951770305633545, |
|
"rewards/margins_min": 0.8789544105529785, |
|
"rewards/margins_std": 1.4657022953033447, |
|
"rewards/rejected": -2.9810261726379395, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 25.0, |
|
"learning_rate": 1.5225238510113375e-07, |
|
"logits/chosen": -3.018756628036499, |
|
"logits/rejected": -2.8047399520874023, |
|
"logps/chosen": -296.329345703125, |
|
"logps/rejected": -502.345703125, |
|
"loss": 0.329, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.2520067691802979, |
|
"rewards/margins": 1.5649439096450806, |
|
"rewards/margins_max": 2.421596050262451, |
|
"rewards/margins_min": 0.7082915306091309, |
|
"rewards/margins_std": 1.211489200592041, |
|
"rewards/rejected": -2.816950559616089, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 4.875, |
|
"learning_rate": 1.4919033848466962e-07, |
|
"logits/chosen": -3.09132981300354, |
|
"logits/rejected": -2.871804714202881, |
|
"logps/chosen": -319.3221130371094, |
|
"logps/rejected": -574.5970458984375, |
|
"loss": 0.2568, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.2768352031707764, |
|
"rewards/margins": 2.041638135910034, |
|
"rewards/margins_max": 2.9990642070770264, |
|
"rewards/margins_min": 1.0842119455337524, |
|
"rewards/margins_std": 1.3540050983428955, |
|
"rewards/rejected": -3.3184731006622314, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 4.9375, |
|
"learning_rate": 1.461462467495284e-07, |
|
"logits/chosen": -3.0827224254608154, |
|
"logits/rejected": -2.813368082046509, |
|
"logps/chosen": -358.135009765625, |
|
"logps/rejected": -571.1021728515625, |
|
"loss": 0.3097, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.214004635810852, |
|
"rewards/margins": 1.886165976524353, |
|
"rewards/margins_max": 2.9341206550598145, |
|
"rewards/margins_min": 0.8382118344306946, |
|
"rewards/margins_std": 1.4820313453674316, |
|
"rewards/rejected": -3.100170612335205, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 6.34375, |
|
"learning_rate": 1.4312065206900021e-07, |
|
"logits/chosen": -3.048474073410034, |
|
"logits/rejected": -2.8194785118103027, |
|
"logps/chosen": -378.2091369628906, |
|
"logps/rejected": -616.70703125, |
|
"loss": 0.3034, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.3116062879562378, |
|
"rewards/margins": 2.1770853996276855, |
|
"rewards/margins_max": 3.711287021636963, |
|
"rewards/margins_min": 0.6428841948509216, |
|
"rewards/margins_std": 2.1696884632110596, |
|
"rewards/rejected": -3.488692045211792, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 6.0, |
|
"learning_rate": 1.401140933219247e-07, |
|
"logits/chosen": -3.1598076820373535, |
|
"logits/rejected": -2.8843746185302734, |
|
"logps/chosen": -340.0314636230469, |
|
"logps/rejected": -537.6961669921875, |
|
"loss": 0.3382, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.213836908340454, |
|
"rewards/margins": 1.8678576946258545, |
|
"rewards/margins_max": 2.903883934020996, |
|
"rewards/margins_min": 0.831831157207489, |
|
"rewards/margins_std": 1.4651625156402588, |
|
"rewards/rejected": -3.0816946029663086, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 4.09375, |
|
"learning_rate": 1.37127105996713e-07, |
|
"logits/chosen": -3.06451153755188, |
|
"logits/rejected": -2.7919728755950928, |
|
"logps/chosen": -302.01983642578125, |
|
"logps/rejected": -489.68426513671875, |
|
"loss": 0.3351, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.1221381425857544, |
|
"rewards/margins": 1.8237155675888062, |
|
"rewards/margins_max": 2.9347827434539795, |
|
"rewards/margins_min": 0.7126487493515015, |
|
"rewards/margins_std": 1.5712860822677612, |
|
"rewards/rejected": -2.9458537101745605, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 3.40625, |
|
"learning_rate": 1.3416022209597428e-07, |
|
"logits/chosen": -3.0701396465301514, |
|
"logits/rejected": -2.8056159019470215, |
|
"logps/chosen": -283.6884765625, |
|
"logps/rejected": -581.525146484375, |
|
"loss": 0.2636, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.2768995761871338, |
|
"rewards/margins": 2.5604019165039062, |
|
"rewards/margins_max": 3.7317397594451904, |
|
"rewards/margins_min": 1.389063835144043, |
|
"rewards/margins_std": 1.6565221548080444, |
|
"rewards/rejected": -3.837301254272461, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 5.34375, |
|
"learning_rate": 1.3121397004176192e-07, |
|
"logits/chosen": -3.0139946937561035, |
|
"logits/rejected": -2.8204457759857178, |
|
"logps/chosen": -328.19464111328125, |
|
"logps/rejected": -496.7694396972656, |
|
"loss": 0.4052, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.3268804550170898, |
|
"rewards/margins": 1.4515395164489746, |
|
"rewards/margins_max": 2.597702741622925, |
|
"rewards/margins_min": 0.3053762912750244, |
|
"rewards/margins_std": 1.6209194660186768, |
|
"rewards/rejected": -2.7784199714660645, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 4.71875, |
|
"learning_rate": 1.2828887458145803e-07, |
|
"logits/chosen": -3.0096335411071777, |
|
"logits/rejected": -2.7300803661346436, |
|
"logps/chosen": -338.5462646484375, |
|
"logps/rejected": -565.7578125, |
|
"loss": 0.3193, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.3147917985916138, |
|
"rewards/margins": 1.8738778829574585, |
|
"rewards/margins_max": 2.926962375640869, |
|
"rewards/margins_min": 0.8207935094833374, |
|
"rewards/margins_std": 1.489286184310913, |
|
"rewards/rejected": -3.1886696815490723, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 6.09375, |
|
"learning_rate": 1.2538545669431277e-07, |
|
"logits/chosen": -2.9853451251983643, |
|
"logits/rejected": -2.771872043609619, |
|
"logps/chosen": -289.3203125, |
|
"logps/rejected": -526.6580810546875, |
|
"loss": 0.2986, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.1213651895523071, |
|
"rewards/margins": 1.9687013626098633, |
|
"rewards/margins_max": 3.2991204261779785, |
|
"rewards/margins_min": 0.638282299041748, |
|
"rewards/margins_std": 1.8814964294433594, |
|
"rewards/rejected": -3.090066432952881, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 2.859375, |
|
"learning_rate": 1.2250423349865385e-07, |
|
"logits/chosen": -3.014766216278076, |
|
"logits/rejected": -2.7629036903381348, |
|
"logps/chosen": -315.1452941894531, |
|
"logps/rejected": -545.957275390625, |
|
"loss": 0.326, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.2841541767120361, |
|
"rewards/margins": 2.023402214050293, |
|
"rewards/margins_max": 3.26432466506958, |
|
"rewards/margins_min": 0.7824802398681641, |
|
"rewards/margins_std": 1.7549289464950562, |
|
"rewards/rejected": -3.307556629180908, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 3.0625, |
|
"learning_rate": 1.1964571815978466e-07, |
|
"logits/chosen": -3.092644214630127, |
|
"logits/rejected": -2.8553617000579834, |
|
"logps/chosen": -312.3218688964844, |
|
"logps/rejected": -571.085693359375, |
|
"loss": 0.2972, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.4290850162506104, |
|
"rewards/margins": 2.0302395820617676, |
|
"rewards/margins_max": 3.186135768890381, |
|
"rewards/margins_min": 0.874343752861023, |
|
"rewards/margins_std": 1.63468337059021, |
|
"rewards/rejected": -3.459324598312378, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 3.625, |
|
"learning_rate": 1.1681041979858625e-07, |
|
"logits/chosen": -3.033277988433838, |
|
"logits/rejected": -2.7657084465026855, |
|
"logps/chosen": -306.395751953125, |
|
"logps/rejected": -543.9789428710938, |
|
"loss": 0.3153, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.2107644081115723, |
|
"rewards/margins": 2.087048292160034, |
|
"rewards/margins_max": 3.434495210647583, |
|
"rewards/margins_min": 0.7396020889282227, |
|
"rewards/margins_std": 1.9055770635604858, |
|
"rewards/rejected": -3.2978129386901855, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 1.1399884340083885e-07, |
|
"logits/chosen": -3.0568225383758545, |
|
"logits/rejected": -2.797673225402832, |
|
"logps/chosen": -353.53240966796875, |
|
"logps/rejected": -581.5899047851562, |
|
"loss": 0.3496, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.299001932144165, |
|
"rewards/margins": 1.858944296836853, |
|
"rewards/margins_max": 3.037067413330078, |
|
"rewards/margins_min": 0.6808212399482727, |
|
"rewards/margins_std": 1.6661179065704346, |
|
"rewards/rejected": -3.1579461097717285, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 3.09375, |
|
"learning_rate": 1.1121148972728103e-07, |
|
"logits/chosen": -2.9481570720672607, |
|
"logits/rejected": -2.7904300689697266, |
|
"logps/chosen": -265.8861389160156, |
|
"logps/rejected": -570.0164794921875, |
|
"loss": 0.2655, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.1066445112228394, |
|
"rewards/margins": 2.189269781112671, |
|
"rewards/margins_max": 3.4255566596984863, |
|
"rewards/margins_min": 0.9529832005500793, |
|
"rewards/margins_std": 1.74837327003479, |
|
"rewards/rejected": -3.2959141731262207, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 3.953125, |
|
"learning_rate": 1.0844885522442074e-07, |
|
"logits/chosen": -3.0224673748016357, |
|
"logits/rejected": -2.7885611057281494, |
|
"logps/chosen": -345.76611328125, |
|
"logps/rejected": -586.8031005859375, |
|
"loss": 0.3028, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.2756171226501465, |
|
"rewards/margins": 2.0080113410949707, |
|
"rewards/margins_max": 3.22710919380188, |
|
"rewards/margins_min": 0.7889140844345093, |
|
"rewards/margins_std": 1.7240642309188843, |
|
"rewards/rejected": -3.2836289405822754, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 5.25, |
|
"learning_rate": 1.0571143193611442e-07, |
|
"logits/chosen": -3.0753421783447266, |
|
"logits/rejected": -2.75911283493042, |
|
"logps/chosen": -365.8919372558594, |
|
"logps/rejected": -536.22119140625, |
|
"loss": 0.3328, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.240403175354004, |
|
"rewards/margins": 1.6046421527862549, |
|
"rewards/margins_max": 2.751216173171997, |
|
"rewards/margins_min": 0.4580683708190918, |
|
"rewards/margins_std": 1.6215002536773682, |
|
"rewards/rejected": -2.845045566558838, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 8.625, |
|
"learning_rate": 1.0299970741593139e-07, |
|
"logits/chosen": -3.0553393363952637, |
|
"logits/rejected": -2.8270599842071533, |
|
"logps/chosen": -332.8170471191406, |
|
"logps/rejected": -568.9782104492188, |
|
"loss": 0.3059, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.547603964805603, |
|
"rewards/margins": 2.207674741744995, |
|
"rewards/margins_max": 3.555354356765747, |
|
"rewards/margins_min": 0.8599950075149536, |
|
"rewards/margins_std": 1.9059069156646729, |
|
"rewards/rejected": -3.7552788257598877, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 3.265625, |
|
"learning_rate": 1.0031416464031654e-07, |
|
"logits/chosen": -3.004044532775879, |
|
"logits/rejected": -2.7544479370117188, |
|
"logps/chosen": -296.4781494140625, |
|
"logps/rejected": -561.3325805664062, |
|
"loss": 0.2683, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.03928542137146, |
|
"rewards/margins": 2.1187376976013184, |
|
"rewards/margins_max": 3.1847622394561768, |
|
"rewards/margins_min": 1.052713394165039, |
|
"rewards/margins_std": 1.507586121559143, |
|
"rewards/rejected": -3.1580233573913574, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 3.546875, |
|
"learning_rate": 9.765528192256928e-08, |
|
"logits/chosen": -3.0507009029388428, |
|
"logits/rejected": -2.7500503063201904, |
|
"logps/chosen": -355.6528625488281, |
|
"logps/rejected": -617.4149169921875, |
|
"loss": 0.2526, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.4352195262908936, |
|
"rewards/margins": 2.4360108375549316, |
|
"rewards/margins_max": 3.6944077014923096, |
|
"rewards/margins_min": 1.1776138544082642, |
|
"rewards/margins_std": 1.779642105102539, |
|
"rewards/rejected": -3.871230363845825, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 31.5, |
|
"learning_rate": 9.502353282765305e-08, |
|
"logits/chosen": -3.0021920204162598, |
|
"logits/rejected": -2.794358491897583, |
|
"logps/chosen": -335.790283203125, |
|
"logps/rejected": -586.9259643554688, |
|
"loss": 0.3317, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.3478296995162964, |
|
"rewards/margins": 2.267702341079712, |
|
"rewards/margins_max": 3.787633180618286, |
|
"rewards/margins_min": 0.7477713823318481, |
|
"rewards/margins_std": 2.1495070457458496, |
|
"rewards/rejected": -3.6155319213867188, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 2.921875, |
|
"learning_rate": 9.241938608784952e-08, |
|
"logits/chosen": -3.0923657417297363, |
|
"logits/rejected": -2.8275530338287354, |
|
"logps/chosen": -343.7580871582031, |
|
"logps/rejected": -642.3468627929688, |
|
"loss": 0.2858, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.290956735610962, |
|
"rewards/margins": 2.2989776134490967, |
|
"rewards/margins_max": 3.6432526111602783, |
|
"rewards/margins_min": 0.9547020196914673, |
|
"rewards/margins_std": 1.901092529296875, |
|
"rewards/rejected": -3.5899341106414795, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 11.8125, |
|
"learning_rate": 8.984330551927474e-08, |
|
"logits/chosen": -3.041471242904663, |
|
"logits/rejected": -2.7196826934814453, |
|
"logps/chosen": -335.8681335449219, |
|
"logps/rejected": -525.2930908203125, |
|
"loss": 0.296, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.2617841958999634, |
|
"rewards/margins": 1.64345383644104, |
|
"rewards/margins_max": 2.7398273944854736, |
|
"rewards/margins_min": 0.5470799207687378, |
|
"rewards/margins_std": 1.550506591796875, |
|
"rewards/rejected": -2.905237913131714, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 4.21875, |
|
"learning_rate": 8.729574993927027e-08, |
|
"logits/chosen": -3.0498995780944824, |
|
"logits/rejected": -2.7774124145507812, |
|
"logps/chosen": -298.8482666015625, |
|
"logps/rejected": -548.7689819335938, |
|
"loss": 0.2758, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.2929372787475586, |
|
"rewards/margins": 2.247105121612549, |
|
"rewards/margins_max": 3.341545581817627, |
|
"rewards/margins_min": 1.1526648998260498, |
|
"rewards/margins_std": 1.5477720499038696, |
|
"rewards/rejected": -3.5400421619415283, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 4.5, |
|
"learning_rate": 8.47771730846844e-08, |
|
"logits/chosen": -3.0463712215423584, |
|
"logits/rejected": -2.820709705352783, |
|
"logps/chosen": -355.2076416015625, |
|
"logps/rejected": -596.3829345703125, |
|
"loss": 0.3158, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.3873268365859985, |
|
"rewards/margins": 2.0964386463165283, |
|
"rewards/margins_max": 3.318743944168091, |
|
"rewards/margins_min": 0.8741332292556763, |
|
"rewards/margins_std": 1.728601098060608, |
|
"rewards/rejected": -3.4837653636932373, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 8.228802353105879e-08, |
|
"logits/chosen": -3.0121498107910156, |
|
"logits/rejected": -2.75282621383667, |
|
"logps/chosen": -333.64752197265625, |
|
"logps/rejected": -581.3692626953125, |
|
"loss": 0.2918, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.227485179901123, |
|
"rewards/margins": 2.29580020904541, |
|
"rewards/margins_max": 3.5629870891571045, |
|
"rewards/margins_min": 1.0286139249801636, |
|
"rewards/margins_std": 1.7920722961425781, |
|
"rewards/rejected": -3.5232856273651123, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 3.625, |
|
"learning_rate": 7.982874461273436e-08, |
|
"logits/chosen": -3.079096794128418, |
|
"logits/rejected": -2.8346662521362305, |
|
"logps/chosen": -356.8946838378906, |
|
"logps/rejected": -570.3480224609375, |
|
"loss": 0.3284, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.3698757886886597, |
|
"rewards/margins": 1.7264187335968018, |
|
"rewards/margins_max": 2.5157878398895264, |
|
"rewards/margins_min": 0.9370495080947876, |
|
"rewards/margins_std": 1.1163365840911865, |
|
"rewards/rejected": -3.096294403076172, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 7.739977434388989e-08, |
|
"logits/chosen": -3.095477819442749, |
|
"logits/rejected": -2.766036033630371, |
|
"logps/chosen": -303.59136962890625, |
|
"logps/rejected": -491.6221618652344, |
|
"loss": 0.3677, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.1862236261367798, |
|
"rewards/margins": 1.5905078649520874, |
|
"rewards/margins_max": 2.6899428367614746, |
|
"rewards/margins_min": 0.4910725951194763, |
|
"rewards/margins_std": 1.5548362731933594, |
|
"rewards/rejected": -2.7767317295074463, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 3.515625, |
|
"learning_rate": 7.500154534052932e-08, |
|
"logits/chosen": -3.0353145599365234, |
|
"logits/rejected": -2.828632116317749, |
|
"logps/chosen": -309.5120849609375, |
|
"logps/rejected": -628.2648315429688, |
|
"loss": 0.2883, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.3138376474380493, |
|
"rewards/margins": 2.359034299850464, |
|
"rewards/margins_max": 3.667515516281128, |
|
"rewards/margins_min": 1.0505527257919312, |
|
"rewards/margins_std": 1.850472092628479, |
|
"rewards/rejected": -3.6728718280792236, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 5.21875, |
|
"learning_rate": 7.263448474342967e-08, |
|
"logits/chosen": -3.044409990310669, |
|
"logits/rejected": -2.806182384490967, |
|
"logps/chosen": -288.3754577636719, |
|
"logps/rejected": -628.6566162109375, |
|
"loss": 0.2556, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.1923936605453491, |
|
"rewards/margins": 2.8368632793426514, |
|
"rewards/margins_max": 4.438173770904541, |
|
"rewards/margins_min": 1.2355536222457886, |
|
"rewards/margins_std": 2.264594316482544, |
|
"rewards/rejected": -4.029257297515869, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 3.875, |
|
"learning_rate": 7.02990141420641e-08, |
|
"logits/chosen": -3.081254720687866, |
|
"logits/rejected": -2.7363178730010986, |
|
"logps/chosen": -341.04254150390625, |
|
"logps/rejected": -610.2572021484375, |
|
"loss": 0.2851, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.430784821510315, |
|
"rewards/margins": 2.504645824432373, |
|
"rewards/margins_max": 3.509565830230713, |
|
"rewards/margins_min": 1.4997262954711914, |
|
"rewards/margins_std": 1.4211710691452026, |
|
"rewards/rejected": -3.9354305267333984, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 2.8125, |
|
"learning_rate": 6.799554949951459e-08, |
|
"logits/chosen": -3.0202066898345947, |
|
"logits/rejected": -2.839040994644165, |
|
"logps/chosen": -313.3542175292969, |
|
"logps/rejected": -556.8174438476562, |
|
"loss": 0.3178, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.3130320310592651, |
|
"rewards/margins": 2.1237895488739014, |
|
"rewards/margins_max": 3.231538772583008, |
|
"rewards/margins_min": 1.0160400867462158, |
|
"rewards/margins_std": 1.566594123840332, |
|
"rewards/rejected": -3.436821699142456, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 4.21875, |
|
"learning_rate": 6.57245010783855e-08, |
|
"logits/chosen": -3.0388119220733643, |
|
"logits/rejected": -2.771134376525879, |
|
"logps/chosen": -348.8595275878906, |
|
"logps/rejected": -569.1920776367188, |
|
"loss": 0.2867, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.1526178121566772, |
|
"rewards/margins": 2.0269393920898438, |
|
"rewards/margins_max": 3.0691983699798584, |
|
"rewards/margins_min": 0.98468017578125, |
|
"rewards/margins_std": 1.4739770889282227, |
|
"rewards/rejected": -3.1795573234558105, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 4.65625, |
|
"learning_rate": 6.348627336773337e-08, |
|
"logits/chosen": -3.071326732635498, |
|
"logits/rejected": -2.8534488677978516, |
|
"logps/chosen": -312.7285461425781, |
|
"logps/rejected": -518.8443603515625, |
|
"loss": 0.3679, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.1652886867523193, |
|
"rewards/margins": 1.7322543859481812, |
|
"rewards/margins_max": 2.9441657066345215, |
|
"rewards/margins_min": 0.5203433036804199, |
|
"rewards/margins_std": 1.7139012813568115, |
|
"rewards/rejected": -2.89754319190979, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 3.1875, |
|
"learning_rate": 6.12812650110248e-08, |
|
"logits/chosen": -2.9665591716766357, |
|
"logits/rejected": -2.7093448638916016, |
|
"logps/chosen": -285.0913391113281, |
|
"logps/rejected": -539.43603515625, |
|
"loss": 0.3377, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.1528418064117432, |
|
"rewards/margins": 2.0406405925750732, |
|
"rewards/margins_max": 3.2592735290527344, |
|
"rewards/margins_min": 0.8220078349113464, |
|
"rewards/margins_std": 1.7234073877334595, |
|
"rewards/rejected": -3.1934823989868164, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 4.8125, |
|
"learning_rate": 5.910986873513485e-08, |
|
"logits/chosen": -3.0143747329711914, |
|
"logits/rejected": -2.7879300117492676, |
|
"logps/chosen": -306.3103942871094, |
|
"logps/rejected": -582.0455932617188, |
|
"loss": 0.2541, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.3029712438583374, |
|
"rewards/margins": 2.4551165103912354, |
|
"rewards/margins_max": 3.8277435302734375, |
|
"rewards/margins_min": 1.0824897289276123, |
|
"rewards/margins_std": 1.9411876201629639, |
|
"rewards/rejected": -3.758087635040283, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 8.6875, |
|
"learning_rate": 5.697247128040036e-08, |
|
"logits/chosen": -3.0154166221618652, |
|
"logits/rejected": -2.8093299865722656, |
|
"logps/chosen": -337.4362487792969, |
|
"logps/rejected": -536.3162841796875, |
|
"loss": 0.3107, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.3548002243041992, |
|
"rewards/margins": 1.746351957321167, |
|
"rewards/margins_max": 2.8274214267730713, |
|
"rewards/margins_min": 0.6652824878692627, |
|
"rewards/margins_std": 1.5288629531860352, |
|
"rewards/rejected": -3.101152181625366, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 7.0625, |
|
"learning_rate": 5.486945333173851e-08, |
|
"logits/chosen": -3.050468683242798, |
|
"logits/rejected": -2.7848165035247803, |
|
"logps/chosen": -342.97528076171875, |
|
"logps/rejected": -538.8195190429688, |
|
"loss": 0.3164, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.2223647832870483, |
|
"rewards/margins": 1.7037765979766846, |
|
"rewards/margins_max": 2.781301975250244, |
|
"rewards/margins_min": 0.6262511014938354, |
|
"rewards/margins_std": 1.523850917816162, |
|
"rewards/rejected": -2.9261412620544434, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 5.280118945084422e-08, |
|
"logits/chosen": -2.931457757949829, |
|
"logits/rejected": -2.6956043243408203, |
|
"logps/chosen": -311.7135925292969, |
|
"logps/rejected": -522.5653076171875, |
|
"loss": 0.2989, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.1440449953079224, |
|
"rewards/margins": 1.8880630731582642, |
|
"rewards/margins_max": 2.7849342823028564, |
|
"rewards/margins_min": 0.9911916851997375, |
|
"rewards/margins_std": 1.2683675289154053, |
|
"rewards/rejected": -3.0321078300476074, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 5.75, |
|
"learning_rate": 5.076804800947834e-08, |
|
"logits/chosen": -3.0498359203338623, |
|
"logits/rejected": -2.8847270011901855, |
|
"logps/chosen": -293.4126892089844, |
|
"logps/rejected": -525.2155151367188, |
|
"loss": 0.3278, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.3807709217071533, |
|
"rewards/margins": 1.8494899272918701, |
|
"rewards/margins_max": 2.753748655319214, |
|
"rewards/margins_min": 0.9452314376831055, |
|
"rewards/margins_std": 1.2788149118423462, |
|
"rewards/rejected": -3.2302603721618652, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 3.578125, |
|
"learning_rate": 4.877039112385814e-08, |
|
"logits/chosen": -3.111642360687256, |
|
"logits/rejected": -2.8808951377868652, |
|
"logps/chosen": -282.8914489746094, |
|
"logps/rejected": -561.9642333984375, |
|
"loss": 0.3362, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.1109817028045654, |
|
"rewards/margins": 2.2935783863067627, |
|
"rewards/margins_max": 3.600482940673828, |
|
"rewards/margins_min": 0.9866735339164734, |
|
"rewards/margins_std": 1.8482424020767212, |
|
"rewards/rejected": -3.404560089111328, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 6.21875, |
|
"learning_rate": 4.680857459016196e-08, |
|
"logits/chosen": -3.049572467803955, |
|
"logits/rejected": -2.7968525886535645, |
|
"logps/chosen": -312.0675354003906, |
|
"logps/rejected": -581.7535400390625, |
|
"loss": 0.2966, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.1987165212631226, |
|
"rewards/margins": 2.0752511024475098, |
|
"rewards/margins_max": 3.3707432746887207, |
|
"rewards/margins_min": 0.7797588109970093, |
|
"rewards/margins_std": 1.8321025371551514, |
|
"rewards/rejected": -3.2739672660827637, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 4.8125, |
|
"learning_rate": 4.4882947821159563e-08, |
|
"logits/chosen": -3.0233592987060547, |
|
"logits/rejected": -2.7910993099212646, |
|
"logps/chosen": -279.7276306152344, |
|
"logps/rejected": -569.35791015625, |
|
"loss": 0.3003, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.1314738988876343, |
|
"rewards/margins": 2.3299572467803955, |
|
"rewards/margins_max": 3.502735137939453, |
|
"rewards/margins_min": 1.1571792364120483, |
|
"rewards/margins_std": 1.6585586071014404, |
|
"rewards/rejected": -3.4614310264587402, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 2.9375, |
|
"learning_rate": 4.299385378397907e-08, |
|
"logits/chosen": -2.9819421768188477, |
|
"logits/rejected": -2.7031192779541016, |
|
"logps/chosen": -361.3184509277344, |
|
"logps/rejected": -575.5309448242188, |
|
"loss": 0.2952, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.4940073490142822, |
|
"rewards/margins": 2.0843570232391357, |
|
"rewards/margins_max": 3.1069493293762207, |
|
"rewards/margins_min": 1.0617649555206299, |
|
"rewards/margins_std": 1.4461634159088135, |
|
"rewards/rejected": -3.578364133834839, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 3.78125, |
|
"learning_rate": 4.114162893902259e-08, |
|
"logits/chosen": -2.9869210720062256, |
|
"logits/rejected": -2.736611843109131, |
|
"logps/chosen": -311.1821594238281, |
|
"logps/rejected": -537.497314453125, |
|
"loss": 0.3177, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.308483362197876, |
|
"rewards/margins": 1.9217637777328491, |
|
"rewards/margins_max": 3.026961326599121, |
|
"rewards/margins_min": 0.8165658712387085, |
|
"rewards/margins_std": 1.5629857778549194, |
|
"rewards/rejected": -3.2302470207214355, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 4.59375, |
|
"learning_rate": 3.9326603180040216e-08, |
|
"logits/chosen": -2.9793362617492676, |
|
"logits/rejected": -2.692769765853882, |
|
"logps/chosen": -323.01751708984375, |
|
"logps/rejected": -538.4032592773438, |
|
"loss": 0.3301, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.254874587059021, |
|
"rewards/margins": 1.8814289569854736, |
|
"rewards/margins_max": 2.7431275844573975, |
|
"rewards/margins_min": 1.0197299718856812, |
|
"rewards/margins_std": 1.2186262607574463, |
|
"rewards/rejected": -3.136303663253784, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 3.59375, |
|
"learning_rate": 3.754909977537357e-08, |
|
"logits/chosen": -3.031088352203369, |
|
"logits/rejected": -2.7731916904449463, |
|
"logps/chosen": -350.8569641113281, |
|
"logps/rejected": -553.83642578125, |
|
"loss": 0.33, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.4683470726013184, |
|
"rewards/margins": 1.888584852218628, |
|
"rewards/margins_max": 3.0695700645446777, |
|
"rewards/margins_min": 0.707599937915802, |
|
"rewards/margins_std": 1.6701648235321045, |
|
"rewards/rejected": -3.356931686401367, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 5.15625, |
|
"learning_rate": 3.5809435310379556e-08, |
|
"logits/chosen": -3.0824389457702637, |
|
"logits/rejected": -2.8151016235351562, |
|
"logps/chosen": -291.5655212402344, |
|
"logps/rejected": -544.6790161132812, |
|
"loss": 0.306, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.1818848848342896, |
|
"rewards/margins": 2.224213123321533, |
|
"rewards/margins_max": 3.5027458667755127, |
|
"rewards/margins_min": 0.9456807971000671, |
|
"rewards/margins_std": 1.8081178665161133, |
|
"rewards/rejected": -3.4060981273651123, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 5.96875, |
|
"learning_rate": 3.410791963104473e-08, |
|
"logits/chosen": -3.0624046325683594, |
|
"logits/rejected": -2.817383289337158, |
|
"logps/chosen": -316.928466796875, |
|
"logps/rejected": -554.0638427734375, |
|
"loss": 0.2644, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.2545480728149414, |
|
"rewards/margins": 2.1589207649230957, |
|
"rewards/margins_max": 3.1708950996398926, |
|
"rewards/margins_min": 1.1469463109970093, |
|
"rewards/margins_std": 1.431147813796997, |
|
"rewards/rejected": -3.413468837738037, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 3.625, |
|
"learning_rate": 3.2444855788799075e-08, |
|
"logits/chosen": -3.0546398162841797, |
|
"logits/rejected": -2.768601179122925, |
|
"logps/chosen": -311.37213134765625, |
|
"logps/rejected": -548.6424560546875, |
|
"loss": 0.2899, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.1787984371185303, |
|
"rewards/margins": 1.9840619564056396, |
|
"rewards/margins_max": 2.971385955810547, |
|
"rewards/margins_min": 0.9967382550239563, |
|
"rewards/margins_std": 1.3962868452072144, |
|
"rewards/rejected": -3.16286039352417, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 3.082053998654105e-08, |
|
"logits/chosen": -3.0698394775390625, |
|
"logits/rejected": -2.863145112991333, |
|
"logps/chosen": -305.7530212402344, |
|
"logps/rejected": -512.8255004882812, |
|
"loss": 0.3497, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.2612425088882446, |
|
"rewards/margins": 1.7084944248199463, |
|
"rewards/margins_max": 2.653332233428955, |
|
"rewards/margins_min": 0.7636561393737793, |
|
"rewards/margins_std": 1.33620285987854, |
|
"rewards/rejected": -2.9697365760803223, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 18.0, |
|
"learning_rate": 2.9235261525881322e-08, |
|
"logits/chosen": -3.0396475791931152, |
|
"logits/rejected": -2.8047540187835693, |
|
"logps/chosen": -313.1606140136719, |
|
"logps/rejected": -493.97412109375, |
|
"loss": 0.369, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.431262731552124, |
|
"rewards/margins": 1.6049553155899048, |
|
"rewards/margins_max": 2.717376232147217, |
|
"rewards/margins_min": 0.49253416061401367, |
|
"rewards/margins_std": 1.5732009410858154, |
|
"rewards/rejected": -3.03621768951416, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 5.21875, |
|
"learning_rate": 2.7689302755616732e-08, |
|
"logits/chosen": -3.084151268005371, |
|
"logits/rejected": -2.7894577980041504, |
|
"logps/chosen": -317.45355224609375, |
|
"logps/rejected": -485.2964782714844, |
|
"loss": 0.3091, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.1630436182022095, |
|
"rewards/margins": 1.6040722131729126, |
|
"rewards/margins_max": 2.4594357013702393, |
|
"rewards/margins_min": 0.7487087845802307, |
|
"rewards/margins_std": 1.20966637134552, |
|
"rewards/rejected": -2.767115831375122, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 15.9375, |
|
"learning_rate": 2.6182939021441584e-08, |
|
"logits/chosen": -3.061344861984253, |
|
"logits/rejected": -2.835780620574951, |
|
"logps/chosen": -323.6014404296875, |
|
"logps/rejected": -565.6847534179688, |
|
"loss": 0.3159, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.2828537225723267, |
|
"rewards/margins": 2.0452182292938232, |
|
"rewards/margins_max": 3.042397975921631, |
|
"rewards/margins_min": 1.0480389595031738, |
|
"rewards/margins_std": 1.4102246761322021, |
|
"rewards/rejected": -3.3280720710754395, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 5.21875, |
|
"learning_rate": 2.4716438616906975e-08, |
|
"logits/chosen": -2.9988512992858887, |
|
"logits/rejected": -2.801060199737549, |
|
"logps/chosen": -342.7635498046875, |
|
"logps/rejected": -568.91552734375, |
|
"loss": 0.2964, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.5359113216400146, |
|
"rewards/margins": 1.9777557849884033, |
|
"rewards/margins_max": 3.3029723167419434, |
|
"rewards/margins_min": 0.6525388360023499, |
|
"rewards/margins_std": 1.8741397857666016, |
|
"rewards/rejected": -3.513666868209839, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 3.96875, |
|
"learning_rate": 2.3290062735635914e-08, |
|
"logits/chosen": -3.061750888824463, |
|
"logits/rejected": -2.8491766452789307, |
|
"logps/chosen": -309.3940734863281, |
|
"logps/rejected": -563.3863525390625, |
|
"loss": 0.2996, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.4320241212844849, |
|
"rewards/margins": 2.2175357341766357, |
|
"rewards/margins_max": 3.681055784225464, |
|
"rewards/margins_min": 0.7540156841278076, |
|
"rewards/margins_std": 2.069730043411255, |
|
"rewards/rejected": -3.649559736251831, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 9.0, |
|
"learning_rate": 2.1904065424802997e-08, |
|
"logits/chosen": -3.0209171772003174, |
|
"logits/rejected": -2.817324638366699, |
|
"logps/chosen": -368.29156494140625, |
|
"logps/rejected": -561.48876953125, |
|
"loss": 0.3503, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.4189422130584717, |
|
"rewards/margins": 1.4640214443206787, |
|
"rewards/margins_max": 2.460878849029541, |
|
"rewards/margins_min": 0.4671642780303955, |
|
"rewards/margins_std": 1.40976881980896, |
|
"rewards/rejected": -2.8829636573791504, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 3.84375, |
|
"learning_rate": 2.0558693539886595e-08, |
|
"logits/chosen": -3.0322585105895996, |
|
"logits/rejected": -2.836747884750366, |
|
"logps/chosen": -325.99859619140625, |
|
"logps/rejected": -588.0489501953125, |
|
"loss": 0.292, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.230724573135376, |
|
"rewards/margins": 2.264963150024414, |
|
"rewards/margins_max": 3.455662965774536, |
|
"rewards/margins_min": 1.0742634534835815, |
|
"rewards/margins_std": 1.683903694152832, |
|
"rewards/rejected": -3.495687961578369, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 4.875, |
|
"learning_rate": 1.9254186700702667e-08, |
|
"logits/chosen": -3.0456230640411377, |
|
"logits/rejected": -2.819836139678955, |
|
"logps/chosen": -326.4022521972656, |
|
"logps/rejected": -572.4381713867188, |
|
"loss": 0.299, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.2172520160675049, |
|
"rewards/margins": 2.1527469158172607, |
|
"rewards/margins_max": 3.443035125732422, |
|
"rewards/margins_min": 0.8624590039253235, |
|
"rewards/margins_std": 1.8247426748275757, |
|
"rewards/rejected": -3.3699989318847656, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 4.90625, |
|
"learning_rate": 1.799077724872644e-08, |
|
"logits/chosen": -3.0328400135040283, |
|
"logits/rejected": -2.8203063011169434, |
|
"logps/chosen": -283.4230041503906, |
|
"logps/rejected": -519.71826171875, |
|
"loss": 0.3396, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.2383689880371094, |
|
"rewards/margins": 1.9863897562026978, |
|
"rewards/margins_max": 3.1531052589416504, |
|
"rewards/margins_min": 0.8196744918823242, |
|
"rewards/margins_std": 1.64998459815979, |
|
"rewards/rejected": -3.2247588634490967, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 3.765625, |
|
"learning_rate": 1.6768690205711173e-08, |
|
"logits/chosen": -3.133364200592041, |
|
"logits/rejected": -2.901482343673706, |
|
"logps/chosen": -367.28460693359375, |
|
"logps/rejected": -628.627197265625, |
|
"loss": 0.2691, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.2051244974136353, |
|
"rewards/margins": 1.9070392847061157, |
|
"rewards/margins_max": 3.0293052196502686, |
|
"rewards/margins_min": 0.784773051738739, |
|
"rewards/margins_std": 1.587123990058899, |
|
"rewards/rejected": -3.112163782119751, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 1.558814323361002e-08, |
|
"logits/chosen": -3.0593042373657227, |
|
"logits/rejected": -2.857165813446045, |
|
"logps/chosen": -307.4436950683594, |
|
"logps/rejected": -567.9011840820312, |
|
"loss": 0.2897, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.1894042491912842, |
|
"rewards/margins": 1.9394479990005493, |
|
"rewards/margins_max": 2.996490716934204, |
|
"rewards/margins_min": 0.8824055790901184, |
|
"rewards/margins_std": 1.4948837757110596, |
|
"rewards/rejected": -3.128852128982544, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 5.3125, |
|
"learning_rate": 1.4449346595809014e-08, |
|
"logits/chosen": -2.9993538856506348, |
|
"logits/rejected": -2.744136095046997, |
|
"logps/chosen": -317.97711181640625, |
|
"logps/rejected": -568.7042236328125, |
|
"loss": 0.2759, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.3569293022155762, |
|
"rewards/margins": 2.2924506664276123, |
|
"rewards/margins_max": 3.323498249053955, |
|
"rewards/margins_min": 1.2614028453826904, |
|
"rewards/margins_std": 1.4581215381622314, |
|
"rewards/rejected": -3.6493797302246094, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 4.0625, |
|
"learning_rate": 1.3352503119677867e-08, |
|
"logits/chosen": -3.0968692302703857, |
|
"logits/rejected": -2.8480188846588135, |
|
"logps/chosen": -323.5044860839844, |
|
"logps/rejected": -632.51318359375, |
|
"loss": 0.2821, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.2613385915756226, |
|
"rewards/margins": 2.256674289703369, |
|
"rewards/margins_max": 3.341503858566284, |
|
"rewards/margins_min": 1.1718448400497437, |
|
"rewards/margins_std": 1.5341806411743164, |
|
"rewards/rejected": -3.5180130004882812, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 6.03125, |
|
"learning_rate": 1.2297808160444928e-08, |
|
"logits/chosen": -3.090576648712158, |
|
"logits/rejected": -2.854712963104248, |
|
"logps/chosen": -310.79571533203125, |
|
"logps/rejected": -578.5720825195312, |
|
"loss": 0.3438, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.1912732124328613, |
|
"rewards/margins": 2.3021795749664307, |
|
"rewards/margins_max": 3.6481099128723145, |
|
"rewards/margins_min": 0.9562493562698364, |
|
"rewards/margins_std": 1.903433084487915, |
|
"rewards/rejected": -3.493452787399292, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 5.53125, |
|
"learning_rate": 1.1285449566403094e-08, |
|
"logits/chosen": -3.020697832107544, |
|
"logits/rejected": -2.7931606769561768, |
|
"logps/chosen": -302.6512451171875, |
|
"logps/rejected": -517.8263549804688, |
|
"loss": 0.3479, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.2877894639968872, |
|
"rewards/margins": 1.8629472255706787, |
|
"rewards/margins_max": 3.250032901763916, |
|
"rewards/margins_min": 0.47586172819137573, |
|
"rewards/margins_std": 1.9616352319717407, |
|
"rewards/rejected": -3.1507368087768555, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 5.78125, |
|
"learning_rate": 1.0315607645452834e-08, |
|
"logits/chosen": -3.067622661590576, |
|
"logits/rejected": -2.768596649169922, |
|
"logps/chosen": -316.2894592285156, |
|
"logps/rejected": -593.660888671875, |
|
"loss": 0.2706, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.1791785955429077, |
|
"rewards/margins": 2.433260679244995, |
|
"rewards/margins_max": 3.671154022216797, |
|
"rewards/margins_min": 1.1953675746917725, |
|
"rewards/margins_std": 1.7506450414657593, |
|
"rewards/rejected": -3.6124393939971924, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 7.25, |
|
"learning_rate": 9.388455132988054e-09, |
|
"logits/chosen": -3.003413677215576, |
|
"logits/rejected": -2.767271041870117, |
|
"logps/chosen": -302.7046203613281, |
|
"logps/rejected": -537.4854736328125, |
|
"loss": 0.3177, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.296728253364563, |
|
"rewards/margins": 2.105752468109131, |
|
"rewards/margins_max": 3.5697181224823, |
|
"rewards/margins_min": 0.6417877078056335, |
|
"rewards/margins_std": 2.070359230041504, |
|
"rewards/rejected": -3.4024810791015625, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 13.9375, |
|
"learning_rate": 8.504157161130786e-09, |
|
"logits/chosen": -2.9792139530181885, |
|
"logits/rejected": -2.774827480316162, |
|
"logps/chosen": -265.3929748535156, |
|
"logps/rejected": -534.5281982421875, |
|
"loss": 0.3145, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.139758586883545, |
|
"rewards/margins": 2.1166577339172363, |
|
"rewards/margins_max": 3.568591594696045, |
|
"rewards/margins_min": 0.6647233366966248, |
|
"rewards/margins_std": 2.053344964981079, |
|
"rewards/rejected": -3.256415843963623, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 4.3125, |
|
"learning_rate": 7.662871229320106e-09, |
|
"logits/chosen": -3.083374500274658, |
|
"logits/rejected": -2.891068935394287, |
|
"logps/chosen": -320.02984619140625, |
|
"logps/rejected": -579.4918823242188, |
|
"loss": 0.2939, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.4123804569244385, |
|
"rewards/margins": 2.1614882946014404, |
|
"rewards/margins_max": 3.478391647338867, |
|
"rewards/margins_min": 0.8445852994918823, |
|
"rewards/margins_std": 1.862382173538208, |
|
"rewards/rejected": -3.5738685131073, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 3.015625, |
|
"learning_rate": 6.864747176260288e-09, |
|
"logits/chosen": -3.0258588790893555, |
|
"logits/rejected": -2.75772762298584, |
|
"logps/chosen": -327.40850830078125, |
|
"logps/rejected": -539.2699584960938, |
|
"loss": 0.3086, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.2346729040145874, |
|
"rewards/margins": 1.9701659679412842, |
|
"rewards/margins_max": 2.9854886531829834, |
|
"rewards/margins_min": 0.9548432230949402, |
|
"rewards/margins_std": 1.4358831644058228, |
|
"rewards/rejected": -3.204838991165161, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 2.40625, |
|
"learning_rate": 6.10992715323369e-09, |
|
"logits/chosen": -3.0179896354675293, |
|
"logits/rejected": -2.7822508811950684, |
|
"logps/chosen": -303.41876220703125, |
|
"logps/rejected": -566.8040771484375, |
|
"loss": 0.289, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.1414506435394287, |
|
"rewards/margins": 2.2253711223602295, |
|
"rewards/margins_max": 3.476818561553955, |
|
"rewards/margins_min": 0.9739240407943726, |
|
"rewards/margins_std": 1.7698135375976562, |
|
"rewards/rejected": -3.366821765899658, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 5.9375, |
|
"learning_rate": 5.398545598782528e-09, |
|
"logits/chosen": -2.946030855178833, |
|
"logits/rejected": -2.7149243354797363, |
|
"logps/chosen": -321.11444091796875, |
|
"logps/rejected": -576.3018188476562, |
|
"loss": 0.3173, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.2470299005508423, |
|
"rewards/margins": 2.265151023864746, |
|
"rewards/margins_max": 3.5846290588378906, |
|
"rewards/margins_min": 0.9456728100776672, |
|
"rewards/margins_std": 1.8660240173339844, |
|
"rewards/rejected": -3.512180805206299, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 7.0, |
|
"learning_rate": 4.730729214764417e-09, |
|
"logits/chosen": -3.089811325073242, |
|
"logits/rejected": -2.8148770332336426, |
|
"logps/chosen": -333.5905456542969, |
|
"logps/rejected": -606.7553100585938, |
|
"loss": 0.316, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.306650161743164, |
|
"rewards/margins": 2.504451274871826, |
|
"rewards/margins_max": 3.8686652183532715, |
|
"rewards/margins_min": 1.1402372121810913, |
|
"rewards/margins_std": 1.9292898178100586, |
|
"rewards/rejected": -3.8111014366149902, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 4.106596943786095e-09, |
|
"logits/chosen": -3.0792603492736816, |
|
"logits/rejected": -2.848381519317627, |
|
"logps/chosen": -339.4683837890625, |
|
"logps/rejected": -617.6273803710938, |
|
"loss": 0.301, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.349266529083252, |
|
"rewards/margins": 2.4606006145477295, |
|
"rewards/margins_max": 3.7330939769744873, |
|
"rewards/margins_min": 1.1881073713302612, |
|
"rewards/margins_std": 1.7995771169662476, |
|
"rewards/rejected": -3.8098673820495605, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 3.640625, |
|
"learning_rate": 3.526259948018778e-09, |
|
"logits/chosen": -3.0362095832824707, |
|
"logits/rejected": -2.8663055896759033, |
|
"logps/chosen": -365.0773620605469, |
|
"logps/rejected": -618.965087890625, |
|
"loss": 0.2862, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.4282352924346924, |
|
"rewards/margins": 1.976050615310669, |
|
"rewards/margins_max": 3.0453240871429443, |
|
"rewards/margins_min": 0.9067766070365906, |
|
"rewards/margins_std": 1.5121815204620361, |
|
"rewards/rejected": -3.4042859077453613, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 13.0625, |
|
"learning_rate": 2.989821589399505e-09, |
|
"logits/chosen": -3.0085415840148926, |
|
"logits/rejected": -2.8075146675109863, |
|
"logps/chosen": -322.4816589355469, |
|
"logps/rejected": -571.5615844726562, |
|
"loss": 0.2951, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.4327504634857178, |
|
"rewards/margins": 2.106593132019043, |
|
"rewards/margins_max": 3.44807767868042, |
|
"rewards/margins_min": 0.7651088833808899, |
|
"rewards/margins_std": 1.8971455097198486, |
|
"rewards/rejected": -3.539344072341919, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 4.3125, |
|
"learning_rate": 2.4973774112216628e-09, |
|
"logits/chosen": -3.05271577835083, |
|
"logits/rejected": -2.8218674659729004, |
|
"logps/chosen": -337.2112121582031, |
|
"logps/rejected": -588.983642578125, |
|
"loss": 0.3041, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.3858591318130493, |
|
"rewards/margins": 1.8815782070159912, |
|
"rewards/margins_max": 3.0813260078430176, |
|
"rewards/margins_min": 0.6818308234214783, |
|
"rewards/margins_std": 1.6966991424560547, |
|
"rewards/rejected": -3.267437696456909, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 4.21875, |
|
"learning_rate": 2.049015121118075e-09, |
|
"logits/chosen": -3.0388782024383545, |
|
"logits/rejected": -2.7133755683898926, |
|
"logps/chosen": -308.1634216308594, |
|
"logps/rejected": -517.82763671875, |
|
"loss": 0.3344, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.2324225902557373, |
|
"rewards/margins": 1.8060786724090576, |
|
"rewards/margins_max": 2.9905166625976562, |
|
"rewards/margins_min": 0.6216403841972351, |
|
"rewards/margins_std": 1.675048589706421, |
|
"rewards/rejected": -3.038501262664795, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 1.6448145754396625e-09, |
|
"logits/chosen": -3.0789260864257812, |
|
"logits/rejected": -2.7612602710723877, |
|
"logps/chosen": -335.08795166015625, |
|
"logps/rejected": -567.6199340820312, |
|
"loss": 0.3478, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.3611897230148315, |
|
"rewards/margins": 1.9877793788909912, |
|
"rewards/margins_max": 3.0852162837982178, |
|
"rewards/margins_min": 0.8903425931930542, |
|
"rewards/margins_std": 1.5520099401474, |
|
"rewards/rejected": -3.3489692211151123, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 5.0625, |
|
"learning_rate": 1.2848477650325984e-09, |
|
"logits/chosen": -3.0007452964782715, |
|
"logits/rejected": -2.734440326690674, |
|
"logps/chosen": -272.87933349609375, |
|
"logps/rejected": -529.8055419921875, |
|
"loss": 0.3078, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.1611636877059937, |
|
"rewards/margins": 2.15301775932312, |
|
"rewards/margins_max": 3.293184757232666, |
|
"rewards/margins_min": 1.0128505229949951, |
|
"rewards/margins_std": 1.612439751625061, |
|
"rewards/rejected": -3.3141815662384033, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 10.125, |
|
"learning_rate": 9.691788024160374e-10, |
|
"logits/chosen": -3.0972485542297363, |
|
"logits/rejected": -2.8439688682556152, |
|
"logps/chosen": -263.6483154296875, |
|
"logps/rejected": -510.2359313964844, |
|
"loss": 0.287, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.1465216875076294, |
|
"rewards/margins": 2.213108539581299, |
|
"rewards/margins_max": 3.255052089691162, |
|
"rewards/margins_min": 1.1711642742156982, |
|
"rewards/margins_std": 1.4735311269760132, |
|
"rewards/rejected": -3.3596298694610596, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 5.9375, |
|
"learning_rate": 6.978639103634443e-10, |
|
"logits/chosen": -3.022329807281494, |
|
"logits/rejected": -2.8326973915100098, |
|
"logps/chosen": -286.43682861328125, |
|
"logps/rejected": -538.6163940429688, |
|
"loss": 0.2913, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.083613395690918, |
|
"rewards/margins": 2.006159543991089, |
|
"rewards/margins_max": 2.973505735397339, |
|
"rewards/margins_min": 1.0388134717941284, |
|
"rewards/margins_std": 1.3680341243743896, |
|
"rewards/rejected": -3.089773178100586, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 5.625, |
|
"learning_rate": 4.709514118888813e-10, |
|
"logits/chosen": -3.088514566421509, |
|
"logits/rejected": -2.8289153575897217, |
|
"logps/chosen": -352.9057922363281, |
|
"logps/rejected": -557.4052734375, |
|
"loss": 0.2997, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.546465516090393, |
|
"rewards/margins": 1.8814094066619873, |
|
"rewards/margins_max": 2.9587039947509766, |
|
"rewards/margins_min": 0.8041146993637085, |
|
"rewards/margins_std": 1.5235246419906616, |
|
"rewards/rejected": -3.427874803543091, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 5.40625, |
|
"learning_rate": 2.884817216402546e-10, |
|
"logits/chosen": -3.038875102996826, |
|
"logits/rejected": -2.7465806007385254, |
|
"logps/chosen": -348.75567626953125, |
|
"logps/rejected": -502.0702209472656, |
|
"loss": 0.3639, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.3894237279891968, |
|
"rewards/margins": 1.6732631921768188, |
|
"rewards/margins_max": 2.5952847003936768, |
|
"rewards/margins_min": 0.7512421011924744, |
|
"rewards/margins_std": 1.303934931755066, |
|
"rewards/rejected": -3.0626869201660156, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 4.625, |
|
"learning_rate": 1.5048733870137719e-10, |
|
"logits/chosen": -3.0684123039245605, |
|
"logits/rejected": -2.8573265075683594, |
|
"logps/chosen": -363.2174377441406, |
|
"logps/rejected": -667.3125610351562, |
|
"loss": 0.3333, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.3807947635650635, |
|
"rewards/margins": 2.112975597381592, |
|
"rewards/margins_max": 3.1048035621643066, |
|
"rewards/margins_min": 1.121147632598877, |
|
"rewards/margins_std": 1.4026567935943604, |
|
"rewards/rejected": -3.493770122528076, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 17.375, |
|
"learning_rate": 5.699284080346034e-11, |
|
"logits/chosen": -3.046823263168335, |
|
"logits/rejected": -2.718289613723755, |
|
"logps/chosen": -332.06915283203125, |
|
"logps/rejected": -508.7821350097656, |
|
"loss": 0.3244, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.235876441001892, |
|
"rewards/margins": 1.7919772863388062, |
|
"rewards/margins_max": 2.906499147415161, |
|
"rewards/margins_min": 0.677455484867096, |
|
"rewards/margins_std": 1.5761719942092896, |
|
"rewards/rejected": -3.0278537273406982, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 7.03125, |
|
"learning_rate": 8.014879947837449e-12, |
|
"logits/chosen": -3.0880868434906006, |
|
"logits/rejected": -2.723057985305786, |
|
"logps/chosen": -331.1163024902344, |
|
"logps/rejected": -531.2738037109375, |
|
"loss": 0.296, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.2712851762771606, |
|
"rewards/margins": 1.8950446844100952, |
|
"rewards/margins_max": 3.0397720336914062, |
|
"rewards/margins_min": 0.7503169775009155, |
|
"rewards/margins_std": 1.6188892126083374, |
|
"rewards/rejected": -3.1663296222686768, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.2173049449920654, |
|
"eval_logits/rejected": -2.112638235092163, |
|
"eval_logps/chosen": -363.9157409667969, |
|
"eval_logps/rejected": -358.1680908203125, |
|
"eval_loss": 0.7011914253234863, |
|
"eval_rewards/accuracies": 0.5506666898727417, |
|
"eval_rewards/chosen": -0.8454986810684204, |
|
"eval_rewards/margins": 0.07637320458889008, |
|
"eval_rewards/margins_max": 0.9500231146812439, |
|
"eval_rewards/margins_min": -0.7111339569091797, |
|
"eval_rewards/margins_std": 0.5453019738197327, |
|
"eval_rewards/rejected": -0.9218719005584717, |
|
"eval_runtime": 1325.5647, |
|
"eval_samples_per_second": 4.526, |
|
"eval_steps_per_second": 0.283, |
|
"step": 2616 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 2616, |
|
"total_flos": 0.0, |
|
"train_loss": 0.39473982852533324, |
|
"train_runtime": 26215.8871, |
|
"train_samples_per_second": 1.597, |
|
"train_steps_per_second": 0.1 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2616, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|