{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9987943737441393, "eval_steps": 400, "global_step": 466, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010716677829872739, "grad_norm": 8.743332175030668, "learning_rate": 1.0638297872340425e-07, "logits/chosen": -2.890564441680908, "logits/rejected": -2.88779878616333, "logps/chosen": -0.9741678237915039, "logps/rejected": -0.9829432368278503, "loss": 1.9541, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.9741678237915039, "rewards/margins": 0.008775472640991211, "rewards/rejected": -0.9829432368278503, "step": 5 }, { "epoch": 0.021433355659745478, "grad_norm": 10.116815688406033, "learning_rate": 2.127659574468085e-07, "logits/chosen": -2.906921863555908, "logits/rejected": -2.892861843109131, "logps/chosen": -0.9780665636062622, "logps/rejected": -1.0012353658676147, "loss": 1.9659, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.9780665636062622, "rewards/margins": 0.02316875383257866, "rewards/rejected": -1.0012353658676147, "step": 10 }, { "epoch": 0.032150033489618215, "grad_norm": 7.106110538153892, "learning_rate": 3.1914893617021275e-07, "logits/chosen": -2.8951611518859863, "logits/rejected": -2.917839288711548, "logps/chosen": -0.9592474102973938, "logps/rejected": -0.9844042658805847, "loss": 1.9681, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.9592474102973938, "rewards/margins": 0.025157025083899498, "rewards/rejected": -0.9844042658805847, "step": 15 }, { "epoch": 0.042866711319490956, "grad_norm": 7.768435236696218, "learning_rate": 4.25531914893617e-07, "logits/chosen": -2.9024550914764404, "logits/rejected": -2.9008612632751465, "logps/chosen": -0.9704982042312622, "logps/rejected": -0.9749953150749207, "loss": 1.9515, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.9704982042312622, "rewards/margins": 0.004497217480093241, "rewards/rejected": -0.9749953150749207, "step": 20 }, { "epoch": 0.0535833891493637, "grad_norm": 6.749888376224144, "learning_rate": 5.319148936170212e-07, "logits/chosen": -2.8302013874053955, "logits/rejected": -2.8350861072540283, "logps/chosen": -0.9493886828422546, "logps/rejected": -0.9983908534049988, "loss": 1.9589, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.9493886828422546, "rewards/margins": 0.04900217801332474, "rewards/rejected": -0.9983908534049988, "step": 25 }, { "epoch": 0.06430006697923643, "grad_norm": 8.485574549470025, "learning_rate": 6.382978723404255e-07, "logits/chosen": -2.8937394618988037, "logits/rejected": -2.9207406044006348, "logps/chosen": -0.9754034876823425, "logps/rejected": -0.9767441749572754, "loss": 1.9649, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.9754034876823425, "rewards/margins": 0.0013406850630417466, "rewards/rejected": -0.9767441749572754, "step": 30 }, { "epoch": 0.07501674480910918, "grad_norm": 7.870295088882763, "learning_rate": 7.446808510638297e-07, "logits/chosen": -2.9515388011932373, "logits/rejected": -2.9212729930877686, "logps/chosen": -0.943376362323761, "logps/rejected": -0.9886476397514343, "loss": 1.9685, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.943376362323761, "rewards/margins": 0.04527140408754349, "rewards/rejected": -0.9886476397514343, "step": 35 }, { "epoch": 0.08573342263898191, "grad_norm": 8.140974741892732, "learning_rate": 8.51063829787234e-07, "logits/chosen": -2.886676788330078, "logits/rejected": -2.884409189224243, "logps/chosen": -0.9962645769119263, "logps/rejected": -0.9912681579589844, "loss": 1.9721, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.9962645769119263, "rewards/margins": -0.004996386356651783, "rewards/rejected": -0.9912681579589844, "step": 40 }, { "epoch": 0.09645010046885466, "grad_norm": 7.451026538163931, "learning_rate": 9.574468085106384e-07, "logits/chosen": -2.845064878463745, "logits/rejected": -2.834542751312256, "logps/chosen": -0.9799020886421204, "logps/rejected": -1.0003235340118408, "loss": 1.9741, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.9799020886421204, "rewards/margins": 0.020421411842107773, "rewards/rejected": -1.0003235340118408, "step": 45 }, { "epoch": 0.1071667782987274, "grad_norm": 7.309795501443683, "learning_rate": 9.998735159083292e-07, "logits/chosen": -2.8403611183166504, "logits/rejected": -2.8530068397521973, "logps/chosen": -0.9515609741210938, "logps/rejected": -0.9347367286682129, "loss": 1.9306, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.9515609741210938, "rewards/margins": -0.01682429201900959, "rewards/rejected": -0.9347367286682129, "step": 50 }, { "epoch": 0.11788345612860013, "grad_norm": 8.763620887929697, "learning_rate": 9.99100789302024e-07, "logits/chosen": -2.861494541168213, "logits/rejected": -2.8731250762939453, "logps/chosen": -0.9942106008529663, "logps/rejected": -1.0270545482635498, "loss": 1.9564, "rewards/accuracies": 0.53125, "rewards/chosen": -0.9942106008529663, "rewards/margins": 0.032844070345163345, "rewards/rejected": -1.0270545482635498, "step": 55 }, { "epoch": 0.12860013395847286, "grad_norm": 10.67048824818839, "learning_rate": 9.976266896046142e-07, "logits/chosen": -2.745513439178467, "logits/rejected": -2.7677714824676514, "logps/chosen": -0.9298070669174194, "logps/rejected": -0.9584784507751465, "loss": 1.9575, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.9298070669174194, "rewards/margins": 0.028671354055404663, "rewards/rejected": -0.9584784507751465, "step": 60 }, { "epoch": 0.13931681178834562, "grad_norm": 8.587332020158266, "learning_rate": 9.954532883292758e-07, "logits/chosen": -2.8174448013305664, "logits/rejected": -2.827878713607788, "logps/chosen": -0.9096312522888184, "logps/rejected": -0.9340154528617859, "loss": 1.9445, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.9096312522888184, "rewards/margins": 0.02438436448574066, "rewards/rejected": -0.9340154528617859, "step": 65 }, { "epoch": 0.15003348961821836, "grad_norm": 8.28588297183277, "learning_rate": 9.925836396991307e-07, "logits/chosen": -2.7543578147888184, "logits/rejected": -2.7767698764801025, "logps/chosen": -0.9463087320327759, "logps/rejected": -1.000276803970337, "loss": 1.9328, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.9463087320327759, "rewards/margins": 0.05396796017885208, "rewards/rejected": -1.000276803970337, "step": 70 }, { "epoch": 0.1607501674480911, "grad_norm": 9.088492984557346, "learning_rate": 9.89021776355227e-07, "logits/chosen": -2.7640278339385986, "logits/rejected": -2.7647509574890137, "logps/chosen": -0.9192463755607605, "logps/rejected": -0.9625679850578308, "loss": 1.95, "rewards/accuracies": 0.5625, "rewards/chosen": -0.9192463755607605, "rewards/margins": 0.043321557343006134, "rewards/rejected": -0.9625679850578308, "step": 75 }, { "epoch": 0.17146684527796383, "grad_norm": 9.431427301698536, "learning_rate": 9.847727036895757e-07, "logits/chosen": -2.8038690090179443, "logits/rejected": -2.82570481300354, "logps/chosen": -0.9232662320137024, "logps/rejected": -1.0234979391098022, "loss": 1.9471, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.9232662320137024, "rewards/margins": 0.10023164749145508, "rewards/rejected": -1.0234979391098022, "step": 80 }, { "epoch": 0.18218352310783656, "grad_norm": 10.888524371529982, "learning_rate": 9.79842392811207e-07, "logits/chosen": -2.8308658599853516, "logits/rejected": -2.840531349182129, "logps/chosen": -0.9489914774894714, "logps/rejected": -1.0153648853302002, "loss": 1.9556, "rewards/accuracies": 0.59375, "rewards/chosen": -0.9489914774894714, "rewards/margins": 0.06637347489595413, "rewards/rejected": -1.0153648853302002, "step": 85 }, { "epoch": 0.19290020093770932, "grad_norm": 11.29650020138619, "learning_rate": 9.742377721551285e-07, "logits/chosen": -2.837176561355591, "logits/rejected": -2.8398804664611816, "logps/chosen": -0.9703192710876465, "logps/rejected": -1.0296133756637573, "loss": 1.9557, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.9703192710876465, "rewards/margins": 0.05929403379559517, "rewards/rejected": -1.0296133756637573, "step": 90 }, { "epoch": 0.20361687876758205, "grad_norm": 10.210746185136436, "learning_rate": 9.679667177459793e-07, "logits/chosen": -2.8701424598693848, "logits/rejected": -2.8827216625213623, "logps/chosen": -0.9922488927841187, "logps/rejected": -1.0364185571670532, "loss": 1.935, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.9922488927841187, "rewards/margins": 0.04416964575648308, "rewards/rejected": -1.0364185571670532, "step": 95 }, { "epoch": 0.2143335565974548, "grad_norm": 12.483295768661268, "learning_rate": 9.61038042130062e-07, "logits/chosen": -2.8926491737365723, "logits/rejected": -2.901106357574463, "logps/chosen": -1.0361850261688232, "logps/rejected": -1.0975309610366821, "loss": 1.9596, "rewards/accuracies": 0.59375, "rewards/chosen": -1.0361850261688232, "rewards/margins": 0.06134594604372978, "rewards/rejected": -1.0975309610366821, "step": 100 }, { "epoch": 0.22505023442732752, "grad_norm": 10.251687371352437, "learning_rate": 9.534614819913056e-07, "logits/chosen": -2.8896324634552, "logits/rejected": -2.8919472694396973, "logps/chosen": -0.951892077922821, "logps/rejected": -1.0144917964935303, "loss": 1.9124, "rewards/accuracies": 0.5625, "rewards/chosen": -0.951892077922821, "rewards/margins": 0.06259982287883759, "rewards/rejected": -1.0144917964935303, "step": 105 }, { "epoch": 0.23576691225720026, "grad_norm": 12.050194786037592, "learning_rate": 9.45247684468561e-07, "logits/chosen": -2.884787082672119, "logits/rejected": -2.8778603076934814, "logps/chosen": -0.9767457842826843, "logps/rejected": -1.0685449838638306, "loss": 1.9478, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9767457842826843, "rewards/margins": 0.09179918467998505, "rewards/rejected": -1.0685449838638306, "step": 110 }, { "epoch": 0.24648359008707302, "grad_norm": 16.70841820517718, "learning_rate": 9.364081921934605e-07, "logits/chosen": -2.938917636871338, "logits/rejected": -2.9450087547302246, "logps/chosen": -0.9636579751968384, "logps/rejected": -0.9759002923965454, "loss": 1.9752, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.9636579751968384, "rewards/margins": 0.01224228460341692, "rewards/rejected": -0.9759002923965454, "step": 115 }, { "epoch": 0.2572002679169457, "grad_norm": 11.293944567951943, "learning_rate": 9.269554270698635e-07, "logits/chosen": -2.945613384246826, "logits/rejected": -2.9629931449890137, "logps/chosen": -0.9591239094734192, "logps/rejected": -0.9764735102653503, "loss": 1.9573, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.9591239094734192, "rewards/margins": 0.01734955981373787, "rewards/rejected": -0.9764735102653503, "step": 120 }, { "epoch": 0.2679169457468185, "grad_norm": 11.980171410561908, "learning_rate": 9.169026728176843e-07, "logits/chosen": -2.933450222015381, "logits/rejected": -2.9331278800964355, "logps/chosen": -0.9632900357246399, "logps/rejected": -1.0157678127288818, "loss": 1.9492, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.9632900357246399, "rewards/margins": 0.05247765779495239, "rewards/rejected": -1.0157678127288818, "step": 125 }, { "epoch": 0.27863362357669125, "grad_norm": 13.043968582425784, "learning_rate": 9.062640563056338e-07, "logits/chosen": -2.991359233856201, "logits/rejected": -2.988865613937378, "logps/chosen": -0.9493595361709595, "logps/rejected": -1.034130334854126, "loss": 1.922, "rewards/accuracies": 0.59375, "rewards/chosen": -0.9493595361709595, "rewards/margins": 0.08477075397968292, "rewards/rejected": -1.034130334854126, "step": 130 }, { "epoch": 0.289350301406564, "grad_norm": 12.532276437925889, "learning_rate": 8.950545276991059e-07, "logits/chosen": -3.0021941661834717, "logits/rejected": -3.0065648555755615, "logps/chosen": -0.9963411092758179, "logps/rejected": -1.0470635890960693, "loss": 1.9424, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.9963411092758179, "rewards/margins": 0.05072233825922012, "rewards/rejected": -1.0470635890960693, "step": 135 }, { "epoch": 0.3000669792364367, "grad_norm": 12.705557690233402, "learning_rate": 8.832898394511059e-07, "logits/chosen": -2.9534518718719482, "logits/rejected": -2.955967903137207, "logps/chosen": -0.9860572814941406, "logps/rejected": -1.0404431819915771, "loss": 1.9178, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.9860572814941406, "rewards/margins": 0.054385989904403687, "rewards/rejected": -1.0404431819915771, "step": 140 }, { "epoch": 0.31078365706630945, "grad_norm": 13.072919082266676, "learning_rate": 8.709865241657479e-07, "logits/chosen": -3.0093483924865723, "logits/rejected": -2.979004383087158, "logps/chosen": -0.9930181503295898, "logps/rejected": -1.0835025310516357, "loss": 1.9157, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.9930181503295898, "rewards/margins": 0.09048454463481903, "rewards/rejected": -1.0835025310516357, "step": 145 }, { "epoch": 0.3215003348961822, "grad_norm": 12.426824715988731, "learning_rate": 8.581618713654237e-07, "logits/chosen": -3.013441324234009, "logits/rejected": -3.020658016204834, "logps/chosen": -0.9797670245170593, "logps/rejected": -1.0478479862213135, "loss": 1.9384, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.9797670245170593, "rewards/margins": 0.06808092445135117, "rewards/rejected": -1.0478479862213135, "step": 150 }, { "epoch": 0.3322170127260549, "grad_norm": 12.994046950866517, "learning_rate": 8.448339031942969e-07, "logits/chosen": -3.1380882263183594, "logits/rejected": -3.119088649749756, "logps/chosen": -1.0096795558929443, "logps/rejected": -1.0793818235397339, "loss": 1.9501, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -1.0096795558929443, "rewards/margins": 0.0697021633386612, "rewards/rejected": -1.0793818235397339, "step": 155 }, { "epoch": 0.34293369055592765, "grad_norm": 14.285933742292032, "learning_rate": 8.310213490922615e-07, "logits/chosen": -3.086894989013672, "logits/rejected": -3.0781033039093018, "logps/chosen": -1.01212477684021, "logps/rejected": -1.1041617393493652, "loss": 1.924, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.01212477684021, "rewards/margins": 0.09203706681728363, "rewards/rejected": -1.1041617393493652, "step": 160 }, { "epoch": 0.3536503683858004, "grad_norm": 12.902647964980785, "learning_rate": 8.167436194749575e-07, "logits/chosen": -3.154498815536499, "logits/rejected": -3.1380507946014404, "logps/chosen": -0.9696465730667114, "logps/rejected": -1.0397228002548218, "loss": 1.9234, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.9696465730667114, "rewards/margins": 0.07007621228694916, "rewards/rejected": -1.0397228002548218, "step": 165 }, { "epoch": 0.3643670462156731, "grad_norm": 10.461713458782954, "learning_rate": 8.020207784568291e-07, "logits/chosen": -3.0909907817840576, "logits/rejected": -3.0954232215881348, "logps/chosen": -0.9789296388626099, "logps/rejected": -1.0583703517913818, "loss": 1.8994, "rewards/accuracies": 0.5625, "rewards/chosen": -0.9789296388626099, "rewards/margins": 0.07944078743457794, "rewards/rejected": -1.0583703517913818, "step": 170 }, { "epoch": 0.3750837240455459, "grad_norm": 14.39267394950559, "learning_rate": 7.868735156555566e-07, "logits/chosen": -3.113248348236084, "logits/rejected": -3.116093158721924, "logps/chosen": -0.9837858080863953, "logps/rejected": -1.0499708652496338, "loss": 1.9274, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.9837858080863953, "rewards/margins": 0.06618491560220718, "rewards/rejected": -1.0499708652496338, "step": 175 }, { "epoch": 0.38580040187541864, "grad_norm": 15.99990919657707, "learning_rate": 7.713231171174868e-07, "logits/chosen": -3.117457866668701, "logits/rejected": -3.1266255378723145, "logps/chosen": -1.0582538843154907, "logps/rejected": -1.133825659751892, "loss": 1.9527, "rewards/accuracies": 0.53125, "rewards/chosen": -1.0582538843154907, "rewards/margins": 0.0755719467997551, "rewards/rejected": -1.133825659751892, "step": 180 }, { "epoch": 0.3965170797052914, "grad_norm": 14.766672490703176, "learning_rate": 7.553914354049162e-07, "logits/chosen": -3.0913758277893066, "logits/rejected": -3.103057384490967, "logps/chosen": -1.023158073425293, "logps/rejected": -1.06888747215271, "loss": 1.9093, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.023158073425293, "rewards/margins": 0.04572921246290207, "rewards/rejected": -1.06888747215271, "step": 185 }, { "epoch": 0.4072337575351641, "grad_norm": 15.970782695590072, "learning_rate": 7.39100858887266e-07, "logits/chosen": -3.0117366313934326, "logits/rejected": -3.0253918170928955, "logps/chosen": -0.9792415499687195, "logps/rejected": -1.192731499671936, "loss": 1.9159, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9792415499687195, "rewards/margins": 0.2134898155927658, "rewards/rejected": -1.192731499671936, "step": 190 }, { "epoch": 0.41795043536503684, "grad_norm": 14.772743027007902, "learning_rate": 7.224742802793004e-07, "logits/chosen": -3.117619514465332, "logits/rejected": -3.105710506439209, "logps/chosen": -0.9820570945739746, "logps/rejected": -1.0550075769424438, "loss": 1.9206, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.9820570945739746, "rewards/margins": 0.07295046001672745, "rewards/rejected": -1.0550075769424438, "step": 195 }, { "epoch": 0.4286671131949096, "grad_norm": 13.569195793364742, "learning_rate": 7.055350644706022e-07, "logits/chosen": -3.0321404933929443, "logits/rejected": -3.049098491668701, "logps/chosen": -0.9425796270370483, "logps/rejected": -1.015730857849121, "loss": 1.8967, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.9425796270370483, "rewards/margins": 0.07315118610858917, "rewards/rejected": -1.015730857849121, "step": 200 }, { "epoch": 0.4393837910247823, "grad_norm": 17.357578155356272, "learning_rate": 6.883070156915139e-07, "logits/chosen": -3.1063551902770996, "logits/rejected": -3.087735652923584, "logps/chosen": -0.9921137094497681, "logps/rejected": -1.0688540935516357, "loss": 1.9072, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.9921137094497681, "rewards/margins": 0.07674027979373932, "rewards/rejected": -1.0688540935516357, "step": 205 }, { "epoch": 0.45010046885465504, "grad_norm": 18.638096035157822, "learning_rate": 6.708143440616845e-07, "logits/chosen": -3.037801504135132, "logits/rejected": -3.043381452560425, "logps/chosen": -0.9749434590339661, "logps/rejected": -1.093065619468689, "loss": 1.8807, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9749434590339661, "rewards/margins": 0.11812222003936768, "rewards/rejected": -1.093065619468689, "step": 210 }, { "epoch": 0.4608171466845278, "grad_norm": 14.384225079310538, "learning_rate": 6.530816315682306e-07, "logits/chosen": -3.039210319519043, "logits/rejected": -3.036463499069214, "logps/chosen": -0.9878280758857727, "logps/rejected": -1.0471224784851074, "loss": 1.8936, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.9878280758857727, "rewards/margins": 0.0592944398522377, "rewards/rejected": -1.0471224784851074, "step": 215 }, { "epoch": 0.4715338245144005, "grad_norm": 15.635777348265833, "learning_rate": 6.351337975213237e-07, "logits/chosen": -2.952924966812134, "logits/rejected": -2.9542202949523926, "logps/chosen": -0.950057327747345, "logps/rejected": -1.0494937896728516, "loss": 1.9176, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.950057327747345, "rewards/margins": 0.0994364321231842, "rewards/rejected": -1.0494937896728516, "step": 220 }, { "epoch": 0.4822505023442733, "grad_norm": 15.744387463358793, "learning_rate": 6.169960635357437e-07, "logits/chosen": -2.9482624530792236, "logits/rejected": -2.989917278289795, "logps/chosen": -0.9638145565986633, "logps/rejected": -1.1090409755706787, "loss": 1.9033, "rewards/accuracies": 0.65625, "rewards/chosen": -0.9638145565986633, "rewards/margins": 0.14522647857666016, "rewards/rejected": -1.1090409755706787, "step": 225 }, { "epoch": 0.49296718017414604, "grad_norm": 20.61441976992923, "learning_rate": 5.98693918087613e-07, "logits/chosen": -2.8699066638946533, "logits/rejected": -2.870300531387329, "logps/chosen": -0.9924839735031128, "logps/rejected": -1.0983588695526123, "loss": 1.8841, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9924839735031128, "rewards/margins": 0.10587481409311295, "rewards/rejected": -1.0983588695526123, "step": 230 }, { "epoch": 0.5036838580040187, "grad_norm": 16.252130697815385, "learning_rate": 5.802530806961194e-07, "logits/chosen": -2.817322254180908, "logits/rejected": -2.829948663711548, "logps/chosen": -0.980228066444397, "logps/rejected": -1.1411757469177246, "loss": 1.8909, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.980228066444397, "rewards/margins": 0.16094763576984406, "rewards/rejected": -1.1411757469177246, "step": 235 }, { "epoch": 0.5144005358338914, "grad_norm": 18.85039464903285, "learning_rate": 5.616994657805565e-07, "logits/chosen": -2.8348546028137207, "logits/rejected": -2.8363900184631348, "logps/chosen": -1.0022538900375366, "logps/rejected": -1.091737985610962, "loss": 1.9144, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.0022538900375366, "rewards/margins": 0.08948404341936111, "rewards/rejected": -1.091737985610962, "step": 240 }, { "epoch": 0.5251172136637642, "grad_norm": 18.11195320561811, "learning_rate": 5.430591462434792e-07, "logits/chosen": -2.8302159309387207, "logits/rejected": -2.8293166160583496, "logps/chosen": -0.9994494318962097, "logps/rejected": -1.0923607349395752, "loss": 1.8816, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.9994494318962097, "rewards/margins": 0.09291131794452667, "rewards/rejected": -1.0923607349395752, "step": 245 }, { "epoch": 0.535833891493637, "grad_norm": 17.05860095183427, "learning_rate": 5.24358316831145e-07, "logits/chosen": -2.8530397415161133, "logits/rejected": -2.8722071647644043, "logps/chosen": -1.039880394935608, "logps/rejected": -1.1407957077026367, "loss": 1.8947, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.039880394935608, "rewards/margins": 0.10091539472341537, "rewards/rejected": -1.1407957077026367, "step": 250 }, { "epoch": 0.5465505693235098, "grad_norm": 18.749033093794495, "learning_rate": 5.05623257322734e-07, "logits/chosen": -2.93331241607666, "logits/rejected": -2.944380283355713, "logps/chosen": -0.9659943580627441, "logps/rejected": -1.0837066173553467, "loss": 1.8979, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.9659943580627441, "rewards/margins": 0.11771215498447418, "rewards/rejected": -1.0837066173553467, "step": 255 }, { "epoch": 0.5572672471533825, "grad_norm": 16.94811716813612, "learning_rate": 4.868802956000706e-07, "logits/chosen": -2.931999683380127, "logits/rejected": -2.9354639053344727, "logps/chosen": -0.9535225033760071, "logps/rejected": -1.0598043203353882, "loss": 1.8988, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.9535225033760071, "rewards/margins": 0.10628175735473633, "rewards/rejected": -1.0598043203353882, "step": 260 }, { "epoch": 0.5679839249832552, "grad_norm": 16.773823816327198, "learning_rate": 4.681557706497518e-07, "logits/chosen": -2.9415526390075684, "logits/rejected": -2.928161144256592, "logps/chosen": -0.9740009307861328, "logps/rejected": -1.1567602157592773, "loss": 1.8795, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.9740009307861328, "rewards/margins": 0.1827593445777893, "rewards/rejected": -1.1567602157592773, "step": 265 }, { "epoch": 0.578700602813128, "grad_norm": 18.918032339525077, "learning_rate": 4.494759955496678e-07, "logits/chosen": -2.977628231048584, "logits/rejected": -2.993778705596924, "logps/chosen": -0.9572176933288574, "logps/rejected": -1.0660667419433594, "loss": 1.8826, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.9572176933288574, "rewards/margins": 0.1088489443063736, "rewards/rejected": -1.0660667419433594, "step": 270 }, { "epoch": 0.5894172806430007, "grad_norm": 18.052342935862615, "learning_rate": 4.3086722049193145e-07, "logits/chosen": -3.0297939777374268, "logits/rejected": -3.0311572551727295, "logps/chosen": -0.9626353979110718, "logps/rejected": -1.1277216672897339, "loss": 1.8892, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.9626353979110718, "rewards/margins": 0.16508622467517853, "rewards/rejected": -1.1277216672897339, "step": 275 }, { "epoch": 0.6001339584728734, "grad_norm": 17.526484616917582, "learning_rate": 4.1235559589418164e-07, "logits/chosen": -2.9183692932128906, "logits/rejected": -2.9183740615844727, "logps/chosen": -1.0271321535110474, "logps/rejected": -1.1547820568084717, "loss": 1.8912, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -1.0271321535110474, "rewards/margins": 0.1276499330997467, "rewards/rejected": -1.1547820568084717, "step": 280 }, { "epoch": 0.6108506363027462, "grad_norm": 18.526748745941557, "learning_rate": 3.9396713565109375e-07, "logits/chosen": -2.999577045440674, "logits/rejected": -2.9965460300445557, "logps/chosen": -0.9945805668830872, "logps/rejected": -1.077164888381958, "loss": 1.9015, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.9945805668830872, "rewards/margins": 0.08258435875177383, "rewards/rejected": -1.077164888381958, "step": 285 }, { "epoch": 0.6215673141326189, "grad_norm": 20.379894797382978, "learning_rate": 3.757276805777454e-07, "logits/chosen": -3.0354697704315186, "logits/rejected": -3.0337672233581543, "logps/chosen": -0.9179645776748657, "logps/rejected": -1.0484569072723389, "loss": 1.8803, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.9179645776748657, "rewards/margins": 0.13049228489398956, "rewards/rejected": -1.0484569072723389, "step": 290 }, { "epoch": 0.6322839919624916, "grad_norm": 20.690228448794066, "learning_rate": 3.5766286209620446e-07, "logits/chosen": -3.001173973083496, "logits/rejected": -2.9999818801879883, "logps/chosen": -0.9978302717208862, "logps/rejected": -1.218730092048645, "loss": 1.8787, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.9978302717208862, "rewards/margins": 0.22089961171150208, "rewards/rejected": -1.218730092048645, "step": 295 }, { "epoch": 0.6430006697923644, "grad_norm": 21.088864907959156, "learning_rate": 3.3979806621637095e-07, "logits/chosen": -3.0734269618988037, "logits/rejected": -3.0850183963775635, "logps/chosen": -1.0158809423446655, "logps/rejected": -1.203379511833191, "loss": 1.8412, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0158809423446655, "rewards/margins": 0.1874985247850418, "rewards/rejected": -1.203379511833191, "step": 300 }, { "epoch": 0.6537173476222371, "grad_norm": 20.56239032846782, "learning_rate": 3.221583978616932e-07, "logits/chosen": -3.0289087295532227, "logits/rejected": -3.0159077644348145, "logps/chosen": -1.0385640859603882, "logps/rejected": -1.1604039669036865, "loss": 1.8697, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0385640859603882, "rewards/margins": 0.12183995544910431, "rewards/rejected": -1.1604039669036865, "step": 305 }, { "epoch": 0.6644340254521098, "grad_norm": 23.460225989452297, "learning_rate": 3.047686455898836e-07, "logits/chosen": -3.080087423324585, "logits/rejected": -3.0696513652801514, "logps/chosen": -1.0038082599639893, "logps/rejected": -1.1881027221679688, "loss": 1.8639, "rewards/accuracies": 0.65625, "rewards/chosen": -1.0038082599639893, "rewards/margins": 0.18429455161094666, "rewards/rejected": -1.1881027221679688, "step": 310 }, { "epoch": 0.6751507032819826, "grad_norm": 20.943994771592788, "learning_rate": 2.8765324675821734e-07, "logits/chosen": -2.944671630859375, "logits/rejected": -2.9371066093444824, "logps/chosen": -1.0098899602890015, "logps/rejected": -1.1762586832046509, "loss": 1.8745, "rewards/accuracies": 0.625, "rewards/chosen": -1.0098899602890015, "rewards/margins": 0.16636863350868225, "rewards/rejected": -1.1762586832046509, "step": 315 }, { "epoch": 0.6858673811118553, "grad_norm": 23.376564204026522, "learning_rate": 2.708362531823621e-07, "logits/chosen": -2.9114675521850586, "logits/rejected": -2.8761847019195557, "logps/chosen": -1.0019965171813965, "logps/rejected": -1.203962802886963, "loss": 1.8847, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0019965171813965, "rewards/margins": 0.20196643471717834, "rewards/rejected": -1.203962802886963, "step": 320 }, { "epoch": 0.696584058941728, "grad_norm": 21.72889429456692, "learning_rate": 2.5434129733700093e-07, "logits/chosen": -2.859740734100342, "logits/rejected": -2.850984573364258, "logps/chosen": -0.9476163983345032, "logps/rejected": -1.190605640411377, "loss": 1.8338, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.9476163983345032, "rewards/margins": 0.24298930168151855, "rewards/rejected": -1.190605640411377, "step": 325 }, { "epoch": 0.7073007367716008, "grad_norm": 23.976896941165123, "learning_rate": 2.3819155914574235e-07, "logits/chosen": -2.881459951400757, "logits/rejected": -2.860196113586426, "logps/chosen": -0.9749948382377625, "logps/rejected": -1.1787073612213135, "loss": 1.8473, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.9749948382377625, "rewards/margins": 0.20371253788471222, "rewards/rejected": -1.1787073612213135, "step": 330 }, { "epoch": 0.7180174146014735, "grad_norm": 28.808399946877696, "learning_rate": 2.2240973340698882e-07, "logits/chosen": -2.9146461486816406, "logits/rejected": -2.9025330543518066, "logps/chosen": -0.9778841137886047, "logps/rejected": -1.1417423486709595, "loss": 1.8579, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.9778841137886047, "rewards/margins": 0.16385838389396667, "rewards/rejected": -1.1417423486709595, "step": 335 }, { "epoch": 0.7287340924313462, "grad_norm": 25.882278976736618, "learning_rate": 2.0701799790153896e-07, "logits/chosen": -2.8679490089416504, "logits/rejected": -2.8503952026367188, "logps/chosen": -0.9974260330200195, "logps/rejected": -1.2173974514007568, "loss": 1.858, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.9974260330200195, "rewards/margins": 0.21997132897377014, "rewards/rejected": -1.2173974514007568, "step": 340 }, { "epoch": 0.739450770261219, "grad_norm": 25.899929236131076, "learning_rate": 1.9203798222674138e-07, "logits/chosen": -2.881312608718872, "logits/rejected": -2.886810779571533, "logps/chosen": -0.9790380597114563, "logps/rejected": -1.2078144550323486, "loss": 1.8434, "rewards/accuracies": 0.625, "rewards/chosen": -0.9790380597114563, "rewards/margins": 0.2287764996290207, "rewards/rejected": -1.2078144550323486, "step": 345 }, { "epoch": 0.7501674480910918, "grad_norm": 22.394258706384488, "learning_rate": 1.774907374009953e-07, "logits/chosen": -2.854478120803833, "logits/rejected": -2.8414759635925293, "logps/chosen": -0.9902400970458984, "logps/rejected": -1.1531636714935303, "loss": 1.8534, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.9902400970458984, "rewards/margins": 0.16292349994182587, "rewards/rejected": -1.1531636714935303, "step": 350 }, { "epoch": 0.7608841259209645, "grad_norm": 32.75022427627133, "learning_rate": 1.6339670628131326e-07, "logits/chosen": -2.8383326530456543, "logits/rejected": -2.819650888442993, "logps/chosen": -1.0634033679962158, "logps/rejected": -1.194883942604065, "loss": 1.876, "rewards/accuracies": 0.59375, "rewards/chosen": -1.0634033679962158, "rewards/margins": 0.13148045539855957, "rewards/rejected": -1.194883942604065, "step": 355 }, { "epoch": 0.7716008037508373, "grad_norm": 23.817047943255176, "learning_rate": 1.4977569483551632e-07, "logits/chosen": -2.8515446186065674, "logits/rejected": -2.8371310234069824, "logps/chosen": -0.9948540925979614, "logps/rejected": -1.1568361520767212, "loss": 1.8636, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.9948540925979614, "rewards/margins": 0.16198216378688812, "rewards/rejected": -1.1568361520767212, "step": 360 }, { "epoch": 0.78231748158071, "grad_norm": 29.671187736811945, "learning_rate": 1.366468443094343e-07, "logits/chosen": -2.875363826751709, "logits/rejected": -2.8553128242492676, "logps/chosen": -1.039432168006897, "logps/rejected": -1.2274564504623413, "loss": 1.859, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.039432168006897, "rewards/margins": 0.18802431225776672, "rewards/rejected": -1.2274564504623413, "step": 365 }, { "epoch": 0.7930341594105828, "grad_norm": 33.954689650694874, "learning_rate": 1.240286043282197e-07, "logits/chosen": -2.8917102813720703, "logits/rejected": -2.8770086765289307, "logps/chosen": -1.0392128229141235, "logps/rejected": -1.1902668476104736, "loss": 1.8531, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.0392128229141235, "rewards/margins": 0.15105411410331726, "rewards/rejected": -1.1902668476104736, "step": 370 }, { "epoch": 0.8037508372404555, "grad_norm": 29.25246301873259, "learning_rate": 1.1193870696958058e-07, "logits/chosen": -2.866624116897583, "logits/rejected": -2.865328073501587, "logps/chosen": -1.0088036060333252, "logps/rejected": -1.1752225160598755, "loss": 1.871, "rewards/accuracies": 0.625, "rewards/chosen": -1.0088036060333252, "rewards/margins": 0.16641899943351746, "rewards/rejected": -1.1752225160598755, "step": 375 }, { "epoch": 0.8144675150703282, "grad_norm": 23.976856879448345, "learning_rate": 1.003941418453616e-07, "logits/chosen": -2.9358296394348145, "logits/rejected": -2.9078102111816406, "logps/chosen": -0.9131346940994263, "logps/rejected": -1.0847430229187012, "loss": 1.8614, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.9131346940994263, "rewards/margins": 0.1716083586215973, "rewards/rejected": -1.0847430229187012, "step": 380 }, { "epoch": 0.825184192900201, "grad_norm": 32.2123584818521, "learning_rate": 8.941113222649327e-08, "logits/chosen": -2.8775150775909424, "logits/rejected": -2.842041254043579, "logps/chosen": -1.0087697505950928, "logps/rejected": -1.1664918661117554, "loss": 1.8549, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.0087697505950928, "rewards/margins": 0.15772202610969543, "rewards/rejected": -1.1664918661117554, "step": 385 }, { "epoch": 0.8359008707300737, "grad_norm": 31.35752151311303, "learning_rate": 7.900511224486083e-08, "logits/chosen": -2.8315508365631104, "logits/rejected": -2.801546812057495, "logps/chosen": -1.1222208738327026, "logps/rejected": -1.3023992776870728, "loss": 1.8914, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.1222208738327026, "rewards/margins": 0.18017837405204773, "rewards/rejected": -1.3023992776870728, "step": 390 }, { "epoch": 0.8466175485599464, "grad_norm": 31.331425662268856, "learning_rate": 6.919070520412768e-08, "logits/chosen": -2.8807644844055176, "logits/rejected": -2.877798080444336, "logps/chosen": -1.002872347831726, "logps/rejected": -1.2327783107757568, "loss": 1.8638, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.002872347831726, "rewards/margins": 0.22990599274635315, "rewards/rejected": -1.2327783107757568, "step": 395 }, { "epoch": 0.8573342263898192, "grad_norm": 27.689197557194653, "learning_rate": 5.998170302999528e-08, "logits/chosen": -2.866481304168701, "logits/rejected": -2.851442575454712, "logps/chosen": -0.9658647775650024, "logps/rejected": -1.2617170810699463, "loss": 1.8319, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.9658647775650024, "rewards/margins": 0.2958524823188782, "rewards/rejected": -1.2617170810699463, "step": 400 }, { "epoch": 0.8573342263898192, "eval_logits/chosen": -2.7880678176879883, "eval_logits/rejected": -2.7752296924591064, "eval_logps/chosen": -0.9790560007095337, "eval_logps/rejected": -1.1183466911315918, "eval_loss": 1.8910752534866333, "eval_rewards/accuracies": 0.626329779624939, "eval_rewards/chosen": -0.9790560007095337, "eval_rewards/margins": 0.1392906755208969, "eval_rewards/rejected": -1.1183466911315918, "eval_runtime": 436.0547, "eval_samples_per_second": 6.866, "eval_steps_per_second": 0.431, "step": 400 }, { "epoch": 0.8680509042196919, "grad_norm": 32.62329228952369, "learning_rate": 5.1391046888775493e-08, "logits/chosen": -2.8493621349334717, "logits/rejected": -2.8683342933654785, "logps/chosen": -0.949614405632019, "logps/rejected": -1.1213003396987915, "loss": 1.8697, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.949614405632019, "rewards/margins": 0.1716858595609665, "rewards/rejected": -1.1213003396987915, "step": 405 }, { "epoch": 0.8787675820495646, "grad_norm": 25.8354383097588, "learning_rate": 4.343080900151375e-08, "logits/chosen": -2.881870985031128, "logits/rejected": -2.899685859680176, "logps/chosen": -0.9602924585342407, "logps/rejected": -1.1478478908538818, "loss": 1.8356, "rewards/accuracies": 0.65625, "rewards/chosen": -0.9602924585342407, "rewards/margins": 0.1875552237033844, "rewards/rejected": -1.1478478908538818, "step": 410 }, { "epoch": 0.8894842598794374, "grad_norm": 25.748109045396355, "learning_rate": 3.611217567921709e-08, "logits/chosen": -2.8336663246154785, "logits/rejected": -2.813194513320923, "logps/chosen": -1.000084400177002, "logps/rejected": -1.1840862035751343, "loss": 1.8298, "rewards/accuracies": 0.6875, "rewards/chosen": -1.000084400177002, "rewards/margins": 0.18400196731090546, "rewards/rejected": -1.1840862035751343, "step": 415 }, { "epoch": 0.9002009377093101, "grad_norm": 32.06643463040125, "learning_rate": 2.944543160302787e-08, "logits/chosen": -2.857529878616333, "logits/rejected": -2.8420491218566895, "logps/chosen": -0.9639240503311157, "logps/rejected": -1.1220653057098389, "loss": 1.8748, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.9639240503311157, "rewards/margins": 0.15814124047756195, "rewards/rejected": -1.1220653057098389, "step": 420 }, { "epoch": 0.9109176155391828, "grad_norm": 35.70189678978648, "learning_rate": 2.343994537143479e-08, "logits/chosen": -2.8647522926330566, "logits/rejected": -2.8519949913024902, "logps/chosen": -0.9895116686820984, "logps/rejected": -1.3003708124160767, "loss": 1.8221, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9895116686820984, "rewards/margins": 0.31085896492004395, "rewards/rejected": -1.3003708124160767, "step": 425 }, { "epoch": 0.9216342933690556, "grad_norm": 34.791804330091374, "learning_rate": 1.81041563348297e-08, "logits/chosen": -2.837439775466919, "logits/rejected": -2.818183660507202, "logps/chosen": -1.0866570472717285, "logps/rejected": -1.2705637216567993, "loss": 1.8716, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0866570472717285, "rewards/margins": 0.1839066594839096, "rewards/rejected": -1.2705637216567993, "step": 430 }, { "epoch": 0.9323509711989283, "grad_norm": 37.275898794915584, "learning_rate": 1.3445562735912962e-08, "logits/chosen": -2.9299168586730957, "logits/rejected": -2.9030609130859375, "logps/chosen": -0.9819711446762085, "logps/rejected": -1.2765450477600098, "loss": 1.8211, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.9819711446762085, "rewards/margins": 0.29457393288612366, "rewards/rejected": -1.2765450477600098, "step": 435 }, { "epoch": 0.943067649028801, "grad_norm": 27.67934081660638, "learning_rate": 9.470711172611722e-09, "logits/chosen": -2.852858066558838, "logits/rejected": -2.8415844440460205, "logps/chosen": -0.9760105013847351, "logps/rejected": -1.2501945495605469, "loss": 1.8076, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.9760105013847351, "rewards/margins": 0.27418413758277893, "rewards/rejected": -1.2501945495605469, "step": 440 }, { "epoch": 0.9537843268586738, "grad_norm": 31.020987107038604, "learning_rate": 6.185187398319691e-09, "logits/chosen": -2.848702907562256, "logits/rejected": -2.8342273235321045, "logps/chosen": -1.029883623123169, "logps/rejected": -1.24784255027771, "loss": 1.8487, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.029883623123169, "rewards/margins": 0.2179589569568634, "rewards/rejected": -1.24784255027771, "step": 445 }, { "epoch": 0.9645010046885466, "grad_norm": 30.471294839063304, "learning_rate": 3.593608472386045e-09, "logits/chosen": -2.8422036170959473, "logits/rejected": -2.8164522647857666, "logps/chosen": -0.9777735471725464, "logps/rejected": -1.1690075397491455, "loss": 1.8265, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.9777735471725464, "rewards/margins": 0.19123412668704987, "rewards/rejected": -1.1690075397491455, "step": 450 }, { "epoch": 0.9752176825184193, "grad_norm": 30.36082211778746, "learning_rate": 1.6996162718847518e-09, "logits/chosen": -2.811438798904419, "logits/rejected": -2.8197503089904785, "logps/chosen": -0.9904121160507202, "logps/rejected": -1.257320523262024, "loss": 1.8079, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.9904121160507202, "rewards/margins": 0.2669083774089813, "rewards/rejected": -1.257320523262024, "step": 455 }, { "epoch": 0.9859343603482921, "grad_norm": 38.11034342674612, "learning_rate": 5.058723737811355e-10, "logits/chosen": -2.829425096511841, "logits/rejected": -2.828883647918701, "logps/chosen": -1.0095789432525635, "logps/rejected": -1.2887779474258423, "loss": 1.8606, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0095789432525635, "rewards/margins": 0.27919891476631165, "rewards/rejected": -1.2887779474258423, "step": 460 }, { "epoch": 0.9966510381781648, "grad_norm": 31.577486151371264, "learning_rate": 1.405431468848306e-11, "logits/chosen": -2.846008777618408, "logits/rejected": -2.832801580429077, "logps/chosen": -1.0791490077972412, "logps/rejected": -1.249805212020874, "loss": 1.8779, "rewards/accuracies": 0.65625, "rewards/chosen": -1.0791490077972412, "rewards/margins": 0.170656219124794, "rewards/rejected": -1.249805212020874, "step": 465 }, { "epoch": 0.9987943737441393, "step": 466, "total_flos": 0.0, "train_loss": 1.899593046294773, "train_runtime": 19390.7723, "train_samples_per_second": 3.08, "train_steps_per_second": 0.024 } ], "logging_steps": 5, "max_steps": 466, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }