{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9994242947610823, "eval_steps": 100, "global_step": 868, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0011514104778353484, "grad_norm": 35.91765211885503, "learning_rate": 5.747126436781609e-09, "logits/chosen": -2.086653709411621, "logits/rejected": -2.069509267807007, "logps/chosen": -361.22979736328125, "logps/rejected": -328.4201354980469, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.011514104778353483, "grad_norm": 37.62574042925476, "learning_rate": 5.747126436781609e-08, "logits/chosen": -2.192697763442993, "logits/rejected": -2.1893699169158936, "logps/chosen": -346.8982238769531, "logps/rejected": -305.4053039550781, "loss": 0.6929, "rewards/accuracies": 0.4652777910232544, "rewards/chosen": 0.00022573958267457783, "rewards/margins": 0.00043605040991678834, "rewards/rejected": -0.00021031053620390594, "step": 10 }, { "epoch": 0.023028209556706966, "grad_norm": 33.76619596156607, "learning_rate": 1.1494252873563217e-07, "logits/chosen": -2.170515775680542, "logits/rejected": -2.1960134506225586, "logps/chosen": -322.89593505859375, "logps/rejected": -279.732177734375, "loss": 0.6923, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.006018324755132198, "rewards/margins": 0.0009490437805652618, "rewards/rejected": 0.005069280508905649, "step": 20 }, { "epoch": 0.03454231433506045, "grad_norm": 36.02949439768653, "learning_rate": 1.7241379310344828e-07, "logits/chosen": -2.226337194442749, "logits/rejected": -2.215334415435791, "logps/chosen": -343.44012451171875, "logps/rejected": -305.6834411621094, "loss": 0.6875, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0371861457824707, "rewards/margins": 0.012388146482408047, "rewards/rejected": 0.02479800209403038, "step": 30 }, { "epoch": 0.04605641911341393, "grad_norm": 30.794242683432575, "learning_rate": 2.2988505747126435e-07, "logits/chosen": -2.3109958171844482, "logits/rejected": -2.272737979888916, "logps/chosen": -313.8249206542969, "logps/rejected": -281.3092956542969, "loss": 0.6758, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.10629389435052872, "rewards/margins": 0.035184551030397415, "rewards/rejected": 0.071109339594841, "step": 40 }, { "epoch": 0.057570523891767415, "grad_norm": 29.832104382822315, "learning_rate": 2.873563218390804e-07, "logits/chosen": -2.4144537448883057, "logits/rejected": -2.4051060676574707, "logps/chosen": -335.85626220703125, "logps/rejected": -322.4024658203125, "loss": 0.664, "rewards/accuracies": 0.606249988079071, "rewards/chosen": 0.20595140755176544, "rewards/margins": 0.058795731514692307, "rewards/rejected": 0.14715565741062164, "step": 50 }, { "epoch": 0.0690846286701209, "grad_norm": 27.97699348851217, "learning_rate": 3.4482758620689656e-07, "logits/chosen": -2.4252865314483643, "logits/rejected": -2.4110381603240967, "logps/chosen": -293.0983581542969, "logps/rejected": -276.4584655761719, "loss": 0.6437, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.26091432571411133, "rewards/margins": 0.12072187662124634, "rewards/rejected": 0.140192449092865, "step": 60 }, { "epoch": 0.08059873344847437, "grad_norm": 26.14817360357517, "learning_rate": 4.0229885057471266e-07, "logits/chosen": -2.5252156257629395, "logits/rejected": -2.488867998123169, "logps/chosen": -341.91156005859375, "logps/rejected": -308.27032470703125, "loss": 0.6192, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.3610069155693054, "rewards/margins": 0.20518210530281067, "rewards/rejected": 0.15582481026649475, "step": 70 }, { "epoch": 0.09211283822682786, "grad_norm": 26.01503586020309, "learning_rate": 4.597701149425287e-07, "logits/chosen": -2.443207263946533, "logits/rejected": -2.4321365356445312, "logps/chosen": -303.1759948730469, "logps/rejected": -293.99212646484375, "loss": 0.5946, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": 0.2370336949825287, "rewards/margins": 0.22374853491783142, "rewards/rejected": 0.013285147957503796, "step": 80 }, { "epoch": 0.10362694300518134, "grad_norm": 28.597789728089687, "learning_rate": 4.999817969178237e-07, "logits/chosen": -2.468017578125, "logits/rejected": -2.45894718170166, "logps/chosen": -341.286376953125, "logps/rejected": -346.0598449707031, "loss": 0.5438, "rewards/accuracies": 0.8125, "rewards/chosen": 0.2997075915336609, "rewards/margins": 0.4598621726036072, "rewards/rejected": -0.16015461087226868, "step": 90 }, { "epoch": 0.11514104778353483, "grad_norm": 31.239635888342793, "learning_rate": 4.996582603056428e-07, "logits/chosen": -2.290760040283203, "logits/rejected": -2.2722649574279785, "logps/chosen": -325.2711181640625, "logps/rejected": -352.16949462890625, "loss": 0.5118, "rewards/accuracies": 0.75, "rewards/chosen": -0.0031170793808996677, "rewards/margins": 0.5678674578666687, "rewards/rejected": -0.5709845423698425, "step": 100 }, { "epoch": 0.11514104778353483, "eval_logits/chosen": -2.2212953567504883, "eval_logits/rejected": -2.1984219551086426, "eval_logps/chosen": -390.5766296386719, "eval_logps/rejected": -417.6701354980469, "eval_loss": 0.592314600944519, "eval_rewards/accuracies": 0.70703125, "eval_rewards/chosen": -0.11199207603931427, "eval_rewards/margins": 0.3385947644710541, "eval_rewards/rejected": -0.45058679580688477, "eval_runtime": 98.608, "eval_samples_per_second": 20.282, "eval_steps_per_second": 0.325, "step": 100 }, { "epoch": 0.1266551525618883, "grad_norm": 28.87850245767613, "learning_rate": 4.989308132738126e-07, "logits/chosen": -2.224853754043579, "logits/rejected": -2.1996631622314453, "logps/chosen": -334.91888427734375, "logps/rejected": -380.91668701171875, "loss": 0.4719, "rewards/accuracies": 0.731249988079071, "rewards/chosen": 0.0493912398815155, "rewards/margins": 0.8100606203079224, "rewards/rejected": -0.7606694102287292, "step": 110 }, { "epoch": 0.1381692573402418, "grad_norm": 29.398659404338673, "learning_rate": 4.978006327248536e-07, "logits/chosen": -2.199742555618286, "logits/rejected": -2.1492202281951904, "logps/chosen": -314.296142578125, "logps/rejected": -369.991455078125, "loss": 0.4704, "rewards/accuracies": 0.768750011920929, "rewards/chosen": 0.09014983475208282, "rewards/margins": 0.9132173657417297, "rewards/rejected": -0.8230674862861633, "step": 120 }, { "epoch": 0.1496833621185953, "grad_norm": 30.44019666597221, "learning_rate": 4.962695471250032e-07, "logits/chosen": -2.1790311336517334, "logits/rejected": -2.1547985076904297, "logps/chosen": -302.8690490722656, "logps/rejected": -415.23095703125, "loss": 0.4555, "rewards/accuracies": 0.768750011920929, "rewards/chosen": 0.09897075593471527, "rewards/margins": 1.2424136400222778, "rewards/rejected": -1.1434428691864014, "step": 130 }, { "epoch": 0.16119746689694875, "grad_norm": 33.58601902040164, "learning_rate": 4.94340033546025e-07, "logits/chosen": -2.2502989768981934, "logits/rejected": -2.2536580562591553, "logps/chosen": -325.1845397949219, "logps/rejected": -431.7062072753906, "loss": 0.4345, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": 0.23212842643260956, "rewards/margins": 1.2962288856506348, "rewards/rejected": -1.0641005039215088, "step": 140 }, { "epoch": 0.17271157167530224, "grad_norm": 32.120902840689595, "learning_rate": 4.920152136576705e-07, "logits/chosen": -2.44754958152771, "logits/rejected": -2.4280953407287598, "logps/chosen": -325.13916015625, "logps/rejected": -465.1835021972656, "loss": 0.4604, "rewards/accuracies": 0.78125, "rewards/chosen": 0.07414035499095917, "rewards/margins": 1.312412977218628, "rewards/rejected": -1.2382725477218628, "step": 150 }, { "epoch": 0.18422567645365573, "grad_norm": 27.142754060910285, "learning_rate": 4.892988486772756e-07, "logits/chosen": -2.7220418453216553, "logits/rejected": -2.731748342514038, "logps/chosen": -341.7224426269531, "logps/rejected": -451.0387268066406, "loss": 0.4331, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": 0.20678754150867462, "rewards/margins": 1.3990733623504639, "rewards/rejected": -1.1922857761383057, "step": 160 }, { "epoch": 0.19573978123200922, "grad_norm": 29.918359187167102, "learning_rate": 4.861953332846629e-07, "logits/chosen": -2.796257495880127, "logits/rejected": -2.810292959213257, "logps/chosen": -360.57257080078125, "logps/rejected": -441.2469787597656, "loss": 0.4495, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.04986714571714401, "rewards/margins": 1.442570447921753, "rewards/rejected": -1.3927034139633179, "step": 170 }, { "epoch": 0.20725388601036268, "grad_norm": 28.18581518610586, "learning_rate": 4.827096885121953e-07, "logits/chosen": -2.9461441040039062, "logits/rejected": -2.936654567718506, "logps/chosen": -342.01666259765625, "logps/rejected": -421.7103576660156, "loss": 0.435, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.026858195662498474, "rewards/margins": 1.3959574699401855, "rewards/rejected": -1.4228156805038452, "step": 180 }, { "epoch": 0.21876799078871617, "grad_norm": 35.53737142925795, "learning_rate": 4.788475536214821e-07, "logits/chosen": -3.022078275680542, "logits/rejected": -3.0052285194396973, "logps/chosen": -336.94830322265625, "logps/rejected": -493.62359619140625, "loss": 0.4228, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.03777497634291649, "rewards/margins": 1.5011249780654907, "rewards/rejected": -1.4633500576019287, "step": 190 }, { "epoch": 0.23028209556706966, "grad_norm": 32.357788149040054, "learning_rate": 4.746151769798818e-07, "logits/chosen": -3.098576545715332, "logits/rejected": -3.122755527496338, "logps/chosen": -350.237060546875, "logps/rejected": -476.60345458984375, "loss": 0.4206, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": 0.029024356976151466, "rewards/margins": 1.669870376586914, "rewards/rejected": -1.6408460140228271, "step": 200 }, { "epoch": 0.23028209556706966, "eval_logits/chosen": -3.164449691772461, "eval_logits/rejected": -3.2280213832855225, "eval_logps/chosen": -408.5089416503906, "eval_logps/rejected": -480.46405029296875, "eval_loss": 0.5054616928100586, "eval_rewards/accuracies": 0.80078125, "eval_rewards/chosen": -0.2913154363632202, "eval_rewards/margins": 0.7872099280357361, "eval_rewards/rejected": -1.078525424003601, "eval_runtime": 98.2744, "eval_samples_per_second": 20.351, "eval_steps_per_second": 0.326, "step": 200 }, { "epoch": 0.24179620034542315, "grad_norm": 33.674165033906036, "learning_rate": 4.7001940595156055e-07, "logits/chosen": -3.1950924396514893, "logits/rejected": -3.276893138885498, "logps/chosen": -364.2984313964844, "logps/rejected": -458.85418701171875, "loss": 0.4096, "rewards/accuracies": 0.78125, "rewards/chosen": -0.04986700415611267, "rewards/margins": 1.6173715591430664, "rewards/rejected": -1.6672385931015015, "step": 210 }, { "epoch": 0.2533103051237766, "grad_norm": 33.42353087043008, "learning_rate": 4.650676758194623e-07, "logits/chosen": -3.289186477661133, "logits/rejected": -3.4233367443084717, "logps/chosen": -340.89410400390625, "logps/rejected": -531.8297729492188, "loss": 0.417, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3013092875480652, "rewards/margins": 2.0576224327087402, "rewards/rejected": -2.35893177986145, "step": 220 }, { "epoch": 0.26482440990213013, "grad_norm": 28.030706610514635, "learning_rate": 4.5976799775611215e-07, "logits/chosen": -3.4384427070617676, "logits/rejected": -3.6002049446105957, "logps/chosen": -357.27099609375, "logps/rejected": -521.6351318359375, "loss": 0.4404, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.060726016759872437, "rewards/margins": 2.054325580596924, "rewards/rejected": -2.115051746368408, "step": 230 }, { "epoch": 0.2763385146804836, "grad_norm": 30.164608033500873, "learning_rate": 4.5412894586271543e-07, "logits/chosen": -3.5104153156280518, "logits/rejected": -3.591907024383545, "logps/chosen": -341.6837463378906, "logps/rejected": -471.0796813964844, "loss": 0.4392, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.23911134898662567, "rewards/margins": 1.611322045326233, "rewards/rejected": -1.850433588027954, "step": 240 }, { "epoch": 0.28785261945883706, "grad_norm": 31.949435858685035, "learning_rate": 4.481596432975201e-07, "logits/chosen": -3.528832197189331, "logits/rejected": -3.651289463043213, "logps/chosen": -336.5597229003906, "logps/rejected": -484.8773498535156, "loss": 0.425, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": 0.026675838977098465, "rewards/margins": 1.7153713703155518, "rewards/rejected": -1.6886956691741943, "step": 250 }, { "epoch": 0.2993667242371906, "grad_norm": 27.939909687462926, "learning_rate": 4.41869747515886e-07, "logits/chosen": -3.489166736602783, "logits/rejected": -3.7278106212615967, "logps/chosen": -356.98907470703125, "logps/rejected": -521.9197387695312, "loss": 0.4148, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.07938538491725922, "rewards/margins": 2.32578706741333, "rewards/rejected": -2.24640154838562, "step": 260 }, { "epoch": 0.31088082901554404, "grad_norm": 34.336437982786, "learning_rate": 4.352694346459396e-07, "logits/chosen": -3.69819974899292, "logits/rejected": -3.856245756149292, "logps/chosen": -312.3550109863281, "logps/rejected": -512.3087768554688, "loss": 0.3868, "rewards/accuracies": 0.84375, "rewards/chosen": 0.007610364351421595, "rewards/margins": 2.3179588317871094, "rewards/rejected": -2.3103487491607666, "step": 270 }, { "epoch": 0.3223949337938975, "grad_norm": 31.93422033932675, "learning_rate": 4.2836938302509256e-07, "logits/chosen": -3.8322901725769043, "logits/rejected": -4.021459579467773, "logps/chosen": -364.43157958984375, "logps/rejected": -556.7454223632812, "loss": 0.3795, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.3510279357433319, "rewards/margins": 2.118349075317383, "rewards/rejected": -2.469377040863037, "step": 280 }, { "epoch": 0.333909038572251, "grad_norm": 43.67643614347539, "learning_rate": 4.2118075592405874e-07, "logits/chosen": -4.014069080352783, "logits/rejected": -4.166284561157227, "logps/chosen": -366.17498779296875, "logps/rejected": -511.95806884765625, "loss": 0.4028, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3753136992454529, "rewards/margins": 1.9316318035125732, "rewards/rejected": -2.306945562362671, "step": 290 }, { "epoch": 0.3454231433506045, "grad_norm": 33.05155256360138, "learning_rate": 4.137151834863213e-07, "logits/chosen": -3.932748794555664, "logits/rejected": -4.1272077560424805, "logps/chosen": -338.482666015625, "logps/rejected": -491.4756774902344, "loss": 0.4144, "rewards/accuracies": 0.78125, "rewards/chosen": -0.12368359416723251, "rewards/margins": 1.6778045892715454, "rewards/rejected": -1.8014881610870361, "step": 300 }, { "epoch": 0.3454231433506045, "eval_logits/chosen": -3.886050224304199, "eval_logits/rejected": -4.0962815284729, "eval_logps/chosen": -410.2217712402344, "eval_logps/rejected": -499.97003173828125, "eval_loss": 0.45044589042663574, "eval_rewards/accuracies": 0.77734375, "eval_rewards/chosen": -0.3084433674812317, "eval_rewards/margins": 0.9651419520378113, "eval_rewards/rejected": -1.273585319519043, "eval_runtime": 99.0297, "eval_samples_per_second": 20.196, "eval_steps_per_second": 0.323, "step": 300 }, { "epoch": 0.356937248128958, "grad_norm": 30.758950038626843, "learning_rate": 4.059847439122671e-07, "logits/chosen": -4.072343826293945, "logits/rejected": -4.278454780578613, "logps/chosen": -332.38323974609375, "logps/rejected": -486.20587158203125, "loss": 0.4126, "rewards/accuracies": 0.768750011920929, "rewards/chosen": 0.11183549463748932, "rewards/margins": 1.9423106908798218, "rewards/rejected": -1.8304752111434937, "step": 310 }, { "epoch": 0.36845135290731146, "grad_norm": 35.899670349090925, "learning_rate": 3.98001943918432e-07, "logits/chosen": -4.233328819274902, "logits/rejected": -4.456056594848633, "logps/chosen": -370.2253723144531, "logps/rejected": -577.809814453125, "loss": 0.3732, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.1710590422153473, "rewards/margins": 2.226891279220581, "rewards/rejected": -2.3979504108428955, "step": 320 }, { "epoch": 0.3799654576856649, "grad_norm": 31.506974249108822, "learning_rate": 3.8977969850346866e-07, "logits/chosen": -4.291365146636963, "logits/rejected": -4.589537143707275, "logps/chosen": -402.2667541503906, "logps/rejected": -580.32080078125, "loss": 0.4158, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.40963658690452576, "rewards/margins": 2.1939713954925537, "rewards/rejected": -2.6036081314086914, "step": 330 }, { "epoch": 0.39147956246401844, "grad_norm": 42.312479747132286, "learning_rate": 3.8133131005357465e-07, "logits/chosen": -4.51456356048584, "logits/rejected": -4.711074352264404, "logps/chosen": -356.7383117675781, "logps/rejected": -599.3222045898438, "loss": 0.3868, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3934357762336731, "rewards/margins": 2.4568190574645996, "rewards/rejected": -2.850255012512207, "step": 340 }, { "epoch": 0.4029936672423719, "grad_norm": 34.94322397599626, "learning_rate": 3.7267044682118435e-07, "logits/chosen": -4.381545066833496, "logits/rejected": -4.7945661544799805, "logps/chosen": -396.62408447265625, "logps/rejected": -617.2008666992188, "loss": 0.3886, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.23957356810569763, "rewards/margins": 2.6808698177337646, "rewards/rejected": -2.920443296432495, "step": 350 }, { "epoch": 0.41450777202072536, "grad_norm": 35.153895155661694, "learning_rate": 3.638111208117425e-07, "logits/chosen": -4.376262664794922, "logits/rejected": -4.689536094665527, "logps/chosen": -387.55474853515625, "logps/rejected": -586.8858642578125, "loss": 0.4037, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.23621347546577454, "rewards/margins": 2.256948232650757, "rewards/rejected": -2.493161678314209, "step": 360 }, { "epoch": 0.4260218767990789, "grad_norm": 30.56527510711544, "learning_rate": 3.5476766511433605e-07, "logits/chosen": -4.566588878631592, "logits/rejected": -4.897808074951172, "logps/chosen": -381.00604248046875, "logps/rejected": -585.059814453125, "loss": 0.3902, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14318397641181946, "rewards/margins": 2.517329692840576, "rewards/rejected": -2.6605141162872314, "step": 370 }, { "epoch": 0.43753598157743234, "grad_norm": 34.017679923693805, "learning_rate": 3.455547107128602e-07, "logits/chosen": -4.60725736618042, "logits/rejected": -5.102498531341553, "logps/chosen": -385.83770751953125, "logps/rejected": -623.3347778320312, "loss": 0.3929, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5362241268157959, "rewards/margins": 2.6802401542663574, "rewards/rejected": -3.2164645195007324, "step": 380 }, { "epoch": 0.44905008635578586, "grad_norm": 33.15867623899776, "learning_rate": 3.361871628152338e-07, "logits/chosen": -4.563677787780762, "logits/rejected": -4.989599227905273, "logps/chosen": -367.84814453125, "logps/rejected": -567.6351318359375, "loss": 0.4213, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.3700157105922699, "rewards/margins": 2.4626548290252686, "rewards/rejected": -2.8326706886291504, "step": 390 }, { "epoch": 0.4605641911341393, "grad_norm": 35.10207305823101, "learning_rate": 3.2668017673896077e-07, "logits/chosen": -4.686192035675049, "logits/rejected": -5.130132675170898, "logps/chosen": -351.6319885253906, "logps/rejected": -523.5940551757812, "loss": 0.4011, "rewards/accuracies": 0.78125, "rewards/chosen": -0.2101125717163086, "rewards/margins": 2.3180549144744873, "rewards/rejected": -2.528167247772217, "step": 400 }, { "epoch": 0.4605641911341393, "eval_logits/chosen": -4.5018205642700195, "eval_logits/rejected": -4.837046146392822, "eval_logps/chosen": -421.8441162109375, "eval_logps/rejected": -525.9361572265625, "eval_loss": 0.4135480225086212, "eval_rewards/accuracies": 0.80859375, "eval_rewards/chosen": -0.42466747760772705, "eval_rewards/margins": 1.1085797548294067, "eval_rewards/rejected": -1.5332471132278442, "eval_runtime": 98.3292, "eval_samples_per_second": 20.34, "eval_steps_per_second": 0.325, "step": 400 }, { "epoch": 0.4720782959124928, "grad_norm": 33.086992992339596, "learning_rate": 3.1704913339205103e-07, "logits/chosen": -4.71237850189209, "logits/rejected": -5.09951639175415, "logps/chosen": -392.43292236328125, "logps/rejected": -596.8004150390625, "loss": 0.3894, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.45191723108291626, "rewards/margins": 2.4984166622161865, "rewards/rejected": -2.950334072113037, "step": 410 }, { "epoch": 0.4835924006908463, "grad_norm": 36.9499485623677, "learning_rate": 3.0730961438896885e-07, "logits/chosen": -4.71737003326416, "logits/rejected": -5.089630603790283, "logps/chosen": -371.7138977050781, "logps/rejected": -539.5205078125, "loss": 0.3986, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.6353754997253418, "rewards/margins": 1.956162452697754, "rewards/rejected": -2.591538190841675, "step": 420 }, { "epoch": 0.49510650546919976, "grad_norm": 28.416064555595714, "learning_rate": 2.9747737684186795e-07, "logits/chosen": -4.5956220626831055, "logits/rejected": -5.009639263153076, "logps/chosen": -388.5729064941406, "logps/rejected": -566.389892578125, "loss": 0.3953, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5186115503311157, "rewards/margins": 2.118881940841675, "rewards/rejected": -2.63749361038208, "step": 430 }, { "epoch": 0.5066206102475532, "grad_norm": 35.02068361332514, "learning_rate": 2.8756832786789663e-07, "logits/chosen": -4.5723748207092285, "logits/rejected": -5.229958534240723, "logps/chosen": -344.8235778808594, "logps/rejected": -562.1149291992188, "loss": 0.3753, "rewards/accuracies": 0.84375, "rewards/chosen": -0.18356148898601532, "rewards/margins": 2.6801793575286865, "rewards/rejected": -2.863740921020508, "step": 440 }, { "epoch": 0.5181347150259067, "grad_norm": 29.90766637224572, "learning_rate": 2.7759849885381747e-07, "logits/chosen": -4.58120059967041, "logits/rejected": -5.108014106750488, "logps/chosen": -380.8218688964844, "logps/rejected": -558.5294189453125, "loss": 0.395, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.36003825068473816, "rewards/margins": 2.234218120574951, "rewards/rejected": -2.594256639480591, "step": 450 }, { "epoch": 0.5296488198042603, "grad_norm": 43.539308942722826, "learning_rate": 2.675840195195762e-07, "logits/chosen": -4.849000453948975, "logits/rejected": -5.308794975280762, "logps/chosen": -353.55523681640625, "logps/rejected": -619.9716796875, "loss": 0.3685, "rewards/accuracies": 0.84375, "rewards/chosen": -0.29138100147247314, "rewards/margins": 2.825038433074951, "rewards/rejected": -3.116419553756714, "step": 460 }, { "epoch": 0.5411629245826137, "grad_norm": 33.774855687056665, "learning_rate": 2.575410918227829e-07, "logits/chosen": -4.863161087036133, "logits/rejected": -5.457709312438965, "logps/chosen": -411.6463317871094, "logps/rejected": -598.97314453125, "loss": 0.3821, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.46561044454574585, "rewards/margins": 2.4459636211395264, "rewards/rejected": -2.911574602127075, "step": 470 }, { "epoch": 0.5526770293609672, "grad_norm": 33.53580470090372, "learning_rate": 2.474859637463226e-07, "logits/chosen": -5.079291343688965, "logits/rejected": -5.424225807189941, "logps/chosen": -389.027099609375, "logps/rejected": -587.9437255859375, "loss": 0.3962, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.4632614254951477, "rewards/margins": 2.3001296520233154, "rewards/rejected": -2.7633910179138184, "step": 480 }, { "epoch": 0.5641911341393206, "grad_norm": 32.1453411001328, "learning_rate": 2.3743490301150355e-07, "logits/chosen": -5.007067680358887, "logits/rejected": -5.361691474914551, "logps/chosen": -343.4484558105469, "logps/rejected": -570.6577758789062, "loss": 0.3902, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14810001850128174, "rewards/margins": 2.4624667167663574, "rewards/rejected": -2.6105666160583496, "step": 490 }, { "epoch": 0.5757052389176741, "grad_norm": 32.90845084744282, "learning_rate": 2.274041707592724e-07, "logits/chosen": -4.921438694000244, "logits/rejected": -5.355481147766113, "logps/chosen": -339.01129150390625, "logps/rejected": -556.4103393554688, "loss": 0.3915, "rewards/accuracies": 0.78125, "rewards/chosen": -0.14777924120426178, "rewards/margins": 2.432879686355591, "rewards/rejected": -2.5806591510772705, "step": 500 }, { "epoch": 0.5757052389176741, "eval_logits/chosen": -4.767510890960693, "eval_logits/rejected": -5.187655925750732, "eval_logps/chosen": -418.29376220703125, "eval_logps/rejected": -544.0393676757812, "eval_loss": 0.37398749589920044, "eval_rewards/accuracies": 0.8515625, "eval_rewards/chosen": -0.389164000749588, "eval_rewards/margins": 1.3251150846481323, "eval_rewards/rejected": -1.7142791748046875, "eval_runtime": 98.0381, "eval_samples_per_second": 20.4, "eval_steps_per_second": 0.326, "step": 500 }, { "epoch": 0.5872193436960277, "grad_norm": 31.42761305876207, "learning_rate": 2.17409995242075e-07, "logits/chosen": -5.038609504699707, "logits/rejected": -5.722345352172852, "logps/chosen": -372.905517578125, "logps/rejected": -569.4352416992188, "loss": 0.376, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.27033573389053345, "rewards/margins": 2.4031970500946045, "rewards/rejected": -2.6735329627990723, "step": 510 }, { "epoch": 0.5987334484743811, "grad_norm": 29.61275457382243, "learning_rate": 2.0746854556892544e-07, "logits/chosen": -5.438863754272461, "logits/rejected": -5.798094749450684, "logps/chosen": -407.27008056640625, "logps/rejected": -620.6509399414062, "loss": 0.3645, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.43467459082603455, "rewards/margins": 2.4455971717834473, "rewards/rejected": -2.8802719116210938, "step": 520 }, { "epoch": 0.6102475532527346, "grad_norm": 27.24117353879226, "learning_rate": 1.9759590554616173e-07, "logits/chosen": -5.715832710266113, "logits/rejected": -6.058187961578369, "logps/chosen": -397.95849609375, "logps/rejected": -609.6741943359375, "loss": 0.3968, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.6830942034721375, "rewards/margins": 2.4185569286346436, "rewards/rejected": -3.101651191711426, "step": 530 }, { "epoch": 0.6217616580310881, "grad_norm": 30.859422948077256, "learning_rate": 1.8780804765620746e-07, "logits/chosen": -5.4331769943237305, "logits/rejected": -5.7857160568237305, "logps/chosen": -373.3824462890625, "logps/rejected": -528.5029296875, "loss": 0.4178, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4058764576911926, "rewards/margins": 1.9241279363632202, "rewards/rejected": -2.3300044536590576, "step": 540 }, { "epoch": 0.6332757628094415, "grad_norm": 35.78902948656132, "learning_rate": 1.7812080721643973e-07, "logits/chosen": -5.20429801940918, "logits/rejected": -5.622688293457031, "logps/chosen": -401.1048889160156, "logps/rejected": -605.438232421875, "loss": 0.3956, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.27011531591415405, "rewards/margins": 2.323632001876831, "rewards/rejected": -2.593747615814209, "step": 550 }, { "epoch": 0.644789867587795, "grad_norm": 31.09337668064834, "learning_rate": 1.6854985675997063e-07, "logits/chosen": -5.3274736404418945, "logits/rejected": -5.779025554656982, "logps/chosen": -370.87823486328125, "logps/rejected": -599.370361328125, "loss": 0.377, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.30361196398735046, "rewards/margins": 2.5692386627197266, "rewards/rejected": -2.8728506565093994, "step": 560 }, { "epoch": 0.6563039723661486, "grad_norm": 31.49748801480019, "learning_rate": 1.5911068067978818e-07, "logits/chosen": -5.422667503356934, "logits/rejected": -5.991160869598389, "logps/chosen": -363.42791748046875, "logps/rejected": -606.8687744140625, "loss": 0.3651, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.3893515467643738, "rewards/margins": 2.7044646739959717, "rewards/rejected": -3.093816041946411, "step": 570 }, { "epoch": 0.667818077144502, "grad_norm": 40.80686884426901, "learning_rate": 1.4981855017728197e-07, "logits/chosen": -5.2194623947143555, "logits/rejected": -5.8604302406311035, "logps/chosen": -378.5892028808594, "logps/rejected": -623.4224853515625, "loss": 0.3681, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.4009127616882324, "rewards/margins": 2.839203357696533, "rewards/rejected": -3.2401161193847656, "step": 580 }, { "epoch": 0.6793321819228555, "grad_norm": 35.637123676945, "learning_rate": 1.406884985556804e-07, "logits/chosen": -5.340333461761475, "logits/rejected": -5.9213457107543945, "logps/chosen": -366.98126220703125, "logps/rejected": -646.6055297851562, "loss": 0.3892, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3502456843852997, "rewards/margins": 3.1350584030151367, "rewards/rejected": -3.4853038787841797, "step": 590 }, { "epoch": 0.690846286701209, "grad_norm": 38.133176182262396, "learning_rate": 1.3173529689837354e-07, "logits/chosen": -5.227208137512207, "logits/rejected": -5.730982780456543, "logps/chosen": -406.6194152832031, "logps/rejected": -642.0016479492188, "loss": 0.3726, "rewards/accuracies": 0.8125, "rewards/chosen": -0.19344040751457214, "rewards/margins": 2.756740093231201, "rewards/rejected": -2.9501805305480957, "step": 600 }, { "epoch": 0.690846286701209, "eval_logits/chosen": -5.146116256713867, "eval_logits/rejected": -5.624752044677734, "eval_logps/chosen": -427.4439392089844, "eval_logps/rejected": -561.528564453125, "eval_loss": 0.3467547297477722, "eval_rewards/accuracies": 0.84375, "eval_rewards/chosen": -0.4806651175022125, "eval_rewards/margins": 1.408505916595459, "eval_rewards/rejected": -1.8891710042953491, "eval_runtime": 98.3003, "eval_samples_per_second": 20.346, "eval_steps_per_second": 0.326, "step": 600 }, { "epoch": 0.7023603914795624, "grad_norm": 35.76369238749813, "learning_rate": 1.2297343017146726e-07, "logits/chosen": -5.63295316696167, "logits/rejected": -6.0680012702941895, "logps/chosen": -352.22650146484375, "logps/rejected": -569.6236572265625, "loss": 0.3654, "rewards/accuracies": 0.8125, "rewards/chosen": -0.400468111038208, "rewards/margins": 2.252286672592163, "rewards/rejected": -2.65275502204895, "step": 610 }, { "epoch": 0.713874496257916, "grad_norm": 42.53908245265289, "learning_rate": 1.1441707378923474e-07, "logits/chosen": -5.555817604064941, "logits/rejected": -5.891648292541504, "logps/chosen": -372.3026123046875, "logps/rejected": -608.4457397460938, "loss": 0.3719, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.5105666518211365, "rewards/margins": 2.334003448486328, "rewards/rejected": -2.844569683074951, "step": 620 }, { "epoch": 0.7253886010362695, "grad_norm": 33.40462593975916, "learning_rate": 1.06080070680377e-07, "logits/chosen": -5.389917850494385, "logits/rejected": -5.883559226989746, "logps/chosen": -380.6363525390625, "logps/rejected": -589.5970458984375, "loss": 0.3608, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4320860803127289, "rewards/margins": 2.423119068145752, "rewards/rejected": -2.8552052974700928, "step": 630 }, { "epoch": 0.7369027058146229, "grad_norm": 40.31781331240861, "learning_rate": 9.797590889219587e-08, "logits/chosen": -5.418898582458496, "logits/rejected": -6.029601097106934, "logps/chosen": -331.7992248535156, "logps/rejected": -644.7623291015625, "loss": 0.4071, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.26965656876564026, "rewards/margins": 3.317509174346924, "rewards/rejected": -3.5871658325195312, "step": 640 }, { "epoch": 0.7484168105929764, "grad_norm": 30.964195430126203, "learning_rate": 9.011769976891367e-08, "logits/chosen": -5.33644962310791, "logits/rejected": -5.905170440673828, "logps/chosen": -370.828369140625, "logps/rejected": -630.619140625, "loss": 0.3809, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.340393990278244, "rewards/margins": 2.9275107383728027, "rewards/rejected": -3.267904758453369, "step": 650 }, { "epoch": 0.7599309153713298, "grad_norm": 34.09027033994428, "learning_rate": 8.251815673944218e-08, "logits/chosen": -5.566973686218262, "logits/rejected": -5.901907444000244, "logps/chosen": -373.8709411621094, "logps/rejected": -626.88720703125, "loss": 0.3664, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.31639060378074646, "rewards/margins": 2.5317635536193848, "rewards/rejected": -2.848154067993164, "step": 660 }, { "epoch": 0.7714450201496834, "grad_norm": 33.748663190230474, "learning_rate": 7.518957474892148e-08, "logits/chosen": -5.544904708862305, "logits/rejected": -6.055120468139648, "logps/chosen": -366.33306884765625, "logps/rejected": -662.8927001953125, "loss": 0.3675, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.4155319333076477, "rewards/margins": 3.206387758255005, "rewards/rejected": -3.621919631958008, "step": 670 }, { "epoch": 0.7829591249280369, "grad_norm": 33.43366335799461, "learning_rate": 6.814381036730274e-08, "logits/chosen": -5.3579840660095215, "logits/rejected": -5.930968284606934, "logps/chosen": -384.45245361328125, "logps/rejected": -620.3960571289062, "loss": 0.3748, "rewards/accuracies": 0.8125, "rewards/chosen": -0.3938636779785156, "rewards/margins": 2.738201856613159, "rewards/rejected": -3.132065773010254, "step": 680 }, { "epoch": 0.7944732297063903, "grad_norm": 31.210525154632403, "learning_rate": 6.139226260715872e-08, "logits/chosen": -5.434956073760986, "logits/rejected": -5.966610908508301, "logps/chosen": -387.60162353515625, "logps/rejected": -664.8744506835938, "loss": 0.355, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4180675446987152, "rewards/margins": 2.967360019683838, "rewards/rejected": -3.385427474975586, "step": 690 }, { "epoch": 0.8059873344847438, "grad_norm": 33.963445753535076, "learning_rate": 5.4945854481754734e-08, "logits/chosen": -5.527676105499268, "logits/rejected": -5.960885047912598, "logps/chosen": -374.95916748046875, "logps/rejected": -630.1693725585938, "loss": 0.3522, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3166103959083557, "rewards/margins": 2.8152191638946533, "rewards/rejected": -3.1318297386169434, "step": 700 }, { "epoch": 0.8059873344847438, "eval_logits/chosen": -5.210726261138916, "eval_logits/rejected": -5.681924343109131, "eval_logps/chosen": -433.6905517578125, "eval_logps/rejected": -577.3692016601562, "eval_loss": 0.32489100098609924, "eval_rewards/accuracies": 0.87890625, "eval_rewards/chosen": -0.5431313514709473, "eval_rewards/margins": 1.5044457912445068, "eval_rewards/rejected": -2.047577142715454, "eval_runtime": 98.0334, "eval_samples_per_second": 20.401, "eval_steps_per_second": 0.326, "step": 700 }, { "epoch": 0.8175014392630973, "grad_norm": 32.382102785679976, "learning_rate": 4.881501533321605e-08, "logits/chosen": -5.631700038909912, "logits/rejected": -6.175845146179199, "logps/chosen": -364.59674072265625, "logps/rejected": -615.4799194335938, "loss": 0.3861, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4184879660606384, "rewards/margins": 2.884592294692993, "rewards/rejected": -3.3030803203582764, "step": 710 }, { "epoch": 0.8290155440414507, "grad_norm": 29.844564520231344, "learning_rate": 4.300966395938377e-08, "logits/chosen": -5.579652309417725, "logits/rejected": -6.021969795227051, "logps/chosen": -410.3070373535156, "logps/rejected": -654.1072387695312, "loss": 0.3805, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.40225619077682495, "rewards/margins": 2.8050906658172607, "rewards/rejected": -3.2073471546173096, "step": 720 }, { "epoch": 0.8405296488198043, "grad_norm": 34.64605949847163, "learning_rate": 3.7539192566655246e-08, "logits/chosen": -5.749828338623047, "logits/rejected": -6.230714321136475, "logps/chosen": -372.4962463378906, "logps/rejected": -620.4830932617188, "loss": 0.3701, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.3709852397441864, "rewards/margins": 2.7844834327697754, "rewards/rejected": -3.155468702316284, "step": 730 }, { "epoch": 0.8520437535981578, "grad_norm": 38.917435902608844, "learning_rate": 3.24124515747731e-08, "logits/chosen": -5.770384311676025, "logits/rejected": -6.440248966217041, "logps/chosen": -377.38360595703125, "logps/rejected": -670.9470825195312, "loss": 0.3725, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.46737533807754517, "rewards/margins": 3.3466858863830566, "rewards/rejected": -3.814060926437378, "step": 740 }, { "epoch": 0.8635578583765112, "grad_norm": 35.39576347923302, "learning_rate": 2.763773529814506e-08, "logits/chosen": -5.80182409286499, "logits/rejected": -6.183619976043701, "logps/chosen": -363.37359619140625, "logps/rejected": -643.1031494140625, "loss": 0.3736, "rewards/accuracies": 0.78125, "rewards/chosen": -0.513085663318634, "rewards/margins": 2.7367725372314453, "rewards/rejected": -3.2498581409454346, "step": 750 }, { "epoch": 0.8750719631548647, "grad_norm": 35.82536365897154, "learning_rate": 2.3222768526860698e-08, "logits/chosen": -5.800836563110352, "logits/rejected": -6.234482288360596, "logps/chosen": -365.31903076171875, "logps/rejected": -579.0399169921875, "loss": 0.3663, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.5012763738632202, "rewards/margins": 2.1673426628112793, "rewards/rejected": -2.668619394302368, "step": 760 }, { "epoch": 0.8865860679332181, "grad_norm": 37.880330092886545, "learning_rate": 1.9174694029115146e-08, "logits/chosen": -5.784181594848633, "logits/rejected": -6.484677314758301, "logps/chosen": -376.74908447265625, "logps/rejected": -637.3211059570312, "loss": 0.38, "rewards/accuracies": 0.875, "rewards/chosen": -0.3697873055934906, "rewards/margins": 3.116102933883667, "rewards/rejected": -3.4858901500701904, "step": 770 }, { "epoch": 0.8981001727115717, "grad_norm": 37.173154353795034, "learning_rate": 1.5500060995258134e-08, "logits/chosen": -5.590546607971191, "logits/rejected": -6.252056121826172, "logps/chosen": -404.06219482421875, "logps/rejected": -671.0790405273438, "loss": 0.3644, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.4821314811706543, "rewards/margins": 2.973552703857422, "rewards/rejected": -3.455684185028076, "step": 780 }, { "epoch": 0.9096142774899252, "grad_norm": 38.483209821819536, "learning_rate": 1.2204814442165812e-08, "logits/chosen": -5.847277641296387, "logits/rejected": -6.545414924621582, "logps/chosen": -402.4599609375, "logps/rejected": -618.3992309570312, "loss": 0.3744, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5441657900810242, "rewards/margins": 2.8156542778015137, "rewards/rejected": -3.3598198890686035, "step": 790 }, { "epoch": 0.9211283822682786, "grad_norm": 36.88952100776894, "learning_rate": 9.294285595075669e-09, "logits/chosen": -5.882547378540039, "logits/rejected": -6.232880115509033, "logps/chosen": -359.8563537597656, "logps/rejected": -655.06787109375, "loss": 0.3643, "rewards/accuracies": 0.84375, "rewards/chosen": -0.496969074010849, "rewards/margins": 2.759918689727783, "rewards/rejected": -3.256887912750244, "step": 800 }, { "epoch": 0.9211283822682786, "eval_logits/chosen": -5.403136253356934, "eval_logits/rejected": -5.885165214538574, "eval_logps/chosen": -439.6992492675781, "eval_logps/rejected": -584.2129516601562, "eval_loss": 0.31831786036491394, "eval_rewards/accuracies": 0.87109375, "eval_rewards/chosen": -0.6032183170318604, "eval_rewards/margins": 1.5127967596054077, "eval_rewards/rejected": -2.1160147190093994, "eval_runtime": 98.1126, "eval_samples_per_second": 20.385, "eval_steps_per_second": 0.326, "step": 800 }, { "epoch": 0.9326424870466321, "grad_norm": 43.94120514478602, "learning_rate": 6.773183262446914e-09, "logits/chosen": -5.6489362716674805, "logits/rejected": -6.28032112121582, "logps/chosen": -353.1646423339844, "logps/rejected": -609.9522705078125, "loss": 0.3848, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.40985745191574097, "rewards/margins": 2.7903153896331787, "rewards/rejected": -3.2001731395721436, "step": 810 }, { "epoch": 0.9441565918249856, "grad_norm": 33.525448706821926, "learning_rate": 4.645586217799452e-09, "logits/chosen": -5.750053882598877, "logits/rejected": -6.382951259613037, "logps/chosen": -408.31915283203125, "logps/rejected": -624.9613037109375, "loss": 0.3682, "rewards/accuracies": 0.8125, "rewards/chosen": -0.44873589277267456, "rewards/margins": 2.5182459354400635, "rewards/rejected": -2.966981887817383, "step": 820 }, { "epoch": 0.9556706966033391, "grad_norm": 32.59312352646331, "learning_rate": 2.9149366008568987e-09, "logits/chosen": -5.68507194519043, "logits/rejected": -6.2285284996032715, "logps/chosen": -345.0586853027344, "logps/rejected": -635.7188720703125, "loss": 0.3761, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.39172735810279846, "rewards/margins": 2.9998083114624023, "rewards/rejected": -3.391535520553589, "step": 830 }, { "epoch": 0.9671848013816926, "grad_norm": 37.49243505993372, "learning_rate": 1.5840343486700215e-09, "logits/chosen": -5.730424404144287, "logits/rejected": -6.221343040466309, "logps/chosen": -356.298583984375, "logps/rejected": -621.7361450195312, "loss": 0.3928, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.4219423830509186, "rewards/margins": 2.8504931926727295, "rewards/rejected": -3.272435426712036, "step": 840 }, { "epoch": 0.9786989061600461, "grad_norm": 33.08948980944996, "learning_rate": 6.550326657293881e-10, "logits/chosen": -5.9162678718566895, "logits/rejected": -6.479850769042969, "logps/chosen": -360.3614196777344, "logps/rejected": -608.4212646484375, "loss": 0.3596, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.3865709900856018, "rewards/margins": 2.8733856678009033, "rewards/rejected": -3.2599568367004395, "step": 850 }, { "epoch": 0.9902130109383995, "grad_norm": 33.68247028780298, "learning_rate": 1.2943454039654467e-10, "logits/chosen": -5.6706414222717285, "logits/rejected": -6.1612443923950195, "logps/chosen": -388.79510498046875, "logps/rejected": -634.7048950195312, "loss": 0.3777, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.3972472846508026, "rewards/margins": 2.8383138179779053, "rewards/rejected": -3.2355613708496094, "step": 860 }, { "epoch": 0.9994242947610823, "step": 868, "total_flos": 0.0, "train_loss": 0.4218231642850533, "train_runtime": 14967.0092, "train_samples_per_second": 7.425, "train_steps_per_second": 0.058 } ], "logging_steps": 10, "max_steps": 868, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }