{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9994242947610823,
  "eval_steps": 100,
  "global_step": 868,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0011514104778353484,
      "grad_norm": 35.91765211885503,
      "learning_rate": 5.747126436781609e-09,
      "logits/chosen": -2.086653709411621,
      "logits/rejected": -2.069509267807007,
      "logps/chosen": -361.22979736328125,
      "logps/rejected": -328.4201354980469,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.011514104778353483,
      "grad_norm": 37.62574042925476,
      "learning_rate": 5.747126436781609e-08,
      "logits/chosen": -2.192697763442993,
      "logits/rejected": -2.1893699169158936,
      "logps/chosen": -346.8982238769531,
      "logps/rejected": -305.4053039550781,
      "loss": 0.6929,
      "rewards/accuracies": 0.4652777910232544,
      "rewards/chosen": 0.00022573958267457783,
      "rewards/margins": 0.00043605040991678834,
      "rewards/rejected": -0.00021031053620390594,
      "step": 10
    },
    {
      "epoch": 0.023028209556706966,
      "grad_norm": 33.76619596156607,
      "learning_rate": 1.1494252873563217e-07,
      "logits/chosen": -2.170515775680542,
      "logits/rejected": -2.1960134506225586,
      "logps/chosen": -322.89593505859375,
      "logps/rejected": -279.732177734375,
      "loss": 0.6923,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.006018324755132198,
      "rewards/margins": 0.0009490437805652618,
      "rewards/rejected": 0.005069280508905649,
      "step": 20
    },
    {
      "epoch": 0.03454231433506045,
      "grad_norm": 36.02949439768653,
      "learning_rate": 1.7241379310344828e-07,
      "logits/chosen": -2.226337194442749,
      "logits/rejected": -2.215334415435791,
      "logps/chosen": -343.44012451171875,
      "logps/rejected": -305.6834411621094,
      "loss": 0.6875,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.0371861457824707,
      "rewards/margins": 0.012388146482408047,
      "rewards/rejected": 0.02479800209403038,
      "step": 30
    },
    {
      "epoch": 0.04605641911341393,
      "grad_norm": 30.794242683432575,
      "learning_rate": 2.2988505747126435e-07,
      "logits/chosen": -2.3109958171844482,
      "logits/rejected": -2.272737979888916,
      "logps/chosen": -313.8249206542969,
      "logps/rejected": -281.3092956542969,
      "loss": 0.6758,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.10629389435052872,
      "rewards/margins": 0.035184551030397415,
      "rewards/rejected": 0.071109339594841,
      "step": 40
    },
    {
      "epoch": 0.057570523891767415,
      "grad_norm": 29.832104382822315,
      "learning_rate": 2.873563218390804e-07,
      "logits/chosen": -2.4144537448883057,
      "logits/rejected": -2.4051060676574707,
      "logps/chosen": -335.85626220703125,
      "logps/rejected": -322.4024658203125,
      "loss": 0.664,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": 0.20595140755176544,
      "rewards/margins": 0.058795731514692307,
      "rewards/rejected": 0.14715565741062164,
      "step": 50
    },
    {
      "epoch": 0.0690846286701209,
      "grad_norm": 27.97699348851217,
      "learning_rate": 3.4482758620689656e-07,
      "logits/chosen": -2.4252865314483643,
      "logits/rejected": -2.4110381603240967,
      "logps/chosen": -293.0983581542969,
      "logps/rejected": -276.4584655761719,
      "loss": 0.6437,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": 0.26091432571411133,
      "rewards/margins": 0.12072187662124634,
      "rewards/rejected": 0.140192449092865,
      "step": 60
    },
    {
      "epoch": 0.08059873344847437,
      "grad_norm": 26.14817360357517,
      "learning_rate": 4.0229885057471266e-07,
      "logits/chosen": -2.5252156257629395,
      "logits/rejected": -2.488867998123169,
      "logps/chosen": -341.91156005859375,
      "logps/rejected": -308.27032470703125,
      "loss": 0.6192,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": 0.3610069155693054,
      "rewards/margins": 0.20518210530281067,
      "rewards/rejected": 0.15582481026649475,
      "step": 70
    },
    {
      "epoch": 0.09211283822682786,
      "grad_norm": 26.01503586020309,
      "learning_rate": 4.597701149425287e-07,
      "logits/chosen": -2.443207263946533,
      "logits/rejected": -2.4321365356445312,
      "logps/chosen": -303.1759948730469,
      "logps/rejected": -293.99212646484375,
      "loss": 0.5946,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": 0.2370336949825287,
      "rewards/margins": 0.22374853491783142,
      "rewards/rejected": 0.013285147957503796,
      "step": 80
    },
    {
      "epoch": 0.10362694300518134,
      "grad_norm": 28.597789728089687,
      "learning_rate": 4.999817969178237e-07,
      "logits/chosen": -2.468017578125,
      "logits/rejected": -2.45894718170166,
      "logps/chosen": -341.286376953125,
      "logps/rejected": -346.0598449707031,
      "loss": 0.5438,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": 0.2997075915336609,
      "rewards/margins": 0.4598621726036072,
      "rewards/rejected": -0.16015461087226868,
      "step": 90
    },
    {
      "epoch": 0.11514104778353483,
      "grad_norm": 31.239635888342793,
      "learning_rate": 4.996582603056428e-07,
      "logits/chosen": -2.290760040283203,
      "logits/rejected": -2.2722649574279785,
      "logps/chosen": -325.2711181640625,
      "logps/rejected": -352.16949462890625,
      "loss": 0.5118,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -0.0031170793808996677,
      "rewards/margins": 0.5678674578666687,
      "rewards/rejected": -0.5709845423698425,
      "step": 100
    },
    {
      "epoch": 0.11514104778353483,
      "eval_logits/chosen": -2.2212953567504883,
      "eval_logits/rejected": -2.1984219551086426,
      "eval_logps/chosen": -390.5766296386719,
      "eval_logps/rejected": -417.6701354980469,
      "eval_loss": 0.592314600944519,
      "eval_rewards/accuracies": 0.70703125,
      "eval_rewards/chosen": -0.11199207603931427,
      "eval_rewards/margins": 0.3385947644710541,
      "eval_rewards/rejected": -0.45058679580688477,
      "eval_runtime": 98.608,
      "eval_samples_per_second": 20.282,
      "eval_steps_per_second": 0.325,
      "step": 100
    },
    {
      "epoch": 0.1266551525618883,
      "grad_norm": 28.87850245767613,
      "learning_rate": 4.989308132738126e-07,
      "logits/chosen": -2.224853754043579,
      "logits/rejected": -2.1996631622314453,
      "logps/chosen": -334.91888427734375,
      "logps/rejected": -380.91668701171875,
      "loss": 0.4719,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": 0.0493912398815155,
      "rewards/margins": 0.8100606203079224,
      "rewards/rejected": -0.7606694102287292,
      "step": 110
    },
    {
      "epoch": 0.1381692573402418,
      "grad_norm": 29.398659404338673,
      "learning_rate": 4.978006327248536e-07,
      "logits/chosen": -2.199742555618286,
      "logits/rejected": -2.1492202281951904,
      "logps/chosen": -314.296142578125,
      "logps/rejected": -369.991455078125,
      "loss": 0.4704,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": 0.09014983475208282,
      "rewards/margins": 0.9132173657417297,
      "rewards/rejected": -0.8230674862861633,
      "step": 120
    },
    {
      "epoch": 0.1496833621185953,
      "grad_norm": 30.44019666597221,
      "learning_rate": 4.962695471250032e-07,
      "logits/chosen": -2.1790311336517334,
      "logits/rejected": -2.1547985076904297,
      "logps/chosen": -302.8690490722656,
      "logps/rejected": -415.23095703125,
      "loss": 0.4555,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": 0.09897075593471527,
      "rewards/margins": 1.2424136400222778,
      "rewards/rejected": -1.1434428691864014,
      "step": 130
    },
    {
      "epoch": 0.16119746689694875,
      "grad_norm": 33.58601902040164,
      "learning_rate": 4.94340033546025e-07,
      "logits/chosen": -2.2502989768981934,
      "logits/rejected": -2.2536580562591553,
      "logps/chosen": -325.1845397949219,
      "logps/rejected": -431.7062072753906,
      "loss": 0.4345,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": 0.23212842643260956,
      "rewards/margins": 1.2962288856506348,
      "rewards/rejected": -1.0641005039215088,
      "step": 140
    },
    {
      "epoch": 0.17271157167530224,
      "grad_norm": 32.120902840689595,
      "learning_rate": 4.920152136576705e-07,
      "logits/chosen": -2.44754958152771,
      "logits/rejected": -2.4280953407287598,
      "logps/chosen": -325.13916015625,
      "logps/rejected": -465.1835021972656,
      "loss": 0.4604,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": 0.07414035499095917,
      "rewards/margins": 1.312412977218628,
      "rewards/rejected": -1.2382725477218628,
      "step": 150
    },
    {
      "epoch": 0.18422567645365573,
      "grad_norm": 27.142754060910285,
      "learning_rate": 4.892988486772756e-07,
      "logits/chosen": -2.7220418453216553,
      "logits/rejected": -2.731748342514038,
      "logps/chosen": -341.7224426269531,
      "logps/rejected": -451.0387268066406,
      "loss": 0.4331,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": 0.20678754150867462,
      "rewards/margins": 1.3990733623504639,
      "rewards/rejected": -1.1922857761383057,
      "step": 160
    },
    {
      "epoch": 0.19573978123200922,
      "grad_norm": 29.918359187167102,
      "learning_rate": 4.861953332846629e-07,
      "logits/chosen": -2.796257495880127,
      "logits/rejected": -2.810292959213257,
      "logps/chosen": -360.57257080078125,
      "logps/rejected": -441.2469787597656,
      "loss": 0.4495,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 0.04986714571714401,
      "rewards/margins": 1.442570447921753,
      "rewards/rejected": -1.3927034139633179,
      "step": 170
    },
    {
      "epoch": 0.20725388601036268,
      "grad_norm": 28.18581518610586,
      "learning_rate": 4.827096885121953e-07,
      "logits/chosen": -2.9461441040039062,
      "logits/rejected": -2.936654567718506,
      "logps/chosen": -342.01666259765625,
      "logps/rejected": -421.7103576660156,
      "loss": 0.435,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -0.026858195662498474,
      "rewards/margins": 1.3959574699401855,
      "rewards/rejected": -1.4228156805038452,
      "step": 180
    },
    {
      "epoch": 0.21876799078871617,
      "grad_norm": 35.53737142925795,
      "learning_rate": 4.788475536214821e-07,
      "logits/chosen": -3.022078275680542,
      "logits/rejected": -3.0052285194396973,
      "logps/chosen": -336.94830322265625,
      "logps/rejected": -493.62359619140625,
      "loss": 0.4228,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.03777497634291649,
      "rewards/margins": 1.5011249780654907,
      "rewards/rejected": -1.4633500576019287,
      "step": 190
    },
    {
      "epoch": 0.23028209556706966,
      "grad_norm": 32.357788149040054,
      "learning_rate": 4.746151769798818e-07,
      "logits/chosen": -3.098576545715332,
      "logits/rejected": -3.122755527496338,
      "logps/chosen": -350.237060546875,
      "logps/rejected": -476.60345458984375,
      "loss": 0.4206,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": 0.029024356976151466,
      "rewards/margins": 1.669870376586914,
      "rewards/rejected": -1.6408460140228271,
      "step": 200
    },
    {
      "epoch": 0.23028209556706966,
      "eval_logits/chosen": -3.164449691772461,
      "eval_logits/rejected": -3.2280213832855225,
      "eval_logps/chosen": -408.5089416503906,
      "eval_logps/rejected": -480.46405029296875,
      "eval_loss": 0.5054616928100586,
      "eval_rewards/accuracies": 0.80078125,
      "eval_rewards/chosen": -0.2913154363632202,
      "eval_rewards/margins": 0.7872099280357361,
      "eval_rewards/rejected": -1.078525424003601,
      "eval_runtime": 98.2744,
      "eval_samples_per_second": 20.351,
      "eval_steps_per_second": 0.326,
      "step": 200
    },
    {
      "epoch": 0.24179620034542315,
      "grad_norm": 33.674165033906036,
      "learning_rate": 4.7001940595156055e-07,
      "logits/chosen": -3.1950924396514893,
      "logits/rejected": -3.276893138885498,
      "logps/chosen": -364.2984313964844,
      "logps/rejected": -458.85418701171875,
      "loss": 0.4096,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -0.04986700415611267,
      "rewards/margins": 1.6173715591430664,
      "rewards/rejected": -1.6672385931015015,
      "step": 210
    },
    {
      "epoch": 0.2533103051237766,
      "grad_norm": 33.42353087043008,
      "learning_rate": 4.650676758194623e-07,
      "logits/chosen": -3.289186477661133,
      "logits/rejected": -3.4233367443084717,
      "logps/chosen": -340.89410400390625,
      "logps/rejected": -531.8297729492188,
      "loss": 0.417,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.3013092875480652,
      "rewards/margins": 2.0576224327087402,
      "rewards/rejected": -2.35893177986145,
      "step": 220
    },
    {
      "epoch": 0.26482440990213013,
      "grad_norm": 28.030706610514635,
      "learning_rate": 4.5976799775611215e-07,
      "logits/chosen": -3.4384427070617676,
      "logits/rejected": -3.6002049446105957,
      "logps/chosen": -357.27099609375,
      "logps/rejected": -521.6351318359375,
      "loss": 0.4404,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -0.060726016759872437,
      "rewards/margins": 2.054325580596924,
      "rewards/rejected": -2.115051746368408,
      "step": 230
    },
    {
      "epoch": 0.2763385146804836,
      "grad_norm": 30.164608033500873,
      "learning_rate": 4.5412894586271543e-07,
      "logits/chosen": -3.5104153156280518,
      "logits/rejected": -3.591907024383545,
      "logps/chosen": -341.6837463378906,
      "logps/rejected": -471.0796813964844,
      "loss": 0.4392,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -0.23911134898662567,
      "rewards/margins": 1.611322045326233,
      "rewards/rejected": -1.850433588027954,
      "step": 240
    },
    {
      "epoch": 0.28785261945883706,
      "grad_norm": 31.949435858685035,
      "learning_rate": 4.481596432975201e-07,
      "logits/chosen": -3.528832197189331,
      "logits/rejected": -3.651289463043213,
      "logps/chosen": -336.5597229003906,
      "logps/rejected": -484.8773498535156,
      "loss": 0.425,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": 0.026675838977098465,
      "rewards/margins": 1.7153713703155518,
      "rewards/rejected": -1.6886956691741943,
      "step": 250
    },
    {
      "epoch": 0.2993667242371906,
      "grad_norm": 27.939909687462926,
      "learning_rate": 4.41869747515886e-07,
      "logits/chosen": -3.489166736602783,
      "logits/rejected": -3.7278106212615967,
      "logps/chosen": -356.98907470703125,
      "logps/rejected": -521.9197387695312,
      "loss": 0.4148,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": 0.07938538491725922,
      "rewards/margins": 2.32578706741333,
      "rewards/rejected": -2.24640154838562,
      "step": 260
    },
    {
      "epoch": 0.31088082901554404,
      "grad_norm": 34.336437982786,
      "learning_rate": 4.352694346459396e-07,
      "logits/chosen": -3.69819974899292,
      "logits/rejected": -3.856245756149292,
      "logps/chosen": -312.3550109863281,
      "logps/rejected": -512.3087768554688,
      "loss": 0.3868,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": 0.007610364351421595,
      "rewards/margins": 2.3179588317871094,
      "rewards/rejected": -2.3103487491607666,
      "step": 270
    },
    {
      "epoch": 0.3223949337938975,
      "grad_norm": 31.93422033932675,
      "learning_rate": 4.2836938302509256e-07,
      "logits/chosen": -3.8322901725769043,
      "logits/rejected": -4.021459579467773,
      "logps/chosen": -364.43157958984375,
      "logps/rejected": -556.7454223632812,
      "loss": 0.3795,
      "rewards/accuracies": 0.8187500238418579,
      "rewards/chosen": -0.3510279357433319,
      "rewards/margins": 2.118349075317383,
      "rewards/rejected": -2.469377040863037,
      "step": 280
    },
    {
      "epoch": 0.333909038572251,
      "grad_norm": 43.67643614347539,
      "learning_rate": 4.2118075592405874e-07,
      "logits/chosen": -4.014069080352783,
      "logits/rejected": -4.166284561157227,
      "logps/chosen": -366.17498779296875,
      "logps/rejected": -511.95806884765625,
      "loss": 0.4028,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.3753136992454529,
      "rewards/margins": 1.9316318035125732,
      "rewards/rejected": -2.306945562362671,
      "step": 290
    },
    {
      "epoch": 0.3454231433506045,
      "grad_norm": 33.05155256360138,
      "learning_rate": 4.137151834863213e-07,
      "logits/chosen": -3.932748794555664,
      "logits/rejected": -4.1272077560424805,
      "logps/chosen": -338.482666015625,
      "logps/rejected": -491.4756774902344,
      "loss": 0.4144,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -0.12368359416723251,
      "rewards/margins": 1.6778045892715454,
      "rewards/rejected": -1.8014881610870361,
      "step": 300
    },
    {
      "epoch": 0.3454231433506045,
      "eval_logits/chosen": -3.886050224304199,
      "eval_logits/rejected": -4.0962815284729,
      "eval_logps/chosen": -410.2217712402344,
      "eval_logps/rejected": -499.97003173828125,
      "eval_loss": 0.45044589042663574,
      "eval_rewards/accuracies": 0.77734375,
      "eval_rewards/chosen": -0.3084433674812317,
      "eval_rewards/margins": 0.9651419520378113,
      "eval_rewards/rejected": -1.273585319519043,
      "eval_runtime": 99.0297,
      "eval_samples_per_second": 20.196,
      "eval_steps_per_second": 0.323,
      "step": 300
    },
    {
      "epoch": 0.356937248128958,
      "grad_norm": 30.758950038626843,
      "learning_rate": 4.059847439122671e-07,
      "logits/chosen": -4.072343826293945,
      "logits/rejected": -4.278454780578613,
      "logps/chosen": -332.38323974609375,
      "logps/rejected": -486.20587158203125,
      "loss": 0.4126,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": 0.11183549463748932,
      "rewards/margins": 1.9423106908798218,
      "rewards/rejected": -1.8304752111434937,
      "step": 310
    },
    {
      "epoch": 0.36845135290731146,
      "grad_norm": 35.899670349090925,
      "learning_rate": 3.98001943918432e-07,
      "logits/chosen": -4.233328819274902,
      "logits/rejected": -4.456056594848633,
      "logps/chosen": -370.2253723144531,
      "logps/rejected": -577.809814453125,
      "loss": 0.3732,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.1710590422153473,
      "rewards/margins": 2.226891279220581,
      "rewards/rejected": -2.3979504108428955,
      "step": 320
    },
    {
      "epoch": 0.3799654576856649,
      "grad_norm": 31.506974249108822,
      "learning_rate": 3.8977969850346866e-07,
      "logits/chosen": -4.291365146636963,
      "logits/rejected": -4.589537143707275,
      "logps/chosen": -402.2667541503906,
      "logps/rejected": -580.32080078125,
      "loss": 0.4158,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -0.40963658690452576,
      "rewards/margins": 2.1939713954925537,
      "rewards/rejected": -2.6036081314086914,
      "step": 330
    },
    {
      "epoch": 0.39147956246401844,
      "grad_norm": 42.312479747132286,
      "learning_rate": 3.8133131005357465e-07,
      "logits/chosen": -4.51456356048584,
      "logits/rejected": -4.711074352264404,
      "logps/chosen": -356.7383117675781,
      "logps/rejected": -599.3222045898438,
      "loss": 0.3868,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.3934357762336731,
      "rewards/margins": 2.4568190574645996,
      "rewards/rejected": -2.850255012512207,
      "step": 340
    },
    {
      "epoch": 0.4029936672423719,
      "grad_norm": 34.94322397599626,
      "learning_rate": 3.7267044682118435e-07,
      "logits/chosen": -4.381545066833496,
      "logits/rejected": -4.7945661544799805,
      "logps/chosen": -396.62408447265625,
      "logps/rejected": -617.2008666992188,
      "loss": 0.3886,
      "rewards/accuracies": 0.8687499761581421,
      "rewards/chosen": -0.23957356810569763,
      "rewards/margins": 2.6808698177337646,
      "rewards/rejected": -2.920443296432495,
      "step": 350
    },
    {
      "epoch": 0.41450777202072536,
      "grad_norm": 35.153895155661694,
      "learning_rate": 3.638111208117425e-07,
      "logits/chosen": -4.376262664794922,
      "logits/rejected": -4.689536094665527,
      "logps/chosen": -387.55474853515625,
      "logps/rejected": -586.8858642578125,
      "loss": 0.4037,
      "rewards/accuracies": 0.8374999761581421,
      "rewards/chosen": -0.23621347546577454,
      "rewards/margins": 2.256948232650757,
      "rewards/rejected": -2.493161678314209,
      "step": 360
    },
    {
      "epoch": 0.4260218767990789,
      "grad_norm": 30.56527510711544,
      "learning_rate": 3.5476766511433605e-07,
      "logits/chosen": -4.566588878631592,
      "logits/rejected": -4.897808074951172,
      "logps/chosen": -381.00604248046875,
      "logps/rejected": -585.059814453125,
      "loss": 0.3902,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.14318397641181946,
      "rewards/margins": 2.517329692840576,
      "rewards/rejected": -2.6605141162872314,
      "step": 370
    },
    {
      "epoch": 0.43753598157743234,
      "grad_norm": 34.017679923693805,
      "learning_rate": 3.455547107128602e-07,
      "logits/chosen": -4.60725736618042,
      "logits/rejected": -5.102498531341553,
      "logps/chosen": -385.83770751953125,
      "logps/rejected": -623.3347778320312,
      "loss": 0.3929,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.5362241268157959,
      "rewards/margins": 2.6802401542663574,
      "rewards/rejected": -3.2164645195007324,
      "step": 380
    },
    {
      "epoch": 0.44905008635578586,
      "grad_norm": 33.15867623899776,
      "learning_rate": 3.361871628152338e-07,
      "logits/chosen": -4.563677787780762,
      "logits/rejected": -4.989599227905273,
      "logps/chosen": -367.84814453125,
      "logps/rejected": -567.6351318359375,
      "loss": 0.4213,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -0.3700157105922699,
      "rewards/margins": 2.4626548290252686,
      "rewards/rejected": -2.8326706886291504,
      "step": 390
    },
    {
      "epoch": 0.4605641911341393,
      "grad_norm": 35.10207305823101,
      "learning_rate": 3.2668017673896077e-07,
      "logits/chosen": -4.686192035675049,
      "logits/rejected": -5.130132675170898,
      "logps/chosen": -351.6319885253906,
      "logps/rejected": -523.5940551757812,
      "loss": 0.4011,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -0.2101125717163086,
      "rewards/margins": 2.3180549144744873,
      "rewards/rejected": -2.528167247772217,
      "step": 400
    },
    {
      "epoch": 0.4605641911341393,
      "eval_logits/chosen": -4.5018205642700195,
      "eval_logits/rejected": -4.837046146392822,
      "eval_logps/chosen": -421.8441162109375,
      "eval_logps/rejected": -525.9361572265625,
      "eval_loss": 0.4135480225086212,
      "eval_rewards/accuracies": 0.80859375,
      "eval_rewards/chosen": -0.42466747760772705,
      "eval_rewards/margins": 1.1085797548294067,
      "eval_rewards/rejected": -1.5332471132278442,
      "eval_runtime": 98.3292,
      "eval_samples_per_second": 20.34,
      "eval_steps_per_second": 0.325,
      "step": 400
    },
    {
      "epoch": 0.4720782959124928,
      "grad_norm": 33.086992992339596,
      "learning_rate": 3.1704913339205103e-07,
      "logits/chosen": -4.71237850189209,
      "logits/rejected": -5.09951639175415,
      "logps/chosen": -392.43292236328125,
      "logps/rejected": -596.8004150390625,
      "loss": 0.3894,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.45191723108291626,
      "rewards/margins": 2.4984166622161865,
      "rewards/rejected": -2.950334072113037,
      "step": 410
    },
    {
      "epoch": 0.4835924006908463,
      "grad_norm": 36.9499485623677,
      "learning_rate": 3.0730961438896885e-07,
      "logits/chosen": -4.71737003326416,
      "logits/rejected": -5.089630603790283,
      "logps/chosen": -371.7138977050781,
      "logps/rejected": -539.5205078125,
      "loss": 0.3986,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.6353754997253418,
      "rewards/margins": 1.956162452697754,
      "rewards/rejected": -2.591538190841675,
      "step": 420
    },
    {
      "epoch": 0.49510650546919976,
      "grad_norm": 28.416064555595714,
      "learning_rate": 2.9747737684186795e-07,
      "logits/chosen": -4.5956220626831055,
      "logits/rejected": -5.009639263153076,
      "logps/chosen": -388.5729064941406,
      "logps/rejected": -566.389892578125,
      "loss": 0.3953,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.5186115503311157,
      "rewards/margins": 2.118881940841675,
      "rewards/rejected": -2.63749361038208,
      "step": 430
    },
    {
      "epoch": 0.5066206102475532,
      "grad_norm": 35.02068361332514,
      "learning_rate": 2.8756832786789663e-07,
      "logits/chosen": -4.5723748207092285,
      "logits/rejected": -5.229958534240723,
      "logps/chosen": -344.8235778808594,
      "logps/rejected": -562.1149291992188,
      "loss": 0.3753,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": -0.18356148898601532,
      "rewards/margins": 2.6801793575286865,
      "rewards/rejected": -2.863740921020508,
      "step": 440
    },
    {
      "epoch": 0.5181347150259067,
      "grad_norm": 29.90766637224572,
      "learning_rate": 2.7759849885381747e-07,
      "logits/chosen": -4.58120059967041,
      "logits/rejected": -5.108014106750488,
      "logps/chosen": -380.8218688964844,
      "logps/rejected": -558.5294189453125,
      "loss": 0.395,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.36003825068473816,
      "rewards/margins": 2.234218120574951,
      "rewards/rejected": -2.594256639480591,
      "step": 450
    },
    {
      "epoch": 0.5296488198042603,
      "grad_norm": 43.539308942722826,
      "learning_rate": 2.675840195195762e-07,
      "logits/chosen": -4.849000453948975,
      "logits/rejected": -5.308794975280762,
      "logps/chosen": -353.55523681640625,
      "logps/rejected": -619.9716796875,
      "loss": 0.3685,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": -0.29138100147247314,
      "rewards/margins": 2.825038433074951,
      "rewards/rejected": -3.116419553756714,
      "step": 460
    },
    {
      "epoch": 0.5411629245826137,
      "grad_norm": 33.774855687056665,
      "learning_rate": 2.575410918227829e-07,
      "logits/chosen": -4.863161087036133,
      "logits/rejected": -5.457709312438965,
      "logps/chosen": -411.6463317871094,
      "logps/rejected": -598.97314453125,
      "loss": 0.3821,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.46561044454574585,
      "rewards/margins": 2.4459636211395264,
      "rewards/rejected": -2.911574602127075,
      "step": 470
    },
    {
      "epoch": 0.5526770293609672,
      "grad_norm": 33.53580470090372,
      "learning_rate": 2.474859637463226e-07,
      "logits/chosen": -5.079291343688965,
      "logits/rejected": -5.424225807189941,
      "logps/chosen": -389.027099609375,
      "logps/rejected": -587.9437255859375,
      "loss": 0.3962,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -0.4632614254951477,
      "rewards/margins": 2.3001296520233154,
      "rewards/rejected": -2.7633910179138184,
      "step": 480
    },
    {
      "epoch": 0.5641911341393206,
      "grad_norm": 32.1453411001328,
      "learning_rate": 2.3743490301150355e-07,
      "logits/chosen": -5.007067680358887,
      "logits/rejected": -5.361691474914551,
      "logps/chosen": -343.4484558105469,
      "logps/rejected": -570.6577758789062,
      "loss": 0.3902,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.14810001850128174,
      "rewards/margins": 2.4624667167663574,
      "rewards/rejected": -2.6105666160583496,
      "step": 490
    },
    {
      "epoch": 0.5757052389176741,
      "grad_norm": 32.90845084744282,
      "learning_rate": 2.274041707592724e-07,
      "logits/chosen": -4.921438694000244,
      "logits/rejected": -5.355481147766113,
      "logps/chosen": -339.01129150390625,
      "logps/rejected": -556.4103393554688,
      "loss": 0.3915,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -0.14777924120426178,
      "rewards/margins": 2.432879686355591,
      "rewards/rejected": -2.5806591510772705,
      "step": 500
    },
    {
      "epoch": 0.5757052389176741,
      "eval_logits/chosen": -4.767510890960693,
      "eval_logits/rejected": -5.187655925750732,
      "eval_logps/chosen": -418.29376220703125,
      "eval_logps/rejected": -544.0393676757812,
      "eval_loss": 0.37398749589920044,
      "eval_rewards/accuracies": 0.8515625,
      "eval_rewards/chosen": -0.389164000749588,
      "eval_rewards/margins": 1.3251150846481323,
      "eval_rewards/rejected": -1.7142791748046875,
      "eval_runtime": 98.0381,
      "eval_samples_per_second": 20.4,
      "eval_steps_per_second": 0.326,
      "step": 500
    },
    {
      "epoch": 0.5872193436960277,
      "grad_norm": 31.42761305876207,
      "learning_rate": 2.17409995242075e-07,
      "logits/chosen": -5.038609504699707,
      "logits/rejected": -5.722345352172852,
      "logps/chosen": -372.905517578125,
      "logps/rejected": -569.4352416992188,
      "loss": 0.376,
      "rewards/accuracies": 0.8187500238418579,
      "rewards/chosen": -0.27033573389053345,
      "rewards/margins": 2.4031970500946045,
      "rewards/rejected": -2.6735329627990723,
      "step": 510
    },
    {
      "epoch": 0.5987334484743811,
      "grad_norm": 29.61275457382243,
      "learning_rate": 2.0746854556892544e-07,
      "logits/chosen": -5.438863754272461,
      "logits/rejected": -5.798094749450684,
      "logps/chosen": -407.27008056640625,
      "logps/rejected": -620.6509399414062,
      "loss": 0.3645,
      "rewards/accuracies": 0.8374999761581421,
      "rewards/chosen": -0.43467459082603455,
      "rewards/margins": 2.4455971717834473,
      "rewards/rejected": -2.8802719116210938,
      "step": 520
    },
    {
      "epoch": 0.6102475532527346,
      "grad_norm": 27.24117353879226,
      "learning_rate": 1.9759590554616173e-07,
      "logits/chosen": -5.715832710266113,
      "logits/rejected": -6.058187961578369,
      "logps/chosen": -397.95849609375,
      "logps/rejected": -609.6741943359375,
      "loss": 0.3968,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.6830942034721375,
      "rewards/margins": 2.4185569286346436,
      "rewards/rejected": -3.101651191711426,
      "step": 530
    },
    {
      "epoch": 0.6217616580310881,
      "grad_norm": 30.859422948077256,
      "learning_rate": 1.8780804765620746e-07,
      "logits/chosen": -5.4331769943237305,
      "logits/rejected": -5.7857160568237305,
      "logps/chosen": -373.3824462890625,
      "logps/rejected": -528.5029296875,
      "loss": 0.4178,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.4058764576911926,
      "rewards/margins": 1.9241279363632202,
      "rewards/rejected": -2.3300044536590576,
      "step": 540
    },
    {
      "epoch": 0.6332757628094415,
      "grad_norm": 35.78902948656132,
      "learning_rate": 1.7812080721643973e-07,
      "logits/chosen": -5.20429801940918,
      "logits/rejected": -5.622688293457031,
      "logps/chosen": -401.1048889160156,
      "logps/rejected": -605.438232421875,
      "loss": 0.3956,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.27011531591415405,
      "rewards/margins": 2.323632001876831,
      "rewards/rejected": -2.593747615814209,
      "step": 550
    },
    {
      "epoch": 0.644789867587795,
      "grad_norm": 31.09337668064834,
      "learning_rate": 1.6854985675997063e-07,
      "logits/chosen": -5.3274736404418945,
      "logits/rejected": -5.779025554656982,
      "logps/chosen": -370.87823486328125,
      "logps/rejected": -599.370361328125,
      "loss": 0.377,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -0.30361196398735046,
      "rewards/margins": 2.5692386627197266,
      "rewards/rejected": -2.8728506565093994,
      "step": 560
    },
    {
      "epoch": 0.6563039723661486,
      "grad_norm": 31.49748801480019,
      "learning_rate": 1.5911068067978818e-07,
      "logits/chosen": -5.422667503356934,
      "logits/rejected": -5.991160869598389,
      "logps/chosen": -363.42791748046875,
      "logps/rejected": -606.8687744140625,
      "loss": 0.3651,
      "rewards/accuracies": 0.8687499761581421,
      "rewards/chosen": -0.3893515467643738,
      "rewards/margins": 2.7044646739959717,
      "rewards/rejected": -3.093816041946411,
      "step": 570
    },
    {
      "epoch": 0.667818077144502,
      "grad_norm": 40.80686884426901,
      "learning_rate": 1.4981855017728197e-07,
      "logits/chosen": -5.2194623947143555,
      "logits/rejected": -5.8604302406311035,
      "logps/chosen": -378.5892028808594,
      "logps/rejected": -623.4224853515625,
      "loss": 0.3681,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -0.4009127616882324,
      "rewards/margins": 2.839203357696533,
      "rewards/rejected": -3.2401161193847656,
      "step": 580
    },
    {
      "epoch": 0.6793321819228555,
      "grad_norm": 35.637123676945,
      "learning_rate": 1.406884985556804e-07,
      "logits/chosen": -5.340333461761475,
      "logits/rejected": -5.9213457107543945,
      "logps/chosen": -366.98126220703125,
      "logps/rejected": -646.6055297851562,
      "loss": 0.3892,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": -0.3502456843852997,
      "rewards/margins": 3.1350584030151367,
      "rewards/rejected": -3.4853038787841797,
      "step": 590
    },
    {
      "epoch": 0.690846286701209,
      "grad_norm": 38.133176182262396,
      "learning_rate": 1.3173529689837354e-07,
      "logits/chosen": -5.227208137512207,
      "logits/rejected": -5.730982780456543,
      "logps/chosen": -406.6194152832031,
      "logps/rejected": -642.0016479492188,
      "loss": 0.3726,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -0.19344040751457214,
      "rewards/margins": 2.756740093231201,
      "rewards/rejected": -2.9501805305480957,
      "step": 600
    },
    {
      "epoch": 0.690846286701209,
      "eval_logits/chosen": -5.146116256713867,
      "eval_logits/rejected": -5.624752044677734,
      "eval_logps/chosen": -427.4439392089844,
      "eval_logps/rejected": -561.528564453125,
|
"eval_loss": 0.3467547297477722, |
|
"eval_rewards/accuracies": 0.84375, |
|
"eval_rewards/chosen": -0.4806651175022125, |
|
"eval_rewards/margins": 1.408505916595459, |
|
"eval_rewards/rejected": -1.8891710042953491, |
|
"eval_runtime": 98.3003, |
|
"eval_samples_per_second": 20.346, |
|
"eval_steps_per_second": 0.326, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.7023603914795624, |
|
"grad_norm": 35.76369238749813, |
|
"learning_rate": 1.2297343017146726e-07, |
|
"logits/chosen": -5.63295316696167, |
|
"logits/rejected": -6.0680012702941895, |
|
"logps/chosen": -352.22650146484375, |
|
"logps/rejected": -569.6236572265625, |
|
"loss": 0.3654, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.400468111038208, |
|
"rewards/margins": 2.252286672592163, |
|
"rewards/rejected": -2.65275502204895, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.713874496257916, |
|
"grad_norm": 42.53908245265289, |
|
"learning_rate": 1.1441707378923474e-07, |
|
"logits/chosen": -5.555817604064941, |
|
"logits/rejected": -5.891648292541504, |
|
"logps/chosen": -372.3026123046875, |
|
"logps/rejected": -608.4457397460938, |
|
"loss": 0.3719, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.5105666518211365, |
|
"rewards/margins": 2.334003448486328, |
|
"rewards/rejected": -2.844569683074951, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.7253886010362695, |
|
"grad_norm": 33.40462593975916, |
|
"learning_rate": 1.06080070680377e-07, |
|
"logits/chosen": -5.389917850494385, |
|
"logits/rejected": -5.883559226989746, |
|
"logps/chosen": -380.6363525390625, |
|
"logps/rejected": -589.5970458984375, |
|
"loss": 0.3608, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.4320860803127289, |
|
"rewards/margins": 2.423119068145752, |
|
"rewards/rejected": -2.8552052974700928, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.7369027058146229, |
|
"grad_norm": 40.31781331240861, |
|
"learning_rate": 9.797590889219587e-08, |
|
"logits/chosen": -5.418898582458496, |
|
"logits/rejected": -6.029601097106934, |
|
"logps/chosen": -331.7992248535156, |
|
"logps/rejected": -644.7623291015625, |
|
"loss": 0.4071, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.26965656876564026, |
|
"rewards/margins": 3.317509174346924, |
|
"rewards/rejected": -3.5871658325195312, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.7484168105929764, |
|
"grad_norm": 30.964195430126203, |
|
"learning_rate": 9.011769976891367e-08, |
|
"logits/chosen": -5.33644962310791, |
|
"logits/rejected": -5.905170440673828, |
|
"logps/chosen": -370.828369140625, |
|
"logps/rejected": -630.619140625, |
|
"loss": 0.3809, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.340393990278244, |
|
"rewards/margins": 2.9275107383728027, |
|
"rewards/rejected": -3.267904758453369, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.7599309153713298, |
|
"grad_norm": 34.09027033994428, |
|
"learning_rate": 8.251815673944218e-08, |
|
"logits/chosen": -5.566973686218262, |
|
"logits/rejected": -5.901907444000244, |
|
"logps/chosen": -373.8709411621094, |
|
"logps/rejected": -626.88720703125, |
|
"loss": 0.3664, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.31639060378074646, |
|
"rewards/margins": 2.5317635536193848, |
|
"rewards/rejected": -2.848154067993164, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7714450201496834, |
|
"grad_norm": 33.748663190230474, |
|
"learning_rate": 7.518957474892148e-08, |
|
"logits/chosen": -5.544904708862305, |
|
"logits/rejected": -6.055120468139648, |
|
"logps/chosen": -366.33306884765625, |
|
"logps/rejected": -662.8927001953125, |
|
"loss": 0.3675, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.4155319333076477, |
|
"rewards/margins": 3.206387758255005, |
|
"rewards/rejected": -3.621919631958008, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7829591249280369, |
|
"grad_norm": 33.43366335799461, |
|
"learning_rate": 6.814381036730274e-08, |
|
"logits/chosen": -5.3579840660095215, |
|
"logits/rejected": -5.930968284606934, |
|
"logps/chosen": -384.45245361328125, |
|
"logps/rejected": -620.3960571289062, |
|
"loss": 0.3748, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.3938636779785156, |
|
"rewards/margins": 2.738201856613159, |
|
"rewards/rejected": -3.132065773010254, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.7944732297063903, |
|
"grad_norm": 31.210525154632403, |
|
"learning_rate": 6.139226260715872e-08, |
|
"logits/chosen": -5.434956073760986, |
|
"logits/rejected": -5.966610908508301, |
|
"logps/chosen": -387.60162353515625, |
|
"logps/rejected": -664.8744506835938, |
|
"loss": 0.355, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4180675446987152, |
|
"rewards/margins": 2.967360019683838, |
|
"rewards/rejected": -3.385427474975586, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.8059873344847438, |
|
"grad_norm": 33.963445753535076, |
|
"learning_rate": 5.4945854481754734e-08, |
|
"logits/chosen": -5.527676105499268, |
|
"logits/rejected": -5.960885047912598, |
|
"logps/chosen": -374.95916748046875, |
|
"logps/rejected": -630.1693725585938, |
|
"loss": 0.3522, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3166103959083557, |
|
"rewards/margins": 2.8152191638946533, |
|
"rewards/rejected": -3.1318297386169434, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.8059873344847438, |
|
"eval_logits/chosen": -5.210726261138916, |
|
"eval_logits/rejected": -5.681924343109131, |
|
"eval_logps/chosen": -433.6905517578125, |
|
"eval_logps/rejected": -577.3692016601562, |
|
"eval_loss": 0.32489100098609924, |
|
"eval_rewards/accuracies": 0.87890625, |
|
"eval_rewards/chosen": -0.5431313514709473, |
|
"eval_rewards/margins": 1.5044457912445068, |
|
"eval_rewards/rejected": -2.047577142715454, |
|
"eval_runtime": 98.0334, |
|
"eval_samples_per_second": 20.401, |
|
"eval_steps_per_second": 0.326, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.8175014392630973, |
|
"grad_norm": 32.382102785679976, |
|
"learning_rate": 4.881501533321605e-08, |
|
"logits/chosen": -5.631700038909912, |
|
"logits/rejected": -6.175845146179199, |
|
"logps/chosen": -364.59674072265625, |
|
"logps/rejected": -615.4799194335938, |
|
"loss": 0.3861, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.4184879660606384, |
|
"rewards/margins": 2.884592294692993, |
|
"rewards/rejected": -3.3030803203582764, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.8290155440414507, |
|
"grad_norm": 29.844564520231344, |
|
"learning_rate": 4.300966395938377e-08, |
|
"logits/chosen": -5.579652309417725, |
|
"logits/rejected": -6.021969795227051, |
|
"logps/chosen": -410.3070373535156, |
|
"logps/rejected": -654.1072387695312, |
|
"loss": 0.3805, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.40225619077682495, |
|
"rewards/margins": 2.8050906658172607, |
|
"rewards/rejected": -3.2073471546173096, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.8405296488198043, |
|
"grad_norm": 34.64605949847163, |
|
"learning_rate": 3.7539192566655246e-08, |
|
"logits/chosen": -5.749828338623047, |
|
"logits/rejected": -6.230714321136475, |
|
"logps/chosen": -372.4962463378906, |
|
"logps/rejected": -620.4830932617188, |
|
"loss": 0.3701, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.3709852397441864, |
|
"rewards/margins": 2.7844834327697754, |
|
"rewards/rejected": -3.155468702316284, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.8520437535981578, |
|
"grad_norm": 38.917435902608844, |
|
"learning_rate": 3.24124515747731e-08, |
|
"logits/chosen": -5.770384311676025, |
|
"logits/rejected": -6.440248966217041, |
|
"logps/chosen": -377.38360595703125, |
|
"logps/rejected": -670.9470825195312, |
|
"loss": 0.3725, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.46737533807754517, |
|
"rewards/margins": 3.3466858863830566, |
|
"rewards/rejected": -3.814060926437378, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.8635578583765112, |
|
"grad_norm": 35.39576347923302, |
|
"learning_rate": 2.763773529814506e-08, |
|
"logits/chosen": -5.80182409286499, |
|
"logits/rejected": -6.183619976043701, |
|
"logps/chosen": -363.37359619140625, |
|
"logps/rejected": -643.1031494140625, |
|
"loss": 0.3736, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.513085663318634, |
|
"rewards/margins": 2.7367725372314453, |
|
"rewards/rejected": -3.2498581409454346, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.8750719631548647, |
|
"grad_norm": 35.82536365897154, |
|
"learning_rate": 2.3222768526860698e-08, |
|
"logits/chosen": -5.800836563110352, |
|
"logits/rejected": -6.234482288360596, |
|
"logps/chosen": -365.31903076171875, |
|
"logps/rejected": -579.0399169921875, |
|
"loss": 0.3663, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.5012763738632202, |
|
"rewards/margins": 2.1673426628112793, |
|
"rewards/rejected": -2.668619394302368, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8865860679332181, |
|
"grad_norm": 37.880330092886545, |
|
"learning_rate": 1.9174694029115146e-08, |
|
"logits/chosen": -5.784181594848633, |
|
"logits/rejected": -6.484677314758301, |
|
"logps/chosen": -376.74908447265625, |
|
"logps/rejected": -637.3211059570312, |
|
"loss": 0.38, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.3697873055934906, |
|
"rewards/margins": 3.116102933883667, |
|
"rewards/rejected": -3.4858901500701904, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.8981001727115717, |
|
"grad_norm": 37.173154353795034, |
|
"learning_rate": 1.5500060995258134e-08, |
|
"logits/chosen": -5.590546607971191, |
|
"logits/rejected": -6.252056121826172, |
|
"logps/chosen": -404.06219482421875, |
|
"logps/rejected": -671.0790405273438, |
|
"loss": 0.3644, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.4821314811706543, |
|
"rewards/margins": 2.973552703857422, |
|
"rewards/rejected": -3.455684185028076, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.9096142774899252, |
|
"grad_norm": 38.483209821819536, |
|
"learning_rate": 1.2204814442165812e-08, |
|
"logits/chosen": -5.847277641296387, |
|
"logits/rejected": -6.545414924621582, |
|
"logps/chosen": -402.4599609375, |
|
"logps/rejected": -618.3992309570312, |
|
"loss": 0.3744, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.5441657900810242, |
|
"rewards/margins": 2.8156542778015137, |
|
"rewards/rejected": -3.3598198890686035, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.9211283822682786, |
|
"grad_norm": 36.88952100776894, |
|
"learning_rate": 9.294285595075669e-09, |
|
"logits/chosen": -5.882547378540039, |
|
"logits/rejected": -6.232880115509033, |
|
"logps/chosen": -359.8563537597656, |
|
"logps/rejected": -655.06787109375, |
|
"loss": 0.3643, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.496969074010849, |
|
"rewards/margins": 2.759918689727783, |
|
"rewards/rejected": -3.256887912750244, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.9211283822682786, |
|
"eval_logits/chosen": -5.403136253356934, |
|
"eval_logits/rejected": -5.885165214538574, |
|
"eval_logps/chosen": -439.6992492675781, |
|
"eval_logps/rejected": -584.2129516601562, |
|
"eval_loss": 0.31831786036491394, |
|
"eval_rewards/accuracies": 0.87109375, |
|
"eval_rewards/chosen": -0.6032183170318604, |
|
"eval_rewards/margins": 1.5127967596054077, |
|
"eval_rewards/rejected": -2.1160147190093994, |
|
"eval_runtime": 98.1126, |
|
"eval_samples_per_second": 20.385, |
|
"eval_steps_per_second": 0.326, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.9326424870466321, |
|
"grad_norm": 43.94120514478602, |
|
"learning_rate": 6.773183262446914e-09, |
|
"logits/chosen": -5.6489362716674805, |
|
"logits/rejected": -6.28032112121582, |
|
"logps/chosen": -353.1646423339844, |
|
"logps/rejected": -609.9522705078125, |
|
"loss": 0.3848, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.40985745191574097, |
|
"rewards/margins": 2.7903153896331787, |
|
"rewards/rejected": -3.2001731395721436, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.9441565918249856, |
|
"grad_norm": 33.525448706821926, |
|
"learning_rate": 4.645586217799452e-09, |
|
"logits/chosen": -5.750053882598877, |
|
"logits/rejected": -6.382951259613037, |
|
"logps/chosen": -408.31915283203125, |
|
"logps/rejected": -624.9613037109375, |
|
"loss": 0.3682, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.44873589277267456, |
|
"rewards/margins": 2.5182459354400635, |
|
"rewards/rejected": -2.966981887817383, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.9556706966033391, |
|
"grad_norm": 32.59312352646331, |
|
"learning_rate": 2.9149366008568987e-09, |
|
"logits/chosen": -5.68507194519043, |
|
"logits/rejected": -6.2285284996032715, |
|
"logps/chosen": -345.0586853027344, |
|
"logps/rejected": -635.7188720703125, |
|
"loss": 0.3761, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.39172735810279846, |
|
"rewards/margins": 2.9998083114624023, |
|
"rewards/rejected": -3.391535520553589, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.9671848013816926, |
|
"grad_norm": 37.49243505993372, |
|
"learning_rate": 1.5840343486700215e-09, |
|
"logits/chosen": -5.730424404144287, |
|
"logits/rejected": -6.221343040466309, |
|
"logps/chosen": -356.298583984375, |
|
"logps/rejected": -621.7361450195312, |
|
"loss": 0.3928, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.4219423830509186, |
|
"rewards/margins": 2.8504931926727295, |
|
"rewards/rejected": -3.272435426712036, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.9786989061600461, |
|
"grad_norm": 33.08948980944996, |
|
"learning_rate": 6.550326657293881e-10, |
|
"logits/chosen": -5.9162678718566895, |
|
"logits/rejected": -6.479850769042969, |
|
"logps/chosen": -360.3614196777344, |
|
"logps/rejected": -608.4212646484375, |
|
"loss": 0.3596, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.3865709900856018, |
|
"rewards/margins": 2.8733856678009033, |
|
"rewards/rejected": -3.2599568367004395, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9902130109383995, |
|
"grad_norm": 33.68247028780298, |
|
"learning_rate": 1.2943454039654467e-10, |
|
"logits/chosen": -5.6706414222717285, |
|
"logits/rejected": -6.1612443923950195, |
|
"logps/chosen": -388.79510498046875, |
|
"logps/rejected": -634.7048950195312, |
|
"loss": 0.3777, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.3972472846508026, |
|
"rewards/margins": 2.8383138179779053, |
|
"rewards/rejected": -3.2355613708496094, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9994242947610823, |
|
"step": 868, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4218231642850533, |
|
"train_runtime": 14967.0092, |
|
"train_samples_per_second": 7.425, |
|
"train_steps_per_second": 0.058 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 868, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|